v_add_f32_e32 v6, s3, v7 ; 060C0E03 v_mul_f32_e32 v1, v1, v6 ; 10020D01 v_mul_f32_e32 v1, v11, v1 ; 1002030B v_add_f32_e32 v6, s3, v10 ; 060C1403 v_min_f32_e32 v6, 0x3a83126f, v6 ; 1E0C0CFF 3A83126F v_max_f32_e32 v1, v6, v1 ; 20020306 v_mad_f32 v1, 2.0, v1, -v11 ; D2820001 842E02F4 exp 15, 37, 0, 0, 0, v0, v5, v2, v17 ; F800025F 11020500 exp 15, 38, 0, 0, 0, v25, v17, v17, v17 ; F800026F 11111119 exp 15, 12, 0, 0, 0, v3, v4, v1, v11 ; F80000CF 0B010403 exp 15, 13, 0, 1, 0, v17, v17, v17, v17 ; F80008DF 11111111 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 32 Code Size: 1508 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0..37] DCL CONST[2][0..15] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..7] DCL CONST[6][0] DCL CONST[7][0..1] DCL TEMP[0..2], LOCAL IMM[0] UINT32 {1, 160, 176, 192} IMM[1] UINT32 {208, 164, 180, 196} IMM[2] UINT32 {212, 168, 184, 200} IMM[3] UINT32 {216, 172, 188, 204} IMM[4] UINT32 {220, 144, 32, 16} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MAD TEMP[0], CONST[2][10], TEMP[0], CONST[2][9] 3: MOV TEMP[1].xyz, TEMP[0].xyzx 4: MOV_SAT TEMP[0].x, TEMP[0].wwww 5: MOV TEMP[1].w, TEMP[0].xxxx 6: MUL TEMP[0], TEMP[1], IN[1] 7: MUL TEMP[1].xyz, CONST[2][2].zzzz, TEMP[0].xyzz 8: MUL TEMP[1].xyz, TEMP[1].xyzz, CONST[2][1].zzzz 9: MOV_SAT TEMP[2].xyz, TEMP[1].xyzz 10: LRP TEMP[1].xyz, CONST[2][2].yyyy, TEMP[2].xyzz, TEMP[1].xyzz 11: MUL TEMP[1].xyz, TEMP[0].wwww, TEMP[1].xyzz 12: MOV TEMP[2].x, TEMP[1].xxxx 13: MOV TEMP[2].y, TEMP[1].yyyy 14: MOV TEMP[2].z, TEMP[1].zzzz 15: MUL TEMP[0].x, CONST[2][1].wwww, TEMP[0].wwww 16: MOV TEMP[2].w, TEMP[0].xxxx 17: MOV OUT[0], TEMP[2] 18: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172) %36 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %37 = load <32 x i8>, <32 x i8> addrspace(2)* %36, align 32, !tbaa !0 %38 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 %40 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %44 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %45 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %46 = bitcast float %40 to i32 %47 = bitcast float %41 to i32 %48 = insertelement <2 x i32> undef, i32 %46, i32 0 %49 = insertelement <2 x i32> %48, i32 %47, i32 1 %50 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %49, <32 x i8> %37, <16 x i8> %39, i32 2) %51 = extractelement <4 x float> %50, i32 0 %52 = extractelement <4 x float> %50, i32 1 %53 = extractelement <4 x float> %50, i32 2 %54 = extractelement <4 x float> %50, i32 3 %55 = fmul float %32, %51 %56 = fadd float %55, %28 %57 = fmul float %33, %52 %58 = fadd float %57, %29 %59 = fmul float %34, %53 %60 = fadd float %59, %30 %61 = fmul float %35, %54 %62 = fadd float %61, %31 %63 = call float @llvm.AMDIL.clamp.(float %62, float 0.000000e+00, float 1.000000e+00) %64 = fmul float %56, %42 %65 = fmul float %58, %43 %66 = fmul float %60, %44 %67 = fmul float %63, %45 %68 = fmul float %27, %64 %69 = fmul float %27, %65 %70 = fmul float %27, %66 %71 = fmul float %68, %24 %72 = fmul float %69, %24 %73 = fmul float %70, %24 %74 = call float @llvm.AMDIL.clamp.(float %71, float 0.000000e+00, float 1.000000e+00) %75 = call float @llvm.AMDIL.clamp.(float %72, float 0.000000e+00, float 1.000000e+00) %76 = call float @llvm.AMDIL.clamp.(float %73, float 0.000000e+00, float 1.000000e+00) %77 = call float @llvm.AMDGPU.lrp(float %26, float %74, float %71) %78 = call float @llvm.AMDGPU.lrp(float %26, float %75, float %72) %79 = call float @llvm.AMDGPU.lrp(float %26, float %76, float %73) %80 = fmul float %67, %77 %81 = fmul float %67, %78 %82 = fmul float %67, %79 %83 = fmul float %25, %67 %84 = call i32 @llvm.SI.packf16(float %80, float %81) %85 = bitcast i32 %84 to float %86 = call i32 @llvm.SI.packf16(float %82, float %83) %87 = bitcast i32 %86 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %85, float %87, float %85, float %87) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x8 ; C0800308 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x24 ; C2040124 s_buffer_load_dword s9, s[0:3], 0x28 ; C2048128 v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501 s_buffer_load_dword s10, s[0:3], 0x25 ; C2050125 s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 s_buffer_load_dword s4, s[0:3], 0x26 ; C2020126 s_buffer_load_dword s5, s[0:3], 0x27 ; C2028127 s_buffer_load_dword s6, s[0:3], 0x29 ; C2030129 s_buffer_load_dword s7, s[0:3], 0x2a ; C203812A v_interp_p1_f32 v6, v0, 2, 1, [m0] ; C8180600 s_buffer_load_dword s11, s[0:3], 0x2b ; C205812B v_interp_p2_f32 v6, [v6], v1, 2, 1, [m0] ; C8190601 v_interp_p1_f32 v0, v0, 3, 1, [m0] ; C8000700 v_interp_p2_f32 v0, [v0], v1, 3, 1, [m0] ; C8010701 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[7:10], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[12:15] ; F0800F00 00640702 v_mov_b32_e32 v1, s8 ; 7E020208 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v1, s9, v7 ; 3E020E09 v_mov_b32_e32 v2, s10 ; 7E04020A s_buffer_load_dword s8, s[0:3], 0xa ; C204010A v_mac_f32_e32 v2, s6, v8 ; 3E041006 v_mov_b32_e32 v3, s4 ; 7E060204 v_mac_f32_e32 v3, s7, v9 ; 3E061207 v_mov_b32_e32 v7, s5 ; 7E0E0205 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 v_mac_f32_e32 v7, s11, v10 ; 3E0E140B s_buffer_load_dword s5, s[0:3], 0x7 ; C2028107 s_buffer_load_dword s0, s[0:3], 0x9 ; C2000109 v_mul_f32_e32 v1, v4, v1 ; 10020304 v_mul_f32_e32 v2, v5, v2 ; 10040505 v_mul_f32_e32 v3, v6, v3 ; 10060706 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s8, v1 ; 10020208 v_mul_f32_e32 v2, s8, v2 ; 10040408 v_mul_f32_e32 v3, s8, v3 ; 10060608 v_mul_f32_e32 v1, s4, v1 ; 10020204 v_mul_f32_e32 v2, s4, v2 ; 10040404 v_mul_f32_e32 v3, s4, v3 ; 10060604 v_add_f32_e64 v4, 0, v1 clamp ; D2060804 00020280 v_sub_f32_e64 v5, 1.0, s0 ; D2080005 000000F2 v_mul_f32_e32 v1, v1, v5 ; 10020B01 v_mac_f32_e32 v1, s0, v4 ; 3E020800 v_add_f32_e64 v4, 0, v2 clamp ; D2060804 00020480 v_mul_f32_e32 v2, v2, v5 ; 10040B02 v_mac_f32_e32 v2, s0, v4 ; 3E040800 v_mul_f32_e32 v4, v3, v5 ; 10080B03 v_add_f32_e64 v3, 0, v3 clamp ; D2060803 00020680 v_mac_f32_e32 v4, s0, v3 ; 3E080600 v_add_f32_e64 v3, 0, v7 clamp ; D2060803 00020E80 v_mul_f32_e32 v0, v0, v3 ; 10000700 v_mul_f32_e32 v1, v1, v0 ; 10020101 v_mul_f32_e32 v2, v2, v0 ; 10040102 v_mul_f32_e32 v3, v4, v0 ; 10060104 v_mul_f32_e32 v0, s5, v0 ; 10000005 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e32 v0, v3, v0 ; 5E000103 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 312 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL CONST[1][0..41] DCL CONST[2][0..13] DCL CONST[3][0] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MOV TEMP[0].zw, IMM[0].yyxy 4: MOV TEMP[0].x, IN[0].xxxx 5: MOV TEMP[0].y, -IN[0].yyyy 6: MOV OUT[1], TEMP[1] 7: MOV OUT[0], TEMP[0] 8: MOV OUT[2], IN[1] 9: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = add i32 %5, %7 %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13) %15 = extractelement <4 x float> %14, i32 0 %16 = extractelement <4 x float> %14, i32 1 %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 %19 = add i32 %5, %7 %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19) %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = extractelement <4 x float> %20, i32 2 %24 = extractelement <4 x float> %20, i32 3 %25 = fsub float -0.000000e+00, %16 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %21, float %22, float %23, float %24) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %15, float %25, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 v_mov_b32_e32 v1, 0 ; 7E020280 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[4:7], v0, s[4:7], 0 idxen ; E00C2000 80010400 exp 15, 32, 0, 0, 0, v1, v1, v1, v1 ; F800020F 01010101 v_xor_b32_e32 v0, 0x80000000, v3 ; 3A0006FF 80000000 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 33, 0, 0, 0, v4, v5, v6, v7 ; F800021F 07060504 v_mov_b32_e32 v3, 1.0 ; 7E0602F2 exp 15, 12, 0, 0, 0, v2, v0, v1, v3 ; F80000CF 03010002 exp 15, 13, 0, 1, 0, v1, v1, v1, v1 ; F80008DF 01010101 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 92 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL CONST[1][0..41] DCL CONST[2][0..13] DCL CONST[3][0] 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %23 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %24 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %25 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %26 = call i32 @llvm.SI.packf16(float %22, float %23) %27 = bitcast i32 %26 to float %28 = call i32 @llvm.SI.packf16(float %24, float %25) %29 = bitcast i32 %28 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %27, float %29, float %27, float %29) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v0, v0, 3, 0, [m0] ; C8000300 v_interp_p2_f32 v0, [v0], v1, 3, 0, [m0] ; C8010301 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 v_cvt_pkrtz_f16_f32_e32 v0, v4, v0 ; 5E000104 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 56 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL CONST[1][0..41] DCL CONST[2][0..13] DCL CONST[3][0] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL IMM[0] FLT32 { 0.0000, -1.0000, 1.0000, 0.0000} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MOV TEMP[0].zw, IMM[0].zzyz 4: MOV TEMP[0].x, IN[0].xxxx 5: MOV TEMP[0].y, -IN[0].yyyy 6: MOV OUT[1], TEMP[1] 7: MOV OUT[2].xy, IN[1].xyxx 8: MOV OUT[0], TEMP[0] 9: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = add i32 %5, %7 %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13) %15 = extractelement <4 x float> %14, i32 0 %16 = extractelement <4 x float> %14, i32 1 %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 %19 = add i32 %5, %7 %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19) %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = fsub float -0.000000e+00, %16 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %21, float %22, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %15, float %23, float -1.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 v_mov_b32_e32 v1, 0 ; 7E020280 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[4:7], v0, s[4:7], 0 idxen ; E00C2000 80010400 v_mov_b32_e32 v0, 1.0 ; 7E0002F2 exp 15, 32, 0, 0, 0, v1, v1, v1, v1 ; F800020F 01010101 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 33, 0, 0, 0, v4, v5, v1, v1 ; F800021F 01010504 v_xor_b32_e32 v3, 0x80000000, v3 ; 3A0606FF 80000000 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v4, -1.0 ; 7E0802F3 exp 15, 12, 0, 0, 0, v2, v3, v4, v0 ; F80000CF 00040302 exp 15, 13, 0, 1, 0, v1, v1, v1, v1 ; F80008DF 01010101 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 100 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0] DCL CONST[2][0..41] DCL CONST[3][0..13] DCL CONST[4][0] DCL TEMP[0..4], LOCAL IMM[0] FLT32 { 0.0000, -3.0000, -1.1824, 1.1824} IMM[1] UINT32 {0, 0, 0, 0} IMM[2] FLT32 { 3.0000, 0.0044, 0.2960, 0.3990} IMM[3] FLT32 { 0.8000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].y, IMM[0].xxxx 1: MUL TEMP[0].x, CONST[1][0].xxxx, IMM[0].yyyy 2: MOV TEMP[1].y, IMM[0].xxxx 3: MUL TEMP[1].x, CONST[1][0].xxxx, IMM[0].zzzz 4: MOV TEMP[2].y, IMM[0].xxxx 5: MUL TEMP[2].x, CONST[1][0].xxxx, IMM[0].wwww 6: MOV TEMP[3].y, IMM[0].xxxx 7: MUL TEMP[3].x, CONST[1][0].xxxx, IMM[2].xxxx 8: MOV TEMP[4].xy, IN[0].xyyy 9: TEX TEMP[4], TEMP[4], SAMP[0], 2D 10: ADD TEMP[3].xy, TEMP[3].xyyy, IN[0].xyyy 11: MOV TEMP[3].xy, TEMP[3].xyyy 12: TEX TEMP[3], TEMP[3], SAMP[0], 2D 13: ADD TEMP[2].xy, TEMP[2].xyyy, IN[0].xyyy 14: MOV TEMP[2].xy, TEMP[2].xyyy 15: TEX TEMP[2], TEMP[2], SAMP[0], 2D 16: ADD TEMP[0].xy, TEMP[0].xyyy, IN[0].xyyy 17: MOV TEMP[0].xy, TEMP[0].xyyy 18: TEX TEMP[0], TEMP[0], SAMP[0], 2D 19: ADD TEMP[1].xy, TEMP[1].xyyy, IN[0].xyyy 20: MOV TEMP[1].xy, TEMP[1].xyyy 21: TEX TEMP[1], TEMP[1], SAMP[0], 2D 22: MUL TEMP[1], IMM[2].zzzz, TEMP[1] 23: MAD TEMP[0], IMM[2].yyyy, TEMP[0], TEMP[1] 24: MAD TEMP[0], IMM[2].wwww, TEMP[4], TEMP[0] 25: MAD TEMP[0], IMM[2].zzzz, TEMP[2], TEMP[0] 26: MAD TEMP[0], IMM[2].yyyy, TEMP[3], TEMP[0] 27: MOV TEMP[1].w, TEMP[0].wwww 28: MUL TEMP[2].xyz, TEMP[0].xyzz, TEMP[0].xyzz 29: LRP TEMP[0].xyz, IMM[3].xxxx, TEMP[2].xyzz, TEMP[0].xyzz 30: MIN TEMP[1].xyz, TEMP[4].xyzz, TEMP[0].xyzz 31: MOV OUT[0], TEMP[1] 32: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %26 = load <8 x i32>, <8 x i32> addrspace(2)* %25, align 32, !tbaa !0 %27 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %28 = load <4 x i32>, <4 x i32> addrspace(2)* %27, align 16, !tbaa !0 %29 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %31 = fmul float %24, -3.000000e+00 %32 = fmul float %24, 0xBFF2EB3BC0000000 %33 = fmul float %24, 0x3FF2EB3BC0000000 %34 = fmul float %24, 3.000000e+00 %35 = bitcast float %29 to i32 %36 = bitcast float %30 to i32 %37 = insertelement <2 x i32> undef, i32 %35, i32 0 %38 = insertelement <2 x i32> %37, i32 %36, i32 1 %39 = bitcast <8 x i32> %26 to <32 x i8> %40 = bitcast <4 x i32> %28 to <16 x i8> %41 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %38, <32 x i8> %39, <16 x i8> %40, i32 2) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = extractelement <4 x float> %41, i32 2 %45 = extractelement <4 x float> %41, i32 3 %46 = fadd float %34, %29 %47 = fadd float %30, 0.000000e+00 %48 = bitcast float %46 to i32 %49 = bitcast float %47 to i32 %50 = insertelement <2 x i32> undef, i32 %48, i32 0 %51 = insertelement <2 x i32> %50, i32 %49, i32 1 %52 = bitcast <8 x i32> %26 to <32 x i8> %53 = bitcast <4 x i32> %28 to <16 x i8> %54 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %51, <32 x i8> %52, <16 x i8> %53, i32 2) %55 = extractelement <4 x float> %54, i32 0 %56 = extractelement <4 x float> %54, i32 1 %57 = extractelement <4 x float> %54, i32 2 %58 = extractelement <4 x float> %54, i32 3 %59 = fadd float %33, %29 %60 = fadd float %30, 0.000000e+00 %61 = bitcast float %59 to i32 %62 = bitcast float %60 to i32 %63 = insertelement <2 x i32> undef, i32 %61, i32 0 %64 = insertelement <2 x i32> %63, i32 %62, i32 1 %65 = bitcast <8 x i32> %26 to <32 x i8> %66 = bitcast <4 x i32> %28 to <16 x i8> %67 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %64, <32 x i8> %65, <16 x i8> %66, i32 2) %68 = extractelement <4 x float> %67, i32 0 %69 = extractelement <4 x float> %67, i32 1 %70 = extractelement <4 x float> %67, i32 2 %71 = extractelement <4 x float> %67, i32 3 %72 = fadd float %31, %29 %73 = fadd float %30, 0.000000e+00 %74 = bitcast float %72 to i32 %75 = bitcast float %73 to i32 %76 = insertelement <2 x i32> undef, i32 %74, i32 0 %77 = insertelement <2 x i32> %76, i32 %75, i32 1 %78 = bitcast <8 x i32> %26 to <32 x i8> %79 = bitcast <4 x i32> %28 to <16 x i8> %80 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %77, <32 x i8> %78, <16 x i8> %79, i32 2) %81 = extractelement <4 x float> %80, i32 0 %82 = extractelement <4 x float> %80, i32 1 %83 = extractelement <4 x float> %80, i32 2 %84 = extractelement <4 x float> %80, i32 3 %85 = fadd float %32, %29 %86 = fadd float %30, 0.000000e+00 %87 = bitcast float %85 to i32 %88 = bitcast float %86 to i32 %89 = insertelement <2 x i32> undef, i32 %87, i32 0 %90 = insertelement <2 x i32> %89, i32 %88, i32 1 %91 = bitcast <8 x i32> %26 to <32 x i8> %92 = bitcast <4 x i32> %28 to <16 x i8> %93 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %90, <32 x i8> %91, <16 x i8> %92, i32 2) %94 = extractelement <4 x float> %93, i32 0 %95 = extractelement <4 x float> %93, i32 1 %96 = extractelement <4 x float> %93, i32 2 %97 = extractelement <4 x float> %93, i32 3 %98 = fmul float %94, 0x3FD2F25A20000000 %99 = fmul float %95, 0x3FD2F25A20000000 %100 = fmul float %96, 0x3FD2F25A20000000 %101 = fmul float %97, 0x3FD2F25A20000000 %102 = fmul float %81, 0x3F72285660000000 %103 = fadd float %102, %98 %104 = fmul float %82, 0x3F72285660000000 %105 = fadd float %104, %99 %106 = fmul float %83, 0x3F72285660000000 %107 = fadd float %106, %100 %108 = fmul float %84, 0x3F72285660000000 %109 = fadd float %108, %101 %110 = fmul float %42, 0x3FD98A0900000000 %111 = fadd float %110, %103 %112 = fmul float %43, 0x3FD98A0900000000 %113 = fadd float %112, %105 %114 = fmul float %44, 0x3FD98A0900000000 %115 = fadd float %114, %107 %116 = fmul float %45, 0x3FD98A0900000000 %117 = fadd float %116, %109 %118 = fmul float %68, 0x3FD2F25A20000000 %119 = fadd float %118, %111 %120 = fmul float %69, 0x3FD2F25A20000000 %121 = fadd float %120, %113 %122 = fmul float %70, 0x3FD2F25A20000000 %123 = fadd float %122, %115 %124 = fmul float %71, 0x3FD2F25A20000000 %125 = fadd float %124, %117 %126 = fmul float %55, 0x3F72285660000000 %127 = fadd float %126, %119 %128 = fmul float %56, 0x3F72285660000000 %129 = fadd float %128, %121 %130 = fmul float %57, 0x3F72285660000000 %131 = fadd float %130, %123 %132 = fmul float %58, 0x3F72285660000000 %133 = fadd float %132, %125 %134 = fmul float %127, %127 %135 = fmul float %129, %129 %136 = fmul float %131, %131 %137 = call float @llvm.AMDGPU.lrp(float 0x3FE99999A0000000, float %134, float %127) %138 = call float @llvm.AMDGPU.lrp(float 0x3FE99999A0000000, float %135, float %129) %139 = call float @llvm.AMDGPU.lrp(float 0x3FE99999A0000000, float %136, float %131) %140 = call float @llvm.minnum.f32(float %42, float %137) %141 = call float @llvm.minnum.f32(float %43, float %138) %142 = call float @llvm.minnum.f32(float %44, float %139) %143 = call i32 @llvm.SI.packf16(float %140, float %141) %144 = bitcast i32 %143 to float %145 = call i32 @llvm.SI.packf16(float %142, float %133) %146 = bitcast i32 %145 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %144, float %146, float %144, float %146) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[8:11], s[2:3], 0x4 ; C0840304 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[8:11], 0x0 ; C2060900 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 v_mov_b32_e32 v0, 0x40400000 ; 7E0002FF 40400000 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_mov_b32_e32 v1, 0x3f9759de ; 7E0202FF 3F9759DE s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, s12, v0, v2 ; D2820004 040A000C v_add_f32_e32 v5, 0, v3 ; 060A0680 image_sample v[6:9], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010602 image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[4:11], s[0:3] ; F0800F00 00010A04 v_mad_f32 v4, s12, v1, v2 ; D2820004 040A020C v_mov_b32_e32 v0, 0xc0400000 ; 7E0002FF C0400000 image_sample v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[4:11], s[0:3] ; F0800F00 00010E04 v_mad_f32 v4, s12, v0, v2 ; D2820004 040A000C v_mov_b32_e32 v0, 0xbf9759de ; 7E0002FF BF9759DE v_mac_f32_e32 v2, s12, v0 ; 3E04000C v_mov_b32_e32 v3, v5 ; 7E060305 image_sample v[18:21], 15, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[4:11], s[0:3] ; F0800F00 00011204 image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010002 v_mov_b32_e32 v4, 0x3e9792d1 ; 7E0802FF 3E9792D1 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v4, v0 ; 10000104 v_mul_f32_e32 v1, v4, v1 ; 10020304 v_mul_f32_e32 v2, v4, v2 ; 10040504 v_mul_f32_e32 v3, v4, v3 ; 10060704 v_mov_b32_e32 v5, 0x3b9142b3 ; 7E0A02FF 3B9142B3 v_mac_f32_e32 v0, v5, v18 ; 3E002505 v_mac_f32_e32 v1, v5, v19 ; 3E022705 v_mac_f32_e32 v2, v5, v20 ; 3E042905 v_mac_f32_e32 v3, v5, v21 ; 3E062B05 v_mov_b32_e32 v18, 0x3ecc5048 ; 7E2402FF 3ECC5048 v_mac_f32_e32 v0, v18, v6 ; 3E000D12 v_mac_f32_e32 v1, v18, v7 ; 3E020F12 v_mac_f32_e32 v2, v18, v8 ; 3E041112 v_mac_f32_e32 v3, v18, v9 ; 3E061312 v_mac_f32_e32 v0, v4, v14 ; 3E001D04 v_mac_f32_e32 v1, v4, v15 ; 3E021F04 v_mac_f32_e32 v2, v4, v16 ; 3E042104 v_mac_f32_e32 v3, v4, v17 ; 3E062304 v_mac_f32_e32 v0, v5, v10 ; 3E001505 v_mac_f32_e32 v1, v5, v11 ; 3E021705 v_mac_f32_e32 v2, v5, v12 ; 3E041905 v_mac_f32_e32 v3, v5, v13 ; 3E061B05 v_mul_f32_e32 v4, v0, v0 ; 10080100 v_mov_b32_e32 v5, 0x3e4ccccc ; 7E0A02FF 3E4CCCCC v_mul_f32_e32 v0, v5, v0 ; 10000105 v_mov_b32_e32 v9, 0x3f4ccccd ; 7E1202FF 3F4CCCCD v_mac_f32_e32 v0, v9, v4 ; 3E000909 v_mul_f32_e32 v4, v1, v1 ; 10080301 v_mul_f32_e32 v1, v5, v1 ; 10020305 v_mac_f32_e32 v1, v9, v4 ; 3E020909 v_mul_f32_e32 v4, v5, v2 ; 10080505 v_mul_f32_e32 v2, v2, v2 ; 10040502 v_mac_f32_e32 v4, v9, v2 ; 3E080509 v_min_f32_e32 v0, v0, v6 ; 1E000D00 v_min_f32_e32 v1, v1, v7 ; 1E020F01 v_min_f32_e32 v2, v4, v8 ; 1E041104 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 24 Code Size: 348 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0] DCL CONST[2][0..41] DCL CONST[3][0..13] DCL CONST[4][0] DCL TEMP[0..4], LOCAL IMM[0] FLT32 { 0.0000, -3.0000, -1.1824, 1.1824} IMM[1] UINT32 {0, 0, 0, 0} IMM[2] FLT32 { 3.0000, 0.0044, 0.2960, 0.3990} 0: MOV TEMP[0].x, IMM[0].xxxx 1: MUL TEMP[1].x, CONST[1][0].yyyy, IMM[0].yyyy 2: MOV TEMP[0].y, TEMP[1].xxxx 3: MOV TEMP[1].x, IMM[0].xxxx 4: MUL TEMP[2].x, CONST[1][0].yyyy, IMM[0].zzzz 5: MOV TEMP[1].y, TEMP[2].xxxx 6: MOV TEMP[2].x, IMM[0].xxxx 7: MUL TEMP[3].x, CONST[1][0].yyyy, IMM[0].wwww 8: MOV TEMP[2].y, TEMP[3].xxxx 9: MOV TEMP[3].x, IMM[0].xxxx 10: MUL TEMP[4].x, CONST[1][0].yyyy, IMM[2].xxxx 11: MOV TEMP[3].y, TEMP[4].xxxx 12: MOV TEMP[4].xy, IN[0].xyyy 13: TEX TEMP[4], TEMP[4], SAMP[0], 2D 14: ADD TEMP[3].xy, TEMP[3].xyyy, IN[0].xyyy 15: MOV TEMP[3].xy, TEMP[3].xyyy 16: TEX TEMP[3], TEMP[3], SAMP[0], 2D 17: ADD TEMP[2].xy, TEMP[2].xyyy, IN[0].xyyy 18: MOV TEMP[2].xy, TEMP[2].xyyy 19: TEX TEMP[2], TEMP[2], SAMP[0], 2D 20: ADD TEMP[0].xy, TEMP[0].xyyy, IN[0].xyyy 21: MOV TEMP[0].xy, TEMP[0].xyyy 22: TEX TEMP[0], TEMP[0], SAMP[0], 2D 23: ADD TEMP[1].xy, TEMP[1].xyyy, IN[0].xyyy 24: MOV TEMP[1].xy, TEMP[1].xyyy 25: TEX TEMP[1], TEMP[1], SAMP[0], 2D 26: MUL TEMP[1], IMM[2].zzzz, TEMP[1] 27: MAD TEMP[0], IMM[2].yyyy, TEMP[0], TEMP[1] 28: MAD TEMP[0], IMM[2].wwww, TEMP[4], TEMP[0] 29: MAD TEMP[0], IMM[2].zzzz, TEMP[2], TEMP[0] 30: MAD TEMP[0], IMM[2].yyyy, TEMP[3], TEMP[0] 31: MOV TEMP[1].w, TEMP[0].wwww 32: MIN TEMP[1].xyz, TEMP[4].xyzz, TEMP[0].xyzz 33: MOV OUT[0], TEMP[1] 34: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %25 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %26 = load <8 x i32>, <8 x i32> addrspace(2)* %25, align 32, !tbaa !0 %27 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %28 = load <4 x i32>, <4 x i32> addrspace(2)* %27, align 16, !tbaa !0 %29 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %31 = fmul float %24, -3.000000e+00 %32 = fmul float %24, 0xBFF2EB3BC0000000 %33 = fmul float %24, 0x3FF2EB3BC0000000 %34 = fmul float %24, 3.000000e+00 %35 = bitcast float %29 to i32 %36 = bitcast float %30 to i32 %37 = insertelement <2 x i32> undef, i32 %35, i32 0 %38 = insertelement <2 x i32> %37, i32 %36, i32 1 %39 = bitcast <8 x i32> %26 to <32 x i8> %40 = bitcast <4 x i32> %28 to <16 x i8> %41 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %38, <32 x i8> %39, <16 x i8> %40, i32 2) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = extractelement <4 x float> %41, i32 2 %45 = extractelement <4 x float> %41, i32 3 %46 = fadd float %29, 0.000000e+00 %47 = fadd float %34, %30 %48 = bitcast float %46 to i32 %49 = bitcast float %47 to i32 %50 = insertelement <2 x i32> undef, i32 %48, i32 0 %51 = insertelement <2 x i32> %50, i32 %49, i32 1 %52 = bitcast <8 x i32> %26 to <32 x i8> %53 = bitcast <4 x i32> %28 to <16 x i8> %54 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %51, <32 x i8> %52, <16 x i8> %53, i32 2) %55 = extractelement <4 x float> %54, i32 0 %56 = extractelement <4 x float> %54, i32 1 %57 = extractelement <4 x float> %54, i32 2 %58 = extractelement <4 x float> %54, i32 3 %59 = fadd float %29, 0.000000e+00 %60 = fadd float %33, %30 %61 = bitcast float %59 to i32 %62 = bitcast float %60 to i32 %63 = insertelement <2 x i32> undef, i32 %61, i32 0 %64 = insertelement <2 x i32> %63, i32 %62, i32 1 %65 = bitcast <8 x i32> %26 to <32 x i8> %66 = bitcast <4 x i32> %28 to <16 x i8> %67 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %64, <32 x i8> %65, <16 x i8> %66, i32 2) %68 = extractelement <4 x float> %67, i32 0 %69 = extractelement <4 x float> %67, i32 1 %70 = extractelement <4 x float> %67, i32 2 %71 = extractelement <4 x float> %67, i32 3 %72 = fadd float %29, 0.000000e+00 %73 = fadd float %31, %30 %74 = bitcast float %72 to i32 %75 = bitcast float %73 to i32 %76 = insertelement <2 x i32> undef, i32 %74, i32 0 %77 = insertelement <2 x i32> %76, i32 %75, i32 1 %78 = bitcast <8 x i32> %26 to <32 x i8> %79 = bitcast <4 x i32> %28 to <16 x i8> %80 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %77, <32 x i8> %78, <16 x i8> %79, i32 2) %81 = extractelement <4 x float> %80, i32 0 %82 = extractelement <4 x float> %80, i32 1 %83 = extractelement <4 x float> %80, i32 2 %84 = extractelement <4 x float> %80, i32 3 %85 = fadd float %29, 0.000000e+00 %86 = fadd float %32, %30 %87 = bitcast float %85 to i32 %88 = bitcast float %86 to i32 %89 = insertelement <2 x i32> undef, i32 %87, i32 0 %90 = insertelement <2 x i32> %89, i32 %88, i32 1 %91 = bitcast <8 x i32> %26 to <32 x i8> %92 = bitcast <4 x i32> %28 to <16 x i8> %93 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %90, <32 x i8> %91, <16 x i8> %92, i32 2) %94 = extractelement <4 x float> %93, i32 0 %95 = extractelement <4 x float> %93, i32 1 %96 = extractelement <4 x float> %93, i32 2 %97 = extractelement <4 x float> %93, i32 3 %98 = fmul float %94, 0x3FD2F25A20000000 %99 = fmul float %95, 0x3FD2F25A20000000 %100 = fmul float %96, 0x3FD2F25A20000000 %101 = fmul float %97, 0x3FD2F25A20000000 %102 = fmul float %81, 0x3F72285660000000 %103 = fadd float %102, %98 %104 = fmul float %82, 0x3F72285660000000 %105 = fadd float %104, %99 %106 = fmul float %83, 0x3F72285660000000 %107 = fadd float %106, %100 %108 = fmul float %84, 0x3F72285660000000 %109 = fadd float %108, %101 %110 = fmul float %42, 0x3FD98A0900000000 %111 = fadd float %110, %103 %112 = fmul float %43, 0x3FD98A0900000000 %113 = fadd float %112, %105 %114 = fmul float %44, 0x3FD98A0900000000 %115 = fadd float %114, %107 %116 = fmul float %45, 0x3FD98A0900000000 %117 = fadd float %116, %109 %118 = fmul float %68, 0x3FD2F25A20000000 %119 = fadd float %118, %111 %120 = fmul float %69, 0x3FD2F25A20000000 %121 = fadd float %120, %113 %122 = fmul float %70, 0x3FD2F25A20000000 %123 = fadd float %122, %115 %124 = fmul float %71, 0x3FD2F25A20000000 %125 = fadd float %124, %117 %126 = fmul float %55, 0x3F72285660000000 %127 = fadd float %126, %119 %128 = fmul float %56, 0x3F72285660000000 %129 = fadd float %128, %121 %130 = fmul float %57, 0x3F72285660000000 %131 = fadd float %130, %123 %132 = fmul float %58, 0x3F72285660000000 %133 = fadd float %132, %125 %134 = call float @llvm.minnum.f32(float %42, float %127) %135 = call float @llvm.minnum.f32(float %43, float %129) %136 = call float @llvm.minnum.f32(float %44, float %131) %137 = call i32 @llvm.SI.packf16(float %134, float %135) %138 = bitcast i32 %137 to float %139 = call i32 @llvm.SI.packf16(float %136, float %133) %140 = bitcast i32 %139 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %138, float %140, float %138, float %140) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[8:11], s[2:3], 0x4 ; C0840304 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[8:11], 0x1 ; C2060901 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 v_mov_b32_e32 v0, 0x40400000 ; 7E0002FF 40400000 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_add_f32_e32 v4, 0, v2 ; 06080480 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s12, v0, v3 ; D2820005 040E000C v_mov_b32_e32 v0, 0x3f9759de ; 7E0002FF 3F9759DE image_sample v[6:9], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010602 image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[4:11], s[0:3] ; F0800F00 00010A04 v_mad_f32 v5, s12, v0, v3 ; D2820005 040E000C v_mov_b32_e32 v0, 0xc0400000 ; 7E0002FF C0400000 image_sample v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[4:11], s[0:3] ; F0800F00 00010E04 v_mad_f32 v5, s12, v0, v3 ; D2820005 040E000C v_mov_b32_e32 v0, 0xbf9759de ; 7E0002FF BF9759DE v_mac_f32_e32 v3, s12, v0 ; 3E06000C image_sample v[18:21], 15, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[4:11], s[0:3] ; F0800F00 00011204 v_mov_b32_e32 v5, v3 ; 7E0A0303 image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[4:11], s[0:3] ; F0800F00 00010004 v_mov_b32_e32 v4, 0x3e9792d1 ; 7E0802FF 3E9792D1 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v4, v0 ; 10000104 v_mul_f32_e32 v1, v4, v1 ; 10020304 v_mul_f32_e32 v2, v4, v2 ; 10040504 v_mul_f32_e32 v3, v4, v3 ; 10060704 v_mov_b32_e32 v5, 0x3b9142b3 ; 7E0A02FF 3B9142B3 v_mac_f32_e32 v0, v5, v18 ; 3E002505 v_mac_f32_e32 v1, v5, v19 ; 3E022705 v_mac_f32_e32 v2, v5, v20 ; 3E042905 v_mac_f32_e32 v3, v5, v21 ; 3E062B05 v_mov_b32_e32 v18, 0x3ecc5048 ; 7E2402FF 3ECC5048 v_mac_f32_e32 v0, v18, v6 ; 3E000D12 v_mac_f32_e32 v1, v18, v7 ; 3E020F12 v_mac_f32_e32 v2, v18, v8 ; 3E041112 v_mac_f32_e32 v3, v18, v9 ; 3E061312 v_mac_f32_e32 v0, v4, v14 ; 3E001D04 v_mac_f32_e32 v1, v4, v15 ; 3E021F04 v_mac_f32_e32 v2, v4, v16 ; 3E042104 v_mac_f32_e32 v3, v4, v17 ; 3E062304 v_mac_f32_e32 v0, v5, v10 ; 3E001505 v_mac_f32_e32 v1, v5, v11 ; 3E021705 v_mac_f32_e32 v2, v5, v12 ; 3E041905 v_mac_f32_e32 v3, v5, v13 ; 3E061B05 v_min_f32_e32 v0, v0, v6 ; 1E000D00 v_min_f32_e32 v1, v1, v7 ; 1E020F01 v_min_f32_e32 v2, v2, v8 ; 1E041102 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 24 Code Size: 296 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0xB last_cbuf = 3 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %5) %23 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %5) %24 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %5) %25 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %5) %26 = call i32 @llvm.SI.packf16(float %22, float %23) %27 = bitcast i32 %26 to float %28 = call i32 @llvm.SI.packf16(float %24, float %25) %29 = bitcast i32 %28 to float %30 = call i32 @llvm.SI.packf16(float %22, float %23) %31 = bitcast i32 %30 to float %32 = call i32 @llvm.SI.packf16(float %24, float %25) %33 = bitcast i32 %32 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 1, i32 1, float %31, float %33, float %31, float %33) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 2, i32 0, float %22, float %23, float %24, float %25) %34 = call i32 @llvm.SI.packf16(float %22, float %23) %35 = bitcast i32 %34 to float %36 = call i32 @llvm.SI.packf16(float %24, float %25) %37 = bitcast i32 %36 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 3, i32 1, float %35, float %37, float %35, float %37) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %27, float %29, float %27, float %29) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s9 ; BEFC0309 v_interp_mov_f32 v0, P0, 0, 0, [m0] ; C8020002 v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102 v_interp_mov_f32 v2, P0, 2, 0, [m0] ; C80A0202 v_interp_mov_f32 v3, P0, 3, 0, [m0] ; C80E0302 v_cvt_pkrtz_f16_f32_e32 v4, v0, v1 ; 5E080300 v_cvt_pkrtz_f16_f32_e32 v5, v2, v3 ; 5E0A0702 exp 15, 1, 1, 0, 0, v4, v5, v4, v5 ; F800041F 05040504 exp 15, 2, 0, 0, 0, v0, v1, v2, v3 ; F800002F 03020100 exp 15, 3, 1, 0, 0, v4, v5, v4, v5 ; F800043F 05040504 exp 15, 0, 1, 1, 1, v4, v5, v4, v5 ; F8001C0F 05040504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 64 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL OUT[4], GENERIC[2] DCL OUT[5], GENERIC[3] DCL OUT[6], GENERIC[4] DCL OUT[7], GENERIC[5] DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0..2] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL DCL TEMP[2..10], LOCAL IMM[0] FLT32 { 0.0000, 255.0000, -128.0000, 1.0000} IMM[1] INT32 {1, 0, 2, 3} IMM[2] FLT32 { -64.0000, 0.0159, 2.0000, 16.0000} IMM[3] UINT32 {3, 304, 320, 4} IMM[4] UINT32 {0, 12, 28, 44} IMM[5] UINT32 {60, 24, 32, 16} IMM[6] UINT32 {48, 20, 36, 52} IMM[7] UINT32 {8, 40, 56, 0} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MOV TEMP[0].xy, IN[3].xyyy 4: MOV TEMP[0].w, IMM[0].xxxx 5: TXL TEMP[0], TEMP[0], SAMP[0], 2D 6: MOV TEMP[2].xy, IN[3].xyyy 7: MOV TEMP[2].w, IMM[0].xxxx 8: TXL TEMP[2], TEMP[2], SAMP[0], 2D, IMM[1].xyx 9: MOV TEMP[3].xy, IN[3].xyyy 10: MOV TEMP[3].w, IMM[0].xxxx 11: TXL TEMP[3], TEMP[3], SAMP[0], 2D, IMM[1].zyz 12: MAD TEMP[4], IN[1], IMM[0].yyyy, IMM[0].zzzz 13: FSLT TEMP[5], TEMP[4], IMM[0].xxxx 14: AND TEMP[5], TEMP[5], IMM[0].wwww 15: ABS TEMP[4], TEMP[4] 16: ADD TEMP[4], TEMP[4], -TEMP[5] 17: ADD TEMP[4], TEMP[4], IMM[2].xxxx 18: FSLT TEMP[6], TEMP[4], IMM[0].xxxx 19: AND TEMP[6], TEMP[6], IMM[0].wwww 20: ABS TEMP[4], TEMP[4] 21: ADD TEMP[4], TEMP[4], -TEMP[6] 22: MUL TEMP[4], TEMP[4], IMM[2].yyyy 23: MUL TEMP[6], TEMP[6], IMM[2].zzzz 24: ADD TEMP[6], IMM[0].wwww, -TEMP[6] 25: MUL TEMP[5], IMM[2].zzzz, TEMP[5] 26: ADD TEMP[5].xzw, IMM[0].wwww, -TEMP[5] 27: MOV TEMP[7].x, TEMP[4].xxxx 28: MOV TEMP[7].y, TEMP[4].yyyy 29: ADD TEMP[8].x, IMM[0].wwww, -TEMP[4].xxxx 30: ADD TEMP[8].x, TEMP[8].xxxx, -TEMP[4].yyyy 31: MOV TEMP[7].z, TEMP[8].xxxx 32: DP3 TEMP[8].x, TEMP[7].xyzz, TEMP[7].xyzz 33: RSQ TEMP[8].x, TEMP[8].xxxx 34: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[8].xxxx 35: MUL TEMP[8].xy, TEMP[7].xyyy, TEMP[6].xyyy 36: MOV TEMP[9].x, TEMP[4].zzzz 37: MOV TEMP[9].y, TEMP[4].wwww 38: ADD TEMP[10].x, IMM[0].wwww, -TEMP[4].zzzz 39: ADD TEMP[4].x, TEMP[10].xxxx, -TEMP[4].wwww 40: MOV TEMP[9].z, TEMP[4].xxxx 41: DP3 TEMP[4].x, TEMP[9].xyzz, TEMP[9].xyzz 42: RSQ TEMP[4].x, TEMP[4].xxxx 43: MUL TEMP[4].xyz, TEMP[9].xyzz, TEMP[4].xxxx 44: MUL TEMP[6].xy, TEMP[4].xyyy, TEMP[6].zwww 45: MOV TEMP[9].w, IMM[0].xxxx 46: MOV TEMP[9].x, TEMP[8].xxxx 47: MOV TEMP[9].y, TEMP[8].yyyy 48: MUL TEMP[7].x, TEMP[7].zzzz, TEMP[5].xxxx 49: MOV TEMP[9].z, TEMP[7].xxxx 50: DP4 TEMP[7].x, TEMP[9], TEMP[0] 51: DP4 TEMP[8].x, TEMP[9], TEMP[2] 52: MOV TEMP[7].y, TEMP[8].xxxx 53: DP4 TEMP[8].x, TEMP[9], TEMP[3] 54: MOV TEMP[7].z, TEMP[8].xxxx 55: DP3 TEMP[8].x, TEMP[7].xyzz, TEMP[7].xyzz 56: RSQ TEMP[8].x, TEMP[8].xxxx 57: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[8].xxxx 58: MOV TEMP[8].w, IMM[0].xxxx 59: MOV TEMP[8].x, TEMP[6].xxxx 60: MOV TEMP[8].y, TEMP[6].yyyy 61: MUL TEMP[4].x, TEMP[4].zzzz, TEMP[5].zzzz 62: MOV TEMP[8].z, TEMP[4].xxxx 63: DP4 TEMP[4].x, TEMP[8], TEMP[0] 64: DP4 TEMP[6].x, TEMP[8], TEMP[2] 65: MOV TEMP[4].y, TEMP[6].xxxx 66: DP4 TEMP[6].x, TEMP[8], TEMP[3] 67: MOV TEMP[4].z, TEMP[6].xxxx 68: DP3 TEMP[6].x, TEMP[4].xyzz, TEMP[7].xyzz 69: MUL TEMP[6].xyz, TEMP[6].xxxx, TEMP[7].xyzz 70: ADD TEMP[4].xyz, TEMP[4].xyzz, -TEMP[6].xyzz 71: DP3 TEMP[6].x, TEMP[4].xyzz, TEMP[4].xyzz 72: RSQ TEMP[6].x, TEMP[6].xxxx 73: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[6].xxxx 74: MOV TEMP[6].x, TEMP[4].xxxx 75: MOV TEMP[6].y, TEMP[4].yyyy 76: MOV TEMP[6].z, TEMP[4].zzzz 77: MOV TEMP[6].w, TEMP[5].wwww 78: MOV TEMP[4].w, IMM[0].wwww 79: MOV TEMP[4].x, IN[0].xxxx 80: MOV TEMP[4].y, IN[0].yyyy 81: MOV TEMP[4].z, IN[0].zzzz 82: DP4 TEMP[0].x, TEMP[4], TEMP[0] 83: DP4 TEMP[2].x, TEMP[4], TEMP[2] 84: DP4 TEMP[3].x, TEMP[4], TEMP[3] 85: MOV TEMP[4].x, TEMP[0].xxxx 86: MOV TEMP[4].y, TEMP[2].xxxx 87: MOV TEMP[4].z, TEMP[3].xxxx 88: ADD TEMP[4].xyz, TEMP[4].xyzz, -CONST[4][19].xyzz 89: MOV TEMP[5].x, TEMP[0].xxxx 90: MOV TEMP[5].y, TEMP[2].xxxx 91: MOV TEMP[5].z, TEMP[3].xxxx 92: DP3 TEMP[8].x, CONST[4][20].xyzz, TEMP[4].xyzz 93: MOV TEMP[5].w, TEMP[8].xxxx 94: MOV TEMP[8].x, TEMP[7].xxxx 95: MOV TEMP[8].y, TEMP[7].yyyy 96: MOV TEMP[8].z, TEMP[7].zzzz 97: DP3 TEMP[9].x, TEMP[4].xyzz, TEMP[4].xyzz 98: RSQ TEMP[9].x, TEMP[9].xxxx 99: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[9].xxxx 100: DP3 TEMP[9].x, TEMP[7].xyzz, CONST[5][0].xyzz 101: MUL TEMP[7].xyz, TEMP[9].xxxx, TEMP[7].xyzz 102: MUL TEMP[7].xyz, IMM[2].zzzz, TEMP[7].xyzz 103: ADD TEMP[7].xyz, CONST[5][0].xyzz, -TEMP[7].xyzz 104: DP3 TEMP[4].x, -TEMP[4].xyzz, TEMP[7].xyzz 105: MOV_SAT TEMP[4].x, TEMP[4].xxxx 106: POW TEMP[4].x, TEMP[4].xxxx, IMM[2].wwww 107: MOV_SAT TEMP[4].x, TEMP[4].xxxx 108: MOV TEMP[8].w, TEMP[4].xxxx 109: MOV TEMP[4].w, IMM[0].wwww 110: MOV TEMP[4].x, TEMP[0].xxxx 111: MOV TEMP[4].y, TEMP[2].xxxx 112: MOV TEMP[4].z, TEMP[3].xxxx 113: MOV TEMP[0].x, CONST[4][0].wwww 114: MOV TEMP[0].y, CONST[4][1].wwww 115: MOV TEMP[0].z, CONST[4][2].wwww 116: MOV TEMP[0].w, CONST[4][3].wwww 117: DP4 TEMP[0].x, TEMP[4], TEMP[0] 118: MAD TEMP[2].xy, IN[2].xyyy, CONST[1][1].zwww, CONST[1][2].xyyy 119: MOV TEMP[3].xy, IN[3].xyyy 120: MOV TEMP[3].w, IMM[0].xxxx 121: TXL TEMP[3], TEMP[3], SAMP[0], 2D, IMM[1].wyw 122: MOV TEMP[7].x, CONST[4][0].xxxx 123: MOV TEMP[7].y, CONST[4][1].xxxx 124: MOV TEMP[7].z, CONST[4][2].xxxx 125: MOV TEMP[7].w, CONST[4][3].xxxx 126: DP4 TEMP[7].x, TEMP[4], TEMP[7] 127: MOV TEMP[9].x, CONST[4][0].yyyy 128: MOV TEMP[9].y, CONST[4][1].yyyy 129: MOV TEMP[9].z, CONST[4][2].yyyy 130: MOV TEMP[9].w, CONST[4][3].yyyy 131: DP4 TEMP[9].x, TEMP[4], TEMP[9] 132: MOV TEMP[7].y, -TEMP[9].xxxx 133: MOV TEMP[9].x, CONST[4][0].zzzz 134: MOV TEMP[9].y, CONST[4][1].zzzz 135: MOV TEMP[9].z, CONST[4][2].zzzz 136: MOV TEMP[9].w, CONST[4][3].zzzz 137: DP4 TEMP[4].x, TEMP[4], TEMP[9] 138: MAD TEMP[4].x, TEMP[4].xxxx, IMM[2].zzzz, -TEMP[0].xxxx 139: MOV TEMP[7].z, TEMP[4].xxxx 140: MOV TEMP[7].w, TEMP[0].xxxx 141: MOV OUT[1], TEMP[1] 142: MOV OUT[2].xy, TEMP[2].xyxx 143: MOV OUT[4], TEMP[8] 144: MOV OUT[6], IMM[0].xxxx 145: MOV OUT[7], TEMP[6] 146: MOV OUT[5], TEMP[3] 147: MOV OUT[0], TEMP[7] 148: MOV OUT[3], TEMP[5] 149: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %17 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4 %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 %19 = call float @llvm.SI.load.const(<16 x i8> %18, i32 0) %20 = call float @llvm.SI.load.const(<16 x i8> %18, i32 4) %21 = call float @llvm.SI.load.const(<16 x i8> %18, i32 8) %22 = call float @llvm.SI.load.const(<16 x i8> %18, i32 12) %23 = call float @llvm.SI.load.const(<16 x i8> %18, i32 16) %24 = call float @llvm.SI.load.const(<16 x i8> %18, i32 20) %25 = call float @llvm.SI.load.const(<16 x i8> %18, i32 24) %26 = call float @llvm.SI.load.const(<16 x i8> %18, i32 28) %27 = call float @llvm.SI.load.const(<16 x i8> %18, i32 32) %28 = call float @llvm.SI.load.const(<16 x i8> %18, i32 36) %29 = call float @llvm.SI.load.const(<16 x i8> %18, i32 40) %30 = call float @llvm.SI.load.const(<16 x i8> %18, i32 44) %31 = call float @llvm.SI.load.const(<16 x i8> %18, i32 48) %32 = call float @llvm.SI.load.const(<16 x i8> %18, i32 52) %33 = call float @llvm.SI.load.const(<16 x i8> %18, i32 56) %34 = call float @llvm.SI.load.const(<16 x i8> %18, i32 60) %35 = call float @llvm.SI.load.const(<16 x i8> %18, i32 304) %36 = call float @llvm.SI.load.const(<16 x i8> %18, i32 308) %37 = call float @llvm.SI.load.const(<16 x i8> %18, i32 312) %38 = call float @llvm.SI.load.const(<16 x i8> %18, i32 320) %39 = call float @llvm.SI.load.const(<16 x i8> %18, i32 324) %40 = call float @llvm.SI.load.const(<16 x i8> %18, i32 328) %41 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 5 %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 %43 = call float @llvm.SI.load.const(<16 x i8> %42, i32 0) %44 = call float @llvm.SI.load.const(<16 x i8> %42, i32 4) %45 = call float @llvm.SI.load.const(<16 x i8> %42, i32 8) %46 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %47 = load <8 x i32>, <8 x i32> addrspace(2)* %46, align 32, !tbaa !0 %48 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %49 = load <4 x i32>, <4 x i32> addrspace(2)* %48, align 16, !tbaa !0 %50 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %51 = load <16 x i8>, <16 x i8> addrspace(2)* %50, align 16, !tbaa !0 %52 = add i32 %5, %7 %53 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %51, i32 0, i32 %52) %54 = extractelement <4 x float> %53, i32 0 %55 = extractelement <4 x float> %53, i32 1 %56 = extractelement <4 x float> %53, i32 2 %57 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %58 = load <16 x i8>, <16 x i8> addrspace(2)* %57, align 16, !tbaa !0 %59 = add i32 %5, %7 %60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %58, i32 0, i32 %59) %61 = extractelement <4 x float> %60, i32 0 %62 = extractelement <4 x float> %60, i32 1 %63 = extractelement <4 x float> %60, i32 2 %64 = extractelement <4 x float> %60, i32 3 %65 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %66 = load <16 x i8>, <16 x i8> addrspace(2)* %65, align 16, !tbaa !0 %67 = add i32 %5, %7 %68 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %66, i32 0, i32 %67) %69 = extractelement <4 x float> %68, i32 0 %70 = extractelement <4 x float> %68, i32 1 %71 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0 %73 = add i32 %10, %6 %74 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %72, i32 0, i32 %73) %75 = extractelement <4 x float> %74, i32 0 %76 = extractelement <4 x float> %74, i32 1 %77 = bitcast float %75 to i32 %78 = bitcast float %76 to i32 %79 = insertelement <4 x i32> undef, i32 %77, i32 0 %80 = insertelement <4 x i32> %79, i32 %78, i32 1 %81 = insertelement <4 x i32> %80, i32 0, i32 2 %82 = bitcast <8 x i32> %47 to <32 x i8> %83 = bitcast <4 x i32> %49 to <16 x i8> %84 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %81, <32 x i8> %82, <16 x i8> %83, i32 2) %85 = extractelement <4 x float> %84, i32 0 %86 = extractelement <4 x float> %84, i32 1 %87 = extractelement <4 x float> %84, i32 2 %88 = extractelement <4 x float> %84, i32 3 %89 = bitcast float %75 to i32 %90 = bitcast float %76 to i32 %91 = insertelement <4 x i32> , i32 %89, i32 1 %92 = insertelement <4 x i32> %91, i32 %90, i32 2 %93 = insertelement <4 x i32> %92, i32 0, i32 3 %94 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %93, <8 x i32> %47, <4 x i32> %49, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %95 = extractelement <4 x float> %94, i32 0 %96 = extractelement <4 x float> %94, i32 1 %97 = extractelement <4 x float> %94, i32 2 %98 = extractelement <4 x float> %94, i32 3 %99 = bitcast float %75 to i32 %100 = bitcast float %76 to i32 %101 = insertelement <4 x i32> , i32 %99, i32 1 %102 = insertelement <4 x i32> %101, i32 %100, i32 2 %103 = insertelement <4 x i32> %102, i32 0, i32 3 %104 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %103, <8 x i32> %47, <4 x i32> %49, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %105 = extractelement <4 x float> %104, i32 0 %106 = extractelement <4 x float> %104, i32 1 %107 = extractelement <4 x float> %104, i32 2 %108 = extractelement <4 x float> %104, i32 3 %109 = fmul float %61, 2.550000e+02 %110 = fadd float %109, -1.280000e+02 %111 = fmul float %62, 2.550000e+02 %112 = fadd float %111, -1.280000e+02 %113 = fmul float %63, 2.550000e+02 %114 = fadd float %113, -1.280000e+02 %115 = fmul float %64, 2.550000e+02 %116 = fadd float %115, -1.280000e+02 %117 = fcmp olt float %110, 0.000000e+00 %118 = fcmp olt float %112, 0.000000e+00 %119 = fcmp olt float %114, 0.000000e+00 %120 = fcmp olt float %116, 0.000000e+00 %121 = select i1 %117, float 1.000000e+00, float 0.000000e+00 %122 = select i1 %119, float 1.000000e+00, float 0.000000e+00 %123 = select i1 %120, float 1.000000e+00, float 0.000000e+00 %124 = call float @fabs(float %110) %125 = call float @fabs(float %112) %126 = call float @fabs(float %114) %127 = call float @fabs(float %116) %128 = fsub float %124, %121 %129 = select i1 %118, float -1.000000e+00, float -0.000000e+00 %130 = fadd float %125, %129 %131 = fsub float %126, %122 %132 = fsub float %127, %123 %133 = fadd float %128, -6.400000e+01 %134 = fadd float %130, -6.400000e+01 %135 = fadd float %131, -6.400000e+01 %136 = fadd float %132, -6.400000e+01 %137 = fcmp olt float %133, 0.000000e+00 %138 = fcmp olt float %134, 0.000000e+00 %139 = fcmp olt float %135, 0.000000e+00 %140 = fcmp olt float %136, 0.000000e+00 %141 = select i1 %137, float 1.000000e+00, float 0.000000e+00 %142 = select i1 %138, float 1.000000e+00, float 0.000000e+00 %143 = select i1 %139, float 1.000000e+00, float 0.000000e+00 %144 = select i1 %140, float 1.000000e+00, float 0.000000e+00 %145 = call float @fabs(float %133) %146 = call float @fabs(float %134) %147 = call float @fabs(float %135) %148 = call float @fabs(float %136) %149 = fsub float %145, %141 %150 = fsub float %146, %142 %151 = fsub float %147, %143 %152 = fsub float %148, %144 %153 = fmul float %149, 0x3F90410420000000 %154 = fmul float %150, 0x3F90410420000000 %155 = fmul float %151, 0x3F90410420000000 %156 = fmul float %152, 0x3F90410420000000 %157 = fmul float %141, 2.000000e+00 %158 = fmul float %142, 2.000000e+00 %159 = fmul float %143, 2.000000e+00 %160 = fmul float %144, 2.000000e+00 %161 = fsub float 1.000000e+00, %157 %162 = fsub float 1.000000e+00, %158 %163 = fsub float 1.000000e+00, %159 %164 = fsub float 1.000000e+00, %160 %165 = fmul float %121, 2.000000e+00 %166 = fmul float %122, 2.000000e+00 %167 = fmul float %123, 2.000000e+00 %168 = fsub float 1.000000e+00, %165 %169 = fsub float 1.000000e+00, %166 %170 = fsub float 1.000000e+00, %167 %171 = fsub float 1.000000e+00, %153 %172 = fsub float %171, %154 %173 = fmul float %153, %153 %174 = fmul float %154, %154 %175 = fadd float %174, %173 %176 = fmul float %172, %172 %177 = fadd float %175, %176 %178 = call float @llvm.AMDGPU.rsq.clamped.f32(float %177) %179 = fmul float %153, %178 %180 = fmul float %154, %178 %181 = fmul float %172, %178 %182 = fmul float %179, %161 %183 = fmul float %180, %162 %184 = fsub float 1.000000e+00, %155 %185 = fsub float %184, %156 %186 = fmul float %155, %155 %187 = fmul float %156, %156 %188 = fadd float %187, %186 %189 = fmul float %185, %185 %190 = fadd float %188, %189 %191 = call float @llvm.AMDGPU.rsq.clamped.f32(float %190) %192 = fmul float %155, %191 %193 = fmul float %156, %191 %194 = fmul float %185, %191 %195 = fmul float %192, %163 %196 = fmul float %193, %164 %197 = fmul float %181, %168 %198 = fmul float %182, %85 %199 = fmul float %183, %86 %200 = fadd float %198, %199 %201 = fmul float %197, %87 %202 = fadd float %200, %201 %203 = fmul float %88, 0.000000e+00 %204 = fadd float %202, %203 %205 = fmul float %182, %95 %206 = fmul float %183, %96 %207 = fadd float %205, %206 %208 = fmul float %197, %97 %209 = fadd float %207, %208 %210 = fmul float %98, 0.000000e+00 %211 = fadd float %209, %210 %212 = fmul float %182, %105 %213 = fmul float %183, %106 %214 = fadd float %212, %213 %215 = fmul float %197, %107 %216 = fadd float %214, %215 %217 = fmul float %108, 0.000000e+00 %218 = fadd float %216, %217 %219 = fmul float %204, %204 %220 = fmul float %211, %211 %221 = fadd float %220, %219 %222 = fmul float %218, %218 %223 = fadd float %221, %222 %224 = call float @llvm.AMDGPU.rsq.clamped.f32(float %223) %225 = fmul float %204, %224 %226 = fmul float %211, %224 %227 = fmul float %218, %224 %228 = fmul float %194, %169 %229 = fmul float %195, %85 %230 = fmul float %196, %86 %231 = fadd float %229, %230 %232 = fmul float %228, %87 %233 = fadd float %231, %232 %234 = fmul float %88, 0.000000e+00 %235 = fadd float %233, %234 %236 = fmul float %195, %95 %237 = fmul float %196, %96 %238 = fadd float %236, %237 %239 = fmul float %228, %97 %240 = fadd float %238, %239 %241 = fmul float %98, 0.000000e+00 %242 = fadd float %240, %241 %243 = fmul float %195, %105 %244 = fmul float %196, %106 %245 = fadd float %243, %244 %246 = fmul float %228, %107 %247 = fadd float %245, %246 %248 = fmul float %108, 0.000000e+00 %249 = fadd float %247, %248 %250 = fmul float %235, %225 %251 = fmul float %242, %226 %252 = fadd float %251, %250 %253 = fmul float %249, %227 %254 = fadd float %252, %253 %255 = fmul float %254, %225 %256 = fmul float %254, %226 %257 = fmul float %254, %227 %258 = fsub float %235, %255 %259 = fsub float %242, %256 %260 = fsub float %249, %257 %261 = fmul float %258, %258 %262 = fmul float %259, %259 %263 = fadd float %262, %261 %264 = fmul float %260, %260 %265 = fadd float %263, %264 %266 = call float @llvm.AMDGPU.rsq.clamped.f32(float %265) %267 = fmul float %258, %266 %268 = fmul float %259, %266 %269 = fmul float %260, %266 %270 = fmul float %54, %85 %271 = fmul float %55, %86 %272 = fadd float %270, %271 %273 = fmul float %56, %87 %274 = fadd float %272, %273 %275 = fadd float %274, %88 %276 = fmul float %54, %95 %277 = fmul float %55, %96 %278 = fadd float %276, %277 %279 = fmul float %56, %97 %280 = fadd float %278, %279 %281 = fadd float %280, %98 %282 = fmul float %54, %105 %283 = fmul float %55, %106 %284 = fadd float %282, %283 %285 = fmul float %56, %107 %286 = fadd float %284, %285 %287 = fadd float %286, %108 %288 = fsub float %275, %35 %289 = fsub float %281, %36 %290 = fsub float %287, %37 %291 = fmul float %38, %288 %292 = fmul float %39, %289 %293 = fadd float %292, %291 %294 = fmul float %40, %290 %295 = fadd float %293, %294 %296 = fmul float %288, %288 %297 = fmul float %289, %289 %298 = fadd float %297, %296 %299 = fmul float %290, %290 %300 = fadd float %298, %299 %301 = call float @llvm.AMDGPU.rsq.clamped.f32(float %300) %302 = fmul float %288, %301 %303 = fmul float %289, %301 %304 = fmul float %290, %301 %305 = fmul float %225, %43 %306 = fmul float %226, %44 %307 = fadd float %306, %305 %308 = fmul float %227, %45 %309 = fadd float %307, %308 %310 = fmul float %309, %225 %311 = fmul float %309, %226 %312 = fmul float %309, %227 %313 = fmul float %310, 2.000000e+00 %314 = fmul float %311, 2.000000e+00 %315 = fmul float %312, 2.000000e+00 %316 = fsub float %43, %313 %317 = fsub float %44, %314 %318 = fsub float %45, %315 %319 = fmul float %302, %316 %320 = fsub float -0.000000e+00, %319 %321 = fmul float %303, %317 %322 = fsub float %320, %321 %323 = fmul float %304, %318 %324 = fsub float %322, %323 %325 = call float @llvm.AMDIL.clamp.(float %324, float 0.000000e+00, float 1.000000e+00) %326 = call float @llvm.pow.f32(float %325, float 1.600000e+01) %327 = call float @llvm.AMDIL.clamp.(float %326, float 0.000000e+00, float 1.000000e+00) %328 = fmul float %275, %22 %329 = fmul float %281, %26 %330 = fadd float %328, %329 %331 = fmul float %287, %30 %332 = fadd float %330, %331 %333 = fadd float %332, %34 %334 = fmul float %69, %13 %335 = fadd float %334, %15 %336 = fmul float %70, %14 %337 = fadd float %336, %16 %338 = bitcast float %75 to i32 %339 = bitcast float %76 to i32 %340 = insertelement <4 x i32> , i32 %338, i32 1 %341 = insertelement <4 x i32> %340, i32 %339, i32 2 %342 = insertelement <4 x i32> %341, i32 0, i32 3 %343 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %342, <8 x i32> %47, <4 x i32> %49, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %344 = extractelement <4 x float> %343, i32 0 %345 = extractelement <4 x float> %343, i32 1 %346 = extractelement <4 x float> %343, i32 2 %347 = extractelement <4 x float> %343, i32 3 %348 = fmul float %275, %19 %349 = fmul float %281, %23 %350 = fadd float %348, %349 %351 = fmul float %287, %27 %352 = fadd float %350, %351 %353 = fadd float %352, %31 %354 = fmul float %275, %20 %355 = fmul float %281, %24 %356 = fadd float %354, %355 %357 = fmul float %287, %28 %358 = fadd float %356, %357 %359 = fadd float %358, %32 %360 = fsub float -0.000000e+00, %359 %361 = fmul float %275, %21 %362 = fmul float %281, %25 %363 = fadd float %361, %362 %364 = fmul float %287, %29 %365 = fadd float %363, %364 %366 = fadd float %365, %33 %367 = fmul float %366, 2.000000e+00 %368 = fsub float %367, %333 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %335, float %337, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %275, float %281, float %287, float %295) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %225, float %226, float %227, float %327) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %344, float %345, float %346, float %347) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %267, float %268, float %269, float %170) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %353, float %360, float %368, float %333) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0xc3000000 ; 7E0202FF C3000000 v_mov_b32_e32 v2, 0x437f0000 ; 7E0402FF 437F0000 v_mov_b32_e32 v4, 0x80000000 ; 7E0802FF 80000000 v_mov_b32_e32 v5, 0xc2800000 ; 7E0A02FF C2800000 v_mov_b32_e32 v6, 0x3c820821 ; 7E0C02FF 3C820821 v_add_i32_e32 v0, s10, v0 ; 4A00000A v_add_i32_e32 v3, s11, v3 ; 4A06060B s_load_dwordx4 s[28:31], s[4:5], 0x0 ; C08E0500 s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908 s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C s_load_dwordx4 s[40:43], s[2:3], 0x4 ; C0940304 s_load_dwordx4 s[44:47], s[2:3], 0x10 ; C0960310 s_load_dwordx4 s[48:51], s[2:3], 0x14 ; C0980314 s_load_dwordx8 s[32:39], s[6:7], 0x0 ; C0D00700 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[7:10], v0, s[12:15], 0 idxen ; E00C2000 80030700 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[10:13], v0, s[16:19], 0 idxen ; E00C2000 80040A00 buffer_load_format_xyzw v[14:17], v0, s[20:23], 0 idxen ; E00C2000 80050E00 buffer_load_format_xyzw v[20:23], v3, s[8:11], 0 idxen ; E00C2000 80021403 s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v22, 0 ; 7E2C0280 s_buffer_load_dword s0, s[44:47], 0xf ; C2002D0F s_buffer_load_dword s24, s[44:47], 0x4c ; C20C2D4C s_buffer_load_dword s23, s[44:47], 0x4d ; C20BAD4D s_buffer_load_dword s22, s[44:47], 0x4e ; C20B2D4E s_buffer_load_dword s14, s[44:47], 0x50 ; C2072D50 s_buffer_load_dword s21, s[48:51], 0x0 ; C20AB100 s_buffer_load_dword s20, s[48:51], 0x1 ; C20A3101 s_buffer_load_dword s19, s[48:51], 0x2 ; C209B102 s_buffer_load_dword s26, s[40:43], 0x6 ; C20D2906 s_buffer_load_dword s25, s[40:43], 0x7 ; C20CA907 s_buffer_load_dword s27, s[40:43], 0x8 ; C20DA908 s_buffer_load_dword s40, s[40:43], 0x9 ; C2142909 s_buffer_load_dword s17, s[44:47], 0x51 ; C208AD51 s_buffer_load_dword s18, s[44:47], 0x52 ; C2092D52 s_buffer_load_dword s6, s[44:47], 0x5 ; C2032D05 s_buffer_load_dword s7, s[44:47], 0x6 ; C203AD06 s_buffer_load_dword s11, s[44:47], 0x7 ; C205AD07 s_buffer_load_dword s4, s[44:47], 0x8 ; C2022D08 s_buffer_load_dword s3, s[44:47], 0x9 ; C201AD09 s_buffer_load_dword s8, s[44:47], 0x0 ; C2042D00 s_buffer_load_dword s9, s[44:47], 0x1 ; C204AD01 s_buffer_load_dword s10, s[44:47], 0x2 ; C2052D02 s_buffer_load_dword s12, s[44:47], 0x3 ; C2062D03 s_buffer_load_dword s15, s[44:47], 0x4 ; C207AD04 s_buffer_load_dword s13, s[44:47], 0xa ; C206AD0A s_buffer_load_dword s16, s[44:47], 0xb ; C2082D0B s_buffer_load_dword s5, s[44:47], 0xc ; C202AD0C s_buffer_load_dword s2, s[44:47], 0xd ; C2012D0D s_buffer_load_dword s1, s[44:47], 0xe ; C200AD0E s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s27 ; 7E00021B v_mov_b32_e32 v3, s40 ; 7E060228 image_sample_l v[23:26], 15, 0, 0, 0, 0, 0, 0, 0, v[20:23], s[32:39], s[28:31] ; F0900F00 00E81714 v_mov_b32_e32 v19, 0x10001 ; 7E2602FF 00010001 image_sample_l_o v[27:30], 15, 0, 0, 0, 0, 0, 0, 0, v[19:22], s[32:39], s[28:31] ; F0D00F00 00E81B13 v_mov_b32_e32 v19, 0x20002 ; 7E2602FF 00020002 image_sample_l_o v[31:34], 15, 0, 0, 0, 0, 0, 0, 0, v[19:22], s[32:39], s[28:31] ; F0D00F00 00E81F13 v_mov_b32_e32 v19, 0x30003 ; 7E2602FF 00030003 image_sample_l_o v[16:19], 15, 0, 0, 0, 0, 0, 0, 0, v[19:22], s[32:39], s[28:31] ; F0D00F00 00E81013 exp 15, 32, 0, 0, 0, v22, v22, v22, v22 ; F800020F 16161616 s_waitcnt vmcnt(3) ; BF8C0773 v_mul_f32_e32 v20, v24, v8 ; 10281118 v_mad_f32 v10, v2, v10, v1 ; D282000A 04061502 v_mad_f32 v11, v2, v11, v1 ; D282000B 04061702 v_mad_f32 v12, v2, v12, v1 ; D282000C 04061902 v_mac_f32_e32 v1, v2, v13 ; 3E021B02 v_mac_f32_e32 v0, s26, v14 ; 3E001C1A v_mac_f32_e32 v3, s25, v15 ; 3E061E19 v_mac_f32_e32 v20, v23, v7 ; 3E280F17 s_waitcnt vmcnt(2) ; BF8C0772 v_mul_f32_e32 v2, v28, v8 ; 1004111C v_mac_f32_e32 v2, v27, v7 ; 3E040F1B s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v8, v32, v8 ; 10101120 v_mac_f32_e32 v8, v31, v7 ; 3E100F1F v_mac_f32_e32 v20, v25, v9 ; 3E281319 v_mac_f32_e32 v2, v29, v9 ; 3E04131D v_mac_f32_e32 v8, v33, v9 ; 3E101321 v_cmp_gt_f32_e32 vcc, 0, v11 ; 7C081680 v_cndmask_b32_e64 v4, v4, -1.0, vcc ; D2000004 01A9E704 v_add_f32_e64 v4, |v11|, v4 ; D2060104 0002090B v_cmp_gt_f32_e32 vcc, 0, v10 ; 7C081480 v_cndmask_b32_e64 v7, 0, 1.0, vcc ; D2000007 01A9E480 v_sub_f32_e64 v9, |v10|, v7 ; D2080109 00020F0A v_cmp_gt_f32_e32 vcc, 0, v12 ; 7C081880 v_cndmask_b32_e64 v10, 0, 1.0, vcc ; D200000A 01A9E480 v_sub_f32_e64 v11, |v12|, v10 ; D208010B 0002150C v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v12, 0, 1.0, vcc ; D200000C 01A9E480 v_sub_f32_e64 v1, |v1|, v12 ; D2080101 00021901 v_add_f32_e32 v9, v5, v9 ; 06121305 v_add_f32_e32 v4, v5, v4 ; 06080905 v_add_f32_e32 v11, v5, v11 ; 06161705 v_add_f32_e32 v1, v5, v1 ; 06020305 v_cmp_gt_f32_e32 vcc, 0, v9 ; 7C081280 v_cndmask_b32_e64 v5, 0, 1.0, vcc ; D2000005 01A9E480 v_sub_f32_e64 v9, |v9|, v5 ; D2080109 00020B09 v_cmp_gt_f32_e32 vcc, 0, v4 ; 7C080880 v_cndmask_b32_e64 v13, 0, 1.0, vcc ; D200000D 01A9E480 v_sub_f32_e64 v4, |v4|, v13 ; D2080104 00021B04 v_cmp_gt_f32_e32 vcc, 0, v11 ; 7C081680 v_cndmask_b32_e64 v14, 0, 1.0, vcc ; D200000E 01A9E480 v_sub_f32_e64 v11, |v11|, v14 ; D208010B 00021D0B v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v15, 0, 1.0, vcc ; D200000F 01A9E480 v_sub_f32_e64 v1, |v1|, v15 ; D2080101 00021F01 v_mul_f32_e32 v21, v6, v9 ; 102A1306 v_mad_f32 v9, -v9, v6, 1.0 ; D2820009 23CA0D09 v_mad_f32 v9, -v4, v6, v9 ; D2820009 24260D04 v_mul_f32_e32 v4, v6, v4 ; 10080906 v_mul_f32_e32 v35, v6, v11 ; 10461706 v_mad_f32 v11, -v11, v6, 1.0 ; D282000B 23CA0D0B v_mad_f32 v11, -v1, v6, v11 ; D282000B 242E0D01 v_mul_f32_e32 v1, v6, v1 ; 10020306 v_mul_f32_e32 v6, v21, v21 ; 100C2B15 v_mac_f32_e32 v6, v4, v4 ; 3E0C0904 v_mac_f32_e32 v6, v9, v9 ; 3E0C1309 v_rsq_clamp_f32_e32 v6, v6 ; 7E0C5906 v_mul_f32_e32 v36, v35, v35 ; 10484723 v_mac_f32_e32 v36, v1, v1 ; 3E480301 v_mac_f32_e32 v36, v11, v11 ; 3E48170B v_rsq_clamp_f32_e32 v36, v36 ; 7E485924 v_mul_f32_e32 v21, v6, v21 ; 102A2B06 v_mul_f32_e32 v4, v6, v4 ; 10080906 v_mul_f32_e32 v6, v6, v9 ; 100C1306 v_mul_f32_e32 v9, v36, v35 ; 10124724 v_mul_f32_e32 v1, v36, v1 ; 10020324 v_mul_f32_e32 v11, v36, v11 ; 10161724 v_mad_f32 v5, -2.0, v5, 1.0 ; D2820005 03CA0AF5 v_mul_f32_e32 v5, v5, v21 ; 100A2B05 v_mad_f32 v13, -2.0, v13, 1.0 ; D282000D 03CA1AF5 v_mul_f32_e32 v4, v13, v4 ; 1008090D v_mad_f32 v13, -2.0, v14, 1.0 ; D282000D 03CA1CF5 v_mul_f32_e32 v9, v13, v9 ; 1012130D v_mad_f32 v13, -2.0, v15, 1.0 ; D282000D 03CA1EF5 v_mul_f32_e32 v1, v13, v1 ; 1002030D v_mad_f32 v7, -2.0, v7, 1.0 ; D2820007 03CA0EF5 v_mul_f32_e32 v6, v7, v6 ; 100C0D07 v_mad_f32 v7, -2.0, v10, 1.0 ; D2820007 03CA14F5 v_mul_f32_e32 v7, v7, v11 ; 100E1707 v_add_f32_e32 v10, v26, v20 ; 0614291A v_mul_f32_e32 v11, v24, v4 ; 10160918 v_mac_f32_e32 v11, v23, v5 ; 3E160B17 v_mul_f32_e32 v13, v24, v1 ; 101A0318 v_mac_f32_e32 v13, v23, v9 ; 3E1A1317 v_mac_f32_e32 v11, v25, v6 ; 3E160D19 v_mac_f32_e32 v13, v25, v7 ; 3E1A0F19 v_mac_f32_e32 v11, 0, v26 ; 3E163480 v_mac_f32_e32 v13, 0, v26 ; 3E1A3480 v_add_f32_e32 v2, v30, v2 ; 0604051E v_mul_f32_e32 v14, v28, v4 ; 101C091C v_mac_f32_e32 v14, v27, v5 ; 3E1C0B1B v_mul_f32_e32 v15, v28, v1 ; 101E031C v_mac_f32_e32 v15, v27, v9 ; 3E1E131B v_mac_f32_e32 v14, v29, v6 ; 3E1C0D1D v_mac_f32_e32 v15, v29, v7 ; 3E1E0F1D v_mac_f32_e32 v14, 0, v30 ; 3E1C3C80 v_mac_f32_e32 v15, 0, v30 ; 3E1E3C80 v_mul_f32_e32 v4, v32, v4 ; 10080920 v_mac_f32_e32 v4, v31, v5 ; 3E080B1F v_mul_f32_e32 v1, v32, v1 ; 10020320 v_mac_f32_e32 v1, v31, v9 ; 3E02131F v_mac_f32_e32 v4, v33, v6 ; 3E080D21 v_mac_f32_e32 v1, v33, v7 ; 3E020F21 v_mac_f32_e32 v4, 0, v34 ; 3E084480 v_mul_f32_e32 v5, v11, v11 ; 100A170B v_mac_f32_e32 v5, v14, v14 ; 3E0A1D0E v_mac_f32_e32 v5, v4, v4 ; 3E0A0904 v_rsq_clamp_f32_e32 v5, v5 ; 7E0A5905 v_add_f32_e32 v6, v34, v8 ; 060C1122 v_mac_f32_e32 v1, 0, v34 ; 3E024480 exp 15, 33, 0, 0, 0, v0, v3, v22, v22 ; F800021F 16160300 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_mul_f32_e32 v0, v5, v11 ; 10001705 v_mul_f32_e32 v3, v5, v14 ; 10061D05 v_mul_f32_e32 v4, v5, v4 ; 10080905 v_subrev_f32_e32 v5, s24, v10 ; 0A0A1418 v_subrev_f32_e32 v7, s23, v2 ; 0A0E0417 v_mul_f32_e32 v8, v5, v5 ; 10100B05 v_mac_f32_e32 v8, v7, v7 ; 3E100F07 v_subrev_f32_e32 v9, s22, v6 ; 0A120C16 v_mac_f32_e32 v8, v9, v9 ; 3E101309 v_rsq_clamp_f32_e32 v8, v8 ; 7E105908 v_mul_f32_e32 v11, s21, v0 ; 10160015 v_mac_f32_e32 v11, s20, v3 ; 3E160614 v_mac_f32_e32 v11, s19, v4 ; 3E160813 v_mul_f32_e32 v14, v0, v11 ; 101C1700 v_mad_f32 v14, -2.0, v14, s21 ; D282000E 00561CF5 v_mul_f32_e32 v20, v8, v5 ; 10280B08 v_mul_f32_e32 v14, v14, v20 ; 101C290E v_mul_f32_e32 v20, v3, v11 ; 10281703 v_mad_f32 v20, -2.0, v20, s20 ; D2820014 005228F5 v_mul_f32_e32 v21, v8, v7 ; 102A0F08 v_mad_f32 v14, -v21, v20, -v14 ; D282000E A43A2915 v_mul_f32_e32 v11, v4, v11 ; 10161704 v_mad_f32 v11, -2.0, v11, s19 ; D282000B 004E16F5 v_mul_f32_e32 v8, v8, v9 ; 10101308 v_mad_f32 v8, -v8, v11, v14 ; D2820008 243A1708 v_mul_f32_e32 v5, s14, v5 ; 100A0A0E v_add_f32_e64 v8, 0, v8 clamp ; D2060808 00021080 v_log_f32_e32 v8, v8 ; 7E104F08 v_mac_f32_e32 v5, s17, v7 ; 3E0A0E11 v_mac_f32_e32 v5, s18, v9 ; 3E0A1212 exp 15, 34, 0, 0, 0, v10, v2, v6, v5 ; F800022F 0506020A s_waitcnt expcnt(0) ; BF8C070F v_mul_legacy_f32_e32 v5, 0x41800000, v8 ; 0E0A10FF 41800000 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80 exp 15, 35, 0, 0, 0, v0, v3, v4, v5 ; F800023F 05040300 exp 15, 36, 0, 0, 0, v16, v17, v18, v19 ; F800024F 13121110 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v5, s11, v2 ; 100A040B v_mul_f32_e32 v7, s15, v2 ; 100E040F v_mul_f32_e32 v8, s6, v2 ; 10100406 v_mul_f32_e32 v2, s7, v2 ; 10040407 v_mac_f32_e32 v5, s12, v10 ; 3E0A140C v_mac_f32_e32 v7, s8, v10 ; 3E0E1408 v_mac_f32_e32 v8, s9, v10 ; 3E101409 v_mac_f32_e32 v2, s10, v10 ; 3E04140A v_mac_f32_e32 v5, s16, v6 ; 3E0A0C10 v_mac_f32_e32 v7, s4, v6 ; 3E0E0C04 v_mac_f32_e32 v8, s3, v6 ; 3E100C03 v_mac_f32_e32 v2, s13, v6 ; 3E040C0D v_mul_f32_e32 v6, v0, v13 ; 100C1B00 v_mac_f32_e32 v6, v3, v15 ; 3E0C1F03 v_mac_f32_e32 v6, v4, v1 ; 3E0C0304 v_mad_f32 v0, -v6, v0, v13 ; D2820000 24360106 v_mad_f32 v3, -v6, v3, v15 ; D2820003 243E0706 v_mad_f32 v1, -v6, v4, v1 ; D2820001 24060906 v_add_f32_e32 v4, s0, v5 ; 06080A00 v_add_f32_e32 v5, s5, v7 ; 060A0E05 v_mul_f32_e32 v6, v0, v0 ; 100C0100 v_mac_f32_e32 v6, v3, v3 ; 3E0C0703 v_mac_f32_e32 v6, v1, v1 ; 3E0C0301 v_rsq_clamp_f32_e32 v6, v6 ; 7E0C5906 v_add_f32_e32 v7, s2, v8 ; 060E1002 v_mad_f32 v8, -2.0, v12, 1.0 ; D2820008 03CA18F5 v_add_f32_e32 v2, s1, v2 ; 06040401 v_mul_f32_e32 v0, v6, v0 ; 10000106 v_mul_f32_e32 v3, v6, v3 ; 10060706 v_mul_f32_e32 v1, v6, v1 ; 10020306 exp 15, 37, 0, 0, 0, v22, v22, v22, v22 ; F800025F 16161616 exp 15, 38, 0, 0, 0, v0, v3, v1, v8 ; F800026F 08010300 s_waitcnt expcnt(0) ; BF8C070F v_xor_b32_e32 v0, 0x80000000, v7 ; 3A000EFF 80000000 v_mad_f32 v1, 2.0, v2, -v4 ; D2820001 841204F4 exp 15, 12, 0, 0, 0, v5, v0, v1, v4 ; F80000CF 04010005 exp 15, 13, 0, 1, 0, v22, v22, v22, v22 ; F80008DF 16161616 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 40 Code Size: 1284 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0xB last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[5], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL OUT[2], COLOR[2] DCL OUT[3], COLOR[3] DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL CONST[1][0..3] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0..9], LOCAL IMM[0] UINT32 {0, 16, 48, 44} IMM[1] FLT32 { 2.0000, -1.0000, 1.0000, 0.5000} IMM[2] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} IMM[3] UINT32 {4, 0, 0, 0} 0: DP3 TEMP[0].x, IN[2].xyzz, IN[2].xyzz 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MUL TEMP[0].xyz, IN[2].xyzz, TEMP[0].xxxx 3: MOV TEMP[1].xy, IN[0].xyyy 4: TEX TEMP[1], TEMP[1], SAMP[0], 2D 5: MUL TEMP[2].xyz, CONST[1][1].xyzz, TEMP[1].xyzz 6: MUL TEMP[2].xyz, TEMP[2].xyzz, IN[3].xyzz 7: MOV TEMP[3].xy, IN[0].xyyy 8: TEX TEMP[3].xyz, TEMP[3], SAMP[1], 2D 9: MUL TEMP[4].x, CONST[1][3].xxxx, TEMP[3].xxxx 10: MUL TEMP[5].x, CONST[1][2].wwww, TEMP[3].zzzz 11: MUL TEMP[6].xyz, TEMP[0].zxyy, IN[4].yzxx 12: MAD TEMP[6].xyz, TEMP[0].yzxx, IN[4].zxyy, -TEMP[6].xyzz 13: MOV TEMP[7].xy, IN[0].xyyy 14: TEX TEMP[7].yw, TEMP[7], SAMP[2], 2D 15: MAD TEMP[7].xy, TEMP[7].wyyy, IMM[1].xxxx, IMM[1].yyyy 16: MOV TEMP[8].x, TEMP[7].xxxx 17: MOV TEMP[8].y, -TEMP[7].yyyy 18: MUL TEMP[8].xy, TEMP[8].xyyy, CONST[1][0].xxxx 19: MOV TEMP[9].x, TEMP[8].xxxx 20: MOV TEMP[9].y, TEMP[8].yyyy 21: DP2 TEMP[7].x, TEMP[7].xyyy, TEMP[7].xyyy 22: ADD TEMP[7].x, IMM[1].zzzz, -TEMP[7].xxxx 23: MOV_SAT TEMP[7].x, TEMP[7].xxxx 24: SQRT TEMP[7].x, TEMP[7].xxxx 25: MOV TEMP[9].z, TEMP[7].xxxx 26: DP3 TEMP[7].x, TEMP[9].xyzz, TEMP[9].xyzz 27: RSQ TEMP[7].x, TEMP[7].xxxx 28: MUL TEMP[7].xyz, TEMP[9].xyzz, TEMP[7].xxxx 29: DP3 TEMP[8].x, IN[4].xyzz, IN[4].xyzz 30: RSQ TEMP[8].x, TEMP[8].xxxx 31: MUL TEMP[8].xyz, IN[4].xyzz, TEMP[8].xxxx 32: DP3 TEMP[9].x, TEMP[6].xyzz, TEMP[6].xyzz 33: RSQ TEMP[9].x, TEMP[9].xxxx 34: MUL TEMP[6].xyz, TEMP[6].xyzz, TEMP[9].xxxx 35: MUL TEMP[6].xyz, IN[4].wwww, TEMP[6].xyzz 36: MUL TEMP[6].xyz, TEMP[7].yyyy, TEMP[6].xyzz 37: MAD TEMP[6].xyz, TEMP[7].xxxx, TEMP[8].xyzz, TEMP[6].xyzz 38: MAD TEMP[0].xyz, TEMP[0].xyzz, TEMP[7].zzzz, TEMP[6].xyzz 39: DP3 TEMP[6].x, TEMP[0].xyzz, TEMP[0].xyzz 40: RSQ TEMP[6].x, TEMP[6].xxxx 41: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[6].xxxx 42: MAD TEMP[0].xyz, TEMP[0].xyzz, IMM[1].wwww, IMM[1].wwww 43: MOV TEMP[6].w, IMM[2].xxxx 44: MOV TEMP[6].x, TEMP[0].xxxx 45: MOV TEMP[6].y, TEMP[0].yyyy 46: MOV TEMP[6].z, TEMP[0].zzzz 47: FSLT TEMP[0].x, TEMP[1].wwww, CONST[1][0].yyyy 48: AND TEMP[0].x, TEMP[0].xxxx, IMM[1].zzzz 49: KILL_IF -TEMP[0].xxxx 50: MOV TEMP[0].w, IMM[2].xxxx 51: MOV TEMP[0].x, TEMP[2].xxxx 52: MOV TEMP[0].y, TEMP[2].yyyy 53: MOV TEMP[0].z, TEMP[2].zzzz 54: MOV TEMP[1].w, IMM[2].xxxx 55: MOV TEMP[1].x, TEMP[4].xxxx 56: MOV TEMP[1].y, TEMP[3].yyyy 57: MOV TEMP[1].z, TEMP[5].xxxx 58: MOV OUT[2], IN[1].wwww 59: MOV OUT[0], TEMP[0] 60: MOV OUT[3], TEMP[6] 61: MOV OUT[1], TEMP[1] 62: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %31 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %32 = load <32 x i8>, <32 x i8> addrspace(2)* %31, align 32, !tbaa !0 %33 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %34 = load <16 x i8>, <16 x i8> addrspace(2)* %33, align 16, !tbaa !0 %35 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %36 = bitcast <8 x i32> addrspace(2)* %35 to <32 x i8> addrspace(2)* %37 = load <32 x i8>, <32 x i8> addrspace(2)* %36, align 32, !tbaa !0 %38 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %39 = bitcast <4 x i32> addrspace(2)* %38 to <16 x i8> addrspace(2)* %40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, !tbaa !0 %41 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %42 = bitcast <8 x i32> addrspace(2)* %41 to <32 x i8> addrspace(2)* %43 = load <32 x i8>, <32 x i8> addrspace(2)* %42, align 32, !tbaa !0 %44 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %45 = bitcast <4 x i32> addrspace(2)* %44 to <16 x i8> addrspace(2)* %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %48 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %49 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %50 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %51 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %52 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %53 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %54 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %55 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %56 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %57 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %58 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %59 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %60 = fmul float %50, %50 %61 = fmul float %51, %51 %62 = fadd float %61, %60 %63 = fmul float %52, %52 %64 = fadd float %62, %63 %65 = call float @llvm.AMDGPU.rsq.clamped.f32(float %64) %66 = fmul float %50, %65 %67 = fmul float %51, %65 %68 = fmul float %52, %65 %69 = bitcast float %47 to i32 %70 = bitcast float %48 to i32 %71 = insertelement <2 x i32> undef, i32 %69, i32 0 %72 = insertelement <2 x i32> %71, i32 %70, i32 1 %73 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %72, <32 x i8> %32, <16 x i8> %34, i32 2) %74 = extractelement <4 x float> %73, i32 0 %75 = extractelement <4 x float> %73, i32 1 %76 = extractelement <4 x float> %73, i32 2 %77 = extractelement <4 x float> %73, i32 3 %78 = fmul float %26, %74 %79 = fmul float %27, %75 %80 = fmul float %28, %76 %81 = fmul float %78, %53 %82 = fmul float %79, %54 %83 = fmul float %80, %55 %84 = bitcast float %47 to i32 %85 = bitcast float %48 to i32 %86 = insertelement <2 x i32> undef, i32 %84, i32 0 %87 = insertelement <2 x i32> %86, i32 %85, i32 1 %88 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %87, <32 x i8> %37, <16 x i8> %40, i32 2) %89 = extractelement <4 x float> %88, i32 0 %90 = extractelement <4 x float> %88, i32 1 %91 = extractelement <4 x float> %88, i32 2 %92 = fmul float %30, %89 %93 = fmul float %29, %91 %94 = fmul float %68, %57 %95 = fmul float %66, %58 %96 = fmul float %67, %56 %97 = fmul float %67, %58 %98 = fsub float %97, %94 %99 = fmul float %68, %56 %100 = fsub float %99, %95 %101 = fmul float %66, %57 %102 = fsub float %101, %96 %103 = bitcast float %47 to i32 %104 = bitcast float %48 to i32 %105 = insertelement <2 x i32> undef, i32 %103, i32 0 %106 = insertelement <2 x i32> %105, i32 %104, i32 1 %107 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %106, <32 x i8> %43, <16 x i8> %46, i32 2) %108 = extractelement <4 x float> %107, i32 1 %109 = extractelement <4 x float> %107, i32 3 %110 = fmul float %109, 2.000000e+00 %111 = fadd float %110, -1.000000e+00 %112 = fmul float %108, 2.000000e+00 %113 = fadd float %112, -1.000000e+00 %114 = fmul float %111, %24 %115 = fmul float %113, %24 %116 = fmul float %111, %111 %117 = fmul float %113, %113 %118 = fadd float %116, %117 %119 = fsub float 1.000000e+00, %118 %120 = call float @llvm.AMDIL.clamp.(float %119, float 0.000000e+00, float 1.000000e+00) %121 = call float @llvm.sqrt.f32(float %120) %122 = fmul float %114, %114 %123 = fmul float %115, %115 %124 = fadd float %123, %122 %125 = fmul float %121, %121 %126 = fadd float %124, %125 %127 = call float @llvm.AMDGPU.rsq.clamped.f32(float %126) %128 = fmul float %114, %127 %129 = fmul float %115, %127 %130 = fsub float -0.000000e+00, %129 %131 = fmul float %121, %127 %132 = fmul float %56, %56 %133 = fmul float %57, %57 %134 = fadd float %133, %132 %135 = fmul float %58, %58 %136 = fadd float %134, %135 %137 = call float @llvm.AMDGPU.rsq.clamped.f32(float %136) %138 = fmul float %56, %137 %139 = fmul float %57, %137 %140 = fmul float %58, %137 %141 = fmul float %98, %98 %142 = fmul float %100, %100 %143 = fadd float %142, %141 %144 = fmul float %102, %102 %145 = fadd float %143, %144 %146 = call float @llvm.AMDGPU.rsq.clamped.f32(float %145) %147 = fmul float %98, %146 %148 = fmul float %100, %146 %149 = fmul float %102, %146 %150 = fmul float %59, %147 %151 = fmul float %59, %148 %152 = fmul float %59, %149 %153 = fmul float %150, %130 %154 = fmul float %151, %130 %155 = fmul float %152, %130 %156 = fmul float %128, %138 %157 = fadd float %156, %153 %158 = fmul float %128, %139 %159 = fadd float %158, %154 %160 = fmul float %128, %140 %161 = fadd float %160, %155 %162 = fmul float %66, %131 %163 = fadd float %162, %157 %164 = fmul float %67, %131 %165 = fadd float %164, %159 %166 = fmul float %68, %131 %167 = fadd float %166, %161 %168 = fmul float %163, %163 %169 = fmul float %165, %165 %170 = fadd float %169, %168 %171 = fmul float %167, %167 %172 = fadd float %170, %171 %173 = call float @llvm.AMDGPU.rsq.clamped.f32(float %172) %174 = fmul float %163, %173 %175 = fmul float %165, %173 %176 = fmul float %167, %173 %177 = fmul float %174, 5.000000e-01 %178 = fadd float %177, 5.000000e-01 %179 = fmul float %175, 5.000000e-01 %180 = fadd float %179, 5.000000e-01 %181 = fmul float %176, 5.000000e-01 %182 = fadd float %181, 5.000000e-01 %183 = fcmp olt float %77, %25 %184 = select i1 %183, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %184) %185 = call i32 @llvm.SI.packf16(float %81, float %82) %186 = bitcast i32 %185 to float %187 = call i32 @llvm.SI.packf16(float %83, float 0.000000e+00) %188 = bitcast i32 %187 to float %189 = call i32 @llvm.SI.packf16(float %92, float %90) %190 = bitcast i32 %189 to float %191 = call i32 @llvm.SI.packf16(float %93, float 0.000000e+00) %192 = bitcast i32 %191 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %186, float %188, float %186, float %188) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 1, i32 1, float %190, float %192, float %190, float %192) %193 = call i32 @llvm.SI.packf16(float %178, float %180) %194 = bitcast i32 %193 to float %195 = call i32 @llvm.SI.packf16(float %182, float 0.000000e+00) %196 = bitcast i32 %195 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 2, i32 0, float %49, float %49, float %49, float %49) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 3, i32 1, float %194, float %196, float %194, float %196) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 3, 1, [m0] ; C8100700 v_interp_p2_f32 v4, [v4], v1, 3, 1, [m0] ; C8110701 v_interp_p1_f32 v5, v0, 0, 2, [m0] ; C8140800 v_interp_p2_f32 v5, [v5], v1, 0, 2, [m0] ; C8150801 v_interp_p1_f32 v6, v0, 1, 2, [m0] ; C8180900 v_interp_p2_f32 v6, [v6], v1, 1, 2, [m0] ; C8190901 v_interp_p1_f32 v7, v0, 2, 2, [m0] ; C81C0A00 v_interp_p2_f32 v7, [v7], v1, 2, 2, [m0] ; C81D0A01 v_interp_p1_f32 v8, v0, 0, 3, [m0] ; C8200C00 v_interp_p2_f32 v8, [v8], v1, 0, 3, [m0] ; C8210C01 v_interp_p1_f32 v9, v0, 1, 3, [m0] ; C8240D00 v_interp_p2_f32 v9, [v9], v1, 1, 3, [m0] ; C8250D01 v_interp_p1_f32 v10, v0, 2, 3, [m0] ; C8280E00 v_interp_p2_f32 v10, [v10], v1, 2, 3, [m0] ; C8290E01 s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 v_interp_p1_f32 v11, v0, 0, 4, [m0] ; C82C1000 v_interp_p2_f32 v11, [v11], v1, 0, 4, [m0] ; C82D1001 v_interp_p1_f32 v12, v0, 1, 4, [m0] ; C8301100 v_interp_p2_f32 v12, [v12], v1, 1, 4, [m0] ; C8311101 v_interp_p1_f32 v13, v0, 2, 4, [m0] ; C8341200 v_interp_p2_f32 v13, [v13], v1, 2, 4, [m0] ; C8351201 v_interp_p1_f32 v0, v0, 3, 4, [m0] ; C8001300 s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 s_load_dwordx8 s[36:43], s[6:7], 0x0 ; C0D20700 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s44, s[0:3], 0x4 ; C2160104 s_buffer_load_dword s45, s[0:3], 0x5 ; C2168105 v_interp_p2_f32 v0, [v0], v1, 3, 4, [m0] ; C8011301 s_buffer_load_dword s46, s[0:3], 0x1 ; C2170101 s_buffer_load_dword s47, s[0:3], 0x6 ; C2178106 s_load_dwordx4 s[8:11], s[4:5], 0x4 ; C0840504 s_load_dwordx4 s[12:15], s[4:5], 0x8 ; C0860508 s_load_dwordx8 s[16:23], s[6:7], 0x8 ; C0C80708 s_load_dwordx8 s[24:31], s[6:7], 0x10 ; C0CC0710 image_sample v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[36:43], s[32:35] ; F0800F00 01090E02 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v1, s44, v14 ; 10021C2C v_mul_f32_e32 v14, s45, v15 ; 101C1E2D v_mul_f32_e32 v15, v5, v5 ; 101E0B05 v_mac_f32_e32 v15, v6, v6 ; 3E1E0D06 v_mac_f32_e32 v15, v7, v7 ; 3E1E0F07 v_rsq_clamp_f32_e32 v15, v15 ; 7E1E590F v_mul_f32_e32 v16, s47, v16 ; 1020202F v_cmp_gt_f32_e32 vcc, s46, v17 ; 7C08222E image_sample v[17:19], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[8:11] ; F0800700 00441102 image_sample v[2:3], 10, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[12:15] ; F0800A00 00660202 v_mul_f32_e32 v5, v15, v5 ; 100A0B0F v_mul_f32_e32 v6, v15, v6 ; 100C0D0F v_mul_f32_e32 v7, v15, v7 ; 100E0F0F s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v3, 2.0, v3, -1.0 ; D2820003 03CE06F4 v_mad_f32 v2, 2.0, v2, -1.0 ; D2820002 03CE04F4 v_mul_f32_e32 v15, v12, v7 ; 101E0F0C v_mad_f32 v15, v6, v13, -v15 ; D282000F 843E1B06 v_mul_f32_e32 v20, v13, v5 ; 10280B0D v_mad_f32 v20, v7, v11, -v20 ; D2820014 84521707 v_mul_f32_e32 v21, v11, v6 ; 102A0D0B v_mad_f32 v21, v5, v12, -v21 ; D2820015 84561905 v_mul_f32_e32 v22, v11, v11 ; 102C170B v_mac_f32_e32 v22, v12, v12 ; 3E2C190C v_mac_f32_e32 v22, v13, v13 ; 3E2C1B0D v_rsq_clamp_f32_e32 v22, v22 ; 7E2C5916 v_mul_f32_e32 v23, v15, v15 ; 102E1F0F v_mac_f32_e32 v23, v20, v20 ; 3E2E2914 v_mac_f32_e32 v23, v21, v21 ; 3E2E2B15 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 v_rsq_clamp_f32_e32 v23, v23 ; 7E2E5917 v_mul_f32_e32 v11, v22, v11 ; 10161716 v_mul_f32_e32 v12, v22, v12 ; 10181916 v_mul_f32_e32 v13, v22, v13 ; 101A1B16 v_mul_f32_e32 v15, v23, v15 ; 101E1F17 v_mul_f32_e32 v20, v23, v20 ; 10282917 v_mul_f32_e32 v21, v23, v21 ; 102A2B17 v_mad_f32 v22, -v2, v2, 1.0 ; D2820016 23CA0502 v_mad_f32 v22, -v3, v3, v22 ; D2820016 245A0703 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v3 ; 10060604 v_mul_f32_e32 v2, s4, v2 ; 10040404 v_add_f32_e64 v22, 0, v22 clamp ; D2060816 00022C80 v_sqrt_f32_e32 v22, v22 ; 7E2C6716 v_mul_f32_e32 v23, v3, v3 ; 102E0703 v_mac_f32_e32 v23, v2, v2 ; 3E2E0502 v_mac_f32_e32 v23, v22, v22 ; 3E2E2D16 v_rsq_clamp_f32_e32 v23, v23 ; 7E2E5917 v_mul_f32_e32 v15, v15, v0 ; 101E010F v_mul_f32_e32 v20, v20, v0 ; 10280114 v_mul_f32_e32 v0, v21, v0 ; 10000115 v_mul_f32_e32 v2, v23, v2 ; 10040517 v_mul_f32_e32 v15, v2, v15 ; 101E1F02 v_mul_f32_e32 v20, v2, v20 ; 10282902 v_mul_f32_e32 v0, v2, v0 ; 10000102 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_buffer_load_dword s0, s[0:3], 0xb ; C200010B v_mul_f32_e32 v2, v23, v3 ; 10040717 v_mad_f32 v3, v2, v11, -v15 ; D2820003 843E1702 v_mad_f32 v11, v2, v12, -v20 ; D282000B 84521902 v_mad_f32 v0, v2, v13, -v0 ; D2820000 84021B02 v_mul_f32_e32 v2, v23, v22 ; 10042D17 v_mac_f32_e32 v3, v2, v5 ; 3E060B02 v_mac_f32_e32 v11, v2, v6 ; 3E160D02 v_mac_f32_e32 v0, v2, v7 ; 3E000F02 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s4, v17 ; 10042204 v_mul_f32_e32 v5, s0, v19 ; 100A2600 v_mul_f32_e32 v6, v3, v3 ; 100C0703 v_mac_f32_e32 v6, v11, v11 ; 3E0C170B v_mac_f32_e32 v6, v0, v0 ; 3E0C0100 v_rsq_clamp_f32_e32 v6, v6 ; 7E0C5906 v_mul_f32_e32 v1, v8, v1 ; 10020308 v_mul_f32_e32 v7, v9, v14 ; 100E1D09 v_mul_f32_e32 v8, v10, v16 ; 1010210A v_mul_f32_e32 v3, v6, v3 ; 10060706 v_mul_f32_e32 v9, v6, v11 ; 10121706 v_mul_f32_e32 v0, v6, v0 ; 10000106 v_mad_f32 v3, 0.5, v3, 0.5 ; D2820003 03C206F0 v_mad_f32 v6, 0.5, v9, 0.5 ; D2820006 03C212F0 v_mad_f32 v0, 0.5, v0, 0.5 ; D2820000 03C200F0 v_cndmask_b32_e64 v9, 0, -1.0, vcc ; D2000009 01A9E680 v_cmpx_le_f32_e32 vcc, 0, v9 ; 7C261280 v_cvt_pkrtz_f16_f32_e32 v2, v2, v18 ; 5E042502 v_cvt_pkrtz_f16_f32_e32 v1, v1, v7 ; 5E020F01 v_cvt_pkrtz_f16_f32_e64 v7, v8, 0 ; D25E0007 00010108 exp 15, 0, 1, 0, 0, v1, v7, v1, v7 ; F800040F 07010701 s_waitcnt expcnt(0) ; BF8C070F v_cvt_pkrtz_f16_f32_e64 v1, v5, 0 ; D25E0001 00010105 exp 15, 1, 1, 0, 0, v2, v1, v2, v1 ; F800041F 01020102 s_waitcnt expcnt(0) ; BF8C070F v_cvt_pkrtz_f16_f32_e32 v1, v3, v6 ; 5E020D03 exp 15, 2, 0, 0, 0, v4, v4, v4, v4 ; F800002F 04040404 v_cvt_pkrtz_f16_f32_e64 v0, v0, 0 ; D25E0000 00010100 exp 15, 3, 1, 1, 1, v1, v0, v1, v0 ; F8001C3F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 24 Code Size: 652 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0xB last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[5], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL OUT[2], COLOR[2] DCL OUT[3], COLOR[3] DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL CONST[1][0..3] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0..6], LOCAL IMM[0] UINT32 {0, 16, 48, 44} IMM[1] FLT32 { 2.0000, -1.0000, 1.0000, 0.5000} IMM[2] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: DP3 TEMP[0].x, IN[2].xyzz, IN[2].xyzz 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MUL TEMP[0].xyz, IN[2].xyzz, TEMP[0].xxxx 3: MOV TEMP[1].xy, IN[0].xyyy 4: TEX TEMP[1], TEMP[1], SAMP[0], 2D 5: MOV TEMP[2].w, TEMP[1].wwww 6: MUL TEMP[2].xyz, CONST[1][1].xyzz, TEMP[1].xyzz 7: MUL TEMP[1].xyz, TEMP[2], IN[3] 8: MOV TEMP[2].xy, IN[0].xyyy 9: TEX TEMP[2].xyz, TEMP[2], SAMP[1], 2D 10: MUL TEMP[3].xyz, TEMP[0].zxyy, IN[4].yzxx 11: MAD TEMP[3].xyz, TEMP[0].yzxx, IN[4].zxyy, -TEMP[3].xyzz 12: MOV TEMP[4].xy, IN[0].xyyy 13: TEX TEMP[4].yw, TEMP[4], SAMP[2], 2D 14: MAD TEMP[4].xy, TEMP[4].wyyy, IMM[1].xxxx, IMM[1].yyyy 15: MOV TEMP[5].x, TEMP[4].xxxx 16: MOV TEMP[5].y, -TEMP[4].yyyy 17: MUL TEMP[5].xy, TEMP[5].xyyy, CONST[1][0].xxxx 18: MOV TEMP[6].x, TEMP[5].xxxx 19: MOV TEMP[6].y, TEMP[5].yyyy 20: DP2 TEMP[4].x, TEMP[4].xyyy, TEMP[4].xyyy 21: ADD TEMP[4].x, IMM[1].zzzz, -TEMP[4].xxxx 22: MOV_SAT TEMP[4].x, TEMP[4].xxxx 23: SQRT TEMP[4].x, TEMP[4].xxxx 24: MOV TEMP[6].z, TEMP[4].xxxx 25: DP3 TEMP[4].x, TEMP[6].xyzz, TEMP[6].xyzz 26: RSQ TEMP[4].x, TEMP[4].xxxx 27: MUL TEMP[4].xyz, TEMP[6].xyzz, TEMP[4].xxxx 28: DP3 TEMP[5].x, IN[4].xyzz, IN[4].xyzz 29: RSQ TEMP[5].x, TEMP[5].xxxx 30: MUL TEMP[5].xyz, IN[4].xyzz, TEMP[5].xxxx 31: DP3 TEMP[6].x, TEMP[3].xyzz, TEMP[3].xyzz 32: RSQ TEMP[6].x, TEMP[6].xxxx 33: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[6].xxxx 34: MUL TEMP[3].xyz, IN[4].wwww, TEMP[3].xyzz 35: MUL TEMP[3].xyz, TEMP[4].yyyy, TEMP[3].xyzz 36: MAD TEMP[3].xyz, TEMP[4].xxxx, TEMP[5].xyzz, TEMP[3].xyzz 37: MAD TEMP[0].xyz, TEMP[0].xyzz, TEMP[4].zzzz, TEMP[3].xyzz 38: DP3 TEMP[3].x, TEMP[0].xyzz, TEMP[0].xyzz 39: RSQ TEMP[3].x, TEMP[3].xxxx 40: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[3].xxxx 41: MAD TEMP[0].xyz, TEMP[0].xyzz, IMM[1].wwww, IMM[1].wwww 42: MOV TEMP[3].w, IMM[2].xxxx 43: MOV TEMP[3].x, TEMP[0].xxxx 44: MOV TEMP[3].y, TEMP[0].yyyy 45: MOV TEMP[3].z, TEMP[0].zzzz 46: MOV TEMP[0].w, IMM[2].xxxx 47: MOV TEMP[0].x, TEMP[1].xxxx 48: MOV TEMP[0].y, TEMP[1].yyyy 49: MOV TEMP[0].z, TEMP[1].zzzz 50: MOV TEMP[1].w, IMM[2].xxxx 51: MUL TEMP[4].x, CONST[1][3].xxxx, TEMP[2].xxxx 52: MUL TEMP[1].x, TEMP[4].xxxx, IN[3].wwww 53: MOV TEMP[1].y, TEMP[2].yyyy 54: MUL TEMP[2].x, CONST[1][2].wwww, TEMP[2].zzzz 55: MOV TEMP[1].z, TEMP[2].xxxx 56: MOV OUT[2], IN[1].wwww 57: MOV OUT[0], TEMP[0] 58: MOV OUT[3], TEMP[3] 59: MOV OUT[1], TEMP[1] 60: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %30 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %31 = load <32 x i8>, <32 x i8> addrspace(2)* %30, align 32, !tbaa !0 %32 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0 %34 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %35 = bitcast <8 x i32> addrspace(2)* %34 to <32 x i8> addrspace(2)* %36 = load <32 x i8>, <32 x i8> addrspace(2)* %35, align 32, !tbaa !0 %37 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %38 = bitcast <4 x i32> addrspace(2)* %37 to <16 x i8> addrspace(2)* %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 %40 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %41 = bitcast <8 x i32> addrspace(2)* %40 to <32 x i8> addrspace(2)* %42 = load <32 x i8>, <32 x i8> addrspace(2)* %41, align 32, !tbaa !0 %43 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %44 = bitcast <4 x i32> addrspace(2)* %43 to <16 x i8> addrspace(2)* %45 = load <16 x i8>, <16 x i8> addrspace(2)* %44, align 16, !tbaa !0 %46 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %47 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %48 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %49 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %50 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %51 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %52 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %53 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %54 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %55 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %56 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %57 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %58 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %59 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %60 = fmul float %49, %49 %61 = fmul float %50, %50 %62 = fadd float %61, %60 %63 = fmul float %51, %51 %64 = fadd float %62, %63 %65 = call float @llvm.AMDGPU.rsq.clamped.f32(float %64) %66 = fmul float %49, %65 %67 = fmul float %50, %65 %68 = fmul float %51, %65 %69 = bitcast float %46 to i32 %70 = bitcast float %47 to i32 %71 = insertelement <2 x i32> undef, i32 %69, i32 0 %72 = insertelement <2 x i32> %71, i32 %70, i32 1 %73 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %72, <32 x i8> %31, <16 x i8> %33, i32 2) %74 = extractelement <4 x float> %73, i32 0 %75 = extractelement <4 x float> %73, i32 1 %76 = extractelement <4 x float> %73, i32 2 %77 = fmul float %25, %74 %78 = fmul float %26, %75 %79 = fmul float %27, %76 %80 = fmul float %77, %52 %81 = fmul float %78, %53 %82 = fmul float %79, %54 %83 = bitcast float %46 to i32 %84 = bitcast float %47 to i32 %85 = insertelement <2 x i32> undef, i32 %83, i32 0 %86 = insertelement <2 x i32> %85, i32 %84, i32 1 %87 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %86, <32 x i8> %36, <16 x i8> %39, i32 2) %88 = extractelement <4 x float> %87, i32 0 %89 = extractelement <4 x float> %87, i32 1 %90 = extractelement <4 x float> %87, i32 2 %91 = fmul float %68, %57 %92 = fmul float %66, %58 %93 = fmul float %67, %56 %94 = fmul float %67, %58 %95 = fsub float %94, %91 %96 = fmul float %68, %56 %97 = fsub float %96, %92 %98 = fmul float %66, %57 %99 = fsub float %98, %93 %100 = bitcast float %46 to i32 %101 = bitcast float %47 to i32 %102 = insertelement <2 x i32> undef, i32 %100, i32 0 %103 = insertelement <2 x i32> %102, i32 %101, i32 1 %104 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %103, <32 x i8> %42, <16 x i8> %45, i32 2) %105 = extractelement <4 x float> %104, i32 1 %106 = extractelement <4 x float> %104, i32 3 %107 = fmul float %106, 2.000000e+00 %108 = fadd float %107, -1.000000e+00 %109 = fmul float %105, 2.000000e+00 %110 = fadd float %109, -1.000000e+00 %111 = fmul float %108, %24 %112 = fmul float %110, %24 %113 = fmul float %108, %108 %114 = fmul float %110, %110 %115 = fadd float %113, %114 %116 = fsub float 1.000000e+00, %115 %117 = call float @llvm.AMDIL.clamp.(float %116, float 0.000000e+00, float 1.000000e+00) %118 = call float @llvm.sqrt.f32(float %117) %119 = fmul float %111, %111 %120 = fmul float %112, %112 %121 = fadd float %120, %119 %122 = fmul float %118, %118 %123 = fadd float %121, %122 %124 = call float @llvm.AMDGPU.rsq.clamped.f32(float %123) %125 = fmul float %111, %124 %126 = fmul float %112, %124 %127 = fsub float -0.000000e+00, %126 %128 = fmul float %118, %124 %129 = fmul float %56, %56 %130 = fmul float %57, %57 %131 = fadd float %130, %129 %132 = fmul float %58, %58 %133 = fadd float %131, %132 %134 = call float @llvm.AMDGPU.rsq.clamped.f32(float %133) %135 = fmul float %56, %134 %136 = fmul float %57, %134 %137 = fmul float %58, %134 %138 = fmul float %95, %95 %139 = fmul float %97, %97 %140 = fadd float %139, %138 %141 = fmul float %99, %99 %142 = fadd float %140, %141 %143 = call float @llvm.AMDGPU.rsq.clamped.f32(float %142) %144 = fmul float %95, %143 %145 = fmul float %97, %143 %146 = fmul float %99, %143 %147 = fmul float %59, %144 %148 = fmul float %59, %145 %149 = fmul float %59, %146 %150 = fmul float %147, %127 %151 = fmul float %148, %127 %152 = fmul float %149, %127 %153 = fmul float %125, %135 %154 = fadd float %153, %150 %155 = fmul float %125, %136 %156 = fadd float %155, %151 %157 = fmul float %125, %137 %158 = fadd float %157, %152 %159 = fmul float %66, %128 %160 = fadd float %159, %154 %161 = fmul float %67, %128 %162 = fadd float %161, %156 %163 = fmul float %68, %128 %164 = fadd float %163, %158 %165 = fmul float %160, %160 %166 = fmul float %162, %162 %167 = fadd float %166, %165 %168 = fmul float %164, %164 %169 = fadd float %167, %168 %170 = call float @llvm.AMDGPU.rsq.clamped.f32(float %169) %171 = fmul float %160, %170 %172 = fmul float %162, %170 %173 = fmul float %164, %170 %174 = fmul float %171, 5.000000e-01 %175 = fadd float %174, 5.000000e-01 %176 = fmul float %172, 5.000000e-01 %177 = fadd float %176, 5.000000e-01 %178 = fmul float %173, 5.000000e-01 %179 = fadd float %178, 5.000000e-01 %180 = fmul float %29, %88 %181 = fmul float %180, %55 %182 = fmul float %28, %90 %183 = call i32 @llvm.SI.packf16(float %80, float %81) %184 = bitcast i32 %183 to float %185 = call i32 @llvm.SI.packf16(float %82, float 0.000000e+00) %186 = bitcast i32 %185 to float %187 = call i32 @llvm.SI.packf16(float %181, float %89) %188 = bitcast i32 %187 to float %189 = call i32 @llvm.SI.packf16(float %182, float 0.000000e+00) %190 = bitcast i32 %189 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %184, float %186, float %184, float %186) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 1, i32 1, float %188, float %190, float %188, float %190) %191 = call i32 @llvm.SI.packf16(float %175, float %177) %192 = bitcast i32 %191 to float %193 = call i32 @llvm.SI.packf16(float %179, float 0.000000e+00) %194 = bitcast i32 %193 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 2, i32 0, float %48, float %48, float %48, float %48) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 3, i32 1, float %192, float %194, float %192, float %194) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 3, 1, [m0] ; C8100700 v_interp_p2_f32 v4, [v4], v1, 3, 1, [m0] ; C8110701 v_interp_p1_f32 v5, v0, 0, 2, [m0] ; C8140800 v_interp_p2_f32 v5, [v5], v1, 0, 2, [m0] ; C8150801 v_interp_p1_f32 v6, v0, 1, 2, [m0] ; C8180900 v_interp_p2_f32 v6, [v6], v1, 1, 2, [m0] ; C8190901 v_interp_p1_f32 v7, v0, 2, 2, [m0] ; C81C0A00 v_interp_p2_f32 v7, [v7], v1, 2, 2, [m0] ; C81D0A01 v_interp_p1_f32 v8, v0, 0, 3, [m0] ; C8200C00 v_interp_p2_f32 v8, [v8], v1, 0, 3, [m0] ; C8210C01 v_interp_p1_f32 v9, v0, 1, 3, [m0] ; C8240D00 v_interp_p2_f32 v9, [v9], v1, 1, 3, [m0] ; C8250D01 v_interp_p1_f32 v10, v0, 2, 3, [m0] ; C8280E00 v_interp_p2_f32 v10, [v10], v1, 2, 3, [m0] ; C8290E01 v_interp_p1_f32 v11, v0, 3, 3, [m0] ; C82C0F00 v_interp_p2_f32 v11, [v11], v1, 3, 3, [m0] ; C82D0F01 v_interp_p1_f32 v12, v0, 0, 4, [m0] ; C8301000 v_interp_p2_f32 v12, [v12], v1, 0, 4, [m0] ; C8311001 v_interp_p1_f32 v13, v0, 1, 4, [m0] ; C8341100 v_interp_p2_f32 v13, [v13], v1, 1, 4, [m0] ; C8351101 s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx4 s[16:19], s[4:5], 0x8 ; C0880508 s_load_dwordx8 s[20:27], s[6:7], 0x0 ; C0CA0700 s_load_dwordx8 s[28:35], s[6:7], 0x8 ; C0CE0708 s_load_dwordx8 s[36:43], s[6:7], 0x10 ; C0D20710 v_interp_p1_f32 v14, v0, 2, 4, [m0] ; C8381200 v_interp_p2_f32 v14, [v14], v1, 2, 4, [m0] ; C8391201 v_interp_p1_f32 v0, v0, 3, 4, [m0] ; C8001300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_buffer_load_dword s6, s[0:3], 0x6 ; C2030106 v_interp_p2_f32 v0, [v0], v1, 3, 4, [m0] ; C8011301 image_sample v[15:17], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[20:27], s[8:11] ; F0800700 00450F02 image_sample v[18:20], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[28:35], s[12:15] ; F0800700 00671202 image_sample v[1:2], 10, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[36:43], s[16:19] ; F0800A00 00890102 s_buffer_load_dword s7, s[0:3], 0xc ; C203810C s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100 s_buffer_load_dword s0, s[0:3], 0xb ; C200010B s_waitcnt vmcnt(2) lgkmcnt(0) ; BF8C0072 v_mul_f32_e32 v3, s4, v15 ; 10061E04 v_mul_f32_e32 v15, s5, v16 ; 101E2005 v_mul_f32_e32 v16, s6, v17 ; 10202206 v_mul_f32_e32 v17, v5, v5 ; 10220B05 v_mac_f32_e32 v17, v6, v6 ; 3E220D06 v_mac_f32_e32 v17, v7, v7 ; 3E220F07 v_rsq_clamp_f32_e32 v17, v17 ; 7E225911 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v18, s7, v18 ; 10242407 v_mul_f32_e32 v11, v11, v18 ; 1016250B v_mul_f32_e32 v18, s0, v20 ; 10242800 v_cvt_pkrtz_f16_f32_e32 v11, v11, v19 ; 5E16270B v_mul_f32_e32 v5, v17, v5 ; 100A0B11 v_mul_f32_e32 v6, v17, v6 ; 100C0D11 v_mul_f32_e32 v7, v17, v7 ; 100E0F11 v_mad_f32 v2, 2.0, v2, -1.0 ; D2820002 03CE04F4 v_mad_f32 v1, 2.0, v1, -1.0 ; D2820001 03CE02F4 v_mul_f32_e32 v17, v13, v7 ; 10220F0D v_mad_f32 v17, v6, v14, -v17 ; D2820011 84461D06 v_mul_f32_e32 v19, v14, v5 ; 10260B0E v_mad_f32 v19, v7, v12, -v19 ; D2820013 844E1907 v_mul_f32_e32 v20, v12, v6 ; 10280D0C v_mad_f32 v20, v5, v13, -v20 ; D2820014 84521B05 v_mul_f32_e32 v21, v12, v12 ; 102A190C v_mac_f32_e32 v21, v13, v13 ; 3E2A1B0D v_mac_f32_e32 v21, v14, v14 ; 3E2A1D0E v_rsq_clamp_f32_e32 v21, v21 ; 7E2A5915 v_mul_f32_e32 v22, v17, v17 ; 102C2311 v_mac_f32_e32 v22, v19, v19 ; 3E2C2713 v_mac_f32_e32 v22, v20, v20 ; 3E2C2914 v_rsq_clamp_f32_e32 v22, v22 ; 7E2C5916 v_mul_f32_e32 v12, v21, v12 ; 10181915 v_mul_f32_e32 v13, v21, v13 ; 101A1B15 v_mul_f32_e32 v14, v21, v14 ; 101C1D15 v_mul_f32_e32 v17, v22, v17 ; 10222316 v_mul_f32_e32 v19, v22, v19 ; 10262716 v_mul_f32_e32 v20, v22, v20 ; 10282916 v_mad_f32 v21, -v1, v1, 1.0 ; D2820015 23CA0301 v_mad_f32 v21, -v2, v2, v21 ; D2820015 24560502 v_mul_f32_e32 v2, s8, v2 ; 10040408 v_mul_f32_e32 v1, s8, v1 ; 10020208 v_add_f32_e64 v21, 0, v21 clamp ; D2060815 00022A80 v_sqrt_f32_e32 v21, v21 ; 7E2A6715 v_mul_f32_e32 v22, v2, v2 ; 102C0502 v_mac_f32_e32 v22, v1, v1 ; 3E2C0301 v_mac_f32_e32 v22, v21, v21 ; 3E2C2B15 v_rsq_clamp_f32_e32 v22, v22 ; 7E2C5916 v_mul_f32_e32 v17, v17, v0 ; 10220111 v_mul_f32_e32 v19, v19, v0 ; 10260113 v_mul_f32_e32 v0, v20, v0 ; 10000114 v_mul_f32_e32 v1, v22, v1 ; 10020316 v_mul_f32_e32 v17, v1, v17 ; 10222301 v_mul_f32_e32 v19, v1, v19 ; 10262701 v_mul_f32_e32 v0, v1, v0 ; 10000101 v_mul_f32_e32 v1, v22, v2 ; 10020516 v_mad_f32 v2, v1, v12, -v17 ; D2820002 84461901 v_mad_f32 v12, v1, v13, -v19 ; D282000C 844E1B01 v_mad_f32 v0, v1, v14, -v0 ; D2820000 84021D01 v_mul_f32_e32 v1, v22, v21 ; 10022B16 v_mac_f32_e32 v2, v1, v5 ; 3E040B01 v_mac_f32_e32 v12, v1, v6 ; 3E180D01 v_mac_f32_e32 v0, v1, v7 ; 3E000F01 v_mul_f32_e32 v1, v8, v3 ; 10020708 v_mul_f32_e32 v3, v9, v15 ; 10061F09 v_mul_f32_e32 v5, v10, v16 ; 100A210A v_cvt_pkrtz_f16_f32_e32 v1, v1, v3 ; 5E020701 v_mul_f32_e32 v3, v2, v2 ; 10060502 v_mac_f32_e32 v3, v12, v12 ; 3E06190C v_mac_f32_e32 v3, v0, v0 ; 3E060100 v_rsq_clamp_f32_e32 v3, v3 ; 7E065903 v_cvt_pkrtz_f16_f32_e64 v5, v5, 0 ; D25E0005 00010105 exp 15, 0, 1, 0, 0, v1, v5, v1, v5 ; F800040F 05010501 s_waitcnt expcnt(0) ; BF8C070F v_cvt_pkrtz_f16_f32_e64 v1, v18, 0 ; D25E0001 00010112 exp 15, 1, 1, 0, 0, v11, v1, v11, v1 ; F800041F 010B010B s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v1, v3, v2 ; 10020503 v_mul_f32_e32 v2, v3, v12 ; 10041903 v_mul_f32_e32 v0, v3, v0 ; 10000103 v_mad_f32 v1, 0.5, v1, 0.5 ; D2820001 03C202F0 v_mad_f32 v2, 0.5, v2, 0.5 ; D2820002 03C204F0 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 exp 15, 2, 0, 0, 0, v4, v4, v4, v4 ; F800002F 04040404 v_mad_f32 v0, 0.5, v0, 0.5 ; D2820000 03C200F0 v_cvt_pkrtz_f16_f32_e64 v0, v0, 0 ; D25E0000 00010100 exp 15, 3, 1, 1, 1, v1, v0, v1, v0 ; F8001C3F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 24 Code Size: 636 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL IN[5] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL OUT[4], GENERIC[2] DCL OUT[5], GENERIC[3] DCL OUT[6], GENERIC[4] DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0..2] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL DCL TEMP[2..8], LOCAL IMM[0] FLT32 { 0.0000, 0.5000, 255.0000, -128.0000} IMM[1] UINT32 {3, 400, 304, 320} IMM[2] INT32 {2, 8, 1, 0} IMM[3] FLT32 { 1.0000, -64.0000, 0.0159, 2.0000} IMM[4] UINT32 {4, 0, 12, 28} IMM[5] FLT32 { 16.0000, 0.0000, 0.0000, 0.0000} IMM[6] UINT32 {44, 60, 24, 32} IMM[7] INT32 {3, 0, 0, 0} IMM[8] UINT32 {16, 48, 20, 36} IMM[9] UINT32 {52, 8, 40, 56} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MOV TEMP[0].y, IMM[0].xxxx 4: SHL TEMP[2].x, IN[3].xxxx, IMM[2].xxxx 5: UADD TEMP[2].x, TEMP[2].xxxx, IMM[2].yyyy 6: I2F TEMP[2].x, TEMP[2].xxxx 7: ADD TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy 8: MUL TEMP[0].x, TEMP[2].xxxx, CONST[4][25].zzzz 9: ADD TEMP[0].xy, TEMP[0].xyyy, IN[5].xyyy 10: FLR TEMP[2].x, TEMP[0].xxxx 11: ADD TEMP[3].x, TEMP[0].xxxx, -TEMP[2].xxxx 12: MAD TEMP[0].x, TEMP[2].xxxx, CONST[4][25].wwww, TEMP[0].yyyy 13: MOV TEMP[3].y, TEMP[0].xxxx 14: MOV TEMP[0].y, IMM[0].xxxx 15: SHL TEMP[2].x, IN[3].yyyy, IMM[2].xxxx 16: UADD TEMP[2].x, IMM[2].yyyy, TEMP[2].xxxx 17: I2F TEMP[2].x, TEMP[2].xxxx 18: ADD TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx 19: MUL TEMP[0].x, TEMP[2].xxxx, CONST[4][25].zzzz 20: ADD TEMP[0].xy, TEMP[0].xyyy, IN[5].xyyy 21: FLR TEMP[2].x, TEMP[0].xxxx 22: ADD TEMP[4].x, TEMP[0].xxxx, -TEMP[2].xxxx 23: MAD TEMP[0].x, TEMP[2].xxxx, CONST[4][25].wwww, TEMP[0].yyyy 24: MOV TEMP[4].y, TEMP[0].xxxx 25: MOV TEMP[0].y, IMM[0].xxxx 26: SHL TEMP[2].x, IN[3].zzzz, IMM[2].xxxx 27: UADD TEMP[2].x, IMM[2].yyyy, TEMP[2].xxxx 28: I2F TEMP[2].x, TEMP[2].xxxx 29: ADD TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx 30: MUL TEMP[0].x, TEMP[2].xxxx, CONST[4][25].zzzz 31: ADD TEMP[0].xy, TEMP[0].xyyy, IN[5].xyyy 32: FLR TEMP[2].x, TEMP[0].xxxx 33: ADD TEMP[5].x, TEMP[0].xxxx, -TEMP[2].xxxx 34: MAD TEMP[0].x, TEMP[2].xxxx, CONST[4][25].wwww, TEMP[0].yyyy 35: MOV TEMP[5].y, TEMP[0].xxxx 36: MOV TEMP[0].xy, TEMP[5].xyyy 37: MOV TEMP[0].w, IMM[0].xxxx 38: TXL TEMP[0], TEMP[0], SAMP[0], 2D 39: MOV TEMP[2].xy, TEMP[4].xyyy 40: MOV TEMP[2].w, IMM[0].xxxx 41: TXL TEMP[2], TEMP[2], SAMP[0], 2D 42: MOV TEMP[6].xy, TEMP[3].xyyy 43: MOV TEMP[6].w, IMM[0].xxxx 44: TXL TEMP[6], TEMP[6], SAMP[0], 2D 45: MUL TEMP[6], IN[4].xxxx, TEMP[6] 46: MAD TEMP[2], IN[4].yyyy, TEMP[2], TEMP[6] 47: MAD TEMP[0], IN[4].zzzz, TEMP[0], TEMP[2] 48: MOV TEMP[2].xy, TEMP[5].xyyy 49: MOV TEMP[2].w, IMM[0].xxxx 50: TXL TEMP[2], TEMP[2], SAMP[0], 2D, IMM[2].zwz 51: MOV TEMP[6].xy, TEMP[3].xyyy 52: MOV TEMP[6].w, IMM[0].xxxx 53: TXL TEMP[6], TEMP[6], SAMP[0], 2D, IMM[2].zwz 54: MOV TEMP[7].xy, TEMP[4].xyyy 55: MOV TEMP[7].w, IMM[0].xxxx 56: TXL TEMP[7], TEMP[7], SAMP[0], 2D, IMM[2].zwz 57: MUL TEMP[7], IN[4].yyyy, TEMP[7] 58: MAD TEMP[6], IN[4].xxxx, TEMP[6], TEMP[7] 59: MAD TEMP[2], IN[4].zzzz, TEMP[2], TEMP[6] 60: MOV TEMP[5].xy, TEMP[5].xyyy 61: MOV TEMP[5].w, IMM[0].xxxx 62: TXL TEMP[5], TEMP[5], SAMP[0], 2D, IMM[2].xwx 63: MOV TEMP[4].xy, TEMP[4].xyyy 64: MOV TEMP[4].w, IMM[0].xxxx 65: TXL TEMP[4], TEMP[4], SAMP[0], 2D, IMM[2].xwx 66: MOV TEMP[3].xy, TEMP[3].xyyy 67: MOV TEMP[3].w, IMM[0].xxxx 68: TXL TEMP[3], TEMP[3], SAMP[0], 2D, IMM[2].xwx 69: MUL TEMP[3], IN[4].xxxx, TEMP[3] 70: MAD TEMP[3], IN[4].yyyy, TEMP[4], TEMP[3] 71: MAD TEMP[3], IN[4].zzzz, TEMP[5], TEMP[3] 72: MAD TEMP[4], IN[1], IMM[0].zzzz, IMM[0].wwww 73: FSLT TEMP[5], TEMP[4], IMM[0].xxxx 74: AND TEMP[5], TEMP[5], IMM[3].xxxx 75: ABS TEMP[4], TEMP[4] 76: ADD TEMP[4], TEMP[4], -TEMP[5] 77: ADD TEMP[4], TEMP[4], IMM[3].yyyy 78: FSLT TEMP[6], TEMP[4], IMM[0].xxxx 79: AND TEMP[6], TEMP[6], IMM[3].xxxx 80: ABS TEMP[4], TEMP[4] 81: ADD TEMP[4], TEMP[4], -TEMP[6] 82: MUL TEMP[4].xy, TEMP[4], IMM[3].zzzz 83: MOV TEMP[7].x, TEMP[4].xxxx 84: MOV TEMP[7].y, TEMP[4].yyyy 85: ADD TEMP[8].x, IMM[3].xxxx, -TEMP[4].xxxx 86: ADD TEMP[4].x, TEMP[8].xxxx, -TEMP[4].yyyy 87: MOV TEMP[7].z, TEMP[4].xxxx 88: DP3 TEMP[4].x, TEMP[7].xyzz, TEMP[7].xyzz 89: RSQ TEMP[4].x, TEMP[4].xxxx 90: MUL TEMP[4].xyz, TEMP[7].xyzz, TEMP[4].xxxx 91: MUL TEMP[6], TEMP[6], IMM[3].wwww 92: ADD TEMP[6].xy, IMM[3].xxxx, -TEMP[6] 93: MUL TEMP[6].xy, TEMP[4].xyyy, TEMP[6].xyyy 94: MOV TEMP[7].w, IMM[0].xxxx 95: MOV TEMP[7].x, TEMP[6].xxxx 96: MOV TEMP[7].y, TEMP[6].yyyy 97: MUL TEMP[5].x, TEMP[5].xxxx, IMM[3].wwww 98: ADD TEMP[5].x, IMM[3].xxxx, -TEMP[5].xxxx 99: MUL TEMP[4].x, TEMP[5].xxxx, TEMP[4].zzzz 100: MOV TEMP[7].z, TEMP[4].xxxx 101: DP4 TEMP[4].x, TEMP[7], TEMP[0] 102: DP4 TEMP[5].x, TEMP[7], TEMP[2] 103: MOV TEMP[4].y, TEMP[5].xxxx 104: DP4 TEMP[5].x, TEMP[7], TEMP[3] 105: MOV TEMP[4].z, TEMP[5].xxxx 106: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 107: RSQ TEMP[5].x, TEMP[5].xxxx 108: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx 109: MOV TEMP[5].xy, IN[5].xyyy 110: MOV TEMP[5].w, IMM[0].xxxx 111: TXL TEMP[5].z, TEMP[5], SAMP[0], 2D 112: MUL TEMP[5].xyz, IN[0].xyzz, TEMP[5].zzzz 113: MOV TEMP[6].w, IMM[3].xxxx 114: MOV TEMP[6].x, TEMP[5].xxxx 115: MOV TEMP[6].y, TEMP[5].yyyy 116: MOV TEMP[6].z, TEMP[5].zzzz 117: DP4 TEMP[0].x, TEMP[6], TEMP[0] 118: DP4 TEMP[2].x, TEMP[6], TEMP[2] 119: DP4 TEMP[3].x, TEMP[6], TEMP[3] 120: MOV TEMP[5].x, TEMP[0].xxxx 121: MOV TEMP[5].y, TEMP[2].xxxx 122: MOV TEMP[5].z, TEMP[3].xxxx 123: ADD TEMP[5].xyz, TEMP[5].xyzz, -CONST[4][19].xyzz 124: MOV TEMP[6].x, TEMP[0].xxxx 125: MOV TEMP[6].y, TEMP[2].xxxx 126: MOV TEMP[6].z, TEMP[3].xxxx 127: DP3 TEMP[7].x, CONST[4][20].xyzz, TEMP[5].xyzz 128: MOV TEMP[6].w, TEMP[7].xxxx 129: MOV TEMP[7].x, TEMP[4].xxxx 130: MOV TEMP[7].y, TEMP[4].yyyy 131: MOV TEMP[7].z, TEMP[4].zzzz 132: DP3 TEMP[8].x, TEMP[5].xyzz, TEMP[5].xyzz 133: RSQ TEMP[8].x, TEMP[8].xxxx 134: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[8].xxxx 135: DP3 TEMP[8].x, TEMP[4].xyzz, CONST[5][0].xyzz 136: MUL TEMP[4].xyz, TEMP[8].xxxx, TEMP[4].xyzz 137: MUL TEMP[4].xyz, IMM[3].wwww, TEMP[4].xyzz 138: ADD TEMP[4].xyz, CONST[5][0].xyzz, -TEMP[4].xyzz 139: DP3 TEMP[4].x, -TEMP[5].xyzz, TEMP[4].xyzz 140: MOV_SAT TEMP[4].x, TEMP[4].xxxx 141: POW TEMP[4].x, TEMP[4].xxxx, IMM[5].xxxx 142: MOV_SAT TEMP[4].x, TEMP[4].xxxx 143: MOV TEMP[7].w, TEMP[4].xxxx 144: MOV TEMP[4].w, IMM[3].xxxx 145: MOV TEMP[4].x, TEMP[0].xxxx 146: MOV TEMP[4].y, TEMP[2].xxxx 147: MOV TEMP[4].z, TEMP[3].xxxx 148: MOV TEMP[0].x, CONST[4][0].wwww 149: MOV TEMP[0].y, CONST[4][1].wwww 150: MOV TEMP[0].z, CONST[4][2].wwww 151: MOV TEMP[0].w, CONST[4][3].wwww 152: DP4 TEMP[0].x, TEMP[4], TEMP[0] 153: MAD TEMP[2].xy, IN[2].xyyy, CONST[1][1].zwww, CONST[1][2].xyyy 154: MOV TEMP[3].xy, IN[5].xyyy 155: MOV TEMP[3].w, IMM[0].xxxx 156: TXL TEMP[3], TEMP[3], SAMP[0], 2D, IMM[7].xyx 157: MOV TEMP[5].x, CONST[4][0].xxxx 158: MOV TEMP[5].y, CONST[4][1].xxxx 159: MOV TEMP[5].z, CONST[4][2].xxxx 160: MOV TEMP[5].w, CONST[4][3].xxxx 161: DP4 TEMP[5].x, TEMP[4], TEMP[5] 162: MOV TEMP[8].x, CONST[4][0].yyyy 163: MOV TEMP[8].y, CONST[4][1].yyyy 164: MOV TEMP[8].z, CONST[4][2].yyyy 165: MOV TEMP[8].w, CONST[4][3].yyyy 166: DP4 TEMP[8].x, TEMP[4], TEMP[8] 167: MOV TEMP[5].y, -TEMP[8].xxxx 168: MOV TEMP[8].x, CONST[4][0].zzzz 169: MOV TEMP[8].y, CONST[4][1].zzzz 170: MOV TEMP[8].z, CONST[4][2].zzzz 171: MOV TEMP[8].w, CONST[4][3].zzzz 172: DP4 TEMP[4].x, TEMP[4], TEMP[8] 173: MAD TEMP[4].x, IMM[3].wwww, TEMP[4].xxxx, -TEMP[0].xxxx 174: MOV TEMP[5].z, TEMP[4].xxxx 175: MOV TEMP[5].w, TEMP[0].xxxx 176: MOV OUT[1], TEMP[1] 177: MOV OUT[2].xy, TEMP[2].xyxx 178: MOV OUT[4], TEMP[7] 179: MOV OUT[6], IMM[0].xxxx 180: MOV OUT[5], TEMP[3] 181: MOV OUT[0], TEMP[5] 182: MOV OUT[3], TEMP[6] 183: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %17 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4 %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 %19 = call float @llvm.SI.load.const(<16 x i8> %18, i32 0) %20 = call float @llvm.SI.load.const(<16 x i8> %18, i32 4) %21 = call float @llvm.SI.load.const(<16 x i8> %18, i32 8) %22 = call float @llvm.SI.load.const(<16 x i8> %18, i32 12) %23 = call float @llvm.SI.load.const(<16 x i8> %18, i32 16) %24 = call float @llvm.SI.load.const(<16 x i8> %18, i32 20) %25 = call float @llvm.SI.load.const(<16 x i8> %18, i32 24) %26 = call float @llvm.SI.load.const(<16 x i8> %18, i32 28) %27 = call float @llvm.SI.load.const(<16 x i8> %18, i32 32) %28 = call float @llvm.SI.load.const(<16 x i8> %18, i32 36) %29 = call float @llvm.SI.load.const(<16 x i8> %18, i32 40) %30 = call float @llvm.SI.load.const(<16 x i8> %18, i32 44) %31 = call float @llvm.SI.load.const(<16 x i8> %18, i32 48) %32 = call float @llvm.SI.load.const(<16 x i8> %18, i32 52) %33 = call float @llvm.SI.load.const(<16 x i8> %18, i32 56) %34 = call float @llvm.SI.load.const(<16 x i8> %18, i32 60) %35 = call float @llvm.SI.load.const(<16 x i8> %18, i32 304) %36 = call float @llvm.SI.load.const(<16 x i8> %18, i32 308) %37 = call float @llvm.SI.load.const(<16 x i8> %18, i32 312) %38 = call float @llvm.SI.load.const(<16 x i8> %18, i32 320) %39 = call float @llvm.SI.load.const(<16 x i8> %18, i32 324) %40 = call float @llvm.SI.load.const(<16 x i8> %18, i32 328) %41 = call float @llvm.SI.load.const(<16 x i8> %18, i32 408) %42 = call float @llvm.SI.load.const(<16 x i8> %18, i32 412) %43 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 5 %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = call float @llvm.SI.load.const(<16 x i8> %44, i32 0) %46 = call float @llvm.SI.load.const(<16 x i8> %44, i32 4) %47 = call float @llvm.SI.load.const(<16 x i8> %44, i32 8) %48 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %49 = load <8 x i32>, <8 x i32> addrspace(2)* %48, align 32, !tbaa !0 %50 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %51 = load <4 x i32>, <4 x i32> addrspace(2)* %50, align 16, !tbaa !0 %52 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !tbaa !0 %54 = add i32 %5, %7 %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %54) %56 = extractelement <4 x float> %55, i32 0 %57 = extractelement <4 x float> %55, i32 1 %58 = extractelement <4 x float> %55, i32 2 %59 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !tbaa !0 %61 = add i32 %5, %7 %62 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %60, i32 0, i32 %61) %63 = extractelement <4 x float> %62, i32 0 %64 = extractelement <4 x float> %62, i32 1 %65 = extractelement <4 x float> %62, i32 2 %66 = extractelement <4 x float> %62, i32 3 %67 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %68 = load <16 x i8>, <16 x i8> addrspace(2)* %67, align 16, !tbaa !0 %69 = add i32 %5, %7 %70 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %68, i32 0, i32 %69) %71 = extractelement <4 x float> %70, i32 0 %72 = extractelement <4 x float> %70, i32 1 %73 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %74 = load <16 x i8>, <16 x i8> addrspace(2)* %73, align 16, !tbaa !0 %75 = add i32 %5, %7 %76 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %74, i32 0, i32 %75) %77 = extractelement <4 x float> %76, i32 0 %78 = extractelement <4 x float> %76, i32 1 %79 = extractelement <4 x float> %76, i32 2 %80 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0 %82 = add i32 %5, %7 %83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %82) %84 = extractelement <4 x float> %83, i32 0 %85 = extractelement <4 x float> %83, i32 1 %86 = extractelement <4 x float> %83, i32 2 %87 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 5 %88 = load <16 x i8>, <16 x i8> addrspace(2)* %87, align 16, !tbaa !0 %89 = add i32 %10, %6 %90 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %88, i32 0, i32 %89) %91 = extractelement <4 x float> %90, i32 0 %92 = extractelement <4 x float> %90, i32 1 %93 = bitcast float %77 to i32 %94 = shl i32 %93, 2 %95 = add i32 %94, 8 %96 = sitofp i32 %95 to float %97 = fadd float %96, 5.000000e-01 %98 = fmul float %97, %41 %99 = fadd float %98, %91 %100 = fadd float %92, 0.000000e+00 %101 = call float @floor(float %99) %102 = fsub float %99, %101 %103 = fmul float %101, %42 %104 = fadd float %103, %100 %105 = bitcast float %78 to i32 %106 = shl i32 %105, 2 %107 = add i32 %106, 8 %108 = sitofp i32 %107 to float %109 = fadd float %108, 5.000000e-01 %110 = fmul float %109, %41 %111 = fadd float %110, %91 %112 = fadd float %92, 0.000000e+00 %113 = call float @floor(float %111) %114 = fsub float %111, %113 %115 = fmul float %113, %42 %116 = fadd float %115, %112 %117 = bitcast float %79 to i32 %118 = shl i32 %117, 2 %119 = add i32 %118, 8 %120 = sitofp i32 %119 to float %121 = fadd float %120, 5.000000e-01 %122 = fmul float %121, %41 %123 = fadd float %122, %91 %124 = fadd float %92, 0.000000e+00 %125 = call float @floor(float %123) %126 = fsub float %123, %125 %127 = fmul float %125, %42 %128 = fadd float %127, %124 %129 = bitcast float %126 to i32 %130 = bitcast float %128 to i32 %131 = insertelement <4 x i32> undef, i32 %129, i32 0 %132 = insertelement <4 x i32> %131, i32 %130, i32 1 %133 = insertelement <4 x i32> %132, i32 0, i32 2 %134 = bitcast <8 x i32> %49 to <32 x i8> %135 = bitcast <4 x i32> %51 to <16 x i8> %136 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %133, <32 x i8> %134, <16 x i8> %135, i32 2) %137 = extractelement <4 x float> %136, i32 0 %138 = extractelement <4 x float> %136, i32 1 %139 = extractelement <4 x float> %136, i32 2 %140 = extractelement <4 x float> %136, i32 3 %141 = bitcast float %114 to i32 %142 = bitcast float %116 to i32 %143 = insertelement <4 x i32> undef, i32 %141, i32 0 %144 = insertelement <4 x i32> %143, i32 %142, i32 1 %145 = insertelement <4 x i32> %144, i32 0, i32 2 %146 = bitcast <8 x i32> %49 to <32 x i8> %147 = bitcast <4 x i32> %51 to <16 x i8> %148 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %145, <32 x i8> %146, <16 x i8> %147, i32 2) %149 = extractelement <4 x float> %148, i32 0 %150 = extractelement <4 x float> %148, i32 1 %151 = extractelement <4 x float> %148, i32 2 %152 = extractelement <4 x float> %148, i32 3 %153 = bitcast float %102 to i32 %154 = bitcast float %104 to i32 %155 = insertelement <4 x i32> undef, i32 %153, i32 0 %156 = insertelement <4 x i32> %155, i32 %154, i32 1 %157 = insertelement <4 x i32> %156, i32 0, i32 2 %158 = bitcast <8 x i32> %49 to <32 x i8> %159 = bitcast <4 x i32> %51 to <16 x i8> %160 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %157, <32 x i8> %158, <16 x i8> %159, i32 2) %161 = extractelement <4 x float> %160, i32 0 %162 = extractelement <4 x float> %160, i32 1 %163 = extractelement <4 x float> %160, i32 2 %164 = extractelement <4 x float> %160, i32 3 %165 = fmul float %84, %161 %166 = fmul float %84, %162 %167 = fmul float %84, %163 %168 = fmul float %84, %164 %169 = fmul float %85, %149 %170 = fadd float %169, %165 %171 = fmul float %85, %150 %172 = fadd float %171, %166 %173 = fmul float %85, %151 %174 = fadd float %173, %167 %175 = fmul float %85, %152 %176 = fadd float %175, %168 %177 = fmul float %86, %137 %178 = fadd float %177, %170 %179 = fmul float %86, %138 %180 = fadd float %179, %172 %181 = fmul float %86, %139 %182 = fadd float %181, %174 %183 = fmul float %86, %140 %184 = fadd float %183, %176 %185 = bitcast float %126 to i32 %186 = bitcast float %128 to i32 %187 = insertelement <4 x i32> , i32 %185, i32 1 %188 = insertelement <4 x i32> %187, i32 %186, i32 2 %189 = insertelement <4 x i32> %188, i32 0, i32 3 %190 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %189, <8 x i32> %49, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %191 = extractelement <4 x float> %190, i32 0 %192 = extractelement <4 x float> %190, i32 1 %193 = extractelement <4 x float> %190, i32 2 %194 = extractelement <4 x float> %190, i32 3 %195 = bitcast float %102 to i32 %196 = bitcast float %104 to i32 %197 = insertelement <4 x i32> , i32 %195, i32 1 %198 = insertelement <4 x i32> %197, i32 %196, i32 2 %199 = insertelement <4 x i32> %198, i32 0, i32 3 %200 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %199, <8 x i32> %49, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %201 = extractelement <4 x float> %200, i32 0 %202 = extractelement <4 x float> %200, i32 1 %203 = extractelement <4 x float> %200, i32 2 %204 = extractelement <4 x float> %200, i32 3 %205 = bitcast float %114 to i32 %206 = bitcast float %116 to i32 %207 = insertelement <4 x i32> , i32 %205, i32 1 %208 = insertelement <4 x i32> %207, i32 %206, i32 2 %209 = insertelement <4 x i32> %208, i32 0, i32 3 %210 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %209, <8 x i32> %49, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %211 = extractelement <4 x float> %210, i32 0 %212 = extractelement <4 x float> %210, i32 1 %213 = extractelement <4 x float> %210, i32 2 %214 = extractelement <4 x float> %210, i32 3 %215 = fmul float %85, %211 %216 = fmul float %85, %212 %217 = fmul float %85, %213 %218 = fmul float %85, %214 %219 = fmul float %84, %201 %220 = fadd float %219, %215 %221 = fmul float %84, %202 %222 = fadd float %221, %216 %223 = fmul float %84, %203 %224 = fadd float %223, %217 %225 = fmul float %84, %204 %226 = fadd float %225, %218 %227 = fmul float %86, %191 %228 = fadd float %227, %220 %229 = fmul float %86, %192 %230 = fadd float %229, %222 %231 = fmul float %86, %193 %232 = fadd float %231, %224 %233 = fmul float %86, %194 %234 = fadd float %233, %226 %235 = bitcast float %126 to i32 %236 = bitcast float %128 to i32 %237 = insertelement <4 x i32> , i32 %235, i32 1 %238 = insertelement <4 x i32> %237, i32 %236, i32 2 %239 = insertelement <4 x i32> %238, i32 0, i32 3 %240 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %239, <8 x i32> %49, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %241 = extractelement <4 x float> %240, i32 0 %242 = extractelement <4 x float> %240, i32 1 %243 = extractelement <4 x float> %240, i32 2 %244 = extractelement <4 x float> %240, i32 3 %245 = bitcast float %114 to i32 %246 = bitcast float %116 to i32 %247 = insertelement <4 x i32> , i32 %245, i32 1 %248 = insertelement <4 x i32> %247, i32 %246, i32 2 %249 = insertelement <4 x i32> %248, i32 0, i32 3 %250 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %249, <8 x i32> %49, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %251 = extractelement <4 x float> %250, i32 0 %252 = extractelement <4 x float> %250, i32 1 %253 = extractelement <4 x float> %250, i32 2 %254 = extractelement <4 x float> %250, i32 3 %255 = bitcast float %102 to i32 %256 = bitcast float %104 to i32 %257 = insertelement <4 x i32> , i32 %255, i32 1 %258 = insertelement <4 x i32> %257, i32 %256, i32 2 %259 = insertelement <4 x i32> %258, i32 0, i32 3 %260 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %259, <8 x i32> %49, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %261 = extractelement <4 x float> %260, i32 0 %262 = extractelement <4 x float> %260, i32 1 %263 = extractelement <4 x float> %260, i32 2 %264 = extractelement <4 x float> %260, i32 3 %265 = fmul float %84, %261 %266 = fmul float %84, %262 %267 = fmul float %84, %263 %268 = fmul float %84, %264 %269 = fmul float %85, %251 %270 = fadd float %269, %265 %271 = fmul float %85, %252 %272 = fadd float %271, %266 %273 = fmul float %85, %253 %274 = fadd float %273, %267 %275 = fmul float %85, %254 %276 = fadd float %275, %268 %277 = fmul float %86, %241 %278 = fadd float %277, %270 %279 = fmul float %86, %242 %280 = fadd float %279, %272 %281 = fmul float %86, %243 %282 = fadd float %281, %274 %283 = fmul float %86, %244 %284 = fadd float %283, %276 %285 = fmul float %63, 2.550000e+02 %286 = fadd float %285, -1.280000e+02 %287 = fmul float %64, 2.550000e+02 %288 = fadd float %287, -1.280000e+02 %289 = fmul float %65, 2.550000e+02 %290 = fadd float %289, -1.280000e+02 %291 = fmul float %66, 2.550000e+02 %292 = fadd float %291, -1.280000e+02 %293 = fcmp olt float %286, 0.000000e+00 %294 = fcmp olt float %288, 0.000000e+00 %295 = fcmp olt float %290, 0.000000e+00 %296 = fcmp olt float %292, 0.000000e+00 %297 = select i1 %293, float 1.000000e+00, float 0.000000e+00 %298 = call float @fabs(float %286) %299 = call float @fabs(float %288) %300 = call float @fabs(float %290) %301 = call float @fabs(float %292) %302 = fsub float %298, %297 %303 = select i1 %294, float -1.000000e+00, float -0.000000e+00 %304 = fadd float %299, %303 %305 = select i1 %295, float -1.000000e+00, float -0.000000e+00 %306 = fadd float %300, %305 %307 = select i1 %296, float -1.000000e+00, float -0.000000e+00 %308 = fadd float %301, %307 %309 = fadd float %302, -6.400000e+01 %310 = fadd float %304, -6.400000e+01 %311 = fadd float %306, -6.400000e+01 %312 = fadd float %308, -6.400000e+01 %313 = fcmp olt float %309, 0.000000e+00 %314 = fcmp olt float %310, 0.000000e+00 %315 = select i1 %313, float 1.000000e+00, float 0.000000e+00 %316 = select i1 %314, float 1.000000e+00, float 0.000000e+00 %317 = call float @fabs(float %309) %318 = call float @fabs(float %310) %319 = call float @fabs(float %311) %320 = call float @fabs(float %312) %321 = fsub float %317, %315 %322 = fsub float %318, %316 %323 = fmul float %321, 0x3F90410420000000 %324 = fmul float %322, 0x3F90410420000000 %325 = fsub float 1.000000e+00, %323 %326 = fsub float %325, %324 %327 = fmul float %323, %323 %328 = fmul float %324, %324 %329 = fadd float %328, %327 %330 = fmul float %326, %326 %331 = fadd float %329, %330 %332 = call float @llvm.AMDGPU.rsq.clamped.f32(float %331) %333 = fmul float %323, %332 %334 = fmul float %324, %332 %335 = fmul float %326, %332 %336 = fmul float %315, 2.000000e+00 %337 = fmul float %316, 2.000000e+00 %338 = fsub float 1.000000e+00, %336 %339 = fsub float 1.000000e+00, %337 %340 = fmul float %333, %338 %341 = fmul float %334, %339 %342 = fmul float %297, 2.000000e+00 %343 = fsub float 1.000000e+00, %342 %344 = fmul float %343, %335 %345 = fmul float %340, %178 %346 = fmul float %341, %180 %347 = fadd float %345, %346 %348 = fmul float %344, %182 %349 = fadd float %347, %348 %350 = fmul float %184, 0.000000e+00 %351 = fadd float %349, %350 %352 = fmul float %340, %228 %353 = fmul float %341, %230 %354 = fadd float %352, %353 %355 = fmul float %344, %232 %356 = fadd float %354, %355 %357 = fmul float %234, 0.000000e+00 %358 = fadd float %356, %357 %359 = fmul float %340, %278 %360 = fmul float %341, %280 %361 = fadd float %359, %360 %362 = fmul float %344, %282 %363 = fadd float %361, %362 %364 = fmul float %284, 0.000000e+00 %365 = fadd float %363, %364 %366 = fmul float %351, %351 %367 = fmul float %358, %358 %368 = fadd float %367, %366 %369 = fmul float %365, %365 %370 = fadd float %368, %369 %371 = call float @llvm.AMDGPU.rsq.clamped.f32(float %370) %372 = fmul float %351, %371 %373 = fmul float %358, %371 %374 = fmul float %365, %371 %375 = bitcast float %91 to i32 %376 = bitcast float %92 to i32 %377 = insertelement <4 x i32> undef, i32 %375, i32 0 %378 = insertelement <4 x i32> %377, i32 %376, i32 1 %379 = insertelement <4 x i32> %378, i32 0, i32 2 %380 = bitcast <8 x i32> %49 to <32 x i8> %381 = bitcast <4 x i32> %51 to <16 x i8> %382 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %379, <32 x i8> %380, <16 x i8> %381, i32 2) %383 = extractelement <4 x float> %382, i32 2 %384 = fmul float %56, %383 %385 = fmul float %57, %383 %386 = fmul float %58, %383 %387 = fmul float %384, %178 %388 = fmul float %385, %180 %389 = fadd float %387, %388 %390 = fmul float %386, %182 %391 = fadd float %389, %390 %392 = fadd float %391, %184 %393 = fmul float %384, %228 %394 = fmul float %385, %230 %395 = fadd float %393, %394 %396 = fmul float %386, %232 %397 = fadd float %395, %396 %398 = fadd float %397, %234 %399 = fmul float %384, %278 %400 = fmul float %385, %280 %401 = fadd float %399, %400 %402 = fmul float %386, %282 %403 = fadd float %401, %402 %404 = fadd float %403, %284 %405 = fsub float %392, %35 %406 = fsub float %398, %36 %407 = fsub float %404, %37 %408 = fmul float %38, %405 %409 = fmul float %39, %406 %410 = fadd float %409, %408 %411 = fmul float %40, %407 %412 = fadd float %410, %411 %413 = fmul float %405, %405 %414 = fmul float %406, %406 %415 = fadd float %414, %413 %416 = fmul float %407, %407 %417 = fadd float %415, %416 %418 = call float @llvm.AMDGPU.rsq.clamped.f32(float %417) %419 = fmul float %405, %418 %420 = fmul float %406, %418 %421 = fmul float %407, %418 %422 = fmul float %372, %45 %423 = fmul float %373, %46 %424 = fadd float %423, %422 %425 = fmul float %374, %47 %426 = fadd float %424, %425 %427 = fmul float %426, %372 %428 = fmul float %426, %373 %429 = fmul float %426, %374 %430 = fmul float %427, 2.000000e+00 %431 = fmul float %428, 2.000000e+00 %432 = fmul float %429, 2.000000e+00 %433 = fsub float %45, %430 %434 = fsub float %46, %431 %435 = fsub float %47, %432 %436 = fmul float %419, %433 %437 = fsub float -0.000000e+00, %436 %438 = fmul float %420, %434 %439 = fsub float %437, %438 %440 = fmul float %421, %435 %441 = fsub float %439, %440 %442 = call float @llvm.AMDIL.clamp.(float %441, float 0.000000e+00, float 1.000000e+00) %443 = call float @llvm.pow.f32(float %442, float 1.600000e+01) %444 = call float @llvm.AMDIL.clamp.(float %443, float 0.000000e+00, float 1.000000e+00) %445 = fmul float %392, %22 %446 = fmul float %398, %26 %447 = fadd float %445, %446 %448 = fmul float %404, %30 %449 = fadd float %447, %448 %450 = fadd float %449, %34 %451 = fmul float %71, %13 %452 = fadd float %451, %15 %453 = fmul float %72, %14 %454 = fadd float %453, %16 %455 = bitcast float %91 to i32 %456 = bitcast float %92 to i32 %457 = insertelement <4 x i32> , i32 %455, i32 1 %458 = insertelement <4 x i32> %457, i32 %456, i32 2 %459 = insertelement <4 x i32> %458, i32 0, i32 3 %460 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %459, <8 x i32> %49, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %461 = extractelement <4 x float> %460, i32 0 %462 = extractelement <4 x float> %460, i32 1 %463 = extractelement <4 x float> %460, i32 2 %464 = extractelement <4 x float> %460, i32 3 %465 = fmul float %392, %19 %466 = fmul float %398, %23 %467 = fadd float %465, %466 %468 = fmul float %404, %27 %469 = fadd float %467, %468 %470 = fadd float %469, %31 %471 = fmul float %392, %20 %472 = fmul float %398, %24 %473 = fadd float %471, %472 %474 = fmul float %404, %28 %475 = fadd float %473, %474 %476 = fadd float %475, %32 %477 = fsub float -0.000000e+00, %476 %478 = fmul float %392, %21 %479 = fmul float %398, %25 %480 = fadd float %478, %479 %481 = fmul float %404, %29 %482 = fadd float %480, %481 %483 = fadd float %482, %33 %484 = fmul float %483, 2.000000e+00 %485 = fsub float %484, %450 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %452, float %454, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %392, float %398, float %404, float %412) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %372, float %373, float %374, float %444) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %461, float %462, float %463, float %464) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %470, float %477, float %485, float %450) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @floor(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0xc3000000 ; 7E0202FF C3000000 v_mov_b32_e32 v2, 0x437f0000 ; 7E0402FF 437F0000 v_mov_b32_e32 v4, 0x80000000 ; 7E0802FF 80000000 v_mov_b32_e32 v5, 0xc2800000 ; 7E0A02FF C2800000 v_mov_b32_e32 v6, 0x3c820821 ; 7E0C02FF 3C820821 v_add_i32_e32 v0, s10, v0 ; 4A00000A v_add_i32_e32 v3, s11, v3 ; 4A06060B s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908 s_load_dwordx4 s[24:27], s[8:9], 0xc ; C08C090C s_load_dwordx4 s[28:31], s[8:9], 0x10 ; C08E0910 s_load_dwordx4 s[44:47], s[8:9], 0x14 ; C0960914 s_load_dwordx4 s[8:11], s[2:3], 0x4 ; C0840304 s_load_dwordx4 s[48:51], s[2:3], 0x10 ; C0980310 s_load_dwordx4 s[0:3], s[2:3], 0x14 ; C0800314 v_mov_b32_e32 v13, 0 ; 7E1A0280 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[14:17], v0, s[12:15], 0 idxen ; E00C2000 80030E00 s_load_dwordx8 s[36:43], s[6:7], 0x0 ; C0D20700 buffer_load_format_xyzw v[7:10], v0, s[16:19], 0 idxen ; E00C2000 80040700 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[9:12], v0, s[20:23], 0 idxen ; E00C2000 80050900 buffer_load_format_xyzw v[17:20], v0, s[24:27], 0 idxen ; E00C2000 80061100 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[20:23], v0, s[28:31], 0 idxen ; E00C2000 80071400 s_buffer_load_dword s29, s[8:11], 0x6 ; C20E8906 s_buffer_load_dword s30, s[8:11], 0x7 ; C20F0907 s_buffer_load_dword s15, s[8:11], 0x8 ; C2078908 s_buffer_load_dword s16, s[8:11], 0x9 ; C2080909 s_buffer_load_dword s9, s[48:51], 0x51 ; C204B151 s_buffer_load_dword s10, s[48:51], 0x52 ; C2053152 s_buffer_load_dword s31, s[48:51], 0x66 ; C20FB166 s_buffer_load_dword s52, s[48:51], 0x67 ; C21A3167 s_buffer_load_dword s4, s[48:51], 0xf ; C202310F s_buffer_load_dword s28, s[48:51], 0x4c ; C20E314C s_buffer_load_dword s26, s[48:51], 0x4d ; C20D314D s_buffer_load_dword s27, s[48:51], 0x4e ; C20DB14E s_buffer_load_dword s22, s[48:51], 0x50 ; C20B3150 s_buffer_load_dword s25, s[0:3], 0x0 ; C20C8100 s_buffer_load_dword s24, s[0:3], 0x1 ; C20C0101 s_buffer_load_dword s23, s[0:3], 0x2 ; C20B8102 s_buffer_load_dword s7, s[48:51], 0x5 ; C203B105 s_buffer_load_dword s8, s[48:51], 0x6 ; C2043106 s_buffer_load_dword s14, s[48:51], 0x7 ; C2073107 s_buffer_load_dword s6, s[48:51], 0x8 ; C2033108 s_buffer_load_dword s5, s[48:51], 0x9 ; C202B109 s_buffer_load_dword s11, s[48:51], 0x0 ; C205B100 s_buffer_load_dword s12, s[48:51], 0x1 ; C2063101 s_buffer_load_dword s13, s[48:51], 0x2 ; C206B102 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v0, s15 ; 7E00020F s_buffer_load_dword s15, s[48:51], 0x3 ; C207B103 v_mov_b32_e32 v23, s16 ; 7E2E0210 s_buffer_load_dword s20, s[48:51], 0x4 ; C20A3104 buffer_load_format_xyzw v[28:31], v3, s[44:47], 0 idxen ; E00C2000 800B1C03 s_buffer_load_dword s16, s[48:51], 0xa ; C208310A s_buffer_load_dword s21, s[48:51], 0xb ; C20AB10B s_buffer_load_dword s17, s[48:51], 0xc ; C208B10C s_buffer_load_dword s18, s[48:51], 0xd ; C209310D s_buffer_load_dword s19, s[48:51], 0xe ; C209B10E v_mad_f32 v3, v2, v7, v1 ; D2820003 04060F02 v_mac_f32_e32 v1, v2, v8 ; 3E021102 v_mac_f32_e32 v0, s29, v9 ; 3E00121D v_mac_f32_e32 v23, s30, v10 ; 3E2E141E v_cmp_gt_f32_e64 s[2:3], 0, v3 ; D0080002 00020680 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v2, v4, -1.0, vcc ; D2000002 01A9E704 v_add_f32_e64 v1, |v1|, v2 ; D2060101 00020501 v_add_f32_e32 v1, v5, v1 ; 06020305 v_cmp_gt_f32_e64 s[0:1], 0, v1 ; D0080000 00020280 v_lshlrev_b32_e32 v2, 2, v17 ; 34042282 v_lshlrev_b32_e32 v4, 2, v18 ; 34082482 v_lshlrev_b32_e32 v7, 2, v19 ; 340E2682 v_add_i32_e32 v2, 8, v2 ; 4A040488 v_cvt_f32_i32_e32 v2, v2 ; 7E040B02 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_add_f32_e32 v12, 0, v29 ; 06183A80 v_add_i32_e32 v4, 8, v4 ; 4A080888 v_cvt_f32_i32_e32 v4, v4 ; 7E080B04 v_add_f32_e32 v2, 0.5, v2 ; 060404F0 v_mad_f32 v2, s31, v2, v28 ; D2820002 0472041F v_floor_f32_e32 v8, v2 ; 7E104902 v_subrev_f32_e32 v24, v8, v2 ; 0A300508 v_mad_f32 v25, s52, v8, v12 ; D2820019 04321034 v_add_i32_e32 v2, 8, v7 ; 4A040E88 v_cvt_f32_i32_e32 v2, v2 ; 7E040B02 v_add_f32_e32 v4, 0.5, v4 ; 060808F0 v_mad_f32 v4, s31, v4, v28 ; D2820004 0472081F v_floor_f32_e32 v7, v4 ; 7E0E4904 v_subrev_f32_e32 v30, v7, v4 ; 0A3C0907 v_mad_f32 v31, s52, v7, v12 ; D282001F 04320E34 v_add_f32_e32 v2, 0.5, v2 ; 060404F0 v_mad_f32 v2, s31, v2, v28 ; D2820002 0472041F v_floor_f32_e32 v4, v2 ; 7E084902 v_subrev_f32_e32 v11, v4, v2 ; 0A160504 v_mac_f32_e32 v12, s52, v4 ; 3E180834 v_mov_b32_e32 v32, v13 ; 7E40030D v_mov_b32_e32 v26, v13 ; 7E34030D image_sample_l v[33:36], 15, 0, 0, 0, 0, 0, 0, 0, v[11:14], s[36:43], s[32:35] ; F0900F00 0109210B s_waitcnt vmcnt(0) ; BF8C0770 image_sample_l v[37:40], 15, 0, 0, 0, 0, 0, 0, 0, v[30:33], s[36:43], s[32:35] ; F0900F00 0109251E image_sample_l v[7:10], 15, 0, 0, 0, 0, 0, 0, 0, v[24:27], s[36:43], s[32:35] ; F0900F00 01090718 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, v7, v20 ; 10042907 v_mul_f32_e32 v4, v8, v20 ; 10082908 v_mul_f32_e32 v7, v9, v20 ; 100E2909 v_mul_f32_e32 v8, v10, v20 ; 1010290A v_mov_b32_e32 v10, 0x10001 ; 7E1402FF 00010001 v_mac_f32_e32 v2, v37, v21 ; 3E042B25 v_mac_f32_e32 v4, v38, v21 ; 3E082B26 v_mac_f32_e32 v7, v39, v21 ; 3E0E2B27 v_mov_b32_e32 v41, v10 ; 7E52030A v_mov_b32_e32 v42, v11 ; 7E54030B v_mov_b32_e32 v43, v12 ; 7E56030C v_mov_b32_e32 v44, v13 ; 7E58030D v_mov_b32_e32 v45, v10 ; 7E5A030A v_mov_b32_e32 v46, v11 ; 7E5C030B v_mov_b32_e32 v47, v12 ; 7E5E030C v_mov_b32_e32 v48, v13 ; 7E60030D v_mac_f32_e32 v8, v40, v21 ; 3E102B28 v_mov_b32_e32 v42, v24 ; 7E540318 v_mov_b32_e32 v46, v30 ; 7E5C031E v_mac_f32_e32 v2, v33, v22 ; 3E042D21 v_mac_f32_e32 v4, v34, v22 ; 3E082D22 v_mac_f32_e32 v7, v35, v22 ; 3E0E2D23 v_mov_b32_e32 v43, v25 ; 7E560319 v_mac_f32_e32 v8, v36, v22 ; 3E102D24 v_mov_b32_e32 v47, v31 ; 7E5E031F v_mov_b32_e32 v44, v13 ; 7E58030D image_sample_l_o v[32:35], 15, 0, 0, 0, 0, 0, 0, 0, v[10:13], s[36:43], s[32:35] ; F0D00F00 0109200A image_sample_l_o v[36:39], 15, 0, 0, 0, 0, 0, 0, 0, v[41:44], s[36:43], s[32:35] ; F0D00F00 01092429 v_mov_b32_e32 v48, v13 ; 7E60030D image_sample_l_o v[40:43], 15, 0, 0, 0, 0, 0, 0, 0, v[45:48], s[36:43], s[32:35] ; F0D00F00 0109282D s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v9, v40, v21 ; 10122B28 v_mul_f32_e32 v17, v41, v21 ; 10222B29 v_mul_f32_e32 v18, v42, v21 ; 10242B2A v_mul_f32_e32 v19, v43, v21 ; 10262B2B v_mov_b32_e32 v10, 0x20002 ; 7E1402FF 00020002 image_sample_l_o v[40:43], 15, 0, 0, 0, 0, 0, 0, 0, v[10:13], s[36:43], s[32:35] ; F0D00F00 0109280A v_mov_b32_e32 v11, v30 ; 7E16031E v_mac_f32_e32 v9, v36, v20 ; 3E122924 v_mac_f32_e32 v17, v37, v20 ; 3E222925 v_mac_f32_e32 v18, v38, v20 ; 3E242926 v_mac_f32_e32 v19, v39, v20 ; 3E262927 v_mov_b32_e32 v12, v31 ; 7E18031F v_mac_f32_e32 v9, v32, v22 ; 3E122D20 v_mac_f32_e32 v17, v33, v22 ; 3E222D21 v_mac_f32_e32 v18, v34, v22 ; 3E242D22 image_sample_l_o v[30:33], 15, 0, 0, 0, 0, 0, 0, 0, v[10:13], s[36:43], s[32:35] ; F0D00F00 01091E0A v_mov_b32_e32 v11, v24 ; 7E160318 v_mac_f32_e32 v19, v35, v22 ; 3E262D23 v_mov_b32_e32 v12, v25 ; 7E180319 image_sample_l_o v[24:27], 15, 0, 0, 0, 0, 0, 0, 0, v[10:13], s[36:43], s[32:35] ; F0D00F00 0109180A s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v10, v24, v20 ; 10142918 v_mul_f32_e32 v11, v25, v20 ; 10162919 v_mul_f32_e32 v12, v26, v20 ; 1018291A v_mul_f32_e32 v20, v27, v20 ; 1028291B v_mac_f32_e32 v10, v30, v21 ; 3E142B1E v_mac_f32_e32 v11, v31, v21 ; 3E162B1F v_mac_f32_e32 v12, v32, v21 ; 3E182B20 v_mac_f32_e32 v20, v33, v21 ; 3E282B21 v_mac_f32_e32 v10, v40, v22 ; 3E142D28 v_mac_f32_e32 v11, v41, v22 ; 3E162D29 v_mac_f32_e32 v12, v42, v22 ; 3E182D2A v_mac_f32_e32 v20, v43, v22 ; 3E282D2B v_mov_b32_e32 v30, v13 ; 7E3C030D image_sample_l v21, 4, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[36:43], s[32:35] ; F0900400 0109151C v_mov_b32_e32 v27, 0x30003 ; 7E3602FF 00030003 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v14, v21, v14 ; 101C1D15 v_mul_f32_e32 v15, v21, v15 ; 101E1F15 v_mul_f32_e32 v16, v21, v16 ; 10202115 v_mov_b32_e32 v30, v13 ; 7E3C030D image_sample_l_o v[24:27], 15, 0, 0, 0, 0, 0, 0, 0, v[27:30], s[36:43], s[32:35] ; F0D00F00 0109181B v_cndmask_b32_e64 v21, 0, 1.0, s[2:3] ; D2000015 0009E480 v_sub_f32_e64 v3, |v3|, v21 ; D2080103 00022B03 v_add_f32_e32 v3, v5, v3 ; 06060705 v_cmp_gt_f32_e32 vcc, 0, v3 ; 7C080680 v_cndmask_b32_e64 v5, 0, 1.0, vcc ; D2000005 01A9E480 v_sub_f32_e64 v3, |v3|, v5 ; D2080103 00020B03 v_cndmask_b32_e64 v22, 0, 1.0, s[0:1] ; D2000016 0001E480 v_sub_f32_e64 v1, |v1|, v22 ; D2080101 00022D01 v_mul_f32_e32 v28, v6, v3 ; 10380706 v_mad_f32 v3, -v3, v6, 1.0 ; D2820003 23CA0D03 v_mad_f32 v3, -v1, v6, v3 ; D2820003 240E0D01 v_mul_f32_e32 v1, v6, v1 ; 10020306 v_mul_f32_e32 v6, v28, v28 ; 100C391C v_mac_f32_e32 v6, v1, v1 ; 3E0C0301 v_mac_f32_e32 v6, v3, v3 ; 3E0C0703 v_rsq_clamp_f32_e32 v6, v6 ; 7E0C5906 exp 15, 32, 0, 0, 0, v13, v13, v13, v13 ; F800020F 0D0D0D0D exp 15, 33, 0, 0, 0, v0, v23, v13, v13 ; F800021F 0D0D1700 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_mad_f32 v0, -2.0, v5, 1.0 ; D2820000 03CA0AF5 v_mul_f32_e32 v5, v6, v28 ; 100A3906 v_mul_f32_e32 v0, v0, v5 ; 10000B00 v_mad_f32 v5, -2.0, v22, 1.0 ; D2820005 03CA2CF5 v_mul_f32_e32 v1, v6, v1 ; 10020306 v_mul_f32_e32 v1, v5, v1 ; 10020305 v_mul_f32_e32 v5, v4, v1 ; 100A0304 v_mac_f32_e32 v5, v2, v0 ; 3E0A0102 v_mul_f32_e32 v4, v4, v15 ; 10081F04 v_mac_f32_e32 v4, v2, v14 ; 3E081D02 v_mul_f32_e32 v2, v17, v1 ; 10040311 v_mac_f32_e32 v2, v9, v0 ; 3E040109 v_mul_f32_e32 v17, v17, v15 ; 10221F11 v_mac_f32_e32 v17, v9, v14 ; 3E221D09 v_mul_f32_e32 v1, v11, v1 ; 1002030B v_mul_f32_e32 v9, v11, v15 ; 10121F0B v_mac_f32_e32 v1, v10, v0 ; 3E02010A v_mac_f32_e32 v9, v10, v14 ; 3E121D0A v_mul_f32_e32 v0, v6, v3 ; 10000706 v_mad_f32 v3, -2.0, v21, 1.0 ; D2820003 03CA2AF5 v_mul_f32_e32 v0, v0, v3 ; 10000700 v_mac_f32_e32 v5, v7, v0 ; 3E0A0107 v_mac_f32_e32 v4, v7, v16 ; 3E082107 v_mac_f32_e32 v2, v18, v0 ; 3E040112 v_mac_f32_e32 v17, v18, v16 ; 3E222112 v_mac_f32_e32 v1, v12, v0 ; 3E02010C v_mac_f32_e32 v9, v12, v16 ; 3E12210C v_mac_f32_e32 v5, 0, v8 ; 3E0A1080 v_mac_f32_e32 v2, 0, v19 ; 3E042680 v_mul_f32_e32 v0, v5, v5 ; 10000B05 v_mac_f32_e32 v0, v2, v2 ; 3E000502 v_mac_f32_e32 v1, 0, v20 ; 3E022880 v_mac_f32_e32 v0, v1, v1 ; 3E000301 v_rsq_clamp_f32_e32 v0, v0 ; 7E005900 v_add_f32_e32 v3, v8, v4 ; 06060908 v_add_f32_e32 v4, v19, v17 ; 06082313 v_add_f32_e32 v6, v20, v9 ; 060C1314 v_mul_f32_e32 v5, v0, v5 ; 100A0B00 v_mul_f32_e32 v2, v0, v2 ; 10040500 v_mul_f32_e32 v0, v0, v1 ; 10000300 v_subrev_f32_e32 v1, s28, v3 ; 0A02061C v_subrev_f32_e32 v7, s26, v4 ; 0A0E081A v_subrev_f32_e32 v8, s27, v6 ; 0A100C1B v_mul_f32_e32 v9, v1, v1 ; 10120301 v_mac_f32_e32 v9, v7, v7 ; 3E120F07 v_mac_f32_e32 v9, v8, v8 ; 3E121108 v_rsq_clamp_f32_e32 v9, v9 ; 7E125909 v_mul_f32_e32 v10, s25, v5 ; 10140A19 v_mac_f32_e32 v10, s24, v2 ; 3E140418 v_mac_f32_e32 v10, s23, v0 ; 3E140017 v_mul_f32_e32 v11, v5, v10 ; 10161505 v_mad_f32 v11, -2.0, v11, s25 ; D282000B 006616F5 v_mul_f32_e32 v12, v9, v1 ; 10180309 v_mul_f32_e32 v11, v11, v12 ; 1016190B v_mul_f32_e32 v12, v2, v10 ; 10181502 v_mad_f32 v12, -2.0, v12, s24 ; D282000C 006218F5 v_mul_f32_e32 v14, v9, v7 ; 101C0F09 v_mad_f32 v11, -v14, v12, -v11 ; D282000B A42E190E v_mul_f32_e32 v10, v0, v10 ; 10141500 v_mad_f32 v10, -2.0, v10, s23 ; D282000A 005E14F5 v_mul_f32_e32 v9, v9, v8 ; 10121109 v_mad_f32 v9, -v9, v10, v11 ; D2820009 242E1509 v_mul_f32_e32 v1, s22, v1 ; 10020216 v_add_f32_e64 v9, 0, v9 clamp ; D2060809 00021280 v_log_f32_e32 v9, v9 ; 7E124F09 v_mac_f32_e32 v1, s9, v7 ; 3E020E09 v_mac_f32_e32 v1, s10, v8 ; 3E02100A exp 15, 34, 0, 0, 0, v3, v4, v6, v1 ; F800022F 01060403 s_waitcnt expcnt(0) ; BF8C070F v_mul_legacy_f32_e32 v1, 0x41800000, v9 ; 0E0212FF 41800000 v_exp_f32_e32 v1, v1 ; 7E024B01 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 exp 15, 35, 0, 0, 0, v5, v2, v0, v1 ; F800023F 01000205 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s14, v4 ; 1000080E v_mul_f32_e32 v1, s20, v4 ; 10020814 v_mul_f32_e32 v2, s7, v4 ; 10040807 v_mul_f32_e32 v4, s8, v4 ; 10080808 v_mac_f32_e32 v0, s15, v3 ; 3E00060F v_mac_f32_e32 v1, s11, v3 ; 3E02060B v_mac_f32_e32 v2, s12, v3 ; 3E04060C v_mac_f32_e32 v4, s13, v3 ; 3E08060D v_mac_f32_e32 v0, s21, v6 ; 3E000C15 v_mac_f32_e32 v1, s6, v6 ; 3E020C06 v_mac_f32_e32 v2, s5, v6 ; 3E040C05 v_mac_f32_e32 v4, s16, v6 ; 3E080C10 v_add_f32_e32 v0, s4, v0 ; 06000004 v_add_f32_e32 v1, s17, v1 ; 06020211 v_add_f32_e32 v2, s18, v2 ; 06040412 v_add_f32_e32 v3, s19, v4 ; 06060813 v_xor_b32_e32 v2, 0x80000000, v2 ; 3A0404FF 80000000 v_mad_f32 v3, 2.0, v3, -v0 ; D2820003 840206F4 exp 15, 36, 0, 0, 0, v24, v25, v26, v27 ; F800024F 1B1A1918 exp 15, 37, 0, 0, 0, v13, v13, v13, v13 ; F800025F 0D0D0D0D exp 15, 12, 0, 0, 0, v1, v2, v3, v0 ; F80000CF 00030201 exp 15, 13, 0, 1, 0, v13, v13, v13, v13 ; F80008DF 0D0D0D0D s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 52 Code Size: 1432 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0xB last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL OUT[2], COLOR[2] DCL OUT[3], COLOR[3] DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[1][0..3] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0..6], LOCAL IMM[0] UINT32 {0, 16, 48, 44} IMM[1] FLT32 { 0.5000, 0.0000, 1.0000, 0.0000} IMM[2] UINT32 {4, 0, 0, 0} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MUL TEMP[1].xyz, CONST[1][1].xyzz, TEMP[0].xyzz 3: MUL TEMP[1].xyz, TEMP[1].xyzz, IN[3].xyzz 4: MOV TEMP[2].xy, IN[0].xyyy 5: TEX TEMP[2].xyz, TEMP[2], SAMP[1], 2D 6: MUL TEMP[3].x, CONST[1][3].xxxx, TEMP[2].xxxx 7: MUL TEMP[4].x, CONST[1][2].wwww, TEMP[2].zzzz 8: DP3 TEMP[5].x, IN[2].xyzz, IN[2].xyzz 9: RSQ TEMP[5].x, TEMP[5].xxxx 10: MUL TEMP[5].xyz, IN[2].xyzz, TEMP[5].xxxx 11: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].xxxx 12: MOV TEMP[6].w, IMM[1].yyyy 13: MOV TEMP[6].x, TEMP[5].xxxx 14: MOV TEMP[6].y, TEMP[5].yyyy 15: MOV TEMP[6].z, TEMP[5].zzzz 16: FSLT TEMP[0].x, TEMP[0].wwww, CONST[1][0].yyyy 17: AND TEMP[0].x, TEMP[0].xxxx, IMM[1].zzzz 18: KILL_IF -TEMP[0].xxxx 19: MOV TEMP[0].w, IMM[1].yyyy 20: MOV TEMP[0].x, TEMP[1].xxxx 21: MOV TEMP[0].y, TEMP[1].yyyy 22: MOV TEMP[0].z, TEMP[1].zzzz 23: MOV TEMP[1].w, IMM[1].yyyy 24: MOV TEMP[1].x, TEMP[3].xxxx 25: MOV TEMP[1].y, TEMP[2].yyyy 26: MOV TEMP[1].z, TEMP[4].xxxx 27: MOV OUT[2], IN[1].wwww 28: MOV OUT[0], TEMP[0] 29: MOV OUT[3], TEMP[6] 30: MOV OUT[1], TEMP[1] 31: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %30 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %31 = load <32 x i8>, <32 x i8> addrspace(2)* %30, align 32, !tbaa !0 %32 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0 %34 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %35 = bitcast <8 x i32> addrspace(2)* %34 to <32 x i8> addrspace(2)* %36 = load <32 x i8>, <32 x i8> addrspace(2)* %35, align 32, !tbaa !0 %37 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %38 = bitcast <4 x i32> addrspace(2)* %37 to <16 x i8> addrspace(2)* %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 %40 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %44 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %45 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %46 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %47 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %48 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %49 = bitcast float %40 to i32 %50 = bitcast float %41 to i32 %51 = insertelement <2 x i32> undef, i32 %49, i32 0 %52 = insertelement <2 x i32> %51, i32 %50, i32 1 %53 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %52, <32 x i8> %31, <16 x i8> %33, i32 2) %54 = extractelement <4 x float> %53, i32 0 %55 = extractelement <4 x float> %53, i32 1 %56 = extractelement <4 x float> %53, i32 2 %57 = extractelement <4 x float> %53, i32 3 %58 = fmul float %25, %54 %59 = fmul float %26, %55 %60 = fmul float %27, %56 %61 = fmul float %58, %46 %62 = fmul float %59, %47 %63 = fmul float %60, %48 %64 = bitcast float %40 to i32 %65 = bitcast float %41 to i32 %66 = insertelement <2 x i32> undef, i32 %64, i32 0 %67 = insertelement <2 x i32> %66, i32 %65, i32 1 %68 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %67, <32 x i8> %36, <16 x i8> %39, i32 2) %69 = extractelement <4 x float> %68, i32 0 %70 = extractelement <4 x float> %68, i32 1 %71 = extractelement <4 x float> %68, i32 2 %72 = fmul float %29, %69 %73 = fmul float %28, %71 %74 = fmul float %43, %43 %75 = fmul float %44, %44 %76 = fadd float %75, %74 %77 = fmul float %45, %45 %78 = fadd float %76, %77 %79 = call float @llvm.AMDGPU.rsq.clamped.f32(float %78) %80 = fmul float %43, %79 %81 = fmul float %44, %79 %82 = fmul float %45, %79 %83 = fmul float %80, 5.000000e-01 %84 = fadd float %83, 5.000000e-01 %85 = fmul float %81, 5.000000e-01 %86 = fadd float %85, 5.000000e-01 %87 = fmul float %82, 5.000000e-01 %88 = fadd float %87, 5.000000e-01 %89 = fcmp olt float %57, %24 %90 = select i1 %89, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %90) %91 = call i32 @llvm.SI.packf16(float %61, float %62) %92 = bitcast i32 %91 to float %93 = call i32 @llvm.SI.packf16(float %63, float 0.000000e+00) %94 = bitcast i32 %93 to float %95 = call i32 @llvm.SI.packf16(float %72, float %70) %96 = bitcast i32 %95 to float %97 = call i32 @llvm.SI.packf16(float %73, float 0.000000e+00) %98 = bitcast i32 %97 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %92, float %94, float %92, float %94) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 1, i32 1, float %96, float %98, float %96, float %98) %99 = call i32 @llvm.SI.packf16(float %84, float %86) %100 = bitcast i32 %99 to float %101 = call i32 @llvm.SI.packf16(float %88, float 0.000000e+00) %102 = bitcast i32 %101 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 2, i32 0, float %42, float %42, float %42, float %42) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 3, i32 1, float %100, float %102, float %100, float %102) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 3, 1, [m0] ; C8100700 v_interp_p2_f32 v4, [v4], v1, 3, 1, [m0] ; C8110701 v_interp_p1_f32 v5, v0, 0, 2, [m0] ; C8140800 v_interp_p2_f32 v5, [v5], v1, 0, 2, [m0] ; C8150801 v_interp_p1_f32 v6, v0, 1, 2, [m0] ; C8180900 v_interp_p2_f32 v6, [v6], v1, 1, 2, [m0] ; C8190901 v_interp_p1_f32 v7, v0, 2, 2, [m0] ; C81C0A00 v_interp_p2_f32 v7, [v7], v1, 2, 2, [m0] ; C81D0A01 v_interp_p1_f32 v8, v0, 0, 3, [m0] ; C8200C00 v_interp_p2_f32 v8, [v8], v1, 0, 3, [m0] ; C8210C01 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 v_interp_p1_f32 v9, v0, 1, 3, [m0] ; C8240D00 v_interp_p2_f32 v9, [v9], v1, 1, 3, [m0] ; C8250D01 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_buffer_load_dword s6, s[0:3], 0x6 ; C2030106 s_buffer_load_dword s7, s[0:3], 0x1 ; C2038101 v_interp_p1_f32 v0, v0, 2, 3, [m0] ; C8000E00 v_interp_p2_f32 v0, [v0], v1, 2, 3, [m0] ; C8010E01 image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[8:11] ; F0800F00 00440A02 image_sample v[1:3], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[12:15] ; F0800700 00660102 s_buffer_load_dword s8, s[0:3], 0xb ; C204010B s_buffer_load_dword s0, s[0:3], 0xc ; C200010C s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_mul_f32_e32 v10, s4, v10 ; 10141404 v_mul_f32_e32 v11, s5, v11 ; 10161605 v_mul_f32_e32 v12, s6, v12 ; 10181806 v_cmp_gt_f32_e32 vcc, s7, v13 ; 7C081A07 v_mul_f32_e32 v13, v5, v5 ; 101A0B05 v_mac_f32_e32 v13, v6, v6 ; 3E1A0D06 v_mac_f32_e32 v13, v7, v7 ; 3E1A0F07 v_rsq_clamp_f32_e32 v13, v13 ; 7E1A590D v_mul_f32_e32 v8, v8, v10 ; 10101508 v_mul_f32_e32 v9, v9, v11 ; 10121709 v_mul_f32_e32 v0, v0, v12 ; 10001900 v_mul_f32_e32 v5, v13, v5 ; 100A0B0D v_mul_f32_e32 v6, v13, v6 ; 100C0D0D v_mul_f32_e32 v7, v13, v7 ; 100E0F0D s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, s0, v1 ; 10020200 v_mul_f32_e32 v3, s8, v3 ; 10060608 v_mad_f32 v5, 0.5, v5, 0.5 ; D2820005 03C20AF0 v_mad_f32 v6, 0.5, v6, 0.5 ; D2820006 03C20CF0 v_mad_f32 v7, 0.5, v7, 0.5 ; D2820007 03C20EF0 v_cndmask_b32_e64 v10, 0, -1.0, vcc ; D200000A 01A9E680 v_cmpx_le_f32_e32 vcc, 0, v10 ; 7C261480 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e32 v2, v8, v9 ; 5E041308 v_cvt_pkrtz_f16_f32_e64 v0, v0, 0 ; D25E0000 00010100 exp 15, 0, 1, 0, 0, v2, v0, v2, v0 ; F800040F 00020002 s_waitcnt expcnt(0) ; BF8C070F v_cvt_pkrtz_f16_f32_e64 v0, v3, 0 ; D25E0000 00010103 exp 15, 1, 1, 0, 0, v1, v0, v1, v0 ; F800041F 00010001 s_waitcnt expcnt(0) ; BF8C070F v_cvt_pkrtz_f16_f32_e32 v0, v5, v6 ; 5E000D05 exp 15, 2, 0, 0, 0, v4, v4, v4, v4 ; F800002F 04040404 v_cvt_pkrtz_f16_f32_e64 v1, v7, 0 ; D25E0001 00010107 exp 15, 3, 1, 1, 1, v0, v1, v0, v1 ; F8001C3F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 16 Code Size: 332 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0xB last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[5], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL OUT[2], COLOR[2] DCL OUT[3], COLOR[3] DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL CONST[1][0..3] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0..6], LOCAL IMM[0] UINT32 {0, 16, 48, 44} IMM[1] FLT32 { 2.0000, -1.0000, 1.0000, 0.5000} IMM[2] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: DP3 TEMP[0].x, IN[2].xyzz, IN[2].xyzz 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MUL TEMP[0].xyz, IN[2].xyzz, TEMP[0].xxxx 3: MOV TEMP[1].xy, IN[0].xyyy 4: TEX TEMP[1], TEMP[1], SAMP[0], 2D 5: MOV TEMP[2].w, TEMP[1].wwww 6: MUL TEMP[2].xyz, CONST[1][1].xyzz, TEMP[1].xyzz 7: MUL TEMP[1].xyz, TEMP[2], IN[3] 8: MOV TEMP[2].xy, IN[0].xyyy 9: TEX TEMP[2].xyz, TEMP[2], SAMP[1], 2D 10: MUL TEMP[3].xyz, TEMP[0].zxyy, IN[4].yzxx 11: MAD TEMP[3].xyz, TEMP[0].yzxx, IN[4].zxyy, -TEMP[3].xyzz 12: MOV TEMP[4].xy, IN[0].xyyy 13: TEX TEMP[4].yw, TEMP[4], SAMP[2], 2D 14: MAD TEMP[4].xy, TEMP[4].wyyy, IMM[1].xxxx, IMM[1].yyyy 15: MOV TEMP[5].x, TEMP[4].xxxx 16: MOV TEMP[5].y, -TEMP[4].yyyy 17: MUL TEMP[5].xy, TEMP[5].xyyy, CONST[1][0].xxxx 18: MOV TEMP[6].x, TEMP[5].xxxx 19: MOV TEMP[6].y, TEMP[5].yyyy 20: DP2 TEMP[4].x, TEMP[4].xyyy, TEMP[4].xyyy 21: ADD TEMP[4].x, IMM[1].zzzz, -TEMP[4].xxxx 22: MOV_SAT TEMP[4].x, TEMP[4].xxxx 23: SQRT TEMP[4].x, TEMP[4].xxxx 24: MOV TEMP[6].z, TEMP[4].xxxx 25: DP3 TEMP[4].x, TEMP[6].xyzz, TEMP[6].xyzz 26: RSQ TEMP[4].x, TEMP[4].xxxx 27: MUL TEMP[4].xyz, TEMP[6].xyzz, TEMP[4].xxxx 28: DP3 TEMP[5].x, IN[4].xyzz, IN[4].xyzz 29: RSQ TEMP[5].x, TEMP[5].xxxx 30: MUL TEMP[5].xyz, IN[4].xyzz, TEMP[5].xxxx 31: DP3 TEMP[6].x, TEMP[3].xyzz, TEMP[3].xyzz 32: RSQ TEMP[6].x, TEMP[6].xxxx 33: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[6].xxxx 34: MUL TEMP[3].xyz, IN[4].wwww, TEMP[3].xyzz 35: MUL TEMP[3].xyz, TEMP[4].yyyy, TEMP[3].xyzz 36: MAD TEMP[3].xyz, TEMP[4].xxxx, TEMP[5].xyzz, TEMP[3].xyzz 37: MAD TEMP[0].xyz, TEMP[0].xyzz, TEMP[4].zzzz, TEMP[3].xyzz 38: DP3 TEMP[3].x, TEMP[0].xyzz, TEMP[0].xyzz 39: RSQ TEMP[3].x, TEMP[3].xxxx 40: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[3].xxxx 41: MAD TEMP[0].xyz, TEMP[0].xyzz, IMM[1].wwww, IMM[1].wwww 42: MOV TEMP[3].w, IMM[2].xxxx 43: MOV TEMP[3].x, TEMP[0].xxxx 44: MOV TEMP[3].y, TEMP[0].yyyy 45: MOV TEMP[3].z, TEMP[0].zzzz 46: MOV TEMP[0].w, IMM[2].xxxx 47: MOV TEMP[0].x, TEMP[1].xxxx 48: MOV TEMP[0].y, TEMP[1].yyyy 49: MOV TEMP[0].z, TEMP[1].zzzz 50: MOV TEMP[1].w, IMM[2].xxxx 51: MUL TEMP[1].x, CONST[1][3].xxxx, TEMP[2].xxxx 52: MOV TEMP[1].y, TEMP[2].yyyy 53: MUL TEMP[2].x, CONST[1][2].wwww, TEMP[2].zzzz 54: MOV TEMP[1].z, TEMP[2].xxxx 55: MOV OUT[2], IN[1].wwww 56: MOV OUT[0], TEMP[0] 57: MOV OUT[3], TEMP[3] 58: MOV OUT[1], TEMP[1] 59: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %30 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %31 = load <32 x i8>, <32 x i8> addrspace(2)* %30, align 32, !tbaa !0 %32 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0 %34 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %35 = bitcast <8 x i32> addrspace(2)* %34 to <32 x i8> addrspace(2)* %36 = load <32 x i8>, <32 x i8> addrspace(2)* %35, align 32, !tbaa !0 %37 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %38 = bitcast <4 x i32> addrspace(2)* %37 to <16 x i8> addrspace(2)* %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 %40 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %41 = bitcast <8 x i32> addrspace(2)* %40 to <32 x i8> addrspace(2)* %42 = load <32 x i8>, <32 x i8> addrspace(2)* %41, align 32, !tbaa !0 %43 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %44 = bitcast <4 x i32> addrspace(2)* %43 to <16 x i8> addrspace(2)* %45 = load <16 x i8>, <16 x i8> addrspace(2)* %44, align 16, !tbaa !0 %46 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %47 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %48 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %49 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %50 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %51 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %52 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %53 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %54 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %55 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %56 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %57 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %58 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %59 = fmul float %49, %49 %60 = fmul float %50, %50 %61 = fadd float %60, %59 %62 = fmul float %51, %51 %63 = fadd float %61, %62 %64 = call float @llvm.AMDGPU.rsq.clamped.f32(float %63) %65 = fmul float %49, %64 %66 = fmul float %50, %64 %67 = fmul float %51, %64 %68 = bitcast float %46 to i32 %69 = bitcast float %47 to i32 %70 = insertelement <2 x i32> undef, i32 %68, i32 0 %71 = insertelement <2 x i32> %70, i32 %69, i32 1 %72 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %71, <32 x i8> %31, <16 x i8> %33, i32 2) %73 = extractelement <4 x float> %72, i32 0 %74 = extractelement <4 x float> %72, i32 1 %75 = extractelement <4 x float> %72, i32 2 %76 = fmul float %25, %73 %77 = fmul float %26, %74 %78 = fmul float %27, %75 %79 = fmul float %76, %52 %80 = fmul float %77, %53 %81 = fmul float %78, %54 %82 = bitcast float %46 to i32 %83 = bitcast float %47 to i32 %84 = insertelement <2 x i32> undef, i32 %82, i32 0 %85 = insertelement <2 x i32> %84, i32 %83, i32 1 %86 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %85, <32 x i8> %36, <16 x i8> %39, i32 2) %87 = extractelement <4 x float> %86, i32 0 %88 = extractelement <4 x float> %86, i32 1 %89 = extractelement <4 x float> %86, i32 2 %90 = fmul float %67, %56 %91 = fmul float %65, %57 %92 = fmul float %66, %55 %93 = fmul float %66, %57 %94 = fsub float %93, %90 %95 = fmul float %67, %55 %96 = fsub float %95, %91 %97 = fmul float %65, %56 %98 = fsub float %97, %92 %99 = bitcast float %46 to i32 %100 = bitcast float %47 to i32 %101 = insertelement <2 x i32> undef, i32 %99, i32 0 %102 = insertelement <2 x i32> %101, i32 %100, i32 1 %103 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %102, <32 x i8> %42, <16 x i8> %45, i32 2) %104 = extractelement <4 x float> %103, i32 1 %105 = extractelement <4 x float> %103, i32 3 %106 = fmul float %105, 2.000000e+00 %107 = fadd float %106, -1.000000e+00 %108 = fmul float %104, 2.000000e+00 %109 = fadd float %108, -1.000000e+00 %110 = fmul float %107, %24 %111 = fmul float %109, %24 %112 = fmul float %107, %107 %113 = fmul float %109, %109 %114 = fadd float %112, %113 %115 = fsub float 1.000000e+00, %114 %116 = call float @llvm.AMDIL.clamp.(float %115, float 0.000000e+00, float 1.000000e+00) %117 = call float @llvm.sqrt.f32(float %116) %118 = fmul float %110, %110 %119 = fmul float %111, %111 %120 = fadd float %119, %118 %121 = fmul float %117, %117 %122 = fadd float %120, %121 %123 = call float @llvm.AMDGPU.rsq.clamped.f32(float %122) %124 = fmul float %110, %123 %125 = fmul float %111, %123 %126 = fsub float -0.000000e+00, %125 %127 = fmul float %117, %123 %128 = fmul float %55, %55 %129 = fmul float %56, %56 %130 = fadd float %129, %128 %131 = fmul float %57, %57 %132 = fadd float %130, %131 %133 = call float @llvm.AMDGPU.rsq.clamped.f32(float %132) %134 = fmul float %55, %133 %135 = fmul float %56, %133 %136 = fmul float %57, %133 %137 = fmul float %94, %94 %138 = fmul float %96, %96 %139 = fadd float %138, %137 %140 = fmul float %98, %98 %141 = fadd float %139, %140 %142 = call float @llvm.AMDGPU.rsq.clamped.f32(float %141) %143 = fmul float %94, %142 %144 = fmul float %96, %142 %145 = fmul float %98, %142 %146 = fmul float %58, %143 %147 = fmul float %58, %144 %148 = fmul float %58, %145 %149 = fmul float %146, %126 %150 = fmul float %147, %126 %151 = fmul float %148, %126 %152 = fmul float %124, %134 %153 = fadd float %152, %149 %154 = fmul float %124, %135 %155 = fadd float %154, %150 %156 = fmul float %124, %136 %157 = fadd float %156, %151 %158 = fmul float %65, %127 %159 = fadd float %158, %153 %160 = fmul float %66, %127 %161 = fadd float %160, %155 %162 = fmul float %67, %127 %163 = fadd float %162, %157 %164 = fmul float %159, %159 %165 = fmul float %161, %161 %166 = fadd float %165, %164 %167 = fmul float %163, %163 %168 = fadd float %166, %167 %169 = call float @llvm.AMDGPU.rsq.clamped.f32(float %168) %170 = fmul float %159, %169 %171 = fmul float %161, %169 %172 = fmul float %163, %169 %173 = fmul float %170, 5.000000e-01 %174 = fadd float %173, 5.000000e-01 %175 = fmul float %171, 5.000000e-01 %176 = fadd float %175, 5.000000e-01 %177 = fmul float %172, 5.000000e-01 %178 = fadd float %177, 5.000000e-01 %179 = fmul float %29, %87 %180 = fmul float %28, %89 %181 = call i32 @llvm.SI.packf16(float %79, float %80) %182 = bitcast i32 %181 to float %183 = call i32 @llvm.SI.packf16(float %81, float 0.000000e+00) %184 = bitcast i32 %183 to float %185 = call i32 @llvm.SI.packf16(float %179, float %88) %186 = bitcast i32 %185 to float %187 = call i32 @llvm.SI.packf16(float %180, float 0.000000e+00) %188 = bitcast i32 %187 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %182, float %184, float %182, float %184) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 1, i32 1, float %186, float %188, float %186, float %188) %189 = call i32 @llvm.SI.packf16(float %174, float %176) %190 = bitcast i32 %189 to float %191 = call i32 @llvm.SI.packf16(float %178, float 0.000000e+00) %192 = bitcast i32 %191 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 2, i32 0, float %48, float %48, float %48, float %48) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 3, i32 1, float %190, float %192, float %190, float %192) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 3, 1, [m0] ; C8100700 v_interp_p2_f32 v4, [v4], v1, 3, 1, [m0] ; C8110701 v_interp_p1_f32 v5, v0, 0, 2, [m0] ; C8140800 v_interp_p2_f32 v5, [v5], v1, 0, 2, [m0] ; C8150801 v_interp_p1_f32 v6, v0, 1, 2, [m0] ; C8180900 v_interp_p2_f32 v6, [v6], v1, 1, 2, [m0] ; C8190901 v_interp_p1_f32 v7, v0, 2, 2, [m0] ; C81C0A00 v_interp_p2_f32 v7, [v7], v1, 2, 2, [m0] ; C81D0A01 v_interp_p1_f32 v8, v0, 0, 3, [m0] ; C8200C00 v_interp_p2_f32 v8, [v8], v1, 0, 3, [m0] ; C8210C01 v_interp_p1_f32 v9, v0, 1, 3, [m0] ; C8240D00 v_interp_p2_f32 v9, [v9], v1, 1, 3, [m0] ; C8250D01 v_interp_p1_f32 v10, v0, 2, 3, [m0] ; C8280E00 v_interp_p2_f32 v10, [v10], v1, 2, 3, [m0] ; C8290E01 v_interp_p1_f32 v11, v0, 0, 4, [m0] ; C82C1000 v_interp_p2_f32 v11, [v11], v1, 0, 4, [m0] ; C82D1001 v_interp_p1_f32 v12, v0, 1, 4, [m0] ; C8301100 v_interp_p2_f32 v12, [v12], v1, 1, 4, [m0] ; C8311101 s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx4 s[16:19], s[4:5], 0x8 ; C0880508 s_load_dwordx8 s[20:27], s[6:7], 0x0 ; C0CA0700 s_load_dwordx8 s[28:35], s[6:7], 0x8 ; C0CE0708 s_load_dwordx8 s[36:43], s[6:7], 0x10 ; C0D20710 v_interp_p1_f32 v13, v0, 2, 4, [m0] ; C8341200 v_interp_p2_f32 v13, [v13], v1, 2, 4, [m0] ; C8351201 v_interp_p1_f32 v0, v0, 3, 4, [m0] ; C8001300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_buffer_load_dword s6, s[0:3], 0x6 ; C2030106 v_interp_p2_f32 v0, [v0], v1, 3, 4, [m0] ; C8011301 image_sample v[14:16], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[20:27], s[8:11] ; F0800700 00450E02 image_sample v[17:19], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[28:35], s[12:15] ; F0800700 00671102 image_sample v[1:2], 10, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[36:43], s[16:19] ; F0800A00 00890102 s_buffer_load_dword s7, s[0:3], 0xc ; C203810C s_buffer_load_dword s8, s[0:3], 0xb ; C204010B s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100 s_waitcnt vmcnt(2) lgkmcnt(0) ; BF8C0072 v_mul_f32_e32 v3, s4, v14 ; 10061C04 v_mul_f32_e32 v14, s5, v15 ; 101C1E05 v_mul_f32_e32 v15, s6, v16 ; 101E2006 v_mul_f32_e32 v16, v5, v5 ; 10200B05 v_mac_f32_e32 v16, v6, v6 ; 3E200D06 v_mac_f32_e32 v16, v7, v7 ; 3E200F07 v_rsq_clamp_f32_e32 v16, v16 ; 7E205910 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v17, s7, v17 ; 10222207 v_mul_f32_e32 v19, s8, v19 ; 10262608 v_cvt_pkrtz_f16_f32_e32 v17, v17, v18 ; 5E222511 v_mul_f32_e32 v5, v16, v5 ; 100A0B10 v_mul_f32_e32 v6, v16, v6 ; 100C0D10 v_mul_f32_e32 v7, v16, v7 ; 100E0F10 v_mad_f32 v2, 2.0, v2, -1.0 ; D2820002 03CE04F4 v_mad_f32 v1, 2.0, v1, -1.0 ; D2820001 03CE02F4 v_mul_f32_e32 v16, v12, v7 ; 10200F0C v_mad_f32 v16, v6, v13, -v16 ; D2820010 84421B06 v_mul_f32_e32 v18, v13, v5 ; 10240B0D v_mad_f32 v18, v7, v11, -v18 ; D2820012 844A1707 v_mul_f32_e32 v20, v11, v6 ; 10280D0B v_mad_f32 v20, v5, v12, -v20 ; D2820014 84521905 v_mul_f32_e32 v21, v11, v11 ; 102A170B v_mac_f32_e32 v21, v12, v12 ; 3E2A190C v_mac_f32_e32 v21, v13, v13 ; 3E2A1B0D v_rsq_clamp_f32_e32 v21, v21 ; 7E2A5915 v_mul_f32_e32 v22, v16, v16 ; 102C2110 v_mac_f32_e32 v22, v18, v18 ; 3E2C2512 v_mac_f32_e32 v22, v20, v20 ; 3E2C2914 v_rsq_clamp_f32_e32 v22, v22 ; 7E2C5916 v_mul_f32_e32 v11, v21, v11 ; 10161715 v_mul_f32_e32 v12, v21, v12 ; 10181915 v_mul_f32_e32 v13, v21, v13 ; 101A1B15 v_mul_f32_e32 v16, v22, v16 ; 10202116 v_mul_f32_e32 v18, v22, v18 ; 10242516 v_mul_f32_e32 v20, v22, v20 ; 10282916 v_mad_f32 v21, -v1, v1, 1.0 ; D2820015 23CA0301 v_mad_f32 v21, -v2, v2, v21 ; D2820015 24560502 v_mul_f32_e32 v2, s0, v2 ; 10040400 v_mul_f32_e32 v1, s0, v1 ; 10020200 v_add_f32_e64 v21, 0, v21 clamp ; D2060815 00022A80 v_sqrt_f32_e32 v21, v21 ; 7E2A6715 v_mul_f32_e32 v22, v2, v2 ; 102C0502 v_mac_f32_e32 v22, v1, v1 ; 3E2C0301 v_mac_f32_e32 v22, v21, v21 ; 3E2C2B15 v_rsq_clamp_f32_e32 v22, v22 ; 7E2C5916 v_mul_f32_e32 v16, v16, v0 ; 10200110 v_mul_f32_e32 v18, v18, v0 ; 10240112 v_mul_f32_e32 v0, v20, v0 ; 10000114 v_mul_f32_e32 v1, v22, v1 ; 10020316 v_mul_f32_e32 v16, v1, v16 ; 10202101 v_mul_f32_e32 v18, v1, v18 ; 10242501 v_mul_f32_e32 v0, v1, v0 ; 10000101 v_mul_f32_e32 v1, v22, v2 ; 10020516 v_mad_f32 v2, v1, v11, -v16 ; D2820002 84421701 v_mad_f32 v11, v1, v12, -v18 ; D282000B 844A1901 v_mad_f32 v0, v1, v13, -v0 ; D2820000 84021B01 v_mul_f32_e32 v1, v22, v21 ; 10022B16 v_mac_f32_e32 v2, v1, v5 ; 3E040B01 v_mac_f32_e32 v11, v1, v6 ; 3E160D01 v_mac_f32_e32 v0, v1, v7 ; 3E000F01 v_mul_f32_e32 v1, v8, v3 ; 10020708 v_mul_f32_e32 v3, v9, v14 ; 10061D09 v_mul_f32_e32 v5, v10, v15 ; 100A1F0A v_cvt_pkrtz_f16_f32_e32 v1, v1, v3 ; 5E020701 v_mul_f32_e32 v3, v2, v2 ; 10060502 v_mac_f32_e32 v3, v11, v11 ; 3E06170B v_mac_f32_e32 v3, v0, v0 ; 3E060100 v_rsq_clamp_f32_e32 v3, v3 ; 7E065903 v_cvt_pkrtz_f16_f32_e64 v5, v5, 0 ; D25E0005 00010105 exp 15, 0, 1, 0, 0, v1, v5, v1, v5 ; F800040F 05010501 s_waitcnt expcnt(0) ; BF8C070F v_cvt_pkrtz_f16_f32_e64 v1, v19, 0 ; D25E0001 00010113 exp 15, 1, 1, 0, 0, v17, v1, v17, v1 ; F800041F 01110111 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v1, v3, v2 ; 10020503 v_mul_f32_e32 v2, v3, v11 ; 10041703 v_mul_f32_e32 v0, v3, v0 ; 10000103 v_mad_f32 v1, 0.5, v1, 0.5 ; D2820001 03C202F0 v_mad_f32 v2, 0.5, v2, 0.5 ; D2820002 03C204F0 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 exp 15, 2, 0, 0, 0, v4, v4, v4, v4 ; F800002F 04040404 v_mad_f32 v0, 0.5, v0, 0.5 ; D2820000 03C200F0 v_cvt_pkrtz_f16_f32_e64 v0, v0, 0 ; D25E0000 00010100 exp 15, 3, 1, 1, 1, v1, v0, v1, v0 ; F8001C3F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 24 Code Size: 624 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0xB last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL OUT[2], COLOR[2] DCL OUT[3], COLOR[3] DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[1][0..3] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0..6], LOCAL IMM[0] UINT32 {0, 16, 48, 44} IMM[1] FLT32 { 0.5000, 0.0000, 1.0000, 0.0000} IMM[2] UINT32 {4, 0, 0, 0} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MUL TEMP[1].xyz, CONST[1][1].xyzz, TEMP[0].xyzz 3: MUL TEMP[1].xyz, TEMP[1].xyzz, IN[3].xyzz 4: MOV TEMP[2].xy, IN[0].xyyy 5: TEX TEMP[2].xyz, TEMP[2], SAMP[1], 2D 6: MUL TEMP[3].x, CONST[1][3].xxxx, TEMP[2].xxxx 7: MUL TEMP[4].x, CONST[1][2].wwww, TEMP[2].zzzz 8: DP3 TEMP[5].x, IN[2].xyzz, IN[2].xyzz 9: RSQ TEMP[5].x, TEMP[5].xxxx 10: MUL TEMP[5].xyz, IN[2].xyzz, TEMP[5].xxxx 11: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].xxxx 12: MOV TEMP[6].w, IMM[1].yyyy 13: MOV TEMP[6].x, TEMP[5].xxxx 14: MOV TEMP[6].y, TEMP[5].yyyy 15: MOV TEMP[6].z, TEMP[5].zzzz 16: FSLT TEMP[0].x, TEMP[0].wwww, CONST[1][0].yyyy 17: AND TEMP[0].x, TEMP[0].xxxx, IMM[1].zzzz 18: KILL_IF -TEMP[0].xxxx 19: MOV TEMP[0].w, IMM[1].yyyy 20: MOV TEMP[0].x, TEMP[1].xxxx 21: MOV TEMP[0].y, TEMP[1].yyyy 22: MOV TEMP[0].z, TEMP[1].zzzz 23: MOV TEMP[1].w, IMM[1].yyyy 24: MOV TEMP[1].x, TEMP[3].xxxx 25: MOV TEMP[1].y, TEMP[2].yyyy 26: MOV TEMP[1].z, TEMP[4].xxxx 27: MOV OUT[2], IN[1].wwww 28: MOV OUT[0], TEMP[0] 29: MOV OUT[3], TEMP[6] 30: MOV OUT[1], TEMP[1] 31: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %30 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %31 = load <32 x i8>, <32 x i8> addrspace(2)* %30, align 32, !tbaa !0 %32 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0 %34 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %35 = bitcast <8 x i32> addrspace(2)* %34 to <32 x i8> addrspace(2)* %36 = load <32 x i8>, <32 x i8> addrspace(2)* %35, align 32, !tbaa !0 %37 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %38 = bitcast <4 x i32> addrspace(2)* %37 to <16 x i8> addrspace(2)* %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 %40 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %44 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %45 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %46 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %47 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %48 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %49 = bitcast float %40 to i32 %50 = bitcast float %41 to i32 %51 = insertelement <2 x i32> undef, i32 %49, i32 0 %52 = insertelement <2 x i32> %51, i32 %50, i32 1 %53 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %52, <32 x i8> %31, <16 x i8> %33, i32 2) %54 = extractelement <4 x float> %53, i32 0 %55 = extractelement <4 x float> %53, i32 1 %56 = extractelement <4 x float> %53, i32 2 %57 = extractelement <4 x float> %53, i32 3 %58 = fmul float %25, %54 %59 = fmul float %26, %55 %60 = fmul float %27, %56 %61 = fmul float %58, %46 %62 = fmul float %59, %47 %63 = fmul float %60, %48 %64 = bitcast float %40 to i32 %65 = bitcast float %41 to i32 %66 = insertelement <2 x i32> undef, i32 %64, i32 0 %67 = insertelement <2 x i32> %66, i32 %65, i32 1 %68 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %67, <32 x i8> %36, <16 x i8> %39, i32 2) %69 = extractelement <4 x float> %68, i32 0 %70 = extractelement <4 x float> %68, i32 1 %71 = extractelement <4 x float> %68, i32 2 %72 = fmul float %29, %69 %73 = fmul float %28, %71 %74 = fmul float %43, %43 %75 = fmul float %44, %44 %76 = fadd float %75, %74 %77 = fmul float %45, %45 %78 = fadd float %76, %77 %79 = call float @llvm.AMDGPU.rsq.clamped.f32(float %78) %80 = fmul float %43, %79 %81 = fmul float %44, %79 %82 = fmul float %45, %79 %83 = fmul float %80, 5.000000e-01 %84 = fadd float %83, 5.000000e-01 %85 = fmul float %81, 5.000000e-01 %86 = fadd float %85, 5.000000e-01 %87 = fmul float %82, 5.000000e-01 %88 = fadd float %87, 5.000000e-01 %89 = fcmp olt float %57, %24 %90 = select i1 %89, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %90) %91 = call i32 @llvm.SI.packf16(float %61, float %62) %92 = bitcast i32 %91 to float %93 = call i32 @llvm.SI.packf16(float %63, float 0.000000e+00) %94 = bitcast i32 %93 to float %95 = call i32 @llvm.SI.packf16(float %72, float %70) %96 = bitcast i32 %95 to float %97 = call i32 @llvm.SI.packf16(float %73, float 0.000000e+00) %98 = bitcast i32 %97 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %92, float %94, float %92, float %94) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 1, i32 1, float %96, float %98, float %96, float %98) %99 = call i32 @llvm.SI.packf16(float %84, float %86) %100 = bitcast i32 %99 to float %101 = call i32 @llvm.SI.packf16(float %88, float 0.000000e+00) %102 = bitcast i32 %101 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 2, i32 0, float %42, float %42, float %42, float %42) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 3, i32 1, float %100, float %102, float %100, float %102) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 3, 1, [m0] ; C8100700 v_interp_p2_f32 v4, [v4], v1, 3, 1, [m0] ; C8110701 v_interp_p1_f32 v5, v0, 0, 2, [m0] ; C8140800 v_interp_p2_f32 v5, [v5], v1, 0, 2, [m0] ; C8150801 v_interp_p1_f32 v6, v0, 1, 2, [m0] ; C8180900 v_interp_p2_f32 v6, [v6], v1, 1, 2, [m0] ; C8190901 v_interp_p1_f32 v7, v0, 2, 2, [m0] ; C81C0A00 v_interp_p2_f32 v7, [v7], v1, 2, 2, [m0] ; C81D0A01 v_interp_p1_f32 v8, v0, 0, 3, [m0] ; C8200C00 v_interp_p2_f32 v8, [v8], v1, 0, 3, [m0] ; C8210C01 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 v_interp_p1_f32 v9, v0, 1, 3, [m0] ; C8240D00 v_interp_p2_f32 v9, [v9], v1, 1, 3, [m0] ; C8250D01 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_buffer_load_dword s6, s[0:3], 0x6 ; C2030106 s_buffer_load_dword s7, s[0:3], 0x1 ; C2038101 v_interp_p1_f32 v0, v0, 2, 3, [m0] ; C8000E00 v_interp_p2_f32 v0, [v0], v1, 2, 3, [m0] ; C8010E01 image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[8:11] ; F0800F00 00440A02 image_sample v[1:3], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[12:15] ; F0800700 00660102 s_buffer_load_dword s8, s[0:3], 0xb ; C204010B s_buffer_load_dword s0, s[0:3], 0xc ; C200010C s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_mul_f32_e32 v10, s4, v10 ; 10141404 v_mul_f32_e32 v11, s5, v11 ; 10161605 v_mul_f32_e32 v12, s6, v12 ; 10181806 v_cmp_gt_f32_e32 vcc, s7, v13 ; 7C081A07 v_mul_f32_e32 v13, v5, v5 ; 101A0B05 v_mac_f32_e32 v13, v6, v6 ; 3E1A0D06 v_mac_f32_e32 v13, v7, v7 ; 3E1A0F07 v_rsq_clamp_f32_e32 v13, v13 ; 7E1A590D v_mul_f32_e32 v8, v8, v10 ; 10101508 v_mul_f32_e32 v9, v9, v11 ; 10121709 v_mul_f32_e32 v0, v0, v12 ; 10001900 v_mul_f32_e32 v5, v13, v5 ; 100A0B0D v_mul_f32_e32 v6, v13, v6 ; 100C0D0D v_mul_f32_e32 v7, v13, v7 ; 100E0F0D s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, s0, v1 ; 10020200 v_mul_f32_e32 v3, s8, v3 ; 10060608 v_mad_f32 v5, 0.5, v5, 0.5 ; D2820005 03C20AF0 v_mad_f32 v6, 0.5, v6, 0.5 ; D2820006 03C20CF0 v_mad_f32 v7, 0.5, v7, 0.5 ; D2820007 03C20EF0 v_cndmask_b32_e64 v10, 0, -1.0, vcc ; D200000A 01A9E680 v_cmpx_le_f32_e32 vcc, 0, v10 ; 7C261480 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e32 v2, v8, v9 ; 5E041308 v_cvt_pkrtz_f16_f32_e64 v0, v0, 0 ; D25E0000 00010100 exp 15, 0, 1, 0, 0, v2, v0, v2, v0 ; F800040F 00020002 s_waitcnt expcnt(0) ; BF8C070F v_cvt_pkrtz_f16_f32_e64 v0, v3, 0 ; D25E0000 00010103 exp 15, 1, 1, 0, 0, v1, v0, v1, v0 ; F800041F 00010001 s_waitcnt expcnt(0) ; BF8C070F v_cvt_pkrtz_f16_f32_e32 v0, v5, v6 ; 5E000D05 exp 15, 2, 0, 0, 0, v4, v4, v4, v4 ; F800002F 04040404 v_cvt_pkrtz_f16_f32_e64 v1, v7, 0 ; D25E0001 00010107 exp 15, 3, 1, 1, 1, v0, v1, v0, v1 ; F8001C3F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 16 Code Size: 332 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL OUT[4], GENERIC[2] DCL OUT[5], GENERIC[3] DCL OUT[6], GENERIC[4] DCL OUT[7], GENERIC[5] DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0..2] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL DCL TEMP[2..10], LOCAL IMM[0] FLT32 { 0.0000, 0.5000, 255.0000, -128.0000} IMM[1] UINT32 {3, 400, 304, 320} IMM[2] INT32 {2, 8, 1, 0} IMM[3] FLT32 { 1.0000, -64.0000, 0.0159, 2.0000} IMM[4] UINT32 {4, 0, 12, 28} IMM[5] FLT32 { 16.0000, 0.0000, 0.0000, 0.0000} IMM[6] UINT32 {44, 60, 24, 32} IMM[7] INT32 {3, 0, 0, 0} IMM[8] UINT32 {16, 48, 20, 36} IMM[9] UINT32 {52, 8, 40, 56} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MOV TEMP[0].y, IMM[0].xxxx 4: SHL TEMP[2].x, IN[3].xxxx, IMM[2].xxxx 5: UADD TEMP[2].x, TEMP[2].xxxx, IMM[2].yyyy 6: I2F TEMP[2].x, TEMP[2].xxxx 7: ADD TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy 8: MUL TEMP[0].x, TEMP[2].xxxx, CONST[4][25].zzzz 9: ADD TEMP[0].xy, TEMP[0].xyyy, IN[4].xyyy 10: FLR TEMP[2].x, TEMP[0].xxxx 11: ADD TEMP[3].x, TEMP[0].xxxx, -TEMP[2].xxxx 12: MAD TEMP[0].x, TEMP[2].xxxx, CONST[4][25].wwww, TEMP[0].yyyy 13: MOV TEMP[3].y, TEMP[0].xxxx 14: MOV TEMP[0].xy, TEMP[3].xyyy 15: MOV TEMP[0].w, IMM[0].xxxx 16: TXL TEMP[0], TEMP[0], SAMP[0], 2D 17: MOV TEMP[2].xy, TEMP[3].xyyy 18: MOV TEMP[2].w, IMM[0].xxxx 19: TXL TEMP[2], TEMP[2], SAMP[0], 2D, IMM[2].zwz 20: MOV TEMP[3].xy, TEMP[3].xyyy 21: MOV TEMP[3].w, IMM[0].xxxx 22: TXL TEMP[3], TEMP[3], SAMP[0], 2D, IMM[2].xwx 23: MAD TEMP[4], IN[1], IMM[0].zzzz, IMM[0].wwww 24: FSLT TEMP[5], TEMP[4], IMM[0].xxxx 25: AND TEMP[5], TEMP[5], IMM[3].xxxx 26: ABS TEMP[4], TEMP[4] 27: ADD TEMP[4], TEMP[4], -TEMP[5] 28: ADD TEMP[4], TEMP[4], IMM[3].yyyy 29: FSLT TEMP[6], TEMP[4], IMM[0].xxxx 30: AND TEMP[6], TEMP[6], IMM[3].xxxx 31: ABS TEMP[4], TEMP[4] 32: ADD TEMP[4], TEMP[4], -TEMP[6] 33: MUL TEMP[4], TEMP[4], IMM[3].zzzz 34: MUL TEMP[6], TEMP[6], IMM[3].wwww 35: ADD TEMP[6], IMM[3].xxxx, -TEMP[6] 36: MUL TEMP[5], IMM[3].wwww, TEMP[5] 37: ADD TEMP[5].xzw, IMM[3].xxxx, -TEMP[5] 38: MOV TEMP[7].x, TEMP[4].xxxx 39: MOV TEMP[7].y, TEMP[4].yyyy 40: ADD TEMP[8].x, IMM[3].xxxx, -TEMP[4].xxxx 41: ADD TEMP[8].x, TEMP[8].xxxx, -TEMP[4].yyyy 42: MOV TEMP[7].z, TEMP[8].xxxx 43: DP3 TEMP[8].x, TEMP[7].xyzz, TEMP[7].xyzz 44: RSQ TEMP[8].x, TEMP[8].xxxx 45: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[8].xxxx 46: MUL TEMP[8].xy, TEMP[7].xyyy, TEMP[6].xyyy 47: MOV TEMP[9].x, TEMP[4].zzzz 48: MOV TEMP[9].y, TEMP[4].wwww 49: ADD TEMP[10].x, IMM[3].xxxx, -TEMP[4].zzzz 50: ADD TEMP[4].x, TEMP[10].xxxx, -TEMP[4].wwww 51: MOV TEMP[9].z, TEMP[4].xxxx 52: DP3 TEMP[4].x, TEMP[9].xyzz, TEMP[9].xyzz 53: RSQ TEMP[4].x, TEMP[4].xxxx 54: MUL TEMP[4].xyz, TEMP[9].xyzz, TEMP[4].xxxx 55: MUL TEMP[6].xy, TEMP[4].xyyy, TEMP[6].zwww 56: MOV TEMP[9].w, IMM[0].xxxx 57: MOV TEMP[9].x, TEMP[8].xxxx 58: MOV TEMP[9].y, TEMP[8].yyyy 59: MUL TEMP[7].x, TEMP[7].zzzz, TEMP[5].xxxx 60: MOV TEMP[9].z, TEMP[7].xxxx 61: DP4 TEMP[7].x, TEMP[9], TEMP[0] 62: DP4 TEMP[8].x, TEMP[9], TEMP[2] 63: MOV TEMP[7].y, TEMP[8].xxxx 64: DP4 TEMP[8].x, TEMP[9], TEMP[3] 65: MOV TEMP[7].z, TEMP[8].xxxx 66: DP3 TEMP[8].x, TEMP[7].xyzz, TEMP[7].xyzz 67: RSQ TEMP[8].x, TEMP[8].xxxx 68: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[8].xxxx 69: MOV TEMP[8].w, IMM[0].xxxx 70: MOV TEMP[8].x, TEMP[6].xxxx 71: MOV TEMP[8].y, TEMP[6].yyyy 72: MUL TEMP[4].x, TEMP[4].zzzz, TEMP[5].zzzz 73: MOV TEMP[8].z, TEMP[4].xxxx 74: DP4 TEMP[4].x, TEMP[8], TEMP[0] 75: DP4 TEMP[6].x, TEMP[8], TEMP[2] 76: MOV TEMP[4].y, TEMP[6].xxxx 77: DP4 TEMP[6].x, TEMP[8], TEMP[3] 78: MOV TEMP[4].z, TEMP[6].xxxx 79: DP3 TEMP[6].x, TEMP[4].xyzz, TEMP[7].xyzz 80: MUL TEMP[6].xyz, TEMP[6].xxxx, TEMP[7].xyzz 81: ADD TEMP[4].xyz, TEMP[4].xyzz, -TEMP[6].xyzz 82: DP3 TEMP[6].x, TEMP[4].xyzz, TEMP[4].xyzz 83: RSQ TEMP[6].x, TEMP[6].xxxx 84: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[6].xxxx 85: MOV TEMP[6].x, TEMP[4].xxxx 86: MOV TEMP[6].y, TEMP[4].yyyy 87: MOV TEMP[6].z, TEMP[4].zzzz 88: MOV TEMP[6].w, TEMP[5].wwww 89: MOV TEMP[4].xy, IN[4].xyyy 90: MOV TEMP[4].w, IMM[0].xxxx 91: TXL TEMP[4].z, TEMP[4], SAMP[0], 2D 92: MUL TEMP[4].xyz, IN[0].xyzz, TEMP[4].zzzz 93: MOV TEMP[5].w, IMM[3].xxxx 94: MOV TEMP[5].x, TEMP[4].xxxx 95: MOV TEMP[5].y, TEMP[4].yyyy 96: MOV TEMP[5].z, TEMP[4].zzzz 97: DP4 TEMP[0].x, TEMP[5], TEMP[0] 98: DP4 TEMP[2].x, TEMP[5], TEMP[2] 99: DP4 TEMP[3].x, TEMP[5], TEMP[3] 100: MOV TEMP[4].x, TEMP[0].xxxx 101: MOV TEMP[4].y, TEMP[2].xxxx 102: MOV TEMP[4].z, TEMP[3].xxxx 103: ADD TEMP[4].xyz, TEMP[4].xyzz, -CONST[4][19].xyzz 104: MOV TEMP[5].x, TEMP[0].xxxx 105: MOV TEMP[5].y, TEMP[2].xxxx 106: MOV TEMP[5].z, TEMP[3].xxxx 107: DP3 TEMP[8].x, CONST[4][20].xyzz, TEMP[4].xyzz 108: MOV TEMP[5].w, TEMP[8].xxxx 109: MOV TEMP[8].x, TEMP[7].xxxx 110: MOV TEMP[8].y, TEMP[7].yyyy 111: MOV TEMP[8].z, TEMP[7].zzzz 112: DP3 TEMP[9].x, TEMP[4].xyzz, TEMP[4].xyzz 113: RSQ TEMP[9].x, TEMP[9].xxxx 114: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[9].xxxx 115: DP3 TEMP[9].x, TEMP[7].xyzz, CONST[5][0].xyzz 116: MUL TEMP[7].xyz, TEMP[9].xxxx, TEMP[7].xyzz 117: MUL TEMP[7].xyz, IMM[3].wwww, TEMP[7].xyzz 118: ADD TEMP[7].xyz, CONST[5][0].xyzz, -TEMP[7].xyzz 119: DP3 TEMP[4].x, -TEMP[4].xyzz, TEMP[7].xyzz 120: MOV_SAT TEMP[4].x, TEMP[4].xxxx 121: POW TEMP[4].x, TEMP[4].xxxx, IMM[5].xxxx 122: MOV_SAT TEMP[4].x, TEMP[4].xxxx 123: MOV TEMP[8].w, TEMP[4].xxxx 124: MOV TEMP[4].w, IMM[3].xxxx 125: MOV TEMP[4].x, TEMP[0].xxxx 126: MOV TEMP[4].y, TEMP[2].xxxx 127: MOV TEMP[4].z, TEMP[3].xxxx 128: MOV TEMP[0].x, CONST[4][0].wwww 129: MOV TEMP[0].y, CONST[4][1].wwww 130: MOV TEMP[0].z, CONST[4][2].wwww 131: MOV TEMP[0].w, CONST[4][3].wwww 132: DP4 TEMP[0].x, TEMP[4], TEMP[0] 133: MAD TEMP[2].xy, IN[2].xyyy, CONST[1][1].zwww, CONST[1][2].xyyy 134: MOV TEMP[3].xy, IN[4].xyyy 135: MOV TEMP[3].w, IMM[0].xxxx 136: TXL TEMP[3], TEMP[3], SAMP[0], 2D, IMM[7].xyx 137: MOV TEMP[7].x, CONST[4][0].xxxx 138: MOV TEMP[7].y, CONST[4][1].xxxx 139: MOV TEMP[7].z, CONST[4][2].xxxx 140: MOV TEMP[7].w, CONST[4][3].xxxx 141: DP4 TEMP[7].x, TEMP[4], TEMP[7] 142: MOV TEMP[9].x, CONST[4][0].yyyy 143: MOV TEMP[9].y, CONST[4][1].yyyy 144: MOV TEMP[9].z, CONST[4][2].yyyy 145: MOV TEMP[9].w, CONST[4][3].yyyy 146: DP4 TEMP[9].x, TEMP[4], TEMP[9] 147: MOV TEMP[7].y, -TEMP[9].xxxx 148: MOV TEMP[9].x, CONST[4][0].zzzz 149: MOV TEMP[9].y, CONST[4][1].zzzz 150: MOV TEMP[9].z, CONST[4][2].zzzz 151: MOV TEMP[9].w, CONST[4][3].zzzz 152: DP4 TEMP[4].x, TEMP[4], TEMP[9] 153: MAD TEMP[4].x, TEMP[4].xxxx, IMM[3].wwww, -TEMP[0].xxxx 154: MOV TEMP[7].z, TEMP[4].xxxx 155: MOV TEMP[7].w, TEMP[0].xxxx 156: MOV OUT[1], TEMP[1] 157: MOV OUT[2].xy, TEMP[2].xyxx 158: MOV OUT[4], TEMP[8] 159: MOV OUT[6], IMM[0].xxxx 160: MOV OUT[7], TEMP[6] 161: MOV OUT[5], TEMP[3] 162: MOV OUT[0], TEMP[7] 163: MOV OUT[3], TEMP[5] 164: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %17 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4 %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 %19 = call float @llvm.SI.load.const(<16 x i8> %18, i32 0) %20 = call float @llvm.SI.load.const(<16 x i8> %18, i32 4) %21 = call float @llvm.SI.load.const(<16 x i8> %18, i32 8) %22 = call float @llvm.SI.load.const(<16 x i8> %18, i32 12) %23 = call float @llvm.SI.load.const(<16 x i8> %18, i32 16) %24 = call float @llvm.SI.load.const(<16 x i8> %18, i32 20) %25 = call float @llvm.SI.load.const(<16 x i8> %18, i32 24) %26 = call float @llvm.SI.load.const(<16 x i8> %18, i32 28) %27 = call float @llvm.SI.load.const(<16 x i8> %18, i32 32) %28 = call float @llvm.SI.load.const(<16 x i8> %18, i32 36) %29 = call float @llvm.SI.load.const(<16 x i8> %18, i32 40) %30 = call float @llvm.SI.load.const(<16 x i8> %18, i32 44) %31 = call float @llvm.SI.load.const(<16 x i8> %18, i32 48) %32 = call float @llvm.SI.load.const(<16 x i8> %18, i32 52) %33 = call float @llvm.SI.load.const(<16 x i8> %18, i32 56) %34 = call float @llvm.SI.load.const(<16 x i8> %18, i32 60) %35 = call float @llvm.SI.load.const(<16 x i8> %18, i32 304) %36 = call float @llvm.SI.load.const(<16 x i8> %18, i32 308) %37 = call float @llvm.SI.load.const(<16 x i8> %18, i32 312) %38 = call float @llvm.SI.load.const(<16 x i8> %18, i32 320) %39 = call float @llvm.SI.load.const(<16 x i8> %18, i32 324) %40 = call float @llvm.SI.load.const(<16 x i8> %18, i32 328) %41 = call float @llvm.SI.load.const(<16 x i8> %18, i32 408) %42 = call float @llvm.SI.load.const(<16 x i8> %18, i32 412) %43 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 5 %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = call float @llvm.SI.load.const(<16 x i8> %44, i32 0) %46 = call float @llvm.SI.load.const(<16 x i8> %44, i32 4) %47 = call float @llvm.SI.load.const(<16 x i8> %44, i32 8) %48 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %49 = load <8 x i32>, <8 x i32> addrspace(2)* %48, align 32, !tbaa !0 %50 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %51 = load <4 x i32>, <4 x i32> addrspace(2)* %50, align 16, !tbaa !0 %52 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !tbaa !0 %54 = add i32 %5, %7 %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %54) %56 = extractelement <4 x float> %55, i32 0 %57 = extractelement <4 x float> %55, i32 1 %58 = extractelement <4 x float> %55, i32 2 %59 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !tbaa !0 %61 = add i32 %5, %7 %62 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %60, i32 0, i32 %61) %63 = extractelement <4 x float> %62, i32 0 %64 = extractelement <4 x float> %62, i32 1 %65 = extractelement <4 x float> %62, i32 2 %66 = extractelement <4 x float> %62, i32 3 %67 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %68 = load <16 x i8>, <16 x i8> addrspace(2)* %67, align 16, !tbaa !0 %69 = add i32 %5, %7 %70 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %68, i32 0, i32 %69) %71 = extractelement <4 x float> %70, i32 0 %72 = extractelement <4 x float> %70, i32 1 %73 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %74 = load <16 x i8>, <16 x i8> addrspace(2)* %73, align 16, !tbaa !0 %75 = add i32 %5, %7 %76 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %74, i32 0, i32 %75) %77 = extractelement <4 x float> %76, i32 0 %78 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %79 = load <16 x i8>, <16 x i8> addrspace(2)* %78, align 16, !tbaa !0 %80 = add i32 %10, %6 %81 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %79, i32 0, i32 %80) %82 = extractelement <4 x float> %81, i32 0 %83 = extractelement <4 x float> %81, i32 1 %84 = bitcast float %77 to i32 %85 = shl i32 %84, 2 %86 = add i32 %85, 8 %87 = sitofp i32 %86 to float %88 = fadd float %87, 5.000000e-01 %89 = fmul float %88, %41 %90 = fadd float %89, %82 %91 = fadd float %83, 0.000000e+00 %92 = call float @floor(float %90) %93 = fsub float %90, %92 %94 = fmul float %92, %42 %95 = fadd float %94, %91 %96 = bitcast float %93 to i32 %97 = bitcast float %95 to i32 %98 = insertelement <4 x i32> undef, i32 %96, i32 0 %99 = insertelement <4 x i32> %98, i32 %97, i32 1 %100 = insertelement <4 x i32> %99, i32 0, i32 2 %101 = bitcast <8 x i32> %49 to <32 x i8> %102 = bitcast <4 x i32> %51 to <16 x i8> %103 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %100, <32 x i8> %101, <16 x i8> %102, i32 2) %104 = extractelement <4 x float> %103, i32 0 %105 = extractelement <4 x float> %103, i32 1 %106 = extractelement <4 x float> %103, i32 2 %107 = extractelement <4 x float> %103, i32 3 %108 = bitcast float %93 to i32 %109 = bitcast float %95 to i32 %110 = insertelement <4 x i32> , i32 %108, i32 1 %111 = insertelement <4 x i32> %110, i32 %109, i32 2 %112 = insertelement <4 x i32> %111, i32 0, i32 3 %113 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %112, <8 x i32> %49, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %114 = extractelement <4 x float> %113, i32 0 %115 = extractelement <4 x float> %113, i32 1 %116 = extractelement <4 x float> %113, i32 2 %117 = extractelement <4 x float> %113, i32 3 %118 = bitcast float %93 to i32 %119 = bitcast float %95 to i32 %120 = insertelement <4 x i32> , i32 %118, i32 1 %121 = insertelement <4 x i32> %120, i32 %119, i32 2 %122 = insertelement <4 x i32> %121, i32 0, i32 3 %123 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %122, <8 x i32> %49, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %124 = extractelement <4 x float> %123, i32 0 %125 = extractelement <4 x float> %123, i32 1 %126 = extractelement <4 x float> %123, i32 2 %127 = extractelement <4 x float> %123, i32 3 %128 = fmul float %63, 2.550000e+02 %129 = fadd float %128, -1.280000e+02 %130 = fmul float %64, 2.550000e+02 %131 = fadd float %130, -1.280000e+02 %132 = fmul float %65, 2.550000e+02 %133 = fadd float %132, -1.280000e+02 %134 = fmul float %66, 2.550000e+02 %135 = fadd float %134, -1.280000e+02 %136 = fcmp olt float %129, 0.000000e+00 %137 = fcmp olt float %131, 0.000000e+00 %138 = fcmp olt float %133, 0.000000e+00 %139 = fcmp olt float %135, 0.000000e+00 %140 = select i1 %136, float 1.000000e+00, float 0.000000e+00 %141 = select i1 %138, float 1.000000e+00, float 0.000000e+00 %142 = select i1 %139, float 1.000000e+00, float 0.000000e+00 %143 = call float @fabs(float %129) %144 = call float @fabs(float %131) %145 = call float @fabs(float %133) %146 = call float @fabs(float %135) %147 = fsub float %143, %140 %148 = select i1 %137, float -1.000000e+00, float -0.000000e+00 %149 = fadd float %144, %148 %150 = fsub float %145, %141 %151 = fsub float %146, %142 %152 = fadd float %147, -6.400000e+01 %153 = fadd float %149, -6.400000e+01 %154 = fadd float %150, -6.400000e+01 %155 = fadd float %151, -6.400000e+01 %156 = fcmp olt float %152, 0.000000e+00 %157 = fcmp olt float %153, 0.000000e+00 %158 = fcmp olt float %154, 0.000000e+00 %159 = fcmp olt float %155, 0.000000e+00 %160 = select i1 %156, float 1.000000e+00, float 0.000000e+00 %161 = select i1 %157, float 1.000000e+00, float 0.000000e+00 %162 = select i1 %158, float 1.000000e+00, float 0.000000e+00 %163 = select i1 %159, float 1.000000e+00, float 0.000000e+00 %164 = call float @fabs(float %152) %165 = call float @fabs(float %153) %166 = call float @fabs(float %154) %167 = call float @fabs(float %155) %168 = fsub float %164, %160 %169 = fsub float %165, %161 %170 = fsub float %166, %162 %171 = fsub float %167, %163 %172 = fmul float %168, 0x3F90410420000000 %173 = fmul float %169, 0x3F90410420000000 %174 = fmul float %170, 0x3F90410420000000 %175 = fmul float %171, 0x3F90410420000000 %176 = fmul float %160, 2.000000e+00 %177 = fmul float %161, 2.000000e+00 %178 = fmul float %162, 2.000000e+00 %179 = fmul float %163, 2.000000e+00 %180 = fsub float 1.000000e+00, %176 %181 = fsub float 1.000000e+00, %177 %182 = fsub float 1.000000e+00, %178 %183 = fsub float 1.000000e+00, %179 %184 = fmul float %140, 2.000000e+00 %185 = fmul float %141, 2.000000e+00 %186 = fmul float %142, 2.000000e+00 %187 = fsub float 1.000000e+00, %184 %188 = fsub float 1.000000e+00, %185 %189 = fsub float 1.000000e+00, %186 %190 = fsub float 1.000000e+00, %172 %191 = fsub float %190, %173 %192 = fmul float %172, %172 %193 = fmul float %173, %173 %194 = fadd float %193, %192 %195 = fmul float %191, %191 %196 = fadd float %194, %195 %197 = call float @llvm.AMDGPU.rsq.clamped.f32(float %196) %198 = fmul float %172, %197 %199 = fmul float %173, %197 %200 = fmul float %191, %197 %201 = fmul float %198, %180 %202 = fmul float %199, %181 %203 = fsub float 1.000000e+00, %174 %204 = fsub float %203, %175 %205 = fmul float %174, %174 %206 = fmul float %175, %175 %207 = fadd float %206, %205 %208 = fmul float %204, %204 %209 = fadd float %207, %208 %210 = call float @llvm.AMDGPU.rsq.clamped.f32(float %209) %211 = fmul float %174, %210 %212 = fmul float %175, %210 %213 = fmul float %204, %210 %214 = fmul float %211, %182 %215 = fmul float %212, %183 %216 = fmul float %200, %187 %217 = fmul float %201, %104 %218 = fmul float %202, %105 %219 = fadd float %217, %218 %220 = fmul float %216, %106 %221 = fadd float %219, %220 %222 = fmul float %107, 0.000000e+00 %223 = fadd float %221, %222 %224 = fmul float %201, %114 %225 = fmul float %202, %115 %226 = fadd float %224, %225 %227 = fmul float %216, %116 %228 = fadd float %226, %227 %229 = fmul float %117, 0.000000e+00 %230 = fadd float %228, %229 %231 = fmul float %201, %124 %232 = fmul float %202, %125 %233 = fadd float %231, %232 %234 = fmul float %216, %126 %235 = fadd float %233, %234 %236 = fmul float %127, 0.000000e+00 %237 = fadd float %235, %236 %238 = fmul float %223, %223 %239 = fmul float %230, %230 %240 = fadd float %239, %238 %241 = fmul float %237, %237 %242 = fadd float %240, %241 %243 = call float @llvm.AMDGPU.rsq.clamped.f32(float %242) %244 = fmul float %223, %243 %245 = fmul float %230, %243 %246 = fmul float %237, %243 %247 = fmul float %213, %188 %248 = fmul float %214, %104 %249 = fmul float %215, %105 %250 = fadd float %248, %249 %251 = fmul float %247, %106 %252 = fadd float %250, %251 %253 = fmul float %107, 0.000000e+00 %254 = fadd float %252, %253 %255 = fmul float %214, %114 %256 = fmul float %215, %115 %257 = fadd float %255, %256 %258 = fmul float %247, %116 %259 = fadd float %257, %258 %260 = fmul float %117, 0.000000e+00 %261 = fadd float %259, %260 %262 = fmul float %214, %124 %263 = fmul float %215, %125 %264 = fadd float %262, %263 %265 = fmul float %247, %126 %266 = fadd float %264, %265 %267 = fmul float %127, 0.000000e+00 %268 = fadd float %266, %267 %269 = fmul float %254, %244 %270 = fmul float %261, %245 %271 = fadd float %270, %269 %272 = fmul float %268, %246 %273 = fadd float %271, %272 %274 = fmul float %273, %244 %275 = fmul float %273, %245 %276 = fmul float %273, %246 %277 = fsub float %254, %274 %278 = fsub float %261, %275 %279 = fsub float %268, %276 %280 = fmul float %277, %277 %281 = fmul float %278, %278 %282 = fadd float %281, %280 %283 = fmul float %279, %279 %284 = fadd float %282, %283 %285 = call float @llvm.AMDGPU.rsq.clamped.f32(float %284) %286 = fmul float %277, %285 %287 = fmul float %278, %285 %288 = fmul float %279, %285 %289 = bitcast float %82 to i32 %290 = bitcast float %83 to i32 %291 = insertelement <4 x i32> undef, i32 %289, i32 0 %292 = insertelement <4 x i32> %291, i32 %290, i32 1 %293 = insertelement <4 x i32> %292, i32 0, i32 2 %294 = bitcast <8 x i32> %49 to <32 x i8> %295 = bitcast <4 x i32> %51 to <16 x i8> %296 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %293, <32 x i8> %294, <16 x i8> %295, i32 2) %297 = extractelement <4 x float> %296, i32 2 %298 = fmul float %56, %297 %299 = fmul float %57, %297 %300 = fmul float %58, %297 %301 = fmul float %298, %104 %302 = fmul float %299, %105 %303 = fadd float %301, %302 %304 = fmul float %300, %106 %305 = fadd float %303, %304 %306 = fadd float %305, %107 %307 = fmul float %298, %114 %308 = fmul float %299, %115 %309 = fadd float %307, %308 %310 = fmul float %300, %116 %311 = fadd float %309, %310 %312 = fadd float %311, %117 %313 = fmul float %298, %124 %314 = fmul float %299, %125 %315 = fadd float %313, %314 %316 = fmul float %300, %126 %317 = fadd float %315, %316 %318 = fadd float %317, %127 %319 = fsub float %306, %35 %320 = fsub float %312, %36 %321 = fsub float %318, %37 %322 = fmul float %38, %319 %323 = fmul float %39, %320 %324 = fadd float %323, %322 %325 = fmul float %40, %321 %326 = fadd float %324, %325 %327 = fmul float %319, %319 %328 = fmul float %320, %320 %329 = fadd float %328, %327 %330 = fmul float %321, %321 %331 = fadd float %329, %330 %332 = call float @llvm.AMDGPU.rsq.clamped.f32(float %331) %333 = fmul float %319, %332 %334 = fmul float %320, %332 %335 = fmul float %321, %332 %336 = fmul float %244, %45 %337 = fmul float %245, %46 %338 = fadd float %337, %336 %339 = fmul float %246, %47 %340 = fadd float %338, %339 %341 = fmul float %340, %244 %342 = fmul float %340, %245 %343 = fmul float %340, %246 %344 = fmul float %341, 2.000000e+00 %345 = fmul float %342, 2.000000e+00 %346 = fmul float %343, 2.000000e+00 %347 = fsub float %45, %344 %348 = fsub float %46, %345 %349 = fsub float %47, %346 %350 = fmul float %333, %347 %351 = fsub float -0.000000e+00, %350 %352 = fmul float %334, %348 %353 = fsub float %351, %352 %354 = fmul float %335, %349 %355 = fsub float %353, %354 %356 = call float @llvm.AMDIL.clamp.(float %355, float 0.000000e+00, float 1.000000e+00) %357 = call float @llvm.pow.f32(float %356, float 1.600000e+01) %358 = call float @llvm.AMDIL.clamp.(float %357, float 0.000000e+00, float 1.000000e+00) %359 = fmul float %306, %22 %360 = fmul float %312, %26 %361 = fadd float %359, %360 %362 = fmul float %318, %30 %363 = fadd float %361, %362 %364 = fadd float %363, %34 %365 = fmul float %71, %13 %366 = fadd float %365, %15 %367 = fmul float %72, %14 %368 = fadd float %367, %16 %369 = bitcast float %82 to i32 %370 = bitcast float %83 to i32 %371 = insertelement <4 x i32> , i32 %369, i32 1 %372 = insertelement <4 x i32> %371, i32 %370, i32 2 %373 = insertelement <4 x i32> %372, i32 0, i32 3 %374 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %373, <8 x i32> %49, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %375 = extractelement <4 x float> %374, i32 0 %376 = extractelement <4 x float> %374, i32 1 %377 = extractelement <4 x float> %374, i32 2 %378 = extractelement <4 x float> %374, i32 3 %379 = fmul float %306, %19 %380 = fmul float %312, %23 %381 = fadd float %379, %380 %382 = fmul float %318, %27 %383 = fadd float %381, %382 %384 = fadd float %383, %31 %385 = fmul float %306, %20 %386 = fmul float %312, %24 %387 = fadd float %385, %386 %388 = fmul float %318, %28 %389 = fadd float %387, %388 %390 = fadd float %389, %32 %391 = fsub float -0.000000e+00, %390 %392 = fmul float %306, %21 %393 = fmul float %312, %25 %394 = fadd float %392, %393 %395 = fmul float %318, %29 %396 = fadd float %394, %395 %397 = fadd float %396, %33 %398 = fmul float %397, 2.000000e+00 %399 = fsub float %398, %364 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %366, float %368, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %306, float %312, float %318, float %326) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %244, float %245, float %246, float %358) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %375, float %376, float %377, float %378) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %286, float %287, float %288, float %189) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %384, float %391, float %399, float %364) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @floor(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0xc3000000 ; 7E0202FF C3000000 v_mov_b32_e32 v2, 0x437f0000 ; 7E0402FF 437F0000 v_mov_b32_e32 v4, 0x80000000 ; 7E0802FF 80000000 v_mov_b32_e32 v5, 0xc2800000 ; 7E0A02FF C2800000 v_mov_b32_e32 v6, 0x3c820821 ; 7E0C02FF 3C820821 v_add_i32_e32 v0, s10, v0 ; 4A00000A v_add_i32_e32 v3, s11, v3 ; 4A06060B s_load_dwordx4 s[28:31], s[4:5], 0x0 ; C08E0500 s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908 s_load_dwordx4 s[24:27], s[8:9], 0xc ; C08C090C s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910 s_load_dwordx4 s[40:43], s[2:3], 0x4 ; C0940304 s_load_dwordx4 s[44:47], s[2:3], 0x10 ; C0960310 s_load_dwordx4 s[48:51], s[2:3], 0x14 ; C0980314 s_load_dwordx8 s[32:39], s[6:7], 0x0 ; C0D00700 v_mov_b32_e32 v13, 0 ; 7E1A0280 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[14:17], v0, s[12:15], 0 idxen ; E00C2000 80030E00 buffer_load_format_xyzw v[7:10], v0, s[16:19], 0 idxen ; E00C2000 80040700 s_waitcnt vmcnt(1) ; BF8C0771 buffer_load_format_xyzw v[17:20], v0, s[20:23], 0 idxen ; E00C2000 80051100 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[19:22], v0, s[24:27], 0 idxen ; E00C2000 80061300 buffer_load_format_xyzw v[24:27], v3, s[8:11], 0 idxen ; E00C2000 80021803 s_buffer_load_dword s26, s[40:43], 0x6 ; C20D2906 s_buffer_load_dword s25, s[40:43], 0x7 ; C20CA907 s_buffer_load_dword s1, s[40:43], 0x8 ; C200A908 s_buffer_load_dword s2, s[40:43], 0x9 ; C2012909 s_buffer_load_dword s11, s[44:47], 0x51 ; C205AD51 s_buffer_load_dword s12, s[44:47], 0x52 ; C2062D52 s_buffer_load_dword s27, s[44:47], 0x66 ; C20DAD66 s_buffer_load_dword s40, s[44:47], 0x67 ; C2142D67 s_buffer_load_dword s0, s[44:47], 0xf ; C2002D0F s_buffer_load_dword s24, s[44:47], 0x4c ; C20C2D4C s_buffer_load_dword s23, s[44:47], 0x4d ; C20BAD4D s_buffer_load_dword s22, s[44:47], 0x4e ; C20B2D4E s_buffer_load_dword s18, s[44:47], 0x50 ; C2092D50 s_buffer_load_dword s21, s[48:51], 0x0 ; C20AB100 s_buffer_load_dword s20, s[48:51], 0x1 ; C20A3101 s_buffer_load_dword s19, s[48:51], 0x2 ; C209B102 s_buffer_load_dword s6, s[44:47], 0x5 ; C2032D05 s_buffer_load_dword s7, s[44:47], 0x6 ; C203AD06 s_buffer_load_dword s13, s[44:47], 0x7 ; C206AD07 s_buffer_load_dword s4, s[44:47], 0x8 ; C2022D08 s_buffer_load_dword s3, s[44:47], 0x9 ; C201AD09 s_buffer_load_dword s8, s[44:47], 0x0 ; C2042D00 s_buffer_load_dword s9, s[44:47], 0x1 ; C204AD01 s_buffer_load_dword s10, s[44:47], 0x2 ; C2052D02 s_buffer_load_dword s14, s[44:47], 0x3 ; C2072D03 s_buffer_load_dword s16, s[44:47], 0x4 ; C2082D04 s_buffer_load_dword s15, s[44:47], 0xa ; C207AD0A s_buffer_load_dword s17, s[44:47], 0xb ; C208AD0B s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v0, s1 ; 7E000201 s_buffer_load_dword s5, s[44:47], 0xc ; C202AD0C v_mov_b32_e32 v3, s2 ; 7E060202 s_buffer_load_dword s2, s[44:47], 0xd ; C2012D0D s_buffer_load_dword s1, s[44:47], 0xe ; C200AD0E v_mad_f32 v7, v2, v7, v1 ; D2820007 04060F02 v_lshlrev_b32_e32 v11, 2, v19 ; 34162682 v_add_i32_e32 v11, 8, v11 ; 4A161688 v_cvt_f32_i32_e32 v11, v11 ; 7E160B0B v_mad_f32 v8, v2, v8, v1 ; D2820008 04061102 v_mad_f32 v9, v2, v9, v1 ; D2820009 04061302 v_mac_f32_e32 v1, v2, v10 ; 3E021502 v_add_f32_e32 v2, 0.5, v11 ; 060416F0 v_mad_f32 v2, s27, v2, v24 ; D2820002 0462041B v_add_f32_e32 v12, 0, v25 ; 06183280 v_floor_f32_e32 v10, v2 ; 7E144902 v_subrev_f32_e32 v11, v10, v2 ; 0A16050A v_mac_f32_e32 v12, s40, v10 ; 3E181428 v_mac_f32_e32 v0, s26, v17 ; 3E00221A image_sample_l v[19:22], 15, 0, 0, 0, 0, 0, 0, 0, v[11:14], s[32:39], s[28:31] ; F0900F00 00E8130B v_mov_b32_e32 v10, 0x10001 ; 7E1402FF 00010001 image_sample_l_o v[27:30], 15, 0, 0, 0, 0, 0, 0, 0, v[10:13], s[32:39], s[28:31] ; F0D00F00 00E81B0A v_mov_b32_e32 v10, 0x20002 ; 7E1402FF 00020002 v_mac_f32_e32 v3, s25, v18 ; 3E062419 v_mov_b32_e32 v26, v13 ; 7E34030D image_sample_l_o v[31:34], 15, 0, 0, 0, 0, 0, 0, 0, v[10:13], s[32:39], s[28:31] ; F0D00F00 00E81F0A s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 image_sample_l v2, 4, 0, 0, 0, 0, 0, 0, 0, v[24:27], s[32:39], s[28:31] ; F0900400 00E80218 v_mov_b32_e32 v23, 0x30003 ; 7E2E02FF 00030003 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v10, v2, v14 ; 10141D02 v_mul_f32_e32 v11, v2, v15 ; 10161F02 v_mul_f32_e32 v2, v2, v16 ; 10042102 v_mov_b32_e32 v26, v13 ; 7E34030D image_sample_l_o v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[23:26], s[32:39], s[28:31] ; F0D00F00 00E80E17 v_cmp_gt_f32_e32 vcc, 0, v8 ; 7C081080 v_cndmask_b32_e64 v4, v4, -1.0, vcc ; D2000004 01A9E704 v_add_f32_e64 v4, |v8|, v4 ; D2060104 00020908 v_cmp_gt_f32_e32 vcc, 0, v7 ; 7C080E80 v_cndmask_b32_e64 v8, 0, 1.0, vcc ; D2000008 01A9E480 v_sub_f32_e64 v7, |v7|, v8 ; D2080107 00021107 v_cmp_gt_f32_e32 vcc, 0, v9 ; 7C081280 v_cndmask_b32_e64 v12, 0, 1.0, vcc ; D200000C 01A9E480 v_sub_f32_e64 v9, |v9|, v12 ; D2080109 00021909 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v18, 0, 1.0, vcc ; D2000012 01A9E480 v_sub_f32_e64 v1, |v1|, v18 ; D2080101 00022501 v_add_f32_e32 v7, v5, v7 ; 060E0F05 v_add_f32_e32 v4, v5, v4 ; 06080905 v_add_f32_e32 v9, v5, v9 ; 06121305 v_add_f32_e32 v1, v5, v1 ; 06020305 v_cmp_gt_f32_e32 vcc, 0, v7 ; 7C080E80 v_cndmask_b32_e64 v5, 0, 1.0, vcc ; D2000005 01A9E480 v_sub_f32_e64 v7, |v7|, v5 ; D2080107 00020B07 v_cmp_gt_f32_e32 vcc, 0, v4 ; 7C080880 v_cndmask_b32_e64 v23, 0, 1.0, vcc ; D2000017 01A9E480 v_sub_f32_e64 v4, |v4|, v23 ; D2080104 00022F04 v_cmp_gt_f32_e32 vcc, 0, v9 ; 7C081280 v_cndmask_b32_e64 v24, 0, 1.0, vcc ; D2000018 01A9E480 v_sub_f32_e64 v9, |v9|, v24 ; D2080109 00023109 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v25, 0, 1.0, vcc ; D2000019 01A9E480 v_sub_f32_e64 v1, |v1|, v25 ; D2080101 00023301 v_mul_f32_e32 v26, v6, v7 ; 10340F06 v_mad_f32 v7, -v7, v6, 1.0 ; D2820007 23CA0D07 v_mad_f32 v7, -v4, v6, v7 ; D2820007 241E0D04 v_mul_f32_e32 v4, v6, v4 ; 10080906 v_mul_f32_e32 v35, v6, v9 ; 10461306 v_mad_f32 v9, -v9, v6, 1.0 ; D2820009 23CA0D09 v_mad_f32 v9, -v1, v6, v9 ; D2820009 24260D01 v_mul_f32_e32 v1, v6, v1 ; 10020306 v_mul_f32_e32 v6, v26, v26 ; 100C351A v_mac_f32_e32 v6, v4, v4 ; 3E0C0904 v_mac_f32_e32 v6, v7, v7 ; 3E0C0F07 v_rsq_clamp_f32_e32 v6, v6 ; 7E0C5906 v_mul_f32_e32 v36, v35, v35 ; 10484723 v_mac_f32_e32 v36, v1, v1 ; 3E480301 v_mac_f32_e32 v36, v9, v9 ; 3E481309 v_rsq_clamp_f32_e32 v36, v36 ; 7E485924 v_mul_f32_e32 v26, v6, v26 ; 10343506 v_mul_f32_e32 v4, v6, v4 ; 10080906 v_mul_f32_e32 v6, v6, v7 ; 100C0F06 v_mul_f32_e32 v7, v36, v35 ; 100E4724 v_mul_f32_e32 v1, v36, v1 ; 10020324 v_mul_f32_e32 v9, v36, v9 ; 10121324 v_mad_f32 v5, -2.0, v5, 1.0 ; D2820005 03CA0AF5 v_mul_f32_e32 v5, v5, v26 ; 100A3505 v_mad_f32 v23, -2.0, v23, 1.0 ; D2820017 03CA2EF5 v_mul_f32_e32 v4, v23, v4 ; 10080917 v_mad_f32 v23, -2.0, v24, 1.0 ; D2820017 03CA30F5 v_mul_f32_e32 v7, v23, v7 ; 100E0F17 v_mad_f32 v23, -2.0, v25, 1.0 ; D2820017 03CA32F5 v_mul_f32_e32 v1, v23, v1 ; 10020317 v_mad_f32 v8, -2.0, v8, 1.0 ; D2820008 03CA10F5 v_mul_f32_e32 v6, v8, v6 ; 100C0D08 v_mad_f32 v8, -2.0, v12, 1.0 ; D2820008 03CA18F5 v_mul_f32_e32 v8, v8, v9 ; 10101308 v_mul_f32_e32 v9, v20, v11 ; 10121714 v_mac_f32_e32 v9, v19, v10 ; 3E121513 v_mac_f32_e32 v9, v21, v2 ; 3E120515 v_add_f32_e32 v9, v22, v9 ; 06121316 v_mul_f32_e32 v12, v20, v4 ; 10180914 v_mac_f32_e32 v12, v19, v5 ; 3E180B13 v_mul_f32_e32 v20, v20, v1 ; 10280314 v_mac_f32_e32 v20, v19, v7 ; 3E280F13 v_mac_f32_e32 v12, v21, v6 ; 3E180D15 v_mac_f32_e32 v20, v21, v8 ; 3E281115 v_mac_f32_e32 v12, 0, v22 ; 3E182C80 v_mac_f32_e32 v20, 0, v22 ; 3E282C80 v_mul_f32_e32 v19, v28, v11 ; 1026171C v_mac_f32_e32 v19, v27, v10 ; 3E26151B v_mac_f32_e32 v19, v29, v2 ; 3E26051D v_add_f32_e32 v19, v30, v19 ; 0626271E v_mul_f32_e32 v21, v28, v4 ; 102A091C v_mac_f32_e32 v21, v27, v5 ; 3E2A0B1B v_mul_f32_e32 v22, v28, v1 ; 102C031C v_mac_f32_e32 v22, v27, v7 ; 3E2C0F1B v_mac_f32_e32 v21, v29, v6 ; 3E2A0D1D v_mac_f32_e32 v22, v29, v8 ; 3E2C111D v_mac_f32_e32 v21, 0, v30 ; 3E2A3C80 v_mac_f32_e32 v22, 0, v30 ; 3E2C3C80 v_mul_f32_e32 v11, v32, v11 ; 10161720 v_mac_f32_e32 v11, v31, v10 ; 3E16151F v_mac_f32_e32 v11, v33, v2 ; 3E160521 v_mul_f32_e32 v2, v32, v4 ; 10040920 v_mac_f32_e32 v2, v31, v5 ; 3E040B1F v_mul_f32_e32 v1, v32, v1 ; 10020320 v_mac_f32_e32 v1, v31, v7 ; 3E020F1F v_mac_f32_e32 v2, v33, v6 ; 3E040D21 v_mac_f32_e32 v1, v33, v8 ; 3E021121 v_add_f32_e32 v4, v34, v11 ; 06081722 v_mac_f32_e32 v2, 0, v34 ; 3E044480 v_mul_f32_e32 v5, v12, v12 ; 100A190C v_mac_f32_e32 v5, v21, v21 ; 3E0A2B15 v_mac_f32_e32 v5, v2, v2 ; 3E0A0502 v_rsq_clamp_f32_e32 v5, v5 ; 7E0A5905 v_mac_f32_e32 v1, 0, v34 ; 3E024480 exp 15, 32, 0, 0, 0, v13, v13, v13, v13 ; F800020F 0D0D0D0D exp 15, 33, 0, 0, 0, v0, v3, v13, v13 ; F800021F 0D0D0300 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_mul_f32_e32 v0, v5, v12 ; 10001905 v_mul_f32_e32 v3, v5, v21 ; 10062B05 v_mul_f32_e32 v2, v5, v2 ; 10040505 v_subrev_f32_e32 v5, s24, v9 ; 0A0A1218 v_subrev_f32_e32 v6, s23, v19 ; 0A0C2617 v_mul_f32_e32 v7, v5, v5 ; 100E0B05 v_mac_f32_e32 v7, v6, v6 ; 3E0E0D06 v_subrev_f32_e32 v8, s22, v4 ; 0A100816 v_mac_f32_e32 v7, v8, v8 ; 3E0E1108 v_rsq_clamp_f32_e32 v7, v7 ; 7E0E5907 v_mul_f32_e32 v10, s21, v0 ; 10140015 v_mac_f32_e32 v10, s20, v3 ; 3E140614 v_mac_f32_e32 v10, s19, v2 ; 3E140413 v_mul_f32_e32 v11, v0, v10 ; 10161500 v_mad_f32 v11, -2.0, v11, s21 ; D282000B 005616F5 v_mul_f32_e32 v12, v7, v5 ; 10180B07 v_mul_f32_e32 v11, v11, v12 ; 1016190B v_mul_f32_e32 v12, v3, v10 ; 10181503 v_mad_f32 v12, -2.0, v12, s20 ; D282000C 005218F5 v_mul_f32_e32 v21, v7, v6 ; 102A0D07 v_mad_f32 v11, -v21, v12, -v11 ; D282000B A42E1915 v_mul_f32_e32 v10, v2, v10 ; 10141502 v_mad_f32 v10, -2.0, v10, s19 ; D282000A 004E14F5 v_mul_f32_e32 v7, v7, v8 ; 100E1107 v_mad_f32 v7, -v7, v10, v11 ; D2820007 242E1507 v_mul_f32_e32 v5, s18, v5 ; 100A0A12 v_add_f32_e64 v7, 0, v7 clamp ; D2060807 00020E80 v_log_f32_e32 v7, v7 ; 7E0E4F07 v_mac_f32_e32 v5, s11, v6 ; 3E0A0C0B v_mac_f32_e32 v5, s12, v8 ; 3E0A100C exp 15, 34, 0, 0, 0, v9, v19, v4, v5 ; F800022F 05041309 s_waitcnt expcnt(0) ; BF8C070F v_mul_legacy_f32_e32 v5, 0x41800000, v7 ; 0E0A0EFF 41800000 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80 exp 15, 35, 0, 0, 0, v0, v3, v2, v5 ; F800023F 05020300 exp 15, 36, 0, 0, 0, v14, v15, v16, v17 ; F800024F 11100F0E s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v5, s13, v19 ; 100A260D v_mul_f32_e32 v6, s16, v19 ; 100C2610 v_mul_f32_e32 v7, s6, v19 ; 100E2606 v_mul_f32_e32 v8, s7, v19 ; 10102607 v_mac_f32_e32 v5, s14, v9 ; 3E0A120E v_mac_f32_e32 v6, s8, v9 ; 3E0C1208 v_mac_f32_e32 v7, s9, v9 ; 3E0E1209 v_mac_f32_e32 v8, s10, v9 ; 3E10120A v_mac_f32_e32 v5, s17, v4 ; 3E0A0811 v_mac_f32_e32 v6, s4, v4 ; 3E0C0804 v_mac_f32_e32 v7, s3, v4 ; 3E0E0803 v_mac_f32_e32 v8, s15, v4 ; 3E10080F v_mul_f32_e32 v4, v0, v20 ; 10082900 v_mac_f32_e32 v4, v3, v22 ; 3E082D03 v_mac_f32_e32 v4, v2, v1 ; 3E080302 v_mad_f32 v0, -v4, v0, v20 ; D2820000 24520104 v_mad_f32 v3, -v4, v3, v22 ; D2820003 245A0704 v_mad_f32 v1, -v4, v2, v1 ; D2820001 24060504 v_add_f32_e32 v2, s0, v5 ; 06040A00 v_add_f32_e32 v4, s5, v6 ; 06080C05 v_mul_f32_e32 v5, v0, v0 ; 100A0100 v_mac_f32_e32 v5, v3, v3 ; 3E0A0703 v_mac_f32_e32 v5, v1, v1 ; 3E0A0301 v_rsq_clamp_f32_e32 v5, v5 ; 7E0A5905 v_add_f32_e32 v6, s2, v7 ; 060C0E02 v_mad_f32 v7, -2.0, v18, 1.0 ; D2820007 03CA24F5 v_add_f32_e32 v8, s1, v8 ; 06101001 v_mul_f32_e32 v0, v5, v0 ; 10000105 v_mul_f32_e32 v3, v5, v3 ; 10060705 v_mul_f32_e32 v1, v5, v1 ; 10020305 exp 15, 37, 0, 0, 0, v13, v13, v13, v13 ; F800025F 0D0D0D0D exp 15, 38, 0, 0, 0, v0, v3, v1, v7 ; F800026F 07010300 s_waitcnt expcnt(0) ; BF8C070F v_xor_b32_e32 v0, 0x80000000, v6 ; 3A000CFF 80000000 v_mad_f32 v1, 2.0, v8, -v2 ; D2820001 840A10F4 exp 15, 12, 0, 0, 0, v4, v0, v1, v2 ; F80000CF 02010004 exp 15, 13, 0, 1, 0, v13, v13, v13, v13 ; F80008DF 0D0D0D0D s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 40 Code Size: 1368 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL IN[5] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL OUT[4], GENERIC[2] DCL OUT[5], GENERIC[3] DCL OUT[6], GENERIC[4] DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0..2] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL DCL TEMP[2..8], LOCAL IMM[0] FLT32 { 0.0000, 0.5000, 255.0000, -128.0000} IMM[1] UINT32 {3, 400, 320, 304} IMM[2] INT32 {2, 8, 1, 0} IMM[3] FLT32 { 1.0000, -64.0000, 0.0159, 2.0000} IMM[4] UINT32 {12, 28, 44, 60} IMM[5] UINT32 {0, 24, 32, 16} IMM[6] INT32 {3, 0, 0, 0} IMM[7] UINT32 {48, 4, 20, 36} IMM[8] UINT32 {52, 8, 40, 56} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MOV TEMP[0].y, IMM[0].xxxx 4: SHL TEMP[2].x, IN[3].xxxx, IMM[2].xxxx 5: UADD TEMP[2].x, TEMP[2].xxxx, IMM[2].yyyy 6: I2F TEMP[2].x, TEMP[2].xxxx 7: ADD TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy 8: MUL TEMP[0].x, TEMP[2].xxxx, CONST[4][25].zzzz 9: ADD TEMP[0].xy, TEMP[0].xyyy, IN[5].xyyy 10: FLR TEMP[2].x, TEMP[0].xxxx 11: ADD TEMP[3].x, TEMP[0].xxxx, -TEMP[2].xxxx 12: MAD TEMP[0].x, TEMP[2].xxxx, CONST[4][25].wwww, TEMP[0].yyyy 13: MOV TEMP[3].y, TEMP[0].xxxx 14: MOV TEMP[0].y, IMM[0].xxxx 15: SHL TEMP[2].x, IN[3].yyyy, IMM[2].xxxx 16: UADD TEMP[2].x, IMM[2].yyyy, TEMP[2].xxxx 17: I2F TEMP[2].x, TEMP[2].xxxx 18: ADD TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx 19: MUL TEMP[0].x, TEMP[2].xxxx, CONST[4][25].zzzz 20: ADD TEMP[0].xy, TEMP[0].xyyy, IN[5].xyyy 21: FLR TEMP[2].x, TEMP[0].xxxx 22: ADD TEMP[4].x, TEMP[0].xxxx, -TEMP[2].xxxx 23: MAD TEMP[0].x, TEMP[2].xxxx, CONST[4][25].wwww, TEMP[0].yyyy 24: MOV TEMP[4].y, TEMP[0].xxxx 25: MOV TEMP[0].y, IMM[0].xxxx 26: SHL TEMP[2].x, IN[3].zzzz, IMM[2].xxxx 27: UADD TEMP[2].x, IMM[2].yyyy, TEMP[2].xxxx 28: I2F TEMP[2].x, TEMP[2].xxxx 29: ADD TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx 30: MUL TEMP[0].x, TEMP[2].xxxx, CONST[4][25].zzzz 31: ADD TEMP[0].xy, TEMP[0].xyyy, IN[5].xyyy 32: FLR TEMP[2].x, TEMP[0].xxxx 33: ADD TEMP[5].x, TEMP[0].xxxx, -TEMP[2].xxxx 34: MAD TEMP[0].x, TEMP[2].xxxx, CONST[4][25].wwww, TEMP[0].yyyy 35: MOV TEMP[5].y, TEMP[0].xxxx 36: MOV TEMP[0].xy, TEMP[5].xyyy 37: MOV TEMP[0].w, IMM[0].xxxx 38: TXL TEMP[0], TEMP[0], SAMP[0], 2D 39: MOV TEMP[2].xy, TEMP[4].xyyy 40: MOV TEMP[2].w, IMM[0].xxxx 41: TXL TEMP[2], TEMP[2], SAMP[0], 2D 42: MOV TEMP[6].xy, TEMP[3].xyyy 43: MOV TEMP[6].w, IMM[0].xxxx 44: TXL TEMP[6], TEMP[6], SAMP[0], 2D 45: MUL TEMP[6], IN[4].xxxx, TEMP[6] 46: MAD TEMP[2], IN[4].yyyy, TEMP[2], TEMP[6] 47: MAD TEMP[0], IN[4].zzzz, TEMP[0], TEMP[2] 48: MOV TEMP[2].xy, TEMP[5].xyyy 49: MOV TEMP[2].w, IMM[0].xxxx 50: TXL TEMP[2], TEMP[2], SAMP[0], 2D, IMM[2].zwz 51: MOV TEMP[6].xy, TEMP[3].xyyy 52: MOV TEMP[6].w, IMM[0].xxxx 53: TXL TEMP[6], TEMP[6], SAMP[0], 2D, IMM[2].zwz 54: MOV TEMP[7].xy, TEMP[4].xyyy 55: MOV TEMP[7].w, IMM[0].xxxx 56: TXL TEMP[7], TEMP[7], SAMP[0], 2D, IMM[2].zwz 57: MUL TEMP[7], IN[4].yyyy, TEMP[7] 58: MAD TEMP[6], IN[4].xxxx, TEMP[6], TEMP[7] 59: MAD TEMP[2], IN[4].zzzz, TEMP[2], TEMP[6] 60: MOV TEMP[5].xy, TEMP[5].xyyy 61: MOV TEMP[5].w, IMM[0].xxxx 62: TXL TEMP[5], TEMP[5], SAMP[0], 2D, IMM[2].xwx 63: MOV TEMP[4].xy, TEMP[4].xyyy 64: MOV TEMP[4].w, IMM[0].xxxx 65: TXL TEMP[4], TEMP[4], SAMP[0], 2D, IMM[2].xwx 66: MOV TEMP[3].xy, TEMP[3].xyyy 67: MOV TEMP[3].w, IMM[0].xxxx 68: TXL TEMP[3], TEMP[3], SAMP[0], 2D, IMM[2].xwx 69: MUL TEMP[3], IN[4].xxxx, TEMP[3] 70: MAD TEMP[3], IN[4].yyyy, TEMP[4], TEMP[3] 71: MAD TEMP[3], IN[4].zzzz, TEMP[5], TEMP[3] 72: MAD TEMP[4], IN[1], IMM[0].zzzz, IMM[0].wwww 73: FSLT TEMP[5], TEMP[4], IMM[0].xxxx 74: AND TEMP[5], TEMP[5], IMM[3].xxxx 75: ABS TEMP[4], TEMP[4] 76: ADD TEMP[4], TEMP[4], -TEMP[5] 77: ADD TEMP[4], TEMP[4], IMM[3].yyyy 78: FSLT TEMP[6], TEMP[4], IMM[0].xxxx 79: AND TEMP[6], TEMP[6], IMM[3].xxxx 80: ABS TEMP[4], TEMP[4] 81: ADD TEMP[4], TEMP[4], -TEMP[6] 82: MUL TEMP[4].xy, TEMP[4], IMM[3].zzzz 83: MOV TEMP[7].x, TEMP[4].xxxx 84: MOV TEMP[7].y, TEMP[4].yyyy 85: ADD TEMP[8].x, IMM[3].xxxx, -TEMP[4].xxxx 86: ADD TEMP[4].x, TEMP[8].xxxx, -TEMP[4].yyyy 87: MOV TEMP[7].z, TEMP[4].xxxx 88: DP3 TEMP[4].x, TEMP[7].xyzz, TEMP[7].xyzz 89: RSQ TEMP[4].x, TEMP[4].xxxx 90: MUL TEMP[4].xyz, TEMP[7].xyzz, TEMP[4].xxxx 91: MUL TEMP[6], TEMP[6], IMM[3].wwww 92: ADD TEMP[6].xy, IMM[3].xxxx, -TEMP[6] 93: MUL TEMP[6].xy, TEMP[4].xyyy, TEMP[6].xyyy 94: MOV TEMP[7].w, IMM[0].xxxx 95: MOV TEMP[7].x, TEMP[6].xxxx 96: MOV TEMP[7].y, TEMP[6].yyyy 97: MUL TEMP[5].x, TEMP[5].xxxx, IMM[3].wwww 98: ADD TEMP[5].x, IMM[3].xxxx, -TEMP[5].xxxx 99: MUL TEMP[4].x, TEMP[5].xxxx, TEMP[4].zzzz 100: MOV TEMP[7].z, TEMP[4].xxxx 101: DP4 TEMP[4].x, TEMP[7], TEMP[0] 102: DP4 TEMP[5].x, TEMP[7], TEMP[2] 103: MOV TEMP[4].y, TEMP[5].xxxx 104: DP4 TEMP[5].x, TEMP[7], TEMP[3] 105: MOV TEMP[4].z, TEMP[5].xxxx 106: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 107: RSQ TEMP[5].x, TEMP[5].xxxx 108: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx 109: MOV TEMP[5].xy, IN[5].xyyy 110: MOV TEMP[5].w, IMM[0].xxxx 111: TXL TEMP[5].z, TEMP[5], SAMP[0], 2D 112: MUL TEMP[5].xyz, IN[0].xyzz, TEMP[5].zzzz 113: MOV TEMP[6].w, IMM[3].xxxx 114: MOV TEMP[6].x, TEMP[5].xxxx 115: MOV TEMP[6].y, TEMP[5].yyyy 116: MOV TEMP[6].z, TEMP[5].zzzz 117: DP4 TEMP[0].x, TEMP[6], TEMP[0] 118: DP4 TEMP[2].x, TEMP[6], TEMP[2] 119: DP4 TEMP[3].x, TEMP[6], TEMP[3] 120: MOV TEMP[5].x, TEMP[0].xxxx 121: MOV TEMP[5].y, TEMP[2].xxxx 122: MOV TEMP[5].z, TEMP[3].xxxx 123: MOV TEMP[6].x, TEMP[0].xxxx 124: MOV TEMP[6].y, TEMP[2].xxxx 125: MOV TEMP[6].z, TEMP[3].xxxx 126: ADD TEMP[5].xyz, TEMP[5].xyzz, -CONST[4][19].xyzz 127: DP3 TEMP[5].x, CONST[4][20].xyzz, TEMP[5].xyzz 128: MOV TEMP[6].w, TEMP[5].xxxx 129: MOV TEMP[5].w, IMM[0].xxxx 130: MOV TEMP[5].x, TEMP[4].xxxx 131: MOV TEMP[5].y, TEMP[4].yyyy 132: MOV TEMP[5].z, TEMP[4].zzzz 133: MOV TEMP[4].w, IMM[3].xxxx 134: MOV TEMP[4].x, TEMP[0].xxxx 135: MOV TEMP[4].y, TEMP[2].xxxx 136: MOV TEMP[4].z, TEMP[3].xxxx 137: MOV TEMP[0].x, CONST[4][0].wwww 138: MOV TEMP[0].y, CONST[4][1].wwww 139: MOV TEMP[0].z, CONST[4][2].wwww 140: MOV TEMP[0].w, CONST[4][3].wwww 141: DP4 TEMP[0].x, TEMP[4], TEMP[0] 142: MAD TEMP[2].xy, IN[2].xyyy, CONST[1][1].zwww, CONST[1][2].xyyy 143: MOV TEMP[3].xy, IN[5].xyyy 144: MOV TEMP[3].w, IMM[0].xxxx 145: TXL TEMP[3], TEMP[3], SAMP[0], 2D, IMM[6].xyx 146: MOV TEMP[7].x, CONST[4][0].xxxx 147: MOV TEMP[7].y, CONST[4][1].xxxx 148: MOV TEMP[7].z, CONST[4][2].xxxx 149: MOV TEMP[7].w, CONST[4][3].xxxx 150: DP4 TEMP[7].x, TEMP[4], TEMP[7] 151: MOV TEMP[8].x, CONST[4][0].yyyy 152: MOV TEMP[8].y, CONST[4][1].yyyy 153: MOV TEMP[8].z, CONST[4][2].yyyy 154: MOV TEMP[8].w, CONST[4][3].yyyy 155: DP4 TEMP[8].x, TEMP[4], TEMP[8] 156: MOV TEMP[7].y, -TEMP[8].xxxx 157: MOV TEMP[8].x, CONST[4][0].zzzz 158: MOV TEMP[8].y, CONST[4][1].zzzz 159: MOV TEMP[8].z, CONST[4][2].zzzz 160: MOV TEMP[8].w, CONST[4][3].zzzz 161: DP4 TEMP[4].x, TEMP[4], TEMP[8] 162: MAD TEMP[4].x, IMM[3].wwww, TEMP[4].xxxx, -TEMP[0].xxxx 163: MOV TEMP[7].z, TEMP[4].xxxx 164: MOV TEMP[7].w, TEMP[0].xxxx 165: MOV OUT[1], TEMP[1] 166: MOV OUT[2].xy, TEMP[2].xyxx 167: MOV OUT[4], TEMP[5] 168: MOV OUT[6], IMM[0].xxxx 169: MOV OUT[5], TEMP[3] 170: MOV OUT[0], TEMP[7] 171: MOV OUT[3], TEMP[6] 172: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %17 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4 %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 %19 = call float @llvm.SI.load.const(<16 x i8> %18, i32 0) %20 = call float @llvm.SI.load.const(<16 x i8> %18, i32 4) %21 = call float @llvm.SI.load.const(<16 x i8> %18, i32 8) %22 = call float @llvm.SI.load.const(<16 x i8> %18, i32 12) %23 = call float @llvm.SI.load.const(<16 x i8> %18, i32 16) %24 = call float @llvm.SI.load.const(<16 x i8> %18, i32 20) %25 = call float @llvm.SI.load.const(<16 x i8> %18, i32 24) %26 = call float @llvm.SI.load.const(<16 x i8> %18, i32 28) %27 = call float @llvm.SI.load.const(<16 x i8> %18, i32 32) %28 = call float @llvm.SI.load.const(<16 x i8> %18, i32 36) %29 = call float @llvm.SI.load.const(<16 x i8> %18, i32 40) %30 = call float @llvm.SI.load.const(<16 x i8> %18, i32 44) %31 = call float @llvm.SI.load.const(<16 x i8> %18, i32 48) %32 = call float @llvm.SI.load.const(<16 x i8> %18, i32 52) %33 = call float @llvm.SI.load.const(<16 x i8> %18, i32 56) %34 = call float @llvm.SI.load.const(<16 x i8> %18, i32 60) %35 = call float @llvm.SI.load.const(<16 x i8> %18, i32 304) %36 = call float @llvm.SI.load.const(<16 x i8> %18, i32 308) %37 = call float @llvm.SI.load.const(<16 x i8> %18, i32 312) %38 = call float @llvm.SI.load.const(<16 x i8> %18, i32 320) %39 = call float @llvm.SI.load.const(<16 x i8> %18, i32 324) %40 = call float @llvm.SI.load.const(<16 x i8> %18, i32 328) %41 = call float @llvm.SI.load.const(<16 x i8> %18, i32 408) %42 = call float @llvm.SI.load.const(<16 x i8> %18, i32 412) %43 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %44 = load <8 x i32>, <8 x i32> addrspace(2)* %43, align 32, !tbaa !0 %45 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %46 = load <4 x i32>, <4 x i32> addrspace(2)* %45, align 16, !tbaa !0 %47 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %48 = load <16 x i8>, <16 x i8> addrspace(2)* %47, align 16, !tbaa !0 %49 = add i32 %5, %7 %50 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %48, i32 0, i32 %49) %51 = extractelement <4 x float> %50, i32 0 %52 = extractelement <4 x float> %50, i32 1 %53 = extractelement <4 x float> %50, i32 2 %54 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0 %56 = add i32 %5, %7 %57 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %55, i32 0, i32 %56) %58 = extractelement <4 x float> %57, i32 0 %59 = extractelement <4 x float> %57, i32 1 %60 = extractelement <4 x float> %57, i32 2 %61 = extractelement <4 x float> %57, i32 3 %62 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %63 = load <16 x i8>, <16 x i8> addrspace(2)* %62, align 16, !tbaa !0 %64 = add i32 %5, %7 %65 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %63, i32 0, i32 %64) %66 = extractelement <4 x float> %65, i32 0 %67 = extractelement <4 x float> %65, i32 1 %68 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %69 = load <16 x i8>, <16 x i8> addrspace(2)* %68, align 16, !tbaa !0 %70 = add i32 %5, %7 %71 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %69, i32 0, i32 %70) %72 = extractelement <4 x float> %71, i32 0 %73 = extractelement <4 x float> %71, i32 1 %74 = extractelement <4 x float> %71, i32 2 %75 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %76 = load <16 x i8>, <16 x i8> addrspace(2)* %75, align 16, !tbaa !0 %77 = add i32 %5, %7 %78 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %76, i32 0, i32 %77) %79 = extractelement <4 x float> %78, i32 0 %80 = extractelement <4 x float> %78, i32 1 %81 = extractelement <4 x float> %78, i32 2 %82 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 5 %83 = load <16 x i8>, <16 x i8> addrspace(2)* %82, align 16, !tbaa !0 %84 = add i32 %10, %6 %85 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %83, i32 0, i32 %84) %86 = extractelement <4 x float> %85, i32 0 %87 = extractelement <4 x float> %85, i32 1 %88 = bitcast float %72 to i32 %89 = shl i32 %88, 2 %90 = add i32 %89, 8 %91 = sitofp i32 %90 to float %92 = fadd float %91, 5.000000e-01 %93 = fmul float %92, %41 %94 = fadd float %93, %86 %95 = fadd float %87, 0.000000e+00 %96 = call float @floor(float %94) %97 = fsub float %94, %96 %98 = fmul float %96, %42 %99 = fadd float %98, %95 %100 = bitcast float %73 to i32 %101 = shl i32 %100, 2 %102 = add i32 %101, 8 %103 = sitofp i32 %102 to float %104 = fadd float %103, 5.000000e-01 %105 = fmul float %104, %41 %106 = fadd float %105, %86 %107 = fadd float %87, 0.000000e+00 %108 = call float @floor(float %106) %109 = fsub float %106, %108 %110 = fmul float %108, %42 %111 = fadd float %110, %107 %112 = bitcast float %74 to i32 %113 = shl i32 %112, 2 %114 = add i32 %113, 8 %115 = sitofp i32 %114 to float %116 = fadd float %115, 5.000000e-01 %117 = fmul float %116, %41 %118 = fadd float %117, %86 %119 = fadd float %87, 0.000000e+00 %120 = call float @floor(float %118) %121 = fsub float %118, %120 %122 = fmul float %120, %42 %123 = fadd float %122, %119 %124 = bitcast float %121 to i32 %125 = bitcast float %123 to i32 %126 = insertelement <4 x i32> undef, i32 %124, i32 0 %127 = insertelement <4 x i32> %126, i32 %125, i32 1 %128 = insertelement <4 x i32> %127, i32 0, i32 2 %129 = bitcast <8 x i32> %44 to <32 x i8> %130 = bitcast <4 x i32> %46 to <16 x i8> %131 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %128, <32 x i8> %129, <16 x i8> %130, i32 2) %132 = extractelement <4 x float> %131, i32 0 %133 = extractelement <4 x float> %131, i32 1 %134 = extractelement <4 x float> %131, i32 2 %135 = extractelement <4 x float> %131, i32 3 %136 = bitcast float %109 to i32 %137 = bitcast float %111 to i32 %138 = insertelement <4 x i32> undef, i32 %136, i32 0 %139 = insertelement <4 x i32> %138, i32 %137, i32 1 %140 = insertelement <4 x i32> %139, i32 0, i32 2 %141 = bitcast <8 x i32> %44 to <32 x i8> %142 = bitcast <4 x i32> %46 to <16 x i8> %143 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %140, <32 x i8> %141, <16 x i8> %142, i32 2) %144 = extractelement <4 x float> %143, i32 0 %145 = extractelement <4 x float> %143, i32 1 %146 = extractelement <4 x float> %143, i32 2 %147 = extractelement <4 x float> %143, i32 3 %148 = bitcast float %97 to i32 %149 = bitcast float %99 to i32 %150 = insertelement <4 x i32> undef, i32 %148, i32 0 %151 = insertelement <4 x i32> %150, i32 %149, i32 1 %152 = insertelement <4 x i32> %151, i32 0, i32 2 %153 = bitcast <8 x i32> %44 to <32 x i8> %154 = bitcast <4 x i32> %46 to <16 x i8> %155 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %152, <32 x i8> %153, <16 x i8> %154, i32 2) %156 = extractelement <4 x float> %155, i32 0 %157 = extractelement <4 x float> %155, i32 1 %158 = extractelement <4 x float> %155, i32 2 %159 = extractelement <4 x float> %155, i32 3 %160 = fmul float %79, %156 %161 = fmul float %79, %157 %162 = fmul float %79, %158 %163 = fmul float %79, %159 %164 = fmul float %80, %144 %165 = fadd float %164, %160 %166 = fmul float %80, %145 %167 = fadd float %166, %161 %168 = fmul float %80, %146 %169 = fadd float %168, %162 %170 = fmul float %80, %147 %171 = fadd float %170, %163 %172 = fmul float %81, %132 %173 = fadd float %172, %165 %174 = fmul float %81, %133 %175 = fadd float %174, %167 %176 = fmul float %81, %134 %177 = fadd float %176, %169 %178 = fmul float %81, %135 %179 = fadd float %178, %171 %180 = bitcast float %121 to i32 %181 = bitcast float %123 to i32 %182 = insertelement <4 x i32> , i32 %180, i32 1 %183 = insertelement <4 x i32> %182, i32 %181, i32 2 %184 = insertelement <4 x i32> %183, i32 0, i32 3 %185 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %184, <8 x i32> %44, <4 x i32> %46, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %186 = extractelement <4 x float> %185, i32 0 %187 = extractelement <4 x float> %185, i32 1 %188 = extractelement <4 x float> %185, i32 2 %189 = extractelement <4 x float> %185, i32 3 %190 = bitcast float %97 to i32 %191 = bitcast float %99 to i32 %192 = insertelement <4 x i32> , i32 %190, i32 1 %193 = insertelement <4 x i32> %192, i32 %191, i32 2 %194 = insertelement <4 x i32> %193, i32 0, i32 3 %195 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %194, <8 x i32> %44, <4 x i32> %46, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %196 = extractelement <4 x float> %195, i32 0 %197 = extractelement <4 x float> %195, i32 1 %198 = extractelement <4 x float> %195, i32 2 %199 = extractelement <4 x float> %195, i32 3 %200 = bitcast float %109 to i32 %201 = bitcast float %111 to i32 %202 = insertelement <4 x i32> , i32 %200, i32 1 %203 = insertelement <4 x i32> %202, i32 %201, i32 2 %204 = insertelement <4 x i32> %203, i32 0, i32 3 %205 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %204, <8 x i32> %44, <4 x i32> %46, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %206 = extractelement <4 x float> %205, i32 0 %207 = extractelement <4 x float> %205, i32 1 %208 = extractelement <4 x float> %205, i32 2 %209 = extractelement <4 x float> %205, i32 3 %210 = fmul float %80, %206 %211 = fmul float %80, %207 %212 = fmul float %80, %208 %213 = fmul float %80, %209 %214 = fmul float %79, %196 %215 = fadd float %214, %210 %216 = fmul float %79, %197 %217 = fadd float %216, %211 %218 = fmul float %79, %198 %219 = fadd float %218, %212 %220 = fmul float %79, %199 %221 = fadd float %220, %213 %222 = fmul float %81, %186 %223 = fadd float %222, %215 %224 = fmul float %81, %187 %225 = fadd float %224, %217 %226 = fmul float %81, %188 %227 = fadd float %226, %219 %228 = fmul float %81, %189 %229 = fadd float %228, %221 %230 = bitcast float %121 to i32 %231 = bitcast float %123 to i32 %232 = insertelement <4 x i32> , i32 %230, i32 1 %233 = insertelement <4 x i32> %232, i32 %231, i32 2 %234 = insertelement <4 x i32> %233, i32 0, i32 3 %235 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %234, <8 x i32> %44, <4 x i32> %46, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %236 = extractelement <4 x float> %235, i32 0 %237 = extractelement <4 x float> %235, i32 1 %238 = extractelement <4 x float> %235, i32 2 %239 = extractelement <4 x float> %235, i32 3 %240 = bitcast float %109 to i32 %241 = bitcast float %111 to i32 %242 = insertelement <4 x i32> , i32 %240, i32 1 %243 = insertelement <4 x i32> %242, i32 %241, i32 2 %244 = insertelement <4 x i32> %243, i32 0, i32 3 %245 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %244, <8 x i32> %44, <4 x i32> %46, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %246 = extractelement <4 x float> %245, i32 0 %247 = extractelement <4 x float> %245, i32 1 %248 = extractelement <4 x float> %245, i32 2 %249 = extractelement <4 x float> %245, i32 3 %250 = bitcast float %97 to i32 %251 = bitcast float %99 to i32 %252 = insertelement <4 x i32> , i32 %250, i32 1 %253 = insertelement <4 x i32> %252, i32 %251, i32 2 %254 = insertelement <4 x i32> %253, i32 0, i32 3 %255 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %254, <8 x i32> %44, <4 x i32> %46, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %256 = extractelement <4 x float> %255, i32 0 %257 = extractelement <4 x float> %255, i32 1 %258 = extractelement <4 x float> %255, i32 2 %259 = extractelement <4 x float> %255, i32 3 %260 = fmul float %79, %256 %261 = fmul float %79, %257 %262 = fmul float %79, %258 %263 = fmul float %79, %259 %264 = fmul float %80, %246 %265 = fadd float %264, %260 %266 = fmul float %80, %247 %267 = fadd float %266, %261 %268 = fmul float %80, %248 %269 = fadd float %268, %262 %270 = fmul float %80, %249 %271 = fadd float %270, %263 %272 = fmul float %81, %236 %273 = fadd float %272, %265 %274 = fmul float %81, %237 %275 = fadd float %274, %267 %276 = fmul float %81, %238 %277 = fadd float %276, %269 %278 = fmul float %81, %239 %279 = fadd float %278, %271 %280 = fmul float %58, 2.550000e+02 %281 = fadd float %280, -1.280000e+02 %282 = fmul float %59, 2.550000e+02 %283 = fadd float %282, -1.280000e+02 %284 = fmul float %60, 2.550000e+02 %285 = fadd float %284, -1.280000e+02 %286 = fmul float %61, 2.550000e+02 %287 = fadd float %286, -1.280000e+02 %288 = fcmp olt float %281, 0.000000e+00 %289 = fcmp olt float %283, 0.000000e+00 %290 = fcmp olt float %285, 0.000000e+00 %291 = fcmp olt float %287, 0.000000e+00 %292 = select i1 %288, float 1.000000e+00, float 0.000000e+00 %293 = call float @fabs(float %281) %294 = call float @fabs(float %283) %295 = call float @fabs(float %285) %296 = call float @fabs(float %287) %297 = fsub float %293, %292 %298 = select i1 %289, float -1.000000e+00, float -0.000000e+00 %299 = fadd float %294, %298 %300 = select i1 %290, float -1.000000e+00, float -0.000000e+00 %301 = fadd float %295, %300 %302 = select i1 %291, float -1.000000e+00, float -0.000000e+00 %303 = fadd float %296, %302 %304 = fadd float %297, -6.400000e+01 %305 = fadd float %299, -6.400000e+01 %306 = fadd float %301, -6.400000e+01 %307 = fadd float %303, -6.400000e+01 %308 = fcmp olt float %304, 0.000000e+00 %309 = fcmp olt float %305, 0.000000e+00 %310 = select i1 %308, float 1.000000e+00, float 0.000000e+00 %311 = select i1 %309, float 1.000000e+00, float 0.000000e+00 %312 = call float @fabs(float %304) %313 = call float @fabs(float %305) %314 = call float @fabs(float %306) %315 = call float @fabs(float %307) %316 = fsub float %312, %310 %317 = fsub float %313, %311 %318 = fmul float %316, 0x3F90410420000000 %319 = fmul float %317, 0x3F90410420000000 %320 = fsub float 1.000000e+00, %318 %321 = fsub float %320, %319 %322 = fmul float %318, %318 %323 = fmul float %319, %319 %324 = fadd float %323, %322 %325 = fmul float %321, %321 %326 = fadd float %324, %325 %327 = call float @llvm.AMDGPU.rsq.clamped.f32(float %326) %328 = fmul float %318, %327 %329 = fmul float %319, %327 %330 = fmul float %321, %327 %331 = fmul float %310, 2.000000e+00 %332 = fmul float %311, 2.000000e+00 %333 = fsub float 1.000000e+00, %331 %334 = fsub float 1.000000e+00, %332 %335 = fmul float %328, %333 %336 = fmul float %329, %334 %337 = fmul float %292, 2.000000e+00 %338 = fsub float 1.000000e+00, %337 %339 = fmul float %338, %330 %340 = fmul float %335, %173 %341 = fmul float %336, %175 %342 = fadd float %340, %341 %343 = fmul float %339, %177 %344 = fadd float %342, %343 %345 = fmul float %179, 0.000000e+00 %346 = fadd float %344, %345 %347 = fmul float %335, %223 %348 = fmul float %336, %225 %349 = fadd float %347, %348 %350 = fmul float %339, %227 %351 = fadd float %349, %350 %352 = fmul float %229, 0.000000e+00 %353 = fadd float %351, %352 %354 = fmul float %335, %273 %355 = fmul float %336, %275 %356 = fadd float %354, %355 %357 = fmul float %339, %277 %358 = fadd float %356, %357 %359 = fmul float %279, 0.000000e+00 %360 = fadd float %358, %359 %361 = fmul float %346, %346 %362 = fmul float %353, %353 %363 = fadd float %362, %361 %364 = fmul float %360, %360 %365 = fadd float %363, %364 %366 = call float @llvm.AMDGPU.rsq.clamped.f32(float %365) %367 = fmul float %346, %366 %368 = fmul float %353, %366 %369 = fmul float %360, %366 %370 = bitcast float %86 to i32 %371 = bitcast float %87 to i32 %372 = insertelement <4 x i32> undef, i32 %370, i32 0 %373 = insertelement <4 x i32> %372, i32 %371, i32 1 %374 = insertelement <4 x i32> %373, i32 0, i32 2 %375 = bitcast <8 x i32> %44 to <32 x i8> %376 = bitcast <4 x i32> %46 to <16 x i8> %377 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %374, <32 x i8> %375, <16 x i8> %376, i32 2) %378 = extractelement <4 x float> %377, i32 2 %379 = fmul float %51, %378 %380 = fmul float %52, %378 %381 = fmul float %53, %378 %382 = fmul float %379, %173 %383 = fmul float %380, %175 %384 = fadd float %382, %383 %385 = fmul float %381, %177 %386 = fadd float %384, %385 %387 = fadd float %386, %179 %388 = fmul float %379, %223 %389 = fmul float %380, %225 %390 = fadd float %388, %389 %391 = fmul float %381, %227 %392 = fadd float %390, %391 %393 = fadd float %392, %229 %394 = fmul float %379, %273 %395 = fmul float %380, %275 %396 = fadd float %394, %395 %397 = fmul float %381, %277 %398 = fadd float %396, %397 %399 = fadd float %398, %279 %400 = fsub float %387, %35 %401 = fsub float %393, %36 %402 = fsub float %399, %37 %403 = fmul float %38, %400 %404 = fmul float %39, %401 %405 = fadd float %404, %403 %406 = fmul float %40, %402 %407 = fadd float %405, %406 %408 = fmul float %387, %22 %409 = fmul float %393, %26 %410 = fadd float %408, %409 %411 = fmul float %399, %30 %412 = fadd float %410, %411 %413 = fadd float %412, %34 %414 = fmul float %66, %13 %415 = fadd float %414, %15 %416 = fmul float %67, %14 %417 = fadd float %416, %16 %418 = bitcast float %86 to i32 %419 = bitcast float %87 to i32 %420 = insertelement <4 x i32> , i32 %418, i32 1 %421 = insertelement <4 x i32> %420, i32 %419, i32 2 %422 = insertelement <4 x i32> %421, i32 0, i32 3 %423 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %422, <8 x i32> %44, <4 x i32> %46, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %424 = extractelement <4 x float> %423, i32 0 %425 = extractelement <4 x float> %423, i32 1 %426 = extractelement <4 x float> %423, i32 2 %427 = extractelement <4 x float> %423, i32 3 %428 = fmul float %387, %19 %429 = fmul float %393, %23 %430 = fadd float %428, %429 %431 = fmul float %399, %27 %432 = fadd float %430, %431 %433 = fadd float %432, %31 %434 = fmul float %387, %20 %435 = fmul float %393, %24 %436 = fadd float %434, %435 %437 = fmul float %399, %28 %438 = fadd float %436, %437 %439 = fadd float %438, %32 %440 = fsub float -0.000000e+00, %439 %441 = fmul float %387, %21 %442 = fmul float %393, %25 %443 = fadd float %441, %442 %444 = fmul float %399, %29 %445 = fadd float %443, %444 %446 = fadd float %445, %33 %447 = fmul float %446, 2.000000e+00 %448 = fsub float %447, %413 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %415, float %417, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %387, float %393, float %399, float %407) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %367, float %368, float %369, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %424, float %425, float %426, float %427) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %433, float %440, float %448, float %413) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @floor(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0xc3000000 ; 7E0202FF C3000000 v_mov_b32_e32 v2, 0x437f0000 ; 7E0402FF 437F0000 v_mov_b32_e32 v4, 0x80000000 ; 7E0802FF 80000000 v_mov_b32_e32 v5, 0xc2800000 ; 7E0A02FF C2800000 v_mov_b32_e32 v6, 0x3c820821 ; 7E0C02FF 3C820821 v_add_i32_e32 v0, s10, v0 ; 4A00000A v_add_i32_e32 v3, s11, v3 ; 4A06060B s_load_dwordx4 s[24:27], s[4:5], 0x0 ; C08C0500 s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908 s_load_dwordx4 s[36:39], s[8:9], 0xc ; C092090C s_load_dwordx4 s[40:43], s[8:9], 0x10 ; C0940910 s_load_dwordx4 s[8:11], s[8:9], 0x14 ; C0840914 s_load_dwordx4 s[44:47], s[2:3], 0x4 ; C0960304 s_load_dwordx4 s[48:51], s[2:3], 0x10 ; C0980310 s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700 v_mov_b32_e32 v13, 0 ; 7E1A0280 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[14:17], v0, s[12:15], 0 idxen ; E00C2000 80030E00 buffer_load_format_xyzw v[7:10], v0, s[16:19], 0 idxen ; E00C2000 80040700 s_waitcnt vmcnt(1) ; BF8C0771 buffer_load_format_xyzw v[17:20], v0, s[20:23], 0 idxen ; E00C2000 80051100 s_waitcnt vmcnt(1) ; BF8C0771 buffer_load_format_xyzw v[9:12], v0, s[36:39], 0 idxen ; E00C2000 80090900 s_waitcnt vmcnt(1) ; BF8C0771 buffer_load_format_xyzw v[19:22], v0, s[40:43], 0 idxen ; E00C2000 800A1300 buffer_load_format_xyzw v[26:29], v3, s[8:11], 0 idxen ; E00C2000 80021A03 s_buffer_load_dword s23, s[44:47], 0x6 ; C20BAD06 s_buffer_load_dword s22, s[44:47], 0x7 ; C20B2D07 s_buffer_load_dword s37, s[44:47], 0x8 ; C212AD08 s_buffer_load_dword s36, s[44:47], 0x9 ; C2122D09 s_buffer_load_dword s18, s[48:51], 0x51 ; C2093151 s_buffer_load_dword s8, s[48:51], 0x52 ; C2043152 s_buffer_load_dword s38, s[48:51], 0x66 ; C2133166 s_buffer_load_dword s39, s[48:51], 0x67 ; C213B167 s_buffer_load_dword s0, s[48:51], 0xf ; C200310F s_buffer_load_dword s20, s[48:51], 0x4c ; C20A314C s_buffer_load_dword s19, s[48:51], 0x4d ; C209B14D s_buffer_load_dword s15, s[48:51], 0x4e ; C207B14E s_buffer_load_dword s21, s[48:51], 0x50 ; C20AB150 s_buffer_load_dword s4, s[48:51], 0x5 ; C2023105 s_buffer_load_dword s3, s[48:51], 0x6 ; C201B106 s_buffer_load_dword s9, s[48:51], 0x7 ; C204B107 s_buffer_load_dword s1, s[48:51], 0x8 ; C200B108 s_buffer_load_dword s2, s[48:51], 0x9 ; C2013109 s_buffer_load_dword s5, s[48:51], 0x0 ; C202B100 s_buffer_load_dword s6, s[48:51], 0x1 ; C2033101 s_buffer_load_dword s7, s[48:51], 0x2 ; C203B102 s_buffer_load_dword s10, s[48:51], 0x3 ; C2053103 s_buffer_load_dword s16, s[48:51], 0x4 ; C2083104 s_buffer_load_dword s11, s[48:51], 0xa ; C205B10A s_buffer_load_dword s17, s[48:51], 0xb ; C208B10B s_buffer_load_dword s12, s[48:51], 0xc ; C206310C s_buffer_load_dword s13, s[48:51], 0xd ; C206B10D s_buffer_load_dword s14, s[48:51], 0xe ; C207310E s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v0, s37 ; 7E000225 v_mad_f32 v3, v2, v7, v1 ; D2820003 04060F02 v_mac_f32_e32 v1, v2, v8 ; 3E021102 v_lshlrev_b32_e32 v2, 2, v9 ; 34041282 v_lshlrev_b32_e32 v7, 2, v10 ; 340E1482 v_lshlrev_b32_e32 v8, 2, v11 ; 34101682 v_add_i32_e32 v2, 8, v2 ; 4A040488 v_cvt_f32_i32_e32 v2, v2 ; 7E040B02 v_add_f32_e32 v12, 0, v27 ; 06183680 v_add_i32_e32 v7, 8, v7 ; 4A0E0E88 v_cvt_f32_i32_e32 v7, v7 ; 7E0E0B07 v_add_f32_e32 v2, 0.5, v2 ; 060404F0 v_mad_f32 v2, s38, v2, v26 ; D2820002 046A0426 v_floor_f32_e32 v9, v2 ; 7E124902 v_subrev_f32_e32 v22, v9, v2 ; 0A2C0509 v_mad_f32 v23, s39, v9, v12 ; D2820017 04321227 v_add_i32_e32 v2, 8, v8 ; 4A041088 v_cvt_f32_i32_e32 v2, v2 ; 7E040B02 v_add_f32_e32 v7, 0.5, v7 ; 060E0EF0 v_mad_f32 v7, s38, v7, v26 ; D2820007 046A0E26 v_floor_f32_e32 v8, v7 ; 7E104907 v_subrev_f32_e32 v28, v8, v7 ; 0A380F08 v_mad_f32 v29, s39, v8, v12 ; D282001D 04321027 v_add_f32_e32 v2, 0.5, v2 ; 060404F0 v_mad_f32 v2, s38, v2, v26 ; D2820002 046A0426 v_floor_f32_e32 v7, v2 ; 7E0E4902 v_subrev_f32_e32 v11, v7, v2 ; 0A160507 v_mac_f32_e32 v12, s39, v7 ; 3E180E27 v_mov_b32_e32 v30, v13 ; 7E3C030D v_mov_b32_e32 v24, v13 ; 7E30030D image_sample_l v[31:34], 15, 0, 0, 0, 0, 0, 0, 0, v[11:14], s[28:35], s[24:27] ; F0900F00 00C71F0B s_waitcnt vmcnt(0) ; BF8C0770 image_sample_l v[35:38], 15, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[28:35], s[24:27] ; F0900F00 00C7231C image_sample_l v[7:10], 15, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[28:35], s[24:27] ; F0900F00 00C70716 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, v7, v19 ; 10042707 v_mul_f32_e32 v7, v8, v19 ; 100E2708 v_mul_f32_e32 v8, v9, v19 ; 10102709 v_mul_f32_e32 v9, v10, v19 ; 1012270A v_mov_b32_e32 v10, 0x10001 ; 7E1402FF 00010001 v_mac_f32_e32 v2, v35, v20 ; 3E042923 v_mac_f32_e32 v7, v36, v20 ; 3E0E2924 v_mac_f32_e32 v8, v37, v20 ; 3E102925 v_mov_b32_e32 v39, v10 ; 7E4E030A v_mov_b32_e32 v40, v11 ; 7E50030B v_mov_b32_e32 v41, v12 ; 7E52030C v_mov_b32_e32 v42, v13 ; 7E54030D v_mov_b32_e32 v43, v10 ; 7E56030A v_mov_b32_e32 v44, v11 ; 7E58030B v_mov_b32_e32 v45, v12 ; 7E5A030C v_mov_b32_e32 v46, v13 ; 7E5C030D v_mac_f32_e32 v9, v38, v20 ; 3E122926 v_mov_b32_e32 v40, v22 ; 7E500316 v_mov_b32_e32 v44, v28 ; 7E58031C v_mac_f32_e32 v2, v31, v21 ; 3E042B1F v_mac_f32_e32 v7, v32, v21 ; 3E0E2B20 v_mac_f32_e32 v8, v33, v21 ; 3E102B21 v_mov_b32_e32 v41, v23 ; 7E520317 v_mac_f32_e32 v9, v34, v21 ; 3E122B22 v_mov_b32_e32 v45, v29 ; 7E5A031D v_mov_b32_e32 v42, v13 ; 7E54030D image_sample_l_o v[30:33], 15, 0, 0, 0, 0, 0, 0, 0, v[10:13], s[28:35], s[24:27] ; F0D00F00 00C71E0A image_sample_l_o v[34:37], 15, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[28:35], s[24:27] ; F0D00F00 00C72227 v_mov_b32_e32 v46, v13 ; 7E5C030D image_sample_l_o v[38:41], 15, 0, 0, 0, 0, 0, 0, 0, v[43:46], s[28:35], s[24:27] ; F0D00F00 00C7262B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v24, v38, v20 ; 10302926 v_mul_f32_e32 v38, v39, v20 ; 104C2927 v_mul_f32_e32 v39, v40, v20 ; 104E2928 v_mul_f32_e32 v40, v41, v20 ; 10502929 v_mov_b32_e32 v10, 0x20002 ; 7E1402FF 00020002 image_sample_l_o v[41:44], 15, 0, 0, 0, 0, 0, 0, 0, v[10:13], s[28:35], s[24:27] ; F0D00F00 00C7290A v_mov_b32_e32 v11, v28 ; 7E16031C v_mac_f32_e32 v24, v34, v19 ; 3E302722 v_mac_f32_e32 v38, v35, v19 ; 3E4C2723 v_mac_f32_e32 v39, v36, v19 ; 3E4E2724 v_mac_f32_e32 v40, v37, v19 ; 3E502725 v_mov_b32_e32 v12, v29 ; 7E18031D v_mac_f32_e32 v24, v30, v21 ; 3E302B1E v_mac_f32_e32 v38, v31, v21 ; 3E4C2B1F v_mac_f32_e32 v39, v32, v21 ; 3E4E2B20 image_sample_l_o v[28:31], 15, 0, 0, 0, 0, 0, 0, 0, v[10:13], s[28:35], s[24:27] ; F0D00F00 00C71C0A v_mov_b32_e32 v11, v22 ; 7E160316 v_mac_f32_e32 v40, v33, v21 ; 3E502B21 v_mov_b32_e32 v12, v23 ; 7E180317 image_sample_l_o v[32:35], 15, 0, 0, 0, 0, 0, 0, 0, v[10:13], s[28:35], s[24:27] ; F0D00F00 00C7200A s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v10, v32, v19 ; 10142720 v_mul_f32_e32 v11, v33, v19 ; 10162721 v_mul_f32_e32 v12, v34, v19 ; 10182722 v_mul_f32_e32 v19, v35, v19 ; 10262723 v_mac_f32_e32 v10, v28, v20 ; 3E14291C v_mac_f32_e32 v11, v29, v20 ; 3E16291D v_mac_f32_e32 v12, v30, v20 ; 3E18291E v_mac_f32_e32 v19, v31, v20 ; 3E26291F v_mac_f32_e32 v10, v41, v21 ; 3E142B29 v_mac_f32_e32 v11, v42, v21 ; 3E162B2A v_mac_f32_e32 v12, v43, v21 ; 3E182B2B v_mac_f32_e32 v19, v44, v21 ; 3E262B2C v_mac_f32_e32 v0, s23, v17 ; 3E002217 v_mov_b32_e32 v17, s36 ; 7E220224 v_mac_f32_e32 v17, s22, v18 ; 3E222416 v_mov_b32_e32 v28, v13 ; 7E38030D image_sample_l v18, 4, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[28:35], s[24:27] ; F0900400 00C7121A v_mov_b32_e32 v25, 0x30003 ; 7E3202FF 00030003 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v14, v18, v14 ; 101C1D12 v_mul_f32_e32 v15, v18, v15 ; 101E1F12 v_mul_f32_e32 v16, v18, v16 ; 10202112 v_mov_b32_e32 v28, v13 ; 7E38030D image_sample_l_o v[20:23], 15, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[28:35], s[24:27] ; F0D00F00 00C71419 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v4, v4, -1.0, vcc ; D2000004 01A9E704 v_add_f32_e64 v1, |v1|, v4 ; D2060101 00020901 v_cmp_gt_f32_e32 vcc, 0, v3 ; 7C080680 v_cndmask_b32_e64 v4, 0, 1.0, vcc ; D2000004 01A9E480 v_sub_f32_e64 v3, |v3|, v4 ; D2080103 00020903 v_add_f32_e32 v3, v5, v3 ; 06060705 v_add_f32_e32 v1, v5, v1 ; 06020305 v_cmp_gt_f32_e32 vcc, 0, v3 ; 7C080680 v_cndmask_b32_e64 v5, 0, 1.0, vcc ; D2000005 01A9E480 v_sub_f32_e64 v3, |v3|, v5 ; D2080103 00020B03 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v18, 0, 1.0, vcc ; D2000012 01A9E480 v_sub_f32_e64 v1, |v1|, v18 ; D2080101 00022501 v_mul_f32_e32 v25, v6, v3 ; 10320706 v_mad_f32 v3, -v3, v6, 1.0 ; D2820003 23CA0D03 v_mad_f32 v3, -v1, v6, v3 ; D2820003 240E0D01 v_mul_f32_e32 v1, v6, v1 ; 10020306 v_mul_f32_e32 v6, v25, v25 ; 100C3319 v_mac_f32_e32 v6, v1, v1 ; 3E0C0301 v_mac_f32_e32 v6, v3, v3 ; 3E0C0703 v_rsq_clamp_f32_e32 v6, v6 ; 7E0C5906 exp 15, 32, 0, 0, 0, v13, v13, v13, v13 ; F800020F 0D0D0D0D exp 15, 33, 0, 0, 0, v0, v17, v13, v13 ; F800021F 0D0D1100 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_mad_f32 v0, -2.0, v5, 1.0 ; D2820000 03CA0AF5 v_mul_f32_e32 v5, v6, v25 ; 100A3306 v_mul_f32_e32 v0, v0, v5 ; 10000B00 v_mad_f32 v5, -2.0, v18, 1.0 ; D2820005 03CA24F5 v_mul_f32_e32 v1, v6, v1 ; 10020306 v_mul_f32_e32 v1, v5, v1 ; 10020305 v_mul_f32_e32 v5, v7, v1 ; 100A0307 v_mac_f32_e32 v5, v2, v0 ; 3E0A0102 v_mul_f32_e32 v7, v7, v15 ; 100E1F07 v_mac_f32_e32 v7, v2, v14 ; 3E0E1D02 v_mul_f32_e32 v2, v38, v1 ; 10040326 v_mac_f32_e32 v2, v24, v0 ; 3E040118 v_mul_f32_e32 v17, v38, v15 ; 10221F26 v_mac_f32_e32 v17, v24, v14 ; 3E221D18 v_mul_f32_e32 v1, v11, v1 ; 1002030B v_mul_f32_e32 v11, v11, v15 ; 10161F0B v_mac_f32_e32 v1, v10, v0 ; 3E02010A v_mac_f32_e32 v11, v10, v14 ; 3E161D0A v_mul_f32_e32 v0, v6, v3 ; 10000706 v_mad_f32 v3, -2.0, v4, 1.0 ; D2820003 03CA08F5 v_mul_f32_e32 v0, v0, v3 ; 10000700 v_mac_f32_e32 v5, v8, v0 ; 3E0A0108 v_mac_f32_e32 v7, v8, v16 ; 3E0E2108 v_mac_f32_e32 v2, v39, v0 ; 3E040127 v_mac_f32_e32 v17, v39, v16 ; 3E222127 v_mac_f32_e32 v1, v12, v0 ; 3E02010C v_mac_f32_e32 v11, v12, v16 ; 3E16210C v_add_f32_e32 v0, v9, v7 ; 06000F09 v_subrev_f32_e32 v3, s20, v0 ; 0A060014 v_mul_f32_e32 v3, s21, v3 ; 10060615 v_add_f32_e32 v4, v40, v17 ; 06082328 v_subrev_f32_e32 v6, s19, v4 ; 0A0C0813 v_mac_f32_e32 v3, s18, v6 ; 3E060C12 v_mac_f32_e32 v5, 0, v9 ; 3E0A1280 v_mac_f32_e32 v2, 0, v40 ; 3E045080 v_mac_f32_e32 v1, 0, v19 ; 3E022680 v_add_f32_e32 v6, v19, v11 ; 060C1713 v_mul_f32_e32 v7, v5, v5 ; 100E0B05 v_mac_f32_e32 v7, v2, v2 ; 3E0E0502 v_mac_f32_e32 v7, v1, v1 ; 3E0E0301 v_rsq_clamp_f32_e32 v7, v7 ; 7E0E5907 v_subrev_f32_e32 v8, s15, v6 ; 0A100C0F v_mac_f32_e32 v3, s8, v8 ; 3E061008 exp 15, 34, 0, 0, 0, v0, v4, v6, v3 ; F800022F 03060400 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v3, v7, v5 ; 10060B07 v_mul_f32_e32 v2, v7, v2 ; 10040507 v_mul_f32_e32 v1, v7, v1 ; 10020307 exp 15, 35, 0, 0, 0, v3, v2, v1, v13 ; F800023F 0D010203 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v1, s9, v4 ; 10020809 v_mul_f32_e32 v2, s16, v4 ; 10040810 v_mul_f32_e32 v3, s4, v4 ; 10060804 v_mul_f32_e32 v4, s3, v4 ; 10080803 v_mac_f32_e32 v1, s10, v0 ; 3E02000A v_mac_f32_e32 v2, s5, v0 ; 3E040005 v_mac_f32_e32 v3, s6, v0 ; 3E060006 v_mac_f32_e32 v4, s7, v0 ; 3E080007 v_mac_f32_e32 v1, s17, v6 ; 3E020C11 v_mac_f32_e32 v2, s1, v6 ; 3E040C01 v_mac_f32_e32 v3, s2, v6 ; 3E060C02 v_mac_f32_e32 v4, s11, v6 ; 3E080C0B v_add_f32_e32 v0, s0, v1 ; 06000200 v_add_f32_e32 v1, s12, v2 ; 0602040C v_add_f32_e32 v2, s13, v3 ; 0604060D v_add_f32_e32 v3, s14, v4 ; 0606080E v_xor_b32_e32 v2, 0x80000000, v2 ; 3A0404FF 80000000 v_mad_f32 v3, 2.0, v3, -v0 ; D2820003 840206F4 exp 15, 36, 0, 0, 0, v20, v21, v22, v23 ; F800024F 17161514 exp 15, 37, 0, 0, 0, v13, v13, v13, v13 ; F800025F 0D0D0D0D exp 15, 12, 0, 0, 0, v1, v2, v3, v0 ; F80000CF 00030201 exp 15, 13, 0, 1, 0, v13, v13, v13, v13 ; F80008DF 0D0D0D0D s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 48 Code Size: 1280 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0xB last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL OUT[2], COLOR[2] DCL OUT[3], COLOR[3] DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0..3] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0..3], LOCAL IMM[0] UINT32 {0, 16, 4, 0} IMM[1] FLT32 { 0.5000, 0.0000, 1.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MUL TEMP[1].xyz, CONST[1][1].xyzz, TEMP[0].xyzz 3: MUL TEMP[1].xyz, TEMP[1].xyzz, IN[3].xyzz 4: DP3 TEMP[2].x, IN[2].xyzz, IN[2].xyzz 5: RSQ TEMP[2].x, TEMP[2].xxxx 6: MUL TEMP[2].xyz, IN[2].xyzz, TEMP[2].xxxx 7: MAD TEMP[2].xyz, TEMP[2].xyzz, IMM[1].xxxx, IMM[1].xxxx 8: MOV TEMP[3].w, IMM[1].yyyy 9: MOV TEMP[3].x, TEMP[2].xxxx 10: MOV TEMP[3].y, TEMP[2].yyyy 11: MOV TEMP[3].z, TEMP[2].zzzz 12: FSLT TEMP[0].x, TEMP[0].wwww, CONST[1][0].yyyy 13: AND TEMP[0].x, TEMP[0].xxxx, IMM[1].zzzz 14: KILL_IF -TEMP[0].xxxx 15: MOV TEMP[0].w, IMM[1].yyyy 16: MOV TEMP[0].x, TEMP[1].xxxx 17: MOV TEMP[0].y, TEMP[1].yyyy 18: MOV TEMP[0].z, TEMP[1].zzzz 19: MOV OUT[2], IN[1].wwww 20: MOV OUT[0], TEMP[0] 21: MOV OUT[3], TEMP[3] 22: MOV OUT[1], IMM[1].yyyy 23: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %28 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %29 = load <32 x i8>, <32 x i8> addrspace(2)* %28, align 32, !tbaa !0 %30 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 %32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %37 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %38 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %41 = bitcast float %32 to i32 %42 = bitcast float %33 to i32 %43 = insertelement <2 x i32> undef, i32 %41, i32 0 %44 = insertelement <2 x i32> %43, i32 %42, i32 1 %45 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %44, <32 x i8> %29, <16 x i8> %31, i32 2) %46 = extractelement <4 x float> %45, i32 0 %47 = extractelement <4 x float> %45, i32 1 %48 = extractelement <4 x float> %45, i32 2 %49 = extractelement <4 x float> %45, i32 3 %50 = fmul float %25, %46 %51 = fmul float %26, %47 %52 = fmul float %27, %48 %53 = fmul float %50, %38 %54 = fmul float %51, %39 %55 = fmul float %52, %40 %56 = fmul float %35, %35 %57 = fmul float %36, %36 %58 = fadd float %57, %56 %59 = fmul float %37, %37 %60 = fadd float %58, %59 %61 = call float @llvm.AMDGPU.rsq.clamped.f32(float %60) %62 = fmul float %35, %61 %63 = fmul float %36, %61 %64 = fmul float %37, %61 %65 = fmul float %62, 5.000000e-01 %66 = fadd float %65, 5.000000e-01 %67 = fmul float %63, 5.000000e-01 %68 = fadd float %67, 5.000000e-01 %69 = fmul float %64, 5.000000e-01 %70 = fadd float %69, 5.000000e-01 %71 = fcmp olt float %49, %24 %72 = select i1 %71, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %72) %73 = call i32 @llvm.SI.packf16(float %53, float %54) %74 = bitcast i32 %73 to float %75 = call i32 @llvm.SI.packf16(float %55, float 0.000000e+00) %76 = bitcast i32 %75 to float %77 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %78 = bitcast i32 %77 to float %79 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %80 = bitcast i32 %79 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %74, float %76, float %74, float %76) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 1, i32 1, float %78, float %80, float %78, float %80) %81 = call i32 @llvm.SI.packf16(float %66, float %68) %82 = bitcast i32 %81 to float %83 = call i32 @llvm.SI.packf16(float %70, float 0.000000e+00) %84 = bitcast i32 %83 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 2, i32 0, float %34, float %34, float %34, float %34) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 3, i32 1, float %82, float %84, float %82, float %84) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 3, 1, [m0] ; C8100700 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x1 ; C2040101 s_buffer_load_dword s9, s[0:3], 0x4 ; C2048104 s_buffer_load_dword s10, s[0:3], 0x5 ; C2050105 s_buffer_load_dword s0, s[0:3], 0x6 ; C2000106 v_interp_p2_f32 v4, [v4], v1, 3, 1, [m0] ; C8110701 v_interp_p1_f32 v5, v0, 0, 2, [m0] ; C8140800 v_interp_p2_f32 v5, [v5], v1, 0, 2, [m0] ; C8150801 v_interp_p1_f32 v6, v0, 1, 2, [m0] ; C8180900 s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 v_interp_p2_f32 v6, [v6], v1, 1, 2, [m0] ; C8190901 v_interp_p1_f32 v7, v0, 2, 2, [m0] ; C81C0A00 v_interp_p2_f32 v7, [v7], v1, 2, 2, [m0] ; C81D0A01 v_interp_p1_f32 v8, v0, 0, 3, [m0] ; C8200C00 v_interp_p2_f32 v8, [v8], v1, 0, 3, [m0] ; C8210C01 v_interp_p1_f32 v9, v0, 1, 3, [m0] ; C8240D00 v_interp_p2_f32 v9, [v9], v1, 1, 3, [m0] ; C8250D01 v_interp_p1_f32 v0, v0, 2, 3, [m0] ; C8000E00 v_interp_p2_f32 v0, [v0], v1, 2, 3, [m0] ; C8010E01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[12:15] ; F0800F00 00640A02 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, s9, v10 ; 10021409 v_mul_f32_e32 v2, s10, v11 ; 1004160A v_mul_f32_e32 v3, s0, v12 ; 10061800 v_cmp_gt_f32_e32 vcc, s8, v13 ; 7C081A08 v_mul_f32_e32 v10, v5, v5 ; 10140B05 v_mac_f32_e32 v10, v6, v6 ; 3E140D06 v_mac_f32_e32 v10, v7, v7 ; 3E140F07 v_rsq_clamp_f32_e32 v10, v10 ; 7E14590A v_mul_f32_e32 v1, v8, v1 ; 10020308 v_mul_f32_e32 v2, v9, v2 ; 10040509 v_mul_f32_e32 v0, v0, v3 ; 10000700 v_mul_f32_e32 v3, v10, v5 ; 10060B0A v_mul_f32_e32 v5, v10, v6 ; 100A0D0A v_mul_f32_e32 v6, v10, v7 ; 100C0F0A v_mad_f32 v3, 0.5, v3, 0.5 ; D2820003 03C206F0 v_mad_f32 v5, 0.5, v5, 0.5 ; D2820005 03C20AF0 v_mad_f32 v6, 0.5, v6, 0.5 ; D2820006 03C20CF0 v_cndmask_b32_e64 v7, 0, -1.0, vcc ; D2000007 01A9E680 v_cmpx_le_f32_e32 vcc, 0, v7 ; 7C260E80 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e64 v0, v0, 0 ; D25E0000 00010100 exp 15, 0, 1, 0, 0, v1, v0, v1, v0 ; F800040F 00010001 s_waitcnt expcnt(0) ; BF8C070F v_cvt_pkrtz_f16_f32_e64 v0, 0, 0 ; D25E0000 00010080 exp 15, 1, 1, 0, 0, v0, v0, v0, v0 ; F800041F 00000000 s_waitcnt expcnt(0) ; BF8C070F v_cvt_pkrtz_f16_f32_e32 v0, v3, v5 ; 5E000B03 exp 15, 2, 0, 0, 0, v4, v4, v4, v4 ; F800002F 04040404 v_cvt_pkrtz_f16_f32_e64 v1, v6, 0 ; D25E0001 00010106 exp 15, 3, 1, 1, 1, v0, v1, v0, v1 ; F8001C3F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 16 Code Size: 296 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL OUT[4], GENERIC[2] DCL OUT[5], GENERIC[3] DCL OUT[6], GENERIC[4] DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0..2] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL DCL TEMP[2..8], LOCAL IMM[0] FLT32 { 0.0000, 0.5000, 255.0000, -128.0000} IMM[1] UINT32 {3, 400, 304, 320} IMM[2] INT32 {2, 8, 1, 0} IMM[3] FLT32 { 1.0000, -64.0000, 0.0159, 2.0000} IMM[4] UINT32 {4, 0, 12, 28} IMM[5] FLT32 { 16.0000, 0.0000, 0.0000, 0.0000} IMM[6] UINT32 {44, 60, 24, 32} IMM[7] INT32 {3, 0, 0, 0} IMM[8] UINT32 {16, 48, 20, 36} IMM[9] UINT32 {52, 8, 40, 56} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MOV TEMP[0].y, IMM[0].xxxx 4: SHL TEMP[2].x, IN[3].xxxx, IMM[2].xxxx 5: UADD TEMP[2].x, TEMP[2].xxxx, IMM[2].yyyy 6: I2F TEMP[2].x, TEMP[2].xxxx 7: ADD TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy 8: MUL TEMP[0].x, TEMP[2].xxxx, CONST[4][25].zzzz 9: ADD TEMP[0].xy, TEMP[0].xyyy, IN[4].xyyy 10: FLR TEMP[2].x, TEMP[0].xxxx 11: ADD TEMP[3].x, TEMP[0].xxxx, -TEMP[2].xxxx 12: MAD TEMP[0].x, TEMP[2].xxxx, CONST[4][25].wwww, TEMP[0].yyyy 13: MOV TEMP[3].y, TEMP[0].xxxx 14: MOV TEMP[0].xy, TEMP[3].xyyy 15: MOV TEMP[0].w, IMM[0].xxxx 16: TXL TEMP[0], TEMP[0], SAMP[0], 2D 17: MOV TEMP[2].xy, TEMP[3].xyyy 18: MOV TEMP[2].w, IMM[0].xxxx 19: TXL TEMP[2], TEMP[2], SAMP[0], 2D, IMM[2].zwz 20: MOV TEMP[3].xy, TEMP[3].xyyy 21: MOV TEMP[3].w, IMM[0].xxxx 22: TXL TEMP[3], TEMP[3], SAMP[0], 2D, IMM[2].xwx 23: MAD TEMP[4], IN[1], IMM[0].zzzz, IMM[0].wwww 24: FSLT TEMP[5], TEMP[4], IMM[0].xxxx 25: AND TEMP[5], TEMP[5], IMM[3].xxxx 26: ABS TEMP[4], TEMP[4] 27: ADD TEMP[4], TEMP[4], -TEMP[5] 28: ADD TEMP[4], TEMP[4], IMM[3].yyyy 29: FSLT TEMP[6], TEMP[4], IMM[0].xxxx 30: AND TEMP[6], TEMP[6], IMM[3].xxxx 31: ABS TEMP[4], TEMP[4] 32: ADD TEMP[4], TEMP[4], -TEMP[6] 33: MUL TEMP[4].xy, TEMP[4], IMM[3].zzzz 34: MOV TEMP[7].x, TEMP[4].xxxx 35: MOV TEMP[7].y, TEMP[4].yyyy 36: ADD TEMP[8].x, IMM[3].xxxx, -TEMP[4].xxxx 37: ADD TEMP[4].x, TEMP[8].xxxx, -TEMP[4].yyyy 38: MOV TEMP[7].z, TEMP[4].xxxx 39: DP3 TEMP[4].x, TEMP[7].xyzz, TEMP[7].xyzz 40: RSQ TEMP[4].x, TEMP[4].xxxx 41: MUL TEMP[4].xyz, TEMP[7].xyzz, TEMP[4].xxxx 42: MUL TEMP[6], TEMP[6], IMM[3].wwww 43: ADD TEMP[6].xy, IMM[3].xxxx, -TEMP[6] 44: MUL TEMP[6].xy, TEMP[4].xyyy, TEMP[6].xyyy 45: MOV TEMP[7].w, IMM[0].xxxx 46: MOV TEMP[7].x, TEMP[6].xxxx 47: MOV TEMP[7].y, TEMP[6].yyyy 48: MUL TEMP[5].x, TEMP[5].xxxx, IMM[3].wwww 49: ADD TEMP[5].x, IMM[3].xxxx, -TEMP[5].xxxx 50: MUL TEMP[4].x, TEMP[5].xxxx, TEMP[4].zzzz 51: MOV TEMP[7].z, TEMP[4].xxxx 52: DP4 TEMP[4].x, TEMP[7], TEMP[0] 53: DP4 TEMP[5].x, TEMP[7], TEMP[2] 54: MOV TEMP[4].y, TEMP[5].xxxx 55: DP4 TEMP[5].x, TEMP[7], TEMP[3] 56: MOV TEMP[4].z, TEMP[5].xxxx 57: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 58: RSQ TEMP[5].x, TEMP[5].xxxx 59: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx 60: MOV TEMP[5].xy, IN[4].xyyy 61: MOV TEMP[5].w, IMM[0].xxxx 62: TXL TEMP[5].z, TEMP[5], SAMP[0], 2D 63: MUL TEMP[5].xyz, IN[0].xyzz, TEMP[5].zzzz 64: MOV TEMP[6].w, IMM[3].xxxx 65: MOV TEMP[6].x, TEMP[5].xxxx 66: MOV TEMP[6].y, TEMP[5].yyyy 67: MOV TEMP[6].z, TEMP[5].zzzz 68: DP4 TEMP[0].x, TEMP[6], TEMP[0] 69: DP4 TEMP[2].x, TEMP[6], TEMP[2] 70: DP4 TEMP[3].x, TEMP[6], TEMP[3] 71: MOV TEMP[5].x, TEMP[0].xxxx 72: MOV TEMP[5].y, TEMP[2].xxxx 73: MOV TEMP[5].z, TEMP[3].xxxx 74: ADD TEMP[5].xyz, TEMP[5].xyzz, -CONST[4][19].xyzz 75: MOV TEMP[6].x, TEMP[0].xxxx 76: MOV TEMP[6].y, TEMP[2].xxxx 77: MOV TEMP[6].z, TEMP[3].xxxx 78: DP3 TEMP[7].x, CONST[4][20].xyzz, TEMP[5].xyzz 79: MOV TEMP[6].w, TEMP[7].xxxx 80: MOV TEMP[7].x, TEMP[4].xxxx 81: MOV TEMP[7].y, TEMP[4].yyyy 82: MOV TEMP[7].z, TEMP[4].zzzz 83: DP3 TEMP[8].x, TEMP[5].xyzz, TEMP[5].xyzz 84: RSQ TEMP[8].x, TEMP[8].xxxx 85: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[8].xxxx 86: DP3 TEMP[8].x, TEMP[4].xyzz, CONST[5][0].xyzz 87: MUL TEMP[4].xyz, TEMP[8].xxxx, TEMP[4].xyzz 88: MUL TEMP[4].xyz, IMM[3].wwww, TEMP[4].xyzz 89: ADD TEMP[4].xyz, CONST[5][0].xyzz, -TEMP[4].xyzz 90: DP3 TEMP[4].x, -TEMP[5].xyzz, TEMP[4].xyzz 91: MOV_SAT TEMP[4].x, TEMP[4].xxxx 92: POW TEMP[4].x, TEMP[4].xxxx, IMM[5].xxxx 93: MOV_SAT TEMP[4].x, TEMP[4].xxxx 94: MOV TEMP[7].w, TEMP[4].xxxx 95: MOV TEMP[4].w, IMM[3].xxxx 96: MOV TEMP[4].x, TEMP[0].xxxx 97: MOV TEMP[4].y, TEMP[2].xxxx 98: MOV TEMP[4].z, TEMP[3].xxxx 99: MOV TEMP[0].x, CONST[4][0].wwww 100: MOV TEMP[0].y, CONST[4][1].wwww 101: MOV TEMP[0].z, CONST[4][2].wwww 102: MOV TEMP[0].w, CONST[4][3].wwww 103: DP4 TEMP[0].x, TEMP[4], TEMP[0] 104: MAD TEMP[2].xy, IN[2].xyyy, CONST[1][1].zwww, CONST[1][2].xyyy 105: MOV TEMP[3].xy, IN[4].xyyy 106: MOV TEMP[3].w, IMM[0].xxxx 107: TXL TEMP[3], TEMP[3], SAMP[0], 2D, IMM[7].xyx 108: MOV TEMP[5].x, CONST[4][0].xxxx 109: MOV TEMP[5].y, CONST[4][1].xxxx 110: MOV TEMP[5].z, CONST[4][2].xxxx 111: MOV TEMP[5].w, CONST[4][3].xxxx 112: DP4 TEMP[5].x, TEMP[4], TEMP[5] 113: MOV TEMP[8].x, CONST[4][0].yyyy 114: MOV TEMP[8].y, CONST[4][1].yyyy 115: MOV TEMP[8].z, CONST[4][2].yyyy 116: MOV TEMP[8].w, CONST[4][3].yyyy 117: DP4 TEMP[8].x, TEMP[4], TEMP[8] 118: MOV TEMP[5].y, -TEMP[8].xxxx 119: MOV TEMP[8].x, CONST[4][0].zzzz 120: MOV TEMP[8].y, CONST[4][1].zzzz 121: MOV TEMP[8].z, CONST[4][2].zzzz 122: MOV TEMP[8].w, CONST[4][3].zzzz 123: DP4 TEMP[4].x, TEMP[4], TEMP[8] 124: MAD TEMP[4].x, IMM[3].wwww, TEMP[4].xxxx, -TEMP[0].xxxx 125: MOV TEMP[5].z, TEMP[4].xxxx 126: MOV TEMP[5].w, TEMP[0].xxxx 127: MOV OUT[1], TEMP[1] 128: MOV OUT[2].xy, TEMP[2].xyxx 129: MOV OUT[4], TEMP[7] 130: MOV OUT[6], IMM[0].xxxx 131: MOV OUT[5], TEMP[3] 132: MOV OUT[0], TEMP[5] 133: MOV OUT[3], TEMP[6] 134: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %17 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4 %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 %19 = call float @llvm.SI.load.const(<16 x i8> %18, i32 0) %20 = call float @llvm.SI.load.const(<16 x i8> %18, i32 4) %21 = call float @llvm.SI.load.const(<16 x i8> %18, i32 8) %22 = call float @llvm.SI.load.const(<16 x i8> %18, i32 12) %23 = call float @llvm.SI.load.const(<16 x i8> %18, i32 16) %24 = call float @llvm.SI.load.const(<16 x i8> %18, i32 20) %25 = call float @llvm.SI.load.const(<16 x i8> %18, i32 24) %26 = call float @llvm.SI.load.const(<16 x i8> %18, i32 28) %27 = call float @llvm.SI.load.const(<16 x i8> %18, i32 32) %28 = call float @llvm.SI.load.const(<16 x i8> %18, i32 36) %29 = call float @llvm.SI.load.const(<16 x i8> %18, i32 40) %30 = call float @llvm.SI.load.const(<16 x i8> %18, i32 44) %31 = call float @llvm.SI.load.const(<16 x i8> %18, i32 48) %32 = call float @llvm.SI.load.const(<16 x i8> %18, i32 52) %33 = call float @llvm.SI.load.const(<16 x i8> %18, i32 56) %34 = call float @llvm.SI.load.const(<16 x i8> %18, i32 60) %35 = call float @llvm.SI.load.const(<16 x i8> %18, i32 304) %36 = call float @llvm.SI.load.const(<16 x i8> %18, i32 308) %37 = call float @llvm.SI.load.const(<16 x i8> %18, i32 312) %38 = call float @llvm.SI.load.const(<16 x i8> %18, i32 320) %39 = call float @llvm.SI.load.const(<16 x i8> %18, i32 324) %40 = call float @llvm.SI.load.const(<16 x i8> %18, i32 328) %41 = call float @llvm.SI.load.const(<16 x i8> %18, i32 408) %42 = call float @llvm.SI.load.const(<16 x i8> %18, i32 412) %43 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 5 %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = call float @llvm.SI.load.const(<16 x i8> %44, i32 0) %46 = call float @llvm.SI.load.const(<16 x i8> %44, i32 4) %47 = call float @llvm.SI.load.const(<16 x i8> %44, i32 8) %48 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %49 = load <8 x i32>, <8 x i32> addrspace(2)* %48, align 32, !tbaa !0 %50 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %51 = load <4 x i32>, <4 x i32> addrspace(2)* %50, align 16, !tbaa !0 %52 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !tbaa !0 %54 = add i32 %5, %7 %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %54) %56 = extractelement <4 x float> %55, i32 0 %57 = extractelement <4 x float> %55, i32 1 %58 = extractelement <4 x float> %55, i32 2 %59 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !tbaa !0 %61 = add i32 %5, %7 %62 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %60, i32 0, i32 %61) %63 = extractelement <4 x float> %62, i32 0 %64 = extractelement <4 x float> %62, i32 1 %65 = extractelement <4 x float> %62, i32 2 %66 = extractelement <4 x float> %62, i32 3 %67 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %68 = load <16 x i8>, <16 x i8> addrspace(2)* %67, align 16, !tbaa !0 %69 = add i32 %5, %7 %70 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %68, i32 0, i32 %69) %71 = extractelement <4 x float> %70, i32 0 %72 = extractelement <4 x float> %70, i32 1 %73 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %74 = load <16 x i8>, <16 x i8> addrspace(2)* %73, align 16, !tbaa !0 %75 = add i32 %5, %7 %76 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %74, i32 0, i32 %75) %77 = extractelement <4 x float> %76, i32 0 %78 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %79 = load <16 x i8>, <16 x i8> addrspace(2)* %78, align 16, !tbaa !0 %80 = add i32 %10, %6 %81 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %79, i32 0, i32 %80) %82 = extractelement <4 x float> %81, i32 0 %83 = extractelement <4 x float> %81, i32 1 %84 = bitcast float %77 to i32 %85 = shl i32 %84, 2 %86 = add i32 %85, 8 %87 = sitofp i32 %86 to float %88 = fadd float %87, 5.000000e-01 %89 = fmul float %88, %41 %90 = fadd float %89, %82 %91 = fadd float %83, 0.000000e+00 %92 = call float @floor(float %90) %93 = fsub float %90, %92 %94 = fmul float %92, %42 %95 = fadd float %94, %91 %96 = bitcast float %93 to i32 %97 = bitcast float %95 to i32 %98 = insertelement <4 x i32> undef, i32 %96, i32 0 %99 = insertelement <4 x i32> %98, i32 %97, i32 1 %100 = insertelement <4 x i32> %99, i32 0, i32 2 %101 = bitcast <8 x i32> %49 to <32 x i8> %102 = bitcast <4 x i32> %51 to <16 x i8> %103 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %100, <32 x i8> %101, <16 x i8> %102, i32 2) %104 = extractelement <4 x float> %103, i32 0 %105 = extractelement <4 x float> %103, i32 1 %106 = extractelement <4 x float> %103, i32 2 %107 = extractelement <4 x float> %103, i32 3 %108 = bitcast float %93 to i32 %109 = bitcast float %95 to i32 %110 = insertelement <4 x i32> , i32 %108, i32 1 %111 = insertelement <4 x i32> %110, i32 %109, i32 2 %112 = insertelement <4 x i32> %111, i32 0, i32 3 %113 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %112, <8 x i32> %49, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %114 = extractelement <4 x float> %113, i32 0 %115 = extractelement <4 x float> %113, i32 1 %116 = extractelement <4 x float> %113, i32 2 %117 = extractelement <4 x float> %113, i32 3 %118 = bitcast float %93 to i32 %119 = bitcast float %95 to i32 %120 = insertelement <4 x i32> , i32 %118, i32 1 %121 = insertelement <4 x i32> %120, i32 %119, i32 2 %122 = insertelement <4 x i32> %121, i32 0, i32 3 %123 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %122, <8 x i32> %49, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %124 = extractelement <4 x float> %123, i32 0 %125 = extractelement <4 x float> %123, i32 1 %126 = extractelement <4 x float> %123, i32 2 %127 = extractelement <4 x float> %123, i32 3 %128 = fmul float %63, 2.550000e+02 %129 = fadd float %128, -1.280000e+02 %130 = fmul float %64, 2.550000e+02 %131 = fadd float %130, -1.280000e+02 %132 = fmul float %65, 2.550000e+02 %133 = fadd float %132, -1.280000e+02 %134 = fmul float %66, 2.550000e+02 %135 = fadd float %134, -1.280000e+02 %136 = fcmp olt float %129, 0.000000e+00 %137 = fcmp olt float %131, 0.000000e+00 %138 = fcmp olt float %133, 0.000000e+00 %139 = fcmp olt float %135, 0.000000e+00 %140 = select i1 %136, float 1.000000e+00, float 0.000000e+00 %141 = call float @fabs(float %129) %142 = call float @fabs(float %131) %143 = call float @fabs(float %133) %144 = call float @fabs(float %135) %145 = fsub float %141, %140 %146 = select i1 %137, float -1.000000e+00, float -0.000000e+00 %147 = fadd float %142, %146 %148 = select i1 %138, float -1.000000e+00, float -0.000000e+00 %149 = fadd float %143, %148 %150 = select i1 %139, float -1.000000e+00, float -0.000000e+00 %151 = fadd float %144, %150 %152 = fadd float %145, -6.400000e+01 %153 = fadd float %147, -6.400000e+01 %154 = fadd float %149, -6.400000e+01 %155 = fadd float %151, -6.400000e+01 %156 = fcmp olt float %152, 0.000000e+00 %157 = fcmp olt float %153, 0.000000e+00 %158 = select i1 %156, float 1.000000e+00, float 0.000000e+00 %159 = select i1 %157, float 1.000000e+00, float 0.000000e+00 %160 = call float @fabs(float %152) %161 = call float @fabs(float %153) %162 = call float @fabs(float %154) %163 = call float @fabs(float %155) %164 = fsub float %160, %158 %165 = fsub float %161, %159 %166 = fmul float %164, 0x3F90410420000000 %167 = fmul float %165, 0x3F90410420000000 %168 = fsub float 1.000000e+00, %166 %169 = fsub float %168, %167 %170 = fmul float %166, %166 %171 = fmul float %167, %167 %172 = fadd float %171, %170 %173 = fmul float %169, %169 %174 = fadd float %172, %173 %175 = call float @llvm.AMDGPU.rsq.clamped.f32(float %174) %176 = fmul float %166, %175 %177 = fmul float %167, %175 %178 = fmul float %169, %175 %179 = fmul float %158, 2.000000e+00 %180 = fmul float %159, 2.000000e+00 %181 = fsub float 1.000000e+00, %179 %182 = fsub float 1.000000e+00, %180 %183 = fmul float %176, %181 %184 = fmul float %177, %182 %185 = fmul float %140, 2.000000e+00 %186 = fsub float 1.000000e+00, %185 %187 = fmul float %186, %178 %188 = fmul float %183, %104 %189 = fmul float %184, %105 %190 = fadd float %188, %189 %191 = fmul float %187, %106 %192 = fadd float %190, %191 %193 = fmul float %107, 0.000000e+00 %194 = fadd float %192, %193 %195 = fmul float %183, %114 %196 = fmul float %184, %115 %197 = fadd float %195, %196 %198 = fmul float %187, %116 %199 = fadd float %197, %198 %200 = fmul float %117, 0.000000e+00 %201 = fadd float %199, %200 %202 = fmul float %183, %124 %203 = fmul float %184, %125 %204 = fadd float %202, %203 %205 = fmul float %187, %126 %206 = fadd float %204, %205 %207 = fmul float %127, 0.000000e+00 %208 = fadd float %206, %207 %209 = fmul float %194, %194 %210 = fmul float %201, %201 %211 = fadd float %210, %209 %212 = fmul float %208, %208 %213 = fadd float %211, %212 %214 = call float @llvm.AMDGPU.rsq.clamped.f32(float %213) %215 = fmul float %194, %214 %216 = fmul float %201, %214 %217 = fmul float %208, %214 %218 = bitcast float %82 to i32 %219 = bitcast float %83 to i32 %220 = insertelement <4 x i32> undef, i32 %218, i32 0 %221 = insertelement <4 x i32> %220, i32 %219, i32 1 %222 = insertelement <4 x i32> %221, i32 0, i32 2 %223 = bitcast <8 x i32> %49 to <32 x i8> %224 = bitcast <4 x i32> %51 to <16 x i8> %225 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %222, <32 x i8> %223, <16 x i8> %224, i32 2) %226 = extractelement <4 x float> %225, i32 2 %227 = fmul float %56, %226 %228 = fmul float %57, %226 %229 = fmul float %58, %226 %230 = fmul float %227, %104 %231 = fmul float %228, %105 %232 = fadd float %230, %231 %233 = fmul float %229, %106 %234 = fadd float %232, %233 %235 = fadd float %234, %107 %236 = fmul float %227, %114 %237 = fmul float %228, %115 %238 = fadd float %236, %237 %239 = fmul float %229, %116 %240 = fadd float %238, %239 %241 = fadd float %240, %117 %242 = fmul float %227, %124 %243 = fmul float %228, %125 %244 = fadd float %242, %243 %245 = fmul float %229, %126 %246 = fadd float %244, %245 %247 = fadd float %246, %127 %248 = fsub float %235, %35 %249 = fsub float %241, %36 %250 = fsub float %247, %37 %251 = fmul float %38, %248 %252 = fmul float %39, %249 %253 = fadd float %252, %251 %254 = fmul float %40, %250 %255 = fadd float %253, %254 %256 = fmul float %248, %248 %257 = fmul float %249, %249 %258 = fadd float %257, %256 %259 = fmul float %250, %250 %260 = fadd float %258, %259 %261 = call float @llvm.AMDGPU.rsq.clamped.f32(float %260) %262 = fmul float %248, %261 %263 = fmul float %249, %261 %264 = fmul float %250, %261 %265 = fmul float %215, %45 %266 = fmul float %216, %46 %267 = fadd float %266, %265 %268 = fmul float %217, %47 %269 = fadd float %267, %268 %270 = fmul float %269, %215 %271 = fmul float %269, %216 %272 = fmul float %269, %217 %273 = fmul float %270, 2.000000e+00 %274 = fmul float %271, 2.000000e+00 %275 = fmul float %272, 2.000000e+00 %276 = fsub float %45, %273 %277 = fsub float %46, %274 %278 = fsub float %47, %275 %279 = fmul float %262, %276 %280 = fsub float -0.000000e+00, %279 %281 = fmul float %263, %277 %282 = fsub float %280, %281 %283 = fmul float %264, %278 %284 = fsub float %282, %283 %285 = call float @llvm.AMDIL.clamp.(float %284, float 0.000000e+00, float 1.000000e+00) %286 = call float @llvm.pow.f32(float %285, float 1.600000e+01) %287 = call float @llvm.AMDIL.clamp.(float %286, float 0.000000e+00, float 1.000000e+00) %288 = fmul float %235, %22 %289 = fmul float %241, %26 %290 = fadd float %288, %289 %291 = fmul float %247, %30 %292 = fadd float %290, %291 %293 = fadd float %292, %34 %294 = fmul float %71, %13 %295 = fadd float %294, %15 %296 = fmul float %72, %14 %297 = fadd float %296, %16 %298 = bitcast float %82 to i32 %299 = bitcast float %83 to i32 %300 = insertelement <4 x i32> , i32 %298, i32 1 %301 = insertelement <4 x i32> %300, i32 %299, i32 2 %302 = insertelement <4 x i32> %301, i32 0, i32 3 %303 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %302, <8 x i32> %49, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %304 = extractelement <4 x float> %303, i32 0 %305 = extractelement <4 x float> %303, i32 1 %306 = extractelement <4 x float> %303, i32 2 %307 = extractelement <4 x float> %303, i32 3 %308 = fmul float %235, %19 %309 = fmul float %241, %23 %310 = fadd float %308, %309 %311 = fmul float %247, %27 %312 = fadd float %310, %311 %313 = fadd float %312, %31 %314 = fmul float %235, %20 %315 = fmul float %241, %24 %316 = fadd float %314, %315 %317 = fmul float %247, %28 %318 = fadd float %316, %317 %319 = fadd float %318, %32 %320 = fsub float -0.000000e+00, %319 %321 = fmul float %235, %21 %322 = fmul float %241, %25 %323 = fadd float %321, %322 %324 = fmul float %247, %29 %325 = fadd float %323, %324 %326 = fadd float %325, %33 %327 = fmul float %326, 2.000000e+00 %328 = fsub float %327, %293 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %295, float %297, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %235, float %241, float %247, float %255) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %215, float %216, float %217, float %287) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %304, float %305, float %306, float %307) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %313, float %320, float %328, float %293) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @floor(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0xc3000000 ; 7E0202FF C3000000 v_mov_b32_e32 v2, 0x437f0000 ; 7E0402FF 437F0000 v_mov_b32_e32 v4, 0x80000000 ; 7E0802FF 80000000 v_mov_b32_e32 v5, 0xc2800000 ; 7E0A02FF C2800000 v_mov_b32_e32 v6, 0x3c820821 ; 7E0C02FF 3C820821 v_add_i32_e32 v0, s10, v0 ; 4A00000A v_add_i32_e32 v3, s11, v3 ; 4A06060B s_load_dwordx4 s[28:31], s[4:5], 0x0 ; C08E0500 s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908 s_load_dwordx4 s[24:27], s[8:9], 0xc ; C08C090C s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910 s_load_dwordx4 s[40:43], s[2:3], 0x4 ; C0940304 s_load_dwordx4 s[44:47], s[2:3], 0x10 ; C0960310 s_load_dwordx4 s[48:51], s[2:3], 0x14 ; C0980314 s_load_dwordx8 s[32:39], s[6:7], 0x0 ; C0D00700 v_mov_b32_e32 v13, 0 ; 7E1A0280 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[14:17], v0, s[12:15], 0 idxen ; E00C2000 80030E00 buffer_load_format_xyzw v[7:10], v0, s[16:19], 0 idxen ; E00C2000 80040700 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[9:12], v0, s[20:23], 0 idxen ; E00C2000 80050900 buffer_load_format_xyzw v[17:20], v0, s[24:27], 0 idxen ; E00C2000 80061100 buffer_load_format_xyzw v[22:25], v3, s[8:11], 0 idxen ; E00C2000 80021603 s_buffer_load_dword s25, s[40:43], 0x6 ; C20CA906 s_buffer_load_dword s26, s[40:43], 0x7 ; C20D2907 s_buffer_load_dword s13, s[40:43], 0x8 ; C206A908 s_buffer_load_dword s14, s[40:43], 0x9 ; C2072909 s_buffer_load_dword s5, s[44:47], 0x51 ; C202AD51 s_buffer_load_dword s6, s[44:47], 0x52 ; C2032D52 s_buffer_load_dword s27, s[44:47], 0x66 ; C20DAD66 s_buffer_load_dword s40, s[44:47], 0x67 ; C2142D67 s_buffer_load_dword s0, s[44:47], 0xf ; C2002D0F s_buffer_load_dword s24, s[44:47], 0x4c ; C20C2D4C s_buffer_load_dword s22, s[44:47], 0x4d ; C20B2D4D s_buffer_load_dword s23, s[44:47], 0x4e ; C20BAD4E s_buffer_load_dword s18, s[44:47], 0x50 ; C2092D50 s_buffer_load_dword s21, s[48:51], 0x0 ; C20AB100 s_buffer_load_dword s20, s[48:51], 0x1 ; C20A3101 s_buffer_load_dword s19, s[48:51], 0x2 ; C209B102 s_buffer_load_dword s3, s[44:47], 0x5 ; C201AD05 s_buffer_load_dword s4, s[44:47], 0x6 ; C2022D06 s_buffer_load_dword s10, s[44:47], 0x7 ; C2052D07 s_buffer_load_dword s2, s[44:47], 0x8 ; C2012D08 s_buffer_load_dword s1, s[44:47], 0x9 ; C200AD09 s_buffer_load_dword s7, s[44:47], 0x0 ; C203AD00 s_buffer_load_dword s8, s[44:47], 0x1 ; C2042D01 s_buffer_load_dword s9, s[44:47], 0x2 ; C204AD02 s_buffer_load_dword s11, s[44:47], 0x3 ; C205AD03 s_buffer_load_dword s16, s[44:47], 0x4 ; C2082D04 s_buffer_load_dword s12, s[44:47], 0xa ; C2062D0A s_buffer_load_dword s17, s[44:47], 0xb ; C208AD0B s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v0, s13 ; 7E00020D s_buffer_load_dword s13, s[44:47], 0xc ; C206AD0C v_mov_b32_e32 v3, s14 ; 7E06020E s_buffer_load_dword s14, s[44:47], 0xd ; C2072D0D s_buffer_load_dword s15, s[44:47], 0xe ; C207AD0E v_mad_f32 v7, v2, v7, v1 ; D2820007 04060F02 v_mac_f32_e32 v1, v2, v8 ; 3E021102 v_lshlrev_b32_e32 v2, 2, v17 ; 34042282 v_mac_f32_e32 v0, s25, v9 ; 3E001219 v_mac_f32_e32 v3, s26, v10 ; 3E06141A v_add_i32_e32 v2, 8, v2 ; 4A040488 v_cvt_f32_i32_e32 v2, v2 ; 7E040B02 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v4, v4, -1.0, vcc ; D2000004 01A9E704 v_add_f32_e64 v1, |v1|, v4 ; D2060101 00020901 v_cmp_gt_f32_e32 vcc, 0, v7 ; 7C080E80 v_cndmask_b32_e64 v4, 0, 1.0, vcc ; D2000004 01A9E480 v_sub_f32_e64 v7, |v7|, v4 ; D2080107 00020907 v_add_f32_e32 v7, v5, v7 ; 060E0F05 v_add_f32_e32 v1, v5, v1 ; 06020305 v_cmp_gt_f32_e32 vcc, 0, v7 ; 7C080E80 v_cndmask_b32_e64 v5, 0, 1.0, vcc ; D2000005 01A9E480 v_sub_f32_e64 v7, |v7|, v5 ; D2080107 00020B07 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v8, 0, 1.0, vcc ; D2000008 01A9E480 v_sub_f32_e64 v1, |v1|, v8 ; D2080101 00021101 v_mul_f32_e32 v9, v6, v7 ; 10120F06 v_mad_f32 v7, -v7, v6, 1.0 ; D2820007 23CA0D07 v_mad_f32 v7, -v1, v6, v7 ; D2820007 241E0D01 v_mul_f32_e32 v1, v6, v1 ; 10020306 v_add_f32_e32 v2, 0.5, v2 ; 060404F0 v_mad_f32 v2, s27, v2, v22 ; D2820002 045A041B v_add_f32_e32 v12, 0, v23 ; 06182E80 v_floor_f32_e32 v6, v2 ; 7E0C4902 v_subrev_f32_e32 v11, v6, v2 ; 0A160506 v_mac_f32_e32 v12, s40, v6 ; 3E180C28 v_mul_f32_e32 v2, v9, v9 ; 10041309 v_mac_f32_e32 v2, v1, v1 ; 3E040301 image_sample_l v[17:20], 15, 0, 0, 0, 0, 0, 0, 0, v[11:14], s[32:39], s[28:31] ; F0900F00 00E8110B v_mov_b32_e32 v10, 0x10001 ; 7E1402FF 00010001 v_mac_f32_e32 v2, v7, v7 ; 3E040F07 image_sample_l_o v[25:28], 15, 0, 0, 0, 0, 0, 0, 0, v[10:13], s[32:39], s[28:31] ; F0D00F00 00E8190A v_mov_b32_e32 v10, 0x20002 ; 7E1402FF 00020002 v_rsq_clamp_f32_e32 v2, v2 ; 7E045902 v_mov_b32_e32 v24, v13 ; 7E30030D image_sample_l_o v[29:32], 15, 0, 0, 0, 0, 0, 0, 0, v[10:13], s[32:39], s[28:31] ; F0D00F00 00E81D0A s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 image_sample_l v6, 4, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[32:39], s[28:31] ; F0900400 00E80616 v_mov_b32_e32 v21, 0x30003 ; 7E2A02FF 00030003 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v10, v6, v14 ; 10141D06 v_mul_f32_e32 v11, v6, v15 ; 10161F06 v_mul_f32_e32 v6, v6, v16 ; 100C2106 v_mov_b32_e32 v24, v13 ; 7E30030D image_sample_l_o v[21:24], 15, 0, 0, 0, 0, 0, 0, 0, v[21:24], s[32:39], s[28:31] ; F0D00F00 00E81515 v_mul_f32_e32 v9, v2, v9 ; 10121302 v_mul_f32_e32 v1, v2, v1 ; 10020302 v_mul_f32_e32 v2, v2, v7 ; 10040F02 v_mad_f32 v5, -2.0, v5, 1.0 ; D2820005 03CA0AF5 v_mul_f32_e32 v5, v5, v9 ; 100A1305 v_mad_f32 v7, -2.0, v8, 1.0 ; D2820007 03CA10F5 v_mul_f32_e32 v1, v7, v1 ; 10020307 v_mad_f32 v4, -2.0, v4, 1.0 ; D2820004 03CA08F5 v_mul_f32_e32 v2, v2, v4 ; 10040902 v_mul_f32_e32 v4, v18, v11 ; 10081712 v_mac_f32_e32 v4, v17, v10 ; 3E081511 v_mac_f32_e32 v4, v19, v6 ; 3E080D13 v_add_f32_e32 v4, v20, v4 ; 06080914 v_mul_f32_e32 v7, v18, v1 ; 100E0312 v_mac_f32_e32 v7, v17, v5 ; 3E0E0B11 v_mac_f32_e32 v7, v19, v2 ; 3E0E0513 v_mac_f32_e32 v7, 0, v20 ; 3E0E2880 v_mul_f32_e32 v8, v26, v11 ; 1010171A v_mac_f32_e32 v8, v25, v10 ; 3E101519 v_mac_f32_e32 v8, v27, v6 ; 3E100D1B v_add_f32_e32 v8, v28, v8 ; 0610111C v_mul_f32_e32 v9, v26, v1 ; 1012031A v_mac_f32_e32 v9, v25, v5 ; 3E120B19 v_mac_f32_e32 v9, v27, v2 ; 3E12051B v_mac_f32_e32 v9, 0, v28 ; 3E123880 v_mul_f32_e32 v11, v30, v11 ; 1016171E v_mac_f32_e32 v11, v29, v10 ; 3E16151D v_mac_f32_e32 v11, v31, v6 ; 3E160D1F v_mul_f32_e32 v1, v30, v1 ; 1002031E v_mac_f32_e32 v1, v29, v5 ; 3E020B1D v_mac_f32_e32 v1, v31, v2 ; 3E02051F v_mac_f32_e32 v1, 0, v32 ; 3E024080 v_mul_f32_e32 v2, v7, v7 ; 10040F07 v_mac_f32_e32 v2, v9, v9 ; 3E041309 v_mac_f32_e32 v2, v1, v1 ; 3E040301 v_rsq_clamp_f32_e32 v2, v2 ; 7E045902 v_add_f32_e32 v5, v32, v11 ; 060A1720 exp 15, 32, 0, 0, 0, v13, v13, v13, v13 ; F800020F 0D0D0D0D exp 15, 33, 0, 0, 0, v0, v3, v13, v13 ; F800021F 0D0D0300 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_mul_f32_e32 v0, v2, v7 ; 10000F02 v_mul_f32_e32 v3, v2, v9 ; 10061302 v_mul_f32_e32 v1, v2, v1 ; 10020302 v_subrev_f32_e32 v2, s24, v4 ; 0A040818 v_subrev_f32_e32 v6, s22, v8 ; 0A0C1016 v_subrev_f32_e32 v7, s23, v5 ; 0A0E0A17 v_mul_f32_e32 v9, v2, v2 ; 10120502 v_mac_f32_e32 v9, v6, v6 ; 3E120D06 v_mac_f32_e32 v9, v7, v7 ; 3E120F07 v_rsq_clamp_f32_e32 v9, v9 ; 7E125909 v_mul_f32_e32 v10, s21, v0 ; 10140015 v_mac_f32_e32 v10, s20, v3 ; 3E140614 v_mac_f32_e32 v10, s19, v1 ; 3E140213 v_mul_f32_e32 v11, v0, v10 ; 10161500 v_mad_f32 v11, -2.0, v11, s21 ; D282000B 005616F5 v_mul_f32_e32 v12, v9, v2 ; 10180509 v_mul_f32_e32 v11, v11, v12 ; 1016190B v_mul_f32_e32 v12, v3, v10 ; 10181503 v_mad_f32 v12, -2.0, v12, s20 ; D282000C 005218F5 v_mul_f32_e32 v14, v9, v6 ; 101C0D09 v_mad_f32 v11, -v14, v12, -v11 ; D282000B A42E190E v_mul_f32_e32 v10, v1, v10 ; 10141501 v_mad_f32 v10, -2.0, v10, s19 ; D282000A 004E14F5 v_mul_f32_e32 v9, v9, v7 ; 10120F09 v_mad_f32 v9, -v9, v10, v11 ; D2820009 242E1509 v_mul_f32_e32 v2, s18, v2 ; 10040412 v_add_f32_e64 v9, 0, v9 clamp ; D2060809 00021280 v_log_f32_e32 v9, v9 ; 7E124F09 v_mac_f32_e32 v2, s5, v6 ; 3E040C05 v_mac_f32_e32 v2, s6, v7 ; 3E040E06 exp 15, 34, 0, 0, 0, v4, v8, v5, v2 ; F800022F 02050804 s_waitcnt expcnt(0) ; BF8C070F v_mul_legacy_f32_e32 v2, 0x41800000, v9 ; 0E0412FF 41800000 v_exp_f32_e32 v2, v2 ; 7E044B02 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 exp 15, 35, 0, 0, 0, v0, v3, v1, v2 ; F800023F 02010300 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s10, v8 ; 1000100A v_mul_f32_e32 v1, s16, v8 ; 10021010 v_mul_f32_e32 v2, s3, v8 ; 10041003 v_mul_f32_e32 v3, s4, v8 ; 10061004 v_mac_f32_e32 v0, s11, v4 ; 3E00080B v_mac_f32_e32 v1, s7, v4 ; 3E020807 v_mac_f32_e32 v2, s8, v4 ; 3E040808 v_mac_f32_e32 v3, s9, v4 ; 3E060809 v_mac_f32_e32 v0, s17, v5 ; 3E000A11 v_mac_f32_e32 v1, s2, v5 ; 3E020A02 v_mac_f32_e32 v2, s1, v5 ; 3E040A01 v_mac_f32_e32 v3, s12, v5 ; 3E060A0C v_add_f32_e32 v0, s0, v0 ; 06000000 v_add_f32_e32 v1, s13, v1 ; 0602020D v_add_f32_e32 v2, s14, v2 ; 0604040E v_add_f32_e32 v3, s15, v3 ; 0606060F v_xor_b32_e32 v2, 0x80000000, v2 ; 3A0404FF 80000000 v_mad_f32 v3, 2.0, v3, -v0 ; D2820003 840206F4 exp 15, 36, 0, 0, 0, v21, v22, v23, v24 ; F800024F 18171615 exp 15, 37, 0, 0, 0, v13, v13, v13, v13 ; F800025F 0D0D0D0D exp 15, 12, 0, 0, 0, v1, v2, v3, v0 ; F80000CF 00030201 exp 15, 13, 0, 1, 0, v13, v13, v13, v13 ; F80008DF 0D0D0D0D s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 36 Code Size: 1040 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0xB last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL OUT[2], COLOR[2] DCL OUT[3], COLOR[3] DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[1][0..3] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0..3], LOCAL IMM[0] UINT32 {0, 16, 48, 44} IMM[1] FLT32 { 0.5000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV TEMP[1].w, TEMP[0].wwww 3: MUL TEMP[1].xyz, CONST[1][1].xyzz, TEMP[0].xyzz 4: MUL TEMP[0].xyz, TEMP[1], IN[3] 5: MOV TEMP[1].xy, IN[0].xyyy 6: TEX TEMP[1].xyz, TEMP[1], SAMP[1], 2D 7: DP3 TEMP[2].x, IN[2].xyzz, IN[2].xyzz 8: RSQ TEMP[2].x, TEMP[2].xxxx 9: MUL TEMP[2].xyz, IN[2].xyzz, TEMP[2].xxxx 10: MAD TEMP[2].xyz, TEMP[2].xyzz, IMM[1].xxxx, IMM[1].xxxx 11: MOV TEMP[3].w, IMM[1].yyyy 12: MOV TEMP[3].x, TEMP[2].xxxx 13: MOV TEMP[3].y, TEMP[2].yyyy 14: MOV TEMP[3].z, TEMP[2].zzzz 15: MOV TEMP[2].w, IMM[1].yyyy 16: MOV TEMP[2].x, TEMP[0].xxxx 17: MOV TEMP[2].y, TEMP[0].yyyy 18: MOV TEMP[2].z, TEMP[0].zzzz 19: MOV TEMP[0].w, IMM[1].yyyy 20: MUL TEMP[0].x, CONST[1][3].xxxx, TEMP[1].xxxx 21: MOV TEMP[0].y, TEMP[1].yyyy 22: MUL TEMP[1].x, CONST[1][2].wwww, TEMP[1].zzzz 23: MOV TEMP[0].z, TEMP[1].xxxx 24: MOV OUT[2], IN[1].wwww 25: MOV OUT[0], TEMP[2] 26: MOV OUT[3], TEMP[3] 27: MOV OUT[1], TEMP[0] 28: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %29 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %30 = load <32 x i8>, <32 x i8> addrspace(2)* %29, align 32, !tbaa !0 %31 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %32 = load <16 x i8>, <16 x i8> addrspace(2)* %31, align 16, !tbaa !0 %33 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %34 = bitcast <8 x i32> addrspace(2)* %33 to <32 x i8> addrspace(2)* %35 = load <32 x i8>, <32 x i8> addrspace(2)* %34, align 32, !tbaa !0 %36 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %37 = bitcast <4 x i32> addrspace(2)* %36 to <16 x i8> addrspace(2)* %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %44 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %45 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %46 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %47 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %48 = bitcast float %39 to i32 %49 = bitcast float %40 to i32 %50 = insertelement <2 x i32> undef, i32 %48, i32 0 %51 = insertelement <2 x i32> %50, i32 %49, i32 1 %52 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %51, <32 x i8> %30, <16 x i8> %32, i32 2) %53 = extractelement <4 x float> %52, i32 0 %54 = extractelement <4 x float> %52, i32 1 %55 = extractelement <4 x float> %52, i32 2 %56 = fmul float %24, %53 %57 = fmul float %25, %54 %58 = fmul float %26, %55 %59 = fmul float %56, %45 %60 = fmul float %57, %46 %61 = fmul float %58, %47 %62 = bitcast float %39 to i32 %63 = bitcast float %40 to i32 %64 = insertelement <2 x i32> undef, i32 %62, i32 0 %65 = insertelement <2 x i32> %64, i32 %63, i32 1 %66 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %65, <32 x i8> %35, <16 x i8> %38, i32 2) %67 = extractelement <4 x float> %66, i32 0 %68 = extractelement <4 x float> %66, i32 1 %69 = extractelement <4 x float> %66, i32 2 %70 = fmul float %42, %42 %71 = fmul float %43, %43 %72 = fadd float %71, %70 %73 = fmul float %44, %44 %74 = fadd float %72, %73 %75 = call float @llvm.AMDGPU.rsq.clamped.f32(float %74) %76 = fmul float %42, %75 %77 = fmul float %43, %75 %78 = fmul float %44, %75 %79 = fmul float %76, 5.000000e-01 %80 = fadd float %79, 5.000000e-01 %81 = fmul float %77, 5.000000e-01 %82 = fadd float %81, 5.000000e-01 %83 = fmul float %78, 5.000000e-01 %84 = fadd float %83, 5.000000e-01 %85 = fmul float %28, %67 %86 = fmul float %27, %69 %87 = call i32 @llvm.SI.packf16(float %59, float %60) %88 = bitcast i32 %87 to float %89 = call i32 @llvm.SI.packf16(float %61, float 0.000000e+00) %90 = bitcast i32 %89 to float %91 = call i32 @llvm.SI.packf16(float %85, float %68) %92 = bitcast i32 %91 to float %93 = call i32 @llvm.SI.packf16(float %86, float 0.000000e+00) %94 = bitcast i32 %93 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %88, float %90, float %88, float %90) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 1, i32 1, float %92, float %94, float %92, float %94) %95 = call i32 @llvm.SI.packf16(float %80, float %82) %96 = bitcast i32 %95 to float %97 = call i32 @llvm.SI.packf16(float %84, float 0.000000e+00) %98 = bitcast i32 %97 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 2, i32 0, float %41, float %41, float %41, float %41) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 3, i32 1, float %96, float %98, float %96, float %98) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0xb ; C205810B s_buffer_load_dword s0, s[0:3], 0xc ; C200010C v_interp_p1_f32 v4, v0, 3, 1, [m0] ; C8100700 v_interp_p2_f32 v4, [v4], v1, 3, 1, [m0] ; C8110701 v_interp_p1_f32 v5, v0, 0, 2, [m0] ; C8140800 v_interp_p2_f32 v5, [v5], v1, 0, 2, [m0] ; C8150801 v_interp_p1_f32 v6, v0, 1, 2, [m0] ; C8180900 v_interp_p2_f32 v6, [v6], v1, 1, 2, [m0] ; C8190901 s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500 s_load_dwordx4 s[16:19], s[4:5], 0x4 ; C0880504 s_load_dwordx8 s[20:27], s[6:7], 0x0 ; C0CA0700 s_load_dwordx8 s[28:35], s[6:7], 0x8 ; C0CE0708 v_interp_p1_f32 v7, v0, 2, 2, [m0] ; C81C0A00 v_interp_p2_f32 v7, [v7], v1, 2, 2, [m0] ; C81D0A01 v_interp_p1_f32 v8, v0, 0, 3, [m0] ; C8200C00 v_interp_p2_f32 v8, [v8], v1, 0, 3, [m0] ; C8210C01 v_interp_p1_f32 v9, v0, 1, 3, [m0] ; C8240D00 v_interp_p2_f32 v9, [v9], v1, 1, 3, [m0] ; C8250D01 v_interp_p1_f32 v0, v0, 2, 3, [m0] ; C8000E00 v_interp_p2_f32 v0, [v0], v1, 2, 3, [m0] ; C8010E01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[10:12], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[20:27], s[12:15] ; F0800700 00650A02 image_sample v[1:3], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[28:35], s[16:19] ; F0800700 00870102 s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v10, s8, v10 ; 10141408 v_mul_f32_e32 v11, s9, v11 ; 10161609 v_mul_f32_e32 v12, s10, v12 ; 1018180A s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, s0, v1 ; 10020200 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_mul_f32_e32 v2, s11, v3 ; 1004060B v_mul_f32_e32 v3, v8, v10 ; 10061508 v_mul_f32_e32 v8, v9, v11 ; 10101709 v_mul_f32_e32 v0, v0, v12 ; 10001900 v_cvt_pkrtz_f16_f32_e32 v3, v3, v8 ; 5E061103 v_mul_f32_e32 v8, v5, v5 ; 10100B05 v_mac_f32_e32 v8, v6, v6 ; 3E100D06 v_mac_f32_e32 v8, v7, v7 ; 3E100F07 v_rsq_clamp_f32_e32 v8, v8 ; 7E105908 v_cvt_pkrtz_f16_f32_e64 v0, v0, 0 ; D25E0000 00010100 exp 15, 0, 1, 0, 0, v3, v0, v3, v0 ; F800040F 00030003 s_waitcnt expcnt(0) ; BF8C070F v_cvt_pkrtz_f16_f32_e64 v0, v2, 0 ; D25E0000 00010102 exp 15, 1, 1, 0, 0, v1, v0, v1, v0 ; F800041F 00010001 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, v8, v5 ; 10000B08 v_mul_f32_e32 v1, v8, v6 ; 10020D08 v_mul_f32_e32 v2, v8, v7 ; 10040F08 v_mad_f32 v0, 0.5, v0, 0.5 ; D2820000 03C200F0 v_mad_f32 v1, 0.5, v1, 0.5 ; D2820001 03C202F0 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 exp 15, 2, 0, 0, 0, v4, v4, v4, v4 ; F800002F 04040404 v_mad_f32 v1, 0.5, v2, 0.5 ; D2820001 03C204F0 v_cvt_pkrtz_f16_f32_e64 v1, v1, 0 ; D25E0001 00010101 exp 15, 3, 1, 1, 1, v0, v1, v0, v1 ; F8001C3F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 16 Code Size: 316 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0xB last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL OUT[2], COLOR[2] DCL OUT[3], COLOR[3] DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL CONST[1][0..3] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0..3], LOCAL IMM[0] UINT32 {0, 32, 16, 48} IMM[1] FLT32 { 0.5000, 0.0000, 0.0000, 0.0000} IMM[2] UINT32 {44, 0, 0, 0} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0].xyz, TEMP[0], SAMP[0], 2D 2: MUL TEMP[1].xyz, IN[3].xyzz, CONST[1][1].xyzz 3: MOV TEMP[2].xy, IN[0].xyyy 4: TEX TEMP[2].y, TEMP[2], SAMP[1], 2D 5: LRP TEMP[1].xyz, TEMP[2].yyyy, TEMP[1].xyzz, CONST[1][2].xyzz 6: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xyzz 7: MOV TEMP[1].xy, IN[0].xyyy 8: TEX TEMP[1].xyz, TEMP[1], SAMP[2], 2D 9: DP3 TEMP[2].x, IN[2].xyzz, IN[2].xyzz 10: RSQ TEMP[2].x, TEMP[2].xxxx 11: MUL TEMP[2].xyz, IN[2].xyzz, TEMP[2].xxxx 12: MAD TEMP[2].xyz, TEMP[2].xyzz, IMM[1].xxxx, IMM[1].xxxx 13: MOV TEMP[3].w, IMM[1].yyyy 14: MOV TEMP[3].x, TEMP[2].xxxx 15: MOV TEMP[3].y, TEMP[2].yyyy 16: MOV TEMP[3].z, TEMP[2].zzzz 17: MOV TEMP[2].w, IMM[1].yyyy 18: MOV TEMP[2].x, TEMP[0].xxxx 19: MOV TEMP[2].y, TEMP[0].yyyy 20: MOV TEMP[2].z, TEMP[0].zzzz 21: MOV TEMP[0].w, IMM[1].yyyy 22: MUL TEMP[0].x, CONST[1][3].xxxx, TEMP[1].xxxx 23: MOV TEMP[0].y, TEMP[1].yyyy 24: MUL TEMP[1].x, CONST[1][2].wwww, TEMP[1].zzzz 25: MOV TEMP[0].z, TEMP[1].xxxx 26: MOV OUT[2], IN[1].wwww 27: MOV OUT[0], TEMP[2] 28: MOV OUT[3], TEMP[3] 29: MOV OUT[1], TEMP[0] 30: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %32 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %33 = load <32 x i8>, <32 x i8> addrspace(2)* %32, align 32, !tbaa !0 %34 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 %36 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %37 = bitcast <8 x i32> addrspace(2)* %36 to <32 x i8> addrspace(2)* %38 = load <32 x i8>, <32 x i8> addrspace(2)* %37, align 32, !tbaa !0 %39 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %40 = bitcast <4 x i32> addrspace(2)* %39 to <16 x i8> addrspace(2)* %41 = load <16 x i8>, <16 x i8> addrspace(2)* %40, align 16, !tbaa !0 %42 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %43 = bitcast <8 x i32> addrspace(2)* %42 to <32 x i8> addrspace(2)* %44 = load <32 x i8>, <32 x i8> addrspace(2)* %43, align 32, !tbaa !0 %45 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %46 = bitcast <4 x i32> addrspace(2)* %45 to <16 x i8> addrspace(2)* %47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0 %48 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %49 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %50 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %51 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %52 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %53 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %54 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %55 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %56 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %57 = bitcast float %48 to i32 %58 = bitcast float %49 to i32 %59 = insertelement <2 x i32> undef, i32 %57, i32 0 %60 = insertelement <2 x i32> %59, i32 %58, i32 1 %61 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %60, <32 x i8> %33, <16 x i8> %35, i32 2) %62 = extractelement <4 x float> %61, i32 0 %63 = extractelement <4 x float> %61, i32 1 %64 = extractelement <4 x float> %61, i32 2 %65 = fmul float %54, %24 %66 = fmul float %55, %25 %67 = fmul float %56, %26 %68 = bitcast float %48 to i32 %69 = bitcast float %49 to i32 %70 = insertelement <2 x i32> undef, i32 %68, i32 0 %71 = insertelement <2 x i32> %70, i32 %69, i32 1 %72 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %71, <32 x i8> %38, <16 x i8> %41, i32 2) %73 = extractelement <4 x float> %72, i32 1 %74 = call float @llvm.AMDGPU.lrp(float %73, float %65, float %27) %75 = call float @llvm.AMDGPU.lrp(float %73, float %66, float %28) %76 = call float @llvm.AMDGPU.lrp(float %73, float %67, float %29) %77 = fmul float %62, %74 %78 = fmul float %63, %75 %79 = fmul float %64, %76 %80 = bitcast float %48 to i32 %81 = bitcast float %49 to i32 %82 = insertelement <2 x i32> undef, i32 %80, i32 0 %83 = insertelement <2 x i32> %82, i32 %81, i32 1 %84 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %83, <32 x i8> %44, <16 x i8> %47, i32 2) %85 = extractelement <4 x float> %84, i32 0 %86 = extractelement <4 x float> %84, i32 1 %87 = extractelement <4 x float> %84, i32 2 %88 = fmul float %51, %51 %89 = fmul float %52, %52 %90 = fadd float %89, %88 %91 = fmul float %53, %53 %92 = fadd float %90, %91 %93 = call float @llvm.AMDGPU.rsq.clamped.f32(float %92) %94 = fmul float %51, %93 %95 = fmul float %52, %93 %96 = fmul float %53, %93 %97 = fmul float %94, 5.000000e-01 %98 = fadd float %97, 5.000000e-01 %99 = fmul float %95, 5.000000e-01 %100 = fadd float %99, 5.000000e-01 %101 = fmul float %96, 5.000000e-01 %102 = fadd float %101, 5.000000e-01 %103 = fmul float %31, %85 %104 = fmul float %30, %87 %105 = call i32 @llvm.SI.packf16(float %77, float %78) %106 = bitcast i32 %105 to float %107 = call i32 @llvm.SI.packf16(float %79, float 0.000000e+00) %108 = bitcast i32 %107 to float %109 = call i32 @llvm.SI.packf16(float %103, float %86) %110 = bitcast i32 %109 to float %111 = call i32 @llvm.SI.packf16(float %104, float 0.000000e+00) %112 = bitcast i32 %111 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %106, float %108, float %106, float %108) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 1, i32 1, float %110, float %112, float %110, float %112) %113 = call i32 @llvm.SI.packf16(float %98, float %100) %114 = bitcast i32 %113 to float %115 = call i32 @llvm.SI.packf16(float %102, float 0.000000e+00) %116 = bitcast i32 %115 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 2, i32 0, float %50, float %50, float %50, float %50) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 3, i32 1, float %114, float %116, float %114, float %116) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 3, 1, [m0] ; C8100700 v_interp_p2_f32 v4, [v4], v1, 3, 1, [m0] ; C8110701 v_interp_p1_f32 v5, v0, 0, 2, [m0] ; C8140800 v_interp_p2_f32 v5, [v5], v1, 0, 2, [m0] ; C8150801 v_interp_p1_f32 v6, v0, 1, 2, [m0] ; C8180900 v_interp_p2_f32 v6, [v6], v1, 1, 2, [m0] ; C8190901 v_interp_p1_f32 v7, v0, 2, 2, [m0] ; C81C0A00 v_interp_p2_f32 v7, [v7], v1, 2, 2, [m0] ; C81D0A01 v_interp_p1_f32 v8, v0, 0, 3, [m0] ; C8200C00 v_interp_p2_f32 v8, [v8], v1, 0, 3, [m0] ; C8210C01 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 v_interp_p1_f32 v9, v0, 1, 3, [m0] ; C8240D00 s_load_dwordx4 s[20:23], s[4:5], 0x4 ; C08A0504 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 v_interp_p2_f32 v9, [v9], v1, 1, 3, [m0] ; C8250D01 v_interp_p1_f32 v0, v0, 2, 3, [m0] ; C8000E00 s_load_dwordx4 s[32:35], s[4:5], 0x8 ; C0900508 s_load_dwordx8 s[36:43], s[6:7], 0x10 ; C0D20710 v_interp_p2_f32 v0, [v0], v1, 2, 3, [m0] ; C8010E01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[10:12], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800700 00430A02 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_buffer_load_dword s6, s[0:3], 0x6 ; C2030106 image_sample v1, 2, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[20:23] ; F0800200 00A60102 s_buffer_load_dword s7, s[0:3], 0x8 ; C2038108 s_buffer_load_dword s8, s[0:3], 0x9 ; C2040109 s_buffer_load_dword s9, s[0:3], 0xa ; C204810A image_sample v[13:15], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[36:43], s[32:35] ; F0800700 01090D02 s_buffer_load_dword s10, s[0:3], 0xc ; C205010C s_buffer_load_dword s0, s[0:3], 0xb ; C200010B s_waitcnt vmcnt(2) lgkmcnt(0) ; BF8C0072 v_mul_f32_e32 v2, s4, v8 ; 10041004 v_mul_f32_e32 v3, s5, v9 ; 10061205 v_mul_f32_e32 v0, s6, v0 ; 10000006 s_waitcnt vmcnt(1) ; BF8C0771 v_sub_f32_e32 v8, 1.0, v1 ; 081002F2 v_mul_f32_e32 v9, s7, v8 ; 10121007 v_mul_f32_e32 v16, s8, v8 ; 10201008 v_mul_f32_e32 v8, s9, v8 ; 10101009 v_mac_f32_e32 v9, v2, v1 ; 3E120302 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, s10, v13 ; 10041A0A v_mul_f32_e32 v13, s0, v15 ; 101A1E00 v_mac_f32_e32 v16, v3, v1 ; 3E200303 v_mac_f32_e32 v8, v0, v1 ; 3E100300 v_mul_f32_e32 v0, v9, v10 ; 10001509 v_mul_f32_e32 v1, v16, v11 ; 10021710 v_mul_f32_e32 v3, v8, v12 ; 10061908 v_cvt_pkrtz_f16_f32_e32 v2, v2, v14 ; 5E041D02 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_mul_f32_e32 v1, v5, v5 ; 10020B05 v_mac_f32_e32 v1, v6, v6 ; 3E020D06 v_mac_f32_e32 v1, v7, v7 ; 3E020F07 v_rsq_clamp_f32_e32 v1, v1 ; 7E025901 v_cvt_pkrtz_f16_f32_e64 v3, v3, 0 ; D25E0003 00010103 exp 15, 0, 1, 0, 0, v0, v3, v0, v3 ; F800040F 03000300 s_waitcnt expcnt(0) ; BF8C070F v_cvt_pkrtz_f16_f32_e64 v0, v13, 0 ; D25E0000 0001010D exp 15, 1, 1, 0, 0, v2, v0, v2, v0 ; F800041F 00020002 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, v1, v5 ; 10000B01 v_mul_f32_e32 v2, v1, v6 ; 10040D01 v_mul_f32_e32 v1, v1, v7 ; 10020F01 v_mad_f32 v0, 0.5, v0, 0.5 ; D2820000 03C200F0 v_mad_f32 v2, 0.5, v2, 0.5 ; D2820002 03C204F0 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 exp 15, 2, 0, 0, 0, v4, v4, v4, v4 ; F800002F 04040404 v_mad_f32 v1, 0.5, v1, 0.5 ; D2820001 03C202F0 v_cvt_pkrtz_f16_f32_e64 v1, v1, 0 ; D25E0001 00010101 exp 15, 3, 1, 1, 1, v0, v1, v0, v1 ; F8001C3F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 20 Code Size: 372 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL OUT[4], GENERIC[2] DCL OUT[5], GENERIC[3] DCL OUT[6], GENERIC[4] DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0..2] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL DCL TEMP[2..8], LOCAL IMM[0] FLT32 { 0.0000, 255.0000, -128.0000, 1.0000} IMM[1] INT32 {1, 0, 2, 3} IMM[2] FLT32 { -64.0000, 0.0159, 2.0000, 0.0000} IMM[3] UINT32 {3, 320, 304, 12} IMM[4] UINT32 {28, 44, 60, 0} IMM[5] UINT32 {24, 32, 16, 48} IMM[6] UINT32 {4, 20, 36, 52} IMM[7] UINT32 {8, 40, 56, 0} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MOV TEMP[0].xy, IN[3].xyyy 4: MOV TEMP[0].w, IMM[0].xxxx 5: TXL TEMP[0], TEMP[0], SAMP[0], 2D 6: MOV TEMP[2].xy, IN[3].xyyy 7: MOV TEMP[2].w, IMM[0].xxxx 8: TXL TEMP[2], TEMP[2], SAMP[0], 2D, IMM[1].xyx 9: MOV TEMP[3].xy, IN[3].xyyy 10: MOV TEMP[3].w, IMM[0].xxxx 11: TXL TEMP[3], TEMP[3], SAMP[0], 2D, IMM[1].zyz 12: MAD TEMP[4], IN[1], IMM[0].yyyy, IMM[0].zzzz 13: FSLT TEMP[5], TEMP[4], IMM[0].xxxx 14: AND TEMP[5], TEMP[5], IMM[0].wwww 15: ABS TEMP[4], TEMP[4] 16: ADD TEMP[4], TEMP[4], -TEMP[5] 17: ADD TEMP[4], TEMP[4], IMM[2].xxxx 18: FSLT TEMP[6], TEMP[4], IMM[0].xxxx 19: AND TEMP[6], TEMP[6], IMM[0].wwww 20: ABS TEMP[4], TEMP[4] 21: ADD TEMP[4], TEMP[4], -TEMP[6] 22: MUL TEMP[4].xy, TEMP[4], IMM[2].yyyy 23: MOV TEMP[7].x, TEMP[4].xxxx 24: MOV TEMP[7].y, TEMP[4].yyyy 25: ADD TEMP[8].x, IMM[0].wwww, -TEMP[4].xxxx 26: ADD TEMP[4].x, TEMP[8].xxxx, -TEMP[4].yyyy 27: MOV TEMP[7].z, TEMP[4].xxxx 28: DP3 TEMP[4].x, TEMP[7].xyzz, TEMP[7].xyzz 29: RSQ TEMP[4].x, TEMP[4].xxxx 30: MUL TEMP[4].xyz, TEMP[7].xyzz, TEMP[4].xxxx 31: MUL TEMP[6], TEMP[6], IMM[2].zzzz 32: ADD TEMP[6].xy, IMM[0].wwww, -TEMP[6] 33: MUL TEMP[6].xy, TEMP[4].xyyy, TEMP[6].xyyy 34: MOV TEMP[7].w, IMM[0].xxxx 35: MOV TEMP[7].x, TEMP[6].xxxx 36: MOV TEMP[7].y, TEMP[6].yyyy 37: MUL TEMP[5].x, TEMP[5].xxxx, IMM[2].zzzz 38: ADD TEMP[5].x, IMM[0].wwww, -TEMP[5].xxxx 39: MUL TEMP[4].x, TEMP[5].xxxx, TEMP[4].zzzz 40: MOV TEMP[7].z, TEMP[4].xxxx 41: DP4 TEMP[4].x, TEMP[7], TEMP[0] 42: DP4 TEMP[5].x, TEMP[7], TEMP[2] 43: MOV TEMP[4].y, TEMP[5].xxxx 44: DP4 TEMP[5].x, TEMP[7], TEMP[3] 45: MOV TEMP[4].z, TEMP[5].xxxx 46: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 47: RSQ TEMP[5].x, TEMP[5].xxxx 48: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx 49: MOV TEMP[5].w, IMM[0].wwww 50: MOV TEMP[5].x, IN[0].xxxx 51: MOV TEMP[5].y, IN[0].yyyy 52: MOV TEMP[5].z, IN[0].zzzz 53: DP4 TEMP[0].x, TEMP[5], TEMP[0] 54: DP4 TEMP[2].x, TEMP[5], TEMP[2] 55: DP4 TEMP[3].x, TEMP[5], TEMP[3] 56: MOV TEMP[5].x, TEMP[0].xxxx 57: MOV TEMP[5].y, TEMP[2].xxxx 58: MOV TEMP[5].z, TEMP[3].xxxx 59: MOV TEMP[6].x, TEMP[0].xxxx 60: MOV TEMP[6].y, TEMP[2].xxxx 61: MOV TEMP[6].z, TEMP[3].xxxx 62: ADD TEMP[5].xyz, TEMP[5].xyzz, -CONST[4][19].xyzz 63: DP3 TEMP[5].x, CONST[4][20].xyzz, TEMP[5].xyzz 64: MOV TEMP[6].w, TEMP[5].xxxx 65: MOV TEMP[5].w, IMM[0].xxxx 66: MOV TEMP[5].x, TEMP[4].xxxx 67: MOV TEMP[5].y, TEMP[4].yyyy 68: MOV TEMP[5].z, TEMP[4].zzzz 69: MOV TEMP[4].w, IMM[0].wwww 70: MOV TEMP[4].x, TEMP[0].xxxx 71: MOV TEMP[4].y, TEMP[2].xxxx 72: MOV TEMP[4].z, TEMP[3].xxxx 73: MOV TEMP[0].x, CONST[4][0].wwww 74: MOV TEMP[0].y, CONST[4][1].wwww 75: MOV TEMP[0].z, CONST[4][2].wwww 76: MOV TEMP[0].w, CONST[4][3].wwww 77: DP4 TEMP[0].x, TEMP[4], TEMP[0] 78: MAD TEMP[2].xy, IN[2].xyyy, CONST[1][1].zwww, CONST[1][2].xyyy 79: MOV TEMP[3].xy, IN[3].xyyy 80: MOV TEMP[3].w, IMM[0].xxxx 81: TXL TEMP[3], TEMP[3], SAMP[0], 2D, IMM[1].wyw 82: MOV TEMP[7].x, CONST[4][0].xxxx 83: MOV TEMP[7].y, CONST[4][1].xxxx 84: MOV TEMP[7].z, CONST[4][2].xxxx 85: MOV TEMP[7].w, CONST[4][3].xxxx 86: DP4 TEMP[7].x, TEMP[4], TEMP[7] 87: MOV TEMP[8].x, CONST[4][0].yyyy 88: MOV TEMP[8].y, CONST[4][1].yyyy 89: MOV TEMP[8].z, CONST[4][2].yyyy 90: MOV TEMP[8].w, CONST[4][3].yyyy 91: DP4 TEMP[8].x, TEMP[4], TEMP[8] 92: MOV TEMP[7].y, -TEMP[8].xxxx 93: MOV TEMP[8].x, CONST[4][0].zzzz 94: MOV TEMP[8].y, CONST[4][1].zzzz 95: MOV TEMP[8].z, CONST[4][2].zzzz 96: MOV TEMP[8].w, CONST[4][3].zzzz 97: DP4 TEMP[4].x, TEMP[4], TEMP[8] 98: MAD TEMP[4].x, IMM[2].zzzz, TEMP[4].xxxx, -TEMP[0].xxxx 99: MOV TEMP[7].z, TEMP[4].xxxx 100: MOV TEMP[7].w, TEMP[0].xxxx 101: MOV OUT[1], TEMP[1] 102: MOV OUT[2].xy, TEMP[2].xyxx 103: MOV OUT[4], TEMP[5] 104: MOV OUT[6], IMM[0].xxxx 105: MOV OUT[5], TEMP[3] 106: MOV OUT[0], TEMP[7] 107: MOV OUT[3], TEMP[6] 108: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %17 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4 %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 %19 = call float @llvm.SI.load.const(<16 x i8> %18, i32 0) %20 = call float @llvm.SI.load.const(<16 x i8> %18, i32 4) %21 = call float @llvm.SI.load.const(<16 x i8> %18, i32 8) %22 = call float @llvm.SI.load.const(<16 x i8> %18, i32 12) %23 = call float @llvm.SI.load.const(<16 x i8> %18, i32 16) %24 = call float @llvm.SI.load.const(<16 x i8> %18, i32 20) %25 = call float @llvm.SI.load.const(<16 x i8> %18, i32 24) %26 = call float @llvm.SI.load.const(<16 x i8> %18, i32 28) %27 = call float @llvm.SI.load.const(<16 x i8> %18, i32 32) %28 = call float @llvm.SI.load.const(<16 x i8> %18, i32 36) %29 = call float @llvm.SI.load.const(<16 x i8> %18, i32 40) %30 = call float @llvm.SI.load.const(<16 x i8> %18, i32 44) %31 = call float @llvm.SI.load.const(<16 x i8> %18, i32 48) %32 = call float @llvm.SI.load.const(<16 x i8> %18, i32 52) %33 = call float @llvm.SI.load.const(<16 x i8> %18, i32 56) %34 = call float @llvm.SI.load.const(<16 x i8> %18, i32 60) %35 = call float @llvm.SI.load.const(<16 x i8> %18, i32 304) %36 = call float @llvm.SI.load.const(<16 x i8> %18, i32 308) %37 = call float @llvm.SI.load.const(<16 x i8> %18, i32 312) %38 = call float @llvm.SI.load.const(<16 x i8> %18, i32 320) %39 = call float @llvm.SI.load.const(<16 x i8> %18, i32 324) %40 = call float @llvm.SI.load.const(<16 x i8> %18, i32 328) %41 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %42 = load <8 x i32>, <8 x i32> addrspace(2)* %41, align 32, !tbaa !0 %43 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %44 = load <4 x i32>, <4 x i32> addrspace(2)* %43, align 16, !tbaa !0 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !tbaa !0 %54 = add i32 %5, %7 %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %54) %56 = extractelement <4 x float> %55, i32 0 %57 = extractelement <4 x float> %55, i32 1 %58 = extractelement <4 x float> %55, i32 2 %59 = extractelement <4 x float> %55, i32 3 %60 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %61 = load <16 x i8>, <16 x i8> addrspace(2)* %60, align 16, !tbaa !0 %62 = add i32 %5, %7 %63 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %61, i32 0, i32 %62) %64 = extractelement <4 x float> %63, i32 0 %65 = extractelement <4 x float> %63, i32 1 %66 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %67 = load <16 x i8>, <16 x i8> addrspace(2)* %66, align 16, !tbaa !0 %68 = add i32 %10, %6 %69 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %67, i32 0, i32 %68) %70 = extractelement <4 x float> %69, i32 0 %71 = extractelement <4 x float> %69, i32 1 %72 = bitcast float %70 to i32 %73 = bitcast float %71 to i32 %74 = insertelement <4 x i32> undef, i32 %72, i32 0 %75 = insertelement <4 x i32> %74, i32 %73, i32 1 %76 = insertelement <4 x i32> %75, i32 0, i32 2 %77 = bitcast <8 x i32> %42 to <32 x i8> %78 = bitcast <4 x i32> %44 to <16 x i8> %79 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %76, <32 x i8> %77, <16 x i8> %78, i32 2) %80 = extractelement <4 x float> %79, i32 0 %81 = extractelement <4 x float> %79, i32 1 %82 = extractelement <4 x float> %79, i32 2 %83 = extractelement <4 x float> %79, i32 3 %84 = bitcast float %70 to i32 %85 = bitcast float %71 to i32 %86 = insertelement <4 x i32> , i32 %84, i32 1 %87 = insertelement <4 x i32> %86, i32 %85, i32 2 %88 = insertelement <4 x i32> %87, i32 0, i32 3 %89 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %88, <8 x i32> %42, <4 x i32> %44, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %90 = extractelement <4 x float> %89, i32 0 %91 = extractelement <4 x float> %89, i32 1 %92 = extractelement <4 x float> %89, i32 2 %93 = extractelement <4 x float> %89, i32 3 %94 = bitcast float %70 to i32 %95 = bitcast float %71 to i32 %96 = insertelement <4 x i32> , i32 %94, i32 1 %97 = insertelement <4 x i32> %96, i32 %95, i32 2 %98 = insertelement <4 x i32> %97, i32 0, i32 3 %99 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %98, <8 x i32> %42, <4 x i32> %44, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %100 = extractelement <4 x float> %99, i32 0 %101 = extractelement <4 x float> %99, i32 1 %102 = extractelement <4 x float> %99, i32 2 %103 = extractelement <4 x float> %99, i32 3 %104 = fmul float %56, 2.550000e+02 %105 = fadd float %104, -1.280000e+02 %106 = fmul float %57, 2.550000e+02 %107 = fadd float %106, -1.280000e+02 %108 = fmul float %58, 2.550000e+02 %109 = fadd float %108, -1.280000e+02 %110 = fmul float %59, 2.550000e+02 %111 = fadd float %110, -1.280000e+02 %112 = fcmp olt float %105, 0.000000e+00 %113 = fcmp olt float %107, 0.000000e+00 %114 = fcmp olt float %109, 0.000000e+00 %115 = fcmp olt float %111, 0.000000e+00 %116 = select i1 %112, float 1.000000e+00, float 0.000000e+00 %117 = call float @fabs(float %105) %118 = call float @fabs(float %107) %119 = call float @fabs(float %109) %120 = call float @fabs(float %111) %121 = fsub float %117, %116 %122 = select i1 %113, float -1.000000e+00, float -0.000000e+00 %123 = fadd float %118, %122 %124 = select i1 %114, float -1.000000e+00, float -0.000000e+00 %125 = fadd float %119, %124 %126 = select i1 %115, float -1.000000e+00, float -0.000000e+00 %127 = fadd float %120, %126 %128 = fadd float %121, -6.400000e+01 %129 = fadd float %123, -6.400000e+01 %130 = fadd float %125, -6.400000e+01 %131 = fadd float %127, -6.400000e+01 %132 = fcmp olt float %128, 0.000000e+00 %133 = fcmp olt float %129, 0.000000e+00 %134 = select i1 %132, float 1.000000e+00, float 0.000000e+00 %135 = select i1 %133, float 1.000000e+00, float 0.000000e+00 %136 = call float @fabs(float %128) %137 = call float @fabs(float %129) %138 = call float @fabs(float %130) %139 = call float @fabs(float %131) %140 = fsub float %136, %134 %141 = fsub float %137, %135 %142 = fmul float %140, 0x3F90410420000000 %143 = fmul float %141, 0x3F90410420000000 %144 = fsub float 1.000000e+00, %142 %145 = fsub float %144, %143 %146 = fmul float %142, %142 %147 = fmul float %143, %143 %148 = fadd float %147, %146 %149 = fmul float %145, %145 %150 = fadd float %148, %149 %151 = call float @llvm.AMDGPU.rsq.clamped.f32(float %150) %152 = fmul float %142, %151 %153 = fmul float %143, %151 %154 = fmul float %145, %151 %155 = fmul float %134, 2.000000e+00 %156 = fmul float %135, 2.000000e+00 %157 = fsub float 1.000000e+00, %155 %158 = fsub float 1.000000e+00, %156 %159 = fmul float %152, %157 %160 = fmul float %153, %158 %161 = fmul float %116, 2.000000e+00 %162 = fsub float 1.000000e+00, %161 %163 = fmul float %162, %154 %164 = fmul float %159, %80 %165 = fmul float %160, %81 %166 = fadd float %164, %165 %167 = fmul float %163, %82 %168 = fadd float %166, %167 %169 = fmul float %83, 0.000000e+00 %170 = fadd float %168, %169 %171 = fmul float %159, %90 %172 = fmul float %160, %91 %173 = fadd float %171, %172 %174 = fmul float %163, %92 %175 = fadd float %173, %174 %176 = fmul float %93, 0.000000e+00 %177 = fadd float %175, %176 %178 = fmul float %159, %100 %179 = fmul float %160, %101 %180 = fadd float %178, %179 %181 = fmul float %163, %102 %182 = fadd float %180, %181 %183 = fmul float %103, 0.000000e+00 %184 = fadd float %182, %183 %185 = fmul float %170, %170 %186 = fmul float %177, %177 %187 = fadd float %186, %185 %188 = fmul float %184, %184 %189 = fadd float %187, %188 %190 = call float @llvm.AMDGPU.rsq.clamped.f32(float %189) %191 = fmul float %170, %190 %192 = fmul float %177, %190 %193 = fmul float %184, %190 %194 = fmul float %49, %80 %195 = fmul float %50, %81 %196 = fadd float %194, %195 %197 = fmul float %51, %82 %198 = fadd float %196, %197 %199 = fadd float %198, %83 %200 = fmul float %49, %90 %201 = fmul float %50, %91 %202 = fadd float %200, %201 %203 = fmul float %51, %92 %204 = fadd float %202, %203 %205 = fadd float %204, %93 %206 = fmul float %49, %100 %207 = fmul float %50, %101 %208 = fadd float %206, %207 %209 = fmul float %51, %102 %210 = fadd float %208, %209 %211 = fadd float %210, %103 %212 = fsub float %199, %35 %213 = fsub float %205, %36 %214 = fsub float %211, %37 %215 = fmul float %38, %212 %216 = fmul float %39, %213 %217 = fadd float %216, %215 %218 = fmul float %40, %214 %219 = fadd float %217, %218 %220 = fmul float %199, %22 %221 = fmul float %205, %26 %222 = fadd float %220, %221 %223 = fmul float %211, %30 %224 = fadd float %222, %223 %225 = fadd float %224, %34 %226 = fmul float %64, %13 %227 = fadd float %226, %15 %228 = fmul float %65, %14 %229 = fadd float %228, %16 %230 = bitcast float %70 to i32 %231 = bitcast float %71 to i32 %232 = insertelement <4 x i32> , i32 %230, i32 1 %233 = insertelement <4 x i32> %232, i32 %231, i32 2 %234 = insertelement <4 x i32> %233, i32 0, i32 3 %235 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %234, <8 x i32> %42, <4 x i32> %44, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %236 = extractelement <4 x float> %235, i32 0 %237 = extractelement <4 x float> %235, i32 1 %238 = extractelement <4 x float> %235, i32 2 %239 = extractelement <4 x float> %235, i32 3 %240 = fmul float %199, %19 %241 = fmul float %205, %23 %242 = fadd float %240, %241 %243 = fmul float %211, %27 %244 = fadd float %242, %243 %245 = fadd float %244, %31 %246 = fmul float %199, %20 %247 = fmul float %205, %24 %248 = fadd float %246, %247 %249 = fmul float %211, %28 %250 = fadd float %248, %249 %251 = fadd float %250, %32 %252 = fsub float -0.000000e+00, %251 %253 = fmul float %199, %21 %254 = fmul float %205, %25 %255 = fadd float %253, %254 %256 = fmul float %211, %29 %257 = fadd float %255, %256 %258 = fadd float %257, %33 %259 = fmul float %258, 2.000000e+00 %260 = fsub float %259, %225 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %227, float %229, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %199, float %205, float %211, float %219) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %191, float %192, float %193, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %236, float %237, float %238, float %239) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %245, float %252, float %260, float %225) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0xc3000000 ; 7E0202FF C3000000 v_mov_b32_e32 v2, 0x437f0000 ; 7E0402FF 437F0000 v_mov_b32_e32 v4, 0x80000000 ; 7E0802FF 80000000 v_mov_b32_e32 v5, 0xc2800000 ; 7E0A02FF C2800000 v_mov_b32_e32 v6, 0x3c820821 ; 7E0C02FF 3C820821 v_add_i32_e32 v0, s10, v0 ; 4A00000A v_add_i32_e32 v3, s11, v3 ; 4A06060B s_load_dwordx4 s[24:27], s[4:5], 0x0 ; C08C0500 s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908 s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C s_load_dwordx4 s[36:39], s[2:3], 0x4 ; C0920304 s_load_dwordx4 s[40:43], s[2:3], 0x10 ; C0940310 s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[7:10], v0, s[12:15], 0 idxen ; E00C2000 80030700 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[10:13], v0, s[16:19], 0 idxen ; E00C2000 80040A00 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[12:15], v0, s[20:23], 0 idxen ; E00C2000 80050C00 buffer_load_format_xyzw v[18:21], v3, s[8:11], 0 idxen ; E00C2000 80021203 s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v20, 0 ; 7E280280 s_buffer_load_dword s0, s[40:43], 0xf ; C200290F s_buffer_load_dword s19, s[40:43], 0x4c ; C209A94C s_buffer_load_dword s16, s[40:43], 0x4d ; C208294D s_buffer_load_dword s8, s[40:43], 0x4e ; C204294E s_buffer_load_dword s20, s[40:43], 0x50 ; C20A2950 s_buffer_load_dword s22, s[36:39], 0x6 ; C20B2506 s_buffer_load_dword s23, s[36:39], 0x7 ; C20BA507 s_buffer_load_dword s44, s[36:39], 0x8 ; C2162508 s_buffer_load_dword s36, s[36:39], 0x9 ; C2122509 s_buffer_load_dword s21, s[40:43], 0x51 ; C20AA951 s_buffer_load_dword s15, s[40:43], 0x52 ; C207A952 s_buffer_load_dword s3, s[40:43], 0x5 ; C201A905 s_buffer_load_dword s4, s[40:43], 0x6 ; C2022906 s_buffer_load_dword s9, s[40:43], 0x7 ; C204A907 s_buffer_load_dword s2, s[40:43], 0x8 ; C2012908 s_buffer_load_dword s1, s[40:43], 0x9 ; C200A909 s_buffer_load_dword s5, s[40:43], 0x0 ; C202A900 s_buffer_load_dword s6, s[40:43], 0x1 ; C2032901 s_buffer_load_dword s7, s[40:43], 0x2 ; C203A902 s_buffer_load_dword s11, s[40:43], 0x3 ; C205A903 s_buffer_load_dword s17, s[40:43], 0x4 ; C208A904 s_buffer_load_dword s12, s[40:43], 0xa ; C206290A s_buffer_load_dword s18, s[40:43], 0xb ; C209290B s_buffer_load_dword s13, s[40:43], 0xc ; C206A90C s_buffer_load_dword s10, s[40:43], 0xd ; C205290D s_buffer_load_dword s14, s[40:43], 0xe ; C207290E s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s44 ; 7E00022C v_mov_b32_e32 v3, s36 ; 7E060224 image_sample_l v[21:24], 15, 0, 0, 0, 0, 0, 0, 0, v[18:21], s[28:35], s[24:27] ; F0900F00 00C71512 v_mov_b32_e32 v17, 0x10001 ; 7E2202FF 00010001 image_sample_l_o v[25:28], 15, 0, 0, 0, 0, 0, 0, 0, v[17:20], s[28:35], s[24:27] ; F0D00F00 00C71911 v_mov_b32_e32 v17, 0x20002 ; 7E2202FF 00020002 image_sample_l_o v[29:32], 15, 0, 0, 0, 0, 0, 0, 0, v[17:20], s[28:35], s[24:27] ; F0D00F00 00C71D11 v_mov_b32_e32 v17, 0x30003 ; 7E2202FF 00030003 image_sample_l_o v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[17:20], s[28:35], s[24:27] ; F0D00F00 00C70E11 exp 15, 32, 0, 0, 0, v20, v20, v20, v20 ; F800020F 14141414 s_waitcnt vmcnt(3) ; BF8C0773 v_mul_f32_e32 v18, v22, v8 ; 10241116 v_mad_f32 v10, v2, v10, v1 ; D282000A 04061502 v_mac_f32_e32 v1, v2, v11 ; 3E021702 v_mac_f32_e32 v0, s22, v12 ; 3E001816 v_mac_f32_e32 v3, s23, v13 ; 3E061A17 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v2, v4, -1.0, vcc ; D2000002 01A9E704 v_add_f32_e64 v1, |v1|, v2 ; D2060101 00020501 v_cmp_gt_f32_e32 vcc, 0, v10 ; 7C081480 v_cndmask_b32_e64 v2, 0, 1.0, vcc ; D2000002 01A9E480 v_sub_f32_e64 v4, |v10|, v2 ; D2080104 0002050A v_add_f32_e32 v4, v5, v4 ; 06080905 v_add_f32_e32 v1, v5, v1 ; 06020305 v_cmp_gt_f32_e32 vcc, 0, v4 ; 7C080880 v_cndmask_b32_e64 v5, 0, 1.0, vcc ; D2000005 01A9E480 v_sub_f32_e64 v4, |v4|, v5 ; D2080104 00020B04 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v10, 0, 1.0, vcc ; D200000A 01A9E480 v_sub_f32_e64 v1, |v1|, v10 ; D2080101 00021501 v_mul_f32_e32 v11, v6, v4 ; 10160906 v_mad_f32 v4, -v4, v6, 1.0 ; D2820004 23CA0D04 v_mad_f32 v4, -v1, v6, v4 ; D2820004 24120D01 v_mul_f32_e32 v1, v6, v1 ; 10020306 v_mac_f32_e32 v18, v21, v7 ; 3E240F15 s_waitcnt vmcnt(2) ; BF8C0772 v_mul_f32_e32 v6, v26, v8 ; 100C111A v_mac_f32_e32 v6, v25, v7 ; 3E0C0F19 s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v8, v30, v8 ; 1010111E v_mac_f32_e32 v8, v29, v7 ; 3E100F1D v_mul_f32_e32 v7, v11, v11 ; 100E170B v_mac_f32_e32 v7, v1, v1 ; 3E0E0301 v_mac_f32_e32 v7, v4, v4 ; 3E0E0904 v_rsq_clamp_f32_e32 v7, v7 ; 7E0E5907 v_mac_f32_e32 v18, v23, v9 ; 3E241317 v_mac_f32_e32 v6, v27, v9 ; 3E0C131B v_mac_f32_e32 v8, v31, v9 ; 3E10131F v_mul_f32_e32 v9, v7, v11 ; 10121707 v_mul_f32_e32 v1, v7, v1 ; 10020307 v_mul_f32_e32 v4, v7, v4 ; 10080907 v_mad_f32 v5, -2.0, v5, 1.0 ; D2820005 03CA0AF5 v_mul_f32_e32 v5, v5, v9 ; 100A1305 v_mad_f32 v7, -2.0, v10, 1.0 ; D2820007 03CA14F5 v_mul_f32_e32 v1, v7, v1 ; 10020307 v_mad_f32 v2, -2.0, v2, 1.0 ; D2820002 03CA04F5 v_mul_f32_e32 v2, v4, v2 ; 10040504 v_add_f32_e32 v4, v24, v18 ; 06082518 v_mul_f32_e32 v7, v22, v1 ; 100E0316 v_mac_f32_e32 v7, v21, v5 ; 3E0E0B15 v_mac_f32_e32 v7, v23, v2 ; 3E0E0517 v_mac_f32_e32 v7, 0, v24 ; 3E0E3080 v_add_f32_e32 v6, v28, v6 ; 060C0D1C v_mul_f32_e32 v9, v26, v1 ; 1012031A v_mac_f32_e32 v9, v25, v5 ; 3E120B19 v_mac_f32_e32 v9, v27, v2 ; 3E12051B v_mac_f32_e32 v9, 0, v28 ; 3E123880 v_mul_f32_e32 v1, v30, v1 ; 1002031E v_mac_f32_e32 v1, v29, v5 ; 3E020B1D v_mac_f32_e32 v1, v31, v2 ; 3E02051F v_add_f32_e32 v2, v32, v8 ; 06041120 v_mac_f32_e32 v1, 0, v32 ; 3E024080 exp 15, 33, 0, 0, 0, v0, v3, v20, v20 ; F800021F 14140300 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_subrev_f32_e32 v0, s19, v4 ; 0A000813 v_mul_f32_e32 v0, s20, v0 ; 10000014 v_subrev_f32_e32 v3, s16, v6 ; 0A060C10 v_mac_f32_e32 v0, s21, v3 ; 3E000615 v_mul_f32_e32 v3, v7, v7 ; 10060F07 v_mac_f32_e32 v3, v9, v9 ; 3E061309 v_mac_f32_e32 v3, v1, v1 ; 3E060301 v_rsq_clamp_f32_e32 v3, v3 ; 7E065903 v_subrev_f32_e32 v5, s8, v2 ; 0A0A0408 v_mac_f32_e32 v0, s15, v5 ; 3E000A0F exp 15, 34, 0, 0, 0, v4, v6, v2, v0 ; F800022F 00020604 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, v3, v7 ; 10000F03 v_mul_f32_e32 v5, v3, v9 ; 100A1303 v_mul_f32_e32 v1, v3, v1 ; 10020303 exp 15, 35, 0, 0, 0, v0, v5, v1, v20 ; F800023F 14010500 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s9, v6 ; 10000C09 v_mul_f32_e32 v1, s17, v6 ; 10020C11 v_mul_f32_e32 v3, s3, v6 ; 10060C03 v_mul_f32_e32 v5, s4, v6 ; 100A0C04 v_mac_f32_e32 v0, s11, v4 ; 3E00080B v_mac_f32_e32 v1, s5, v4 ; 3E020805 v_mac_f32_e32 v3, s6, v4 ; 3E060806 v_mac_f32_e32 v5, s7, v4 ; 3E0A0807 v_mac_f32_e32 v0, s18, v2 ; 3E000412 v_mac_f32_e32 v1, s2, v2 ; 3E020402 v_mac_f32_e32 v3, s1, v2 ; 3E060401 v_mac_f32_e32 v5, s12, v2 ; 3E0A040C v_add_f32_e32 v0, s0, v0 ; 06000000 v_add_f32_e32 v1, s13, v1 ; 0602020D v_add_f32_e32 v2, s10, v3 ; 0604060A v_add_f32_e32 v3, s14, v5 ; 06060A0E v_xor_b32_e32 v2, 0x80000000, v2 ; 3A0404FF 80000000 v_mad_f32 v3, 2.0, v3, -v0 ; D2820003 840206F4 exp 15, 36, 0, 0, 0, v14, v15, v16, v17 ; F800024F 11100F0E exp 15, 37, 0, 0, 0, v20, v20, v20, v20 ; F800025F 14141414 exp 15, 12, 0, 0, 0, v1, v2, v3, v0 ; F80000CF 00030201 exp 15, 13, 0, 1, 0, v20, v20, v20, v20 ; F80008DF 14141414 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 36 Code Size: 820 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0xB last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL OUT[2], COLOR[2] DCL OUT[3], COLOR[3] DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0..3] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0..2], LOCAL IMM[0] UINT32 {0, 16, 0, 0} IMM[1] FLT32 { 0.5000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV TEMP[1].w, TEMP[0].wwww 3: MUL TEMP[1].xyz, CONST[1][1].xyzz, TEMP[0].xyzz 4: MUL TEMP[0].xyz, TEMP[1], IN[3] 5: DP3 TEMP[1].x, IN[2].xyzz, IN[2].xyzz 6: RSQ TEMP[1].x, TEMP[1].xxxx 7: MUL TEMP[1].xyz, IN[2].xyzz, TEMP[1].xxxx 8: MAD TEMP[1].xyz, TEMP[1].xyzz, IMM[1].xxxx, IMM[1].xxxx 9: MOV TEMP[2].w, IMM[1].yyyy 10: MOV TEMP[2].x, TEMP[1].xxxx 11: MOV TEMP[2].y, TEMP[1].yyyy 12: MOV TEMP[2].z, TEMP[1].zzzz 13: MOV TEMP[1].w, IMM[1].yyyy 14: MOV TEMP[1].x, TEMP[0].xxxx 15: MOV TEMP[1].y, TEMP[0].yyyy 16: MOV TEMP[1].z, TEMP[0].zzzz 17: MOV OUT[2], IN[1].wwww 18: MOV OUT[0], TEMP[1] 19: MOV OUT[3], TEMP[2] 20: MOV OUT[1], IMM[1].yyyy 21: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %27 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %28 = load <32 x i8>, <32 x i8> addrspace(2)* %27, align 32, !tbaa !0 %29 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %37 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %38 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %40 = bitcast float %31 to i32 %41 = bitcast float %32 to i32 %42 = insertelement <2 x i32> undef, i32 %40, i32 0 %43 = insertelement <2 x i32> %42, i32 %41, i32 1 %44 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %43, <32 x i8> %28, <16 x i8> %30, i32 2) %45 = extractelement <4 x float> %44, i32 0 %46 = extractelement <4 x float> %44, i32 1 %47 = extractelement <4 x float> %44, i32 2 %48 = fmul float %24, %45 %49 = fmul float %25, %46 %50 = fmul float %26, %47 %51 = fmul float %48, %37 %52 = fmul float %49, %38 %53 = fmul float %50, %39 %54 = fmul float %34, %34 %55 = fmul float %35, %35 %56 = fadd float %55, %54 %57 = fmul float %36, %36 %58 = fadd float %56, %57 %59 = call float @llvm.AMDGPU.rsq.clamped.f32(float %58) %60 = fmul float %34, %59 %61 = fmul float %35, %59 %62 = fmul float %36, %59 %63 = fmul float %60, 5.000000e-01 %64 = fadd float %63, 5.000000e-01 %65 = fmul float %61, 5.000000e-01 %66 = fadd float %65, 5.000000e-01 %67 = fmul float %62, 5.000000e-01 %68 = fadd float %67, 5.000000e-01 %69 = call i32 @llvm.SI.packf16(float %51, float %52) %70 = bitcast i32 %69 to float %71 = call i32 @llvm.SI.packf16(float %53, float 0.000000e+00) %72 = bitcast i32 %71 to float %73 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %74 = bitcast i32 %73 to float %75 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %76 = bitcast i32 %75 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %70, float %72, float %70, float %72) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 1, i32 1, float %74, float %76, float %74, float %76) %77 = call i32 @llvm.SI.packf16(float %64, float %66) %78 = bitcast i32 %77 to float %79 = call i32 @llvm.SI.packf16(float %68, float 0.000000e+00) %80 = bitcast i32 %79 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 2, i32 0, float %33, float %33, float %33, float %33) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 3, i32 1, float %78, float %80, float %78, float %80) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_buffer_load_dword s0, s[0:3], 0x6 ; C2000106 v_interp_p1_f32 v4, v0, 3, 1, [m0] ; C8100700 v_interp_p2_f32 v4, [v4], v1, 3, 1, [m0] ; C8110701 v_interp_p1_f32 v5, v0, 0, 2, [m0] ; C8140800 v_interp_p2_f32 v5, [v5], v1, 0, 2, [m0] ; C8150801 v_interp_p1_f32 v6, v0, 1, 2, [m0] ; C8180900 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 v_interp_p2_f32 v6, [v6], v1, 1, 2, [m0] ; C8190901 v_interp_p1_f32 v7, v0, 2, 2, [m0] ; C81C0A00 v_interp_p2_f32 v7, [v7], v1, 2, 2, [m0] ; C81D0A01 v_interp_p1_f32 v8, v0, 0, 3, [m0] ; C8200C00 v_interp_p2_f32 v8, [v8], v1, 0, 3, [m0] ; C8210C01 v_interp_p1_f32 v9, v0, 1, 3, [m0] ; C8240D00 v_interp_p2_f32 v9, [v9], v1, 1, 3, [m0] ; C8250D01 v_interp_p1_f32 v0, v0, 2, 3, [m0] ; C8000E00 v_interp_p2_f32 v0, [v0], v1, 2, 3, [m0] ; C8010E01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[1:3], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800700 00430102 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, s4, v1 ; 10020204 v_mul_f32_e32 v2, s5, v2 ; 10040405 v_mul_f32_e32 v3, s0, v3 ; 10060600 v_mul_f32_e32 v10, v5, v5 ; 10140B05 v_mac_f32_e32 v10, v6, v6 ; 3E140D06 v_mac_f32_e32 v10, v7, v7 ; 3E140F07 v_rsq_clamp_f32_e32 v10, v10 ; 7E14590A v_mul_f32_e32 v1, v8, v1 ; 10020308 v_mul_f32_e32 v2, v9, v2 ; 10040509 v_mul_f32_e32 v0, v0, v3 ; 10000700 v_mul_f32_e32 v3, v10, v5 ; 10060B0A v_mul_f32_e32 v5, v10, v6 ; 100A0D0A v_mul_f32_e32 v6, v10, v7 ; 100C0F0A v_mad_f32 v3, 0.5, v3, 0.5 ; D2820003 03C206F0 v_mad_f32 v5, 0.5, v5, 0.5 ; D2820005 03C20AF0 v_mad_f32 v6, 0.5, v6, 0.5 ; D2820006 03C20CF0 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e64 v0, v0, 0 ; D25E0000 00010100 v_cvt_pkrtz_f16_f32_e64 v2, 0, 0 ; D25E0002 00010080 exp 15, 0, 1, 0, 0, v1, v0, v1, v0 ; F800040F 00010001 exp 15, 1, 1, 0, 0, v2, v2, v2, v2 ; F800041F 02020202 s_waitcnt expcnt(0) ; BF8C070F v_cvt_pkrtz_f16_f32_e32 v0, v3, v5 ; 5E000B03 v_cvt_pkrtz_f16_f32_e64 v1, v6, 0 ; D25E0001 00010106 exp 15, 2, 0, 0, 0, v4, v4, v4, v4 ; F800002F 04040404 exp 15, 3, 1, 1, 1, v0, v1, v0, v1 ; F8001C3F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 272 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL IN[5] DCL IN[6] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL OUT[4], GENERIC[2] DCL OUT[5], GENERIC[3] DCL OUT[6], GENERIC[4] DCL OUT[7], GENERIC[5] DCL OUT[8], GENERIC[6] DCL OUT[9], GENERIC[7] DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0..7] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL DCL TEMP[2..16], LOCAL IMM[0] FLT32 { 0.0000, 255.0000, -128.0000, 1.0000} IMM[1] INT32 {1, 0, 2, 3} IMM[2] FLT32 { -64.0000, 0.0159, 2.0000, 16.0000} IMM[3] UINT32 {3, 304, 320, 4} IMM[4] UINT32 {0, 20, 36, 52} IMM[5] UINT32 {8, 24, 40, 56} IMM[6] UINT32 {12, 28, 44, 60} IMM[7] UINT32 {16, 32, 48, 348} IMM[8] FLT32 { 0.0175, -0.5000, 0.5000, 0.0001} IMM[9] UINT32 {72, 80, 64, 88} IMM[10] UINT32 {100, 104, 96, 112} IMM[11] FLT32 { 0.0774, 0.9479, 0.0521, 2.4000} IMM[12] FLT32 { 0.0404, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MOV TEMP[0].xy, IN[6].xyyy 4: MOV TEMP[0].w, IMM[0].xxxx 5: TXL TEMP[0], TEMP[0], SAMP[0], 2D 6: MOV TEMP[2].xy, IN[6].xyyy 7: MOV TEMP[2].w, IMM[0].xxxx 8: TXL TEMP[2], TEMP[2], SAMP[0], 2D, IMM[1].xyx 9: MOV TEMP[3].xy, IN[6].xyyy 10: MOV TEMP[3].w, IMM[0].xxxx 11: TXL TEMP[3], TEMP[3], SAMP[0], 2D, IMM[1].zyz 12: MAD TEMP[4], IN[1], IMM[0].yyyy, IMM[0].zzzz 13: FSLT TEMP[5], TEMP[4], IMM[0].xxxx 14: AND TEMP[5], TEMP[5], IMM[0].wwww 15: ABS TEMP[4], TEMP[4] 16: ADD TEMP[4], TEMP[4], -TEMP[5] 17: ADD TEMP[4], TEMP[4], IMM[2].xxxx 18: FSLT TEMP[6], TEMP[4], IMM[0].xxxx 19: AND TEMP[6], TEMP[6], IMM[0].wwww 20: ABS TEMP[4], TEMP[4] 21: ADD TEMP[4], TEMP[4], -TEMP[6] 22: MUL TEMP[4], TEMP[4], IMM[2].yyyy 23: MUL TEMP[6], TEMP[6], IMM[2].zzzz 24: ADD TEMP[6], IMM[0].wwww, -TEMP[6] 25: MUL TEMP[5], IMM[2].zzzz, TEMP[5] 26: ADD TEMP[5].xzw, IMM[0].wwww, -TEMP[5] 27: MOV TEMP[7].x, TEMP[4].xxxx 28: MOV TEMP[7].y, TEMP[4].yyyy 29: ADD TEMP[8].x, IMM[0].wwww, -TEMP[4].xxxx 30: ADD TEMP[8].x, TEMP[8].xxxx, -TEMP[4].yyyy 31: MOV TEMP[7].z, TEMP[8].xxxx 32: DP3 TEMP[8].x, TEMP[7].xyzz, TEMP[7].xyzz 33: RSQ TEMP[8].x, TEMP[8].xxxx 34: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[8].xxxx 35: MUL TEMP[8].xy, TEMP[7].xyyy, TEMP[6].xyyy 36: MOV TEMP[9].x, TEMP[4].zzzz 37: MOV TEMP[9].y, TEMP[4].wwww 38: ADD TEMP[10].x, IMM[0].wwww, -TEMP[4].zzzz 39: ADD TEMP[4].x, TEMP[10].xxxx, -TEMP[4].wwww 40: MOV TEMP[9].z, TEMP[4].xxxx 41: DP3 TEMP[4].x, TEMP[9].xyzz, TEMP[9].xyzz 42: RSQ TEMP[4].x, TEMP[4].xxxx 43: MUL TEMP[4].xyz, TEMP[9].xyzz, TEMP[4].xxxx 44: MUL TEMP[6].xy, TEMP[4].xyyy, TEMP[6].zwww 45: MOV TEMP[9].w, IMM[0].xxxx 46: MOV TEMP[9].x, TEMP[8].xxxx 47: MOV TEMP[9].y, TEMP[8].yyyy 48: MUL TEMP[7].x, TEMP[7].zzzz, TEMP[5].xxxx 49: MOV TEMP[9].z, TEMP[7].xxxx 50: DP4 TEMP[7].x, TEMP[9], TEMP[0] 51: DP4 TEMP[8].x, TEMP[9], TEMP[2] 52: MOV TEMP[7].y, TEMP[8].xxxx 53: DP4 TEMP[8].x, TEMP[9], TEMP[3] 54: MOV TEMP[7].z, TEMP[8].xxxx 55: DP3 TEMP[8].x, TEMP[7].xyzz, TEMP[7].xyzz 56: RSQ TEMP[8].x, TEMP[8].xxxx 57: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[8].xxxx 58: MOV TEMP[8].w, IMM[0].xxxx 59: MOV TEMP[8].x, TEMP[6].xxxx 60: MOV TEMP[8].y, TEMP[6].yyyy 61: MUL TEMP[4].x, TEMP[4].zzzz, TEMP[5].zzzz 62: MOV TEMP[8].z, TEMP[4].xxxx 63: DP4 TEMP[4].x, TEMP[8], TEMP[0] 64: DP4 TEMP[6].x, TEMP[8], TEMP[2] 65: MOV TEMP[4].y, TEMP[6].xxxx 66: DP4 TEMP[6].x, TEMP[8], TEMP[3] 67: MOV TEMP[4].z, TEMP[6].xxxx 68: DP3 TEMP[6].x, TEMP[4].xyzz, TEMP[7].xyzz 69: MUL TEMP[6].xyz, TEMP[6].xxxx, TEMP[7].xyzz 70: ADD TEMP[4].xyz, TEMP[4].xyzz, -TEMP[6].xyzz 71: DP3 TEMP[6].x, TEMP[4].xyzz, TEMP[4].xyzz 72: RSQ TEMP[6].x, TEMP[6].xxxx 73: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[6].xxxx 74: MOV TEMP[6].x, TEMP[4].xxxx 75: MOV TEMP[6].y, TEMP[4].yyyy 76: MOV TEMP[6].z, TEMP[4].zzzz 77: MOV TEMP[6].w, TEMP[5].wwww 78: MOV TEMP[4].w, IMM[0].wwww 79: MOV TEMP[4].x, IN[0].xxxx 80: MOV TEMP[4].y, IN[0].yyyy 81: MOV TEMP[4].z, IN[0].zzzz 82: DP4 TEMP[0].x, TEMP[4], TEMP[0] 83: DP4 TEMP[2].x, TEMP[4], TEMP[2] 84: DP4 TEMP[3].x, TEMP[4], TEMP[3] 85: MOV TEMP[4].x, TEMP[0].xxxx 86: MOV TEMP[4].y, TEMP[2].xxxx 87: MOV TEMP[4].z, TEMP[3].xxxx 88: ADD TEMP[4].xyz, TEMP[4].xyzz, -CONST[4][19].xyzz 89: MOV TEMP[5].x, TEMP[0].xxxx 90: MOV TEMP[5].y, TEMP[2].xxxx 91: MOV TEMP[5].z, TEMP[3].xxxx 92: DP3 TEMP[8].x, CONST[4][20].xyzz, TEMP[4].xyzz 93: MOV TEMP[5].w, TEMP[8].xxxx 94: MOV TEMP[8].x, TEMP[7].xxxx 95: MOV TEMP[8].y, TEMP[7].yyyy 96: MOV TEMP[8].z, TEMP[7].zzzz 97: DP3 TEMP[9].x, TEMP[4].xyzz, TEMP[4].xyzz 98: RSQ TEMP[9].x, TEMP[9].xxxx 99: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[9].xxxx 100: DP3 TEMP[9].x, TEMP[7].xyzz, CONST[5][0].xyzz 101: MUL TEMP[7].xyz, TEMP[9].xxxx, TEMP[7].xyzz 102: MUL TEMP[7].xyz, IMM[2].zzzz, TEMP[7].xyzz 103: ADD TEMP[7].xyz, CONST[5][0].xyzz, -TEMP[7].xyzz 104: DP3 TEMP[4].x, -TEMP[4].xyzz, TEMP[7].xyzz 105: MOV_SAT TEMP[4].x, TEMP[4].xxxx 106: POW TEMP[4].x, TEMP[4].xxxx, IMM[2].wwww 107: MOV_SAT TEMP[4].x, TEMP[4].xxxx 108: MOV TEMP[8].w, TEMP[4].xxxx 109: MOV TEMP[4].w, IMM[0].wwww 110: MOV TEMP[4].x, TEMP[0].xxxx 111: MOV TEMP[4].y, TEMP[2].xxxx 112: MOV TEMP[4].z, TEMP[3].xxxx 113: MOV TEMP[0].x, CONST[4][0].yyyy 114: MOV TEMP[0].y, CONST[4][1].yyyy 115: MOV TEMP[0].z, CONST[4][2].yyyy 116: MOV TEMP[0].w, CONST[4][3].yyyy 117: DP4 TEMP[0].x, TEMP[4], TEMP[0] 118: MOV TEMP[2].x, CONST[4][0].zzzz 119: MOV TEMP[2].y, CONST[4][1].zzzz 120: MOV TEMP[2].z, CONST[4][2].zzzz 121: MOV TEMP[2].w, CONST[4][3].zzzz 122: DP4 TEMP[2].x, TEMP[4], TEMP[2] 123: MOV TEMP[3].x, CONST[4][0].wwww 124: MOV TEMP[3].y, CONST[4][1].wwww 125: MOV TEMP[3].z, CONST[4][2].wwww 126: MOV TEMP[3].w, CONST[4][3].wwww 127: DP4 TEMP[3].x, TEMP[4], TEMP[3] 128: MOV TEMP[7].x, CONST[4][0].xxxx 129: MOV TEMP[7].y, CONST[4][1].xxxx 130: MOV TEMP[7].z, CONST[4][2].xxxx 131: MOV TEMP[7].w, CONST[4][3].xxxx 132: DP4 TEMP[4].x, TEMP[4], TEMP[7] 133: MOV TEMP[4].w, TEMP[3].xxxx 134: MUL TEMP[7].x, CONST[1][2].xxxx, IMM[8].xxxx 135: ADD TEMP[9].xy, IN[2].xyyy, IMM[8].yyyy 136: COS TEMP[10].x, TEMP[7].xxxx 137: SIN TEMP[7].x, TEMP[7].xxxx 138: MUL TEMP[11].x, TEMP[7].xxxx, TEMP[9].yyyy 139: MAD TEMP[11].x, TEMP[10].xxxx, TEMP[9].xxxx, -TEMP[11].xxxx 140: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[9].yyyy 141: MAD TEMP[7].x, TEMP[7].xxxx, TEMP[9].xxxx, TEMP[10].xxxx 142: MOV TEMP[11].y, TEMP[7].xxxx 143: MAD TEMP[7].xy, CONST[1][1].yyyy, TEMP[11].xyyy, IMM[8].zzzz 144: MAD TEMP[7].xy, CONST[4][21].wwww, CONST[1][3].xyyy, TEMP[7].xyyy 145: ADD TEMP[7].xy, TEMP[7].xyyy, CONST[1][4].zwww 146: MUL TEMP[10].x, IMM[8].xxxx, CONST[1][2].yyyy 147: COS TEMP[11].x, TEMP[10].xxxx 148: SIN TEMP[10].x, TEMP[10].xxxx 149: MUL TEMP[12].x, TEMP[10].xxxx, TEMP[9].yyyy 150: MAD TEMP[12].x, TEMP[11].xxxx, TEMP[9].xxxx, -TEMP[12].xxxx 151: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[9].xxxx 152: MAD TEMP[10].x, TEMP[11].xxxx, TEMP[9].yyyy, TEMP[10].xxxx 153: MOV TEMP[12].y, TEMP[10].xxxx 154: MAD TEMP[10].xy, TEMP[12].xyyy, CONST[1][1].zzzz, IMM[8].zzzz 155: MAD TEMP[10].xy, CONST[4][21].wwww, CONST[1][3].zwww, TEMP[10].xyyy 156: ADD TEMP[10].xy, TEMP[10].xyyy, CONST[1][5].xyyy 157: MOV TEMP[11].x, TEMP[7].xxxx 158: MOV TEMP[11].y, TEMP[7].yyyy 159: MOV TEMP[11].z, TEMP[10].xxxx 160: MOV TEMP[11].w, TEMP[10].yyyy 161: MUL TEMP[7].x, IMM[8].xxxx, CONST[1][2].zzzz 162: COS TEMP[10].x, TEMP[7].xxxx 163: SIN TEMP[7].x, TEMP[7].xxxx 164: MUL TEMP[12].x, TEMP[7].xxxx, TEMP[9].yyyy 165: MAD TEMP[12].x, TEMP[10].xxxx, TEMP[9].xxxx, -TEMP[12].xxxx 166: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[9].xxxx 167: MAD TEMP[7].x, TEMP[10].xxxx, TEMP[9].yyyy, TEMP[7].xxxx 168: MOV TEMP[12].y, TEMP[7].xxxx 169: MAD TEMP[7].xy, TEMP[12].xyyy, CONST[1][1].wwww, IMM[8].zzzz 170: MAD TEMP[7].xy, CONST[4][21].wwww, CONST[1][4].xyyy, TEMP[7].xyyy 171: ADD TEMP[7].xy, TEMP[7].xyyy, CONST[1][5].zwww 172: MUL TEMP[10].x, IMM[8].xxxx, CONST[1][6].yyyy 173: COS TEMP[12].x, TEMP[10].xxxx 174: SIN TEMP[10].x, TEMP[10].xxxx 175: MUL TEMP[13].x, TEMP[10].xxxx, TEMP[9].yyyy 176: MAD TEMP[13].x, TEMP[12].xxxx, TEMP[9].xxxx, -TEMP[13].xxxx 177: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[9].xxxx 178: MAD TEMP[9].x, TEMP[12].xxxx, TEMP[9].yyyy, TEMP[10].xxxx 179: MOV TEMP[13].y, TEMP[9].xxxx 180: MAD TEMP[9].xy, CONST[1][6].xxxx, TEMP[13].xyyy, IMM[8].zzzz 181: MAD TEMP[9].xy, CONST[4][21].wwww, CONST[1][6].zwww, TEMP[9].xyyy 182: ADD TEMP[9].xy, TEMP[9].xyyy, CONST[1][7].xyyy 183: MOV TEMP[10].x, TEMP[7].xxxx 184: MOV TEMP[10].y, TEMP[7].yyyy 185: MOV TEMP[10].z, TEMP[9].xxxx 186: MOV TEMP[10].w, TEMP[9].yyyy 187: MOV TEMP[7].xy, IN[6].xyyy 188: MOV TEMP[7].w, IMM[0].xxxx 189: TXL TEMP[7], TEMP[7], SAMP[0], 2D, IMM[1].wyw 190: MUL TEMP[9].xyz, IN[4].xyzz, IMM[8].zzzz 191: MAX TEMP[9].xyz, TEMP[9].xyzz, IMM[8].wwww 192: MUL TEMP[12].xyz, IN[5].xyzz, IMM[11].xxxx 193: MAD TEMP[13].xyz, IN[5].xyzz, IMM[11].yyyy, IMM[11].zzzz 194: POW TEMP[14].x, TEMP[13].xxxx, IMM[11].wwww 195: POW TEMP[14].y, TEMP[13].yyyy, IMM[11].wwww 196: POW TEMP[14].z, TEMP[13].zzzz, IMM[11].wwww 197: FSLT TEMP[13].x, IMM[12].xxxx, IN[5].xxxx 198: UIF TEMP[13].xxxx :0 199: MOV TEMP[13].x, TEMP[14].xxxx 200: ELSE :0 201: MOV TEMP[13].x, TEMP[12].xxxx 202: ENDIF 203: FSLT TEMP[15].x, IMM[12].xxxx, IN[5].yyyy 204: UIF TEMP[15].xxxx :0 205: MOV TEMP[15].x, TEMP[14].yyyy 206: ELSE :0 207: MOV TEMP[15].x, TEMP[12].yyyy 208: ENDIF 209: FSLT TEMP[16].x, IMM[12].xxxx, IN[5].zzzz 210: UIF TEMP[16].xxxx :0 211: MOV TEMP[14].x, TEMP[14].zzzz 212: ELSE :0 213: MOV TEMP[14].x, TEMP[12].zzzz 214: ENDIF 215: MOV TEMP[12].x, TEMP[13].xxxx 216: MOV TEMP[12].y, TEMP[15].xxxx 217: MOV TEMP[12].z, TEMP[14].xxxx 218: MOV TEMP[13].w, TEMP[7].wwww 219: MUL TEMP[13].xyz, TEMP[12].xyzz, TEMP[7].xyzz 220: MOV TEMP[7].w, IMM[0].xxxx 221: MOV TEMP[7].x, IN[3].xxxx 222: MOV TEMP[7].y, IN[3].yyyy 223: MOV TEMP[7].z, IN[3].zzzz 224: MOV TEMP[12].w, IMM[0].xxxx 225: MOV TEMP[12].x, TEMP[9].xxxx 226: MOV TEMP[12].y, TEMP[9].yyyy 227: MOV TEMP[12].z, TEMP[9].zzzz 228: MOV TEMP[4].xw, TEMP[4].xxxw 229: MOV TEMP[0].x, -TEMP[0].xxxx 230: MAD TEMP[2].x, TEMP[2].xxxx, IMM[2].zzzz, -TEMP[3].xxxx 231: MOV TEMP[0].y, TEMP[2].xxxx 232: MOV TEMP[4].yz, TEMP[0].yxyy 233: MOV OUT[1], TEMP[1] 234: MOV OUT[3], TEMP[8] 235: MOV OUT[7], TEMP[6] 236: MOV OUT[6], TEMP[12] 237: MOV OUT[8], TEMP[11] 238: MOV OUT[4], TEMP[13] 239: MOV OUT[0], TEMP[4] 240: MOV OUT[9], TEMP[10] 241: MOV OUT[2], TEMP[5] 242: MOV OUT[5], TEMP[7] 243: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %37 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = call float @llvm.SI.load.const(<16 x i8> %38, i32 0) %40 = call float @llvm.SI.load.const(<16 x i8> %38, i32 4) %41 = call float @llvm.SI.load.const(<16 x i8> %38, i32 8) %42 = call float @llvm.SI.load.const(<16 x i8> %38, i32 12) %43 = call float @llvm.SI.load.const(<16 x i8> %38, i32 16) %44 = call float @llvm.SI.load.const(<16 x i8> %38, i32 20) %45 = call float @llvm.SI.load.const(<16 x i8> %38, i32 24) %46 = call float @llvm.SI.load.const(<16 x i8> %38, i32 28) %47 = call float @llvm.SI.load.const(<16 x i8> %38, i32 32) %48 = call float @llvm.SI.load.const(<16 x i8> %38, i32 36) %49 = call float @llvm.SI.load.const(<16 x i8> %38, i32 40) %50 = call float @llvm.SI.load.const(<16 x i8> %38, i32 44) %51 = call float @llvm.SI.load.const(<16 x i8> %38, i32 48) %52 = call float @llvm.SI.load.const(<16 x i8> %38, i32 52) %53 = call float @llvm.SI.load.const(<16 x i8> %38, i32 56) %54 = call float @llvm.SI.load.const(<16 x i8> %38, i32 60) %55 = call float @llvm.SI.load.const(<16 x i8> %38, i32 304) %56 = call float @llvm.SI.load.const(<16 x i8> %38, i32 308) %57 = call float @llvm.SI.load.const(<16 x i8> %38, i32 312) %58 = call float @llvm.SI.load.const(<16 x i8> %38, i32 320) %59 = call float @llvm.SI.load.const(<16 x i8> %38, i32 324) %60 = call float @llvm.SI.load.const(<16 x i8> %38, i32 328) %61 = call float @llvm.SI.load.const(<16 x i8> %38, i32 348) %62 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 5 %63 = load <16 x i8>, <16 x i8> addrspace(2)* %62, align 16, !tbaa !0 %64 = call float @llvm.SI.load.const(<16 x i8> %63, i32 0) %65 = call float @llvm.SI.load.const(<16 x i8> %63, i32 4) %66 = call float @llvm.SI.load.const(<16 x i8> %63, i32 8) %67 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %68 = load <8 x i32>, <8 x i32> addrspace(2)* %67, align 32, !tbaa !0 %69 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %70 = load <4 x i32>, <4 x i32> addrspace(2)* %69, align 16, !tbaa !0 %71 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0 %73 = add i32 %5, %7 %74 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %72, i32 0, i32 %73) %75 = extractelement <4 x float> %74, i32 0 %76 = extractelement <4 x float> %74, i32 1 %77 = extractelement <4 x float> %74, i32 2 %78 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %79 = load <16 x i8>, <16 x i8> addrspace(2)* %78, align 16, !tbaa !0 %80 = add i32 %5, %7 %81 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %79, i32 0, i32 %80) %82 = extractelement <4 x float> %81, i32 0 %83 = extractelement <4 x float> %81, i32 1 %84 = extractelement <4 x float> %81, i32 2 %85 = extractelement <4 x float> %81, i32 3 %86 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %87 = load <16 x i8>, <16 x i8> addrspace(2)* %86, align 16, !tbaa !0 %88 = add i32 %5, %7 %89 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %87, i32 0, i32 %88) %90 = extractelement <4 x float> %89, i32 0 %91 = extractelement <4 x float> %89, i32 1 %92 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %93 = load <16 x i8>, <16 x i8> addrspace(2)* %92, align 16, !tbaa !0 %94 = add i32 %5, %7 %95 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %93, i32 0, i32 %94) %96 = extractelement <4 x float> %95, i32 0 %97 = extractelement <4 x float> %95, i32 1 %98 = extractelement <4 x float> %95, i32 2 %99 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %100 = load <16 x i8>, <16 x i8> addrspace(2)* %99, align 16, !tbaa !0 %101 = add i32 %5, %7 %102 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %100, i32 0, i32 %101) %103 = extractelement <4 x float> %102, i32 0 %104 = extractelement <4 x float> %102, i32 1 %105 = extractelement <4 x float> %102, i32 2 %106 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 5 %107 = load <16 x i8>, <16 x i8> addrspace(2)* %106, align 16, !tbaa !0 %108 = add i32 %5, %7 %109 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %107, i32 0, i32 %108) %110 = extractelement <4 x float> %109, i32 0 %111 = extractelement <4 x float> %109, i32 1 %112 = extractelement <4 x float> %109, i32 2 %113 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 6 %114 = load <16 x i8>, <16 x i8> addrspace(2)* %113, align 16, !tbaa !0 %115 = add i32 %10, %6 %116 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %114, i32 0, i32 %115) %117 = extractelement <4 x float> %116, i32 0 %118 = extractelement <4 x float> %116, i32 1 %119 = bitcast float %117 to i32 %120 = bitcast float %118 to i32 %121 = insertelement <4 x i32> undef, i32 %119, i32 0 %122 = insertelement <4 x i32> %121, i32 %120, i32 1 %123 = insertelement <4 x i32> %122, i32 0, i32 2 %124 = bitcast <8 x i32> %68 to <32 x i8> %125 = bitcast <4 x i32> %70 to <16 x i8> %126 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %123, <32 x i8> %124, <16 x i8> %125, i32 2) %127 = extractelement <4 x float> %126, i32 0 %128 = extractelement <4 x float> %126, i32 1 %129 = extractelement <4 x float> %126, i32 2 %130 = extractelement <4 x float> %126, i32 3 %131 = bitcast float %117 to i32 %132 = bitcast float %118 to i32 %133 = insertelement <4 x i32> , i32 %131, i32 1 %134 = insertelement <4 x i32> %133, i32 %132, i32 2 %135 = insertelement <4 x i32> %134, i32 0, i32 3 %136 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %135, <8 x i32> %68, <4 x i32> %70, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %137 = extractelement <4 x float> %136, i32 0 %138 = extractelement <4 x float> %136, i32 1 %139 = extractelement <4 x float> %136, i32 2 %140 = extractelement <4 x float> %136, i32 3 %141 = bitcast float %117 to i32 %142 = bitcast float %118 to i32 %143 = insertelement <4 x i32> , i32 %141, i32 1 %144 = insertelement <4 x i32> %143, i32 %142, i32 2 %145 = insertelement <4 x i32> %144, i32 0, i32 3 %146 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %145, <8 x i32> %68, <4 x i32> %70, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %147 = extractelement <4 x float> %146, i32 0 %148 = extractelement <4 x float> %146, i32 1 %149 = extractelement <4 x float> %146, i32 2 %150 = extractelement <4 x float> %146, i32 3 %151 = fmul float %82, 2.550000e+02 %152 = fadd float %151, -1.280000e+02 %153 = fmul float %83, 2.550000e+02 %154 = fadd float %153, -1.280000e+02 %155 = fmul float %84, 2.550000e+02 %156 = fadd float %155, -1.280000e+02 %157 = fmul float %85, 2.550000e+02 %158 = fadd float %157, -1.280000e+02 %159 = fcmp olt float %152, 0.000000e+00 %160 = fcmp olt float %154, 0.000000e+00 %161 = fcmp olt float %156, 0.000000e+00 %162 = fcmp olt float %158, 0.000000e+00 %163 = select i1 %159, float 1.000000e+00, float 0.000000e+00 %164 = select i1 %161, float 1.000000e+00, float 0.000000e+00 %165 = select i1 %162, float 1.000000e+00, float 0.000000e+00 %166 = call float @fabs(float %152) %167 = call float @fabs(float %154) %168 = call float @fabs(float %156) %169 = call float @fabs(float %158) %170 = fsub float %166, %163 %171 = select i1 %160, float -1.000000e+00, float -0.000000e+00 %172 = fadd float %167, %171 %173 = fsub float %168, %164 %174 = fsub float %169, %165 %175 = fadd float %170, -6.400000e+01 %176 = fadd float %172, -6.400000e+01 %177 = fadd float %173, -6.400000e+01 %178 = fadd float %174, -6.400000e+01 %179 = fcmp olt float %175, 0.000000e+00 %180 = fcmp olt float %176, 0.000000e+00 %181 = fcmp olt float %177, 0.000000e+00 %182 = fcmp olt float %178, 0.000000e+00 %183 = select i1 %179, float 1.000000e+00, float 0.000000e+00 %184 = select i1 %180, float 1.000000e+00, float 0.000000e+00 %185 = select i1 %181, float 1.000000e+00, float 0.000000e+00 %186 = select i1 %182, float 1.000000e+00, float 0.000000e+00 %187 = call float @fabs(float %175) %188 = call float @fabs(float %176) %189 = call float @fabs(float %177) %190 = call float @fabs(float %178) %191 = fsub float %187, %183 %192 = fsub float %188, %184 %193 = fsub float %189, %185 %194 = fsub float %190, %186 %195 = fmul float %191, 0x3F90410420000000 %196 = fmul float %192, 0x3F90410420000000 %197 = fmul float %193, 0x3F90410420000000 %198 = fmul float %194, 0x3F90410420000000 %199 = fmul float %183, 2.000000e+00 %200 = fmul float %184, 2.000000e+00 %201 = fmul float %185, 2.000000e+00 %202 = fmul float %186, 2.000000e+00 %203 = fsub float 1.000000e+00, %199 %204 = fsub float 1.000000e+00, %200 %205 = fsub float 1.000000e+00, %201 %206 = fsub float 1.000000e+00, %202 %207 = fmul float %163, 2.000000e+00 %208 = fmul float %164, 2.000000e+00 %209 = fmul float %165, 2.000000e+00 %210 = fsub float 1.000000e+00, %207 %211 = fsub float 1.000000e+00, %208 %212 = fsub float 1.000000e+00, %209 %213 = fsub float 1.000000e+00, %195 %214 = fsub float %213, %196 %215 = fmul float %195, %195 %216 = fmul float %196, %196 %217 = fadd float %216, %215 %218 = fmul float %214, %214 %219 = fadd float %217, %218 %220 = call float @llvm.AMDGPU.rsq.clamped.f32(float %219) %221 = fmul float %195, %220 %222 = fmul float %196, %220 %223 = fmul float %214, %220 %224 = fmul float %221, %203 %225 = fmul float %222, %204 %226 = fsub float 1.000000e+00, %197 %227 = fsub float %226, %198 %228 = fmul float %197, %197 %229 = fmul float %198, %198 %230 = fadd float %229, %228 %231 = fmul float %227, %227 %232 = fadd float %230, %231 %233 = call float @llvm.AMDGPU.rsq.clamped.f32(float %232) %234 = fmul float %197, %233 %235 = fmul float %198, %233 %236 = fmul float %227, %233 %237 = fmul float %234, %205 %238 = fmul float %235, %206 %239 = fmul float %223, %210 %240 = fmul float %224, %127 %241 = fmul float %225, %128 %242 = fadd float %240, %241 %243 = fmul float %239, %129 %244 = fadd float %242, %243 %245 = fmul float %130, 0.000000e+00 %246 = fadd float %244, %245 %247 = fmul float %224, %137 %248 = fmul float %225, %138 %249 = fadd float %247, %248 %250 = fmul float %239, %139 %251 = fadd float %249, %250 %252 = fmul float %140, 0.000000e+00 %253 = fadd float %251, %252 %254 = fmul float %224, %147 %255 = fmul float %225, %148 %256 = fadd float %254, %255 %257 = fmul float %239, %149 %258 = fadd float %256, %257 %259 = fmul float %150, 0.000000e+00 %260 = fadd float %258, %259 %261 = fmul float %246, %246 %262 = fmul float %253, %253 %263 = fadd float %262, %261 %264 = fmul float %260, %260 %265 = fadd float %263, %264 %266 = call float @llvm.AMDGPU.rsq.clamped.f32(float %265) %267 = fmul float %246, %266 %268 = fmul float %253, %266 %269 = fmul float %260, %266 %270 = fmul float %236, %211 %271 = fmul float %237, %127 %272 = fmul float %238, %128 %273 = fadd float %271, %272 %274 = fmul float %270, %129 %275 = fadd float %273, %274 %276 = fmul float %130, 0.000000e+00 %277 = fadd float %275, %276 %278 = fmul float %237, %137 %279 = fmul float %238, %138 %280 = fadd float %278, %279 %281 = fmul float %270, %139 %282 = fadd float %280, %281 %283 = fmul float %140, 0.000000e+00 %284 = fadd float %282, %283 %285 = fmul float %237, %147 %286 = fmul float %238, %148 %287 = fadd float %285, %286 %288 = fmul float %270, %149 %289 = fadd float %287, %288 %290 = fmul float %150, 0.000000e+00 %291 = fadd float %289, %290 %292 = fmul float %277, %267 %293 = fmul float %284, %268 %294 = fadd float %293, %292 %295 = fmul float %291, %269 %296 = fadd float %294, %295 %297 = fmul float %296, %267 %298 = fmul float %296, %268 %299 = fmul float %296, %269 %300 = fsub float %277, %297 %301 = fsub float %284, %298 %302 = fsub float %291, %299 %303 = fmul float %300, %300 %304 = fmul float %301, %301 %305 = fadd float %304, %303 %306 = fmul float %302, %302 %307 = fadd float %305, %306 %308 = call float @llvm.AMDGPU.rsq.clamped.f32(float %307) %309 = fmul float %300, %308 %310 = fmul float %301, %308 %311 = fmul float %302, %308 %312 = fmul float %75, %127 %313 = fmul float %76, %128 %314 = fadd float %312, %313 %315 = fmul float %77, %129 %316 = fadd float %314, %315 %317 = fadd float %316, %130 %318 = fmul float %75, %137 %319 = fmul float %76, %138 %320 = fadd float %318, %319 %321 = fmul float %77, %139 %322 = fadd float %320, %321 %323 = fadd float %322, %140 %324 = fmul float %75, %147 %325 = fmul float %76, %148 %326 = fadd float %324, %325 %327 = fmul float %77, %149 %328 = fadd float %326, %327 %329 = fadd float %328, %150 %330 = fsub float %317, %55 %331 = fsub float %323, %56 %332 = fsub float %329, %57 %333 = fmul float %58, %330 %334 = fmul float %59, %331 %335 = fadd float %334, %333 %336 = fmul float %60, %332 %337 = fadd float %335, %336 %338 = fmul float %330, %330 %339 = fmul float %331, %331 %340 = fadd float %339, %338 %341 = fmul float %332, %332 %342 = fadd float %340, %341 %343 = call float @llvm.AMDGPU.rsq.clamped.f32(float %342) %344 = fmul float %330, %343 %345 = fmul float %331, %343 %346 = fmul float %332, %343 %347 = fmul float %267, %64 %348 = fmul float %268, %65 %349 = fadd float %348, %347 %350 = fmul float %269, %66 %351 = fadd float %349, %350 %352 = fmul float %351, %267 %353 = fmul float %351, %268 %354 = fmul float %351, %269 %355 = fmul float %352, 2.000000e+00 %356 = fmul float %353, 2.000000e+00 %357 = fmul float %354, 2.000000e+00 %358 = fsub float %64, %355 %359 = fsub float %65, %356 %360 = fsub float %66, %357 %361 = fmul float %344, %358 %362 = fsub float -0.000000e+00, %361 %363 = fmul float %345, %359 %364 = fsub float %362, %363 %365 = fmul float %346, %360 %366 = fsub float %364, %365 %367 = call float @llvm.AMDIL.clamp.(float %366, float 0.000000e+00, float 1.000000e+00) %368 = call float @llvm.pow.f32(float %367, float 1.600000e+01) %369 = call float @llvm.AMDIL.clamp.(float %368, float 0.000000e+00, float 1.000000e+00) %370 = fmul float %317, %40 %371 = fmul float %323, %44 %372 = fadd float %370, %371 %373 = fmul float %329, %48 %374 = fadd float %372, %373 %375 = fadd float %374, %52 %376 = fmul float %317, %41 %377 = fmul float %323, %45 %378 = fadd float %376, %377 %379 = fmul float %329, %49 %380 = fadd float %378, %379 %381 = fadd float %380, %53 %382 = fmul float %317, %42 %383 = fmul float %323, %46 %384 = fadd float %382, %383 %385 = fmul float %329, %50 %386 = fadd float %384, %385 %387 = fadd float %386, %54 %388 = fmul float %317, %39 %389 = fmul float %323, %43 %390 = fadd float %388, %389 %391 = fmul float %329, %47 %392 = fadd float %390, %391 %393 = fadd float %392, %51 %394 = fmul float %16, 0x3F91DF4720000000 %395 = fadd float %90, -5.000000e-01 %396 = fadd float %91, -5.000000e-01 %397 = call float @llvm.cos.f32(float %394) %398 = call float @llvm.sin.f32(float %394) %399 = fmul float %398, %396 %400 = fmul float %397, %395 %401 = fsub float %400, %399 %402 = fmul float %397, %396 %403 = fmul float %398, %395 %404 = fadd float %403, %402 %405 = fmul float %13, %401 %406 = fadd float %405, 5.000000e-01 %407 = fmul float %13, %404 %408 = fadd float %407, 5.000000e-01 %409 = fmul float %61, %19 %410 = fadd float %409, %406 %411 = fmul float %61, %20 %412 = fadd float %411, %408 %413 = fadd float %410, %25 %414 = fadd float %412, %26 %415 = fmul float %17, 0x3F91DF4720000000 %416 = call float @llvm.cos.f32(float %415) %417 = call float @llvm.sin.f32(float %415) %418 = fmul float %417, %396 %419 = fmul float %416, %395 %420 = fsub float %419, %418 %421 = fmul float %417, %395 %422 = fmul float %416, %396 %423 = fadd float %422, %421 %424 = fmul float %420, %14 %425 = fadd float %424, 5.000000e-01 %426 = fmul float %423, %14 %427 = fadd float %426, 5.000000e-01 %428 = fmul float %61, %21 %429 = fadd float %428, %425 %430 = fmul float %61, %22 %431 = fadd float %430, %427 %432 = fadd float %429, %27 %433 = fadd float %431, %28 %434 = fmul float %18, 0x3F91DF4720000000 %435 = call float @llvm.cos.f32(float %434) %436 = call float @llvm.sin.f32(float %434) %437 = fmul float %436, %396 %438 = fmul float %435, %395 %439 = fsub float %438, %437 %440 = fmul float %436, %395 %441 = fmul float %435, %396 %442 = fadd float %441, %440 %443 = fmul float %439, %15 %444 = fadd float %443, 5.000000e-01 %445 = fmul float %442, %15 %446 = fadd float %445, 5.000000e-01 %447 = fmul float %61, %23 %448 = fadd float %447, %444 %449 = fmul float %61, %24 %450 = fadd float %449, %446 %451 = fadd float %448, %29 %452 = fadd float %450, %30 %453 = fmul float %32, 0x3F91DF4720000000 %454 = call float @llvm.cos.f32(float %453) %455 = call float @llvm.sin.f32(float %453) %456 = fmul float %455, %396 %457 = fmul float %454, %395 %458 = fsub float %457, %456 %459 = fmul float %455, %395 %460 = fmul float %454, %396 %461 = fadd float %460, %459 %462 = fmul float %31, %458 %463 = fadd float %462, 5.000000e-01 %464 = fmul float %31, %461 %465 = fadd float %464, 5.000000e-01 %466 = fmul float %61, %33 %467 = fadd float %466, %463 %468 = fmul float %61, %34 %469 = fadd float %468, %465 %470 = fadd float %467, %35 %471 = fadd float %469, %36 %472 = bitcast float %117 to i32 %473 = bitcast float %118 to i32 %474 = insertelement <4 x i32> , i32 %472, i32 1 %475 = insertelement <4 x i32> %474, i32 %473, i32 2 %476 = insertelement <4 x i32> %475, i32 0, i32 3 %477 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %476, <8 x i32> %68, <4 x i32> %70, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %478 = extractelement <4 x float> %477, i32 0 %479 = extractelement <4 x float> %477, i32 1 %480 = extractelement <4 x float> %477, i32 2 %481 = extractelement <4 x float> %477, i32 3 %482 = fmul float %103, 5.000000e-01 %483 = fmul float %104, 5.000000e-01 %484 = fmul float %105, 5.000000e-01 %485 = call float @llvm.maxnum.f32(float %482, float 0x3F1A36E2E0000000) %486 = call float @llvm.maxnum.f32(float %483, float 0x3F1A36E2E0000000) %487 = call float @llvm.maxnum.f32(float %484, float 0x3F1A36E2E0000000) %488 = fmul float %110, 0x3FB3D07220000000 %489 = fmul float %111, 0x3FB3D07220000000 %490 = fmul float %112, 0x3FB3D07220000000 %491 = fmul float %110, 0x3FEE54EDE0000000 %492 = fadd float %491, 0x3FAAB12320000000 %493 = fmul float %111, 0x3FEE54EDE0000000 %494 = fadd float %493, 0x3FAAB12320000000 %495 = fmul float %112, 0x3FEE54EDE0000000 %496 = fadd float %495, 0x3FAAB12320000000 %497 = call float @llvm.pow.f32(float %492, float 0x4003333340000000) %498 = call float @llvm.pow.f32(float %494, float 0x4003333340000000) %499 = call float @llvm.pow.f32(float %496, float 0x4003333340000000) %500 = fcmp ogt float %110, 0x3FA4B5DCC0000000 %. = select i1 %500, float %497, float %488 %501 = fcmp ogt float %111, 0x3FA4B5DCC0000000 %temp60.0 = select i1 %501, float %498, float %489 %502 = fcmp ogt float %112, 0x3FA4B5DCC0000000 %.74 = select i1 %502, float %499, float %490 %503 = fmul float %., %478 %504 = fmul float %temp60.0, %479 %505 = fmul float %.74, %480 %506 = fsub float -0.000000e+00, %375 %507 = fmul float %381, 2.000000e+00 %508 = fsub float %507, %387 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %317, float %323, float %329, float %337) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %267, float %268, float %269, float %369) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %503, float %504, float %505, float %481) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %96, float %97, float %98, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %485, float %486, float %487, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %309, float %310, float %311, float %212) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 39, i32 0, float %413, float %414, float %432, float %433) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 40, i32 0, float %451, float %452, float %470, float %471) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %393, float %506, float %508, float %387) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.cos.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sin.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v18, 0xc3000000 ; 7E2402FF C3000000 v_mov_b32_e32 v21, 0x437f0000 ; 7E2A02FF 437F0000 v_mov_b32_e32 v22, 0x80000000 ; 7E2C02FF 80000000 v_mov_b32_e32 v19, 0xc2800000 ; 7E2602FF C2800000 v_mov_b32_e32 v2, 0x3c820821 ; 7E0402FF 3C820821 v_mov_b32_e32 v23, 0x3d558919 ; 7E2E02FF 3D558919 v_add_i32_e32 v4, s10, v0 ; 4A08000A v_add_i32_e32 v3, s11, v3 ; 4A06060B s_load_dwordx4 s[24:27], s[4:5], 0x0 ; C08C0500 v_mov_b32_e32 v24, 0x3f72a76f ; 7E3002FF 3F72A76F v_mov_b32_e32 v1, 0x3c8efa39 ; 7E0202FF 3C8EFA39 v_mov_b32_e32 v0, 0x3e22f983 ; 7E0002FF 3E22F983 v_mov_b32_e32 v20, 0x4019999a ; 7E2802FF 4019999A v_mov_b32_e32 v25, 0x3d9e8391 ; 7E3202FF 3D9E8391 v_mov_b32_e32 v26, 0x3d25aee6 ; 7E3402FF 3D25AEE6 s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908 s_load_dwordx4 s[36:39], s[8:9], 0xc ; C092090C s_load_dwordx4 s[40:43], s[8:9], 0x10 ; C0940910 s_load_dwordx4 s[44:47], s[8:9], 0x14 ; C0960914 s_load_dwordx4 s[48:51], s[8:9], 0x18 ; C0980918 s_load_dwordx4 s[8:11], s[2:3], 0x10 ; C0840310 s_load_dwordx4 s[52:55], s[2:3], 0x14 ; C09A0314 s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[27:30], v4, s[12:15], 0 idxen ; E00C2000 80031B04 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[30:33], v4, s[16:19], 0 idxen ; E00C2000 80041E04 buffer_load_format_xyzw v[8:11], v4, s[20:23], 0 idxen ; E00C2000 80050804 buffer_load_format_xyzw v[14:17], v4, s[36:39], 0 idxen ; E00C2000 80090E04 s_waitcnt vmcnt(1) ; BF8C0771 buffer_load_format_xyzw v[10:13], v4, s[40:43], 0 idxen ; E00C2000 800A0A04 buffer_load_format_xyzw v[34:37], v4, s[44:47], 0 idxen ; E00C2000 800B2204 buffer_load_format_xyzw v[4:7], v3, s[48:51], 0 idxen ; E00C2000 800C0403 s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v6, 0 ; 7E0C0280 s_buffer_load_dword s17, s[52:55], 0x0 ; C208B500 s_buffer_load_dword s16, s[52:55], 0x1 ; C2083501 s_buffer_load_dword s15, s[52:55], 0x2 ; C207B502 s_buffer_load_dword s13, s[8:11], 0x51 ; C2068951 s_buffer_load_dword s14, s[8:11], 0x52 ; C2070952 s_buffer_load_dword s12, s[8:11], 0x57 ; C2060957 s_buffer_load_dword s19, s[8:11], 0x4c ; C209894C s_buffer_load_dword s21, s[8:11], 0x4d ; C20A894D s_buffer_load_dword s20, s[8:11], 0x4e ; C20A094E s_buffer_load_dword s18, s[8:11], 0x50 ; C2090950 image_sample_l v[37:40], 15, 0, 0, 0, 0, 0, 0, 0, v[4:7], s[28:35], s[24:27] ; F0900F00 00C72504 v_mov_b32_e32 v3, 0x10001 ; 7E0602FF 00010001 image_sample_l_o v[41:44], 15, 0, 0, 0, 0, 0, 0, 0, v[3:6], s[28:35], s[24:27] ; F0D00F00 00C72903 v_mov_b32_e32 v3, 0x20002 ; 7E0602FF 00020002 image_sample_l_o v[45:48], 15, 0, 0, 0, 0, 0, 0, 0, v[3:6], s[28:35], s[24:27] ; F0D00F00 00C72D03 v_mov_b32_e32 v3, 0x30003 ; 7E0602FF 00030003 image_sample_l_o v[49:52], 15, 0, 0, 0, 0, 0, 0, 0, v[3:6], s[28:35], s[24:27] ; F0D00F00 00C73103 exp 15, 32, 0, 0, 0, v6, v6, v6, v6 ; F800020F 06060606 s_waitcnt vmcnt(3) lgkmcnt(0) ; BF8C0073 v_mul_f32_e32 v3, v38, v28 ; 10063926 v_mad_f32 v4, v21, v30, v18 ; D2820004 044A3D15 v_mad_f32 v5, v21, v31, v18 ; D2820005 044A3F15 v_mad_f32 v7, v21, v32, v18 ; D2820007 044A4115 v_mac_f32_e32 v18, v21, v33 ; 3E244315 v_mad_f32 v13, v24, v34, v23 ; D282000D 045E4518 v_mad_f32 v17, v24, v35, v23 ; D2820011 045E4718 v_mac_f32_e32 v23, v24, v36 ; 3E2E4918 v_cmp_gt_f32_e32 vcc, v34, v26 ; 7C083522 v_cmp_gt_f32_e64 s[0:1], v35, v26 ; D0080000 00023523 v_cmp_gt_f32_e64 s[4:5], v36, v26 ; D0080004 00023524 v_cmp_gt_f32_e64 s[6:7], 0, v5 ; D0080006 00020A80 v_cndmask_b32_e64 v21, v22, -1.0, s[6:7] ; D2000015 0019E716 v_add_f32_e64 v5, |v5|, v21 ; D2060105 00022B05 v_cmp_gt_f32_e64 s[6:7], 0, v4 ; D0080006 00020880 v_cndmask_b32_e64 v21, 0, 1.0, s[6:7] ; D2000015 0019E480 v_sub_f32_e64 v4, |v4|, v21 ; D2080104 00022B04 v_add_f32_e32 v4, v19, v4 ; 06080913 v_add_f32_e32 v5, v19, v5 ; 060A0B13 v_cmp_gt_f32_e64 s[6:7], 0, v4 ; D0080006 00020880 v_cndmask_b32_e64 v22, 0, 1.0, s[6:7] ; D2000016 0019E480 v_sub_f32_e64 v4, |v4|, v22 ; D2080104 00022D04 v_cmp_gt_f32_e64 s[6:7], 0, v5 ; D0080006 00020A80 v_cndmask_b32_e64 v24, 0, 1.0, s[6:7] ; D2000018 0019E480 v_sub_f32_e64 v5, |v5|, v24 ; D2080105 00023105 v_mul_f32_e32 v26, v2, v4 ; 10340902 v_mad_f32 v4, -v4, v2, 1.0 ; D2820004 23CA0504 v_mad_f32 v4, -v5, v2, v4 ; D2820004 24120505 v_mul_f32_e32 v5, v2, v5 ; 100A0B02 v_mul_f32_e32 v30, v26, v26 ; 103C351A v_mac_f32_e32 v30, v5, v5 ; 3E3C0B05 v_mac_f32_e32 v30, v4, v4 ; 3E3C0904 v_rsq_clamp_f32_e32 v30, v30 ; 7E3C591E v_mul_f32_e32 v31, v25, v34 ; 103E4519 v_mul_f32_e32 v32, v25, v35 ; 10404719 v_mul_f32_e32 v25, v25, v36 ; 10324919 v_mul_f32_e32 v26, v30, v26 ; 1034351E v_mul_f32_e32 v5, v30, v5 ; 100A0B1E v_mul_f32_e32 v4, v30, v4 ; 1008091E v_mad_f32 v22, -2.0, v22, 1.0 ; D2820016 03CA2CF5 v_mul_f32_e32 v22, v22, v26 ; 102C3516 v_mad_f32 v24, -2.0, v24, 1.0 ; D2820018 03CA30F5 v_mul_f32_e32 v5, v24, v5 ; 100A0B18 v_mad_f32 v21, -2.0, v21, 1.0 ; D2820015 03CA2AF5 v_mul_f32_e32 v4, v21, v4 ; 10080915 v_mul_f32_e32 v21, v38, v5 ; 102A0B26 s_waitcnt vmcnt(2) ; BF8C0772 v_mul_f32_e32 v24, v42, v5 ; 10300B2A s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v5, v46, v5 ; 100A0B2E v_mac_f32_e32 v21, v37, v22 ; 3E2A2D25 v_mac_f32_e32 v24, v41, v22 ; 3E302D29 v_mac_f32_e32 v5, v45, v22 ; 3E0A2D2D v_mac_f32_e32 v21, v39, v4 ; 3E2A0927 v_mac_f32_e32 v24, v43, v4 ; 3E30092B v_mac_f32_e32 v5, v47, v4 ; 3E0A092F v_mac_f32_e32 v3, v37, v27 ; 3E063725 v_mul_f32_e32 v4, v42, v28 ; 1008392A v_mac_f32_e32 v4, v41, v27 ; 3E083729 v_mul_f32_e32 v22, v46, v28 ; 102C392E v_mac_f32_e32 v22, v45, v27 ; 3E2C372D v_mac_f32_e32 v21, 0, v40 ; 3E2A5080 v_mac_f32_e32 v24, 0, v44 ; 3E305880 v_mac_f32_e32 v5, 0, v48 ; 3E0A6080 v_mul_f32_e32 v26, v21, v21 ; 10342B15 v_mac_f32_e32 v26, v24, v24 ; 3E343118 v_mac_f32_e32 v26, v5, v5 ; 3E340B05 v_rsq_clamp_f32_e32 v26, v26 ; 7E34591A v_mac_f32_e32 v3, v39, v29 ; 3E063B27 v_mac_f32_e32 v4, v43, v29 ; 3E083B2B v_mac_f32_e32 v22, v47, v29 ; 3E2C3B2F v_mul_f32_e32 v21, v26, v21 ; 102A2B1A v_mul_f32_e32 v24, v26, v24 ; 1030311A v_mul_f32_e32 v5, v26, v5 ; 100A0B1A v_add_f32_e32 v3, v40, v3 ; 06060728 v_add_f32_e32 v4, v44, v4 ; 0608092C v_add_f32_e32 v22, v48, v22 ; 062C2D30 v_subrev_f32_e32 v26, s19, v3 ; 0A340613 v_subrev_f32_e32 v27, s21, v4 ; 0A360815 v_mul_f32_e32 v28, v26, v26 ; 1038351A v_mac_f32_e32 v28, v27, v27 ; 3E38371B v_subrev_f32_e32 v29, s20, v22 ; 0A3A2C14 v_mac_f32_e32 v28, v29, v29 ; 3E383B1D v_rsq_clamp_f32_e32 v28, v28 ; 7E38591C v_mul_f32_e32 v30, s17, v21 ; 103C2A11 v_mac_f32_e32 v30, s16, v24 ; 3E3C3010 v_mac_f32_e32 v30, s15, v5 ; 3E3C0A0F v_mul_f32_e32 v33, v21, v30 ; 10423D15 v_mad_f32 v33, -2.0, v33, s17 ; D2820021 004642F5 v_mul_f32_e32 v34, v28, v26 ; 1044351C v_mul_f32_e32 v33, v33, v34 ; 10424521 v_mul_f32_e32 v34, v24, v30 ; 10443D18 v_mad_f32 v34, -2.0, v34, s16 ; D2820022 004244F5 v_mul_f32_e32 v35, v28, v27 ; 1046371C v_mad_f32 v33, -v35, v34, -v33 ; D2820021 A4864523 v_mul_f32_e32 v30, v5, v30 ; 103C3D05 v_mad_f32 v30, -2.0, v30, s15 ; D282001E 003E3CF5 v_mul_f32_e32 v28, v28, v29 ; 10383B1C v_mad_f32 v28, -v28, v30, v33 ; D282001C 24863D1C v_log_f32_e32 v13, v13 ; 7E1A4F0D v_log_f32_e32 v17, v17 ; 7E224F11 v_log_f32_e32 v23, v23 ; 7E2E4F17 v_cmp_gt_f32_e64 s[6:7], 0, v7 ; D0080006 00020E80 v_cndmask_b32_e64 v30, 0, 1.0, s[6:7] ; D200001E 0019E480 v_sub_f32_e64 v7, |v7|, v30 ; D2080107 00023D07 v_cmp_gt_f32_e64 s[6:7], 0, v18 ; D0080006 00022480 v_cndmask_b32_e64 v33, 0, 1.0, s[6:7] ; D2000021 0019E480 v_sub_f32_e64 v18, |v18|, v33 ; D2080112 00024312 v_add_f32_e32 v7, v19, v7 ; 060E0F13 v_add_f32_e32 v18, v19, v18 ; 06242513 v_mul_legacy_f32_e32 v13, v20, v13 ; 0E1A1B14 v_mul_legacy_f32_e32 v17, v20, v17 ; 0E222314 v_mul_legacy_f32_e32 v19, v20, v23 ; 0E262F14 v_mul_f32_e32 v20, s18, v26 ; 10283412 v_mac_f32_e32 v20, s13, v27 ; 3E28360D v_mac_f32_e32 v20, s14, v29 ; 3E283A0E v_exp_f32_e32 v13, v13 ; 7E1A4B0D v_cndmask_b32_e32 v13, v31, v13 ; 001A1B1F v_exp_f32_e32 v17, v17 ; 7E224B11 v_cndmask_b32_e64 v17, v32, v17, s[0:1] ; D2000011 00022320 v_exp_f32_e32 v19, v19 ; 7E264B13 v_cndmask_b32_e64 v19, v25, v19, s[4:5] ; D2000013 00122719 v_cmp_gt_f32_e32 vcc, 0, v7 ; 7C080E80 v_cndmask_b32_e64 v23, 0, 1.0, vcc ; D2000017 01A9E480 v_sub_f32_e64 v7, |v7|, v23 ; D2080107 00022F07 v_cmp_gt_f32_e32 vcc, 0, v18 ; 7C082480 v_cndmask_b32_e64 v25, 0, 1.0, vcc ; D2000019 01A9E480 v_sub_f32_e64 v18, |v18|, v25 ; D2080112 00023312 v_mul_f32_e32 v26, v2, v7 ; 10340F02 v_mad_f32 v7, -v7, v2, 1.0 ; D2820007 23CA0507 v_add_f32_e64 v27, 0, v28 clamp ; D206081B 00023880 v_log_f32_e32 v27, v27 ; 7E364F1B v_mad_f32 v7, -v18, v2, v7 ; D2820007 241E0512 v_mul_f32_e32 v2, v2, v18 ; 10042502 exp 15, 33, 0, 0, 0, v3, v4, v22, v20 ; F800021F 14160403 v_mul_legacy_f32_e32 v18, 0x41800000, v27 ; 0E2436FF 41800000 v_exp_f32_e32 v18, v18 ; 7E244B12 v_add_f32_e64 v18, 0, v18 clamp ; D2060812 00022480 exp 15, 34, 0, 0, 0, v21, v24, v5, v18 ; F800022F 12051815 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v13, v49, v13 ; 101A1B31 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v18, v26, v26 ; 1024351A v_mac_f32_e32 v18, v2, v2 ; 3E240502 v_mac_f32_e32 v18, v7, v7 ; 3E240F07 v_rsq_clamp_f32_e32 v18, v18 ; 7E245912 v_mul_f32_e32 v17, v50, v17 ; 10222332 v_mul_f32_e32 v19, v51, v19 ; 10262733 exp 15, 35, 0, 0, 0, v13, v17, v19, v52 ; F800023F 3413110D s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v13, v18, v26 ; 101A3512 v_mul_f32_e32 v2, v18, v2 ; 10040512 v_mul_f32_e32 v7, v18, v7 ; 100E0F12 v_mad_f32 v17, -2.0, v23, 1.0 ; D2820011 03CA2EF5 v_mul_f32_e32 v13, v17, v13 ; 101A1B11 v_mad_f32 v17, -2.0, v25, 1.0 ; D2820011 03CA32F5 v_mul_f32_e32 v2, v17, v2 ; 10040511 v_mad_f32 v17, -2.0, v30, 1.0 ; D2820011 03CA3CF5 v_mul_f32_e32 v7, v17, v7 ; 100E0F11 v_mul_f32_e32 v17, v38, v2 ; 10220526 v_mac_f32_e32 v17, v37, v13 ; 3E221B25 v_mac_f32_e32 v17, v39, v7 ; 3E220F27 v_mac_f32_e32 v17, 0, v40 ; 3E225080 v_mul_f32_e32 v18, v42, v2 ; 1024052A v_mac_f32_e32 v18, v41, v13 ; 3E241B29 v_mac_f32_e32 v18, v43, v7 ; 3E240F2B v_mac_f32_e32 v18, 0, v44 ; 3E245880 v_mul_f32_e32 v2, v46, v2 ; 1004052E v_mac_f32_e32 v2, v45, v13 ; 3E041B2D v_mac_f32_e32 v2, v47, v7 ; 3E040F2F v_mac_f32_e32 v2, 0, v48 ; 3E046080 exp 15, 36, 0, 0, 0, v14, v15, v16, v6 ; F800024F 06100F0E v_mul_f32_e32 v7, 0.5, v10 ; 100E14F0 v_mul_f32_e32 v10, 0.5, v11 ; 101416F0 v_mul_f32_e32 v11, 0.5, v12 ; 101618F0 v_mul_f32_e32 v12, v21, v17 ; 10182315 v_mac_f32_e32 v12, v24, v18 ; 3E182518 v_mac_f32_e32 v12, v5, v2 ; 3E180505 v_mad_f32 v13, -v12, v21, v17 ; D282000D 24462B0C s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v14, -v12, v24, v18 ; D282000E 244A310C s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 v_mad_f32 v2, -v12, v5, v2 ; D2820002 240A0B0C v_mov_b32_e32 v5, 0x38d1b717 ; 7E0A02FF 38D1B717 v_max_f32_e32 v7, v5, v7 ; 200E0F05 v_max_f32_e32 v10, v5, v10 ; 20141505 v_mul_f32_e32 v12, v13, v13 ; 10181B0D v_mac_f32_e32 v12, v14, v14 ; 3E181D0E v_mac_f32_e32 v12, v2, v2 ; 3E180502 v_rsq_clamp_f32_e32 v12, v12 ; 7E18590C v_max_f32_e32 v5, v5, v11 ; 200A1705 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 exp 15, 37, 0, 0, 0, v7, v10, v5, v6 ; F800025F 06050A07 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v5, v12, v13 ; 100A1B0C v_mul_f32_e32 v7, v12, v14 ; 100E1D0C v_mul_f32_e32 v2, v12, v2 ; 1004050C v_mad_f32 v10, -2.0, v33, 1.0 ; D282000A 03CA42F5 exp 15, 38, 0, 0, 0, v5, v7, v2, v10 ; F800026F 0A020705 s_waitcnt expcnt(0) ; BF8C070F v_add_f32_e32 v2, -0.5, v8 ; 060410F1 v_add_f32_e32 v5, -0.5, v9 ; 060A12F1 s_buffer_load_dword s5, s[0:3], 0x9 ; C2028109 v_mul_f32_e32 v7, s4, v1 ; 100E0204 v_mul_f32_e32 v7, v0, v7 ; 100E0F00 v_fract_f32_e32 v7, v7 ; 7E0E4107 v_cos_f32_e32 v8, v7 ; 7E106D07 v_sin_f32_e32 v7, v7 ; 7E0E6B07 v_mul_f32_e32 v9, v5, v7 ; 10120F05 v_mad_f32 v9, v8, v2, -v9 ; D2820009 84260508 v_mul_f32_e32 v8, v5, v8 ; 10101105 v_mac_f32_e32 v8, v2, v7 ; 3E100F02 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s5, v1 ; 100E0205 v_mul_f32_e32 v7, v0, v7 ; 100E0F00 v_fract_f32_e32 v7, v7 ; 7E0E4107 v_cos_f32_e32 v10, v7 ; 7E146D07 v_sin_f32_e32 v7, v7 ; 7E0E6B07 v_mul_f32_e32 v11, v5, v7 ; 10160F05 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 v_mul_f32_e32 v7, v2, v7 ; 100E0F02 v_mad_f32 v11, v10, v2, -v11 ; D282000B 842E050A v_mac_f32_e32 v7, v5, v10 ; 3E0E1505 s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106 s_buffer_load_dword s6, s[0:3], 0x7 ; C2030107 s_buffer_load_dword s7, s[0:3], 0xc ; C203810C s_buffer_load_dword s13, s[0:3], 0xd ; C206810D s_buffer_load_dword s14, s[0:3], 0xe ; C207010E s_buffer_load_dword s15, s[0:3], 0xf ; C207810F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v9, s4, v9, 0.5 ; D2820009 03C21204 v_mad_f32 v8, s4, v8, 0.5 ; D2820008 03C21004 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_buffer_load_dword s16, s[0:3], 0x13 ; C2080113 s_buffer_load_dword s17, s[0:3], 0x14 ; C2088114 v_mad_f32 v10, v11, s5, 0.5 ; D282000A 03C00B0B v_mad_f32 v7, v7, s5, 0.5 ; D2820007 03C00B07 v_mov_b32_e32 v11, s7 ; 7E160207 v_mac_f32_e32 v9, s12, v11 ; 3E12160C v_mov_b32_e32 v11, s13 ; 7E16020D v_mac_f32_e32 v8, s12, v11 ; 3E10160C v_mov_b32_e32 v11, s14 ; 7E16020E v_mac_f32_e32 v10, s12, v11 ; 3E14160C v_mov_b32_e32 v11, s15 ; 7E16020F s_buffer_load_dword s5, s[0:3], 0x15 ; C2028115 v_mac_f32_e32 v7, s12, v11 ; 3E0E160C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v9, s4, v9 ; 06121204 v_add_f32_e32 v8, s16, v8 ; 06101010 v_add_f32_e32 v10, s17, v10 ; 06141411 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_buffer_load_dword s7, s[0:3], 0x16 ; C2038116 s_buffer_load_dword s13, s[0:3], 0x17 ; C2068117 s_buffer_load_dword s14, s[0:3], 0x18 ; C2070118 s_buffer_load_dword s15, s[0:3], 0x19 ; C2078119 v_add_f32_e32 v7, s5, v7 ; 060E0E05 exp 15, 39, 0, 0, 0, v9, v8, v10, v7 ; F800027F 070A0809 s_buffer_load_dword s5, s[0:3], 0x10 ; C2028110 s_buffer_load_dword s16, s[0:3], 0x11 ; C2080111 s_buffer_load_dword s17, s[0:3], 0x1a ; C208811A s_buffer_load_dword s18, s[0:3], 0x1b ; C209011B s_buffer_load_dword s19, s[0:3], 0x1c ; C209811C s_buffer_load_dword s0, s[0:3], 0x1d ; C200011D s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v7, s4, v1 ; 100E0204 v_mul_f32_e32 v1, s15, v1 ; 1002020F v_mul_f32_e32 v7, v0, v7 ; 100E0F00 v_mul_f32_e32 v0, v0, v1 ; 10000300 v_fract_f32_e32 v1, v7 ; 7E024107 v_cos_f32_e32 v7, v1 ; 7E0E6D01 v_sin_f32_e32 v1, v1 ; 7E026B01 v_mul_f32_e32 v8, v5, v1 ; 10100305 v_mul_f32_e32 v1, v2, v1 ; 10020302 v_mad_f32 v8, v7, v2, -v8 ; D2820008 84220507 v_mac_f32_e32 v1, v5, v7 ; 3E020F05 v_fract_f32_e32 v0, v0 ; 7E004100 v_cos_f32_e32 v7, v0 ; 7E0E6D00 v_sin_f32_e32 v0, v0 ; 7E006B00 v_mul_f32_e32 v9, v5, v0 ; 10120105 v_mul_f32_e32 v0, v2, v0 ; 10000102 v_mad_f32 v2, v7, v2, -v9 ; D2820002 84260507 v_mac_f32_e32 v0, v5, v7 ; 3E000F05 v_mad_f32 v5, v8, s6, 0.5 ; D2820005 03C00D08 v_mad_f32 v1, v1, s6, 0.5 ; D2820001 03C00D01 v_mad_f32 v2, s14, v2, 0.5 ; D2820002 03C2040E v_mad_f32 v0, s14, v0, 0.5 ; D2820000 03C2000E v_mov_b32_e32 v7, s5 ; 7E0E0205 v_mac_f32_e32 v5, s12, v7 ; 3E0A0E0C v_mov_b32_e32 v7, s16 ; 7E0E0210 v_mac_f32_e32 v1, s12, v7 ; 3E020E0C v_mov_b32_e32 v7, s17 ; 7E0E0211 v_mac_f32_e32 v2, s12, v7 ; 3E040E0C v_mov_b32_e32 v7, s18 ; 7E0E0212 v_mac_f32_e32 v0, s12, v7 ; 3E000E0C v_add_f32_e32 v5, s7, v5 ; 060A0A07 v_add_f32_e32 v1, s13, v1 ; 0602020D v_add_f32_e32 v2, s19, v2 ; 06040413 v_add_f32_e32 v0, s0, v0 ; 06000000 exp 15, 40, 0, 0, 0, v5, v1, v2, v0 ; F800028F 00020105 s_buffer_load_dword s0, s[8:11], 0xf ; C200090F s_buffer_load_dword s1, s[8:11], 0x0 ; C2008900 s_buffer_load_dword s2, s[8:11], 0x1 ; C2010901 s_buffer_load_dword s3, s[8:11], 0x2 ; C2018902 s_buffer_load_dword s4, s[8:11], 0x3 ; C2020903 s_buffer_load_dword s5, s[8:11], 0x4 ; C2028904 s_buffer_load_dword s6, s[8:11], 0x5 ; C2030905 s_buffer_load_dword s7, s[8:11], 0x6 ; C2038906 s_buffer_load_dword s12, s[8:11], 0x7 ; C2060907 s_buffer_load_dword s13, s[8:11], 0x8 ; C2068908 s_buffer_load_dword s14, s[8:11], 0x9 ; C2070909 s_buffer_load_dword s15, s[8:11], 0xa ; C207890A s_buffer_load_dword s16, s[8:11], 0xb ; C208090B s_buffer_load_dword s17, s[8:11], 0xc ; C208890C s_buffer_load_dword s18, s[8:11], 0xd ; C209090D s_buffer_load_dword s8, s[8:11], 0xe ; C204090E s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v0, s6, v4 ; 10000806 v_mul_f32_e32 v1, s7, v4 ; 10020807 v_mul_f32_e32 v2, s12, v4 ; 1004080C v_mul_f32_e32 v4, s5, v4 ; 10080805 v_mac_f32_e32 v0, s2, v3 ; 3E000602 v_mac_f32_e32 v1, s3, v3 ; 3E020603 v_mac_f32_e32 v2, s4, v3 ; 3E040604 v_mac_f32_e32 v4, s1, v3 ; 3E080601 v_mac_f32_e32 v0, s14, v22 ; 3E002C0E v_mac_f32_e32 v1, s15, v22 ; 3E022C0F v_mac_f32_e32 v2, s16, v22 ; 3E042C10 v_mac_f32_e32 v4, s13, v22 ; 3E082C0D v_add_f32_e32 v0, s18, v0 ; 06000012 v_add_f32_e32 v1, s8, v1 ; 06020208 v_add_f32_e32 v2, s0, v2 ; 06040400 v_add_f32_e32 v3, s17, v4 ; 06060811 v_xor_b32_e32 v0, 0x80000000, v0 ; 3A0000FF 80000000 v_mad_f32 v1, 2.0, v1, -v2 ; D2820001 840A02F4 exp 15, 12, 0, 0, 0, v3, v0, v1, v2 ; F80000CF 02010003 exp 15, 13, 0, 1, 0, v6, v6, v6, v6 ; F80008DF 06060606 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 56 Code Size: 1996 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0xB last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[4], PERSPECTIVE DCL IN[5], GENERIC[5], PERSPECTIVE DCL IN[6], GENERIC[6], PERSPECTIVE DCL IN[7], GENERIC[7], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL OUT[2], COLOR[2] DCL OUT[3], COLOR[3] DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL SAMP[8] DCL SAMP[9] DCL SAMP[10] DCL SAMP[11] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL SVIEW[5], 2D, FLOAT DCL SVIEW[6], 2D, FLOAT DCL SVIEW[7], 2D, FLOAT DCL SVIEW[8], 2D, FLOAT DCL SVIEW[9], 2D, FLOAT DCL SVIEW[10], 2D, FLOAT DCL SVIEW[11], 2D, FLOAT DCL CONST[1][0..24] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0..11], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 3.0000, 2.0000} IMM[1] UINT32 {0, 64, 48, 32} IMM[2] UINT32 {16, 224, 228, 0} IMM[3] FLT32 { -1.0000, 0.5000, 0.0000, 0.0000} 0: DP3 TEMP[0].x, IN[1].xyzz, IN[1].xyzz 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MUL TEMP[0].xyz, IN[1].xyzz, TEMP[0].xxxx 3: MOV TEMP[1].xy, IN[6].zwww 4: TEX TEMP[1], TEMP[1], SAMP[1], 2D 5: MOV TEMP[2].xy, IN[7].xyyy 6: TEX TEMP[2], TEMP[2], SAMP[2], 2D 7: MOV TEMP[3].xy, IN[7].zwww 8: TEX TEMP[3], TEMP[3], SAMP[3], 2D 9: ADD TEMP[4].x, TEMP[1].wwww, -IN[4].xxxx 10: MAX TEMP[4].x, IMM[0].xxxx, TEMP[4].xxxx 11: ADD TEMP[5].x, IN[3].xxxx, -TEMP[4].xxxx 12: ADD TEMP[6].x, IN[4].xxxx, TEMP[1].wwww 13: MIN TEMP[6].x, IMM[0].yyyy, TEMP[6].xxxx 14: ADD TEMP[4].x, TEMP[6].xxxx, -TEMP[4].xxxx 15: RCP TEMP[4].x, TEMP[4].xxxx 16: MUL TEMP[4].x, TEMP[5].xxxx, TEMP[4].xxxx 17: MOV_SAT TEMP[4].x, TEMP[4].xxxx 18: MUL TEMP[5].x, IMM[0].wwww, TEMP[4].xxxx 19: ADD TEMP[5].x, IMM[0].zzzz, -TEMP[5].xxxx 20: MUL TEMP[5].x, TEMP[4].xxxx, TEMP[5].xxxx 21: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx 22: ADD TEMP[5].x, IMM[0].yyyy, -TEMP[4].xxxx 23: ADD TEMP[6].x, TEMP[2].wwww, -IN[4].yyyy 24: MAX TEMP[6].x, IMM[0].xxxx, TEMP[6].xxxx 25: ADD TEMP[7].x, IN[3].yyyy, -TEMP[6].xxxx 26: ADD TEMP[8].x, IN[4].yyyy, TEMP[2].wwww 27: MIN TEMP[8].x, IMM[0].yyyy, TEMP[8].xxxx 28: ADD TEMP[6].x, TEMP[8].xxxx, -TEMP[6].xxxx 29: RCP TEMP[6].x, TEMP[6].xxxx 30: MUL TEMP[6].x, TEMP[7].xxxx, TEMP[6].xxxx 31: MOV_SAT TEMP[6].x, TEMP[6].xxxx 32: MUL TEMP[7].x, IMM[0].wwww, TEMP[6].xxxx 33: ADD TEMP[7].x, IMM[0].zzzz, -TEMP[7].xxxx 34: MUL TEMP[7].x, TEMP[6].xxxx, TEMP[7].xxxx 35: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].xxxx 36: MAX TEMP[6].x, TEMP[6].xxxx, IMM[0].xxxx 37: MIN TEMP[6].x, TEMP[6].xxxx, TEMP[5].xxxx 38: ADD TEMP[5].x, TEMP[5].xxxx, -TEMP[6].xxxx 39: ADD TEMP[7].x, TEMP[3].wwww, -IN[4].zzzz 40: MAX TEMP[7].x, IMM[0].xxxx, TEMP[7].xxxx 41: ADD TEMP[8].x, IN[3].zzzz, -TEMP[7].xxxx 42: ADD TEMP[9].x, IN[4].zzzz, TEMP[3].wwww 43: MIN TEMP[9].x, IMM[0].yyyy, TEMP[9].xxxx 44: ADD TEMP[7].x, TEMP[9].xxxx, -TEMP[7].xxxx 45: RCP TEMP[7].x, TEMP[7].xxxx 46: MUL TEMP[7].x, TEMP[8].xxxx, TEMP[7].xxxx 47: MOV_SAT TEMP[7].x, TEMP[7].xxxx 48: MUL TEMP[8].x, IMM[0].wwww, TEMP[7].xxxx 49: ADD TEMP[8].x, IMM[0].zzzz, -TEMP[8].xxxx 50: MUL TEMP[8].x, TEMP[7].xxxx, TEMP[8].xxxx 51: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[8].xxxx 52: MAX TEMP[7].x, TEMP[7].xxxx, IMM[0].xxxx 53: MIN TEMP[7].x, TEMP[7].xxxx, TEMP[5].xxxx 54: ADD TEMP[5].x, TEMP[5].xxxx, -TEMP[7].xxxx 55: MUL TEMP[3].xyz, CONST[1][4].xyzz, TEMP[3].xyzz 56: MUL TEMP[2].xyz, CONST[1][3].xyzz, TEMP[2].xyzz 57: MUL TEMP[1].xyz, CONST[1][2].xyzz, TEMP[1].xyzz 58: MOV TEMP[8].xy, IN[6].xyyy 59: TEX TEMP[8].xyz, TEMP[8], SAMP[0], 2D 60: MUL TEMP[8].xyz, CONST[1][1].xyzz, TEMP[8].xyzz 61: MUL TEMP[8].xyz, TEMP[5].xxxx, TEMP[8].xyzz 62: MAD TEMP[1].xyz, TEMP[4].xxxx, TEMP[1].xyzz, TEMP[8].xyzz 63: MAD TEMP[1].xyz, TEMP[6].xxxx, TEMP[2].xyzz, TEMP[1].xyzz 64: MAD TEMP[1].xyz, TEMP[3].xyzz, TEMP[7].xxxx, TEMP[1].xyzz 65: MUL TEMP[1].xyz, TEMP[1].xyzz, IN[2].xyzz 66: MOV TEMP[2].xy, IN[7].zwww 67: TEX TEMP[2].xyz, TEMP[2], SAMP[7], 2D 68: MOV TEMP[3].xy, IN[7].xyyy 69: TEX TEMP[3].xyz, TEMP[3], SAMP[6], 2D 70: MOV TEMP[8].xy, IN[6].zwww 71: TEX TEMP[8].xyz, TEMP[8], SAMP[5], 2D 72: MOV TEMP[9].xy, IN[6].xyyy 73: TEX TEMP[9].xyz, TEMP[9], SAMP[4], 2D 74: MUL TEMP[9].xyz, TEMP[5].xxxx, TEMP[9].xyzz 75: MAD TEMP[8].xyz, TEMP[4].xxxx, TEMP[8].xyzz, TEMP[9].xyzz 76: MAD TEMP[3].xyz, TEMP[6].xxxx, TEMP[3].xyzz, TEMP[8].xyzz 77: MAD TEMP[2].xyz, TEMP[7].xxxx, TEMP[2].xyzz, TEMP[3].xyzz 78: MUL TEMP[3].xyz, TEMP[0].zxyy, IN[5].yzxx 79: MAD TEMP[3].xyz, TEMP[0].yzxx, IN[5].zxyy, -TEMP[3].xyzz 80: MOV TEMP[8].xy, IN[7].zwww 81: TEX TEMP[8], TEMP[8], SAMP[11], 2D 82: MOV TEMP[9].xy, IN[7].xyyy 83: TEX TEMP[9], TEMP[9], SAMP[10], 2D 84: MOV TEMP[10].xy, IN[6].xyyy 85: TEX TEMP[10], TEMP[10], SAMP[8], 2D 86: MOV TEMP[11].xy, IN[6].zwww 87: TEX TEMP[11], TEMP[11], SAMP[9], 2D 88: MUL TEMP[4], TEMP[4].xxxx, TEMP[11] 89: MAD TEMP[4], TEMP[5].xxxx, TEMP[10], TEMP[4] 90: MAD TEMP[4], TEMP[6].xxxx, TEMP[9], TEMP[4] 91: MAD TEMP[4].yw, TEMP[7].xxxx, TEMP[8], TEMP[4] 92: MAD TEMP[4].xy, TEMP[4].wyyy, IMM[0].wwww, IMM[3].xxxx 93: MOV TEMP[5].x, TEMP[4].xxxx 94: MOV TEMP[5].y, -TEMP[4].yyyy 95: MUL TEMP[5].xy, TEMP[5].xyyy, CONST[1][0].xxxx 96: MOV TEMP[6].x, TEMP[5].xxxx 97: MOV TEMP[6].y, TEMP[5].yyyy 98: DP2 TEMP[4].x, TEMP[4].xyyy, TEMP[4].xyyy 99: ADD TEMP[4].x, IMM[0].yyyy, -TEMP[4].xxxx 100: MOV_SAT TEMP[4].x, TEMP[4].xxxx 101: SQRT TEMP[4].x, TEMP[4].xxxx 102: MOV TEMP[6].z, TEMP[4].xxxx 103: DP3 TEMP[4].x, TEMP[6].xyzz, TEMP[6].xyzz 104: RSQ TEMP[4].x, TEMP[4].xxxx 105: MUL TEMP[4].xyz, TEMP[6].xyzz, TEMP[4].xxxx 106: DP3 TEMP[5].x, IN[5].xyzz, IN[5].xyzz 107: RSQ TEMP[5].x, TEMP[5].xxxx 108: MUL TEMP[5].xyz, IN[5].xyzz, TEMP[5].xxxx 109: DP3 TEMP[6].x, TEMP[3].xyzz, TEMP[3].xyzz 110: RSQ TEMP[6].x, TEMP[6].xxxx 111: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[6].xxxx 112: MUL TEMP[3].xyz, IN[5].wwww, TEMP[3].xyzz 113: MUL TEMP[3].xyz, TEMP[4].yyyy, TEMP[3].xyzz 114: MAD TEMP[3].xyz, TEMP[4].xxxx, TEMP[5].xyzz, TEMP[3].xyzz 115: MAD TEMP[0].xyz, TEMP[0].xyzz, TEMP[4].zzzz, TEMP[3].xyzz 116: DP3 TEMP[3].x, TEMP[0].xyzz, TEMP[0].xyzz 117: RSQ TEMP[3].x, TEMP[3].xxxx 118: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[3].xxxx 119: MAD TEMP[0].xyz, TEMP[0].xyzz, IMM[3].yyyy, IMM[3].yyyy 120: MOV TEMP[3].w, IMM[0].xxxx 121: MOV TEMP[3].x, TEMP[0].xxxx 122: MOV TEMP[3].y, TEMP[0].yyyy 123: MOV TEMP[3].z, TEMP[0].zzzz 124: MOV TEMP[0].w, IMM[0].xxxx 125: MOV TEMP[0].x, TEMP[1].xxxx 126: MOV TEMP[0].y, TEMP[1].yyyy 127: MOV TEMP[0].z, TEMP[1].zzzz 128: MOV TEMP[1].w, IMM[0].xxxx 129: MUL TEMP[1].x, TEMP[2].xxxx, CONST[1][14].xxxx 130: MOV TEMP[1].y, TEMP[2].yyyy 131: MUL TEMP[2].x, TEMP[2].zzzz, CONST[1][14].yyyy 132: MOV TEMP[1].z, TEMP[2].xxxx 133: MOV OUT[2], IN[0].wwww 134: MOV OUT[0], TEMP[0] 135: MOV OUT[3], TEMP[3] 136: MOV OUT[1], TEMP[1] 137: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228) %39 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %40 = load <32 x i8>, <32 x i8> addrspace(2)* %39, align 32, !tbaa !0 %41 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 %43 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %44 = bitcast <8 x i32> addrspace(2)* %43 to <32 x i8> addrspace(2)* %45 = load <32 x i8>, <32 x i8> addrspace(2)* %44, align 32, !tbaa !0 %46 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %47 = bitcast <4 x i32> addrspace(2)* %46 to <16 x i8> addrspace(2)* %48 = load <16 x i8>, <16 x i8> addrspace(2)* %47, align 16, !tbaa !0 %49 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %50 = bitcast <8 x i32> addrspace(2)* %49 to <32 x i8> addrspace(2)* %51 = load <32 x i8>, <32 x i8> addrspace(2)* %50, align 32, !tbaa !0 %52 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %53 = bitcast <4 x i32> addrspace(2)* %52 to <16 x i8> addrspace(2)* %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !tbaa !0 %55 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %56 = bitcast <8 x i32> addrspace(2)* %55 to <32 x i8> addrspace(2)* %57 = load <32 x i8>, <32 x i8> addrspace(2)* %56, align 32, !tbaa !0 %58 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %59 = bitcast <4 x i32> addrspace(2)* %58 to <16 x i8> addrspace(2)* %60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !tbaa !0 %61 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %62 = bitcast <8 x i32> addrspace(2)* %61 to <32 x i8> addrspace(2)* %63 = load <32 x i8>, <32 x i8> addrspace(2)* %62, align 32, !tbaa !0 %64 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %65 = bitcast <4 x i32> addrspace(2)* %64 to <16 x i8> addrspace(2)* %66 = load <16 x i8>, <16 x i8> addrspace(2)* %65, align 16, !tbaa !0 %67 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %68 = bitcast <8 x i32> addrspace(2)* %67 to <32 x i8> addrspace(2)* %69 = load <32 x i8>, <32 x i8> addrspace(2)* %68, align 32, !tbaa !0 %70 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %71 = bitcast <4 x i32> addrspace(2)* %70 to <16 x i8> addrspace(2)* %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0 %73 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 6 %74 = bitcast <8 x i32> addrspace(2)* %73 to <32 x i8> addrspace(2)* %75 = load <32 x i8>, <32 x i8> addrspace(2)* %74, align 32, !tbaa !0 %76 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 6 %77 = bitcast <4 x i32> addrspace(2)* %76 to <16 x i8> addrspace(2)* %78 = load <16 x i8>, <16 x i8> addrspace(2)* %77, align 16, !tbaa !0 %79 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 7 %80 = bitcast <8 x i32> addrspace(2)* %79 to <32 x i8> addrspace(2)* %81 = load <32 x i8>, <32 x i8> addrspace(2)* %80, align 32, !tbaa !0 %82 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 7 %83 = bitcast <4 x i32> addrspace(2)* %82 to <16 x i8> addrspace(2)* %84 = load <16 x i8>, <16 x i8> addrspace(2)* %83, align 16, !tbaa !0 %85 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 8 %86 = bitcast <8 x i32> addrspace(2)* %85 to <32 x i8> addrspace(2)* %87 = load <32 x i8>, <32 x i8> addrspace(2)* %86, align 32, !tbaa !0 %88 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 8 %89 = bitcast <4 x i32> addrspace(2)* %88 to <16 x i8> addrspace(2)* %90 = load <16 x i8>, <16 x i8> addrspace(2)* %89, align 16, !tbaa !0 %91 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 9 %92 = bitcast <8 x i32> addrspace(2)* %91 to <32 x i8> addrspace(2)* %93 = load <32 x i8>, <32 x i8> addrspace(2)* %92, align 32, !tbaa !0 %94 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 9 %95 = bitcast <4 x i32> addrspace(2)* %94 to <16 x i8> addrspace(2)* %96 = load <16 x i8>, <16 x i8> addrspace(2)* %95, align 16, !tbaa !0 %97 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 10 %98 = bitcast <8 x i32> addrspace(2)* %97 to <32 x i8> addrspace(2)* %99 = load <32 x i8>, <32 x i8> addrspace(2)* %98, align 32, !tbaa !0 %100 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 10 %101 = bitcast <4 x i32> addrspace(2)* %100 to <16 x i8> addrspace(2)* %102 = load <16 x i8>, <16 x i8> addrspace(2)* %101, align 16, !tbaa !0 %103 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 11 %104 = bitcast <8 x i32> addrspace(2)* %103 to <32 x i8> addrspace(2)* %105 = load <32 x i8>, <32 x i8> addrspace(2)* %104, align 32, !tbaa !0 %106 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 11 %107 = bitcast <4 x i32> addrspace(2)* %106 to <16 x i8> addrspace(2)* %108 = load <16 x i8>, <16 x i8> addrspace(2)* %107, align 16, !tbaa !0 %109 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %110 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %111 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %112 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %113 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %114 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %115 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %116 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %117 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %118 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %119 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %120 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %121 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %122 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %123 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %124 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %125 = call float @llvm.SI.fs.interp(i32 3, i32 5, i32 %5, <2 x i32> %7) %126 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %5, <2 x i32> %7) %127 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %5, <2 x i32> %7) %128 = call float @llvm.SI.fs.interp(i32 2, i32 6, i32 %5, <2 x i32> %7) %129 = call float @llvm.SI.fs.interp(i32 3, i32 6, i32 %5, <2 x i32> %7) %130 = call float @llvm.SI.fs.interp(i32 0, i32 7, i32 %5, <2 x i32> %7) %131 = call float @llvm.SI.fs.interp(i32 1, i32 7, i32 %5, <2 x i32> %7) %132 = call float @llvm.SI.fs.interp(i32 2, i32 7, i32 %5, <2 x i32> %7) %133 = call float @llvm.SI.fs.interp(i32 3, i32 7, i32 %5, <2 x i32> %7) %134 = fmul float %110, %110 %135 = fmul float %111, %111 %136 = fadd float %135, %134 %137 = fmul float %112, %112 %138 = fadd float %136, %137 %139 = call float @llvm.AMDGPU.rsq.clamped.f32(float %138) %140 = fmul float %110, %139 %141 = fmul float %111, %139 %142 = fmul float %112, %139 %143 = bitcast float %128 to i32 %144 = bitcast float %129 to i32 %145 = insertelement <2 x i32> undef, i32 %143, i32 0 %146 = insertelement <2 x i32> %145, i32 %144, i32 1 %147 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %146, <32 x i8> %45, <16 x i8> %48, i32 2) %148 = extractelement <4 x float> %147, i32 0 %149 = extractelement <4 x float> %147, i32 1 %150 = extractelement <4 x float> %147, i32 2 %151 = extractelement <4 x float> %147, i32 3 %152 = bitcast float %130 to i32 %153 = bitcast float %131 to i32 %154 = insertelement <2 x i32> undef, i32 %152, i32 0 %155 = insertelement <2 x i32> %154, i32 %153, i32 1 %156 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %155, <32 x i8> %51, <16 x i8> %54, i32 2) %157 = extractelement <4 x float> %156, i32 0 %158 = extractelement <4 x float> %156, i32 1 %159 = extractelement <4 x float> %156, i32 2 %160 = extractelement <4 x float> %156, i32 3 %161 = bitcast float %132 to i32 %162 = bitcast float %133 to i32 %163 = insertelement <2 x i32> undef, i32 %161, i32 0 %164 = insertelement <2 x i32> %163, i32 %162, i32 1 %165 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %164, <32 x i8> %57, <16 x i8> %60, i32 2) %166 = extractelement <4 x float> %165, i32 0 %167 = extractelement <4 x float> %165, i32 1 %168 = extractelement <4 x float> %165, i32 2 %169 = extractelement <4 x float> %165, i32 3 %170 = fsub float %151, %119 %171 = call float @llvm.maxnum.f32(float %170, float 0.000000e+00) %172 = fsub float %116, %171 %173 = fadd float %119, %151 %174 = call float @llvm.minnum.f32(float %173, float 1.000000e+00) %175 = fsub float %174, %171 %176 = fdiv float 1.000000e+00, %175 %177 = fmul float %172, %176 %178 = call float @llvm.AMDIL.clamp.(float %177, float 0.000000e+00, float 1.000000e+00) %179 = fmul float %178, 2.000000e+00 %180 = fsub float 3.000000e+00, %179 %181 = fmul float %178, %180 %182 = fmul float %178, %181 %183 = fsub float 1.000000e+00, %182 %184 = fsub float %160, %120 %185 = call float @llvm.maxnum.f32(float %184, float 0.000000e+00) %186 = fsub float %117, %185 %187 = fadd float %120, %160 %188 = call float @llvm.minnum.f32(float %187, float 1.000000e+00) %189 = fsub float %188, %185 %190 = fdiv float 1.000000e+00, %189 %191 = fmul float %186, %190 %192 = call float @llvm.AMDIL.clamp.(float %191, float 0.000000e+00, float 1.000000e+00) %193 = fmul float %192, 2.000000e+00 %194 = fsub float 3.000000e+00, %193 %195 = fmul float %192, %194 %196 = fmul float %192, %195 %197 = call float @llvm.maxnum.f32(float %196, float 0.000000e+00) %198 = call float @llvm.minnum.f32(float %197, float %183) %199 = fsub float %183, %198 %200 = fsub float %169, %121 %201 = call float @llvm.maxnum.f32(float %200, float 0.000000e+00) %202 = fsub float %118, %201 %203 = fadd float %121, %169 %204 = call float @llvm.minnum.f32(float %203, float 1.000000e+00) %205 = fsub float %204, %201 %206 = fdiv float 1.000000e+00, %205 %207 = fmul float %202, %206 %208 = call float @llvm.AMDIL.clamp.(float %207, float 0.000000e+00, float 1.000000e+00) %209 = fmul float %208, 2.000000e+00 %210 = fsub float 3.000000e+00, %209 %211 = fmul float %208, %210 %212 = fmul float %208, %211 %213 = call float @llvm.maxnum.f32(float %212, float 0.000000e+00) %214 = call float @llvm.minnum.f32(float %213, float %199) %215 = fsub float %199, %214 %216 = fmul float %34, %166 %217 = fmul float %35, %167 %218 = fmul float %36, %168 %219 = fmul float %31, %157 %220 = fmul float %32, %158 %221 = fmul float %33, %159 %222 = fmul float %28, %148 %223 = fmul float %29, %149 %224 = fmul float %30, %150 %225 = bitcast float %126 to i32 %226 = bitcast float %127 to i32 %227 = insertelement <2 x i32> undef, i32 %225, i32 0 %228 = insertelement <2 x i32> %227, i32 %226, i32 1 %229 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %228, <32 x i8> %40, <16 x i8> %42, i32 2) %230 = extractelement <4 x float> %229, i32 0 %231 = extractelement <4 x float> %229, i32 1 %232 = extractelement <4 x float> %229, i32 2 %233 = fmul float %25, %230 %234 = fmul float %26, %231 %235 = fmul float %27, %232 %236 = fmul float %215, %233 %237 = fmul float %215, %234 %238 = fmul float %215, %235 %239 = fmul float %182, %222 %240 = fadd float %239, %236 %241 = fmul float %182, %223 %242 = fadd float %241, %237 %243 = fmul float %182, %224 %244 = fadd float %243, %238 %245 = fmul float %198, %219 %246 = fadd float %245, %240 %247 = fmul float %198, %220 %248 = fadd float %247, %242 %249 = fmul float %198, %221 %250 = fadd float %249, %244 %251 = fmul float %216, %214 %252 = fadd float %251, %246 %253 = fmul float %217, %214 %254 = fadd float %253, %248 %255 = fmul float %218, %214 %256 = fadd float %255, %250 %257 = fmul float %252, %113 %258 = fmul float %254, %114 %259 = fmul float %256, %115 %260 = bitcast float %132 to i32 %261 = bitcast float %133 to i32 %262 = insertelement <2 x i32> undef, i32 %260, i32 0 %263 = insertelement <2 x i32> %262, i32 %261, i32 1 %264 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %263, <32 x i8> %81, <16 x i8> %84, i32 2) %265 = extractelement <4 x float> %264, i32 0 %266 = extractelement <4 x float> %264, i32 1 %267 = extractelement <4 x float> %264, i32 2 %268 = bitcast float %130 to i32 %269 = bitcast float %131 to i32 %270 = insertelement <2 x i32> undef, i32 %268, i32 0 %271 = insertelement <2 x i32> %270, i32 %269, i32 1 %272 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %271, <32 x i8> %75, <16 x i8> %78, i32 2) %273 = extractelement <4 x float> %272, i32 0 %274 = extractelement <4 x float> %272, i32 1 %275 = extractelement <4 x float> %272, i32 2 %276 = bitcast float %128 to i32 %277 = bitcast float %129 to i32 %278 = insertelement <2 x i32> undef, i32 %276, i32 0 %279 = insertelement <2 x i32> %278, i32 %277, i32 1 %280 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %279, <32 x i8> %69, <16 x i8> %72, i32 2) %281 = extractelement <4 x float> %280, i32 0 %282 = extractelement <4 x float> %280, i32 1 %283 = extractelement <4 x float> %280, i32 2 %284 = bitcast float %126 to i32 %285 = bitcast float %127 to i32 %286 = insertelement <2 x i32> undef, i32 %284, i32 0 %287 = insertelement <2 x i32> %286, i32 %285, i32 1 %288 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %287, <32 x i8> %63, <16 x i8> %66, i32 2) %289 = extractelement <4 x float> %288, i32 0 %290 = extractelement <4 x float> %288, i32 1 %291 = extractelement <4 x float> %288, i32 2 %292 = fmul float %215, %289 %293 = fmul float %215, %290 %294 = fmul float %215, %291 %295 = fmul float %182, %281 %296 = fadd float %295, %292 %297 = fmul float %182, %282 %298 = fadd float %297, %293 %299 = fmul float %182, %283 %300 = fadd float %299, %294 %301 = fmul float %198, %273 %302 = fadd float %301, %296 %303 = fmul float %198, %274 %304 = fadd float %303, %298 %305 = fmul float %198, %275 %306 = fadd float %305, %300 %307 = fmul float %214, %265 %308 = fadd float %307, %302 %309 = fmul float %214, %266 %310 = fadd float %309, %304 %311 = fmul float %214, %267 %312 = fadd float %311, %306 %313 = fmul float %142, %123 %314 = fmul float %140, %124 %315 = fmul float %141, %122 %316 = fmul float %141, %124 %317 = fsub float %316, %313 %318 = fmul float %142, %122 %319 = fsub float %318, %314 %320 = fmul float %140, %123 %321 = fsub float %320, %315 %322 = bitcast float %132 to i32 %323 = bitcast float %133 to i32 %324 = insertelement <2 x i32> undef, i32 %322, i32 0 %325 = insertelement <2 x i32> %324, i32 %323, i32 1 %326 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %325, <32 x i8> %105, <16 x i8> %108, i32 2) %327 = extractelement <4 x float> %326, i32 1 %328 = extractelement <4 x float> %326, i32 3 %329 = bitcast float %130 to i32 %330 = bitcast float %131 to i32 %331 = insertelement <2 x i32> undef, i32 %329, i32 0 %332 = insertelement <2 x i32> %331, i32 %330, i32 1 %333 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %332, <32 x i8> %99, <16 x i8> %102, i32 2) %334 = extractelement <4 x float> %333, i32 1 %335 = extractelement <4 x float> %333, i32 3 %336 = bitcast float %126 to i32 %337 = bitcast float %127 to i32 %338 = insertelement <2 x i32> undef, i32 %336, i32 0 %339 = insertelement <2 x i32> %338, i32 %337, i32 1 %340 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %339, <32 x i8> %87, <16 x i8> %90, i32 2) %341 = extractelement <4 x float> %340, i32 1 %342 = extractelement <4 x float> %340, i32 3 %343 = bitcast float %128 to i32 %344 = bitcast float %129 to i32 %345 = insertelement <2 x i32> undef, i32 %343, i32 0 %346 = insertelement <2 x i32> %345, i32 %344, i32 1 %347 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %346, <32 x i8> %93, <16 x i8> %96, i32 2) %348 = extractelement <4 x float> %347, i32 1 %349 = extractelement <4 x float> %347, i32 3 %350 = fmul float %182, %348 %351 = fmul float %182, %349 %352 = fmul float %215, %341 %353 = fadd float %352, %350 %354 = fmul float %215, %342 %355 = fadd float %354, %351 %356 = fmul float %198, %334 %357 = fadd float %356, %353 %358 = fmul float %198, %335 %359 = fadd float %358, %355 %360 = fmul float %214, %327 %361 = fadd float %360, %357 %362 = fmul float %214, %328 %363 = fadd float %362, %359 %364 = fmul float %363, 2.000000e+00 %365 = fadd float %364, -1.000000e+00 %366 = fmul float %361, 2.000000e+00 %367 = fadd float %366, -1.000000e+00 %368 = fmul float %365, %24 %369 = fmul float %367, %24 %370 = fmul float %365, %365 %371 = fmul float %367, %367 %372 = fadd float %370, %371 %373 = fsub float 1.000000e+00, %372 %374 = call float @llvm.AMDIL.clamp.(float %373, float 0.000000e+00, float 1.000000e+00) %375 = call float @llvm.sqrt.f32(float %374) %376 = fmul float %368, %368 %377 = fmul float %369, %369 %378 = fadd float %377, %376 %379 = fmul float %375, %375 %380 = fadd float %378, %379 %381 = call float @llvm.AMDGPU.rsq.clamped.f32(float %380) %382 = fmul float %368, %381 %383 = fmul float %369, %381 %384 = fsub float -0.000000e+00, %383 %385 = fmul float %375, %381 %386 = fmul float %122, %122 %387 = fmul float %123, %123 %388 = fadd float %387, %386 %389 = fmul float %124, %124 %390 = fadd float %388, %389 %391 = call float @llvm.AMDGPU.rsq.clamped.f32(float %390) %392 = fmul float %122, %391 %393 = fmul float %123, %391 %394 = fmul float %124, %391 %395 = fmul float %317, %317 %396 = fmul float %319, %319 %397 = fadd float %396, %395 %398 = fmul float %321, %321 %399 = fadd float %397, %398 %400 = call float @llvm.AMDGPU.rsq.clamped.f32(float %399) %401 = fmul float %317, %400 %402 = fmul float %319, %400 %403 = fmul float %321, %400 %404 = fmul float %125, %401 %405 = fmul float %125, %402 %406 = fmul float %125, %403 %407 = fmul float %404, %384 %408 = fmul float %405, %384 %409 = fmul float %406, %384 %410 = fmul float %382, %392 %411 = fadd float %410, %407 %412 = fmul float %382, %393 %413 = fadd float %412, %408 %414 = fmul float %382, %394 %415 = fadd float %414, %409 %416 = fmul float %140, %385 %417 = fadd float %416, %411 %418 = fmul float %141, %385 %419 = fadd float %418, %413 %420 = fmul float %142, %385 %421 = fadd float %420, %415 %422 = fmul float %417, %417 %423 = fmul float %419, %419 %424 = fadd float %423, %422 %425 = fmul float %421, %421 %426 = fadd float %424, %425 %427 = call float @llvm.AMDGPU.rsq.clamped.f32(float %426) %428 = fmul float %417, %427 %429 = fmul float %419, %427 %430 = fmul float %421, %427 %431 = fmul float %428, 5.000000e-01 %432 = fadd float %431, 5.000000e-01 %433 = fmul float %429, 5.000000e-01 %434 = fadd float %433, 5.000000e-01 %435 = fmul float %430, 5.000000e-01 %436 = fadd float %435, 5.000000e-01 %437 = fmul float %308, %37 %438 = fmul float %312, %38 %439 = call i32 @llvm.SI.packf16(float %257, float %258) %440 = bitcast i32 %439 to float %441 = call i32 @llvm.SI.packf16(float %259, float 0.000000e+00) %442 = bitcast i32 %441 to float %443 = call i32 @llvm.SI.packf16(float %437, float %310) %444 = bitcast i32 %443 to float %445 = call i32 @llvm.SI.packf16(float %438, float 0.000000e+00) %446 = bitcast i32 %445 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %440, float %442, float %440, float %442) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 1, i32 1, float %444, float %446, float %444, float %446) %447 = call i32 @llvm.SI.packf16(float %432, float %434) %448 = bitcast i32 %447 to float %449 = call i32 @llvm.SI.packf16(float %436, float 0.000000e+00) %450 = bitcast i32 %449 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 2, i32 0, float %109, float %109, float %109, float %109) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 3, i32 1, float %448, float %450, float %448, float %450) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400 v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 v_interp_p1_f32 v5, v0, 2, 1, [m0] ; C8140600 v_interp_p2_f32 v5, [v5], v1, 2, 1, [m0] ; C8150601 v_interp_p1_f32 v6, v0, 0, 2, [m0] ; C8180800 v_interp_p2_f32 v6, [v6], v1, 0, 2, [m0] ; C8190801 v_interp_p1_f32 v7, v0, 1, 2, [m0] ; C81C0900 v_interp_p2_f32 v7, [v7], v1, 1, 2, [m0] ; C81D0901 v_interp_p1_f32 v8, v0, 2, 2, [m0] ; C8200A00 v_interp_p2_f32 v8, [v8], v1, 2, 2, [m0] ; C8210A01 v_interp_p1_f32 v9, v0, 0, 3, [m0] ; C8240C00 v_interp_p2_f32 v9, [v9], v1, 0, 3, [m0] ; C8250C01 v_interp_p1_f32 v10, v0, 1, 3, [m0] ; C8280D00 v_interp_p2_f32 v10, [v10], v1, 1, 3, [m0] ; C8290D01 v_interp_p1_f32 v11, v0, 2, 3, [m0] ; C82C0E00 v_interp_p2_f32 v11, [v11], v1, 2, 3, [m0] ; C82D0E01 v_interp_p1_f32 v12, v0, 0, 4, [m0] ; C8301000 v_interp_p2_f32 v12, [v12], v1, 0, 4, [m0] ; C8311001 v_interp_p1_f32 v13, v0, 1, 4, [m0] ; C8341100 v_interp_p2_f32 v13, [v13], v1, 1, 4, [m0] ; C8351101 v_interp_p1_f32 v14, v0, 2, 4, [m0] ; C8381200 v_interp_p2_f32 v14, [v14], v1, 2, 4, [m0] ; C8391201 v_interp_p1_f32 v15, v0, 0, 5, [m0] ; C83C1400 v_interp_p2_f32 v15, [v15], v1, 0, 5, [m0] ; C83D1401 v_interp_p1_f32 v16, v0, 1, 5, [m0] ; C8401500 v_interp_p2_f32 v16, [v16], v1, 1, 5, [m0] ; C8411501 v_interp_p1_f32 v17, v0, 2, 5, [m0] ; C8441600 v_interp_p2_f32 v17, [v17], v1, 2, 5, [m0] ; C8451601 v_interp_p1_f32 v18, v0, 3, 5, [m0] ; C8481700 v_interp_p2_f32 v18, [v18], v1, 3, 5, [m0] ; C8491701 v_interp_p1_f32 v19, v0, 0, 6, [m0] ; C84C1800 v_interp_p2_f32 v19, [v19], v1, 0, 6, [m0] ; C84D1801 v_interp_p1_f32 v20, v0, 1, 6, [m0] ; C8501900 v_interp_p2_f32 v20, [v20], v1, 1, 6, [m0] ; C8511901 v_interp_p1_f32 v21, v0, 2, 6, [m0] ; C8541A00 v_interp_p2_f32 v21, [v21], v1, 2, 6, [m0] ; C8551A01 v_interp_p1_f32 v22, v0, 3, 6, [m0] ; C8581B00 v_interp_p2_f32 v22, [v22], v1, 3, 6, [m0] ; C8591B01 v_interp_p1_f32 v23, v0, 0, 7, [m0] ; C85C1C00 v_interp_p2_f32 v23, [v23], v1, 0, 7, [m0] ; C85D1C01 v_interp_p1_f32 v24, v0, 1, 7, [m0] ; C8601D00 v_interp_p2_f32 v24, [v24], v1, 1, 7, [m0] ; C8611D01 s_load_dwordx4 s[32:35], s[4:5], 0x4 ; C0900504 s_load_dwordx4 s[36:39], s[4:5], 0x8 ; C0920508 s_load_dwordx4 s[40:43], s[4:5], 0xc ; C094050C s_load_dwordx8 s[44:51], s[6:7], 0x8 ; C0D60708 s_load_dwordx8 s[52:59], s[6:7], 0x10 ; C0DA0710 v_interp_p1_f32 v25, v0, 2, 7, [m0] ; C8641E00 s_load_dwordx8 s[60:67], s[6:7], 0x18 ; C0DE0718 v_interp_p2_f32 v25, [v25], v1, 2, 7, [m0] ; C8651E01 s_load_dwordx4 s[20:23], s[4:5], 0x0 ; C08A0500 s_load_dwordx8 s[24:31], s[6:7], 0x0 ; C0CC0700 v_interp_p1_f32 v26, v0, 3, 7, [m0] ; C8681F00 v_interp_p2_f32 v26, [v26], v1, 3, 7, [m0] ; C8691F01 s_load_dwordx4 s[8:11], s[4:5], 0x10 ; C0840510 s_load_dwordx8 s[12:19], s[6:7], 0x20 ; C0C60720 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[27:30], 15, 0, 0, 0, 0, 0, 0, 0, v[21:22], s[44:51], s[32:35] ; F0800F00 010B1B15 image_sample v[31:34], 15, 0, 0, 0, 0, 0, 0, 0, v[23:24], s[52:59], s[36:39] ; F0800F00 012D1F17 image_sample v[35:38], 15, 0, 0, 0, 0, 0, 0, 0, v[25:26], s[60:67], s[40:43] ; F0800F00 014F2319 s_load_dwordx4 s[44:47], s[4:5], 0x14 ; C0960514 s_load_dwordx4 s[48:51], s[4:5], 0x1c ; C098051C image_sample v[39:41], 7, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[24:31], s[20:23] ; F0800700 00A62713 s_load_dwordx8 s[64:71], s[6:7], 0x30 ; C0E00730 s_load_dwordx8 s[72:79], s[6:7], 0x38 ; C0E40738 s_load_dwordx4 s[80:83], s[4:5], 0x18 ; C0A80518 s_load_dwordx8 s[84:91], s[6:7], 0x28 ; C0EA0728 s_load_dwordx4 s[52:55], s[4:5], 0x2c ; C09A052C s_load_dwordx8 s[56:63], s[6:7], 0x58 ; C0DC0758 s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 s_load_dwordx4 s[32:35], s[4:5], 0x20 ; C0900520 s_load_dwordx4 s[20:23], s[4:5], 0x24 ; C08A0524 s_load_dwordx8 s[36:43], s[6:7], 0x40 ; C0D20740 s_load_dwordx8 s[24:31], s[6:7], 0x48 ; C0CC0748 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 image_sample v[42:44], 7, 0, 0, 0, 0, 0, 0, 0, v[25:26], s[72:79], s[48:51] ; F0800700 01922A19 image_sample v[45:47], 7, 0, 0, 0, 0, 0, 0, 0, v[23:24], s[64:71], s[80:83] ; F0800700 02902D17 image_sample v[48:50], 7, 0, 0, 0, 0, 0, 0, 0, v[21:22], s[84:91], s[44:47] ; F0800700 01753015 image_sample v[51:53], 7, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[12:19], s[8:11] ; F0800700 00433313 image_sample v[0:1], 10, 0, 0, 0, 0, 0, 0, 0, v[25:26], s[56:63], s[52:55] ; F0800A00 01AE0019 s_buffer_load_dword s8, s[0:3], 0x10 ; C2040110 s_buffer_load_dword s9, s[0:3], 0x11 ; C2048111 s_buffer_load_dword s10, s[0:3], 0x12 ; C2050112 v_subrev_f32_e32 v25, v14, v38 ; 0A324D0E v_add_f32_e32 v14, v38, v14 ; 061C1D26 s_buffer_load_dword s11, s[0:3], 0xc ; C205810C s_buffer_load_dword s12, s[0:3], 0xd ; C206010D s_buffer_load_dword s13, s[0:3], 0xe ; C206810E s_buffer_load_dword s14, s[0:3], 0x38 ; C2070138 s_buffer_load_dword s15, s[0:3], 0x39 ; C2078139 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v26, s8, v35 ; 10344608 v_mul_f32_e32 v35, s9, v36 ; 10464809 v_mul_f32_e32 v36, s10, v37 ; 10484A0A s_buffer_load_dword s8, s[0:3], 0x8 ; C2040108 s_buffer_load_dword s9, s[0:3], 0x9 ; C2048109 s_buffer_load_dword s10, s[0:3], 0xa ; C205010A v_subrev_f32_e32 v37, v13, v34 ; 0A4A450D v_add_f32_e32 v13, v34, v13 ; 061A1B22 v_mul_f32_e32 v31, s11, v31 ; 103E3E0B v_mul_f32_e32 v32, s12, v32 ; 1040400C v_mul_f32_e32 v33, s13, v33 ; 1042420D v_subrev_f32_e32 v34, v12, v30 ; 0A443D0C v_add_f32_e32 v12, v30, v12 ; 0618191E s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v27, s8, v27 ; 10363608 v_mul_f32_e32 v28, s9, v28 ; 10383809 v_mul_f32_e32 v29, s10, v29 ; 103A3A0A v_max_f32_e32 v30, 0, v34 ; 203C4480 v_min_f32_e32 v12, 1.0, v12 ; 1E1818F2 v_subrev_f32_e32 v12, v30, v12 ; 0A18191E v_rcp_f32_e32 v12, v12 ; 7E18550C v_max_f32_e32 v34, 0, v37 ; 20444A80 v_min_f32_e32 v13, 1.0, v13 ; 1E1A1AF2 v_subrev_f32_e32 v13, v34, v13 ; 0A1A1B22 v_rcp_f32_e32 v13, v13 ; 7E1A550D v_subrev_f32_e32 v9, v30, v9 ; 0A12131E v_mul_f32_e32 v9, v12, v9 ; 1012130C v_subrev_f32_e32 v10, v34, v10 ; 0A141522 v_mul_f32_e32 v10, v13, v10 ; 1014150D v_add_f32_e64 v10, 0, v10 clamp ; D206080A 00021480 v_max_f32_e32 v12, 0, v25 ; 20183280 v_min_f32_e32 v13, 1.0, v14 ; 1E1A1CF2 v_subrev_f32_e32 v13, v12, v13 ; 0A1A1B0C v_rcp_f32_e32 v13, v13 ; 7E1A550D v_mov_b32_e32 v14, 0x40400000 ; 7E1C02FF 40400000 v_mad_f32 v25, -2.0, v10, v14 ; D2820019 043A14F5 v_mul_f32_e32 v25, v25, v10 ; 10321519 v_mul_f32_e32 v10, v25, v10 ; 10141519 v_subrev_f32_e32 v11, v12, v11 ; 0A16170C v_mul_f32_e32 v11, v13, v11 ; 1016170D v_add_f32_e64 v9, 0, v9 clamp ; D2060809 00021280 v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680 v_mad_f32 v12, -2.0, v9, v14 ; D282000C 043A12F5 v_mac_f32_e32 v14, -2.0, v11 ; 3E1C16F5 v_mul_f32_e32 v13, v14, v11 ; 101A170E v_mul_f32_e32 v11, v13, v11 ; 1016170D v_mul_f32_e32 v12, v12, v9 ; 1018130C v_max_f32_e32 v10, 0, v10 ; 20141480 v_mad_f32 v13, -v9, v12, 1.0 ; D282000D 23CA1909 v_min_f32_e32 v10, v13, v10 ; 1E14150D v_subrev_f32_e32 v13, v10, v13 ; 0A1A1B0A v_max_f32_e32 v11, 0, v11 ; 20161680 v_min_f32_e32 v11, v13, v11 ; 1E16170D v_subrev_f32_e32 v13, v11, v13 ; 0A1A1B0B s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 v_mul_f32_e32 v14, v51, v13 ; 101C1B33 v_mul_f32_e32 v25, v52, v13 ; 10321B34 v_mul_f32_e32 v30, v53, v13 ; 103C1B35 v_mul_f32_e32 v9, v12, v9 ; 1012130C v_mac_f32_e32 v14, v48, v9 ; 3E1C1330 v_mac_f32_e32 v25, v49, v9 ; 3E321331 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_load_dwordx4 s[16:19], s[4:5], 0x28 ; C0880528 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v12, s8, v39 ; 10184E08 s_load_dwordx8 s[44:51], s[6:7], 0x50 ; C0D60750 v_mac_f32_e32 v30, v50, v9 ; 3E3C1332 v_mac_f32_e32 v14, v45, v10 ; 3E1C152D v_mac_f32_e32 v25, v46, v10 ; 3E32152E v_mac_f32_e32 v30, v47, v10 ; 3E3C152F v_mac_f32_e32 v14, v42, v11 ; 3E1C172A v_mac_f32_e32 v25, v43, v11 ; 3E32172B v_mac_f32_e32 v30, v44, v11 ; 3E3C172C v_mul_f32_e32 v34, s9, v40 ; 10445009 v_mul_f32_e32 v37, s10, v41 ; 104A520A s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[23:24], 10, 0, 0, 0, 0, 0, 0, 0, v[23:24], s[44:51], s[16:19] ; F0800A00 008B1717 image_sample v[19:20], 10, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[36:43], s[32:35] ; F0800A00 01091313 image_sample v[21:22], 10, 0, 0, 0, 0, 0, 0, 0, v[21:22], s[24:31], s[20:23] ; F0800A00 00A61515 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v21, v21, v9 ; 102A1315 v_mul_f32_e32 v22, v22, v9 ; 102C1316 v_mac_f32_e32 v21, v19, v13 ; 3E2A1B13 v_mac_f32_e32 v22, v20, v13 ; 3E2C1B14 v_mul_f32_e32 v12, v12, v13 ; 10181B0C v_mul_f32_e32 v19, v34, v13 ; 10261B22 v_mul_f32_e32 v13, v37, v13 ; 101A1B25 v_mac_f32_e32 v12, v27, v9 ; 3E18131B v_mac_f32_e32 v19, v28, v9 ; 3E26131C v_mac_f32_e32 v13, v29, v9 ; 3E1A131D v_mac_f32_e32 v21, v23, v10 ; 3E2A1517 v_mac_f32_e32 v22, v24, v10 ; 3E2C1518 v_mac_f32_e32 v12, v31, v10 ; 3E18151F v_mac_f32_e32 v19, v32, v10 ; 3E261520 v_mac_f32_e32 v13, v33, v10 ; 3E1A1521 v_mac_f32_e32 v21, v0, v11 ; 3E2A1700 v_mac_f32_e32 v22, v1, v11 ; 3E2C1701 v_mac_f32_e32 v12, v11, v26 ; 3E18350B v_mul_f32_e32 v0, v3, v3 ; 10000703 v_mac_f32_e32 v0, v4, v4 ; 3E000904 v_mac_f32_e32 v0, v5, v5 ; 3E000B05 v_rsq_clamp_f32_e32 v0, v0 ; 7E005900 v_mac_f32_e32 v19, v11, v35 ; 3E26470B s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100 v_mac_f32_e32 v13, v11, v36 ; 3E1A490B v_mul_f32_e32 v1, v0, v3 ; 10020700 v_mul_f32_e32 v3, v0, v4 ; 10060900 v_mul_f32_e32 v0, v0, v5 ; 10000B00 v_mul_f32_e32 v4, v16, v0 ; 10080110 v_mad_f32 v4, v3, v17, -v4 ; D2820004 84122303 v_mul_f32_e32 v5, v17, v1 ; 100A0311 v_mad_f32 v5, v0, v15, -v5 ; D2820005 84161F00 v_mul_f32_e32 v9, v15, v3 ; 1012070F v_mad_f32 v9, v1, v16, -v9 ; D2820009 84262101 v_mul_f32_e32 v10, v15, v15 ; 10141F0F v_mac_f32_e32 v10, v16, v16 ; 3E142110 v_mac_f32_e32 v10, v17, v17 ; 3E142311 v_rsq_clamp_f32_e32 v10, v10 ; 7E14590A v_mul_f32_e32 v11, v4, v4 ; 10160904 v_mac_f32_e32 v11, v5, v5 ; 3E160B05 v_mac_f32_e32 v11, v9, v9 ; 3E161309 v_rsq_clamp_f32_e32 v11, v11 ; 7E16590B v_mul_f32_e32 v15, v10, v15 ; 101E1F0A v_mul_f32_e32 v16, v10, v16 ; 1020210A v_mul_f32_e32 v10, v10, v17 ; 1014230A v_mul_f32_e32 v4, v11, v4 ; 1008090B v_mul_f32_e32 v5, v11, v5 ; 100A0B0B v_mul_f32_e32 v9, v11, v9 ; 1012130B v_mad_f32 v11, 2.0, v22, -1.0 ; D282000B 03CE2CF4 v_mad_f32 v17, 2.0, v21, -1.0 ; D2820011 03CE2AF4 v_mad_f32 v20, -v17, v17, 1.0 ; D2820014 23CA2311 v_mad_f32 v20, -v11, v11, v20 ; D2820014 2452170B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v11, s0, v11 ; 10161600 v_mul_f32_e32 v17, s0, v17 ; 10222200 v_add_f32_e64 v20, 0, v20 clamp ; D2060814 00022880 v_sqrt_f32_e32 v20, v20 ; 7E286714 v_mul_f32_e32 v21, v11, v11 ; 102A170B v_mac_f32_e32 v21, v17, v17 ; 3E2A2311 v_mac_f32_e32 v21, v20, v20 ; 3E2A2914 v_rsq_clamp_f32_e32 v21, v21 ; 7E2A5915 v_mul_f32_e32 v4, v4, v18 ; 10082504 v_mul_f32_e32 v5, v5, v18 ; 100A2505 v_mul_f32_e32 v9, v9, v18 ; 10122509 v_mul_f32_e32 v17, v21, v17 ; 10222315 v_mul_f32_e32 v4, v17, v4 ; 10080911 v_mul_f32_e32 v5, v17, v5 ; 100A0B11 v_mul_f32_e32 v9, v17, v9 ; 10121311 v_mul_f32_e32 v11, v21, v11 ; 10161715 v_mad_f32 v4, v11, v15, -v4 ; D2820004 84121F0B v_mad_f32 v5, v11, v16, -v5 ; D2820005 8416210B v_mad_f32 v9, v11, v10, -v9 ; D2820009 8426150B v_mul_f32_e32 v10, s14, v14 ; 10141C0E v_mul_f32_e32 v11, s15, v30 ; 10163C0F v_mul_f32_e32 v14, v21, v20 ; 101C2915 v_mac_f32_e32 v4, v14, v1 ; 3E08030E v_mac_f32_e32 v5, v14, v3 ; 3E0A070E v_mac_f32_e32 v9, v14, v0 ; 3E12010E v_mul_f32_e32 v0, v6, v12 ; 10001906 v_mul_f32_e32 v1, v7, v19 ; 10022707 v_mul_f32_e32 v3, v8, v13 ; 10061B08 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e64 v1, v3, 0 ; D25E0001 00010103 exp 15, 0, 1, 0, 0, v0, v1, v0, v1 ; F800040F 01000100 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, v4, v4 ; 10000904 v_mac_f32_e32 v0, v5, v5 ; 3E000B05 v_mac_f32_e32 v0, v9, v9 ; 3E001309 v_rsq_clamp_f32_e32 v0, v0 ; 7E005900 v_cvt_pkrtz_f16_f32_e32 v1, v10, v25 ; 5E02330A v_cvt_pkrtz_f16_f32_e64 v3, v11, 0 ; D25E0003 0001010B exp 15, 1, 1, 0, 0, v1, v3, v1, v3 ; F800041F 03010301 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v1, v0, v4 ; 10020900 v_mul_f32_e32 v3, v0, v5 ; 10060B00 v_mul_f32_e32 v0, v0, v9 ; 10001300 v_mad_f32 v1, 0.5, v1, 0.5 ; D2820001 03C202F0 v_mad_f32 v3, 0.5, v3, 0.5 ; D2820003 03C206F0 v_cvt_pkrtz_f16_f32_e32 v1, v1, v3 ; 5E020701 exp 15, 2, 0, 0, 0, v2, v2, v2, v2 ; F800002F 02020202 v_mad_f32 v0, 0.5, v0, 0.5 ; D2820000 03C200F0 v_cvt_pkrtz_f16_f32_e64 v0, v0, 0 ; D25E0000 00010100 exp 15, 3, 1, 1, 1, v1, v0, v1, v0 ; F8001C3F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 96 VGPRS: 56 Code Size: 1288 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL OUT[4], GENERIC[2] DCL OUT[5], GENERIC[3] DCL OUT[6], GENERIC[4] DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0..2] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL DCL TEMP[2..8], LOCAL IMM[0] FLT32 { 0.0000, 255.0000, -128.0000, 1.0000} IMM[1] INT32 {1, 0, 2, 3} IMM[2] FLT32 { -64.0000, 0.0159, 2.0000, 16.0000} IMM[3] UINT32 {3, 304, 320, 4} IMM[4] UINT32 {0, 12, 28, 44} IMM[5] UINT32 {60, 24, 32, 16} IMM[6] UINT32 {48, 20, 36, 52} IMM[7] UINT32 {8, 40, 56, 0} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MOV TEMP[0].xy, IN[3].xyyy 4: MOV TEMP[0].w, IMM[0].xxxx 5: TXL TEMP[0], TEMP[0], SAMP[0], 2D 6: MOV TEMP[2].xy, IN[3].xyyy 7: MOV TEMP[2].w, IMM[0].xxxx 8: TXL TEMP[2], TEMP[2], SAMP[0], 2D, IMM[1].xyx 9: MOV TEMP[3].xy, IN[3].xyyy 10: MOV TEMP[3].w, IMM[0].xxxx 11: TXL TEMP[3], TEMP[3], SAMP[0], 2D, IMM[1].zyz 12: MAD TEMP[4], IN[1], IMM[0].yyyy, IMM[0].zzzz 13: FSLT TEMP[5], TEMP[4], IMM[0].xxxx 14: AND TEMP[5], TEMP[5], IMM[0].wwww 15: ABS TEMP[4], TEMP[4] 16: ADD TEMP[4], TEMP[4], -TEMP[5] 17: ADD TEMP[4], TEMP[4], IMM[2].xxxx 18: FSLT TEMP[6], TEMP[4], IMM[0].xxxx 19: AND TEMP[6], TEMP[6], IMM[0].wwww 20: ABS TEMP[4], TEMP[4] 21: ADD TEMP[4], TEMP[4], -TEMP[6] 22: MUL TEMP[4].xy, TEMP[4], IMM[2].yyyy 23: MOV TEMP[7].x, TEMP[4].xxxx 24: MOV TEMP[7].y, TEMP[4].yyyy 25: ADD TEMP[8].x, IMM[0].wwww, -TEMP[4].xxxx 26: ADD TEMP[4].x, TEMP[8].xxxx, -TEMP[4].yyyy 27: MOV TEMP[7].z, TEMP[4].xxxx 28: DP3 TEMP[4].x, TEMP[7].xyzz, TEMP[7].xyzz 29: RSQ TEMP[4].x, TEMP[4].xxxx 30: MUL TEMP[4].xyz, TEMP[7].xyzz, TEMP[4].xxxx 31: MUL TEMP[6], TEMP[6], IMM[2].zzzz 32: ADD TEMP[6].xy, IMM[0].wwww, -TEMP[6] 33: MUL TEMP[6].xy, TEMP[4].xyyy, TEMP[6].xyyy 34: MOV TEMP[7].w, IMM[0].xxxx 35: MOV TEMP[7].x, TEMP[6].xxxx 36: MOV TEMP[7].y, TEMP[6].yyyy 37: MUL TEMP[5].x, TEMP[5].xxxx, IMM[2].zzzz 38: ADD TEMP[5].x, IMM[0].wwww, -TEMP[5].xxxx 39: MUL TEMP[4].x, TEMP[5].xxxx, TEMP[4].zzzz 40: MOV TEMP[7].z, TEMP[4].xxxx 41: DP4 TEMP[4].x, TEMP[7], TEMP[0] 42: DP4 TEMP[5].x, TEMP[7], TEMP[2] 43: MOV TEMP[4].y, TEMP[5].xxxx 44: DP4 TEMP[5].x, TEMP[7], TEMP[3] 45: MOV TEMP[4].z, TEMP[5].xxxx 46: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 47: RSQ TEMP[5].x, TEMP[5].xxxx 48: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx 49: MOV TEMP[5].w, IMM[0].wwww 50: MOV TEMP[5].x, IN[0].xxxx 51: MOV TEMP[5].y, IN[0].yyyy 52: MOV TEMP[5].z, IN[0].zzzz 53: DP4 TEMP[0].x, TEMP[5], TEMP[0] 54: DP4 TEMP[2].x, TEMP[5], TEMP[2] 55: DP4 TEMP[3].x, TEMP[5], TEMP[3] 56: MOV TEMP[5].x, TEMP[0].xxxx 57: MOV TEMP[5].y, TEMP[2].xxxx 58: MOV TEMP[5].z, TEMP[3].xxxx 59: ADD TEMP[5].xyz, TEMP[5].xyzz, -CONST[4][19].xyzz 60: MOV TEMP[6].x, TEMP[0].xxxx 61: MOV TEMP[6].y, TEMP[2].xxxx 62: MOV TEMP[6].z, TEMP[3].xxxx 63: DP3 TEMP[7].x, CONST[4][20].xyzz, TEMP[5].xyzz 64: MOV TEMP[6].w, TEMP[7].xxxx 65: MOV TEMP[7].x, TEMP[4].xxxx 66: MOV TEMP[7].y, TEMP[4].yyyy 67: MOV TEMP[7].z, TEMP[4].zzzz 68: DP3 TEMP[8].x, TEMP[5].xyzz, TEMP[5].xyzz 69: RSQ TEMP[8].x, TEMP[8].xxxx 70: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[8].xxxx 71: DP3 TEMP[8].x, TEMP[4].xyzz, CONST[5][0].xyzz 72: MUL TEMP[4].xyz, TEMP[8].xxxx, TEMP[4].xyzz 73: MUL TEMP[4].xyz, IMM[2].zzzz, TEMP[4].xyzz 74: ADD TEMP[4].xyz, CONST[5][0].xyzz, -TEMP[4].xyzz 75: DP3 TEMP[4].x, -TEMP[5].xyzz, TEMP[4].xyzz 76: MOV_SAT TEMP[4].x, TEMP[4].xxxx 77: POW TEMP[4].x, TEMP[4].xxxx, IMM[2].wwww 78: MOV_SAT TEMP[4].x, TEMP[4].xxxx 79: MOV TEMP[7].w, TEMP[4].xxxx 80: MOV TEMP[4].w, IMM[0].wwww 81: MOV TEMP[4].x, TEMP[0].xxxx 82: MOV TEMP[4].y, TEMP[2].xxxx 83: MOV TEMP[4].z, TEMP[3].xxxx 84: MOV TEMP[0].x, CONST[4][0].wwww 85: MOV TEMP[0].y, CONST[4][1].wwww 86: MOV TEMP[0].z, CONST[4][2].wwww 87: MOV TEMP[0].w, CONST[4][3].wwww 88: DP4 TEMP[0].x, TEMP[4], TEMP[0] 89: MAD TEMP[2].xy, IN[2].xyyy, CONST[1][1].zwww, CONST[1][2].xyyy 90: MOV TEMP[3].xy, IN[3].xyyy 91: MOV TEMP[3].w, IMM[0].xxxx 92: TXL TEMP[3], TEMP[3], SAMP[0], 2D, IMM[1].wyw 93: MOV TEMP[5].x, CONST[4][0].xxxx 94: MOV TEMP[5].y, CONST[4][1].xxxx 95: MOV TEMP[5].z, CONST[4][2].xxxx 96: MOV TEMP[5].w, CONST[4][3].xxxx 97: DP4 TEMP[5].x, TEMP[4], TEMP[5] 98: MOV TEMP[8].x, CONST[4][0].yyyy 99: MOV TEMP[8].y, CONST[4][1].yyyy 100: MOV TEMP[8].z, CONST[4][2].yyyy 101: MOV TEMP[8].w, CONST[4][3].yyyy 102: DP4 TEMP[8].x, TEMP[4], TEMP[8] 103: MOV TEMP[5].y, -TEMP[8].xxxx 104: MOV TEMP[8].x, CONST[4][0].zzzz 105: MOV TEMP[8].y, CONST[4][1].zzzz 106: MOV TEMP[8].z, CONST[4][2].zzzz 107: MOV TEMP[8].w, CONST[4][3].zzzz 108: DP4 TEMP[4].x, TEMP[4], TEMP[8] 109: MAD TEMP[4].x, IMM[2].zzzz, TEMP[4].xxxx, -TEMP[0].xxxx 110: MOV TEMP[5].z, TEMP[4].xxxx 111: MOV TEMP[5].w, TEMP[0].xxxx 112: MOV OUT[1], TEMP[1] 113: MOV OUT[2].xy, TEMP[2].xyxx 114: MOV OUT[4], TEMP[7] 115: MOV OUT[6], IMM[0].xxxx 116: MOV OUT[5], TEMP[3] 117: MOV OUT[0], TEMP[5] 118: MOV OUT[3], TEMP[6] 119: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %17 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4 %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 %19 = call float @llvm.SI.load.const(<16 x i8> %18, i32 0) %20 = call float @llvm.SI.load.const(<16 x i8> %18, i32 4) %21 = call float @llvm.SI.load.const(<16 x i8> %18, i32 8) %22 = call float @llvm.SI.load.const(<16 x i8> %18, i32 12) %23 = call float @llvm.SI.load.const(<16 x i8> %18, i32 16) %24 = call float @llvm.SI.load.const(<16 x i8> %18, i32 20) %25 = call float @llvm.SI.load.const(<16 x i8> %18, i32 24) %26 = call float @llvm.SI.load.const(<16 x i8> %18, i32 28) %27 = call float @llvm.SI.load.const(<16 x i8> %18, i32 32) %28 = call float @llvm.SI.load.const(<16 x i8> %18, i32 36) %29 = call float @llvm.SI.load.const(<16 x i8> %18, i32 40) %30 = call float @llvm.SI.load.const(<16 x i8> %18, i32 44) %31 = call float @llvm.SI.load.const(<16 x i8> %18, i32 48) %32 = call float @llvm.SI.load.const(<16 x i8> %18, i32 52) %33 = call float @llvm.SI.load.const(<16 x i8> %18, i32 56) %34 = call float @llvm.SI.load.const(<16 x i8> %18, i32 60) %35 = call float @llvm.SI.load.const(<16 x i8> %18, i32 304) %36 = call float @llvm.SI.load.const(<16 x i8> %18, i32 308) %37 = call float @llvm.SI.load.const(<16 x i8> %18, i32 312) %38 = call float @llvm.SI.load.const(<16 x i8> %18, i32 320) %39 = call float @llvm.SI.load.const(<16 x i8> %18, i32 324) %40 = call float @llvm.SI.load.const(<16 x i8> %18, i32 328) %41 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 5 %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 %43 = call float @llvm.SI.load.const(<16 x i8> %42, i32 0) %44 = call float @llvm.SI.load.const(<16 x i8> %42, i32 4) %45 = call float @llvm.SI.load.const(<16 x i8> %42, i32 8) %46 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %47 = load <8 x i32>, <8 x i32> addrspace(2)* %46, align 32, !tbaa !0 %48 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %49 = load <4 x i32>, <4 x i32> addrspace(2)* %48, align 16, !tbaa !0 %50 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %51 = load <16 x i8>, <16 x i8> addrspace(2)* %50, align 16, !tbaa !0 %52 = add i32 %5, %7 %53 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %51, i32 0, i32 %52) %54 = extractelement <4 x float> %53, i32 0 %55 = extractelement <4 x float> %53, i32 1 %56 = extractelement <4 x float> %53, i32 2 %57 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %58 = load <16 x i8>, <16 x i8> addrspace(2)* %57, align 16, !tbaa !0 %59 = add i32 %5, %7 %60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %58, i32 0, i32 %59) %61 = extractelement <4 x float> %60, i32 0 %62 = extractelement <4 x float> %60, i32 1 %63 = extractelement <4 x float> %60, i32 2 %64 = extractelement <4 x float> %60, i32 3 %65 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %66 = load <16 x i8>, <16 x i8> addrspace(2)* %65, align 16, !tbaa !0 %67 = add i32 %5, %7 %68 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %66, i32 0, i32 %67) %69 = extractelement <4 x float> %68, i32 0 %70 = extractelement <4 x float> %68, i32 1 %71 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0 %73 = add i32 %10, %6 %74 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %72, i32 0, i32 %73) %75 = extractelement <4 x float> %74, i32 0 %76 = extractelement <4 x float> %74, i32 1 %77 = bitcast float %75 to i32 %78 = bitcast float %76 to i32 %79 = insertelement <4 x i32> undef, i32 %77, i32 0 %80 = insertelement <4 x i32> %79, i32 %78, i32 1 %81 = insertelement <4 x i32> %80, i32 0, i32 2 %82 = bitcast <8 x i32> %47 to <32 x i8> %83 = bitcast <4 x i32> %49 to <16 x i8> %84 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %81, <32 x i8> %82, <16 x i8> %83, i32 2) %85 = extractelement <4 x float> %84, i32 0 %86 = extractelement <4 x float> %84, i32 1 %87 = extractelement <4 x float> %84, i32 2 %88 = extractelement <4 x float> %84, i32 3 %89 = bitcast float %75 to i32 %90 = bitcast float %76 to i32 %91 = insertelement <4 x i32> , i32 %89, i32 1 %92 = insertelement <4 x i32> %91, i32 %90, i32 2 %93 = insertelement <4 x i32> %92, i32 0, i32 3 %94 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %93, <8 x i32> %47, <4 x i32> %49, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %95 = extractelement <4 x float> %94, i32 0 %96 = extractelement <4 x float> %94, i32 1 %97 = extractelement <4 x float> %94, i32 2 %98 = extractelement <4 x float> %94, i32 3 %99 = bitcast float %75 to i32 %100 = bitcast float %76 to i32 %101 = insertelement <4 x i32> , i32 %99, i32 1 %102 = insertelement <4 x i32> %101, i32 %100, i32 2 %103 = insertelement <4 x i32> %102, i32 0, i32 3 %104 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %103, <8 x i32> %47, <4 x i32> %49, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %105 = extractelement <4 x float> %104, i32 0 %106 = extractelement <4 x float> %104, i32 1 %107 = extractelement <4 x float> %104, i32 2 %108 = extractelement <4 x float> %104, i32 3 %109 = fmul float %61, 2.550000e+02 %110 = fadd float %109, -1.280000e+02 %111 = fmul float %62, 2.550000e+02 %112 = fadd float %111, -1.280000e+02 %113 = fmul float %63, 2.550000e+02 %114 = fadd float %113, -1.280000e+02 %115 = fmul float %64, 2.550000e+02 %116 = fadd float %115, -1.280000e+02 %117 = fcmp olt float %110, 0.000000e+00 %118 = fcmp olt float %112, 0.000000e+00 %119 = fcmp olt float %114, 0.000000e+00 %120 = fcmp olt float %116, 0.000000e+00 %121 = select i1 %117, float 1.000000e+00, float 0.000000e+00 %122 = call float @fabs(float %110) %123 = call float @fabs(float %112) %124 = call float @fabs(float %114) %125 = call float @fabs(float %116) %126 = fsub float %122, %121 %127 = select i1 %118, float -1.000000e+00, float -0.000000e+00 %128 = fadd float %123, %127 %129 = select i1 %119, float -1.000000e+00, float -0.000000e+00 %130 = fadd float %124, %129 %131 = select i1 %120, float -1.000000e+00, float -0.000000e+00 %132 = fadd float %125, %131 %133 = fadd float %126, -6.400000e+01 %134 = fadd float %128, -6.400000e+01 %135 = fadd float %130, -6.400000e+01 %136 = fadd float %132, -6.400000e+01 %137 = fcmp olt float %133, 0.000000e+00 %138 = fcmp olt float %134, 0.000000e+00 %139 = select i1 %137, float 1.000000e+00, float 0.000000e+00 %140 = select i1 %138, float 1.000000e+00, float 0.000000e+00 %141 = call float @fabs(float %133) %142 = call float @fabs(float %134) %143 = call float @fabs(float %135) %144 = call float @fabs(float %136) %145 = fsub float %141, %139 %146 = fsub float %142, %140 %147 = fmul float %145, 0x3F90410420000000 %148 = fmul float %146, 0x3F90410420000000 %149 = fsub float 1.000000e+00, %147 %150 = fsub float %149, %148 %151 = fmul float %147, %147 %152 = fmul float %148, %148 %153 = fadd float %152, %151 %154 = fmul float %150, %150 %155 = fadd float %153, %154 %156 = call float @llvm.AMDGPU.rsq.clamped.f32(float %155) %157 = fmul float %147, %156 %158 = fmul float %148, %156 %159 = fmul float %150, %156 %160 = fmul float %139, 2.000000e+00 %161 = fmul float %140, 2.000000e+00 %162 = fsub float 1.000000e+00, %160 %163 = fsub float 1.000000e+00, %161 %164 = fmul float %157, %162 %165 = fmul float %158, %163 %166 = fmul float %121, 2.000000e+00 %167 = fsub float 1.000000e+00, %166 %168 = fmul float %167, %159 %169 = fmul float %164, %85 %170 = fmul float %165, %86 %171 = fadd float %169, %170 %172 = fmul float %168, %87 %173 = fadd float %171, %172 %174 = fmul float %88, 0.000000e+00 %175 = fadd float %173, %174 %176 = fmul float %164, %95 %177 = fmul float %165, %96 %178 = fadd float %176, %177 %179 = fmul float %168, %97 %180 = fadd float %178, %179 %181 = fmul float %98, 0.000000e+00 %182 = fadd float %180, %181 %183 = fmul float %164, %105 %184 = fmul float %165, %106 %185 = fadd float %183, %184 %186 = fmul float %168, %107 %187 = fadd float %185, %186 %188 = fmul float %108, 0.000000e+00 %189 = fadd float %187, %188 %190 = fmul float %175, %175 %191 = fmul float %182, %182 %192 = fadd float %191, %190 %193 = fmul float %189, %189 %194 = fadd float %192, %193 %195 = call float @llvm.AMDGPU.rsq.clamped.f32(float %194) %196 = fmul float %175, %195 %197 = fmul float %182, %195 %198 = fmul float %189, %195 %199 = fmul float %54, %85 %200 = fmul float %55, %86 %201 = fadd float %199, %200 %202 = fmul float %56, %87 %203 = fadd float %201, %202 %204 = fadd float %203, %88 %205 = fmul float %54, %95 %206 = fmul float %55, %96 %207 = fadd float %205, %206 %208 = fmul float %56, %97 %209 = fadd float %207, %208 %210 = fadd float %209, %98 %211 = fmul float %54, %105 %212 = fmul float %55, %106 %213 = fadd float %211, %212 %214 = fmul float %56, %107 %215 = fadd float %213, %214 %216 = fadd float %215, %108 %217 = fsub float %204, %35 %218 = fsub float %210, %36 %219 = fsub float %216, %37 %220 = fmul float %38, %217 %221 = fmul float %39, %218 %222 = fadd float %221, %220 %223 = fmul float %40, %219 %224 = fadd float %222, %223 %225 = fmul float %217, %217 %226 = fmul float %218, %218 %227 = fadd float %226, %225 %228 = fmul float %219, %219 %229 = fadd float %227, %228 %230 = call float @llvm.AMDGPU.rsq.clamped.f32(float %229) %231 = fmul float %217, %230 %232 = fmul float %218, %230 %233 = fmul float %219, %230 %234 = fmul float %196, %43 %235 = fmul float %197, %44 %236 = fadd float %235, %234 %237 = fmul float %198, %45 %238 = fadd float %236, %237 %239 = fmul float %238, %196 %240 = fmul float %238, %197 %241 = fmul float %238, %198 %242 = fmul float %239, 2.000000e+00 %243 = fmul float %240, 2.000000e+00 %244 = fmul float %241, 2.000000e+00 %245 = fsub float %43, %242 %246 = fsub float %44, %243 %247 = fsub float %45, %244 %248 = fmul float %231, %245 %249 = fsub float -0.000000e+00, %248 %250 = fmul float %232, %246 %251 = fsub float %249, %250 %252 = fmul float %233, %247 %253 = fsub float %251, %252 %254 = call float @llvm.AMDIL.clamp.(float %253, float 0.000000e+00, float 1.000000e+00) %255 = call float @llvm.pow.f32(float %254, float 1.600000e+01) %256 = call float @llvm.AMDIL.clamp.(float %255, float 0.000000e+00, float 1.000000e+00) %257 = fmul float %204, %22 %258 = fmul float %210, %26 %259 = fadd float %257, %258 %260 = fmul float %216, %30 %261 = fadd float %259, %260 %262 = fadd float %261, %34 %263 = fmul float %69, %13 %264 = fadd float %263, %15 %265 = fmul float %70, %14 %266 = fadd float %265, %16 %267 = bitcast float %75 to i32 %268 = bitcast float %76 to i32 %269 = insertelement <4 x i32> , i32 %267, i32 1 %270 = insertelement <4 x i32> %269, i32 %268, i32 2 %271 = insertelement <4 x i32> %270, i32 0, i32 3 %272 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %271, <8 x i32> %47, <4 x i32> %49, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %273 = extractelement <4 x float> %272, i32 0 %274 = extractelement <4 x float> %272, i32 1 %275 = extractelement <4 x float> %272, i32 2 %276 = extractelement <4 x float> %272, i32 3 %277 = fmul float %204, %19 %278 = fmul float %210, %23 %279 = fadd float %277, %278 %280 = fmul float %216, %27 %281 = fadd float %279, %280 %282 = fadd float %281, %31 %283 = fmul float %204, %20 %284 = fmul float %210, %24 %285 = fadd float %283, %284 %286 = fmul float %216, %28 %287 = fadd float %285, %286 %288 = fadd float %287, %32 %289 = fsub float -0.000000e+00, %288 %290 = fmul float %204, %21 %291 = fmul float %210, %25 %292 = fadd float %290, %291 %293 = fmul float %216, %29 %294 = fadd float %292, %293 %295 = fadd float %294, %33 %296 = fmul float %295, 2.000000e+00 %297 = fsub float %296, %262 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %264, float %266, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %204, float %210, float %216, float %224) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %196, float %197, float %198, float %256) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %273, float %274, float %275, float %276) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %282, float %289, float %297, float %262) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0xc3000000 ; 7E0202FF C3000000 v_mov_b32_e32 v2, 0x437f0000 ; 7E0402FF 437F0000 v_mov_b32_e32 v4, 0x80000000 ; 7E0802FF 80000000 v_mov_b32_e32 v5, 0xc2800000 ; 7E0A02FF C2800000 v_mov_b32_e32 v6, 0x3c820821 ; 7E0C02FF 3C820821 v_add_i32_e32 v0, s10, v0 ; 4A00000A v_add_i32_e32 v3, s11, v3 ; 4A06060B s_load_dwordx4 s[28:31], s[4:5], 0x0 ; C08E0500 s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908 s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C s_load_dwordx4 s[40:43], s[2:3], 0x4 ; C0940304 s_load_dwordx4 s[44:47], s[2:3], 0x10 ; C0960310 s_load_dwordx4 s[48:51], s[2:3], 0x14 ; C0980314 s_load_dwordx8 s[32:39], s[6:7], 0x0 ; C0D00700 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[7:10], v0, s[12:15], 0 idxen ; E00C2000 80030700 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[10:13], v0, s[16:19], 0 idxen ; E00C2000 80040A00 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[12:15], v0, s[20:23], 0 idxen ; E00C2000 80050C00 buffer_load_format_xyzw v[18:21], v3, s[8:11], 0 idxen ; E00C2000 80021203 s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v20, 0 ; 7E280280 s_buffer_load_dword s0, s[44:47], 0xf ; C2002D0F s_buffer_load_dword s24, s[44:47], 0x4c ; C20C2D4C s_buffer_load_dword s22, s[44:47], 0x4d ; C20B2D4D s_buffer_load_dword s23, s[44:47], 0x4e ; C20BAD4E s_buffer_load_dword s9, s[44:47], 0x50 ; C204AD50 s_buffer_load_dword s21, s[48:51], 0x0 ; C20AB100 s_buffer_load_dword s20, s[48:51], 0x1 ; C20A3101 s_buffer_load_dword s19, s[48:51], 0x2 ; C209B102 s_buffer_load_dword s25, s[40:43], 0x6 ; C20CA906 s_buffer_load_dword s26, s[40:43], 0x7 ; C20D2907 s_buffer_load_dword s27, s[40:43], 0x8 ; C20DA908 s_buffer_load_dword s40, s[40:43], 0x9 ; C2142909 s_buffer_load_dword s15, s[44:47], 0x51 ; C207AD51 s_buffer_load_dword s16, s[44:47], 0x52 ; C2082D52 s_buffer_load_dword s3, s[44:47], 0x5 ; C201AD05 s_buffer_load_dword s4, s[44:47], 0x6 ; C2022D06 s_buffer_load_dword s8, s[44:47], 0x7 ; C2042D07 s_buffer_load_dword s2, s[44:47], 0x8 ; C2012D08 s_buffer_load_dword s1, s[44:47], 0x9 ; C200AD09 s_buffer_load_dword s5, s[44:47], 0x0 ; C202AD00 s_buffer_load_dword s6, s[44:47], 0x1 ; C2032D01 s_buffer_load_dword s7, s[44:47], 0x2 ; C203AD02 s_buffer_load_dword s10, s[44:47], 0x3 ; C2052D03 s_buffer_load_dword s17, s[44:47], 0x4 ; C208AD04 s_buffer_load_dword s11, s[44:47], 0xa ; C205AD0A s_buffer_load_dword s18, s[44:47], 0xb ; C2092D0B s_buffer_load_dword s12, s[44:47], 0xc ; C2062D0C s_buffer_load_dword s13, s[44:47], 0xd ; C206AD0D s_buffer_load_dword s14, s[44:47], 0xe ; C2072D0E s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s27 ; 7E00021B v_mov_b32_e32 v3, s40 ; 7E060228 image_sample_l v[21:24], 15, 0, 0, 0, 0, 0, 0, 0, v[18:21], s[32:39], s[28:31] ; F0900F00 00E81512 v_mov_b32_e32 v17, 0x10001 ; 7E2202FF 00010001 image_sample_l_o v[25:28], 15, 0, 0, 0, 0, 0, 0, 0, v[17:20], s[32:39], s[28:31] ; F0D00F00 00E81911 v_mov_b32_e32 v17, 0x20002 ; 7E2202FF 00020002 image_sample_l_o v[29:32], 15, 0, 0, 0, 0, 0, 0, 0, v[17:20], s[32:39], s[28:31] ; F0D00F00 00E81D11 v_mov_b32_e32 v17, 0x30003 ; 7E2202FF 00030003 image_sample_l_o v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[17:20], s[32:39], s[28:31] ; F0D00F00 00E80E11 exp 15, 32, 0, 0, 0, v20, v20, v20, v20 ; F800020F 14141414 s_waitcnt vmcnt(3) ; BF8C0773 v_mul_f32_e32 v18, v22, v8 ; 10241116 v_mad_f32 v10, v2, v10, v1 ; D282000A 04061502 v_mac_f32_e32 v1, v2, v11 ; 3E021702 v_mac_f32_e32 v0, s25, v12 ; 3E001819 v_mac_f32_e32 v3, s26, v13 ; 3E061A1A v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v2, v4, -1.0, vcc ; D2000002 01A9E704 v_add_f32_e64 v1, |v1|, v2 ; D2060101 00020501 v_cmp_gt_f32_e32 vcc, 0, v10 ; 7C081480 v_cndmask_b32_e64 v2, 0, 1.0, vcc ; D2000002 01A9E480 v_sub_f32_e64 v4, |v10|, v2 ; D2080104 0002050A v_add_f32_e32 v4, v5, v4 ; 06080905 v_add_f32_e32 v1, v5, v1 ; 06020305 v_cmp_gt_f32_e32 vcc, 0, v4 ; 7C080880 v_cndmask_b32_e64 v5, 0, 1.0, vcc ; D2000005 01A9E480 v_sub_f32_e64 v4, |v4|, v5 ; D2080104 00020B04 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v10, 0, 1.0, vcc ; D200000A 01A9E480 v_sub_f32_e64 v1, |v1|, v10 ; D2080101 00021501 v_mul_f32_e32 v11, v6, v4 ; 10160906 v_mad_f32 v4, -v4, v6, 1.0 ; D2820004 23CA0D04 v_mad_f32 v4, -v1, v6, v4 ; D2820004 24120D01 v_mul_f32_e32 v1, v6, v1 ; 10020306 v_mac_f32_e32 v18, v21, v7 ; 3E240F15 s_waitcnt vmcnt(2) ; BF8C0772 v_mul_f32_e32 v6, v26, v8 ; 100C111A v_mac_f32_e32 v6, v25, v7 ; 3E0C0F19 s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v8, v30, v8 ; 1010111E v_mac_f32_e32 v8, v29, v7 ; 3E100F1D v_mul_f32_e32 v7, v11, v11 ; 100E170B v_mac_f32_e32 v7, v1, v1 ; 3E0E0301 v_mac_f32_e32 v7, v4, v4 ; 3E0E0904 v_rsq_clamp_f32_e32 v7, v7 ; 7E0E5907 v_mac_f32_e32 v18, v23, v9 ; 3E241317 v_mac_f32_e32 v6, v27, v9 ; 3E0C131B v_mac_f32_e32 v8, v31, v9 ; 3E10131F v_mul_f32_e32 v9, v7, v11 ; 10121707 v_mul_f32_e32 v1, v7, v1 ; 10020307 v_mul_f32_e32 v4, v7, v4 ; 10080907 v_mad_f32 v5, -2.0, v5, 1.0 ; D2820005 03CA0AF5 v_mul_f32_e32 v5, v5, v9 ; 100A1305 v_mad_f32 v7, -2.0, v10, 1.0 ; D2820007 03CA14F5 v_mul_f32_e32 v1, v7, v1 ; 10020307 v_mad_f32 v2, -2.0, v2, 1.0 ; D2820002 03CA04F5 v_mul_f32_e32 v2, v4, v2 ; 10040504 v_add_f32_e32 v4, v24, v18 ; 06082518 v_mul_f32_e32 v7, v22, v1 ; 100E0316 v_mac_f32_e32 v7, v21, v5 ; 3E0E0B15 v_mac_f32_e32 v7, v23, v2 ; 3E0E0517 v_mac_f32_e32 v7, 0, v24 ; 3E0E3080 v_mul_f32_e32 v9, v26, v1 ; 1012031A v_mul_f32_e32 v1, v30, v1 ; 1002031E v_mac_f32_e32 v9, v25, v5 ; 3E120B19 v_mac_f32_e32 v1, v29, v5 ; 3E020B1D v_mac_f32_e32 v9, v27, v2 ; 3E12051B v_mac_f32_e32 v1, v31, v2 ; 3E02051F v_mac_f32_e32 v9, 0, v28 ; 3E123880 v_mac_f32_e32 v1, 0, v32 ; 3E024080 v_mul_f32_e32 v2, v7, v7 ; 10040F07 v_mac_f32_e32 v2, v9, v9 ; 3E041309 v_mac_f32_e32 v2, v1, v1 ; 3E040301 v_rsq_clamp_f32_e32 v2, v2 ; 7E045902 v_add_f32_e32 v5, v28, v6 ; 060A0D1C v_add_f32_e32 v6, v32, v8 ; 060C1120 exp 15, 33, 0, 0, 0, v0, v3, v20, v20 ; F800021F 14140300 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_mul_f32_e32 v0, v2, v7 ; 10000F02 v_mul_f32_e32 v3, v2, v9 ; 10061302 v_mul_f32_e32 v1, v2, v1 ; 10020302 v_subrev_f32_e32 v2, s24, v4 ; 0A040818 v_subrev_f32_e32 v7, s22, v5 ; 0A0E0A16 v_subrev_f32_e32 v8, s23, v6 ; 0A100C17 v_mul_f32_e32 v9, v2, v2 ; 10120502 v_mac_f32_e32 v9, v7, v7 ; 3E120F07 v_mac_f32_e32 v9, v8, v8 ; 3E121108 v_rsq_clamp_f32_e32 v9, v9 ; 7E125909 v_mul_f32_e32 v10, s21, v0 ; 10140015 v_mac_f32_e32 v10, s20, v3 ; 3E140614 v_mac_f32_e32 v10, s19, v1 ; 3E140213 v_mul_f32_e32 v11, v0, v10 ; 10161500 v_mad_f32 v11, -2.0, v11, s21 ; D282000B 005616F5 v_mul_f32_e32 v12, v9, v2 ; 10180509 v_mul_f32_e32 v11, v11, v12 ; 1016190B v_mul_f32_e32 v12, v3, v10 ; 10181503 v_mad_f32 v12, -2.0, v12, s20 ; D282000C 005218F5 v_mul_f32_e32 v13, v9, v7 ; 101A0F09 v_mad_f32 v11, -v13, v12, -v11 ; D282000B A42E190D v_mul_f32_e32 v10, v1, v10 ; 10141501 v_mad_f32 v10, -2.0, v10, s19 ; D282000A 004E14F5 v_mul_f32_e32 v9, v9, v8 ; 10121109 v_mad_f32 v9, -v9, v10, v11 ; D2820009 242E1509 v_mul_f32_e32 v2, s9, v2 ; 10040409 v_add_f32_e64 v9, 0, v9 clamp ; D2060809 00021280 v_log_f32_e32 v9, v9 ; 7E124F09 v_mac_f32_e32 v2, s15, v7 ; 3E040E0F v_mac_f32_e32 v2, s16, v8 ; 3E041010 exp 15, 34, 0, 0, 0, v4, v5, v6, v2 ; F800022F 02060504 s_waitcnt expcnt(0) ; BF8C070F v_mul_legacy_f32_e32 v2, 0x41800000, v9 ; 0E0412FF 41800000 v_exp_f32_e32 v2, v2 ; 7E044B02 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 exp 15, 35, 0, 0, 0, v0, v3, v1, v2 ; F800023F 02010300 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s8, v5 ; 10000A08 v_mul_f32_e32 v1, s17, v5 ; 10020A11 v_mul_f32_e32 v2, s3, v5 ; 10040A03 v_mul_f32_e32 v3, s4, v5 ; 10060A04 v_mac_f32_e32 v0, s10, v4 ; 3E00080A v_mac_f32_e32 v1, s5, v4 ; 3E020805 v_mac_f32_e32 v2, s6, v4 ; 3E040806 v_mac_f32_e32 v3, s7, v4 ; 3E060807 v_mac_f32_e32 v0, s18, v6 ; 3E000C12 v_mac_f32_e32 v1, s2, v6 ; 3E020C02 v_mac_f32_e32 v2, s1, v6 ; 3E040C01 v_mac_f32_e32 v3, s11, v6 ; 3E060C0B v_add_f32_e32 v0, s0, v0 ; 06000000 v_add_f32_e32 v1, s12, v1 ; 0602020C v_add_f32_e32 v2, s13, v2 ; 0604040D v_add_f32_e32 v3, s14, v3 ; 0606060E v_xor_b32_e32 v2, 0x80000000, v2 ; 3A0404FF 80000000 v_mad_f32 v3, 2.0, v3, -v0 ; D2820003 840206F4 exp 15, 36, 0, 0, 0, v14, v15, v16, v17 ; F800024F 11100F0E exp 15, 37, 0, 0, 0, v20, v20, v20, v20 ; F800025F 14141414 exp 15, 12, 0, 0, 0, v1, v2, v3, v0 ; F80000CF 00030201 exp 15, 13, 0, 1, 0, v20, v20, v20, v20 ; F80008DF 14141414 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 36 Code Size: 964 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL IN[5] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL OUT[4], GENERIC[2] DCL OUT[5], GENERIC[3] DCL OUT[6], GENERIC[4] DCL OUT[7], GENERIC[5] DCL OUT[8], GENERIC[6] DCL OUT[9], GENERIC[7] DCL OUT[10], GENERIC[8] DCL OUT[11], GENERIC[9] DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0..7] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL DCL TEMP[2..24], LOCAL IMM[0] FLT32 { 0.0000, 255.0000, -128.0000, 1.0000} IMM[1] INT32 {1, 0, 2, 3} IMM[2] FLT32 { -64.0000, 0.0159, 2.0000, 16.0000} IMM[3] UINT32 {3, 304, 320, 4} IMM[4] UINT32 {0, 20, 36, 52} IMM[5] UINT32 {8, 24, 40, 56} IMM[6] UINT32 {12, 28, 44, 60} IMM[7] UINT32 {16, 32, 48, 348} IMM[8] FLT32 { 0.0175, 0.0078, -0.5000, 0.5000} IMM[9] UINT32 {72, 80, 64, 88} IMM[10] FLT32 { 0.0001, 0.0774, 0.9479, 0.0521} IMM[11] FLT32 { 2.4000, 0.0404, 0.0100, 0.0000} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MOV TEMP[0].xy, IN[5].xyyy 4: MOV TEMP[0].w, IMM[0].xxxx 5: TXL TEMP[0], TEMP[0], SAMP[0], 2D 6: MOV TEMP[2].xy, IN[5].xyyy 7: MOV TEMP[2].w, IMM[0].xxxx 8: TXL TEMP[2], TEMP[2], SAMP[0], 2D, IMM[1].xyx 9: MOV TEMP[3].xy, IN[5].xyyy 10: MOV TEMP[3].w, IMM[0].xxxx 11: TXL TEMP[3], TEMP[3], SAMP[0], 2D, IMM[1].zyz 12: MAD TEMP[4], IN[1], IMM[0].yyyy, IMM[0].zzzz 13: FSLT TEMP[5], TEMP[4], IMM[0].xxxx 14: AND TEMP[5], TEMP[5], IMM[0].wwww 15: ABS TEMP[4], TEMP[4] 16: ADD TEMP[4], TEMP[4], -TEMP[5] 17: ADD TEMP[4], TEMP[4], IMM[2].xxxx 18: FSLT TEMP[6], TEMP[4], IMM[0].xxxx 19: AND TEMP[6], TEMP[6], IMM[0].wwww 20: ABS TEMP[4], TEMP[4] 21: ADD TEMP[4], TEMP[4], -TEMP[6] 22: MUL TEMP[4], TEMP[4], IMM[2].yyyy 23: MUL TEMP[6], TEMP[6], IMM[2].zzzz 24: ADD TEMP[6], IMM[0].wwww, -TEMP[6] 25: MUL TEMP[5], IMM[2].zzzz, TEMP[5] 26: ADD TEMP[5].xzw, IMM[0].wwww, -TEMP[5] 27: MOV TEMP[7].x, TEMP[4].xxxx 28: MOV TEMP[7].y, TEMP[4].yyyy 29: ADD TEMP[8].x, IMM[0].wwww, -TEMP[4].xxxx 30: ADD TEMP[8].x, TEMP[8].xxxx, -TEMP[4].yyyy 31: MOV TEMP[7].z, TEMP[8].xxxx 32: DP3 TEMP[8].x, TEMP[7].xyzz, TEMP[7].xyzz 33: RSQ TEMP[8].x, TEMP[8].xxxx 34: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[8].xxxx 35: MUL TEMP[8].xy, TEMP[7].xyyy, TEMP[6].xyyy 36: MOV TEMP[9].x, TEMP[4].zzzz 37: MOV TEMP[9].y, TEMP[4].wwww 38: ADD TEMP[10].x, IMM[0].wwww, -TEMP[4].zzzz 39: ADD TEMP[4].x, TEMP[10].xxxx, -TEMP[4].wwww 40: MOV TEMP[9].z, TEMP[4].xxxx 41: DP3 TEMP[4].x, TEMP[9].xyzz, TEMP[9].xyzz 42: RSQ TEMP[4].x, TEMP[4].xxxx 43: MUL TEMP[4].xyz, TEMP[9].xyzz, TEMP[4].xxxx 44: MUL TEMP[6].xy, TEMP[4].xyyy, TEMP[6].zwww 45: MOV TEMP[9].w, IMM[0].xxxx 46: MOV TEMP[9].x, TEMP[8].xxxx 47: MOV TEMP[9].y, TEMP[8].yyyy 48: MUL TEMP[7].x, TEMP[7].zzzz, TEMP[5].xxxx 49: MOV TEMP[9].z, TEMP[7].xxxx 50: DP4 TEMP[7].x, TEMP[9], TEMP[0] 51: DP4 TEMP[8].x, TEMP[9], TEMP[2] 52: MOV TEMP[7].y, TEMP[8].xxxx 53: DP4 TEMP[8].x, TEMP[9], TEMP[3] 54: MOV TEMP[7].z, TEMP[8].xxxx 55: DP3 TEMP[8].x, TEMP[7].xyzz, TEMP[7].xyzz 56: RSQ TEMP[8].x, TEMP[8].xxxx 57: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[8].xxxx 58: MOV TEMP[8].w, IMM[0].xxxx 59: MOV TEMP[8].x, TEMP[6].xxxx 60: MOV TEMP[8].y, TEMP[6].yyyy 61: MUL TEMP[4].x, TEMP[4].zzzz, TEMP[5].zzzz 62: MOV TEMP[8].z, TEMP[4].xxxx 63: DP4 TEMP[4].x, TEMP[8], TEMP[0] 64: DP4 TEMP[6].x, TEMP[8], TEMP[2] 65: MOV TEMP[4].y, TEMP[6].xxxx 66: DP4 TEMP[6].x, TEMP[8], TEMP[3] 67: MOV TEMP[4].z, TEMP[6].xxxx 68: DP3 TEMP[6].x, TEMP[4].xyzz, TEMP[7].xyzz 69: MUL TEMP[6].xyz, TEMP[6].xxxx, TEMP[7].xyzz 70: ADD TEMP[4].xyz, TEMP[4].xyzz, -TEMP[6].xyzz 71: DP3 TEMP[6].x, TEMP[4].xyzz, TEMP[4].xyzz 72: RSQ TEMP[6].x, TEMP[6].xxxx 73: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[6].xxxx 74: MOV TEMP[6].x, TEMP[4].xxxx 75: MOV TEMP[6].y, TEMP[4].yyyy 76: MOV TEMP[6].z, TEMP[4].zzzz 77: MOV TEMP[6].w, TEMP[5].wwww 78: MOV TEMP[4].w, IMM[0].wwww 79: MOV TEMP[4].x, IN[0].xxxx 80: MOV TEMP[4].y, IN[0].yyyy 81: MOV TEMP[4].z, IN[0].zzzz 82: DP4 TEMP[0].x, TEMP[4], TEMP[0] 83: DP4 TEMP[2].x, TEMP[4], TEMP[2] 84: DP4 TEMP[3].x, TEMP[4], TEMP[3] 85: MOV TEMP[4].x, TEMP[0].xxxx 86: MOV TEMP[4].y, TEMP[2].xxxx 87: MOV TEMP[4].z, TEMP[3].xxxx 88: ADD TEMP[5].xyz, TEMP[4].xyzz, -CONST[4][19].xyzz 89: MOV TEMP[8].x, TEMP[0].xxxx 90: MOV TEMP[8].y, TEMP[2].xxxx 91: MOV TEMP[8].z, TEMP[3].xxxx 92: DP3 TEMP[9].x, CONST[4][20].xyzz, TEMP[5].xyzz 93: MOV TEMP[8].w, TEMP[9].xxxx 94: MOV TEMP[9].x, TEMP[7].xxxx 95: MOV TEMP[9].y, TEMP[7].yyyy 96: MOV TEMP[9].z, TEMP[7].zzzz 97: DP3 TEMP[10].x, TEMP[5].xyzz, TEMP[5].xyzz 98: RSQ TEMP[10].x, TEMP[10].xxxx 99: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[10].xxxx 100: DP3 TEMP[10].x, TEMP[7].xyzz, CONST[5][0].xyzz 101: MUL TEMP[10].xyz, TEMP[10].xxxx, TEMP[7].xyzz 102: MUL TEMP[10].xyz, IMM[2].zzzz, TEMP[10].xyzz 103: ADD TEMP[10].xyz, CONST[5][0].xyzz, -TEMP[10].xyzz 104: DP3 TEMP[5].x, -TEMP[5].xyzz, TEMP[10].xyzz 105: MOV_SAT TEMP[5].x, TEMP[5].xxxx 106: POW TEMP[5].x, TEMP[5].xxxx, IMM[2].wwww 107: MOV_SAT TEMP[5].x, TEMP[5].xxxx 108: MOV TEMP[9].w, TEMP[5].xxxx 109: MOV TEMP[5].w, IMM[0].wwww 110: MOV TEMP[5].x, TEMP[0].xxxx 111: MOV TEMP[5].y, TEMP[2].xxxx 112: MOV TEMP[5].z, TEMP[3].xxxx 113: MOV TEMP[0].x, CONST[4][0].yyyy 114: MOV TEMP[0].y, CONST[4][1].yyyy 115: MOV TEMP[0].z, CONST[4][2].yyyy 116: MOV TEMP[0].w, CONST[4][3].yyyy 117: DP4 TEMP[0].x, TEMP[5], TEMP[0] 118: MOV TEMP[2].x, CONST[4][0].zzzz 119: MOV TEMP[2].y, CONST[4][1].zzzz 120: MOV TEMP[2].z, CONST[4][2].zzzz 121: MOV TEMP[2].w, CONST[4][3].zzzz 122: DP4 TEMP[2].x, TEMP[5], TEMP[2] 123: MOV TEMP[3].x, CONST[4][0].wwww 124: MOV TEMP[3].y, CONST[4][1].wwww 125: MOV TEMP[3].z, CONST[4][2].wwww 126: MOV TEMP[3].w, CONST[4][3].wwww 127: DP4 TEMP[3].x, TEMP[5], TEMP[3] 128: MOV TEMP[10].x, CONST[4][0].xxxx 129: MOV TEMP[10].y, CONST[4][1].xxxx 130: MOV TEMP[10].z, CONST[4][2].xxxx 131: MOV TEMP[10].w, CONST[4][3].xxxx 132: DP4 TEMP[5].x, TEMP[5], TEMP[10] 133: MOV TEMP[5].w, TEMP[3].xxxx 134: MUL TEMP[10].x, CONST[1][2].xxxx, IMM[8].xxxx 135: MUL TEMP[11].x, IMM[8].xxxx, CONST[1][2].yyyy 136: MUL TEMP[12].x, IMM[8].xxxx, CONST[1][2].zzzz 137: MAD TEMP[13].xy, CONST[4][21].wwww, CONST[1][3].xyyy, CONST[1][4].zwww 138: MAD TEMP[14].xy, CONST[4][21].wwww, CONST[1][3].zwww, CONST[1][5].xyyy 139: MAD TEMP[15].xy, CONST[4][21].wwww, CONST[1][4].xyyy, CONST[1][5].zwww 140: MUL TEMP[4].xyz, TEMP[4].xyzz, IMM[8].yyyy 141: MOV TEMP[16].x, -TEMP[4].zzzz 142: MOV TEMP[17].x, TEMP[4].yyyy 143: MOV TEMP[17].y, TEMP[16].xxxx 144: MOV TEMP[18].x, TEMP[4].xxxx 145: MOV TEMP[18].y, TEMP[16].xxxx 146: MOV TEMP[16].x, TEMP[4].xxxx 147: MOV TEMP[16].y, -TEMP[4].yyyy 148: ADD TEMP[4].xy, TEMP[17].xyyy, IMM[8].zzzz 149: COS TEMP[17].x, TEMP[10].xxxx 150: SIN TEMP[10].x, TEMP[10].xxxx 151: MUL TEMP[19].x, TEMP[10].xxxx, TEMP[4].yyyy 152: MAD TEMP[19].x, TEMP[17].xxxx, TEMP[4].xxxx, -TEMP[19].xxxx 153: MUL TEMP[20].x, TEMP[17].xxxx, TEMP[4].yyyy 154: MAD TEMP[20].x, TEMP[10].xxxx, TEMP[4].xxxx, TEMP[20].xxxx 155: MOV TEMP[19].y, TEMP[20].xxxx 156: MAD TEMP[19].xy, CONST[1][1].yyyy, TEMP[19].xyyy, IMM[8].wwww 157: ADD TEMP[19].xy, TEMP[19].xyyy, TEMP[13].xyyy 158: COS TEMP[20].x, TEMP[11].xxxx 159: SIN TEMP[11].x, TEMP[11].xxxx 160: MUL TEMP[21].x, TEMP[11].xxxx, TEMP[4].yyyy 161: MAD TEMP[21].x, TEMP[20].xxxx, TEMP[4].xxxx, -TEMP[21].xxxx 162: MUL TEMP[22].x, TEMP[20].xxxx, TEMP[4].yyyy 163: MAD TEMP[22].x, TEMP[11].xxxx, TEMP[4].xxxx, TEMP[22].xxxx 164: MOV TEMP[21].y, TEMP[22].xxxx 165: MAD TEMP[21].xy, TEMP[21].xyyy, CONST[1][1].zzzz, IMM[8].wwww 166: ADD TEMP[21].xy, TEMP[21].xyyy, TEMP[14].xyyy 167: ADD TEMP[18].xy, IMM[8].zzzz, TEMP[18].xyyy 168: MUL TEMP[22].x, TEMP[10].xxxx, TEMP[18].yyyy 169: MAD TEMP[22].x, TEMP[17].xxxx, TEMP[18].xxxx, -TEMP[22].xxxx 170: MUL TEMP[23].x, TEMP[17].xxxx, TEMP[18].yyyy 171: MAD TEMP[23].x, TEMP[10].xxxx, TEMP[18].xxxx, TEMP[23].xxxx 172: MOV TEMP[22].y, TEMP[23].xxxx 173: MAD TEMP[22].xy, CONST[1][1].yyyy, TEMP[22].xyyy, IMM[8].wwww 174: ADD TEMP[22].xy, TEMP[22].xyyy, TEMP[13].xyyy 175: MOV TEMP[23].x, TEMP[19].xxxx 176: MOV TEMP[23].y, TEMP[19].yyyy 177: MOV TEMP[23].z, TEMP[22].xxxx 178: MOV TEMP[23].w, TEMP[22].yyyy 179: MUL TEMP[19].x, TEMP[11].xxxx, TEMP[18].yyyy 180: MAD TEMP[19].x, TEMP[20].xxxx, TEMP[18].xxxx, -TEMP[19].xxxx 181: MUL TEMP[22].x, TEMP[11].xxxx, TEMP[18].xxxx 182: MAD TEMP[22].x, TEMP[20].xxxx, TEMP[18].yyyy, TEMP[22].xxxx 183: MOV TEMP[19].y, TEMP[22].xxxx 184: MAD TEMP[19].xy, TEMP[19].xyyy, CONST[1][1].zzzz, IMM[8].wwww 185: ADD TEMP[19].xy, TEMP[19].xyyy, TEMP[14].xyyy 186: MOV TEMP[22].x, TEMP[21].xxxx 187: MOV TEMP[22].y, TEMP[21].yyyy 188: MOV TEMP[22].z, TEMP[19].xxxx 189: MOV TEMP[22].w, TEMP[19].yyyy 190: COS TEMP[19].x, TEMP[12].xxxx 191: SIN TEMP[12].x, TEMP[12].xxxx 192: MUL TEMP[21].x, TEMP[12].xxxx, TEMP[4].yyyy 193: MAD TEMP[21].x, TEMP[19].xxxx, TEMP[4].xxxx, -TEMP[21].xxxx 194: MUL TEMP[24].x, TEMP[19].xxxx, TEMP[4].yyyy 195: MAD TEMP[4].x, TEMP[12].xxxx, TEMP[4].xxxx, TEMP[24].xxxx 196: MOV TEMP[21].y, TEMP[4].xxxx 197: MAD TEMP[4].xy, TEMP[21].xyyy, CONST[1][1].wwww, IMM[8].wwww 198: ADD TEMP[4].xy, TEMP[4].xyyy, TEMP[15].xyyy 199: MUL TEMP[21].x, TEMP[12].xxxx, TEMP[18].yyyy 200: MAD TEMP[21].x, TEMP[19].xxxx, TEMP[18].xxxx, -TEMP[21].xxxx 201: MUL TEMP[19].x, TEMP[19].xxxx, TEMP[18].yyyy 202: MAD TEMP[12].x, TEMP[12].xxxx, TEMP[18].xxxx, TEMP[19].xxxx 203: MOV TEMP[21].y, TEMP[12].xxxx 204: MAD TEMP[12].xy, CONST[1][1].wwww, TEMP[21].xyyy, IMM[8].wwww 205: ADD TEMP[12].xy, TEMP[12].xyyy, TEMP[15].xyyy 206: MOV TEMP[15].x, TEMP[4].xxxx 207: MOV TEMP[15].y, TEMP[4].yyyy 208: MOV TEMP[15].z, TEMP[12].xxxx 209: MOV TEMP[15].w, TEMP[12].yyyy 210: ADD TEMP[4].xy, IMM[8].zzzz, TEMP[16].xyyy 211: MUL TEMP[12].x, TEMP[10].xxxx, TEMP[4].yyyy 212: MAD TEMP[12].x, TEMP[17].xxxx, TEMP[4].xxxx, -TEMP[12].xxxx 213: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[4].xxxx 214: MAD TEMP[10].x, TEMP[17].xxxx, TEMP[4].yyyy, TEMP[10].xxxx 215: MOV TEMP[12].y, TEMP[10].xxxx 216: MAD TEMP[10].xy, CONST[1][1].yyyy, TEMP[12].xyyy, IMM[8].wwww 217: ADD TEMP[10].xy, TEMP[10].xyyy, TEMP[13].xyyy 218: MUL TEMP[12].x, TEMP[11].xxxx, TEMP[4].yyyy 219: MAD TEMP[12].x, TEMP[20].xxxx, TEMP[4].xxxx, -TEMP[12].xxxx 220: MUL TEMP[13].x, TEMP[20].xxxx, TEMP[4].yyyy 221: MAD TEMP[4].x, TEMP[11].xxxx, TEMP[4].xxxx, TEMP[13].xxxx 222: MOV TEMP[12].y, TEMP[4].xxxx 223: MAD TEMP[4].xy, TEMP[12].xyyy, CONST[1][1].zzzz, IMM[8].wwww 224: ADD TEMP[4].xy, TEMP[4].xyyy, TEMP[14].xyyy 225: MOV TEMP[11].x, TEMP[10].xxxx 226: MOV TEMP[11].y, TEMP[10].yyyy 227: MOV TEMP[11].z, TEMP[4].xxxx 228: MOV TEMP[11].w, TEMP[4].yyyy 229: MOV TEMP[4].xy, IN[5].xyyy 230: MOV TEMP[4].w, IMM[0].xxxx 231: TXL TEMP[4], TEMP[4], SAMP[0], 2D, IMM[1].wyw 232: MUL TEMP[10].xyz, IN[3].xyzz, IMM[8].wwww 233: MAX TEMP[10].xyz, TEMP[10].xyzz, IMM[10].xxxx 234: MUL TEMP[12].xyz, IN[4].xyzz, IMM[10].yyyy 235: MAD TEMP[13].xyz, IN[4].xyzz, IMM[10].zzzz, IMM[10].wwww 236: POW TEMP[14].x, TEMP[13].xxxx, IMM[11].xxxx 237: POW TEMP[14].y, TEMP[13].yyyy, IMM[11].xxxx 238: POW TEMP[14].z, TEMP[13].zzzz, IMM[11].xxxx 239: FSLT TEMP[13].x, IMM[11].yyyy, IN[4].xxxx 240: UIF TEMP[13].xxxx :0 241: MOV TEMP[13].x, TEMP[14].xxxx 242: ELSE :0 243: MOV TEMP[13].x, TEMP[12].xxxx 244: ENDIF 245: FSLT TEMP[16].x, IMM[11].yyyy, IN[4].yyyy 246: UIF TEMP[16].xxxx :0 247: MOV TEMP[16].x, TEMP[14].yyyy 248: ELSE :0 249: MOV TEMP[16].x, TEMP[12].yyyy 250: ENDIF 251: FSLT TEMP[17].x, IMM[11].yyyy, IN[4].zzzz 252: UIF TEMP[17].xxxx :0 253: MOV TEMP[14].x, TEMP[14].zzzz 254: ELSE :0 255: MOV TEMP[14].x, TEMP[12].zzzz 256: ENDIF 257: MOV TEMP[12].x, TEMP[13].xxxx 258: MOV TEMP[12].y, TEMP[16].xxxx 259: MOV TEMP[12].z, TEMP[14].xxxx 260: MOV TEMP[13].w, TEMP[4].wwww 261: MUL TEMP[13].xyz, TEMP[12].xyzz, TEMP[4].xyzz 262: ABS TEMP[4].xyz, TEMP[7].xyzz 263: MAX TEMP[7].x, TEMP[4].xxxx, TEMP[4].yyyy 264: MAX TEMP[7].x, TEMP[7].xxxx, TEMP[4].zzzz 265: ADD TEMP[4].yz, TEMP[7].xxxx, -TEMP[4].xyzz 266: FSLT TEMP[7].x, TEMP[4].yyyy, IMM[11].zzzz 267: UIF TEMP[7].xxxx :0 268: MOV TEMP[7].x, IMM[0].wwww 269: ELSE :0 270: MOV TEMP[7].x, IMM[0].xxxx 271: ENDIF 272: MOV TEMP[12].x, IN[2].xxxx 273: MOV TEMP[12].y, IN[2].yyyy 274: MOV TEMP[12].z, IN[2].zzzz 275: MOV TEMP[12].w, TEMP[7].xxxx 276: FSLT TEMP[4].x, TEMP[4].zzzz, IMM[11].zzzz 277: UIF TEMP[4].xxxx :0 278: MOV TEMP[4].x, IMM[0].wwww 279: ELSE :0 280: MOV TEMP[4].x, IMM[0].xxxx 281: ENDIF 282: MOV TEMP[7].x, TEMP[10].xxxx 283: MOV TEMP[7].y, TEMP[10].yyyy 284: MOV TEMP[7].z, TEMP[10].zzzz 285: MOV TEMP[7].w, TEMP[4].xxxx 286: MOV TEMP[4].xw, TEMP[5].xxxw 287: MOV TEMP[0].x, -TEMP[0].xxxx 288: MAD TEMP[2].x, TEMP[2].xxxx, IMM[2].zzzz, -TEMP[3].xxxx 289: MOV TEMP[0].y, TEMP[2].xxxx 290: MOV TEMP[4].yz, TEMP[0].yxyy 291: MOV OUT[8], TEMP[23] 292: MOV OUT[1], TEMP[1] 293: MOV OUT[9], TEMP[22] 294: MOV OUT[10], TEMP[15] 295: MOV OUT[11], TEMP[11] 296: MOV OUT[3], TEMP[9] 297: MOV OUT[7], TEMP[6] 298: MOV OUT[6], TEMP[7] 299: MOV OUT[4], TEMP[13] 300: MOV OUT[0], TEMP[4] 301: MOV OUT[2], TEMP[8] 302: MOV OUT[5], TEMP[12] 303: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %31 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4 %32 = load <16 x i8>, <16 x i8> addrspace(2)* %31, align 16, !tbaa !0 %33 = call float @llvm.SI.load.const(<16 x i8> %32, i32 0) %34 = call float @llvm.SI.load.const(<16 x i8> %32, i32 4) %35 = call float @llvm.SI.load.const(<16 x i8> %32, i32 8) %36 = call float @llvm.SI.load.const(<16 x i8> %32, i32 12) %37 = call float @llvm.SI.load.const(<16 x i8> %32, i32 16) %38 = call float @llvm.SI.load.const(<16 x i8> %32, i32 20) %39 = call float @llvm.SI.load.const(<16 x i8> %32, i32 24) %40 = call float @llvm.SI.load.const(<16 x i8> %32, i32 28) %41 = call float @llvm.SI.load.const(<16 x i8> %32, i32 32) %42 = call float @llvm.SI.load.const(<16 x i8> %32, i32 36) %43 = call float @llvm.SI.load.const(<16 x i8> %32, i32 40) %44 = call float @llvm.SI.load.const(<16 x i8> %32, i32 44) %45 = call float @llvm.SI.load.const(<16 x i8> %32, i32 48) %46 = call float @llvm.SI.load.const(<16 x i8> %32, i32 52) %47 = call float @llvm.SI.load.const(<16 x i8> %32, i32 56) %48 = call float @llvm.SI.load.const(<16 x i8> %32, i32 60) %49 = call float @llvm.SI.load.const(<16 x i8> %32, i32 304) %50 = call float @llvm.SI.load.const(<16 x i8> %32, i32 308) %51 = call float @llvm.SI.load.const(<16 x i8> %32, i32 312) %52 = call float @llvm.SI.load.const(<16 x i8> %32, i32 320) %53 = call float @llvm.SI.load.const(<16 x i8> %32, i32 324) %54 = call float @llvm.SI.load.const(<16 x i8> %32, i32 328) %55 = call float @llvm.SI.load.const(<16 x i8> %32, i32 348) %56 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 5 %57 = load <16 x i8>, <16 x i8> addrspace(2)* %56, align 16, !tbaa !0 %58 = call float @llvm.SI.load.const(<16 x i8> %57, i32 0) %59 = call float @llvm.SI.load.const(<16 x i8> %57, i32 4) %60 = call float @llvm.SI.load.const(<16 x i8> %57, i32 8) %61 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %62 = load <8 x i32>, <8 x i32> addrspace(2)* %61, align 32, !tbaa !0 %63 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %64 = load <4 x i32>, <4 x i32> addrspace(2)* %63, align 16, !tbaa !0 %65 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %66 = load <16 x i8>, <16 x i8> addrspace(2)* %65, align 16, !tbaa !0 %67 = add i32 %5, %7 %68 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %66, i32 0, i32 %67) %69 = extractelement <4 x float> %68, i32 0 %70 = extractelement <4 x float> %68, i32 1 %71 = extractelement <4 x float> %68, i32 2 %72 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %73 = load <16 x i8>, <16 x i8> addrspace(2)* %72, align 16, !tbaa !0 %74 = add i32 %5, %7 %75 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %73, i32 0, i32 %74) %76 = extractelement <4 x float> %75, i32 0 %77 = extractelement <4 x float> %75, i32 1 %78 = extractelement <4 x float> %75, i32 2 %79 = extractelement <4 x float> %75, i32 3 %80 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0 %82 = add i32 %5, %7 %83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %82) %84 = extractelement <4 x float> %83, i32 0 %85 = extractelement <4 x float> %83, i32 1 %86 = extractelement <4 x float> %83, i32 2 %87 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %88 = load <16 x i8>, <16 x i8> addrspace(2)* %87, align 16, !tbaa !0 %89 = add i32 %5, %7 %90 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %88, i32 0, i32 %89) %91 = extractelement <4 x float> %90, i32 0 %92 = extractelement <4 x float> %90, i32 1 %93 = extractelement <4 x float> %90, i32 2 %94 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %95 = load <16 x i8>, <16 x i8> addrspace(2)* %94, align 16, !tbaa !0 %96 = add i32 %5, %7 %97 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %95, i32 0, i32 %96) %98 = extractelement <4 x float> %97, i32 0 %99 = extractelement <4 x float> %97, i32 1 %100 = extractelement <4 x float> %97, i32 2 %101 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 5 %102 = load <16 x i8>, <16 x i8> addrspace(2)* %101, align 16, !tbaa !0 %103 = add i32 %10, %6 %104 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %102, i32 0, i32 %103) %105 = extractelement <4 x float> %104, i32 0 %106 = extractelement <4 x float> %104, i32 1 %107 = bitcast float %105 to i32 %108 = bitcast float %106 to i32 %109 = insertelement <4 x i32> undef, i32 %107, i32 0 %110 = insertelement <4 x i32> %109, i32 %108, i32 1 %111 = insertelement <4 x i32> %110, i32 0, i32 2 %112 = bitcast <8 x i32> %62 to <32 x i8> %113 = bitcast <4 x i32> %64 to <16 x i8> %114 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %111, <32 x i8> %112, <16 x i8> %113, i32 2) %115 = extractelement <4 x float> %114, i32 0 %116 = extractelement <4 x float> %114, i32 1 %117 = extractelement <4 x float> %114, i32 2 %118 = extractelement <4 x float> %114, i32 3 %119 = bitcast float %105 to i32 %120 = bitcast float %106 to i32 %121 = insertelement <4 x i32> , i32 %119, i32 1 %122 = insertelement <4 x i32> %121, i32 %120, i32 2 %123 = insertelement <4 x i32> %122, i32 0, i32 3 %124 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %123, <8 x i32> %62, <4 x i32> %64, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %125 = extractelement <4 x float> %124, i32 0 %126 = extractelement <4 x float> %124, i32 1 %127 = extractelement <4 x float> %124, i32 2 %128 = extractelement <4 x float> %124, i32 3 %129 = bitcast float %105 to i32 %130 = bitcast float %106 to i32 %131 = insertelement <4 x i32> , i32 %129, i32 1 %132 = insertelement <4 x i32> %131, i32 %130, i32 2 %133 = insertelement <4 x i32> %132, i32 0, i32 3 %134 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %133, <8 x i32> %62, <4 x i32> %64, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %135 = extractelement <4 x float> %134, i32 0 %136 = extractelement <4 x float> %134, i32 1 %137 = extractelement <4 x float> %134, i32 2 %138 = extractelement <4 x float> %134, i32 3 %139 = fmul float %76, 2.550000e+02 %140 = fadd float %139, -1.280000e+02 %141 = fmul float %77, 2.550000e+02 %142 = fadd float %141, -1.280000e+02 %143 = fmul float %78, 2.550000e+02 %144 = fadd float %143, -1.280000e+02 %145 = fmul float %79, 2.550000e+02 %146 = fadd float %145, -1.280000e+02 %147 = fcmp olt float %140, 0.000000e+00 %148 = fcmp olt float %142, 0.000000e+00 %149 = fcmp olt float %144, 0.000000e+00 %150 = fcmp olt float %146, 0.000000e+00 %151 = select i1 %147, float 1.000000e+00, float 0.000000e+00 %152 = select i1 %149, float 1.000000e+00, float 0.000000e+00 %153 = select i1 %150, float 1.000000e+00, float 0.000000e+00 %154 = call float @fabs(float %140) %155 = call float @fabs(float %142) %156 = call float @fabs(float %144) %157 = call float @fabs(float %146) %158 = fsub float %154, %151 %159 = select i1 %148, float -1.000000e+00, float -0.000000e+00 %160 = fadd float %155, %159 %161 = fsub float %156, %152 %162 = fsub float %157, %153 %163 = fadd float %158, -6.400000e+01 %164 = fadd float %160, -6.400000e+01 %165 = fadd float %161, -6.400000e+01 %166 = fadd float %162, -6.400000e+01 %167 = fcmp olt float %163, 0.000000e+00 %168 = fcmp olt float %164, 0.000000e+00 %169 = fcmp olt float %165, 0.000000e+00 %170 = fcmp olt float %166, 0.000000e+00 %171 = select i1 %167, float 1.000000e+00, float 0.000000e+00 %172 = select i1 %168, float 1.000000e+00, float 0.000000e+00 %173 = select i1 %169, float 1.000000e+00, float 0.000000e+00 %174 = select i1 %170, float 1.000000e+00, float 0.000000e+00 %175 = call float @fabs(float %163) %176 = call float @fabs(float %164) %177 = call float @fabs(float %165) %178 = call float @fabs(float %166) %179 = fsub float %175, %171 %180 = fsub float %176, %172 %181 = fsub float %177, %173 %182 = fsub float %178, %174 %183 = fmul float %179, 0x3F90410420000000 %184 = fmul float %180, 0x3F90410420000000 %185 = fmul float %181, 0x3F90410420000000 %186 = fmul float %182, 0x3F90410420000000 %187 = fmul float %171, 2.000000e+00 %188 = fmul float %172, 2.000000e+00 %189 = fmul float %173, 2.000000e+00 %190 = fmul float %174, 2.000000e+00 %191 = fsub float 1.000000e+00, %187 %192 = fsub float 1.000000e+00, %188 %193 = fsub float 1.000000e+00, %189 %194 = fsub float 1.000000e+00, %190 %195 = fmul float %151, 2.000000e+00 %196 = fmul float %152, 2.000000e+00 %197 = fmul float %153, 2.000000e+00 %198 = fsub float 1.000000e+00, %195 %199 = fsub float 1.000000e+00, %196 %200 = fsub float 1.000000e+00, %197 %201 = fsub float 1.000000e+00, %183 %202 = fsub float %201, %184 %203 = fmul float %183, %183 %204 = fmul float %184, %184 %205 = fadd float %204, %203 %206 = fmul float %202, %202 %207 = fadd float %205, %206 %208 = call float @llvm.AMDGPU.rsq.clamped.f32(float %207) %209 = fmul float %183, %208 %210 = fmul float %184, %208 %211 = fmul float %202, %208 %212 = fmul float %209, %191 %213 = fmul float %210, %192 %214 = fsub float 1.000000e+00, %185 %215 = fsub float %214, %186 %216 = fmul float %185, %185 %217 = fmul float %186, %186 %218 = fadd float %217, %216 %219 = fmul float %215, %215 %220 = fadd float %218, %219 %221 = call float @llvm.AMDGPU.rsq.clamped.f32(float %220) %222 = fmul float %185, %221 %223 = fmul float %186, %221 %224 = fmul float %215, %221 %225 = fmul float %222, %193 %226 = fmul float %223, %194 %227 = fmul float %211, %198 %228 = fmul float %212, %115 %229 = fmul float %213, %116 %230 = fadd float %228, %229 %231 = fmul float %227, %117 %232 = fadd float %230, %231 %233 = fmul float %118, 0.000000e+00 %234 = fadd float %232, %233 %235 = fmul float %212, %125 %236 = fmul float %213, %126 %237 = fadd float %235, %236 %238 = fmul float %227, %127 %239 = fadd float %237, %238 %240 = fmul float %128, 0.000000e+00 %241 = fadd float %239, %240 %242 = fmul float %212, %135 %243 = fmul float %213, %136 %244 = fadd float %242, %243 %245 = fmul float %227, %137 %246 = fadd float %244, %245 %247 = fmul float %138, 0.000000e+00 %248 = fadd float %246, %247 %249 = fmul float %234, %234 %250 = fmul float %241, %241 %251 = fadd float %250, %249 %252 = fmul float %248, %248 %253 = fadd float %251, %252 %254 = call float @llvm.AMDGPU.rsq.clamped.f32(float %253) %255 = fmul float %234, %254 %256 = fmul float %241, %254 %257 = fmul float %248, %254 %258 = fmul float %224, %199 %259 = fmul float %225, %115 %260 = fmul float %226, %116 %261 = fadd float %259, %260 %262 = fmul float %258, %117 %263 = fadd float %261, %262 %264 = fmul float %118, 0.000000e+00 %265 = fadd float %263, %264 %266 = fmul float %225, %125 %267 = fmul float %226, %126 %268 = fadd float %266, %267 %269 = fmul float %258, %127 %270 = fadd float %268, %269 %271 = fmul float %128, 0.000000e+00 %272 = fadd float %270, %271 %273 = fmul float %225, %135 %274 = fmul float %226, %136 %275 = fadd float %273, %274 %276 = fmul float %258, %137 %277 = fadd float %275, %276 %278 = fmul float %138, 0.000000e+00 %279 = fadd float %277, %278 %280 = fmul float %265, %255 %281 = fmul float %272, %256 %282 = fadd float %281, %280 %283 = fmul float %279, %257 %284 = fadd float %282, %283 %285 = fmul float %284, %255 %286 = fmul float %284, %256 %287 = fmul float %284, %257 %288 = fsub float %265, %285 %289 = fsub float %272, %286 %290 = fsub float %279, %287 %291 = fmul float %288, %288 %292 = fmul float %289, %289 %293 = fadd float %292, %291 %294 = fmul float %290, %290 %295 = fadd float %293, %294 %296 = call float @llvm.AMDGPU.rsq.clamped.f32(float %295) %297 = fmul float %288, %296 %298 = fmul float %289, %296 %299 = fmul float %290, %296 %300 = fmul float %69, %115 %301 = fmul float %70, %116 %302 = fadd float %300, %301 %303 = fmul float %71, %117 %304 = fadd float %302, %303 %305 = fadd float %304, %118 %306 = fmul float %69, %125 %307 = fmul float %70, %126 %308 = fadd float %306, %307 %309 = fmul float %71, %127 %310 = fadd float %308, %309 %311 = fadd float %310, %128 %312 = fmul float %69, %135 %313 = fmul float %70, %136 %314 = fadd float %312, %313 %315 = fmul float %71, %137 %316 = fadd float %314, %315 %317 = fadd float %316, %138 %318 = fsub float %305, %49 %319 = fsub float %311, %50 %320 = fsub float %317, %51 %321 = fmul float %52, %318 %322 = fmul float %53, %319 %323 = fadd float %322, %321 %324 = fmul float %54, %320 %325 = fadd float %323, %324 %326 = fmul float %318, %318 %327 = fmul float %319, %319 %328 = fadd float %327, %326 %329 = fmul float %320, %320 %330 = fadd float %328, %329 %331 = call float @llvm.AMDGPU.rsq.clamped.f32(float %330) %332 = fmul float %318, %331 %333 = fmul float %319, %331 %334 = fmul float %320, %331 %335 = fmul float %255, %58 %336 = fmul float %256, %59 %337 = fadd float %336, %335 %338 = fmul float %257, %60 %339 = fadd float %337, %338 %340 = fmul float %339, %255 %341 = fmul float %339, %256 %342 = fmul float %339, %257 %343 = fmul float %340, 2.000000e+00 %344 = fmul float %341, 2.000000e+00 %345 = fmul float %342, 2.000000e+00 %346 = fsub float %58, %343 %347 = fsub float %59, %344 %348 = fsub float %60, %345 %349 = fmul float %332, %346 %350 = fsub float -0.000000e+00, %349 %351 = fmul float %333, %347 %352 = fsub float %350, %351 %353 = fmul float %334, %348 %354 = fsub float %352, %353 %355 = call float @llvm.AMDIL.clamp.(float %354, float 0.000000e+00, float 1.000000e+00) %356 = call float @llvm.pow.f32(float %355, float 1.600000e+01) %357 = call float @llvm.AMDIL.clamp.(float %356, float 0.000000e+00, float 1.000000e+00) %358 = fmul float %305, %34 %359 = fmul float %311, %38 %360 = fadd float %358, %359 %361 = fmul float %317, %42 %362 = fadd float %360, %361 %363 = fadd float %362, %46 %364 = fmul float %305, %35 %365 = fmul float %311, %39 %366 = fadd float %364, %365 %367 = fmul float %317, %43 %368 = fadd float %366, %367 %369 = fadd float %368, %47 %370 = fmul float %305, %36 %371 = fmul float %311, %40 %372 = fadd float %370, %371 %373 = fmul float %317, %44 %374 = fadd float %372, %373 %375 = fadd float %374, %48 %376 = fmul float %305, %33 %377 = fmul float %311, %37 %378 = fadd float %376, %377 %379 = fmul float %317, %41 %380 = fadd float %378, %379 %381 = fadd float %380, %45 %382 = fmul float %16, 0x3F91DF4720000000 %383 = fmul float %17, 0x3F91DF4720000000 %384 = fmul float %18, 0x3F91DF4720000000 %385 = fmul float %55, %19 %386 = fadd float %385, %25 %387 = fmul float %55, %20 %388 = fadd float %387, %26 %389 = fmul float %55, %21 %390 = fadd float %389, %27 %391 = fmul float %55, %22 %392 = fadd float %391, %28 %393 = fmul float %55, %23 %394 = fadd float %393, %29 %395 = fmul float %55, %24 %396 = fadd float %395, %30 %397 = fmul float %305, 7.812500e-03 %398 = fmul float %311, 7.812500e-03 %399 = fmul float %317, 7.812500e-03 %400 = fadd float %398, -5.000000e-01 %401 = fsub float -5.000000e-01, %399 %402 = call float @llvm.cos.f32(float %382) %403 = call float @llvm.sin.f32(float %382) %404 = fmul float %403, %401 %405 = fmul float %402, %400 %406 = fsub float %405, %404 %407 = fmul float %402, %401 %408 = fmul float %403, %400 %409 = fadd float %408, %407 %410 = fmul float %13, %406 %411 = fadd float %410, 5.000000e-01 %412 = fmul float %13, %409 %413 = fadd float %412, 5.000000e-01 %414 = fadd float %411, %386 %415 = fadd float %413, %388 %416 = call float @llvm.cos.f32(float %383) %417 = call float @llvm.sin.f32(float %383) %418 = fmul float %417, %401 %419 = fmul float %416, %400 %420 = fsub float %419, %418 %421 = fmul float %416, %401 %422 = fmul float %417, %400 %423 = fadd float %422, %421 %424 = fmul float %420, %14 %425 = fadd float %424, 5.000000e-01 %426 = fmul float %423, %14 %427 = fadd float %426, 5.000000e-01 %428 = fadd float %425, %390 %429 = fadd float %427, %392 %430 = fadd float %397, -5.000000e-01 %431 = fsub float -5.000000e-01, %399 %432 = fmul float %403, %431 %433 = fmul float %402, %430 %434 = fsub float %433, %432 %435 = fmul float %402, %431 %436 = fmul float %403, %430 %437 = fadd float %436, %435 %438 = fmul float %13, %434 %439 = fadd float %438, 5.000000e-01 %440 = fmul float %13, %437 %441 = fadd float %440, 5.000000e-01 %442 = fadd float %439, %386 %443 = fadd float %441, %388 %444 = fmul float %417, %431 %445 = fmul float %416, %430 %446 = fsub float %445, %444 %447 = fmul float %417, %430 %448 = fmul float %416, %431 %449 = fadd float %448, %447 %450 = fmul float %446, %14 %451 = fadd float %450, 5.000000e-01 %452 = fmul float %449, %14 %453 = fadd float %452, 5.000000e-01 %454 = fadd float %451, %390 %455 = fadd float %453, %392 %456 = call float @llvm.cos.f32(float %384) %457 = call float @llvm.sin.f32(float %384) %458 = fmul float %457, %401 %459 = fmul float %456, %400 %460 = fsub float %459, %458 %461 = fmul float %456, %401 %462 = fmul float %457, %400 %463 = fadd float %462, %461 %464 = fmul float %460, %15 %465 = fadd float %464, 5.000000e-01 %466 = fmul float %463, %15 %467 = fadd float %466, 5.000000e-01 %468 = fadd float %465, %394 %469 = fadd float %467, %396 %470 = fmul float %457, %431 %471 = fmul float %456, %430 %472 = fsub float %471, %470 %473 = fmul float %456, %431 %474 = fmul float %457, %430 %475 = fadd float %474, %473 %476 = fmul float %15, %472 %477 = fadd float %476, 5.000000e-01 %478 = fmul float %15, %475 %479 = fadd float %478, 5.000000e-01 %480 = fadd float %477, %394 %481 = fadd float %479, %396 %482 = fadd float %397, -5.000000e-01 %483 = fsub float -5.000000e-01, %398 %484 = fmul float %403, %483 %485 = fmul float %402, %482 %486 = fsub float %485, %484 %487 = fmul float %403, %482 %488 = fmul float %402, %483 %489 = fadd float %488, %487 %490 = fmul float %13, %486 %491 = fadd float %490, 5.000000e-01 %492 = fmul float %13, %489 %493 = fadd float %492, 5.000000e-01 %494 = fadd float %491, %386 %495 = fadd float %493, %388 %496 = fmul float %417, %483 %497 = fmul float %416, %482 %498 = fsub float %497, %496 %499 = fmul float %416, %483 %500 = fmul float %417, %482 %501 = fadd float %500, %499 %502 = fmul float %498, %14 %503 = fadd float %502, 5.000000e-01 %504 = fmul float %501, %14 %505 = fadd float %504, 5.000000e-01 %506 = fadd float %503, %390 %507 = fadd float %505, %392 %508 = bitcast float %105 to i32 %509 = bitcast float %106 to i32 %510 = insertelement <4 x i32> , i32 %508, i32 1 %511 = insertelement <4 x i32> %510, i32 %509, i32 2 %512 = insertelement <4 x i32> %511, i32 0, i32 3 %513 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %512, <8 x i32> %62, <4 x i32> %64, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %514 = extractelement <4 x float> %513, i32 0 %515 = extractelement <4 x float> %513, i32 1 %516 = extractelement <4 x float> %513, i32 2 %517 = extractelement <4 x float> %513, i32 3 %518 = fmul float %91, 5.000000e-01 %519 = fmul float %92, 5.000000e-01 %520 = fmul float %93, 5.000000e-01 %521 = call float @llvm.maxnum.f32(float %518, float 0x3F1A36E2E0000000) %522 = call float @llvm.maxnum.f32(float %519, float 0x3F1A36E2E0000000) %523 = call float @llvm.maxnum.f32(float %520, float 0x3F1A36E2E0000000) %524 = fmul float %98, 0x3FB3D07220000000 %525 = fmul float %99, 0x3FB3D07220000000 %526 = fmul float %100, 0x3FB3D07220000000 %527 = fmul float %98, 0x3FEE54EDE0000000 %528 = fadd float %527, 0x3FAAB12320000000 %529 = fmul float %99, 0x3FEE54EDE0000000 %530 = fadd float %529, 0x3FAAB12320000000 %531 = fmul float %100, 0x3FEE54EDE0000000 %532 = fadd float %531, 0x3FAAB12320000000 %533 = call float @llvm.pow.f32(float %528, float 0x4003333340000000) %534 = call float @llvm.pow.f32(float %530, float 0x4003333340000000) %535 = call float @llvm.pow.f32(float %532, float 0x4003333340000000) %536 = fcmp ogt float %98, 0x3FA4B5DCC0000000 %. = select i1 %536, float %533, float %524 %537 = fcmp ogt float %99, 0x3FA4B5DCC0000000 %temp64.0 = select i1 %537, float %534, float %525 %538 = fcmp ogt float %100, 0x3FA4B5DCC0000000 %.112 = select i1 %538, float %535, float %526 %539 = fmul float %., %514 %540 = fmul float %temp64.0, %515 %541 = fmul float %.112, %516 %542 = call float @fabs(float %255) %543 = call float @fabs(float %256) %544 = call float @fabs(float %257) %545 = call float @llvm.maxnum.f32(float %542, float %543) %546 = call float @llvm.maxnum.f32(float %545, float %544) %547 = fsub float %546, %543 %548 = fsub float %546, %544 %549 = fcmp olt float %547, 0x3F847AE140000000 %temp28.0 = select i1 %549, float 1.000000e+00, float 0.000000e+00 %550 = fcmp olt float %548, 0x3F847AE140000000 %.113 = select i1 %550, float 1.000000e+00, float 0.000000e+00 %551 = fsub float -0.000000e+00, %363 %552 = fmul float %369, 2.000000e+00 %553 = fsub float %552, %375 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %305, float %311, float %317, float %325) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %255, float %256, float %257, float %357) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %539, float %540, float %541, float %517) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %84, float %85, float %86, float %temp28.0) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %521, float %522, float %523, float %.113) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %297, float %298, float %299, float %200) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 39, i32 0, float %414, float %415, float %442, float %443) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 40, i32 0, float %428, float %429, float %454, float %455) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 41, i32 0, float %468, float %469, float %480, float %481) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 42, i32 0, float %494, float %495, float %506, float %507) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %381, float %551, float %553, float %375) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.cos.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sin.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v17, 0xc3000000 ; 7E2202FF C3000000 v_mov_b32_e32 v19, 0x437f0000 ; 7E2602FF 437F0000 v_mov_b32_e32 v26, 0x80000000 ; 7E3402FF 80000000 v_mov_b32_e32 v18, 0xc2800000 ; 7E2402FF C2800000 v_mov_b32_e32 v12, 0x3c820821 ; 7E1802FF 3C820821 v_mov_b32_e32 v25, 0x3d558919 ; 7E3202FF 3D558919 v_add_i32_e32 v4, s10, v0 ; 4A08000A v_add_i32_e32 v3, s11, v3 ; 4A06060B s_load_dwordx4 s[24:27], s[4:5], 0x0 ; C08C0500 v_mov_b32_e32 v27, 0x3f72a76f ; 7E3602FF 3F72A76F v_mov_b32_e32 v0, 0x3c8efa39 ; 7E0002FF 3C8EFA39 v_mov_b32_e32 v24, 0x4019999a ; 7E3002FF 4019999A v_mov_b32_e32 v28, 0x3d9e8391 ; 7E3802FF 3D9E8391 v_mov_b32_e32 v29, 0x3d25aee6 ; 7E3A02FF 3D25AEE6 v_mov_b32_e32 v2, 0x3c23d70a ; 7E0402FF 3C23D70A v_mov_b32_e32 v1, 0x38d1b717 ; 7E0202FF 38D1B717 s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908 s_load_dwordx4 s[36:39], s[8:9], 0xc ; C092090C s_load_dwordx4 s[40:43], s[8:9], 0x10 ; C0940910 s_load_dwordx4 s[44:47], s[8:9], 0x14 ; C0960914 s_load_dwordx4 s[8:11], s[2:3], 0x10 ; C0840310 s_load_dwordx4 s[48:51], s[2:3], 0x14 ; C0980314 s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[30:33], v4, s[12:15], 0 idxen ; E00C2000 80031E04 buffer_load_format_xyzw v[20:23], v4, s[16:19], 0 idxen ; E00C2000 80041404 buffer_load_format_xyzw v[13:16], v4, s[20:23], 0 idxen ; E00C2000 80050D04 buffer_load_format_xyzw v[8:11], v4, s[36:39], 0 idxen ; E00C2000 80090804 s_waitcnt vmcnt(3) ; BF8C0773 buffer_load_format_xyzw v[33:36], v4, s[40:43], 0 idxen ; E00C2000 800A2104 buffer_load_format_xyzw v[4:7], v3, s[44:47], 0 idxen ; E00C2000 800B0403 s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v6, 0 ; 7E0C0280 s_buffer_load_dword s17, s[48:51], 0x0 ; C208B100 s_buffer_load_dword s16, s[48:51], 0x1 ; C2083101 s_buffer_load_dword s15, s[48:51], 0x2 ; C207B102 s_buffer_load_dword s14, s[8:11], 0x51 ; C2070951 s_buffer_load_dword s13, s[8:11], 0x52 ; C2068952 s_buffer_load_dword s12, s[8:11], 0x57 ; C2060957 s_buffer_load_dword s19, s[8:11], 0x4c ; C209894C s_buffer_load_dword s21, s[8:11], 0x4d ; C20A894D s_buffer_load_dword s20, s[8:11], 0x4e ; C20A094E s_buffer_load_dword s18, s[8:11], 0x50 ; C2090950 image_sample_l v[36:39], 15, 0, 0, 0, 0, 0, 0, 0, v[4:7], s[28:35], s[24:27] ; F0900F00 00C72404 v_mov_b32_e32 v3, 0x10001 ; 7E0602FF 00010001 image_sample_l_o v[40:43], 15, 0, 0, 0, 0, 0, 0, 0, v[3:6], s[28:35], s[24:27] ; F0D00F00 00C72803 v_mov_b32_e32 v3, 0x20002 ; 7E0602FF 00020002 image_sample_l_o v[44:47], 15, 0, 0, 0, 0, 0, 0, 0, v[3:6], s[28:35], s[24:27] ; F0D00F00 00C72C03 v_mov_b32_e32 v3, 0x30003 ; 7E0602FF 00030003 image_sample_l_o v[48:51], 15, 0, 0, 0, 0, 0, 0, 0, v[3:6], s[28:35], s[24:27] ; F0D00F00 00C73003 exp 15, 32, 0, 0, 0, v6, v6, v6, v6 ; F800020F 06060606 s_waitcnt vmcnt(3) lgkmcnt(0) ; BF8C0073 v_mul_f32_e32 v3, v37, v31 ; 10063F25 v_mad_f32 v4, v19, v21, v17 ; D2820004 04462B13 v_cmp_gt_f32_e32 vcc, 0, v4 ; 7C080880 v_cndmask_b32_e64 v5, v26, -1.0, vcc ; D2000005 01A9E71A v_add_f32_e64 v4, |v4|, v5 ; D2060104 00020B04 v_mad_f32 v5, v27, v33, v25 ; D2820005 0466431B v_mad_f32 v7, v27, v34, v25 ; D2820007 0466451B v_mac_f32_e32 v25, v27, v35 ; 3E32471B v_cmp_gt_f32_e32 vcc, v33, v29 ; 7C083B21 v_cmp_gt_f32_e64 s[0:1], v34, v29 ; D0080000 00023B22 v_cmp_gt_f32_e64 s[4:5], v35, v29 ; D0080004 00023B23 v_mad_f32 v11, v19, v20, v17 ; D282000B 04462913 v_cmp_gt_f32_e64 s[6:7], 0, v11 ; D0080006 00021680 v_cndmask_b32_e64 v16, 0, 1.0, s[6:7] ; D2000010 0019E480 v_sub_f32_e64 v11, |v11|, v16 ; D208010B 0002210B v_add_f32_e32 v11, v18, v11 ; 06161712 v_add_f32_e32 v4, v18, v4 ; 06080912 v_cmp_gt_f32_e64 s[6:7], 0, v11 ; D0080006 00021680 v_cndmask_b32_e64 v20, 0, 1.0, s[6:7] ; D2000014 0019E480 v_sub_f32_e64 v11, |v11|, v20 ; D208010B 0002290B v_cmp_gt_f32_e64 s[6:7], 0, v4 ; D0080006 00020880 v_cndmask_b32_e64 v21, 0, 1.0, s[6:7] ; D2000015 0019E480 v_sub_f32_e64 v4, |v4|, v21 ; D2080104 00022B04 v_mul_f32_e32 v26, v12, v11 ; 1034170C v_mad_f32 v11, -v11, v12, 1.0 ; D282000B 23CA190B v_mad_f32 v11, -v4, v12, v11 ; D282000B 242E1904 v_mul_f32_e32 v4, v12, v4 ; 1008090C v_mul_f32_e32 v27, v26, v26 ; 1036351A v_mac_f32_e32 v27, v4, v4 ; 3E360904 v_mac_f32_e32 v27, v11, v11 ; 3E36170B v_rsq_clamp_f32_e32 v27, v27 ; 7E36591B v_mul_f32_e32 v29, v28, v33 ; 103A431C v_mul_f32_e32 v33, v28, v34 ; 1042451C v_mul_f32_e32 v28, v28, v35 ; 1038471C v_mul_f32_e32 v26, v27, v26 ; 1034351B v_mul_f32_e32 v4, v27, v4 ; 1008091B v_mul_f32_e32 v11, v27, v11 ; 1016171B v_mad_f32 v20, -2.0, v20, 1.0 ; D2820014 03CA28F5 v_mul_f32_e32 v20, v20, v26 ; 10283514 v_mad_f32 v21, -2.0, v21, 1.0 ; D2820015 03CA2AF5 v_mul_f32_e32 v4, v21, v4 ; 10080915 v_mad_f32 v16, -2.0, v16, 1.0 ; D2820010 03CA20F5 v_mul_f32_e32 v11, v16, v11 ; 10161710 v_mul_f32_e32 v16, v37, v4 ; 10200925 s_waitcnt vmcnt(2) ; BF8C0772 v_mul_f32_e32 v21, v41, v4 ; 102A0929 s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v4, v45, v4 ; 1008092D v_mac_f32_e32 v16, v36, v20 ; 3E202924 v_mac_f32_e32 v21, v40, v20 ; 3E2A2928 v_mac_f32_e32 v4, v44, v20 ; 3E08292C v_mac_f32_e32 v16, v38, v11 ; 3E201726 v_mac_f32_e32 v21, v42, v11 ; 3E2A172A v_mac_f32_e32 v4, v46, v11 ; 3E08172E v_mac_f32_e32 v3, v36, v30 ; 3E063D24 v_mul_f32_e32 v11, v41, v31 ; 10163F29 v_mac_f32_e32 v11, v40, v30 ; 3E163D28 v_mul_f32_e32 v20, v45, v31 ; 10283F2D v_mac_f32_e32 v20, v44, v30 ; 3E283D2C v_mac_f32_e32 v16, 0, v39 ; 3E204E80 v_mac_f32_e32 v21, 0, v43 ; 3E2A5680 v_mac_f32_e32 v4, 0, v47 ; 3E085E80 v_mul_f32_e32 v26, v16, v16 ; 10342110 v_mac_f32_e32 v26, v21, v21 ; 3E342B15 v_mac_f32_e32 v26, v4, v4 ; 3E340904 v_rsq_clamp_f32_e32 v26, v26 ; 7E34591A v_mac_f32_e32 v3, v38, v32 ; 3E064126 v_mac_f32_e32 v11, v42, v32 ; 3E16412A v_mac_f32_e32 v20, v46, v32 ; 3E28412E v_mul_f32_e32 v16, v26, v16 ; 1020211A v_mul_f32_e32 v21, v26, v21 ; 102A2B1A v_mul_f32_e32 v4, v26, v4 ; 1008091A v_add_f32_e32 v3, v39, v3 ; 06060727 v_add_f32_e32 v11, v43, v11 ; 0616172B v_add_f32_e32 v20, v47, v20 ; 0628292F v_subrev_f32_e32 v26, s19, v3 ; 0A340613 v_subrev_f32_e32 v27, s21, v11 ; 0A361615 v_mul_f32_e32 v30, v26, v26 ; 103C351A v_mac_f32_e32 v30, v27, v27 ; 3E3C371B v_subrev_f32_e32 v31, s20, v20 ; 0A3E2814 v_mac_f32_e32 v30, v31, v31 ; 3E3C3F1F v_rsq_clamp_f32_e32 v30, v30 ; 7E3C591E v_mul_f32_e32 v32, s17, v16 ; 10402011 v_mac_f32_e32 v32, s16, v21 ; 3E402A10 v_mac_f32_e32 v32, s15, v4 ; 3E40080F v_mul_f32_e32 v34, v16, v32 ; 10444110 v_mad_f32 v34, -2.0, v34, s17 ; D2820022 004644F5 v_mul_f32_e32 v35, v30, v26 ; 1046351E v_mul_f32_e32 v34, v34, v35 ; 10444722 v_mul_f32_e32 v35, v21, v32 ; 10464115 v_mad_f32 v35, -2.0, v35, s16 ; D2820023 004246F5 v_mul_f32_e32 v52, v30, v27 ; 1068371E v_mad_f32 v34, -v52, v35, -v34 ; D2820022 A48A4734 v_mul_f32_e32 v32, v4, v32 ; 10404104 v_mad_f32 v32, -2.0, v32, s15 ; D2820020 003E40F5 v_log_f32_e32 v5, v5 ; 7E0A4F05 v_log_f32_e32 v7, v7 ; 7E0E4F07 v_log_f32_e32 v25, v25 ; 7E324F19 v_mul_f32_e32 v30, v30, v31 ; 103C3F1E v_mad_f32 v30, -v30, v32, v34 ; D282001E 248A411E v_mul_legacy_f32_e32 v5, v24, v5 ; 0E0A0B18 v_mul_legacy_f32_e32 v7, v24, v7 ; 0E0E0F18 v_mul_legacy_f32_e32 v24, v24, v25 ; 0E303318 v_mul_f32_e32 v25, s18, v26 ; 10323412 v_mac_f32_e32 v25, s14, v27 ; 3E32360E v_mac_f32_e32 v25, s13, v31 ; 3E323E0D v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_cndmask_b32_e32 v5, v29, v5 ; 000A0B1D v_exp_f32_e32 v7, v7 ; 7E0E4B07 v_cndmask_b32_e64 v7, v33, v7, s[0:1] ; D2000007 00020F21 v_add_f32_e64 v26, 0, v30 clamp ; D206081A 00023C80 v_log_f32_e32 v26, v26 ; 7E344F1A v_exp_f32_e32 v24, v24 ; 7E304B18 v_cndmask_b32_e64 v24, v28, v24, s[4:5] ; D2000018 0012311C exp 15, 33, 0, 0, 0, v3, v11, v20, v25 ; F800021F 19140B03 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_mul_legacy_f32_e32 v25, 0x41800000, v26 ; 0E3234FF 41800000 v_exp_f32_e32 v25, v25 ; 7E324B19 v_add_f32_e64 v25, 0, v25 clamp ; D2060819 00023280 exp 15, 34, 0, 0, 0, v16, v21, v4, v25 ; F800022F 19041510 v_mul_f32_e32 v5, v48, v5 ; 100A0B30 v_mul_f32_e32 v7, v49, v7 ; 100E0F31 v_mul_f32_e32 v24, v50, v24 ; 10303132 exp 15, 35, 0, 0, 0, v5, v7, v24, v51 ; F800023F 33180705 s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v5, v19, v22, v17 ; D2820005 04462D13 v_mac_f32_e32 v17, v19, v23 ; 3E222F13 v_cmp_gt_f32_e32 vcc, 0, v5 ; 7C080A80 v_cndmask_b32_e64 v7, 0, 1.0, vcc ; D2000007 01A9E480 v_sub_f32_e64 v5, |v5|, v7 ; D2080105 00020F05 v_cmp_gt_f32_e32 vcc, 0, v17 ; 7C082280 v_cndmask_b32_e64 v19, 0, 1.0, vcc ; D2000013 01A9E480 v_sub_f32_e64 v17, |v17|, v19 ; D2080111 00022711 v_add_f32_e32 v5, v18, v5 ; 060A0B12 v_add_f32_e32 v17, v18, v17 ; 06222312 v_cmp_gt_f32_e32 vcc, 0, v5 ; 7C080A80 v_cndmask_b32_e64 v18, 0, 1.0, vcc ; D2000012 01A9E480 v_sub_f32_e64 v5, |v5|, v18 ; D2080105 00022505 v_cmp_gt_f32_e32 vcc, 0, v17 ; 7C082280 v_cndmask_b32_e64 v22, 0, 1.0, vcc ; D2000016 01A9E480 v_sub_f32_e64 v17, |v17|, v22 ; D2080111 00022D11 v_mul_f32_e32 v23, v12, v5 ; 102E0B0C v_mad_f32 v5, -v5, v12, 1.0 ; D2820005 23CA1905 v_mad_f32 v5, -v17, v12, v5 ; D2820005 24161911 v_mul_f32_e32 v12, v12, v17 ; 1018230C v_mul_f32_e32 v17, v23, v23 ; 10222F17 v_mac_f32_e32 v17, v12, v12 ; 3E22190C v_mac_f32_e32 v17, v5, v5 ; 3E220B05 v_rsq_clamp_f32_e32 v17, v17 ; 7E225911 v_max3_f32 v24, |v16|, |v21|, |v4| ; D2A80718 04122B10 v_sub_f32_e64 v25, v24, |v21| ; D2080219 00022B18 v_cmp_lt_f32_e32 vcc, v25, v2 ; 7C020519 v_cndmask_b32_e64 v25, 0, 1.0, vcc ; D2000019 01A9E480 exp 15, 36, 0, 0, 0, v13, v14, v15, v25 ; F800024F 190F0E0D s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v13, v17, v23 ; 101A2F11 v_mul_f32_e32 v12, v17, v12 ; 10181911 v_mul_f32_e32 v5, v17, v5 ; 100A0B11 v_mad_f32 v14, -2.0, v18, 1.0 ; D282000E 03CA24F5 v_mul_f32_e32 v13, v14, v13 ; 101A1B0E v_mad_f32 v14, -2.0, v22, 1.0 ; D282000E 03CA2CF5 v_mul_f32_e32 v12, v14, v12 ; 1018190E v_mad_f32 v7, -2.0, v7, 1.0 ; D2820007 03CA0EF5 v_mul_f32_e32 v5, v7, v5 ; 100A0B07 v_mul_f32_e32 v7, v37, v12 ; 100E1925 v_mac_f32_e32 v7, v36, v13 ; 3E0E1B24 v_mac_f32_e32 v7, v38, v5 ; 3E0E0B26 v_mac_f32_e32 v7, 0, v39 ; 3E0E4E80 v_mul_f32_e32 v14, v41, v12 ; 101C1929 v_mac_f32_e32 v14, v40, v13 ; 3E1C1B28 v_mac_f32_e32 v14, v42, v5 ; 3E1C0B2A v_mac_f32_e32 v14, 0, v43 ; 3E1C5680 v_mul_f32_e32 v12, v45, v12 ; 1018192D v_mac_f32_e32 v12, v44, v13 ; 3E181B2C v_mac_f32_e32 v12, v46, v5 ; 3E180B2E v_mac_f32_e32 v12, 0, v47 ; 3E185E80 v_mul_f32_e32 v5, 0.5, v8 ; 100A10F0 v_mul_f32_e32 v8, 0.5, v9 ; 101012F0 v_mul_f32_e32 v9, 0.5, v10 ; 101214F0 v_sub_f32_e64 v10, v24, |v4| ; D208020A 00020918 v_cmp_lt_f32_e32 vcc, v10, v2 ; 7C02050A v_max_f32_e32 v2, v1, v5 ; 20040B01 v_max_f32_e32 v5, v1, v8 ; 200A1101 v_max_f32_e32 v1, v1, v9 ; 20021301 v_cndmask_b32_e64 v8, 0, 1.0, vcc ; D2000008 01A9E480 exp 15, 37, 0, 0, 0, v2, v5, v1, v8 ; F800025F 08010502 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v1, v16, v7 ; 10020F10 s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 v_mac_f32_e32 v1, v21, v14 ; 3E021D15 v_mac_f32_e32 v1, v4, v12 ; 3E021904 v_mad_f32 v2, -v1, v16, v7 ; D2820002 241E2101 v_mad_f32 v5, -v1, v21, v14 ; D2820005 243A2B01 v_mad_f32 v1, -v1, v4, v12 ; D2820001 24320901 v_mul_f32_e32 v4, v2, v2 ; 10080502 v_mac_f32_e32 v4, v5, v5 ; 3E080B05 v_mac_f32_e32 v4, v1, v1 ; 3E080301 v_rsq_clamp_f32_e32 v4, v4 ; 7E085904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_buffer_load_dword s5, s[0:3], 0xc ; C202810C s_buffer_load_dword s6, s[0:3], 0xd ; C203010D s_buffer_load_dword s7, s[0:3], 0x13 ; C2038113 v_mul_f32_e32 v2, v4, v2 ; 10040504 v_mul_f32_e32 v5, v4, v5 ; 100A0B04 s_buffer_load_dword s13, s[0:3], 0x8 ; C2068108 v_mul_f32_e32 v1, v4, v1 ; 10020304 v_mad_f32 v4, -2.0, v19, 1.0 ; D2820004 03CA26F5 exp 15, 38, 0, 0, 0, v2, v5, v1, v4 ; F800026F 04010502 s_buffer_load_dword s14, s[0:3], 0x14 ; C2070114 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mov_b32_e32 v1, s4 ; 7E020204 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_buffer_load_dword s15, s[0:3], 0xf ; C207810F v_mov_b32_e32 v2, s5 ; 7E040205 v_mac_f32_e32 v1, s12, v2 ; 3E02040C v_mov_b32_e32 v2, s7 ; 7E040207 v_mov_b32_e32 v4, s6 ; 7E080206 v_mac_f32_e32 v2, s12, v4 ; 3E04080C s_buffer_load_dword s5, s[0:3], 0x9 ; C2028109 v_mul_f32_e32 v4, s13, v0 ; 1008000D v_mov_b32_e32 v5, 0x3e22f983 ; 7E0A02FF 3E22F983 v_mul_f32_e32 v4, v5, v4 ; 10080905 v_fract_f32_e32 v4, v4 ; 7E084104 v_cos_f32_e32 v7, v4 ; 7E0E6D04 v_sin_f32_e32 v4, v4 ; 7E086B04 v_mov_b32_e32 v8, 0x3c000000 ; 7E1002FF 3C000000 v_mad_f32 v9, v11, v8, -0.5 ; D2820009 03C6110B v_mad_f32 v10, -v20, v8, -0.5 ; D282000A 23C61114 s_buffer_load_dword s6, s[0:3], 0x5 ; C2030105 v_mad_f32 v12, v3, v8, -0.5 ; D282000C 03C61103 v_mul_f32_e32 v13, v10, v4 ; 101A090A v_mad_f32 v14, v7, v9, -v13 ; D282000E 84361307 v_mad_f32 v13, v7, v12, -v13 ; D282000D 84361907 v_mul_f32_e32 v15, v10, v7 ; 101E0F0A v_mad_f32 v16, v9, v4, v15 ; D2820010 043E0909 v_mac_f32_e32 v15, v12, v4 ; 3E1E090C s_buffer_load_dword s7, s[0:3], 0x6 ; C2038106 s_buffer_load_dword s13, s[0:3], 0x7 ; C2068107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v14, s6, v14, 0.5 ; D282000E 03C21C06 v_mad_f32 v16, s6, v16, 0.5 ; D2820010 03C22006 s_buffer_load_dword s16, s[0:3], 0x15 ; C2080115 v_mad_f32 v13, s6, v13, 0.5 ; D282000D 03C21A06 v_mad_f32 v15, s6, v15, 0.5 ; D282000F 03C21E06 v_add_f32_e32 v14, v1, v14 ; 061C1D01 v_add_f32_e32 v16, v2, v16 ; 06202102 v_add_f32_e32 v13, v1, v13 ; 061A1B01 v_add_f32_e32 v15, v2, v15 ; 061E1F02 exp 15, 39, 0, 0, 0, v14, v16, v13, v15 ; F800027F 0F0D100E s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mov_b32_e32 v13, s14 ; 7E1A020E v_mov_b32_e32 v14, s4 ; 7E1C0204 v_mac_f32_e32 v13, s12, v14 ; 3E1A1C0C s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_buffer_load_dword s14, s[0:3], 0x17 ; C2070117 v_mov_b32_e32 v14, s16 ; 7E1C0210 v_mov_b32_e32 v15, s15 ; 7E1E020F v_mac_f32_e32 v14, s12, v15 ; 3E1C1E0C s_buffer_load_dword s15, s[0:3], 0xa ; C207810A s_buffer_load_dword s16, s[0:3], 0x10 ; C2080110 s_buffer_load_dword s0, s[0:3], 0x11 ; C2000111 v_mul_f32_e32 v15, s5, v0 ; 101E0005 v_mul_f32_e32 v15, v5, v15 ; 101E1F05 v_fract_f32_e32 v15, v15 ; 7E1E410F v_cos_f32_e32 v16, v15 ; 7E206D0F v_sin_f32_e32 v15, v15 ; 7E1E6B0F v_mul_f32_e32 v17, v10, v16 ; 1022210A v_mac_f32_e32 v17, v9, v15 ; 3E221F09 v_mul_f32_e32 v18, v10, v15 ; 10241F0A v_mad_f32 v19, v16, v9, -v18 ; D2820013 844A1310 v_mad_f32 v18, v16, v12, -v18 ; D2820012 844A1910 v_mul_f32_e32 v21, v12, v15 ; 102A1F0C v_mac_f32_e32 v21, v10, v16 ; 3E2A210A v_mad_f32 v19, v19, s7, 0.5 ; D2820013 03C00F13 v_mad_f32 v17, v17, s7, 0.5 ; D2820011 03C00F11 v_mad_f32 v18, v18, s7, 0.5 ; D2820012 03C00F12 v_mad_f32 v21, v21, s7, 0.5 ; D2820015 03C00F15 v_add_f32_e32 v19, v13, v19 ; 0626270D v_add_f32_e32 v17, v14, v17 ; 0622230E v_add_f32_e32 v18, v13, v18 ; 0624250D v_add_f32_e32 v21, v14, v21 ; 062A2B0E exp 15, 40, 0, 0, 0, v19, v17, v18, v21 ; F800028F 15121113 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mov_b32_e32 v17, s4 ; 7E220204 v_mov_b32_e32 v18, s16 ; 7E240210 v_mac_f32_e32 v17, s12, v18 ; 3E22240C v_mov_b32_e32 v18, s14 ; 7E24020E v_mov_b32_e32 v19, s0 ; 7E260200 v_mac_f32_e32 v18, s12, v19 ; 3E24260C v_mul_f32_e32 v0, s15, v0 ; 1000000F v_mul_f32_e32 v0, v5, v0 ; 10000105 v_fract_f32_e32 v0, v0 ; 7E004100 v_cos_f32_e32 v5, v0 ; 7E0A6D00 v_sin_f32_e32 v0, v0 ; 7E006B00 v_mul_f32_e32 v19, v10, v0 ; 1026010A v_mul_f32_e32 v10, v10, v5 ; 10140B0A v_mad_f32 v21, v5, v9, -v19 ; D2820015 844E1305 v_mad_f32 v5, v5, v12, -v19 ; D2820005 844E1905 v_mad_f32 v9, v9, v0, v10 ; D2820009 042A0109 v_mac_f32_e32 v10, v12, v0 ; 3E14010C v_mad_f32 v0, v21, s13, 0.5 ; D2820000 03C01B15 v_mad_f32 v5, s13, v5, 0.5 ; D2820005 03C20A0D v_add_f32_e32 v0, v17, v0 ; 06000111 v_add_f32_e32 v5, v17, v5 ; 060A0B11 v_mad_f32 v9, v9, s13, 0.5 ; D2820009 03C01B09 v_mad_f32 v10, s13, v10, 0.5 ; D282000A 03C2140D v_add_f32_e32 v9, v18, v9 ; 06121312 v_add_f32_e32 v10, v18, v10 ; 06141512 exp 15, 41, 0, 0, 0, v0, v9, v5, v10 ; F800029F 0A050900 s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v0, -v11, v8, -0.5 ; D2820000 23C6110B v_mul_f32_e32 v5, v12, v4 ; 100A090C v_mul_f32_e32 v4, v0, v4 ; 10080900 v_mad_f32 v4, v7, v12, -v4 ; D2820004 84121907 v_mac_f32_e32 v5, v0, v7 ; 3E0A0F00 v_mul_f32_e32 v7, v0, v15 ; 100E1F00 v_mul_f32_e32 v0, v0, v16 ; 10002100 v_mad_f32 v7, v16, v12, -v7 ; D2820007 841E1910 v_mac_f32_e32 v0, v12, v15 ; 3E001F0C v_mad_f32 v4, s6, v4, 0.5 ; D2820004 03C20806 v_add_f32_e32 v1, v1, v4 ; 06020901 v_mad_f32 v4, s6, v5, 0.5 ; D2820004 03C20A06 v_add_f32_e32 v2, v2, v4 ; 06040902 v_mad_f32 v4, v7, s7, 0.5 ; D2820004 03C00F07 v_add_f32_e32 v4, v13, v4 ; 0608090D v_mad_f32 v0, v0, s7, 0.5 ; D2820000 03C00F00 v_add_f32_e32 v0, v14, v0 ; 0600010E exp 15, 42, 0, 0, 0, v1, v2, v4, v0 ; F80002AF 00040201 s_buffer_load_dword s0, s[8:11], 0xf ; C200090F s_buffer_load_dword s1, s[8:11], 0x0 ; C2008900 s_buffer_load_dword s2, s[8:11], 0x1 ; C2010901 s_buffer_load_dword s3, s[8:11], 0x2 ; C2018902 s_buffer_load_dword s4, s[8:11], 0x3 ; C2020903 s_buffer_load_dword s5, s[8:11], 0x4 ; C2028904 s_buffer_load_dword s6, s[8:11], 0x5 ; C2030905 s_buffer_load_dword s7, s[8:11], 0x6 ; C2038906 s_buffer_load_dword s12, s[8:11], 0x7 ; C2060907 s_buffer_load_dword s13, s[8:11], 0x8 ; C2068908 s_buffer_load_dword s14, s[8:11], 0x9 ; C2070909 s_buffer_load_dword s15, s[8:11], 0xa ; C207890A s_buffer_load_dword s16, s[8:11], 0xb ; C208090B s_buffer_load_dword s17, s[8:11], 0xc ; C208890C s_buffer_load_dword s18, s[8:11], 0xd ; C209090D s_buffer_load_dword s8, s[8:11], 0xe ; C204090E s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v0, s6, v11 ; 10001606 v_mul_f32_e32 v1, s7, v11 ; 10021607 v_mul_f32_e32 v2, s12, v11 ; 1004160C v_mul_f32_e32 v4, s5, v11 ; 10081605 v_mac_f32_e32 v0, s2, v3 ; 3E000602 v_mac_f32_e32 v1, s3, v3 ; 3E020603 v_mac_f32_e32 v2, s4, v3 ; 3E040604 v_mac_f32_e32 v4, s1, v3 ; 3E080601 v_mac_f32_e32 v0, s14, v20 ; 3E00280E v_mac_f32_e32 v1, s15, v20 ; 3E02280F v_mac_f32_e32 v2, s16, v20 ; 3E042810 v_mac_f32_e32 v4, s13, v20 ; 3E08280D v_add_f32_e32 v0, s18, v0 ; 06000012 v_add_f32_e32 v1, s8, v1 ; 06020208 v_add_f32_e32 v2, s0, v2 ; 06040400 v_add_f32_e32 v3, s17, v4 ; 06060811 v_xor_b32_e32 v0, 0x80000000, v0 ; 3A0000FF 80000000 v_mad_f32 v1, 2.0, v1, -v2 ; D2820001 840A02F4 exp 15, 12, 0, 0, 0, v3, v0, v1, v2 ; F80000CF 02010003 exp 15, 13, 0, 1, 0, v6, v6, v6, v6 ; F80008DF 06060606 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 56 Code Size: 2196 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0xB last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[4], PERSPECTIVE DCL IN[5], GENERIC[6], PERSPECTIVE DCL IN[6], GENERIC[7], PERSPECTIVE DCL IN[7], GENERIC[8], PERSPECTIVE DCL IN[8], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL OUT[2], COLOR[2] DCL OUT[3], COLOR[3] DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL SAMP[8] DCL SAMP[9] DCL SAMP[10] DCL SAMP[11] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL SVIEW[5], 2D, FLOAT DCL SVIEW[6], 2D, FLOAT DCL SVIEW[7], 2D, FLOAT DCL SVIEW[8], 2D, FLOAT DCL SVIEW[9], 2D, FLOAT DCL SVIEW[10], 2D, FLOAT DCL SVIEW[11], 2D, FLOAT DCL CONST[1][0..24] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0..18], LOCAL IMM[0] FLT32 { 0.0078, 1.0000, 2.0000, 0.0000} IMM[1] UINT32 {0, 368, 384, 304} IMM[2] UINT32 {320, 64, 48, 32} IMM[3] FLT32 { 3.0000, -1.0000, 0.0000, 0.0000} IMM[4] UINT32 {16, 224, 228, 0} IMM[5] FLT32 { 0.5000, 0.0000, 0.0000, 0.0000} 0: DP3 TEMP[0].x, IN[1].xyzz, IN[1].xyzz 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MUL TEMP[0].xyz, IN[1].xyzz, TEMP[0].xxxx 3: MUL TEMP[1].xyz, IN[0].xyzz, IMM[0].xxxx 4: MOV TEMP[2].x, -TEMP[1].zzzz 5: MOV TEMP[3].z, IMM[0].yyyy 6: MOV TEMP[3].x, TEMP[1].yyyy 7: MOV TEMP[3].y, TEMP[2].xxxx 8: MOV TEMP[4].x, TEMP[1].xxxx 9: MOV TEMP[5].y, IMM[0].yyyy 10: MOV TEMP[5].x, TEMP[2].xxxx 11: MOV TEMP[4].yz, TEMP[5].yxyy 12: MOV TEMP[2].x, TEMP[1].xxxx 13: MOV TEMP[5].y, IMM[0].yyyy 14: MOV TEMP[5].x, -TEMP[1].yyyy 15: MOV TEMP[2].yz, TEMP[5].yxyy 16: DP3 TEMP[1].x, CONST[1][23].xyzz, TEMP[3].xyzz 17: DP3 TEMP[3].x, CONST[1][24].xyzz, TEMP[3].xyzz 18: MOV TEMP[1].y, TEMP[3].xxxx 19: DP3 TEMP[3].x, CONST[1][23].xyzz, TEMP[4].xyzz 20: DP3 TEMP[4].x, CONST[1][24].xyzz, TEMP[4].xyzz 21: MOV TEMP[3].y, TEMP[4].xxxx 22: DP3 TEMP[4].x, CONST[1][23].xyzz, TEMP[2].xyzz 23: DP3 TEMP[5].x, CONST[1][24].xyzz, TEMP[2].xyzz 24: MOV TEMP[4].y, TEMP[5].xxxx 25: DP3 TEMP[5].x, CONST[1][19].xyzz, TEMP[2].xyzz 26: DP3 TEMP[2].x, CONST[1][20].xyzz, TEMP[2].xyzz 27: MOV TEMP[5].y, TEMP[2].xxxx 28: MUL TEMP[2].x, IN[3].wwww, IMM[0].zzzz 29: MOV_SAT TEMP[2].x, TEMP[2].xxxx 30: MUL TEMP[6].x, IMM[0].zzzz, IN[4].wwww 31: MOV_SAT TEMP[6].x, TEMP[6].xxxx 32: MOV TEMP[7].xy, IN[6].xyyy 33: TEX TEMP[7], TEMP[7], SAMP[1], 2D 34: MOV TEMP[8].xy, IN[6].zwww 35: TEX TEMP[8], TEMP[8], SAMP[1], 2D 36: LRP TEMP[7], TEMP[2].xxxx, TEMP[8], TEMP[7] 37: MOV TEMP[8].xy, IN[8].zwww 38: TEX TEMP[8], TEMP[8], SAMP[1], 2D 39: LRP TEMP[7], TEMP[6].xxxx, TEMP[8], TEMP[7] 40: MOV TEMP[8].xy, IN[7].xyyy 41: TEX TEMP[8], TEMP[8], SAMP[2], 2D 42: MOV TEMP[9].xy, IN[7].zwww 43: TEX TEMP[9], TEMP[9], SAMP[2], 2D 44: LRP TEMP[8], TEMP[2].xxxx, TEMP[9], TEMP[8] 45: MOV TEMP[9].xy, TEMP[5].xyyy 46: TEX TEMP[9], TEMP[9], SAMP[2], 2D 47: LRP TEMP[8], TEMP[6].xxxx, TEMP[9], TEMP[8] 48: MOV TEMP[9].xy, TEMP[1].xyyy 49: TEX TEMP[9], TEMP[9], SAMP[3], 2D 50: MOV TEMP[10].xy, TEMP[3].xyyy 51: TEX TEMP[10], TEMP[10], SAMP[3], 2D 52: LRP TEMP[9], TEMP[2].xxxx, TEMP[10], TEMP[9] 53: MOV TEMP[10].xy, TEMP[4].xyyy 54: TEX TEMP[10], TEMP[10], SAMP[3], 2D 55: LRP TEMP[9], TEMP[6].xxxx, TEMP[10], TEMP[9] 56: ADD TEMP[10].x, TEMP[7].wwww, -IN[4].xxxx 57: MAX TEMP[10].x, IMM[0].wwww, TEMP[10].xxxx 58: ADD TEMP[11].x, IN[3].xxxx, -TEMP[10].xxxx 59: ADD TEMP[12].x, IN[4].xxxx, TEMP[7].wwww 60: MIN TEMP[12].x, IMM[0].yyyy, TEMP[12].xxxx 61: ADD TEMP[10].x, TEMP[12].xxxx, -TEMP[10].xxxx 62: RCP TEMP[10].x, TEMP[10].xxxx 63: MUL TEMP[10].x, TEMP[11].xxxx, TEMP[10].xxxx 64: MOV_SAT TEMP[10].x, TEMP[10].xxxx 65: MUL TEMP[11].x, IMM[0].zzzz, TEMP[10].xxxx 66: ADD TEMP[11].x, IMM[3].xxxx, -TEMP[11].xxxx 67: MUL TEMP[11].x, TEMP[10].xxxx, TEMP[11].xxxx 68: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx 69: ADD TEMP[11].x, IMM[0].yyyy, -TEMP[10].xxxx 70: ADD TEMP[12].x, TEMP[8].wwww, -IN[4].yyyy 71: MAX TEMP[12].x, IMM[0].wwww, TEMP[12].xxxx 72: ADD TEMP[13].x, IN[3].yyyy, -TEMP[12].xxxx 73: ADD TEMP[14].x, IN[4].yyyy, TEMP[8].wwww 74: MIN TEMP[14].x, IMM[0].yyyy, TEMP[14].xxxx 75: ADD TEMP[12].x, TEMP[14].xxxx, -TEMP[12].xxxx 76: RCP TEMP[12].x, TEMP[12].xxxx 77: MUL TEMP[12].x, TEMP[13].xxxx, TEMP[12].xxxx 78: MOV_SAT TEMP[12].x, TEMP[12].xxxx 79: MUL TEMP[13].x, IMM[0].zzzz, TEMP[12].xxxx 80: ADD TEMP[13].x, IMM[3].xxxx, -TEMP[13].xxxx 81: MUL TEMP[13].x, TEMP[12].xxxx, TEMP[13].xxxx 82: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[13].xxxx 83: MAX TEMP[12].x, TEMP[12].xxxx, IMM[0].wwww 84: MIN TEMP[12].x, TEMP[12].xxxx, TEMP[11].xxxx 85: ADD TEMP[11].x, TEMP[11].xxxx, -TEMP[12].xxxx 86: ADD TEMP[13].x, TEMP[9].wwww, -IN[4].zzzz 87: MAX TEMP[13].x, IMM[0].wwww, TEMP[13].xxxx 88: ADD TEMP[14].x, IN[3].zzzz, -TEMP[13].xxxx 89: ADD TEMP[15].x, IN[4].zzzz, TEMP[9].wwww 90: MIN TEMP[15].x, IMM[0].yyyy, TEMP[15].xxxx 91: ADD TEMP[13].x, TEMP[15].xxxx, -TEMP[13].xxxx 92: RCP TEMP[13].x, TEMP[13].xxxx 93: MUL TEMP[13].x, TEMP[14].xxxx, TEMP[13].xxxx 94: MOV_SAT TEMP[13].x, TEMP[13].xxxx 95: MUL TEMP[14].x, IMM[0].zzzz, TEMP[13].xxxx 96: ADD TEMP[14].x, IMM[3].xxxx, -TEMP[14].xxxx 97: MUL TEMP[14].x, TEMP[13].xxxx, TEMP[14].xxxx 98: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[14].xxxx 99: MAX TEMP[13].x, TEMP[13].xxxx, IMM[0].wwww 100: MIN TEMP[13].x, TEMP[13].xxxx, TEMP[11].xxxx 101: ADD TEMP[11].x, TEMP[11].xxxx, -TEMP[13].xxxx 102: MUL TEMP[9].xyz, CONST[1][4].xyzz, TEMP[9].xyzz 103: MUL TEMP[8].xyz, CONST[1][3].xyzz, TEMP[8].xyzz 104: MUL TEMP[7].xyz, CONST[1][2].xyzz, TEMP[7].xyzz 105: MOV TEMP[14].xy, IN[5].xyyy 106: TEX TEMP[14], TEMP[14], SAMP[0], 2D 107: MOV TEMP[15].xy, IN[5].zwww 108: TEX TEMP[15], TEMP[15], SAMP[0], 2D 109: LRP TEMP[14], TEMP[2].xxxx, TEMP[15], TEMP[14] 110: MOV TEMP[15].xy, IN[8].xyyy 111: TEX TEMP[15], TEMP[15], SAMP[0], 2D 112: LRP TEMP[14].xyz, TEMP[6].xxxx, TEMP[15], TEMP[14] 113: MUL TEMP[14].xyz, CONST[1][1].xyzz, TEMP[14].xyzz 114: MUL TEMP[14].xyz, TEMP[14].xyzz, TEMP[11].xxxx 115: MAD TEMP[7].xyz, TEMP[7].xyzz, TEMP[10].xxxx, TEMP[14].xyzz 116: MAD TEMP[7].xyz, TEMP[12].xxxx, TEMP[8].xyzz, TEMP[7].xyzz 117: MAD TEMP[7].xyz, TEMP[9].xyzz, TEMP[13].xxxx, TEMP[7].xyzz 118: MUL TEMP[7].xyz, TEMP[7].xyzz, IN[2].xyzz 119: MOV TEMP[8].xy, TEMP[1].xyyy 120: TEX TEMP[8], TEMP[8], SAMP[7], 2D 121: MOV TEMP[9].xy, TEMP[3].xyyy 122: TEX TEMP[9], TEMP[9], SAMP[7], 2D 123: LRP TEMP[8], TEMP[2].xxxx, TEMP[9], TEMP[8] 124: MOV TEMP[9].xy, TEMP[4].xyyy 125: TEX TEMP[9], TEMP[9], SAMP[7], 2D 126: LRP TEMP[8].xyz, TEMP[6].xxxx, TEMP[9], TEMP[8] 127: MOV TEMP[9].xy, IN[7].xyyy 128: TEX TEMP[9], TEMP[9], SAMP[6], 2D 129: MOV TEMP[14].xy, IN[7].zwww 130: TEX TEMP[14], TEMP[14], SAMP[6], 2D 131: LRP TEMP[9], TEMP[2].xxxx, TEMP[14], TEMP[9] 132: MOV TEMP[14].xy, TEMP[5].xyyy 133: TEX TEMP[14], TEMP[14], SAMP[6], 2D 134: LRP TEMP[9].xyz, TEMP[6].xxxx, TEMP[14], TEMP[9] 135: MOV TEMP[14].xy, IN[6].xyyy 136: TEX TEMP[14], TEMP[14], SAMP[5], 2D 137: MOV TEMP[15].xy, IN[6].zwww 138: TEX TEMP[15], TEMP[15], SAMP[5], 2D 139: LRP TEMP[14], TEMP[2].xxxx, TEMP[15], TEMP[14] 140: MOV TEMP[15].xy, IN[8].zwww 141: TEX TEMP[15], TEMP[15], SAMP[5], 2D 142: LRP TEMP[14].xyz, TEMP[6].xxxx, TEMP[15], TEMP[14] 143: MOV TEMP[15].xy, IN[5].xyyy 144: TEX TEMP[15], TEMP[15], SAMP[4], 2D 145: MOV TEMP[16].xy, IN[5].zwww 146: TEX TEMP[16], TEMP[16], SAMP[4], 2D 147: LRP TEMP[15], TEMP[2].xxxx, TEMP[16], TEMP[15] 148: MOV TEMP[16].xy, IN[8].xyyy 149: TEX TEMP[16], TEMP[16], SAMP[4], 2D 150: LRP TEMP[15].xyz, TEMP[6].xxxx, TEMP[16], TEMP[15] 151: MUL TEMP[15].xyz, TEMP[11].xxxx, TEMP[15].xyzz 152: MAD TEMP[14].xyz, TEMP[10].xxxx, TEMP[14].xyzz, TEMP[15].xyzz 153: MAD TEMP[9].xyz, TEMP[12].xxxx, TEMP[9].xyzz, TEMP[14].xyzz 154: MAD TEMP[8].xyz, TEMP[13].xxxx, TEMP[8].xyzz, TEMP[9].xyzz 155: MUL TEMP[9].x, TEMP[8].xxxx, CONST[1][14].xxxx 156: MUL TEMP[14].x, TEMP[8].zzzz, CONST[1][14].yyyy 157: MOV TEMP[1].xy, TEMP[1].xyyy 158: TEX TEMP[1].yw, TEMP[1], SAMP[11], 2D 159: MOV TEMP[15].xy, IN[7].xyyy 160: TEX TEMP[15].yw, TEMP[15], SAMP[10], 2D 161: MOV TEMP[16].xy, IN[5].xyyy 162: TEX TEMP[16].yw, TEMP[16], SAMP[8], 2D 163: MOV TEMP[17].xy, IN[6].xyyy 164: TEX TEMP[17].yw, TEMP[17], SAMP[9], 2D 165: MUL TEMP[17].xy, TEMP[10].xxxx, TEMP[17].wyyy 166: MAD TEMP[16].xy, TEMP[11].xxxx, TEMP[16].wyyy, TEMP[17].xyyy 167: MAD TEMP[15].xy, TEMP[12].xxxx, TEMP[15].wyyy, TEMP[16].xyyy 168: MAD TEMP[1].xy, TEMP[13].xxxx, TEMP[1].wyyy, TEMP[15].xyyy 169: MAD TEMP[1].xy, TEMP[1].xyyy, IMM[0].zzzz, IMM[3].yyyy 170: MOV TEMP[15].x, TEMP[1].xxxx 171: MOV TEMP[15].y, -TEMP[1].yyyy 172: MUL TEMP[15].xy, TEMP[15].xyyy, CONST[1][0].xxxx 173: MOV TEMP[16].x, TEMP[15].xxxx 174: MOV TEMP[16].y, TEMP[15].yyyy 175: DP2 TEMP[1].x, TEMP[1].xyyy, TEMP[1].xyyy 176: ADD TEMP[1].x, IMM[0].yyyy, -TEMP[1].xxxx 177: MOV_SAT TEMP[1].x, TEMP[1].xxxx 178: SQRT TEMP[1].x, TEMP[1].xxxx 179: MOV TEMP[16].z, TEMP[1].xxxx 180: DP3 TEMP[1].x, TEMP[16].xyzz, TEMP[16].xyzz 181: RSQ TEMP[1].x, TEMP[1].xxxx 182: MUL TEMP[1].xyz, TEMP[16].xyzz, TEMP[1].xxxx 183: DP3 TEMP[15].x, TEMP[0].xyzz, IMM[0].wyww 184: MUL TEMP[15].xyz, TEMP[15].xxxx, TEMP[0].xyzz 185: ADD TEMP[15].xyz, IMM[0].wyww, -TEMP[15].xyzz 186: DP3 TEMP[16].x, TEMP[15].xyzz, TEMP[15].xyzz 187: SQRT TEMP[16].x, TEMP[16].xxxx 188: FSLT TEMP[16].x, TEMP[16].xxxx, IMM[3].zzzz 189: UIF TEMP[16].xxxx :0 190: MOV TEMP[16].xyz, IMM[0].wyww 191: ELSE :0 192: MOV TEMP[16].xyz, TEMP[15].xyzx 193: ENDIF 194: DP3 TEMP[15].x, TEMP[0].xyzz, IMM[3].wwyy 195: MUL TEMP[15].xyz, TEMP[15].xxxx, TEMP[0].xyzz 196: ADD TEMP[15].xyz, IMM[3].wwyy, -TEMP[15].xyzz 197: DP3 TEMP[17].x, TEMP[15].xyzz, TEMP[15].xyzz 198: SQRT TEMP[17].x, TEMP[17].xxxx 199: FSLT TEMP[17].x, TEMP[17].xxxx, IMM[3].zzzz 200: UIF TEMP[17].xxxx :0 201: MOV TEMP[17].xyz, IMM[3].wwyw 202: ELSE :0 203: MOV TEMP[17].xyz, TEMP[15].xyzx 204: ENDIF 205: MUL TEMP[15].xyz, TEMP[17].xyzz, TEMP[1].yyyy 206: MAD TEMP[15].xyz, TEMP[16].xyzz, TEMP[1].xxxx, TEMP[15].xyzz 207: MAD TEMP[1].xyz, TEMP[0].xyzz, TEMP[1].zzzz, TEMP[15].xyzz 208: MOV TEMP[3].xy, TEMP[3].xyyy 209: TEX TEMP[3].yw, TEMP[3], SAMP[11], 2D 210: MOV TEMP[15].xy, IN[7].zwww 211: TEX TEMP[15].yw, TEMP[15], SAMP[10], 2D 212: MOV TEMP[16].xy, IN[5].zwww 213: TEX TEMP[16].yw, TEMP[16], SAMP[8], 2D 214: MOV TEMP[18].xy, IN[6].zwww 215: TEX TEMP[18].yw, TEMP[18], SAMP[9], 2D 216: MUL TEMP[18].xy, TEMP[10].xxxx, TEMP[18].wyyy 217: MAD TEMP[16].xy, TEMP[11].xxxx, TEMP[16].wyyy, TEMP[18].xyyy 218: MAD TEMP[15].xy, TEMP[12].xxxx, TEMP[15].wyyy, TEMP[16].xyyy 219: MAD TEMP[3].xy, TEMP[13].xxxx, TEMP[3].wyyy, TEMP[15].xyyy 220: MAD TEMP[3].xy, IMM[0].zzzz, TEMP[3].xyyy, IMM[3].yyyy 221: MOV TEMP[15].x, TEMP[3].xxxx 222: MOV TEMP[15].y, -TEMP[3].yyyy 223: MUL TEMP[15].xy, CONST[1][0].xxxx, TEMP[15].xyyy 224: MOV TEMP[16].x, TEMP[15].xxxx 225: MOV TEMP[16].y, TEMP[15].yyyy 226: DP2 TEMP[3].x, TEMP[3].xyyy, TEMP[3].xyyy 227: ADD TEMP[3].x, IMM[0].yyyy, -TEMP[3].xxxx 228: MOV_SAT TEMP[3].x, TEMP[3].xxxx 229: SQRT TEMP[3].x, TEMP[3].xxxx 230: MOV TEMP[16].z, TEMP[3].xxxx 231: DP3 TEMP[3].x, TEMP[16].xyzz, TEMP[16].xyzz 232: RSQ TEMP[3].x, TEMP[3].xxxx 233: MUL TEMP[3].xyz, TEMP[16].xyzz, TEMP[3].xxxx 234: DP3 TEMP[15].x, TEMP[0].xyzz, IMM[0].ywww 235: MUL TEMP[15].xyz, TEMP[15].xxxx, TEMP[0].xyzz 236: ADD TEMP[15].xyz, IMM[0].ywww, -TEMP[15].xyzz 237: DP3 TEMP[16].x, TEMP[15].xyzz, TEMP[15].xyzz 238: SQRT TEMP[16].x, TEMP[16].xxxx 239: FSLT TEMP[16].x, TEMP[16].xxxx, IMM[3].zzzz 240: UIF TEMP[16].xxxx :0 241: MOV TEMP[16].xyz, IMM[0].ywwy 242: ELSE :0 243: MOV TEMP[16].xyz, TEMP[15].xyzx 244: ENDIF 245: MUL TEMP[15].xyz, TEMP[17].xyzz, TEMP[3].yyyy 246: MAD TEMP[15].xyz, TEMP[16].xyzz, TEMP[3].xxxx, TEMP[15].xyzz 247: MAD TEMP[3].xyz, TEMP[0].xyzz, TEMP[3].zzzz, TEMP[15].xyzz 248: MOV TEMP[4].xy, TEMP[4].xyyy 249: TEX TEMP[4].yw, TEMP[4], SAMP[11], 2D 250: MOV TEMP[5].xy, TEMP[5].xyyy 251: TEX TEMP[5].yw, TEMP[5], SAMP[10], 2D 252: MOV TEMP[15].xy, IN[8].zwww 253: TEX TEMP[15].yw, TEMP[15], SAMP[9], 2D 254: MOV TEMP[17].xy, IN[8].xyyy 255: TEX TEMP[17].yw, TEMP[17], SAMP[8], 2D 256: MUL TEMP[11].xy, TEMP[11].xxxx, TEMP[17].wyyy 257: MAD TEMP[10].xy, TEMP[10].xxxx, TEMP[15].wyyy, TEMP[11].xyyy 258: MAD TEMP[5].xy, TEMP[12].xxxx, TEMP[5].wyyy, TEMP[10].xyyy 259: MAD TEMP[4].xy, TEMP[13].xxxx, TEMP[4].wyyy, TEMP[5].xyyy 260: MAD TEMP[4].xy, IMM[0].zzzz, TEMP[4].xyyy, IMM[3].yyyy 261: MOV TEMP[5].x, TEMP[4].xxxx 262: MOV TEMP[5].y, -TEMP[4].yyyy 263: MUL TEMP[5].xy, TEMP[5].xyyy, CONST[1][0].xxxx 264: MOV TEMP[10].x, TEMP[5].xxxx 265: MOV TEMP[10].y, TEMP[5].yyyy 266: DP2 TEMP[4].x, TEMP[4].xyyy, TEMP[4].xyyy 267: ADD TEMP[4].x, IMM[0].yyyy, -TEMP[4].xxxx 268: MOV_SAT TEMP[4].x, TEMP[4].xxxx 269: SQRT TEMP[4].x, TEMP[4].xxxx 270: MOV TEMP[10].z, TEMP[4].xxxx 271: DP3 TEMP[4].x, TEMP[10].xyzz, TEMP[10].xyzz 272: RSQ TEMP[4].x, TEMP[4].xxxx 273: MUL TEMP[4].xyz, TEMP[10].xyzz, TEMP[4].xxxx 274: DP3 TEMP[5].x, TEMP[0].xyzz, IMM[3].wyww 275: MUL TEMP[5].xyz, TEMP[5].xxxx, TEMP[0].xyzz 276: ADD TEMP[5].xyz, IMM[3].wyww, -TEMP[5].xyzz 277: DP3 TEMP[10].x, TEMP[5].xyzz, TEMP[5].xyzz 278: SQRT TEMP[10].x, TEMP[10].xxxx 279: FSLT TEMP[10].x, TEMP[10].xxxx, IMM[3].zzzz 280: UIF TEMP[10].xxxx :0 281: MOV TEMP[10].xyz, IMM[3].wyww 282: ELSE :0 283: MOV TEMP[10].xyz, TEMP[5].xyzx 284: ENDIF 285: LRP TEMP[1].xyz, TEMP[2].xxxx, TEMP[3].xyzz, TEMP[1].xyzz 286: MUL TEMP[2].xyz, TEMP[10].xyzz, TEMP[4].yyyy 287: MAD TEMP[2].xyz, TEMP[16].xyzz, TEMP[4].xxxx, TEMP[2].xyzz 288: MAD TEMP[0].xyz, TEMP[0].xyzz, TEMP[4].zzzz, TEMP[2].xyzz 289: LRP TEMP[0].xyz, TEMP[6].xxxx, TEMP[0].xyzz, TEMP[1].xyzz 290: MAD TEMP[0].xyz, TEMP[0].xyzz, IMM[5].xxxx, IMM[5].xxxx 291: MOV TEMP[1].w, IMM[0].wwww 292: MOV TEMP[1].x, TEMP[0].xxxx 293: MOV TEMP[1].y, TEMP[0].yyyy 294: MOV TEMP[1].z, TEMP[0].zzzz 295: MOV TEMP[0].w, IMM[0].wwww 296: MOV TEMP[0].x, TEMP[7].xxxx 297: MOV TEMP[0].y, TEMP[7].yyyy 298: MOV TEMP[0].z, TEMP[7].zzzz 299: MOV TEMP[2].w, IMM[0].wwww 300: MOV TEMP[2].x, TEMP[9].xxxx 301: MOV TEMP[2].y, TEMP[8].yyyy 302: MOV TEMP[2].z, TEMP[14].xxxx 303: MOV OUT[2], IN[0].wwww 304: MOV OUT[0], TEMP[0] 305: MOV OUT[3], TEMP[1] 306: MOV OUT[1], TEMP[2] 307: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 308) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 312) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 320) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 324) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 328) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 368) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 372) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 376) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 384) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 388) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 392) %51 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %52 = load <8 x i32>, <8 x i32> addrspace(2)* %51, align 32, !tbaa !0 %53 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %54 = load <4 x i32>, <4 x i32> addrspace(2)* %53, align 16, !tbaa !0 %55 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %56 = load <8 x i32>, <8 x i32> addrspace(2)* %55, align 32, !tbaa !0 %57 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %58 = load <4 x i32>, <4 x i32> addrspace(2)* %57, align 16, !tbaa !0 %59 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %60 = load <8 x i32>, <8 x i32> addrspace(2)* %59, align 32, !tbaa !0 %61 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %62 = load <4 x i32>, <4 x i32> addrspace(2)* %61, align 16, !tbaa !0 %63 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %64 = load <8 x i32>, <8 x i32> addrspace(2)* %63, align 32, !tbaa !0 %65 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %66 = load <4 x i32>, <4 x i32> addrspace(2)* %65, align 16, !tbaa !0 %67 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %68 = load <8 x i32>, <8 x i32> addrspace(2)* %67, align 32, !tbaa !0 %69 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %70 = load <4 x i32>, <4 x i32> addrspace(2)* %69, align 16, !tbaa !0 %71 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %72 = load <8 x i32>, <8 x i32> addrspace(2)* %71, align 32, !tbaa !0 %73 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %74 = load <4 x i32>, <4 x i32> addrspace(2)* %73, align 16, !tbaa !0 %75 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 6 %76 = load <8 x i32>, <8 x i32> addrspace(2)* %75, align 32, !tbaa !0 %77 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 6 %78 = load <4 x i32>, <4 x i32> addrspace(2)* %77, align 16, !tbaa !0 %79 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 7 %80 = load <8 x i32>, <8 x i32> addrspace(2)* %79, align 32, !tbaa !0 %81 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 7 %82 = load <4 x i32>, <4 x i32> addrspace(2)* %81, align 16, !tbaa !0 %83 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 8 %84 = load <8 x i32>, <8 x i32> addrspace(2)* %83, align 32, !tbaa !0 %85 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 8 %86 = load <4 x i32>, <4 x i32> addrspace(2)* %85, align 16, !tbaa !0 %87 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 9 %88 = load <8 x i32>, <8 x i32> addrspace(2)* %87, align 32, !tbaa !0 %89 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 9 %90 = load <4 x i32>, <4 x i32> addrspace(2)* %89, align 16, !tbaa !0 %91 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 10 %92 = load <8 x i32>, <8 x i32> addrspace(2)* %91, align 32, !tbaa !0 %93 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 10 %94 = load <4 x i32>, <4 x i32> addrspace(2)* %93, align 16, !tbaa !0 %95 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 11 %96 = load <8 x i32>, <8 x i32> addrspace(2)* %95, align 32, !tbaa !0 %97 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 11 %98 = load <4 x i32>, <4 x i32> addrspace(2)* %97, align 16, !tbaa !0 %99 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %100 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %101 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %102 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %103 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %104 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %105 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %106 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %107 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %108 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %109 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %110 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %111 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %112 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %113 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %114 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %115 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %116 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %117 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %118 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %119 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %120 = call float @llvm.SI.fs.interp(i32 3, i32 5, i32 %5, <2 x i32> %7) %121 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %5, <2 x i32> %7) %122 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %5, <2 x i32> %7) %123 = call float @llvm.SI.fs.interp(i32 2, i32 6, i32 %5, <2 x i32> %7) %124 = call float @llvm.SI.fs.interp(i32 3, i32 6, i32 %5, <2 x i32> %7) %125 = call float @llvm.SI.fs.interp(i32 0, i32 7, i32 %5, <2 x i32> %7) %126 = call float @llvm.SI.fs.interp(i32 1, i32 7, i32 %5, <2 x i32> %7) %127 = call float @llvm.SI.fs.interp(i32 2, i32 7, i32 %5, <2 x i32> %7) %128 = call float @llvm.SI.fs.interp(i32 3, i32 7, i32 %5, <2 x i32> %7) %129 = call float @llvm.SI.fs.interp(i32 0, i32 8, i32 %5, <2 x i32> %7) %130 = call float @llvm.SI.fs.interp(i32 1, i32 8, i32 %5, <2 x i32> %7) %131 = call float @llvm.SI.fs.interp(i32 2, i32 8, i32 %5, <2 x i32> %7) %132 = call float @llvm.SI.fs.interp(i32 3, i32 8, i32 %5, <2 x i32> %7) %133 = fmul float %103, %103 %134 = fmul float %104, %104 %135 = fadd float %134, %133 %136 = fmul float %105, %105 %137 = fadd float %135, %136 %138 = call float @llvm.AMDGPU.rsq.clamped.f32(float %137) %139 = fmul float %103, %138 %140 = fmul float %104, %138 %141 = fmul float %105, %138 %142 = fmul float %99, 7.812500e-03 %143 = fmul float %100, 7.812500e-03 %144 = fmul float %101, 7.812500e-03 %145 = fsub float -0.000000e+00, %144 %146 = fsub float -0.000000e+00, %143 %147 = fmul float %45, %143 %148 = fmul float %46, %145 %149 = fadd float %148, %147 %150 = fadd float %149, %47 %151 = fmul float %48, %143 %152 = fmul float %49, %145 %153 = fadd float %152, %151 %154 = fadd float %153, %50 %155 = fmul float %45, %142 %156 = fmul float %46, %145 %157 = fadd float %156, %155 %158 = fadd float %157, %47 %159 = fmul float %48, %142 %160 = fmul float %49, %145 %161 = fadd float %160, %159 %162 = fadd float %161, %50 %163 = fmul float %45, %142 %164 = fmul float %46, %146 %165 = fadd float %164, %163 %166 = fadd float %165, %47 %167 = fmul float %48, %142 %168 = fmul float %49, %146 %169 = fadd float %168, %167 %170 = fadd float %169, %50 %171 = fmul float %39, %142 %172 = fmul float %40, %146 %173 = fadd float %172, %171 %174 = fadd float %173, %41 %175 = fmul float %42, %142 %176 = fmul float %43, %146 %177 = fadd float %176, %175 %178 = fadd float %177, %44 %179 = fmul float %112, 2.000000e+00 %180 = call float @llvm.AMDIL.clamp.(float %179, float 0.000000e+00, float 1.000000e+00) %181 = fmul float %116, 2.000000e+00 %182 = call float @llvm.AMDIL.clamp.(float %181, float 0.000000e+00, float 1.000000e+00) %183 = bitcast float %121 to i32 %184 = bitcast float %122 to i32 %185 = insertelement <2 x i32> undef, i32 %183, i32 0 %186 = insertelement <2 x i32> %185, i32 %184, i32 1 %187 = bitcast <8 x i32> %56 to <32 x i8> %188 = bitcast <4 x i32> %58 to <16 x i8> %189 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %186, <32 x i8> %187, <16 x i8> %188, i32 2) %190 = extractelement <4 x float> %189, i32 0 %191 = extractelement <4 x float> %189, i32 1 %192 = extractelement <4 x float> %189, i32 2 %193 = extractelement <4 x float> %189, i32 3 %194 = bitcast float %123 to i32 %195 = bitcast float %124 to i32 %196 = insertelement <2 x i32> undef, i32 %194, i32 0 %197 = insertelement <2 x i32> %196, i32 %195, i32 1 %198 = bitcast <8 x i32> %56 to <32 x i8> %199 = bitcast <4 x i32> %58 to <16 x i8> %200 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %197, <32 x i8> %198, <16 x i8> %199, i32 2) %201 = extractelement <4 x float> %200, i32 0 %202 = extractelement <4 x float> %200, i32 1 %203 = extractelement <4 x float> %200, i32 2 %204 = extractelement <4 x float> %200, i32 3 %205 = call float @llvm.AMDGPU.lrp(float %180, float %201, float %190) %206 = call float @llvm.AMDGPU.lrp(float %180, float %202, float %191) %207 = call float @llvm.AMDGPU.lrp(float %180, float %203, float %192) %208 = call float @llvm.AMDGPU.lrp(float %180, float %204, float %193) %209 = bitcast float %131 to i32 %210 = bitcast float %132 to i32 %211 = insertelement <2 x i32> undef, i32 %209, i32 0 %212 = insertelement <2 x i32> %211, i32 %210, i32 1 %213 = bitcast <8 x i32> %56 to <32 x i8> %214 = bitcast <4 x i32> %58 to <16 x i8> %215 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %212, <32 x i8> %213, <16 x i8> %214, i32 2) %216 = extractelement <4 x float> %215, i32 0 %217 = extractelement <4 x float> %215, i32 1 %218 = extractelement <4 x float> %215, i32 2 %219 = extractelement <4 x float> %215, i32 3 %220 = call float @llvm.AMDGPU.lrp(float %182, float %216, float %205) %221 = call float @llvm.AMDGPU.lrp(float %182, float %217, float %206) %222 = call float @llvm.AMDGPU.lrp(float %182, float %218, float %207) %223 = call float @llvm.AMDGPU.lrp(float %182, float %219, float %208) %224 = bitcast float %125 to i32 %225 = bitcast float %126 to i32 %226 = insertelement <2 x i32> undef, i32 %224, i32 0 %227 = insertelement <2 x i32> %226, i32 %225, i32 1 %228 = bitcast <8 x i32> %60 to <32 x i8> %229 = bitcast <4 x i32> %62 to <16 x i8> %230 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %227, <32 x i8> %228, <16 x i8> %229, i32 2) %231 = extractelement <4 x float> %230, i32 0 %232 = extractelement <4 x float> %230, i32 1 %233 = extractelement <4 x float> %230, i32 2 %234 = extractelement <4 x float> %230, i32 3 %235 = bitcast float %127 to i32 %236 = bitcast float %128 to i32 %237 = insertelement <2 x i32> undef, i32 %235, i32 0 %238 = insertelement <2 x i32> %237, i32 %236, i32 1 %239 = bitcast <8 x i32> %60 to <32 x i8> %240 = bitcast <4 x i32> %62 to <16 x i8> %241 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %238, <32 x i8> %239, <16 x i8> %240, i32 2) %242 = extractelement <4 x float> %241, i32 0 %243 = extractelement <4 x float> %241, i32 1 %244 = extractelement <4 x float> %241, i32 2 %245 = extractelement <4 x float> %241, i32 3 %246 = call float @llvm.AMDGPU.lrp(float %180, float %242, float %231) %247 = call float @llvm.AMDGPU.lrp(float %180, float %243, float %232) %248 = call float @llvm.AMDGPU.lrp(float %180, float %244, float %233) %249 = call float @llvm.AMDGPU.lrp(float %180, float %245, float %234) %250 = bitcast float %174 to i32 %251 = bitcast float %178 to i32 %252 = insertelement <2 x i32> undef, i32 %250, i32 0 %253 = insertelement <2 x i32> %252, i32 %251, i32 1 %254 = bitcast <8 x i32> %60 to <32 x i8> %255 = bitcast <4 x i32> %62 to <16 x i8> %256 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %253, <32 x i8> %254, <16 x i8> %255, i32 2) %257 = extractelement <4 x float> %256, i32 0 %258 = extractelement <4 x float> %256, i32 1 %259 = extractelement <4 x float> %256, i32 2 %260 = extractelement <4 x float> %256, i32 3 %261 = call float @llvm.AMDGPU.lrp(float %182, float %257, float %246) %262 = call float @llvm.AMDGPU.lrp(float %182, float %258, float %247) %263 = call float @llvm.AMDGPU.lrp(float %182, float %259, float %248) %264 = call float @llvm.AMDGPU.lrp(float %182, float %260, float %249) %265 = bitcast float %150 to i32 %266 = bitcast float %154 to i32 %267 = insertelement <2 x i32> undef, i32 %265, i32 0 %268 = insertelement <2 x i32> %267, i32 %266, i32 1 %269 = bitcast <8 x i32> %64 to <32 x i8> %270 = bitcast <4 x i32> %66 to <16 x i8> %271 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %268, <32 x i8> %269, <16 x i8> %270, i32 2) %272 = extractelement <4 x float> %271, i32 0 %273 = extractelement <4 x float> %271, i32 1 %274 = extractelement <4 x float> %271, i32 2 %275 = extractelement <4 x float> %271, i32 3 %276 = bitcast float %158 to i32 %277 = bitcast float %162 to i32 %278 = insertelement <2 x i32> undef, i32 %276, i32 0 %279 = insertelement <2 x i32> %278, i32 %277, i32 1 %280 = bitcast <8 x i32> %64 to <32 x i8> %281 = bitcast <4 x i32> %66 to <16 x i8> %282 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %279, <32 x i8> %280, <16 x i8> %281, i32 2) %283 = extractelement <4 x float> %282, i32 0 %284 = extractelement <4 x float> %282, i32 1 %285 = extractelement <4 x float> %282, i32 2 %286 = extractelement <4 x float> %282, i32 3 %287 = call float @llvm.AMDGPU.lrp(float %180, float %283, float %272) %288 = call float @llvm.AMDGPU.lrp(float %180, float %284, float %273) %289 = call float @llvm.AMDGPU.lrp(float %180, float %285, float %274) %290 = call float @llvm.AMDGPU.lrp(float %180, float %286, float %275) %291 = bitcast float %166 to i32 %292 = bitcast float %170 to i32 %293 = insertelement <2 x i32> undef, i32 %291, i32 0 %294 = insertelement <2 x i32> %293, i32 %292, i32 1 %295 = bitcast <8 x i32> %64 to <32 x i8> %296 = bitcast <4 x i32> %66 to <16 x i8> %297 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %294, <32 x i8> %295, <16 x i8> %296, i32 2) %298 = extractelement <4 x float> %297, i32 0 %299 = extractelement <4 x float> %297, i32 1 %300 = extractelement <4 x float> %297, i32 2 %301 = extractelement <4 x float> %297, i32 3 %302 = call float @llvm.AMDGPU.lrp(float %182, float %298, float %287) %303 = call float @llvm.AMDGPU.lrp(float %182, float %299, float %288) %304 = call float @llvm.AMDGPU.lrp(float %182, float %300, float %289) %305 = call float @llvm.AMDGPU.lrp(float %182, float %301, float %290) %306 = fsub float %223, %113 %307 = call float @llvm.maxnum.f32(float %306, float 0.000000e+00) %308 = fsub float %109, %307 %309 = fadd float %113, %223 %310 = call float @llvm.minnum.f32(float %309, float 1.000000e+00) %311 = fsub float %310, %307 %312 = fdiv float 1.000000e+00, %311 %313 = fmul float %308, %312 %314 = call float @llvm.AMDIL.clamp.(float %313, float 0.000000e+00, float 1.000000e+00) %315 = fmul float %314, 2.000000e+00 %316 = fsub float 3.000000e+00, %315 %317 = fmul float %314, %316 %318 = fmul float %314, %317 %319 = fsub float 1.000000e+00, %318 %320 = fsub float %264, %114 %321 = call float @llvm.maxnum.f32(float %320, float 0.000000e+00) %322 = fsub float %110, %321 %323 = fadd float %114, %264 %324 = call float @llvm.minnum.f32(float %323, float 1.000000e+00) %325 = fsub float %324, %321 %326 = fdiv float 1.000000e+00, %325 %327 = fmul float %322, %326 %328 = call float @llvm.AMDIL.clamp.(float %327, float 0.000000e+00, float 1.000000e+00) %329 = fmul float %328, 2.000000e+00 %330 = fsub float 3.000000e+00, %329 %331 = fmul float %328, %330 %332 = fmul float %328, %331 %333 = call float @llvm.maxnum.f32(float %332, float 0.000000e+00) %334 = call float @llvm.minnum.f32(float %333, float %319) %335 = fsub float %319, %334 %336 = fsub float %305, %115 %337 = call float @llvm.maxnum.f32(float %336, float 0.000000e+00) %338 = fsub float %111, %337 %339 = fadd float %115, %305 %340 = call float @llvm.minnum.f32(float %339, float 1.000000e+00) %341 = fsub float %340, %337 %342 = fdiv float 1.000000e+00, %341 %343 = fmul float %338, %342 %344 = call float @llvm.AMDIL.clamp.(float %343, float 0.000000e+00, float 1.000000e+00) %345 = fmul float %344, 2.000000e+00 %346 = fsub float 3.000000e+00, %345 %347 = fmul float %344, %346 %348 = fmul float %344, %347 %349 = call float @llvm.maxnum.f32(float %348, float 0.000000e+00) %350 = call float @llvm.minnum.f32(float %349, float %335) %351 = fsub float %335, %350 %352 = fmul float %34, %302 %353 = fmul float %35, %303 %354 = fmul float %36, %304 %355 = fmul float %31, %261 %356 = fmul float %32, %262 %357 = fmul float %33, %263 %358 = fmul float %28, %220 %359 = fmul float %29, %221 %360 = fmul float %30, %222 %361 = bitcast float %117 to i32 %362 = bitcast float %118 to i32 %363 = insertelement <2 x i32> undef, i32 %361, i32 0 %364 = insertelement <2 x i32> %363, i32 %362, i32 1 %365 = bitcast <8 x i32> %52 to <32 x i8> %366 = bitcast <4 x i32> %54 to <16 x i8> %367 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %364, <32 x i8> %365, <16 x i8> %366, i32 2) %368 = extractelement <4 x float> %367, i32 0 %369 = extractelement <4 x float> %367, i32 1 %370 = extractelement <4 x float> %367, i32 2 %371 = extractelement <4 x float> %367, i32 3 %372 = bitcast float %119 to i32 %373 = bitcast float %120 to i32 %374 = insertelement <2 x i32> undef, i32 %372, i32 0 %375 = insertelement <2 x i32> %374, i32 %373, i32 1 %376 = bitcast <8 x i32> %52 to <32 x i8> %377 = bitcast <4 x i32> %54 to <16 x i8> %378 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %375, <32 x i8> %376, <16 x i8> %377, i32 2) %379 = extractelement <4 x float> %378, i32 0 %380 = extractelement <4 x float> %378, i32 1 %381 = extractelement <4 x float> %378, i32 2 %382 = extractelement <4 x float> %378, i32 3 %383 = call float @llvm.AMDGPU.lrp(float %180, float %379, float %368) %384 = call float @llvm.AMDGPU.lrp(float %180, float %380, float %369) %385 = call float @llvm.AMDGPU.lrp(float %180, float %381, float %370) %386 = call float @llvm.AMDGPU.lrp(float %180, float %382, float %371) %387 = bitcast float %129 to i32 %388 = bitcast float %130 to i32 %389 = insertelement <2 x i32> undef, i32 %387, i32 0 %390 = insertelement <2 x i32> %389, i32 %388, i32 1 %391 = bitcast <8 x i32> %52 to <32 x i8> %392 = bitcast <4 x i32> %54 to <16 x i8> %393 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %390, <32 x i8> %391, <16 x i8> %392, i32 2) %394 = extractelement <4 x float> %393, i32 0 %395 = extractelement <4 x float> %393, i32 1 %396 = extractelement <4 x float> %393, i32 2 %397 = call float @llvm.AMDGPU.lrp(float %182, float %394, float %383) %398 = call float @llvm.AMDGPU.lrp(float %182, float %395, float %384) %399 = call float @llvm.AMDGPU.lrp(float %182, float %396, float %385) %400 = fmul float %25, %397 %401 = fmul float %26, %398 %402 = fmul float %27, %399 %403 = fmul float %400, %351 %404 = fmul float %401, %351 %405 = fmul float %402, %351 %406 = fmul float %358, %318 %407 = fadd float %406, %403 %408 = fmul float %359, %318 %409 = fadd float %408, %404 %410 = fmul float %360, %318 %411 = fadd float %410, %405 %412 = fmul float %334, %355 %413 = fadd float %412, %407 %414 = fmul float %334, %356 %415 = fadd float %414, %409 %416 = fmul float %334, %357 %417 = fadd float %416, %411 %418 = fmul float %352, %350 %419 = fadd float %418, %413 %420 = fmul float %353, %350 %421 = fadd float %420, %415 %422 = fmul float %354, %350 %423 = fadd float %422, %417 %424 = fmul float %419, %106 %425 = fmul float %421, %107 %426 = fmul float %423, %108 %427 = bitcast float %150 to i32 %428 = bitcast float %154 to i32 %429 = insertelement <2 x i32> undef, i32 %427, i32 0 %430 = insertelement <2 x i32> %429, i32 %428, i32 1 %431 = bitcast <8 x i32> %80 to <32 x i8> %432 = bitcast <4 x i32> %82 to <16 x i8> %433 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %430, <32 x i8> %431, <16 x i8> %432, i32 2) %434 = extractelement <4 x float> %433, i32 0 %435 = extractelement <4 x float> %433, i32 1 %436 = extractelement <4 x float> %433, i32 2 %437 = extractelement <4 x float> %433, i32 3 %438 = bitcast float %158 to i32 %439 = bitcast float %162 to i32 %440 = insertelement <2 x i32> undef, i32 %438, i32 0 %441 = insertelement <2 x i32> %440, i32 %439, i32 1 %442 = bitcast <8 x i32> %80 to <32 x i8> %443 = bitcast <4 x i32> %82 to <16 x i8> %444 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %441, <32 x i8> %442, <16 x i8> %443, i32 2) %445 = extractelement <4 x float> %444, i32 0 %446 = extractelement <4 x float> %444, i32 1 %447 = extractelement <4 x float> %444, i32 2 %448 = extractelement <4 x float> %444, i32 3 %449 = call float @llvm.AMDGPU.lrp(float %180, float %445, float %434) %450 = call float @llvm.AMDGPU.lrp(float %180, float %446, float %435) %451 = call float @llvm.AMDGPU.lrp(float %180, float %447, float %436) %452 = call float @llvm.AMDGPU.lrp(float %180, float %448, float %437) %453 = bitcast float %166 to i32 %454 = bitcast float %170 to i32 %455 = insertelement <2 x i32> undef, i32 %453, i32 0 %456 = insertelement <2 x i32> %455, i32 %454, i32 1 %457 = bitcast <8 x i32> %80 to <32 x i8> %458 = bitcast <4 x i32> %82 to <16 x i8> %459 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %456, <32 x i8> %457, <16 x i8> %458, i32 2) %460 = extractelement <4 x float> %459, i32 0 %461 = extractelement <4 x float> %459, i32 1 %462 = extractelement <4 x float> %459, i32 2 %463 = call float @llvm.AMDGPU.lrp(float %182, float %460, float %449) %464 = call float @llvm.AMDGPU.lrp(float %182, float %461, float %450) %465 = call float @llvm.AMDGPU.lrp(float %182, float %462, float %451) %466 = bitcast float %125 to i32 %467 = bitcast float %126 to i32 %468 = insertelement <2 x i32> undef, i32 %466, i32 0 %469 = insertelement <2 x i32> %468, i32 %467, i32 1 %470 = bitcast <8 x i32> %76 to <32 x i8> %471 = bitcast <4 x i32> %78 to <16 x i8> %472 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %469, <32 x i8> %470, <16 x i8> %471, i32 2) %473 = extractelement <4 x float> %472, i32 0 %474 = extractelement <4 x float> %472, i32 1 %475 = extractelement <4 x float> %472, i32 2 %476 = extractelement <4 x float> %472, i32 3 %477 = bitcast float %127 to i32 %478 = bitcast float %128 to i32 %479 = insertelement <2 x i32> undef, i32 %477, i32 0 %480 = insertelement <2 x i32> %479, i32 %478, i32 1 %481 = bitcast <8 x i32> %76 to <32 x i8> %482 = bitcast <4 x i32> %78 to <16 x i8> %483 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %480, <32 x i8> %481, <16 x i8> %482, i32 2) %484 = extractelement <4 x float> %483, i32 0 %485 = extractelement <4 x float> %483, i32 1 %486 = extractelement <4 x float> %483, i32 2 %487 = extractelement <4 x float> %483, i32 3 %488 = call float @llvm.AMDGPU.lrp(float %180, float %484, float %473) %489 = call float @llvm.AMDGPU.lrp(float %180, float %485, float %474) %490 = call float @llvm.AMDGPU.lrp(float %180, float %486, float %475) %491 = call float @llvm.AMDGPU.lrp(float %180, float %487, float %476) %492 = bitcast float %174 to i32 %493 = bitcast float %178 to i32 %494 = insertelement <2 x i32> undef, i32 %492, i32 0 %495 = insertelement <2 x i32> %494, i32 %493, i32 1 %496 = bitcast <8 x i32> %76 to <32 x i8> %497 = bitcast <4 x i32> %78 to <16 x i8> %498 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %495, <32 x i8> %496, <16 x i8> %497, i32 2) %499 = extractelement <4 x float> %498, i32 0 %500 = extractelement <4 x float> %498, i32 1 %501 = extractelement <4 x float> %498, i32 2 %502 = call float @llvm.AMDGPU.lrp(float %182, float %499, float %488) %503 = call float @llvm.AMDGPU.lrp(float %182, float %500, float %489) %504 = call float @llvm.AMDGPU.lrp(float %182, float %501, float %490) %505 = bitcast float %121 to i32 %506 = bitcast float %122 to i32 %507 = insertelement <2 x i32> undef, i32 %505, i32 0 %508 = insertelement <2 x i32> %507, i32 %506, i32 1 %509 = bitcast <8 x i32> %72 to <32 x i8> %510 = bitcast <4 x i32> %74 to <16 x i8> %511 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %508, <32 x i8> %509, <16 x i8> %510, i32 2) %512 = extractelement <4 x float> %511, i32 0 %513 = extractelement <4 x float> %511, i32 1 %514 = extractelement <4 x float> %511, i32 2 %515 = extractelement <4 x float> %511, i32 3 %516 = bitcast float %123 to i32 %517 = bitcast float %124 to i32 %518 = insertelement <2 x i32> undef, i32 %516, i32 0 %519 = insertelement <2 x i32> %518, i32 %517, i32 1 %520 = bitcast <8 x i32> %72 to <32 x i8> %521 = bitcast <4 x i32> %74 to <16 x i8> %522 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %519, <32 x i8> %520, <16 x i8> %521, i32 2) %523 = extractelement <4 x float> %522, i32 0 %524 = extractelement <4 x float> %522, i32 1 %525 = extractelement <4 x float> %522, i32 2 %526 = extractelement <4 x float> %522, i32 3 %527 = call float @llvm.AMDGPU.lrp(float %180, float %523, float %512) %528 = call float @llvm.AMDGPU.lrp(float %180, float %524, float %513) %529 = call float @llvm.AMDGPU.lrp(float %180, float %525, float %514) %530 = call float @llvm.AMDGPU.lrp(float %180, float %526, float %515) %531 = bitcast float %131 to i32 %532 = bitcast float %132 to i32 %533 = insertelement <2 x i32> undef, i32 %531, i32 0 %534 = insertelement <2 x i32> %533, i32 %532, i32 1 %535 = bitcast <8 x i32> %72 to <32 x i8> %536 = bitcast <4 x i32> %74 to <16 x i8> %537 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %534, <32 x i8> %535, <16 x i8> %536, i32 2) %538 = extractelement <4 x float> %537, i32 0 %539 = extractelement <4 x float> %537, i32 1 %540 = extractelement <4 x float> %537, i32 2 %541 = call float @llvm.AMDGPU.lrp(float %182, float %538, float %527) %542 = call float @llvm.AMDGPU.lrp(float %182, float %539, float %528) %543 = call float @llvm.AMDGPU.lrp(float %182, float %540, float %529) %544 = bitcast float %117 to i32 %545 = bitcast float %118 to i32 %546 = insertelement <2 x i32> undef, i32 %544, i32 0 %547 = insertelement <2 x i32> %546, i32 %545, i32 1 %548 = bitcast <8 x i32> %68 to <32 x i8> %549 = bitcast <4 x i32> %70 to <16 x i8> %550 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %547, <32 x i8> %548, <16 x i8> %549, i32 2) %551 = extractelement <4 x float> %550, i32 0 %552 = extractelement <4 x float> %550, i32 1 %553 = extractelement <4 x float> %550, i32 2 %554 = extractelement <4 x float> %550, i32 3 %555 = bitcast float %119 to i32 %556 = bitcast float %120 to i32 %557 = insertelement <2 x i32> undef, i32 %555, i32 0 %558 = insertelement <2 x i32> %557, i32 %556, i32 1 %559 = bitcast <8 x i32> %68 to <32 x i8> %560 = bitcast <4 x i32> %70 to <16 x i8> %561 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %558, <32 x i8> %559, <16 x i8> %560, i32 2) %562 = extractelement <4 x float> %561, i32 0 %563 = extractelement <4 x float> %561, i32 1 %564 = extractelement <4 x float> %561, i32 2 %565 = extractelement <4 x float> %561, i32 3 %566 = call float @llvm.AMDGPU.lrp(float %180, float %562, float %551) %567 = call float @llvm.AMDGPU.lrp(float %180, float %563, float %552) %568 = call float @llvm.AMDGPU.lrp(float %180, float %564, float %553) %569 = call float @llvm.AMDGPU.lrp(float %180, float %565, float %554) %570 = bitcast float %129 to i32 %571 = bitcast float %130 to i32 %572 = insertelement <2 x i32> undef, i32 %570, i32 0 %573 = insertelement <2 x i32> %572, i32 %571, i32 1 %574 = bitcast <8 x i32> %68 to <32 x i8> %575 = bitcast <4 x i32> %70 to <16 x i8> %576 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %573, <32 x i8> %574, <16 x i8> %575, i32 2) %577 = extractelement <4 x float> %576, i32 0 %578 = extractelement <4 x float> %576, i32 1 %579 = extractelement <4 x float> %576, i32 2 %580 = call float @llvm.AMDGPU.lrp(float %182, float %577, float %566) %581 = call float @llvm.AMDGPU.lrp(float %182, float %578, float %567) %582 = call float @llvm.AMDGPU.lrp(float %182, float %579, float %568) %583 = fmul float %351, %580 %584 = fmul float %351, %581 %585 = fmul float %351, %582 %586 = fmul float %318, %541 %587 = fadd float %586, %583 %588 = fmul float %318, %542 %589 = fadd float %588, %584 %590 = fmul float %318, %543 %591 = fadd float %590, %585 %592 = fmul float %334, %502 %593 = fadd float %592, %587 %594 = fmul float %334, %503 %595 = fadd float %594, %589 %596 = fmul float %334, %504 %597 = fadd float %596, %591 %598 = fmul float %350, %463 %599 = fadd float %598, %593 %600 = fmul float %350, %464 %601 = fadd float %600, %595 %602 = fmul float %350, %465 %603 = fadd float %602, %597 %604 = fmul float %599, %37 %605 = fmul float %603, %38 %606 = bitcast float %150 to i32 %607 = bitcast float %154 to i32 %608 = insertelement <2 x i32> undef, i32 %606, i32 0 %609 = insertelement <2 x i32> %608, i32 %607, i32 1 %610 = bitcast <8 x i32> %96 to <32 x i8> %611 = bitcast <4 x i32> %98 to <16 x i8> %612 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %609, <32 x i8> %610, <16 x i8> %611, i32 2) %613 = extractelement <4 x float> %612, i32 1 %614 = extractelement <4 x float> %612, i32 3 %615 = bitcast float %125 to i32 %616 = bitcast float %126 to i32 %617 = insertelement <2 x i32> undef, i32 %615, i32 0 %618 = insertelement <2 x i32> %617, i32 %616, i32 1 %619 = bitcast <8 x i32> %92 to <32 x i8> %620 = bitcast <4 x i32> %94 to <16 x i8> %621 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %618, <32 x i8> %619, <16 x i8> %620, i32 2) %622 = extractelement <4 x float> %621, i32 1 %623 = extractelement <4 x float> %621, i32 3 %624 = bitcast float %117 to i32 %625 = bitcast float %118 to i32 %626 = insertelement <2 x i32> undef, i32 %624, i32 0 %627 = insertelement <2 x i32> %626, i32 %625, i32 1 %628 = bitcast <8 x i32> %84 to <32 x i8> %629 = bitcast <4 x i32> %86 to <16 x i8> %630 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %627, <32 x i8> %628, <16 x i8> %629, i32 2) %631 = extractelement <4 x float> %630, i32 1 %632 = extractelement <4 x float> %630, i32 3 %633 = bitcast float %121 to i32 %634 = bitcast float %122 to i32 %635 = insertelement <2 x i32> undef, i32 %633, i32 0 %636 = insertelement <2 x i32> %635, i32 %634, i32 1 %637 = bitcast <8 x i32> %88 to <32 x i8> %638 = bitcast <4 x i32> %90 to <16 x i8> %639 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %636, <32 x i8> %637, <16 x i8> %638, i32 2) %640 = extractelement <4 x float> %639, i32 1 %641 = extractelement <4 x float> %639, i32 3 %642 = fmul float %318, %641 %643 = fmul float %318, %640 %644 = fmul float %351, %632 %645 = fadd float %644, %642 %646 = fmul float %351, %631 %647 = fadd float %646, %643 %648 = fmul float %334, %623 %649 = fadd float %648, %645 %650 = fmul float %334, %622 %651 = fadd float %650, %647 %652 = fmul float %350, %614 %653 = fadd float %652, %649 %654 = fmul float %350, %613 %655 = fadd float %654, %651 %656 = fmul float %653, 2.000000e+00 %657 = fadd float %656, -1.000000e+00 %658 = fmul float %655, 2.000000e+00 %659 = fadd float %658, -1.000000e+00 %660 = fmul float %657, %24 %661 = fmul float %659, %24 %662 = fmul float %657, %657 %663 = fmul float %659, %659 %664 = fadd float %662, %663 %665 = fsub float 1.000000e+00, %664 %666 = call float @llvm.AMDIL.clamp.(float %665, float 0.000000e+00, float 1.000000e+00) %667 = call float @llvm.sqrt.f32(float %666) %668 = fmul float %660, %660 %669 = fmul float %661, %661 %670 = fadd float %669, %668 %671 = fmul float %667, %667 %672 = fadd float %670, %671 %673 = call float @llvm.AMDGPU.rsq.clamped.f32(float %672) %674 = fmul float %660, %673 %675 = fmul float %661, %673 %676 = fsub float -0.000000e+00, %675 %677 = fmul float %667, %673 %678 = fmul float %139, 0.000000e+00 %679 = fadd float %140, %678 %680 = fmul float %141, 0.000000e+00 %681 = fadd float %679, %680 %682 = fmul float %681, %139 %683 = fmul float %681, %140 %684 = fmul float %681, %141 %685 = fsub float 0.000000e+00, %682 %686 = fsub float 1.000000e+00, %683 %687 = fsub float 0.000000e+00, %684 %688 = fmul float %685, %685 %689 = fmul float %686, %686 %690 = fadd float %689, %688 %691 = fmul float %687, %687 %692 = fadd float %690, %691 %693 = call float @llvm.sqrt.f32(float %692) %694 = fcmp olt float %693, 0x3EE4F8B580000000 %. = select i1 %694, float 0.000000e+00, float %685 %.85 = select i1 %694, float 1.000000e+00, float %686 %.86 = select i1 %694, float 0.000000e+00, float %687 %695 = fmul float %139, 0.000000e+00 %696 = fmul float %140, 0.000000e+00 %697 = fadd float %696, %695 %698 = fsub float %697, %141 %699 = fmul float %698, %139 %700 = fmul float %698, %140 %701 = fmul float %698, %141 %702 = fsub float 0.000000e+00, %699 %703 = fsub float 0.000000e+00, %700 %704 = fsub float -1.000000e+00, %701 %705 = fmul float %702, %702 %706 = fmul float %703, %703 %707 = fadd float %706, %705 %708 = fmul float %704, %704 %709 = fadd float %707, %708 %710 = call float @llvm.sqrt.f32(float %709) %711 = fcmp olt float %710, 0x3EE4F8B580000000 %temp68.0 = select i1 %711, float 0.000000e+00, float %702 %temp69.0 = select i1 %711, float 0.000000e+00, float %703 %temp70.0 = select i1 %711, float -1.000000e+00, float %704 %712 = fmul float %temp68.0, %676 %713 = fmul float %temp69.0, %676 %714 = fmul float %temp70.0, %676 %715 = fmul float %., %674 %716 = fadd float %715, %712 %717 = fmul float %.85, %674 %718 = fadd float %717, %713 %719 = fmul float %.86, %674 %720 = fadd float %719, %714 %721 = fmul float %139, %677 %722 = fadd float %721, %716 %723 = fmul float %140, %677 %724 = fadd float %723, %718 %725 = fmul float %141, %677 %726 = fadd float %725, %720 %727 = bitcast float %158 to i32 %728 = bitcast float %162 to i32 %729 = insertelement <2 x i32> undef, i32 %727, i32 0 %730 = insertelement <2 x i32> %729, i32 %728, i32 1 %731 = bitcast <8 x i32> %96 to <32 x i8> %732 = bitcast <4 x i32> %98 to <16 x i8> %733 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %730, <32 x i8> %731, <16 x i8> %732, i32 2) %734 = extractelement <4 x float> %733, i32 1 %735 = extractelement <4 x float> %733, i32 3 %736 = bitcast float %127 to i32 %737 = bitcast float %128 to i32 %738 = insertelement <2 x i32> undef, i32 %736, i32 0 %739 = insertelement <2 x i32> %738, i32 %737, i32 1 %740 = bitcast <8 x i32> %92 to <32 x i8> %741 = bitcast <4 x i32> %94 to <16 x i8> %742 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %739, <32 x i8> %740, <16 x i8> %741, i32 2) %743 = extractelement <4 x float> %742, i32 1 %744 = extractelement <4 x float> %742, i32 3 %745 = bitcast float %119 to i32 %746 = bitcast float %120 to i32 %747 = insertelement <2 x i32> undef, i32 %745, i32 0 %748 = insertelement <2 x i32> %747, i32 %746, i32 1 %749 = bitcast <8 x i32> %84 to <32 x i8> %750 = bitcast <4 x i32> %86 to <16 x i8> %751 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %748, <32 x i8> %749, <16 x i8> %750, i32 2) %752 = extractelement <4 x float> %751, i32 1 %753 = extractelement <4 x float> %751, i32 3 %754 = bitcast float %123 to i32 %755 = bitcast float %124 to i32 %756 = insertelement <2 x i32> undef, i32 %754, i32 0 %757 = insertelement <2 x i32> %756, i32 %755, i32 1 %758 = bitcast <8 x i32> %88 to <32 x i8> %759 = bitcast <4 x i32> %90 to <16 x i8> %760 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %757, <32 x i8> %758, <16 x i8> %759, i32 2) %761 = extractelement <4 x float> %760, i32 1 %762 = extractelement <4 x float> %760, i32 3 %763 = fmul float %318, %762 %764 = fmul float %318, %761 %765 = fmul float %351, %753 %766 = fadd float %765, %763 %767 = fmul float %351, %752 %768 = fadd float %767, %764 %769 = fmul float %334, %744 %770 = fadd float %769, %766 %771 = fmul float %334, %743 %772 = fadd float %771, %768 %773 = fmul float %350, %735 %774 = fadd float %773, %770 %775 = fmul float %350, %734 %776 = fadd float %775, %772 %777 = fmul float %774, 2.000000e+00 %778 = fadd float %777, -1.000000e+00 %779 = fmul float %776, 2.000000e+00 %780 = fadd float %779, -1.000000e+00 %781 = fmul float %24, %778 %782 = fmul float %780, %24 %783 = fmul float %778, %778 %784 = fmul float %780, %780 %785 = fadd float %783, %784 %786 = fsub float 1.000000e+00, %785 %787 = call float @llvm.AMDIL.clamp.(float %786, float 0.000000e+00, float 1.000000e+00) %788 = call float @llvm.sqrt.f32(float %787) %789 = fmul float %781, %781 %790 = fmul float %782, %782 %791 = fadd float %790, %789 %792 = fmul float %788, %788 %793 = fadd float %791, %792 %794 = call float @llvm.AMDGPU.rsq.clamped.f32(float %793) %795 = fmul float %781, %794 %796 = fmul float %782, %794 %797 = fsub float -0.000000e+00, %796 %798 = fmul float %788, %794 %799 = fmul float %140, 0.000000e+00 %800 = fadd float %799, %139 %801 = fmul float %141, 0.000000e+00 %802 = fadd float %800, %801 %803 = fmul float %802, %139 %804 = fmul float %802, %140 %805 = fmul float %802, %141 %806 = fsub float 1.000000e+00, %803 %807 = fsub float 0.000000e+00, %804 %808 = fsub float 0.000000e+00, %805 %809 = fmul float %806, %806 %810 = fmul float %807, %807 %811 = fadd float %810, %809 %812 = fmul float %808, %808 %813 = fadd float %811, %812 %814 = call float @llvm.sqrt.f32(float %813) %815 = fcmp olt float %814, 0x3EE4F8B580000000 %.87 = select i1 %815, float 1.000000e+00, float %806 %.88 = select i1 %815, float 0.000000e+00, float %807 %.89 = select i1 %815, float 0.000000e+00, float %808 %816 = fmul float %temp68.0, %797 %817 = fmul float %temp69.0, %797 %818 = fmul float %temp70.0, %797 %819 = fmul float %.87, %795 %820 = fadd float %819, %816 %821 = fmul float %.88, %795 %822 = fadd float %821, %817 %823 = fmul float %.89, %795 %824 = fadd float %823, %818 %825 = fmul float %139, %798 %826 = fadd float %825, %820 %827 = fmul float %140, %798 %828 = fadd float %827, %822 %829 = fmul float %141, %798 %830 = fadd float %829, %824 %831 = bitcast float %166 to i32 %832 = bitcast float %170 to i32 %833 = insertelement <2 x i32> undef, i32 %831, i32 0 %834 = insertelement <2 x i32> %833, i32 %832, i32 1 %835 = bitcast <8 x i32> %96 to <32 x i8> %836 = bitcast <4 x i32> %98 to <16 x i8> %837 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %834, <32 x i8> %835, <16 x i8> %836, i32 2) %838 = extractelement <4 x float> %837, i32 1 %839 = extractelement <4 x float> %837, i32 3 %840 = bitcast float %174 to i32 %841 = bitcast float %178 to i32 %842 = insertelement <2 x i32> undef, i32 %840, i32 0 %843 = insertelement <2 x i32> %842, i32 %841, i32 1 %844 = bitcast <8 x i32> %92 to <32 x i8> %845 = bitcast <4 x i32> %94 to <16 x i8> %846 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %843, <32 x i8> %844, <16 x i8> %845, i32 2) %847 = extractelement <4 x float> %846, i32 1 %848 = extractelement <4 x float> %846, i32 3 %849 = bitcast float %131 to i32 %850 = bitcast float %132 to i32 %851 = insertelement <2 x i32> undef, i32 %849, i32 0 %852 = insertelement <2 x i32> %851, i32 %850, i32 1 %853 = bitcast <8 x i32> %88 to <32 x i8> %854 = bitcast <4 x i32> %90 to <16 x i8> %855 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %852, <32 x i8> %853, <16 x i8> %854, i32 2) %856 = extractelement <4 x float> %855, i32 1 %857 = extractelement <4 x float> %855, i32 3 %858 = bitcast float %129 to i32 %859 = bitcast float %130 to i32 %860 = insertelement <2 x i32> undef, i32 %858, i32 0 %861 = insertelement <2 x i32> %860, i32 %859, i32 1 %862 = bitcast <8 x i32> %84 to <32 x i8> %863 = bitcast <4 x i32> %86 to <16 x i8> %864 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %861, <32 x i8> %862, <16 x i8> %863, i32 2) %865 = extractelement <4 x float> %864, i32 1 %866 = extractelement <4 x float> %864, i32 3 %867 = fmul float %351, %866 %868 = fmul float %351, %865 %869 = fmul float %318, %857 %870 = fadd float %869, %867 %871 = fmul float %318, %856 %872 = fadd float %871, %868 %873 = fmul float %334, %848 %874 = fadd float %873, %870 %875 = fmul float %334, %847 %876 = fadd float %875, %872 %877 = fmul float %350, %839 %878 = fadd float %877, %874 %879 = fmul float %350, %838 %880 = fadd float %879, %876 %881 = fmul float %878, 2.000000e+00 %882 = fadd float %881, -1.000000e+00 %883 = fmul float %880, 2.000000e+00 %884 = fadd float %883, -1.000000e+00 %885 = fmul float %882, %24 %886 = fmul float %884, %24 %887 = fmul float %882, %882 %888 = fmul float %884, %884 %889 = fadd float %887, %888 %890 = fsub float 1.000000e+00, %889 %891 = call float @llvm.AMDIL.clamp.(float %890, float 0.000000e+00, float 1.000000e+00) %892 = call float @llvm.sqrt.f32(float %891) %893 = fmul float %885, %885 %894 = fmul float %886, %886 %895 = fadd float %894, %893 %896 = fmul float %892, %892 %897 = fadd float %895, %896 %898 = call float @llvm.AMDGPU.rsq.clamped.f32(float %897) %899 = fmul float %885, %898 %900 = fmul float %886, %898 %901 = fsub float -0.000000e+00, %900 %902 = fmul float %892, %898 %903 = fmul float %139, 0.000000e+00 %904 = fsub float %903, %140 %905 = fmul float %141, 0.000000e+00 %906 = fadd float %904, %905 %907 = fmul float %906, %139 %908 = fmul float %906, %140 %909 = fmul float %906, %141 %910 = fsub float 0.000000e+00, %907 %911 = fsub float -1.000000e+00, %908 %912 = fsub float 0.000000e+00, %909 %913 = fmul float %910, %910 %914 = fmul float %911, %911 %915 = fadd float %914, %913 %916 = fmul float %912, %912 %917 = fadd float %915, %916 %918 = call float @llvm.sqrt.f32(float %917) %919 = fcmp olt float %918, 0x3EE4F8B580000000 %temp40.0 = select i1 %919, float 0.000000e+00, float %910 %temp41.0 = select i1 %919, float -1.000000e+00, float %911 %temp42.0 = select i1 %919, float 0.000000e+00, float %912 %920 = call float @llvm.AMDGPU.lrp(float %180, float %826, float %722) %921 = call float @llvm.AMDGPU.lrp(float %180, float %828, float %724) %922 = call float @llvm.AMDGPU.lrp(float %180, float %830, float %726) %923 = fmul float %temp40.0, %901 %924 = fmul float %temp41.0, %901 %925 = fmul float %temp42.0, %901 %926 = fmul float %.87, %899 %927 = fadd float %926, %923 %928 = fmul float %.88, %899 %929 = fadd float %928, %924 %930 = fmul float %.89, %899 %931 = fadd float %930, %925 %932 = fmul float %139, %902 %933 = fadd float %932, %927 %934 = fmul float %140, %902 %935 = fadd float %934, %929 %936 = fmul float %141, %902 %937 = fadd float %936, %931 %938 = call float @llvm.AMDGPU.lrp(float %182, float %933, float %920) %939 = call float @llvm.AMDGPU.lrp(float %182, float %935, float %921) %940 = call float @llvm.AMDGPU.lrp(float %182, float %937, float %922) %941 = fmul float %938, 5.000000e-01 %942 = fadd float %941, 5.000000e-01 %943 = fmul float %939, 5.000000e-01 %944 = fadd float %943, 5.000000e-01 %945 = fmul float %940, 5.000000e-01 %946 = fadd float %945, 5.000000e-01 %947 = call i32 @llvm.SI.packf16(float %424, float %425) %948 = bitcast i32 %947 to float %949 = call i32 @llvm.SI.packf16(float %426, float 0.000000e+00) %950 = bitcast i32 %949 to float %951 = call i32 @llvm.SI.packf16(float %604, float %601) %952 = bitcast i32 %951 to float %953 = call i32 @llvm.SI.packf16(float %605, float 0.000000e+00) %954 = bitcast i32 %953 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %948, float %950, float %948, float %950) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 1, i32 1, float %952, float %954, float %952, float %954) %955 = call i32 @llvm.SI.packf16(float %942, float %944) %956 = bitcast i32 %955 to float %957 = call i32 @llvm.SI.packf16(float %946, float 0.000000e+00) %958 = bitcast i32 %957 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 2, i32 0, float %102, float %102, float %102, float %102) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 3, i32 1, float %956, float %958, float %956, float %958) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v41, v0, 0, 0, [m0] ; C8A40000 s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 v_interp_p2_f32 v41, [v41], v1, 0, 0, [m0] ; C8A50001 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 v_interp_p1_f32 v42, v0, 1, 0, [m0] ; C8A80100 v_interp_p2_f32 v42, [v42], v1, 1, 0, [m0] ; C8A90101 v_interp_p1_f32 v53, v0, 2, 0, [m0] ; C8D40200 v_interp_p2_f32 v53, [v53], v1, 2, 0, [m0] ; C8D50201 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s18, s[0:3], 0x4c ; C209014C s_buffer_load_dword s25, s[0:3], 0x4d ; C20C814D s_buffer_load_dword s19, s[0:3], 0x4e ; C209814E s_buffer_load_dword s24, s[0:3], 0x50 ; C20C0150 s_buffer_load_dword s26, s[0:3], 0x51 ; C20D0151 s_buffer_load_dword s27, s[0:3], 0x52 ; C20D8152 s_buffer_load_dword s14, s[0:3], 0x5c ; C207015C s_buffer_load_dword s15, s[0:3], 0x5d ; C207815D s_buffer_load_dword s16, s[0:3], 0x5e ; C208015E s_buffer_load_dword s12, s[0:3], 0x60 ; C2060160 s_buffer_load_dword s13, s[0:3], 0x61 ; C2068161 s_buffer_load_dword s17, s[0:3], 0x62 ; C2088162 v_interp_p1_f32 v8, v0, 1, 1, [m0] ; C8200500 v_interp_p2_f32 v8, [v8], v1, 1, 1, [m0] ; C8210501 v_interp_p1_f32 v7, v0, 2, 1, [m0] ; C81C0600 v_interp_p2_f32 v7, [v7], v1, 2, 1, [m0] ; C81D0601 v_interp_p1_f32 v3, v0, 0, 2, [m0] ; C80C0800 v_interp_p2_f32 v3, [v3], v1, 0, 2, [m0] ; C80D0801 v_interp_p1_f32 v4, v0, 1, 2, [m0] ; C8100900 v_interp_p2_f32 v4, [v4], v1, 1, 2, [m0] ; C8110901 v_interp_p1_f32 v5, v0, 2, 2, [m0] ; C8140A00 v_interp_p2_f32 v5, [v5], v1, 2, 2, [m0] ; C8150A01 v_interp_p1_f32 v12, v0, 0, 3, [m0] ; C8300C00 v_interp_p2_f32 v12, [v12], v1, 0, 3, [m0] ; C8310C01 v_interp_p1_f32 v10, v0, 1, 3, [m0] ; C8280D00 v_interp_p2_f32 v10, [v10], v1, 1, 3, [m0] ; C8290D01 v_interp_p1_f32 v9, v0, 2, 3, [m0] ; C8240E00 v_interp_p2_f32 v9, [v9], v1, 2, 3, [m0] ; C8250E01 v_interp_p1_f32 v16, v0, 3, 3, [m0] ; C8400F00 v_interp_p2_f32 v16, [v16], v1, 3, 3, [m0] ; C8410F01 v_interp_p1_f32 v14, v0, 0, 4, [m0] ; C8381000 v_interp_p2_f32 v14, [v14], v1, 0, 4, [m0] ; C8391001 v_interp_p1_f32 v13, v0, 1, 4, [m0] ; C8341100 v_interp_p2_f32 v13, [v13], v1, 1, 4, [m0] ; C8351101 v_interp_p1_f32 v11, v0, 2, 4, [m0] ; C82C1200 v_interp_p2_f32 v11, [v11], v1, 2, 4, [m0] ; C82D1201 v_interp_p1_f32 v15, v0, 3, 4, [m0] ; C83C1300 v_interp_p2_f32 v15, [v15], v1, 3, 4, [m0] ; C83D1301 v_interp_p1_f32 v21, v0, 0, 5, [m0] ; C8541400 v_interp_p2_f32 v21, [v21], v1, 0, 5, [m0] ; C8551401 v_interp_p1_f32 v22, v0, 1, 5, [m0] ; C8581500 v_interp_p2_f32 v22, [v22], v1, 1, 5, [m0] ; C8591501 s_load_dwordx4 s[28:31], s[4:5], 0x4 ; C08E0504 s_load_dwordx4 s[20:23], s[4:5], 0x8 ; C08A0508 v_interp_p1_f32 v23, v0, 2, 5, [m0] ; C85C1600 v_interp_p2_f32 v23, [v23], v1, 2, 5, [m0] ; C85D1601 v_interp_p1_f32 v24, v0, 3, 5, [m0] ; C8601700 v_interp_p2_f32 v24, [v24], v1, 3, 5, [m0] ; C8611701 v_interp_p1_f32 v54, v0, 0, 6, [m0] ; C8D81800 v_interp_p2_f32 v54, [v54], v1, 0, 6, [m0] ; C8D91801 v_interp_p1_f32 v55, v0, 1, 6, [m0] ; C8DC1900 v_interp_p2_f32 v55, [v55], v1, 1, 6, [m0] ; C8DD1901 v_interp_p1_f32 v25, v0, 2, 6, [m0] ; C8641A00 v_interp_p2_f32 v25, [v25], v1, 2, 6, [m0] ; C8651A01 v_interp_p1_f32 v26, v0, 3, 6, [m0] ; C8681B00 v_interp_p2_f32 v26, [v26], v1, 3, 6, [m0] ; C8691B01 v_interp_p1_f32 v56, v0, 0, 7, [m0] ; C8E01C00 v_interp_p2_f32 v56, [v56], v1, 0, 7, [m0] ; C8E11C01 v_interp_p1_f32 v57, v0, 1, 7, [m0] ; C8E41D00 v_interp_p2_f32 v57, [v57], v1, 1, 7, [m0] ; C8E51D01 v_interp_p1_f32 v35, v0, 2, 7, [m0] ; C88C1E00 v_interp_p2_f32 v35, [v35], v1, 2, 7, [m0] ; C88D1E01 v_interp_p1_f32 v36, v0, 3, 7, [m0] ; C8901F00 v_interp_p2_f32 v36, [v36], v1, 3, 7, [m0] ; C8911F01 v_interp_p1_f32 v17, v0, 0, 8, [m0] ; C8442000 v_interp_p2_f32 v17, [v17], v1, 0, 8, [m0] ; C8452001 v_interp_p1_f32 v18, v0, 1, 8, [m0] ; C8482100 v_interp_p2_f32 v18, [v18], v1, 1, 8, [m0] ; C8492101 s_load_dwordx8 s[64:71], s[6:7], 0x8 ; C0E00708 v_interp_p1_f32 v19, v0, 2, 8, [m0] ; C84C2200 v_interp_p2_f32 v19, [v19], v1, 2, 8, [m0] ; C84D2201 v_interp_p1_f32 v20, v0, 3, 8, [m0] ; C8502300 v_interp_p2_f32 v20, [v20], v1, 3, 8, [m0] ; C8512301 s_load_dwordx4 s[44:47], s[4:5], 0xc ; C096050C s_load_dwordx4 s[32:35], s[4:5], 0x10 ; C0900510 s_load_dwordx8 s[56:63], s[6:7], 0x10 ; C0DC0710 s_load_dwordx8 s[48:55], s[6:7], 0x18 ; C0D80718 s_load_dwordx8 s[36:43], s[6:7], 0x20 ; C0D20720 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[27:30], 15, 0, 0, 0, 0, 0, 0, 0, v[54:55], s[64:71], s[28:31] ; F0800F00 00F01B36 image_sample v[31:34], 15, 0, 0, 0, 0, 0, 0, 0, v[25:26], s[64:71], s[28:31] ; F0800F00 00F01F19 image_sample v[37:40], 15, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[64:71], s[28:31] ; F0800F00 00F02513 v_mov_b32_e32 v58, 0x3c000000 ; 7E7402FF 3C000000 v_mul_f32_e32 v59, v58, v42 ; 1076553A v_mul_f32_e32 v0, s25, v59 ; 10007619 v_mul_f32_e32 v60, v58, v41 ; 1078533A v_mad_f32 v0, s18, v60, -v0 ; D2820000 84027812 v_add_f32_e32 v0, s19, v0 ; 06000013 v_mul_f32_e32 v1, s26, v59 ; 1002761A v_mad_f32 v1, s24, v60, -v1 ; D2820001 84067818 v_add_f32_e32 v1, s27, v1 ; 0602021B image_sample v[41:44], 15, 0, 0, 0, 0, 0, 0, 0, v[56:57], s[56:63], s[20:23] ; F0800F00 00AE2938 image_sample v[45:48], 15, 0, 0, 0, 0, 0, 0, 0, v[35:36], s[56:63], s[20:23] ; F0800F00 00AE2D23 image_sample v[49:52], 15, 0, 0, 0, 0, 0, 0, 0, v[0:1], s[56:63], s[20:23] ; F0800F00 00AE3100 v_mul_f32_e32 v53, v58, v53 ; 106A6B3A v_mul_f32_e32 v58, s15, v53 ; 10746A0F v_mad_f32 v58, s14, v60, -v58 ; D282003A 84EA780E v_mul_f32_e32 v61, s13, v53 ; 107A6A0D v_mad_f32 v61, s12, v60, -v61 ; D282003D 84F6780C v_add_f32_e32 v62, s16, v58 ; 067C7410 v_add_f32_e32 v63, s17, v61 ; 067E7A11 v_mov_b32_e32 v58, s16 ; 7E740210 v_mad_f32 v58, s14, v59, v58 ; D282003A 04EA760E v_mul_f32_e32 v61, s15, v59 ; 107A760F v_mad_f32 v61, s14, v60, -v61 ; D282003D 84F6780E v_add_f32_e32 v64, s16, v61 ; 06807A10 v_mad_f32 v66, s15, -v53, v58 ; D2820042 44EA6A0F v_mul_f32_e32 v58, s13, v59 ; 1074760D s_load_dwordx8 s[20:27], s[6:7], 0x0 ; C0CA0700 v_mad_f32 v58, s12, v60, -v58 ; D282003A 84EA780C v_add_f32_e32 v65, s17, v58 ; 06827411 v_mov_b32_e32 v58, s17 ; 7E740211 v_mad_f32 v58, s12, v59, v58 ; D282003A 04EA760C v_mad_f32 v67, s13, -v53, v58 ; D2820043 44EA6A0D image_sample v[58:61], 15, 0, 0, 0, 0, 0, 0, 0, v[66:67], s[48:55], s[44:47] ; F0800F00 016C3A42 image_sample v[68:71], 15, 0, 0, 0, 0, 0, 0, 0, v[62:63], s[48:55], s[44:47] ; F0800F00 016C443E image_sample v[72:75], 15, 0, 0, 0, 0, 0, 0, 0, v[64:65], s[48:55], s[44:47] ; F0800F00 016C4840 s_load_dwordx4 s[44:47], s[4:5], 0x1c ; C096051C s_load_dwordx8 s[48:55], s[6:7], 0x38 ; C0D80738 s_waitcnt vmcnt(3) lgkmcnt(0) ; BF8C0073 image_sample v[76:78], 7, 0, 0, 0, 0, 0, 0, 0, v[21:22], s[20:27], s[8:11] ; F0800700 00454C15 image_sample v[79:81], 7, 0, 0, 0, 0, 0, 0, 0, v[23:24], s[20:27], s[8:11] ; F0800700 00454F17 image_sample v[82:84], 7, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[20:27], s[8:11] ; F0800700 00455211 s_load_dwordx4 s[68:71], s[4:5], 0x18 ; C0A20518 s_load_dwordx8 s[72:79], s[6:7], 0x30 ; C0E40730 s_load_dwordx4 s[8:11], s[4:5], 0x20 ; C0840520 s_load_dwordx4 s[20:23], s[4:5], 0x24 ; C08A0524 s_load_dwordx8 s[12:19], s[6:7], 0x40 ; C0C60740 s_load_dwordx8 s[24:31], s[6:7], 0x48 ; C0CC0748 s_waitcnt vmcnt(3) ; BF8C0773 image_sample v[85:87], 7, 0, 0, 0, 0, 0, 0, 0, v[66:67], s[48:55], s[44:47] ; F0800700 016C5542 s_load_dwordx4 s[80:83], s[4:5], 0x14 ; C0A80514 s_load_dwordx8 s[84:91], s[6:7], 0x28 ; C0EA0728 image_sample v[88:90], 7, 0, 0, 0, 0, 0, 0, 0, v[62:63], s[48:55], s[44:47] ; F0800700 016C583E image_sample v[91:93], 7, 0, 0, 0, 0, 0, 0, 0, v[64:65], s[48:55], s[44:47] ; F0800700 016C5B40 s_waitcnt vmcnt(3) lgkmcnt(0) ; BF8C0073 image_sample v[94:96], 7, 0, 0, 0, 0, 0, 0, 0, v[56:57], s[72:79], s[68:71] ; F0800700 02325E38 s_load_dwordx4 s[44:47], s[4:5], 0x28 ; C0960528 s_load_dwordx4 s[56:59], s[4:5], 0x2c ; C09C052C s_load_dwordx8 s[60:67], s[6:7], 0x58 ; C0DE0758 s_load_dwordx8 s[48:55], s[6:7], 0x50 ; C0D80750 image_sample v[97:99], 7, 0, 0, 0, 0, 0, 0, 0, v[35:36], s[72:79], s[68:71] ; F0800700 02326123 image_sample v[100:102], 7, 0, 0, 0, 0, 0, 0, 0, v[0:1], s[72:79], s[68:71] ; F0800700 02326400 s_waitcnt vmcnt(5) ; BF8C0775 image_sample v[103:105], 7, 0, 0, 0, 0, 0, 0, 0, v[54:55], s[84:91], s[80:83] ; F0800700 02956736 image_sample v[106:108], 7, 0, 0, 0, 0, 0, 0, 0, v[25:26], s[84:91], s[80:83] ; F0800700 02956A19 image_sample v[109:111], 7, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[84:91], s[80:83] ; F0800700 02956D13 image_sample v[112:114], 7, 0, 0, 0, 0, 0, 0, 0, v[21:22], s[36:43], s[32:35] ; F0800700 01097015 image_sample v[115:117], 7, 0, 0, 0, 0, 0, 0, 0, v[23:24], s[36:43], s[32:35] ; F0800700 01097317 image_sample v[118:120], 7, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[36:43], s[32:35] ; F0800700 01097611 s_waitcnt vmcnt(8) lgkmcnt(0) ; BF8C0078 image_sample v[66:67], 10, 0, 0, 0, 0, 0, 0, 0, v[66:67], s[60:67], s[56:59] ; F0800A00 01CF4242 image_sample v[56:57], 10, 0, 0, 0, 0, 0, 0, 0, v[56:57], s[48:55], s[44:47] ; F0800A00 016C3838 image_sample v[21:22], 10, 0, 0, 0, 0, 0, 0, 0, v[21:22], s[12:19], s[8:11] ; F0800A00 00431515 image_sample v[53:54], 10, 0, 0, 0, 0, 0, 0, 0, v[54:55], s[24:31], s[20:23] ; F0800A00 00A63536 image_sample v[62:63], 10, 0, 0, 0, 0, 0, 0, 0, v[62:63], s[60:67], s[56:59] ; F0800A00 01CF3E3E image_sample v[35:36], 10, 0, 0, 0, 0, 0, 0, 0, v[35:36], s[48:55], s[44:47] ; F0800A00 016C2323 image_sample v[23:24], 10, 0, 0, 0, 0, 0, 0, 0, v[23:24], s[12:19], s[8:11] ; F0800A00 00431717 image_sample v[25:26], 10, 0, 0, 0, 0, 0, 0, 0, v[25:26], s[24:31], s[20:23] ; F0800A00 00A61919 image_sample v[64:65], 10, 0, 0, 0, 0, 0, 0, 0, v[64:65], s[60:67], s[56:59] ; F0800A00 01CF4040 v_add_f32_e32 v16, v16, v16 ; 06202110 v_add_f32_e64 v16, 0, v16 clamp ; D2060810 00022080 v_sub_f32_e32 v55, 1.0, v16 ; 086E20F2 v_mul_f32_e32 v27, v27, v55 ; 10366F1B v_mul_f32_e32 v28, v28, v55 ; 10386F1C v_mul_f32_e32 v29, v29, v55 ; 103A6F1D v_mul_f32_e32 v30, v30, v55 ; 103C6F1E v_mac_f32_e32 v27, v31, v16 ; 3E36211F v_mac_f32_e32 v28, v32, v16 ; 3E382120 v_mac_f32_e32 v29, v33, v16 ; 3E3A2121 v_mac_f32_e32 v30, v34, v16 ; 3E3C2122 v_mul_f32_e32 v31, v41, v55 ; 103E6F29 v_mul_f32_e32 v32, v42, v55 ; 10406F2A v_mul_f32_e32 v33, v43, v55 ; 10426F2B v_mul_f32_e32 v34, v44, v55 ; 10446F2C v_mac_f32_e32 v31, v45, v16 ; 3E3E212D v_mac_f32_e32 v32, v46, v16 ; 3E40212E v_mac_f32_e32 v33, v47, v16 ; 3E42212F v_mac_f32_e32 v34, v48, v16 ; 3E442130 v_mul_f32_e32 v41, v58, v55 ; 10526F3A v_mul_f32_e32 v42, v59, v55 ; 10546F3B v_mul_f32_e32 v43, v60, v55 ; 10566F3C v_mul_f32_e32 v44, v61, v55 ; 10586F3D v_mac_f32_e32 v41, v68, v16 ; 3E522144 v_mac_f32_e32 v42, v69, v16 ; 3E542145 v_mac_f32_e32 v43, v70, v16 ; 3E562146 v_mac_f32_e32 v44, v71, v16 ; 3E582147 v_add_f32_e32 v15, v15, v15 ; 061E1F0F v_add_f32_e64 v15, 0, v15 clamp ; D206080F 00021E80 v_sub_f32_e32 v45, 1.0, v15 ; 085A1EF2 v_mad_f32 v46, v45, v30, -v14 ; D282002E 843A3D2D v_mac_f32_e32 v14, v30, v45 ; 3E1C5B1E v_mul_f32_e32 v27, v27, v45 ; 10365B1B v_mac_f32_e32 v27, v37, v15 ; 3E361F25 v_mul_f32_e32 v28, v28, v45 ; 10385B1C v_mac_f32_e32 v28, v38, v15 ; 3E381F26 v_mul_f32_e32 v29, v29, v45 ; 103A5B1D v_mac_f32_e32 v29, v39, v15 ; 3E3A1F27 v_mac_f32_e32 v46, v40, v15 ; 3E5C1F28 v_mac_f32_e32 v14, v40, v15 ; 3E1C1F28 v_mad_f32 v30, v45, v34, -v13 ; D282001E 8436452D v_mac_f32_e32 v13, v34, v45 ; 3E1A5B22 v_mul_f32_e32 v31, v31, v45 ; 103E5B1F v_mac_f32_e32 v31, v49, v15 ; 3E3E1F31 v_mul_f32_e32 v32, v32, v45 ; 10405B20 v_mac_f32_e32 v32, v50, v15 ; 3E401F32 v_mul_f32_e32 v33, v33, v45 ; 10425B21 v_mac_f32_e32 v33, v51, v15 ; 3E421F33 v_mac_f32_e32 v30, v52, v15 ; 3E3C1F34 v_mac_f32_e32 v13, v52, v15 ; 3E1A1F34 v_mad_f32 v34, v45, v44, -v11 ; D2820022 842E592D v_mac_f32_e32 v11, v44, v45 ; 3E165B2C v_mul_f32_e32 v37, v41, v45 ; 104A5B29 v_mac_f32_e32 v37, v72, v15 ; 3E4A1F48 v_mul_f32_e32 v38, v42, v45 ; 104C5B2A v_mac_f32_e32 v38, v73, v15 ; 3E4C1F49 v_mul_f32_e32 v39, v43, v45 ; 104E5B2B v_mac_f32_e32 v39, v74, v15 ; 3E4E1F4A v_mac_f32_e32 v34, v75, v15 ; 3E441F4B v_mac_f32_e32 v11, v75, v15 ; 3E161F4B v_mul_f32_e32 v40, v76, v55 ; 10506F4C v_mul_f32_e32 v41, v77, v55 ; 10526F4D v_mul_f32_e32 v42, v78, v55 ; 10546F4E v_mac_f32_e32 v40, v79, v16 ; 3E50214F v_mac_f32_e32 v41, v80, v16 ; 3E522150 v_mac_f32_e32 v42, v81, v16 ; 3E542151 v_mul_f32_e32 v40, v40, v45 ; 10505B28 v_mac_f32_e32 v40, v82, v15 ; 3E501F52 v_mul_f32_e32 v41, v41, v45 ; 10525B29 v_mac_f32_e32 v41, v83, v15 ; 3E521F53 v_mul_f32_e32 v42, v42, v45 ; 10545B2A v_mac_f32_e32 v42, v84, v15 ; 3E541F54 v_mul_f32_e32 v43, v85, v55 ; 10566F55 v_mul_f32_e32 v44, v86, v55 ; 10586F56 v_mul_f32_e32 v47, v87, v55 ; 105E6F57 v_mac_f32_e32 v43, v88, v16 ; 3E562158 v_mac_f32_e32 v44, v89, v16 ; 3E582159 v_mac_f32_e32 v47, v90, v16 ; 3E5E215A v_mul_f32_e32 v43, v43, v45 ; 10565B2B v_mac_f32_e32 v43, v91, v15 ; 3E561F5B v_mul_f32_e32 v44, v44, v45 ; 10585B2C v_mac_f32_e32 v44, v92, v15 ; 3E581F5C v_mul_f32_e32 v47, v47, v45 ; 105E5B2F v_mac_f32_e32 v47, v93, v15 ; 3E5E1F5D v_mul_f32_e32 v48, v94, v55 ; 10606F5E v_mul_f32_e32 v49, v95, v55 ; 10626F5F v_mul_f32_e32 v50, v96, v55 ; 10646F60 s_waitcnt ; BF8C077F v_mac_f32_e32 v48, v97, v16 ; 3E602161 v_mac_f32_e32 v49, v98, v16 ; 3E622162 v_mac_f32_e32 v50, v99, v16 ; 3E642163 v_mul_f32_e32 v48, v48, v45 ; 10605B30 v_mac_f32_e32 v48, v100, v15 ; 3E601F64 v_mul_f32_e32 v49, v49, v45 ; 10625B31 v_mac_f32_e32 v49, v101, v15 ; 3E621F65 v_mul_f32_e32 v50, v50, v45 ; 10645B32 v_mac_f32_e32 v50, v102, v15 ; 3E641F66 s_waitcnt vmcnt(14) ; BF8C077E v_mul_f32_e32 v51, v103, v55 ; 10666F67 v_mul_f32_e32 v52, v104, v55 ; 10686F68 v_mul_f32_e32 v58, v105, v55 ; 10746F69 s_waitcnt vmcnt(13) ; BF8C077D v_mac_f32_e32 v51, v106, v16 ; 3E66216A v_mac_f32_e32 v52, v107, v16 ; 3E68216B v_mac_f32_e32 v58, v108, v16 ; 3E74216C v_mul_f32_e32 v51, v51, v45 ; 10665B33 s_waitcnt vmcnt(12) ; BF8C077C v_mac_f32_e32 v51, v109, v15 ; 3E661F6D v_mul_f32_e32 v52, v52, v45 ; 10685B34 v_mac_f32_e32 v52, v110, v15 ; 3E681F6E v_mul_f32_e32 v58, v58, v45 ; 10745B3A v_mac_f32_e32 v58, v111, v15 ; 3E741F6F s_waitcnt vmcnt(11) ; BF8C077B v_mul_f32_e32 v59, v112, v55 ; 10766F70 v_mul_f32_e32 v60, v113, v55 ; 10786F71 v_mul_f32_e32 v61, v114, v55 ; 107A6F72 s_waitcnt vmcnt(10) ; BF8C077A v_mac_f32_e32 v59, v115, v16 ; 3E762173 v_mac_f32_e32 v60, v116, v16 ; 3E782174 v_mac_f32_e32 v61, v117, v16 ; 3E7A2175 v_max_f32_e32 v46, 0, v46 ; 205C5C80 v_min_f32_e32 v14, 1.0, v14 ; 1E1C1CF2 v_subrev_f32_e32 v14, v46, v14 ; 0A1C1D2E v_rcp_f32_e32 v14, v14 ; 7E1C550E v_mul_f32_e32 v59, v59, v45 ; 10765B3B s_waitcnt vmcnt(9) ; BF8C0779 v_mac_f32_e32 v59, v118, v15 ; 3E761F76 v_mul_f32_e32 v60, v60, v45 ; 10785B3C v_mac_f32_e32 v60, v119, v15 ; 3E781F77 v_mul_f32_e32 v61, v61, v45 ; 107A5B3D v_mac_f32_e32 v61, v120, v15 ; 3E7A1F78 v_subrev_f32_e32 v12, v46, v12 ; 0A18192E v_mul_f32_e32 v12, v14, v12 ; 1018190E v_add_f32_e64 v12, 0, v12 clamp ; D206080C 00021880 v_mov_b32_e32 v14, 0x40400000 ; 7E1C02FF 40400000 v_mad_f32 v46, -2.0, v12, v14 ; D282002E 043A18F5 v_mul_f32_e32 v46, v46, v12 ; 105C192E v_max_f32_e32 v30, 0, v30 ; 203C3C80 v_min_f32_e32 v13, 1.0, v13 ; 1E1A1AF2 v_subrev_f32_e32 v13, v30, v13 ; 0A1A1B1E v_rcp_f32_e32 v13, v13 ; 7E1A550D v_mul_f32_e32 v68, v46, v12 ; 1088192E v_mad_f32 v12, -v12, v46, 1.0 ; D282000C 23CA5D0C v_subrev_f32_e32 v10, v30, v10 ; 0A14151E v_mul_f32_e32 v10, v13, v10 ; 1014150D v_add_f32_e64 v10, 0, v10 clamp ; D206080A 00021480 v_max_f32_e32 v13, 0, v34 ; 201A4480 v_min_f32_e32 v11, 1.0, v11 ; 1E1616F2 v_subrev_f32_e32 v11, v13, v11 ; 0A16170D v_rcp_f32_e32 v11, v11 ; 7E16550B v_mad_f32 v30, -2.0, v10, v14 ; D282001E 043A14F5 v_mul_f32_e32 v30, v30, v10 ; 103C151E v_mul_f32_e32 v10, v30, v10 ; 1014151E v_subrev_f32_e32 v9, v13, v9 ; 0A12130D v_mul_f32_e32 v9, v11, v9 ; 1012130B v_add_f32_e64 v9, 0, v9 clamp ; D2060809 00021280 v_mac_f32_e32 v14, -2.0, v9 ; 3E1C12F5 v_mul_f32_e32 v11, v14, v9 ; 1016130E v_mul_f32_e32 v9, v11, v9 ; 1012130B s_waitcnt vmcnt(5) ; BF8C0775 v_mul_f32_e32 v11, v54, v68 ; 10168936 v_mul_f32_e32 v13, v53, v68 ; 101A8935 v_max_f32_e32 v10, 0, v10 ; 20141480 v_min_f32_e32 v10, v12, v10 ; 1E14150C v_subrev_f32_e32 v12, v10, v12 ; 0A18190A v_max_f32_e32 v9, 0, v9 ; 20121280 v_min_f32_e32 v9, v12, v9 ; 1E12130C v_subrev_f32_e32 v12, v9, v12 ; 0A181909 v_mac_f32_e32 v11, v22, v12 ; 3E161916 v_mac_f32_e32 v13, v21, v12 ; 3E1A1915 s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v14, v26, v68 ; 101C891A v_mul_f32_e32 v21, v25, v68 ; 102A8919 v_mac_f32_e32 v14, v24, v12 ; 3E1C1918 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 v_mac_f32_e32 v21, v23, v12 ; 3E2A1917 image_sample v[0:1], 10, 0, 0, 0, 0, 0, 0, 0, v[0:1], s[48:55], s[44:47] ; F0800A00 016C0000 image_sample v[19:20], 10, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[24:31], s[20:23] ; F0800A00 00A61313 s_buffer_load_dword s5, s[0:3], 0x4 ; C2028104 image_sample v[17:18], 10, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[12:19], s[8:11] ; F0800A00 00431111 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v18, v18, v12 ; 10241912 v_mul_f32_e32 v17, v17, v12 ; 10221911 v_mac_f32_e32 v18, v20, v68 ; 3E248914 v_mac_f32_e32 v17, v19, v68 ; 3E228913 v_mul_f32_e32 v19, s4, v27 ; 10263604 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_buffer_load_dword s6, s[0:3], 0x5 ; C2030105 s_buffer_load_dword s7, s[0:3], 0x6 ; C2038106 v_mul_f32_e32 v20, s5, v40 ; 10285005 s_buffer_load_dword s5, s[0:3], 0xa ; C202810A v_mul_f32_e32 v20, v12, v20 ; 1028290C v_mac_f32_e32 v20, v68, v19 ; 3E282744 s_buffer_load_dword s8, s[0:3], 0xc ; C204010C s_buffer_load_dword s9, s[0:3], 0xd ; C204810D s_buffer_load_dword s10, s[0:3], 0xe ; C205010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v19, s4, v28 ; 10263804 v_mul_f32_e32 v22, s6, v41 ; 102C5206 v_mul_f32_e32 v22, v12, v22 ; 102C2D0C v_mac_f32_e32 v22, v68, v19 ; 3E2C2744 v_mul_f32_e32 v19, s5, v29 ; 10263A05 v_mul_f32_e32 v23, s7, v42 ; 102E5407 v_mul_f32_e32 v23, v12, v23 ; 102E2F0C v_mac_f32_e32 v23, v68, v19 ; 3E2E2744 v_mul_f32_e32 v19, v59, v12 ; 1026193B v_mul_f32_e32 v24, v60, v12 ; 1030193C v_mul_f32_e32 v12, v61, v12 ; 1018193D v_mac_f32_e32 v19, v51, v68 ; 3E268933 v_mac_f32_e32 v24, v52, v68 ; 3E308934 v_mac_f32_e32 v12, v58, v68 ; 3E18893A v_mac_f32_e32 v11, v57, v10 ; 3E161539 v_mac_f32_e32 v13, v56, v10 ; 3E1A1538 v_mac_f32_e32 v14, v36, v10 ; 3E1C1524 v_mac_f32_e32 v21, v35, v10 ; 3E2A1523 v_mac_f32_e32 v18, v1, v10 ; 3E241501 v_mac_f32_e32 v17, v0, v10 ; 3E221500 v_mul_f32_e32 v0, s8, v31 ; 10003E08 v_mac_f32_e32 v20, v0, v10 ; 3E281500 v_mul_f32_e32 v0, s9, v32 ; 10004009 v_mac_f32_e32 v22, v0, v10 ; 3E2C1500 v_mul_f32_e32 v0, s10, v33 ; 1000420A v_mac_f32_e32 v23, v0, v10 ; 3E2E1500 v_mac_f32_e32 v19, v48, v10 ; 3E261530 v_mac_f32_e32 v24, v49, v10 ; 3E301531 v_mac_f32_e32 v12, v50, v10 ; 3E181532 v_mac_f32_e32 v11, v67, v9 ; 3E161343 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 v_mac_f32_e32 v13, v66, v9 ; 3E1A1342 s_buffer_load_dword s5, s[0:3], 0x11 ; C2028111 v_mac_f32_e32 v14, v63, v9 ; 3E1C133F s_buffer_load_dword s6, s[0:3], 0x12 ; C2030112 v_mac_f32_e32 v21, v62, v9 ; 3E2A133E v_mac_f32_e32 v18, v65, v9 ; 3E241341 v_mac_f32_e32 v17, v64, v9 ; 3E221340 s_buffer_load_dword s7, s[0:3], 0x38 ; C2038138 s_buffer_load_dword s8, s[0:3], 0x39 ; C2040139 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v37 ; 10004A04 v_mac_f32_e32 v20, v9, v0 ; 3E280109 v_mul_f32_e32 v0, s5, v38 ; 10004C05 v_mac_f32_e32 v22, v9, v0 ; 3E2C0109 v_mul_f32_e32 v0, s6, v39 ; 10004E06 v_mac_f32_e32 v23, v9, v0 ; 3E2E0109 v_mul_f32_e32 v0, v6, v6 ; 10000D06 v_mac_f32_e32 v0, v8, v8 ; 3E001108 v_mac_f32_e32 v0, v7, v7 ; 3E000F07 v_rsq_clamp_f32_e32 v0, v0 ; 7E005900 v_mac_f32_e32 v19, v43, v9 ; 3E26132B v_mac_f32_e32 v24, v44, v9 ; 3E30132C v_mac_f32_e32 v12, v47, v9 ; 3E18132F v_mul_f32_e32 v1, v0, v6 ; 10020D00 v_mul_f32_e32 v6, v0, v8 ; 100C1100 v_mul_f32_e32 v9, 0, v1 ; 10120280 v_mad_f32 v8, v0, v8, v9 ; D2820008 04261100 v_mac_f32_e32 v9, 0, v6 ; 3E120C80 v_mad_f32 v9, -v7, v0, v9 ; D2820009 24260107 v_mul_f32_e32 v0, v0, v7 ; 10000F00 s_buffer_load_dword s2, s[0:3], 0x0 ; C2010100 v_mac_f32_e32 v8, 0, v0 ; 3E100080 v_mad_f32 v7, -v8, v1, 0 ; D2820007 22020308 v_mad_f32 v10, -v8, v6, 1.0 ; D282000A 23CA0D08 v_mad_f32 v8, -v8, v0, 0 ; D2820008 22020108 v_mul_f32_e32 v25, v7, v7 ; 10320F07 v_mac_f32_e32 v25, v10, v10 ; 3E32150A v_mac_f32_e32 v25, v8, v8 ; 3E321108 v_sqrt_f32_e32 v25, v25 ; 7E326719 v_mov_b32_e32 v26, 0x3727c5ac ; 7E3402FF 3727C5AC v_cmp_lt_f32_e32 vcc, v25, v26 ; 7C023519 v_mad_f32 v11, 2.0, v11, -1.0 ; D282000B 03CE16F4 v_mad_f32 v13, 2.0, v13, -1.0 ; D282000D 03CE1AF4 v_mad_f32 v25, -v13, v13, 1.0 ; D2820019 23CA1B0D v_mad_f32 v25, -v11, v11, v25 ; D2820019 2466170B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v11, s2, v11 ; 10161602 v_mul_f32_e32 v13, s2, v13 ; 101A1A02 v_add_f32_e64 v25, 0, v25 clamp ; D2060819 00023280 v_sqrt_f32_e32 v25, v25 ; 7E326719 v_mul_f32_e32 v27, v11, v11 ; 1036170B v_mac_f32_e32 v27, v13, v13 ; 3E361B0D v_mac_f32_e32 v27, v25, v25 ; 3E363319 v_rsq_clamp_f32_e32 v27, v27 ; 7E36591B v_mad_f32 v28, -v9, v1, 0 ; D282001C 22020309 v_mad_f32 v29, -v9, v6, 0 ; D282001D 22020D09 v_mad_f32 v9, -v9, v0, -1.0 ; D2820009 23CE0109 v_mul_f32_e32 v30, v28, v28 ; 103C391C v_mac_f32_e32 v30, v29, v29 ; 3E3C3B1D v_mac_f32_e32 v30, v9, v9 ; 3E3C1309 v_sqrt_f32_e32 v30, v30 ; 7E3C671E v_cmp_lt_f32_e64 s[0:1], v30, v26 ; D0020000 0002351E v_mul_f32_e32 v11, v27, v11 ; 1016171B v_mul_f32_e32 v13, v27, v13 ; 101A1B1B v_cndmask_b32_e64 v7, v7, 0, vcc ; D2000007 01A90107 v_cndmask_b32_e64 v28, v28, 0, s[0:1] ; D200001C 0001011C v_mul_f32_e32 v30, v13, v28 ; 103C390D v_mad_f32 v7, v7, v11, -v30 ; D2820007 847A1707 v_cndmask_b32_e64 v10, v10, 1.0, vcc ; D200000A 01A9E50A v_cndmask_b32_e64 v29, v29, 0, s[0:1] ; D200001D 0001011D v_mul_f32_e32 v30, v13, v29 ; 103C3B0D v_mad_f32 v10, v10, v11, -v30 ; D282000A 847A170A v_cndmask_b32_e64 v8, v8, 0, vcc ; D2000008 01A90108 v_cndmask_b32_e64 v9, v9, -1.0, s[0:1] ; D2000009 0001E709 v_mul_f32_e32 v13, v13, v9 ; 101A130D v_mad_f32 v8, v8, v11, -v13 ; D2820008 84361708 v_mul_f32_e32 v11, v27, v25 ; 1016331B v_mad_f32 v13, 2.0, v14, -1.0 ; D282000D 03CE1CF4 v_mad_f32 v14, 2.0, v21, -1.0 ; D282000E 03CE2AF4 v_mad_f32 v21, -v14, v14, 1.0 ; D2820015 23CA1D0E v_mad_f32 v21, -v13, v13, v21 ; D2820015 24561B0D v_mul_f32_e32 v13, s2, v13 ; 101A1A02 v_mul_f32_e32 v14, s2, v14 ; 101C1C02 v_add_f32_e64 v21, 0, v21 clamp ; D2060815 00022A80 v_sqrt_f32_e32 v21, v21 ; 7E2A6715 v_mul_f32_e32 v25, v13, v13 ; 10321B0D v_mac_f32_e32 v25, v14, v14 ; 3E321D0E v_mac_f32_e32 v25, v21, v21 ; 3E322B15 v_rsq_clamp_f32_e32 v25, v25 ; 7E325919 v_mac_f32_e32 v7, v11, v1 ; 3E0E030B v_mac_f32_e32 v10, v11, v6 ; 3E140D0B v_mac_f32_e32 v8, v11, v0 ; 3E10010B v_mul_f32_e32 v11, v25, v13 ; 10161B19 v_mul_f32_e32 v13, v25, v14 ; 101A1D19 v_mul_f32_e32 v14, v25, v21 ; 101C2B19 v_mad_f32 v21, 0, v6, v1 ; D2820015 04060C80 v_mac_f32_e32 v21, 0, v0 ; 3E2A0080 v_mad_f32 v25, -v21, v1, 1.0 ; D2820019 23CA0315 v_mad_f32 v27, -v21, v6, 0 ; D282001B 22020D15 v_mad_f32 v21, -v21, v0, 0 ; D2820015 22020115 v_mul_f32_e32 v30, v25, v25 ; 103C3319 v_mac_f32_e32 v30, v27, v27 ; 3E3C371B v_mac_f32_e32 v30, v21, v21 ; 3E3C2B15 v_sqrt_f32_e32 v30, v30 ; 7E3C671E v_cmp_lt_f32_e32 vcc, v30, v26 ; 7C02351E v_mul_f32_e32 v28, v13, v28 ; 1038390D v_mul_f32_e32 v29, v13, v29 ; 103A3B0D v_mul_f32_e32 v9, v13, v9 ; 1012130D v_cndmask_b32_e64 v13, v25, 1.0, vcc ; D200000D 01A9E519 v_cndmask_b32_e64 v25, v27, 0, vcc ; D2000019 01A9011B v_cndmask_b32_e64 v21, v21, 0, vcc ; D2000015 01A90115 v_mad_f32 v27, v13, v11, -v28 ; D282001B 8472170D v_mad_f32 v28, v25, v11, -v29 ; D282001C 84761719 v_mad_f32 v9, v21, v11, -v9 ; D2820009 84261715 v_mac_f32_e32 v27, v14, v1 ; 3E36030E v_mac_f32_e32 v28, v14, v6 ; 3E380D0E v_mac_f32_e32 v9, v14, v0 ; 3E12010E v_mul_f32_e32 v7, v7, v55 ; 100E6F07 v_mac_f32_e32 v7, v27, v16 ; 3E0E211B v_mul_f32_e32 v10, v10, v55 ; 10146F0A v_mac_f32_e32 v10, v28, v16 ; 3E14211C v_mul_f32_e32 v8, v8, v55 ; 10106F08 v_mac_f32_e32 v8, v9, v16 ; 3E102109 v_mad_f32 v9, 0, v1, -v6 ; D2820009 841A0280 v_mac_f32_e32 v9, 0, v0 ; 3E120080 v_mad_f32 v11, -v9, v1, 0 ; D282000B 22020309 v_mad_f32 v14, -v9, v6, -1.0 ; D282000E 23CE0D09 v_mad_f32 v9, -v9, v0, 0 ; D2820009 22020109 v_mul_f32_e32 v16, v11, v11 ; 1020170B v_mac_f32_e32 v16, v14, v14 ; 3E201D0E v_mac_f32_e32 v16, v9, v9 ; 3E201309 v_sqrt_f32_e32 v16, v16 ; 7E206710 v_cmp_lt_f32_e32 vcc, v16, v26 ; 7C023510 v_mad_f32 v16, 2.0, v18, -1.0 ; D2820010 03CE24F4 v_mad_f32 v17, 2.0, v17, -1.0 ; D2820011 03CE22F4 v_mad_f32 v18, -v17, v17, 1.0 ; D2820012 23CA2311 v_mad_f32 v18, -v16, v16, v18 ; D2820012 244A2110 v_mul_f32_e32 v16, s2, v16 ; 10202002 v_mul_f32_e32 v17, s2, v17 ; 10222202 v_add_f32_e64 v18, 0, v18 clamp ; D2060812 00022480 v_sqrt_f32_e32 v18, v18 ; 7E246712 v_mul_f32_e32 v26, v16, v16 ; 10342110 v_mac_f32_e32 v26, v17, v17 ; 3E342311 v_mac_f32_e32 v26, v18, v18 ; 3E342512 v_rsq_clamp_f32_e32 v26, v26 ; 7E34591A v_cndmask_b32_e64 v11, v11, 0, vcc ; D200000B 01A9010B v_cndmask_b32_e64 v14, v14, -1.0, vcc ; D200000E 01A9E70E v_cndmask_b32_e64 v9, v9, 0, vcc ; D2000009 01A90109 v_mul_f32_e32 v17, v26, v17 ; 1022231A v_mul_f32_e32 v11, v17, v11 ; 10161711 v_mul_f32_e32 v14, v17, v14 ; 101C1D11 v_mul_f32_e32 v9, v17, v9 ; 10121311 v_mul_f32_e32 v16, v26, v16 ; 1020211A v_mad_f32 v11, v13, v16, -v11 ; D282000B 842E210D v_mad_f32 v13, v25, v16, -v14 ; D282000D 843A2119 v_mad_f32 v9, v21, v16, -v9 ; D2820009 84262115 v_mul_f32_e32 v14, v26, v18 ; 101C251A v_mac_f32_e32 v11, v14, v1 ; 3E16030E v_mac_f32_e32 v13, v14, v6 ; 3E1A0D0E v_mac_f32_e32 v9, v14, v0 ; 3E12010E v_mul_f32_e32 v0, v7, v45 ; 10005B07 v_mac_f32_e32 v0, v11, v15 ; 3E001F0B v_mul_f32_e32 v1, v10, v45 ; 10025B0A v_mac_f32_e32 v1, v13, v15 ; 3E021F0D v_mul_f32_e32 v6, v8, v45 ; 100C5B08 v_mac_f32_e32 v6, v9, v15 ; 3E0C1F09 v_mul_f32_e32 v3, v3, v20 ; 10062903 v_mul_f32_e32 v4, v4, v22 ; 10082D04 v_mul_f32_e32 v5, v5, v23 ; 100A2F05 v_cvt_pkrtz_f16_f32_e32 v3, v3, v4 ; 5E060903 v_cvt_pkrtz_f16_f32_e64 v4, v5, 0 ; D25E0004 00010105 exp 15, 0, 1, 0, 0, v3, v4, v3, v4 ; F800040F 04030403 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v3, s7, v19 ; 10062607 v_cvt_pkrtz_f16_f32_e32 v3, v3, v24 ; 5E063103 v_mul_f32_e32 v4, s8, v12 ; 10081808 v_cvt_pkrtz_f16_f32_e64 v4, v4, 0 ; D25E0004 00010104 exp 15, 1, 1, 0, 0, v3, v4, v3, v4 ; F800041F 04030403 v_mad_f32 v0, 0.5, v0, 0.5 ; D2820000 03C200F0 v_mad_f32 v1, 0.5, v1, 0.5 ; D2820001 03C202F0 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 exp 15, 2, 0, 0, 0, v2, v2, v2, v2 ; F800002F 02020202 v_mad_f32 v1, 0.5, v6, 0.5 ; D2820001 03C20CF0 v_cvt_pkrtz_f16_f32_e64 v1, v1, 0 ; D25E0001 00010101 exp 15, 3, 1, 1, 1, v0, v1, v0, v1 ; F8001C3F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 96 VGPRS: 124 Code Size: 2848 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x7 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[2], PERSPECTIVE DCL IN[2], GENERIC[3], PERSPECTIVE DCL IN[3], GENERIC[5], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL OUT[2], COLOR[2] DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL CONST[1][0..3] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0..7], LOCAL IMM[0] UINT32 {0, 16, 4, 48} IMM[1] FLT32 { 2.0000, -1.0000, 1.0000, 0.5000} IMM[2] UINT32 {44, 0, 0, 0} 0: DP3 TEMP[0].x, IN[1].xyzz, IN[1].xyzz 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MUL TEMP[0].xyz, IN[1].xyzz, TEMP[0].xxxx 3: MOV TEMP[1].xy, IN[0].xyyy 4: TEX TEMP[1], TEMP[1], SAMP[0], 2D 5: MOV TEMP[2].w, TEMP[1].wwww 6: MUL TEMP[2].xyz, CONST[1][1].xyzz, TEMP[1].xyzz 7: MUL TEMP[1], TEMP[2], IN[2] 8: MOV TEMP[2].xy, IN[0].xyyy 9: TEX TEMP[2].xyz, TEMP[2], SAMP[1], 2D 10: MUL TEMP[3].xyz, TEMP[0].zxyy, IN[3].yzxx 11: MAD TEMP[3].xyz, TEMP[0].yzxx, IN[3].zxyy, -TEMP[3].xyzz 12: MOV TEMP[4].xy, IN[0].xyyy 13: TEX TEMP[4].xyw, TEMP[4], SAMP[2], 2D 14: MAD TEMP[5].xy, TEMP[4].wyyy, IMM[1].xxxx, IMM[1].yyyy 15: MOV TEMP[6].x, TEMP[5].xxxx 16: MOV TEMP[6].y, -TEMP[5].yyyy 17: MUL TEMP[6].xy, TEMP[6].xyyy, CONST[1][0].xxxx 18: MOV TEMP[7].x, TEMP[6].xxxx 19: MOV TEMP[7].y, TEMP[6].yyyy 20: DP2 TEMP[5].x, TEMP[5].xyyy, TEMP[5].xyyy 21: ADD TEMP[5].x, IMM[1].zzzz, -TEMP[5].xxxx 22: MOV_SAT TEMP[5].x, TEMP[5].xxxx 23: SQRT TEMP[5].x, TEMP[5].xxxx 24: MOV TEMP[7].z, TEMP[5].xxxx 25: DP3 TEMP[5].x, TEMP[7].xyzz, TEMP[7].xyzz 26: RSQ TEMP[5].x, TEMP[5].xxxx 27: MUL TEMP[5].xyz, TEMP[7].xyzz, TEMP[5].xxxx 28: DP3 TEMP[6].x, IN[3].xyzz, IN[3].xyzz 29: RSQ TEMP[6].x, TEMP[6].xxxx 30: MUL TEMP[6].xyz, IN[3].xyzz, TEMP[6].xxxx 31: DP3 TEMP[7].x, TEMP[3].xyzz, TEMP[3].xyzz 32: RSQ TEMP[7].x, TEMP[7].xxxx 33: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[7].xxxx 34: MUL TEMP[3].xyz, IN[3].wwww, TEMP[3].xyzz 35: MUL TEMP[3].xyz, TEMP[5].yyyy, TEMP[3].xyzz 36: MAD TEMP[3].xyz, TEMP[5].xxxx, TEMP[6].xyzz, TEMP[3].xyzz 37: MAD TEMP[0].xyz, TEMP[0].xyzz, TEMP[5].zzzz, TEMP[3].xyzz 38: DP3 TEMP[3].x, TEMP[0].xyzz, TEMP[0].xyzz 39: RSQ TEMP[3].x, TEMP[3].xxxx 40: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[3].xxxx 41: MAD TEMP[0].xyz, TEMP[0].xyzz, IMM[1].wwww, IMM[1].wwww 42: MUL TEMP[3].x, IN[2].wwww, TEMP[1].wwww 43: MUL TEMP[3].x, TEMP[3].xxxx, CONST[1][0].yyyy 44: MOV TEMP[5].x, TEMP[1].xxxx 45: MOV TEMP[5].y, TEMP[1].yyyy 46: MOV TEMP[5].z, TEMP[1].zzzz 47: MOV TEMP[5].w, TEMP[3].xxxx 48: MUL TEMP[1].x, CONST[1][3].xxxx, TEMP[2].xxxx 49: MOV TEMP[1].y, TEMP[2].yyyy 50: MUL TEMP[2].x, CONST[1][2].wwww, TEMP[2].zzzz 51: MOV TEMP[1].z, TEMP[2].xxxx 52: MOV TEMP[1].w, TEMP[3].xxxx 53: MOV TEMP[2].x, TEMP[0].xxxx 54: MOV TEMP[2].y, TEMP[0].yyyy 55: MOV TEMP[2].z, TEMP[0].zzzz 56: MUL TEMP[0].x, CONST[1][0].yyyy, TEMP[4].xxxx 57: MOV TEMP[2].w, TEMP[0].xxxx 58: MOV OUT[0], TEMP[5] 59: MOV OUT[1], TEMP[2] 60: MOV OUT[2], TEMP[1] 61: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %31 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %32 = load <32 x i8>, <32 x i8> addrspace(2)* %31, align 32, !tbaa !0 %33 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %34 = load <16 x i8>, <16 x i8> addrspace(2)* %33, align 16, !tbaa !0 %35 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %36 = bitcast <8 x i32> addrspace(2)* %35 to <32 x i8> addrspace(2)* %37 = load <32 x i8>, <32 x i8> addrspace(2)* %36, align 32, !tbaa !0 %38 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %39 = bitcast <4 x i32> addrspace(2)* %38 to <16 x i8> addrspace(2)* %40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, !tbaa !0 %41 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %42 = bitcast <8 x i32> addrspace(2)* %41 to <32 x i8> addrspace(2)* %43 = load <32 x i8>, <32 x i8> addrspace(2)* %42, align 32, !tbaa !0 %44 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %45 = bitcast <4 x i32> addrspace(2)* %44 to <16 x i8> addrspace(2)* %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %48 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %49 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %50 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %51 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %52 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %53 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %54 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %55 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %56 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %57 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %58 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %59 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %60 = fmul float %49, %49 %61 = fmul float %50, %50 %62 = fadd float %61, %60 %63 = fmul float %51, %51 %64 = fadd float %62, %63 %65 = call float @llvm.AMDGPU.rsq.clamped.f32(float %64) %66 = fmul float %49, %65 %67 = fmul float %50, %65 %68 = fmul float %51, %65 %69 = bitcast float %47 to i32 %70 = bitcast float %48 to i32 %71 = insertelement <2 x i32> undef, i32 %69, i32 0 %72 = insertelement <2 x i32> %71, i32 %70, i32 1 %73 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %72, <32 x i8> %32, <16 x i8> %34, i32 2) %74 = extractelement <4 x float> %73, i32 0 %75 = extractelement <4 x float> %73, i32 1 %76 = extractelement <4 x float> %73, i32 2 %77 = extractelement <4 x float> %73, i32 3 %78 = fmul float %26, %74 %79 = fmul float %27, %75 %80 = fmul float %28, %76 %81 = fmul float %78, %52 %82 = fmul float %79, %53 %83 = fmul float %80, %54 %84 = fmul float %77, %55 %85 = bitcast float %47 to i32 %86 = bitcast float %48 to i32 %87 = insertelement <2 x i32> undef, i32 %85, i32 0 %88 = insertelement <2 x i32> %87, i32 %86, i32 1 %89 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %88, <32 x i8> %37, <16 x i8> %40, i32 2) %90 = extractelement <4 x float> %89, i32 0 %91 = extractelement <4 x float> %89, i32 1 %92 = extractelement <4 x float> %89, i32 2 %93 = fmul float %68, %57 %94 = fmul float %66, %58 %95 = fmul float %67, %56 %96 = fmul float %67, %58 %97 = fsub float %96, %93 %98 = fmul float %68, %56 %99 = fsub float %98, %94 %100 = fmul float %66, %57 %101 = fsub float %100, %95 %102 = bitcast float %47 to i32 %103 = bitcast float %48 to i32 %104 = insertelement <2 x i32> undef, i32 %102, i32 0 %105 = insertelement <2 x i32> %104, i32 %103, i32 1 %106 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %105, <32 x i8> %43, <16 x i8> %46, i32 2) %107 = extractelement <4 x float> %106, i32 0 %108 = extractelement <4 x float> %106, i32 1 %109 = extractelement <4 x float> %106, i32 3 %110 = fmul float %109, 2.000000e+00 %111 = fadd float %110, -1.000000e+00 %112 = fmul float %108, 2.000000e+00 %113 = fadd float %112, -1.000000e+00 %114 = fmul float %111, %24 %115 = fmul float %113, %24 %116 = fmul float %111, %111 %117 = fmul float %113, %113 %118 = fadd float %116, %117 %119 = fsub float 1.000000e+00, %118 %120 = call float @llvm.AMDIL.clamp.(float %119, float 0.000000e+00, float 1.000000e+00) %121 = call float @llvm.sqrt.f32(float %120) %122 = fmul float %114, %114 %123 = fmul float %115, %115 %124 = fadd float %123, %122 %125 = fmul float %121, %121 %126 = fadd float %124, %125 %127 = call float @llvm.AMDGPU.rsq.clamped.f32(float %126) %128 = fmul float %114, %127 %129 = fmul float %115, %127 %130 = fsub float -0.000000e+00, %129 %131 = fmul float %121, %127 %132 = fmul float %56, %56 %133 = fmul float %57, %57 %134 = fadd float %133, %132 %135 = fmul float %58, %58 %136 = fadd float %134, %135 %137 = call float @llvm.AMDGPU.rsq.clamped.f32(float %136) %138 = fmul float %56, %137 %139 = fmul float %57, %137 %140 = fmul float %58, %137 %141 = fmul float %97, %97 %142 = fmul float %99, %99 %143 = fadd float %142, %141 %144 = fmul float %101, %101 %145 = fadd float %143, %144 %146 = call float @llvm.AMDGPU.rsq.clamped.f32(float %145) %147 = fmul float %97, %146 %148 = fmul float %99, %146 %149 = fmul float %101, %146 %150 = fmul float %59, %147 %151 = fmul float %59, %148 %152 = fmul float %59, %149 %153 = fmul float %150, %130 %154 = fmul float %151, %130 %155 = fmul float %152, %130 %156 = fmul float %128, %138 %157 = fadd float %156, %153 %158 = fmul float %128, %139 %159 = fadd float %158, %154 %160 = fmul float %128, %140 %161 = fadd float %160, %155 %162 = fmul float %66, %131 %163 = fadd float %162, %157 %164 = fmul float %67, %131 %165 = fadd float %164, %159 %166 = fmul float %68, %131 %167 = fadd float %166, %161 %168 = fmul float %163, %163 %169 = fmul float %165, %165 %170 = fadd float %169, %168 %171 = fmul float %167, %167 %172 = fadd float %170, %171 %173 = call float @llvm.AMDGPU.rsq.clamped.f32(float %172) %174 = fmul float %163, %173 %175 = fmul float %165, %173 %176 = fmul float %167, %173 %177 = fmul float %174, 5.000000e-01 %178 = fadd float %177, 5.000000e-01 %179 = fmul float %175, 5.000000e-01 %180 = fadd float %179, 5.000000e-01 %181 = fmul float %176, 5.000000e-01 %182 = fadd float %181, 5.000000e-01 %183 = fmul float %55, %84 %184 = fmul float %183, %25 %185 = fmul float %30, %90 %186 = fmul float %29, %92 %187 = fmul float %25, %107 %188 = call i32 @llvm.SI.packf16(float %81, float %82) %189 = bitcast i32 %188 to float %190 = call i32 @llvm.SI.packf16(float %83, float %184) %191 = bitcast i32 %190 to float %192 = call i32 @llvm.SI.packf16(float %178, float %180) %193 = bitcast i32 %192 to float %194 = call i32 @llvm.SI.packf16(float %182, float %187) %195 = bitcast i32 %194 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %189, float %191, float %189, float %191) %196 = call i32 @llvm.SI.packf16(float %185, float %91) %197 = bitcast i32 %196 to float %198 = call i32 @llvm.SI.packf16(float %186, float %184) %199 = bitcast i32 %198 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 1, i32 1, float %193, float %195, float %193, float %195) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 2, i32 1, float %197, float %199, float %197, float %199) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500 v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501 v_interp_p1_f32 v6, v0, 2, 1, [m0] ; C8180600 v_interp_p2_f32 v6, [v6], v1, 2, 1, [m0] ; C8190601 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 v_interp_p1_f32 v10, v0, 3, 2, [m0] ; C8280B00 s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 v_interp_p2_f32 v10, [v10], v1, 3, 2, [m0] ; C8290B01 v_interp_p1_f32 v11, v0, 0, 3, [m0] ; C82C0C00 v_interp_p2_f32 v11, [v11], v1, 0, 3, [m0] ; C82D0C01 v_interp_p1_f32 v12, v0, 1, 3, [m0] ; C8300D00 v_interp_p2_f32 v12, [v12], v1, 1, 3, [m0] ; C8310D01 v_interp_p1_f32 v13, v0, 2, 3, [m0] ; C8340E00 v_interp_p2_f32 v13, [v13], v1, 2, 3, [m0] ; C8350E01 s_load_dwordx4 s[20:23], s[4:5], 0x0 ; C08A0500 s_load_dwordx8 s[24:31], s[6:7], 0x0 ; C0CC0700 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s32, s[0:3], 0x4 ; C2100104 v_interp_p1_f32 v0, v0, 3, 3, [m0] ; C8000F00 s_buffer_load_dword s33, s[0:3], 0x5 ; C2108105 v_interp_p2_f32 v0, [v0], v1, 3, 3, [m0] ; C8010F01 s_buffer_load_dword s34, s[0:3], 0x6 ; C2110106 s_load_dwordx4 s[36:39], s[4:5], 0x4 ; C0920504 s_load_dwordx8 s[40:47], s[6:7], 0x8 ; C0D40708 s_load_dwordx4 s[8:11], s[4:5], 0x8 ; C0840508 s_load_dwordx8 s[12:19], s[6:7], 0x10 ; C0C60710 image_sample v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[20:23] ; F0800F00 00A60E02 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v1, s32, v14 ; 10021C20 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C v_mul_f32_e32 v14, s33, v15 ; 101C1E21 s_buffer_load_dword s5, s[0:3], 0xb ; C202810B v_mul_f32_e32 v15, s34, v16 ; 101E2022 v_mul_f32_e32 v16, v10, v17 ; 1020230A image_sample v[17:19], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[40:47], s[36:39] ; F0800700 012A1102 v_mul_f32_e32 v20, v4, v4 ; 10280904 v_mac_f32_e32 v20, v5, v5 ; 3E280B05 v_mac_f32_e32 v20, v6, v6 ; 3E280D06 v_rsq_clamp_f32_e32 v20, v20 ; 7E285914 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v17, s4, v17 ; 10222204 v_cvt_pkrtz_f16_f32_e32 v17, v17, v18 ; 5E222511 v_mul_f32_e32 v18, s5, v19 ; 10242605 v_mul_f32_e32 v4, v20, v4 ; 10080914 v_mul_f32_e32 v5, v20, v5 ; 100A0B14 v_mul_f32_e32 v6, v20, v6 ; 100C0D14 v_mul_f32_e32 v19, v12, v6 ; 10260D0C v_mad_f32 v19, v5, v13, -v19 ; D2820013 844E1B05 v_mul_f32_e32 v20, v11, v11 ; 1028170B v_mac_f32_e32 v20, v12, v12 ; 3E28190C v_mac_f32_e32 v20, v13, v13 ; 3E281B0D v_rsq_clamp_f32_e32 v20, v20 ; 7E285914 v_mul_f32_e32 v21, v13, v4 ; 102A090D v_mad_f32 v21, v6, v11, -v21 ; D2820015 84561706 v_mul_f32_e32 v22, v11, v5 ; 102C0B0B v_mad_f32 v22, v4, v12, -v22 ; D2820016 845A1904 v_mul_f32_e32 v11, v20, v11 ; 10161714 v_mul_f32_e32 v23, v19, v19 ; 102E2713 v_mac_f32_e32 v23, v21, v21 ; 3E2E2B15 v_mac_f32_e32 v23, v22, v22 ; 3E2E2D16 v_rsq_clamp_f32_e32 v23, v23 ; 7E2E5917 v_mul_f32_e32 v12, v20, v12 ; 10181914 v_mul_f32_e32 v13, v20, v13 ; 101A1B14 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 v_mul_f32_e32 v19, v23, v19 ; 10262717 v_mul_f32_e32 v20, v23, v21 ; 10282B17 v_mul_f32_e32 v21, v23, v22 ; 102A2D17 s_buffer_load_dword s0, s[0:3], 0x1 ; C2000101 image_sample v[22:24], 11, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800B00 00431602 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mad_f32 v2, 2.0, v24, -1.0 ; D2820002 03CE30F4 v_mad_f32 v3, 2.0, v23, -1.0 ; D2820003 03CE2EF4 v_mad_f32 v23, -v3, v3, 1.0 ; D2820017 23CA0703 v_mad_f32 v23, -v2, v2, v23 ; D2820017 245E0502 v_mul_f32_e32 v2, s4, v2 ; 10040404 v_mul_f32_e32 v3, s4, v3 ; 10060604 v_add_f32_e64 v23, 0, v23 clamp ; D2060817 00022E80 v_sqrt_f32_e32 v23, v23 ; 7E2E6717 v_mul_f32_e32 v24, v2, v2 ; 10300502 v_mac_f32_e32 v24, v3, v3 ; 3E300703 v_mac_f32_e32 v24, v23, v23 ; 3E302F17 v_rsq_clamp_f32_e32 v24, v24 ; 7E305918 v_mul_f32_e32 v19, v19, v0 ; 10260113 v_mul_f32_e32 v20, v20, v0 ; 10280114 v_mul_f32_e32 v0, v21, v0 ; 10000115 v_mul_f32_e32 v3, v24, v3 ; 10060718 v_mul_f32_e32 v19, v3, v19 ; 10262703 v_mul_f32_e32 v20, v3, v20 ; 10282903 v_mul_f32_e32 v0, v3, v0 ; 10000103 v_mul_f32_e32 v2, v24, v2 ; 10040518 v_mad_f32 v3, v2, v11, -v19 ; D2820003 844E1702 v_mad_f32 v11, v2, v12, -v20 ; D282000B 84521902 v_mad_f32 v0, v2, v13, -v0 ; D2820000 84021B02 v_mul_f32_e32 v2, s0, v22 ; 10042C00 v_mul_f32_e32 v10, v16, v10 ; 10141510 v_mul_f32_e32 v10, s0, v10 ; 10141400 v_mul_f32_e32 v12, v24, v23 ; 10182F18 v_mac_f32_e32 v3, v12, v4 ; 3E06090C v_mac_f32_e32 v11, v12, v5 ; 3E160B0C v_mac_f32_e32 v0, v12, v6 ; 3E000D0C v_mul_f32_e32 v1, v7, v1 ; 10020307 v_mul_f32_e32 v4, v8, v14 ; 10081D08 v_mul_f32_e32 v5, v9, v15 ; 100A1F09 v_mul_f32_e32 v6, v3, v3 ; 100C0703 v_mac_f32_e32 v6, v11, v11 ; 3E0C170B v_mac_f32_e32 v6, v0, v0 ; 3E0C0100 v_rsq_clamp_f32_e32 v6, v6 ; 7E0C5906 v_cvt_pkrtz_f16_f32_e32 v1, v1, v4 ; 5E020901 v_cvt_pkrtz_f16_f32_e32 v4, v5, v10 ; 5E081505 exp 15, 0, 1, 0, 0, v1, v4, v1, v4 ; F800040F 04010401 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v1, v6, v3 ; 10020706 v_mul_f32_e32 v3, v6, v11 ; 10061706 v_mul_f32_e32 v0, v6, v0 ; 10000106 v_mad_f32 v1, 0.5, v1, 0.5 ; D2820001 03C202F0 v_mad_f32 v3, 0.5, v3, 0.5 ; D2820003 03C206F0 v_cvt_pkrtz_f16_f32_e32 v1, v1, v3 ; 5E020701 v_mad_f32 v0, 0.5, v0, 0.5 ; D2820000 03C200F0 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 exp 15, 1, 1, 0, 0, v1, v0, v1, v0 ; F800041F 00010001 s_waitcnt expcnt(0) ; BF8C070F v_cvt_pkrtz_f16_f32_e32 v0, v18, v10 ; 5E001512 exp 15, 2, 1, 1, 1, v17, v0, v17, v0 ; F8001C2F 00110011 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 28 Code Size: 628 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x7 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[2], PERSPECTIVE DCL IN[2], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL OUT[2], COLOR[2] DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0..3] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0..3], LOCAL IMM[0] UINT32 {0, 16, 4, 0} IMM[1] FLT32 { 0.5000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV TEMP[1].w, TEMP[0].wwww 3: MUL TEMP[1].xyz, CONST[1][1].xyzz, TEMP[0].xyzz 4: MUL TEMP[0], TEMP[1], IN[2] 5: DP3 TEMP[1].x, IN[1].xyzz, IN[1].xyzz 6: RSQ TEMP[1].x, TEMP[1].xxxx 7: MUL TEMP[1].xyz, IN[1].xyzz, TEMP[1].xxxx 8: MAD TEMP[1].xyz, TEMP[1].xyzz, IMM[1].xxxx, IMM[1].xxxx 9: MUL TEMP[2].x, IN[2].wwww, TEMP[0].wwww 10: MUL TEMP[2].x, TEMP[2].xxxx, CONST[1][0].yyyy 11: MOV TEMP[3].x, TEMP[0].xxxx 12: MOV TEMP[3].y, TEMP[0].yyyy 13: MOV TEMP[3].z, TEMP[0].zzzz 14: MOV TEMP[3].w, TEMP[2].xxxx 15: MOV TEMP[0].xyz, IMM[1].yyyy 16: MOV TEMP[0].w, TEMP[2].xxxx 17: MOV TEMP[2].w, IMM[1].yyyy 18: MOV TEMP[2].x, TEMP[1].xxxx 19: MOV TEMP[2].y, TEMP[1].yyyy 20: MOV TEMP[2].z, TEMP[1].zzzz 21: MOV OUT[0], TEMP[3] 22: MOV OUT[1], TEMP[2] 23: MOV OUT[2], TEMP[0] 24: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %28 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %29 = load <32 x i8>, <32 x i8> addrspace(2)* %28, align 32, !tbaa !0 %30 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 %32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %37 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %38 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %41 = bitcast float %32 to i32 %42 = bitcast float %33 to i32 %43 = insertelement <2 x i32> undef, i32 %41, i32 0 %44 = insertelement <2 x i32> %43, i32 %42, i32 1 %45 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %44, <32 x i8> %29, <16 x i8> %31, i32 2) %46 = extractelement <4 x float> %45, i32 0 %47 = extractelement <4 x float> %45, i32 1 %48 = extractelement <4 x float> %45, i32 2 %49 = extractelement <4 x float> %45, i32 3 %50 = fmul float %25, %46 %51 = fmul float %26, %47 %52 = fmul float %27, %48 %53 = fmul float %50, %37 %54 = fmul float %51, %38 %55 = fmul float %52, %39 %56 = fmul float %49, %40 %57 = fmul float %34, %34 %58 = fmul float %35, %35 %59 = fadd float %58, %57 %60 = fmul float %36, %36 %61 = fadd float %59, %60 %62 = call float @llvm.AMDGPU.rsq.clamped.f32(float %61) %63 = fmul float %34, %62 %64 = fmul float %35, %62 %65 = fmul float %36, %62 %66 = fmul float %63, 5.000000e-01 %67 = fadd float %66, 5.000000e-01 %68 = fmul float %64, 5.000000e-01 %69 = fadd float %68, 5.000000e-01 %70 = fmul float %65, 5.000000e-01 %71 = fadd float %70, 5.000000e-01 %72 = fmul float %40, %56 %73 = fmul float %72, %24 %74 = call i32 @llvm.SI.packf16(float %53, float %54) %75 = bitcast i32 %74 to float %76 = call i32 @llvm.SI.packf16(float %55, float %73) %77 = bitcast i32 %76 to float %78 = call i32 @llvm.SI.packf16(float %67, float %69) %79 = bitcast i32 %78 to float %80 = call i32 @llvm.SI.packf16(float %71, float 0.000000e+00) %81 = bitcast i32 %80 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %75, float %77, float %75, float %77) %82 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %83 = bitcast i32 %82 to float %84 = call i32 @llvm.SI.packf16(float 0.000000e+00, float %73) %85 = bitcast i32 %84 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 1, i32 1, float %79, float %81, float %79, float %81) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 2, i32 1, float %83, float %85, float %83, float %85) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500 v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501 v_interp_p1_f32 v6, v0, 2, 1, [m0] ; C8180600 v_interp_p2_f32 v6, [v6], v1, 2, 1, [m0] ; C8190601 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_buffer_load_dword s6, s[0:3], 0x6 ; C2030106 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 v_interp_p1_f32 v0, v0, 3, 2, [m0] ; C8000B00 v_interp_p2_f32 v0, [v0], v1, 3, 2, [m0] ; C8010B01 image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800F00 00430A02 s_buffer_load_dword s0, s[0:3], 0x1 ; C2000101 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v1, s4, v10 ; 10021404 v_mul_f32_e32 v2, s5, v11 ; 10041605 v_mul_f32_e32 v3, s6, v12 ; 10061806 v_mul_f32_e32 v10, v0, v13 ; 10141B00 v_mul_f32_e32 v11, v4, v4 ; 10160904 v_mac_f32_e32 v11, v5, v5 ; 3E160B05 v_mac_f32_e32 v11, v6, v6 ; 3E160D06 v_rsq_clamp_f32_e32 v11, v11 ; 7E16590B v_mul_f32_e32 v1, v7, v1 ; 10020307 v_mul_f32_e32 v2, v8, v2 ; 10040508 v_mul_f32_e32 v3, v9, v3 ; 10060709 v_mul_f32_e32 v4, v11, v4 ; 1008090B v_mul_f32_e32 v5, v11, v5 ; 100A0B0B v_mul_f32_e32 v6, v11, v6 ; 100C0D0B v_mad_f32 v4, 0.5, v4, 0.5 ; D2820004 03C208F0 v_mad_f32 v5, 0.5, v5, 0.5 ; D2820005 03C20AF0 v_mad_f32 v6, 0.5, v6, 0.5 ; D2820006 03C20CF0 v_mul_f32_e32 v0, v10, v0 ; 1000010A v_mul_f32_e32 v0, s0, v0 ; 10000000 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e32 v2, v3, v0 ; 5E040103 v_cvt_pkrtz_f16_f32_e32 v3, v4, v5 ; 5E060B04 v_cvt_pkrtz_f16_f32_e64 v4, v6, 0 ; D25E0004 00010106 exp 15, 0, 1, 0, 0, v1, v2, v1, v2 ; F800040F 02010201 s_waitcnt expcnt(0) ; BF8C070F v_cvt_pkrtz_f16_f32_e64 v1, 0, 0 ; D25E0001 00010080 v_cvt_pkrtz_f16_f32_e32 v0, 0, v0 ; 5E000080 exp 15, 1, 1, 0, 0, v3, v4, v3, v4 ; F800041F 04030403 exp 15, 2, 1, 1, 1, v1, v0, v1, v0 ; F8001C2F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Code Size: 276 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x7 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[2], PERSPECTIVE DCL IN[2], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL OUT[2], COLOR[2] DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[1][0..3] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0..4], LOCAL IMM[0] UINT32 {0, 16, 4, 48} IMM[1] FLT32 { 0.5000, 0.0000, 0.0000, 0.0000} IMM[2] UINT32 {44, 0, 0, 0} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV TEMP[1].w, TEMP[0].wwww 3: MUL TEMP[1].xyz, CONST[1][1].xyzz, TEMP[0].xyzz 4: MUL TEMP[0], TEMP[1], IN[2] 5: MOV TEMP[1].xy, IN[0].xyyy 6: TEX TEMP[1].xyz, TEMP[1], SAMP[1], 2D 7: DP3 TEMP[2].x, IN[1].xyzz, IN[1].xyzz 8: RSQ TEMP[2].x, TEMP[2].xxxx 9: MUL TEMP[2].xyz, IN[1].xyzz, TEMP[2].xxxx 10: MAD TEMP[2].xyz, TEMP[2].xyzz, IMM[1].xxxx, IMM[1].xxxx 11: MUL TEMP[3].x, IN[2].wwww, TEMP[0].wwww 12: MUL TEMP[3].x, TEMP[3].xxxx, CONST[1][0].yyyy 13: MOV TEMP[4].x, TEMP[0].xxxx 14: MOV TEMP[4].y, TEMP[0].yyyy 15: MOV TEMP[4].z, TEMP[0].zzzz 16: MOV TEMP[4].w, TEMP[3].xxxx 17: MUL TEMP[0].x, CONST[1][3].xxxx, TEMP[1].xxxx 18: MOV TEMP[0].y, TEMP[1].yyyy 19: MUL TEMP[1].x, CONST[1][2].wwww, TEMP[1].zzzz 20: MOV TEMP[0].z, TEMP[1].xxxx 21: MOV TEMP[0].w, TEMP[3].xxxx 22: MOV TEMP[1].w, IMM[1].yyyy 23: MOV TEMP[1].x, TEMP[2].xxxx 24: MOV TEMP[1].y, TEMP[2].yyyy 25: MOV TEMP[1].z, TEMP[2].zzzz 26: MOV OUT[0], TEMP[4] 27: MOV OUT[1], TEMP[1] 28: MOV OUT[2], TEMP[0] 29: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %30 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %31 = load <32 x i8>, <32 x i8> addrspace(2)* %30, align 32, !tbaa !0 %32 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0 %34 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %35 = bitcast <8 x i32> addrspace(2)* %34 to <32 x i8> addrspace(2)* %36 = load <32 x i8>, <32 x i8> addrspace(2)* %35, align 32, !tbaa !0 %37 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %38 = bitcast <4 x i32> addrspace(2)* %37 to <16 x i8> addrspace(2)* %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 %40 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %44 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %45 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %46 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %47 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %48 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %49 = bitcast float %40 to i32 %50 = bitcast float %41 to i32 %51 = insertelement <2 x i32> undef, i32 %49, i32 0 %52 = insertelement <2 x i32> %51, i32 %50, i32 1 %53 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %52, <32 x i8> %31, <16 x i8> %33, i32 2) %54 = extractelement <4 x float> %53, i32 0 %55 = extractelement <4 x float> %53, i32 1 %56 = extractelement <4 x float> %53, i32 2 %57 = extractelement <4 x float> %53, i32 3 %58 = fmul float %25, %54 %59 = fmul float %26, %55 %60 = fmul float %27, %56 %61 = fmul float %58, %45 %62 = fmul float %59, %46 %63 = fmul float %60, %47 %64 = fmul float %57, %48 %65 = bitcast float %40 to i32 %66 = bitcast float %41 to i32 %67 = insertelement <2 x i32> undef, i32 %65, i32 0 %68 = insertelement <2 x i32> %67, i32 %66, i32 1 %69 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %68, <32 x i8> %36, <16 x i8> %39, i32 2) %70 = extractelement <4 x float> %69, i32 0 %71 = extractelement <4 x float> %69, i32 1 %72 = extractelement <4 x float> %69, i32 2 %73 = fmul float %42, %42 %74 = fmul float %43, %43 %75 = fadd float %74, %73 %76 = fmul float %44, %44 %77 = fadd float %75, %76 %78 = call float @llvm.AMDGPU.rsq.clamped.f32(float %77) %79 = fmul float %42, %78 %80 = fmul float %43, %78 %81 = fmul float %44, %78 %82 = fmul float %79, 5.000000e-01 %83 = fadd float %82, 5.000000e-01 %84 = fmul float %80, 5.000000e-01 %85 = fadd float %84, 5.000000e-01 %86 = fmul float %81, 5.000000e-01 %87 = fadd float %86, 5.000000e-01 %88 = fmul float %48, %64 %89 = fmul float %88, %24 %90 = fmul float %29, %70 %91 = fmul float %28, %72 %92 = call i32 @llvm.SI.packf16(float %61, float %62) %93 = bitcast i32 %92 to float %94 = call i32 @llvm.SI.packf16(float %63, float %89) %95 = bitcast i32 %94 to float %96 = call i32 @llvm.SI.packf16(float %83, float %85) %97 = bitcast i32 %96 to float %98 = call i32 @llvm.SI.packf16(float %87, float 0.000000e+00) %99 = bitcast i32 %98 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %93, float %95, float %93, float %95) %100 = call i32 @llvm.SI.packf16(float %90, float %71) %101 = bitcast i32 %100 to float %102 = call i32 @llvm.SI.packf16(float %91, float %89) %103 = bitcast i32 %102 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 1, i32 1, float %97, float %99, float %97, float %99) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 2, i32 1, float %101, float %103, float %101, float %103) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500 v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501 v_interp_p1_f32 v6, v0, 2, 1, [m0] ; C8180600 v_interp_p2_f32 v6, [v6], v1, 2, 1, [m0] ; C8190601 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_buffer_load_dword s6, s[0:3], 0x6 ; C2030106 v_interp_p1_f32 v0, v0, 3, 2, [m0] ; C8000B00 s_buffer_load_dword s7, s[0:3], 0xc ; C203810C s_buffer_load_dword s32, s[0:3], 0xb ; C210010B v_interp_p2_f32 v0, [v0], v1, 3, 2, [m0] ; C8010B01 image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[8:11] ; F0800F00 00440A02 image_sample v[1:3], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[12:15] ; F0800700 00660102 s_buffer_load_dword s0, s[0:3], 0x1 ; C2000101 s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_mul_f32_e32 v10, s4, v10 ; 10141404 v_mul_f32_e32 v11, s5, v11 ; 10161605 v_mul_f32_e32 v12, s6, v12 ; 10181806 v_mul_f32_e32 v13, v0, v13 ; 101A1B00 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, s7, v1 ; 10020207 v_mul_f32_e32 v3, s32, v3 ; 10060620 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_mul_f32_e32 v2, v7, v10 ; 10041507 v_mul_f32_e32 v7, v8, v11 ; 100E1708 v_mul_f32_e32 v8, v9, v12 ; 10101909 v_mul_f32_e32 v0, v13, v0 ; 1000010D v_mul_f32_e32 v0, s0, v0 ; 10000000 v_mul_f32_e32 v9, v4, v4 ; 10120904 v_mac_f32_e32 v9, v5, v5 ; 3E120B05 v_mac_f32_e32 v9, v6, v6 ; 3E120D06 v_rsq_clamp_f32_e32 v9, v9 ; 7E125909 v_cvt_pkrtz_f16_f32_e32 v2, v2, v7 ; 5E040F02 v_cvt_pkrtz_f16_f32_e32 v7, v8, v0 ; 5E0E0108 exp 15, 0, 1, 0, 0, v2, v7, v2, v7 ; F800040F 07020702 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v2, v9, v4 ; 10040909 v_mul_f32_e32 v4, v9, v5 ; 10080B09 v_mul_f32_e32 v5, v9, v6 ; 100A0D09 v_mad_f32 v2, 0.5, v2, 0.5 ; D2820002 03C204F0 v_mad_f32 v4, 0.5, v4, 0.5 ; D2820004 03C208F0 v_cvt_pkrtz_f16_f32_e32 v2, v2, v4 ; 5E040902 v_mad_f32 v4, 0.5, v5, 0.5 ; D2820004 03C20AF0 v_cvt_pkrtz_f16_f32_e64 v4, v4, 0 ; D25E0004 00010104 exp 15, 1, 1, 0, 0, v2, v4, v2, v4 ; F800041F 04020402 v_cvt_pkrtz_f16_f32_e32 v0, v3, v0 ; 5E000103 exp 15, 2, 1, 1, 1, v1, v0, v1, v0 ; F8001C2F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 16 Code Size: 308 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0xB last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], POSITION, LINEAR DCL IN[1], GENERIC[0], PERSPECTIVE DCL IN[2], GENERIC[1], PERSPECTIVE DCL IN[3], GENERIC[2], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL OUT[2], COLOR[2] DCL OUT[3], COLOR[3] DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL CONST[5] DCL CONST[1][0..17] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0] DCL TEMP[1..10], LOCAL IMM[0] FLT32 { 1.0000, 0.5000, 0.0000, 2.0000} IMM[1] UINT32 {0, 156, 176, 160} IMM[2] FLT32 { -1.0000, 0.1000, 0.3110, 1120.0000} IMM[3] UINT32 {164, 168, 152, 3} IMM[4] UINT32 {348, 180, 184, 172} IMM[5] UINT32 {400, 304, 44, 60} IMM[6] UINT32 {272, 208, 236, 448} IMM[7] UINT32 {224, 204, 32, 48} IMM[8] FLT32 { -0.0500, 20.0000, 3.5000, 0.0000} IMM[9] UINT32 {64, 240, 244, 0} 0: MOV TEMP[0], IN[0] 1: MAD TEMP[0].y, IN[0], CONST[5].xxxx, CONST[5].yyyy 2: MOV TEMP[1].w, IMM[0].xxxx 3: MOV TEMP[1].x, IN[2].wwww 4: MOV TEMP[1].y, IN[2].wwww 5: MOV TEMP[1].z, IN[2].wwww 6: DP3 TEMP[2].x, IN[3].xyzz, IN[3].xyzz 7: RSQ TEMP[2].x, TEMP[2].xxxx 8: MUL TEMP[2].xyz, IN[3].xyzz, TEMP[2].xxxx 9: MAD TEMP[2].xyz, TEMP[2].xyzz, IMM[0].yyyy, IMM[0].yyyy 10: MOV TEMP[3].w, IMM[0].zzzz 11: MOV TEMP[3].x, TEMP[2].xxxx 12: MOV TEMP[3].y, TEMP[2].yyyy 13: MOV TEMP[3].z, TEMP[2].zzzz 14: MUL TEMP[2].xy, CONST[1][9].wwww, IN[2].xyzz 15: MOV TEMP[4].x, TEMP[2].xxxx 16: MOV TEMP[4].y, -TEMP[2].yyyy 17: MUL TEMP[2].xy, CONST[1][11].xxxx, TEMP[4].xyyy 18: MUL TEMP[5].xy, CONST[1][10].xxxx, IN[1].xyyy 19: MOV TEMP[5].xy, TEMP[5].xyyy 20: TEX TEMP[5].xy, TEMP[5], SAMP[1], 2D 21: MAD TEMP[5].xy, TEMP[5].xyyy, IMM[0].wwww, IMM[2].xxxx 22: MUL TEMP[4].xy, TEMP[4].xyyy, CONST[1][10].yyyy 23: MOV TEMP[4].xy, TEMP[4].xyyy 24: TEX TEMP[4].y, TEMP[4], SAMP[0], 2D 25: MUL TEMP[6].x, CONST[1][9].zzzz, CONST[4][21].wwww 26: MUL TEMP[7].x, CONST[1][11].yyyy, IMM[0].wwww 27: RCP TEMP[7].x, TEMP[7].xxxx 28: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].xxxx 29: MAD TEMP[4].x, TEMP[4].yyyy, CONST[1][10].zzzz, TEMP[6].xxxx 30: FRC TEMP[6].x, TEMP[4].xxxx 31: ADD TEMP[7].x, TEMP[4].xxxx, IMM[0].yyyy 32: FRC TEMP[8].x, TEMP[7].xxxx 33: MUL TEMP[9].xy, CONST[1][11].zzzz, TEMP[5].xyyy 34: MUL TEMP[10].x, TEMP[5].yyyy, TEMP[5].yyyy 35: MAD TEMP[5].x, TEMP[5].xxxx, TEMP[5].xxxx, TEMP[10].xxxx 36: ADD TEMP[5].x, TEMP[5].xxxx, IMM[2].yyyy 37: MUL TEMP[5].x, TEMP[5].xxxx, CONST[1][0].xxxx 38: MAD TEMP[10].x, IMM[0].wwww, TEMP[6].xxxx, IMM[2].xxxx 39: ABS TEMP[10].x, TEMP[10].xxxx 40: POW TEMP[10].x, TEMP[10].xxxx, CONST[1][10].wwww 41: FLR TEMP[7].x, TEMP[7].xxxx 42: MAD TEMP[7].x, IMM[2].zzzz, TEMP[7].xxxx, IMM[0].yyyy 43: ADD TEMP[7].xy, TEMP[7].xxxx, TEMP[2].xyyy 44: MAD TEMP[7].xy, TEMP[9].xyyy, TEMP[8].xxxx, TEMP[7].xyyy 45: MOV TEMP[7].xy, TEMP[7].xyyy 46: TEX TEMP[7].yw, TEMP[7], SAMP[2], 2D 47: MAD TEMP[8].x, IMM[0].wwww, TEMP[8].xxxx, IMM[2].xxxx 48: ABS TEMP[8].x, TEMP[8].xxxx 49: POW TEMP[8].x, TEMP[8].xxxx, CONST[1][10].wwww 50: FLR TEMP[4].x, TEMP[4].xxxx 51: MUL TEMP[4].x, TEMP[4].xxxx, IMM[2].zzzz 52: ADD TEMP[2].xy, TEMP[4].xxxx, TEMP[2].xyyy 53: MAD TEMP[2].xy, TEMP[9].xyyy, TEMP[6].xxxx, TEMP[2].xyyy 54: MOV TEMP[2].xy, TEMP[2].xyyy 55: TEX TEMP[2].yw, TEMP[2], SAMP[2], 2D 56: MUL TEMP[2].xy, TEMP[8].xxxx, TEMP[2].wyyy 57: MAD TEMP[2].xy, TEMP[10].xxxx, TEMP[7].wyyy, TEMP[2].xyyy 58: MAD TEMP[2].xy, IMM[0].wwww, TEMP[2].xyyy, IMM[2].xxxx 59: MUL TEMP[2].xy, TEMP[5].xxxx, TEMP[2].xyyy 60: MOV TEMP[4].xy, TEMP[2].xyxx 61: DP2 TEMP[5].x, TEMP[2].xyyy, TEMP[2].xyyy 62: RSQ TEMP[5].x, TEMP[5].xxxx 63: MUL TEMP[5].xy, TEMP[2].xyyy, TEMP[5].xxxx 64: DP2 TEMP[2].x, TEMP[2].xyyy, TEMP[2].xyyy 65: FSLT TEMP[2].x, IMM[0].xxxx, TEMP[2].xxxx 66: UIF TEMP[2].xxxx :2 67: MOV TEMP[2].xy, TEMP[5].xyzx 68: ELSE :2 69: MOV TEMP[2].xy, TEMP[4].xyzx 70: ENDIF 71: MOV TEMP[4].xy, TEMP[2].xyxx 72: DP2 TEMP[5].x, TEMP[2].xyyy, TEMP[2].xyyy 73: ADD TEMP[5].x, IMM[0].xxxx, -TEMP[5].xxxx 74: MOV_SAT TEMP[5].x, TEMP[5].xxxx 75: SQRT TEMP[5].x, TEMP[5].xxxx 76: MOV TEMP[4].z, TEMP[5].xxxx 77: ADD TEMP[5].xy, TEMP[0].xyyy, IMM[0].yyyy 78: MUL TEMP[5].xy, TEMP[5].xyyy, CONST[4][25].xyyy 79: MOV TEMP[6].xy, TEMP[5].xyyy 80: TEX TEMP[6].w, TEMP[6], SAMP[3], 2D 81: ADD TEMP[7].xyz, IN[2].xyzz, -CONST[4][19].xyzz 82: MOV TEMP[8].xyz, -TEMP[7].xyzx 83: DP3 TEMP[8].x, TEMP[8].xyzz, TEMP[8].xyzz 84: SQRT TEMP[8].x, TEMP[8].xxxx 85: MUL TEMP[9].x, CONST[1][2].wwww, TEMP[6].wwww 86: MUL TEMP[9].xy, TEMP[9].xxxx, TEMP[2].xyyy 87: MUL TEMP[9].xy, IMM[2].wwww, TEMP[9].xyyy 88: RCP TEMP[10].xy, TEMP[8].xxxx 89: MAD TEMP[9].xy, TEMP[9].xyyy, TEMP[10].xyyy, TEMP[5].xyyy 90: MUL TEMP[10].x, CONST[1][3].wwww, TEMP[6].wwww 91: MUL TEMP[2].xy, TEMP[10].xxxx, TEMP[2].xyyy 92: MUL TEMP[2].xy, TEMP[2].xyyy, IMM[2].wwww 93: RCP TEMP[8].xy, TEMP[8].xxxx 94: MUL TEMP[2].xy, TEMP[2].xyyy, TEMP[8].xyyy 95: ADD TEMP[2].xy, TEMP[5].xyyy, -TEMP[2].xyyy 96: MOV TEMP[5].x, TEMP[9].xxxx 97: MAX TEMP[8].x, TEMP[9].yyyy, CONST[1][17].yyyy 98: MIN TEMP[8].x, TEMP[8].xxxx, CONST[1][17].wwww 99: MOV TEMP[5].y, TEMP[8].xxxx 100: MOV TEMP[8].x, TEMP[2].xxxx 101: MAX TEMP[2].x, TEMP[2].yyyy, CONST[1][17].yyyy 102: MIN TEMP[2].x, TEMP[2].xxxx, CONST[1][17].wwww 103: MOV TEMP[8].y, TEMP[2].xxxx 104: MOV TEMP[2].xy, TEMP[8].xyyy 105: TEX TEMP[2].xyz, TEMP[2], SAMP[3], 2D 106: DP3 TEMP[8].x, TEMP[7].xyzz, TEMP[7].xyzz 107: RSQ TEMP[8].x, TEMP[8].xxxx 108: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[8].xxxx 109: MOV TEMP[7].xyz, -TEMP[7].xyzx 110: DP3 TEMP[8].x, TEMP[4].xyzz, TEMP[7].xyzz 111: MUL TEMP[4].xyz, TEMP[8].xxxx, TEMP[4].xyzz 112: MUL TEMP[4].xyz, IMM[0].wwww, TEMP[4].xyzz 113: ADD TEMP[4].xyz, TEMP[7].xyzz, -TEMP[4].xyzz 114: DP3 TEMP[4].x, CONST[1][13].xyzz, TEMP[4].xyzz 115: MOV_SAT TEMP[4].x, TEMP[4].xxxx 116: POW TEMP[4].x, TEMP[4].xxxx, CONST[1][14].wwww 117: MUL TEMP[7].xyz, CONST[4][28].xyzz, CONST[1][14].xyzz 118: ADD TEMP[8].x, TEMP[6].wwww, IMM[8].xxxx 119: MUL TEMP[8].x, TEMP[8].xxxx, IMM[8].yyyy 120: MOV_SAT TEMP[8].x, TEMP[8].xxxx 121: MUL TEMP[8].x, CONST[1][12].wwww, TEMP[8].xxxx 122: MUL TEMP[9].x, IMM[0].wwww, TEMP[6].wwww 123: MOV_SAT TEMP[9].x, TEMP[9].xxxx 124: MOV TEMP[5].xy, TEMP[5].xyyy 125: TEX TEMP[5].xyz, TEMP[5], SAMP[4], 2D 126: MUL TEMP[10].xyz, CONST[4][28].xyzz, CONST[1][2].xyzz 127: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[10].xyzz 128: MUL TEMP[5].xyz, TEMP[9].xxxx, TEMP[5].xyzz 129: MUL TEMP[9].xyz, CONST[1][3].xyzz, TEMP[2].xyzz 130: MUL TEMP[10].x, TEMP[6].wwww, IMM[8].zzzz 131: MOV_SAT TEMP[10].x, TEMP[10].xxxx 132: LRP TEMP[2].xyz, TEMP[10].xxxx, TEMP[9].xyzz, TEMP[2].xyzz 133: LRP TEMP[2].xyz, TEMP[6].wwww, CONST[1][4].xyzz, TEMP[2].xyzz 134: MAD TEMP[2].xyz, TEMP[8].xxxx, TEMP[5].xyzz, TEMP[2].xyzz 135: MAD TEMP[2].xyz, TEMP[4].xxxx, TEMP[7].xyzz, TEMP[2].xyzz 136: MOV_SAT TEMP[2].xyz, TEMP[2].xyzz 137: MOV TEMP[5].w, IMM[0].zzzz 138: MOV TEMP[5].x, TEMP[2].xxxx 139: MOV TEMP[5].y, TEMP[2].yyyy 140: MOV TEMP[5].z, TEMP[2].zzzz 141: MOV TEMP[2].x, IMM[0].zzzz 142: MOV TEMP[6].z, IMM[0].zzzz 143: MOV TEMP[6].x, CONST[1][15].xxxx 144: POW TEMP[4].x, TEMP[4].xxxx, CONST[1][15].yyyy 145: MOV TEMP[6].y, TEMP[4].xxxx 146: MOV TEMP[2].yzw, TEMP[6].yxyz 147: MOV OUT[2], TEMP[1] 148: MOV OUT[0], TEMP[5] 149: MOV OUT[3], TEMP[3] 150: MOV OUT[1], TEMP[2] 151: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %26 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %27 = load <16 x i8>, <16 x i8> addrspace(2)* %26, align 16, !tbaa !0 %28 = call float @llvm.SI.load.const(<16 x i8> %27, i32 0) %29 = call float @llvm.SI.load.const(<16 x i8> %27, i32 32) %30 = call float @llvm.SI.load.const(<16 x i8> %27, i32 36) %31 = call float @llvm.SI.load.const(<16 x i8> %27, i32 40) %32 = call float @llvm.SI.load.const(<16 x i8> %27, i32 44) %33 = call float @llvm.SI.load.const(<16 x i8> %27, i32 48) %34 = call float @llvm.SI.load.const(<16 x i8> %27, i32 52) %35 = call float @llvm.SI.load.const(<16 x i8> %27, i32 56) %36 = call float @llvm.SI.load.const(<16 x i8> %27, i32 60) %37 = call float @llvm.SI.load.const(<16 x i8> %27, i32 64) %38 = call float @llvm.SI.load.const(<16 x i8> %27, i32 68) %39 = call float @llvm.SI.load.const(<16 x i8> %27, i32 72) %40 = call float @llvm.SI.load.const(<16 x i8> %27, i32 152) %41 = call float @llvm.SI.load.const(<16 x i8> %27, i32 156) %42 = call float @llvm.SI.load.const(<16 x i8> %27, i32 160) %43 = call float @llvm.SI.load.const(<16 x i8> %27, i32 164) %44 = call float @llvm.SI.load.const(<16 x i8> %27, i32 168) %45 = call float @llvm.SI.load.const(<16 x i8> %27, i32 172) %46 = call float @llvm.SI.load.const(<16 x i8> %27, i32 176) %47 = call float @llvm.SI.load.const(<16 x i8> %27, i32 180) %48 = call float @llvm.SI.load.const(<16 x i8> %27, i32 184) %49 = call float @llvm.SI.load.const(<16 x i8> %27, i32 204) %50 = call float @llvm.SI.load.const(<16 x i8> %27, i32 208) %51 = call float @llvm.SI.load.const(<16 x i8> %27, i32 212) %52 = call float @llvm.SI.load.const(<16 x i8> %27, i32 216) %53 = call float @llvm.SI.load.const(<16 x i8> %27, i32 224) %54 = call float @llvm.SI.load.const(<16 x i8> %27, i32 228) %55 = call float @llvm.SI.load.const(<16 x i8> %27, i32 232) %56 = call float @llvm.SI.load.const(<16 x i8> %27, i32 236) %57 = call float @llvm.SI.load.const(<16 x i8> %27, i32 240) %58 = call float @llvm.SI.load.const(<16 x i8> %27, i32 244) %59 = call float @llvm.SI.load.const(<16 x i8> %27, i32 276) %60 = call float @llvm.SI.load.const(<16 x i8> %27, i32 284) %61 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4 %62 = load <16 x i8>, <16 x i8> addrspace(2)* %61, align 16, !tbaa !0 %63 = call float @llvm.SI.load.const(<16 x i8> %62, i32 304) %64 = call float @llvm.SI.load.const(<16 x i8> %62, i32 308) %65 = call float @llvm.SI.load.const(<16 x i8> %62, i32 312) %66 = call float @llvm.SI.load.const(<16 x i8> %62, i32 348) %67 = call float @llvm.SI.load.const(<16 x i8> %62, i32 400) %68 = call float @llvm.SI.load.const(<16 x i8> %62, i32 404) %69 = call float @llvm.SI.load.const(<16 x i8> %62, i32 448) %70 = call float @llvm.SI.load.const(<16 x i8> %62, i32 452) %71 = call float @llvm.SI.load.const(<16 x i8> %62, i32 456) %72 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %73 = load <32 x i8>, <32 x i8> addrspace(2)* %72, align 32, !tbaa !0 %74 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %75 = load <16 x i8>, <16 x i8> addrspace(2)* %74, align 16, !tbaa !0 %76 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %77 = bitcast <8 x i32> addrspace(2)* %76 to <32 x i8> addrspace(2)* %78 = load <32 x i8>, <32 x i8> addrspace(2)* %77, align 32, !tbaa !0 %79 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %80 = bitcast <4 x i32> addrspace(2)* %79 to <16 x i8> addrspace(2)* %81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0 %82 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %83 = load <8 x i32>, <8 x i32> addrspace(2)* %82, align 32, !tbaa !0 %84 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %85 = load <4 x i32>, <4 x i32> addrspace(2)* %84, align 16, !tbaa !0 %86 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %87 = load <8 x i32>, <8 x i32> addrspace(2)* %86, align 32, !tbaa !0 %88 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %89 = load <4 x i32>, <4 x i32> addrspace(2)* %88, align 16, !tbaa !0 %90 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %91 = bitcast <8 x i32> addrspace(2)* %90 to <32 x i8> addrspace(2)* %92 = load <32 x i8>, <32 x i8> addrspace(2)* %91, align 32, !tbaa !0 %93 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %94 = bitcast <4 x i32> addrspace(2)* %93 to <16 x i8> addrspace(2)* %95 = load <16 x i8>, <16 x i8> addrspace(2)* %94, align 16, !tbaa !0 %96 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %97 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %98 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %99 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %100 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %101 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %102 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %103 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %104 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %105 = fmul float %24, %15 %106 = fadd float %105, %25 %107 = fmul float %102, %102 %108 = fmul float %103, %103 %109 = fadd float %108, %107 %110 = fmul float %104, %104 %111 = fadd float %109, %110 %112 = call float @llvm.AMDGPU.rsq.clamped.f32(float %111) %113 = fmul float %102, %112 %114 = fmul float %103, %112 %115 = fmul float %104, %112 %116 = fmul float %113, 5.000000e-01 %117 = fadd float %116, 5.000000e-01 %118 = fmul float %114, 5.000000e-01 %119 = fadd float %118, 5.000000e-01 %120 = fmul float %115, 5.000000e-01 %121 = fadd float %120, 5.000000e-01 %122 = fmul float %41, %98 %123 = fmul float %41, %99 %124 = fsub float -0.000000e+00, %123 %125 = fmul float %46, %122 %126 = fmul float %46, %124 %127 = fmul float %42, %96 %128 = fmul float %42, %97 %129 = bitcast float %127 to i32 %130 = bitcast float %128 to i32 %131 = insertelement <2 x i32> undef, i32 %129, i32 0 %132 = insertelement <2 x i32> %131, i32 %130, i32 1 %133 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %132, <32 x i8> %78, <16 x i8> %81, i32 2) %134 = extractelement <4 x float> %133, i32 0 %135 = extractelement <4 x float> %133, i32 1 %136 = fmul float %134, 2.000000e+00 %137 = fadd float %136, -1.000000e+00 %138 = fmul float %135, 2.000000e+00 %139 = fadd float %138, -1.000000e+00 %140 = fmul float %122, %43 %141 = fmul float %43, %124 %142 = bitcast float %140 to i32 %143 = bitcast float %141 to i32 %144 = insertelement <2 x i32> undef, i32 %142, i32 0 %145 = insertelement <2 x i32> %144, i32 %143, i32 1 %146 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %145, <32 x i8> %73, <16 x i8> %75, i32 2) %147 = extractelement <4 x float> %146, i32 1 %148 = fmul float %40, %66 %149 = fmul float %47, 2.000000e+00 %150 = fdiv float 1.000000e+00, %149 %151 = fmul float %148, %150 %152 = fmul float %147, %44 %153 = fadd float %152, %151 %154 = call float @llvm.AMDIL.fraction.(float %153) %155 = fadd float %153, 5.000000e-01 %156 = call float @llvm.AMDIL.fraction.(float %155) %157 = fmul float %48, %137 %158 = fmul float %48, %139 %159 = fmul float %139, %139 %160 = fmul float %137, %137 %161 = fadd float %160, %159 %162 = fadd float %161, 0x3FB99999A0000000 %163 = fmul float %162, %28 %164 = fmul float %154, 2.000000e+00 %165 = fadd float %164, -1.000000e+00 %166 = call float @fabs(float %165) %167 = call float @llvm.pow.f32(float %166, float %45) %168 = call float @floor(float %155) %169 = fmul float %168, 0x3FD3E76C80000000 %170 = fadd float %169, 5.000000e-01 %171 = fadd float %170, %125 %172 = fadd float %170, %126 %173 = fmul float %157, %156 %174 = fadd float %173, %171 %175 = fmul float %158, %156 %176 = fadd float %175, %172 %177 = bitcast float %174 to i32 %178 = bitcast float %176 to i32 %179 = insertelement <2 x i32> undef, i32 %177, i32 0 %180 = insertelement <2 x i32> %179, i32 %178, i32 1 %181 = bitcast <8 x i32> %83 to <32 x i8> %182 = bitcast <4 x i32> %85 to <16 x i8> %183 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %180, <32 x i8> %181, <16 x i8> %182, i32 2) %184 = extractelement <4 x float> %183, i32 1 %185 = extractelement <4 x float> %183, i32 3 %186 = fmul float %156, 2.000000e+00 %187 = fadd float %186, -1.000000e+00 %188 = call float @fabs(float %187) %189 = call float @llvm.pow.f32(float %188, float %45) %190 = call float @floor(float %153) %191 = fmul float %190, 0x3FD3E76C80000000 %192 = fadd float %191, %125 %193 = fadd float %191, %126 %194 = fmul float %157, %154 %195 = fadd float %194, %192 %196 = fmul float %158, %154 %197 = fadd float %196, %193 %198 = bitcast float %195 to i32 %199 = bitcast float %197 to i32 %200 = insertelement <2 x i32> undef, i32 %198, i32 0 %201 = insertelement <2 x i32> %200, i32 %199, i32 1 %202 = bitcast <8 x i32> %83 to <32 x i8> %203 = bitcast <4 x i32> %85 to <16 x i8> %204 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %201, <32 x i8> %202, <16 x i8> %203, i32 2) %205 = extractelement <4 x float> %204, i32 1 %206 = extractelement <4 x float> %204, i32 3 %207 = fmul float %189, %206 %208 = fmul float %189, %205 %209 = fmul float %167, %185 %210 = fadd float %209, %207 %211 = fmul float %167, %184 %212 = fadd float %211, %208 %213 = fmul float %210, 2.000000e+00 %214 = fadd float %213, -1.000000e+00 %215 = fmul float %212, 2.000000e+00 %216 = fadd float %215, -1.000000e+00 %217 = fmul float %163, %214 %218 = fmul float %163, %216 %219 = fmul float %217, %217 %220 = fmul float %218, %218 %221 = fadd float %219, %220 %222 = call float @llvm.AMDGPU.rsq.clamped.f32(float %221) %223 = fmul float %217, %222 %224 = fmul float %218, %222 %225 = fmul float %217, %217 %226 = fmul float %218, %218 %227 = fadd float %225, %226 %228 = fcmp ogt float %227, 1.000000e+00 %. = select i1 %228, float %223, float %217 %.44 = select i1 %228, float %224, float %218 %229 = fmul float %., %. %230 = fmul float %.44, %.44 %231 = fadd float %229, %230 %232 = fsub float 1.000000e+00, %231 %233 = call float @llvm.AMDIL.clamp.(float %232, float 0.000000e+00, float 1.000000e+00) %234 = call float @llvm.sqrt.f32(float %233) %235 = fadd float %14, 5.000000e-01 %236 = fadd float %106, 5.000000e-01 %237 = fmul float %235, %67 %238 = fmul float %236, %68 %239 = bitcast float %237 to i32 %240 = bitcast float %238 to i32 %241 = insertelement <2 x i32> undef, i32 %239, i32 0 %242 = insertelement <2 x i32> %241, i32 %240, i32 1 %243 = bitcast <8 x i32> %87 to <32 x i8> %244 = bitcast <4 x i32> %89 to <16 x i8> %245 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %242, <32 x i8> %243, <16 x i8> %244, i32 2) %246 = extractelement <4 x float> %245, i32 3 %247 = fsub float %98, %63 %248 = fsub float %99, %64 %249 = fsub float %100, %65 %250 = fmul float %247, %247 %251 = fmul float %248, %248 %252 = fadd float %251, %250 %253 = fmul float %249, %249 %254 = fadd float %252, %253 %255 = call float @llvm.sqrt.f32(float %254) %256 = fmul float %32, %246 %257 = fmul float %256, %. %258 = fmul float %256, %.44 %259 = fmul float %257, 1.120000e+03 %260 = fmul float %258, 1.120000e+03 %261 = fdiv float 1.000000e+00, %255 %262 = fmul float %259, %261 %263 = fadd float %262, %237 %264 = fmul float %260, %261 %265 = fadd float %264, %238 %266 = fmul float %36, %246 %267 = fmul float %266, %. %268 = fmul float %266, %.44 %269 = fmul float %267, 1.120000e+03 %270 = fmul float %268, 1.120000e+03 %271 = fdiv float 1.000000e+00, %255 %272 = fmul float %269, %271 %273 = fmul float %270, %271 %274 = fsub float %237, %272 %275 = fsub float %238, %273 %276 = call float @llvm.maxnum.f32(float %265, float %59) %277 = call float @llvm.minnum.f32(float %276, float %60) %278 = call float @llvm.maxnum.f32(float %275, float %59) %279 = call float @llvm.minnum.f32(float %278, float %60) %280 = bitcast float %274 to i32 %281 = bitcast float %279 to i32 %282 = insertelement <2 x i32> undef, i32 %280, i32 0 %283 = insertelement <2 x i32> %282, i32 %281, i32 1 %284 = bitcast <8 x i32> %87 to <32 x i8> %285 = bitcast <4 x i32> %89 to <16 x i8> %286 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %283, <32 x i8> %284, <16 x i8> %285, i32 2) %287 = extractelement <4 x float> %286, i32 0 %288 = extractelement <4 x float> %286, i32 1 %289 = extractelement <4 x float> %286, i32 2 %290 = fmul float %247, %247 %291 = fmul float %248, %248 %292 = fadd float %291, %290 %293 = fmul float %249, %249 %294 = fadd float %292, %293 %295 = call float @llvm.AMDGPU.rsq.clamped.f32(float %294) %296 = fmul float %247, %295 %297 = fmul float %248, %295 %298 = fmul float %249, %295 %299 = fmul float %296, %. %300 = fsub float -0.000000e+00, %299 %301 = fmul float %297, %.44 %302 = fsub float %300, %301 %303 = fmul float %298, %234 %304 = fsub float %302, %303 %305 = fmul float %304, %. %306 = fmul float %304, %.44 %307 = fmul float %304, %234 %308 = fmul float %305, 2.000000e+00 %309 = fmul float %306, 2.000000e+00 %310 = fmul float %307, 2.000000e+00 %311 = fsub float -0.000000e+00, %308 %312 = fsub float %311, %296 %313 = fsub float -0.000000e+00, %309 %314 = fsub float %313, %297 %315 = fsub float -0.000000e+00, %310 %316 = fsub float %315, %298 %317 = fmul float %50, %312 %318 = fmul float %51, %314 %319 = fadd float %318, %317 %320 = fmul float %52, %316 %321 = fadd float %319, %320 %322 = call float @llvm.AMDIL.clamp.(float %321, float 0.000000e+00, float 1.000000e+00) %323 = call float @llvm.pow.f32(float %322, float %56) %324 = fmul float %69, %53 %325 = fmul float %70, %54 %326 = fmul float %71, %55 %327 = fadd float %246, 0xBFA99999A0000000 %328 = fmul float %327, 2.000000e+01 %329 = call float @llvm.AMDIL.clamp.(float %328, float 0.000000e+00, float 1.000000e+00) %330 = fmul float %49, %329 %331 = fmul float %246, 2.000000e+00 %332 = call float @llvm.AMDIL.clamp.(float %331, float 0.000000e+00, float 1.000000e+00) %333 = bitcast float %263 to i32 %334 = bitcast float %277 to i32 %335 = insertelement <2 x i32> undef, i32 %333, i32 0 %336 = insertelement <2 x i32> %335, i32 %334, i32 1 %337 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %336, <32 x i8> %92, <16 x i8> %95, i32 2) %338 = extractelement <4 x float> %337, i32 0 %339 = extractelement <4 x float> %337, i32 1 %340 = extractelement <4 x float> %337, i32 2 %341 = fmul float %69, %29 %342 = fmul float %70, %30 %343 = fmul float %71, %31 %344 = fmul float %338, %341 %345 = fmul float %339, %342 %346 = fmul float %340, %343 %347 = fmul float %332, %344 %348 = fmul float %332, %345 %349 = fmul float %332, %346 %350 = fmul float %33, %287 %351 = fmul float %34, %288 %352 = fmul float %35, %289 %353 = fmul float %246, 3.500000e+00 %354 = call float @llvm.AMDIL.clamp.(float %353, float 0.000000e+00, float 1.000000e+00) %355 = call float @llvm.AMDGPU.lrp(float %354, float %350, float %287) %356 = call float @llvm.AMDGPU.lrp(float %354, float %351, float %288) %357 = call float @llvm.AMDGPU.lrp(float %354, float %352, float %289) %358 = call float @llvm.AMDGPU.lrp(float %246, float %37, float %355) %359 = call float @llvm.AMDGPU.lrp(float %246, float %38, float %356) %360 = call float @llvm.AMDGPU.lrp(float %246, float %39, float %357) %361 = fmul float %330, %347 %362 = fadd float %361, %358 %363 = fmul float %330, %348 %364 = fadd float %363, %359 %365 = fmul float %330, %349 %366 = fadd float %365, %360 %367 = fmul float %323, %324 %368 = fadd float %367, %362 %369 = fmul float %323, %325 %370 = fadd float %369, %364 %371 = fmul float %323, %326 %372 = fadd float %371, %366 %373 = call float @llvm.AMDIL.clamp.(float %368, float 0.000000e+00, float 1.000000e+00) %374 = call float @llvm.AMDIL.clamp.(float %370, float 0.000000e+00, float 1.000000e+00) %375 = call float @llvm.AMDIL.clamp.(float %372, float 0.000000e+00, float 1.000000e+00) %376 = call float @llvm.pow.f32(float %323, float %58) %377 = call i32 @llvm.SI.packf16(float %373, float %374) %378 = bitcast i32 %377 to float %379 = call i32 @llvm.SI.packf16(float %375, float 0.000000e+00) %380 = bitcast i32 %379 to float %381 = call i32 @llvm.SI.packf16(float 0.000000e+00, float %57) %382 = bitcast i32 %381 to float %383 = call i32 @llvm.SI.packf16(float %376, float 0.000000e+00) %384 = bitcast i32 %383 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %378, float %380, float %378, float %380) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 1, i32 1, float %382, float %384, float %382, float %384) %385 = call i32 @llvm.SI.packf16(float %117, float %119) %386 = bitcast i32 %385 to float %387 = call i32 @llvm.SI.packf16(float %121, float 0.000000e+00) %388 = bitcast i32 %387 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 2, i32 0, float %101, float %101, float %101, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 3, i32 1, float %386, float %388, float %386, float %388) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @floor(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_add_f32_e32 v2, 0.5, v2 ; 060404F0 v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 v_interp_p1_f32 v5, v0, 1, 0, [m0] ; C8140100 v_interp_p2_f32 v5, [v5], v1, 1, 0, [m0] ; C8150101 s_load_dwordx4 s[32:35], s[2:3], 0x0 ; C0900300 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700 v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701 v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800 v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801 v_interp_p1_f32 v11, v0, 1, 2, [m0] ; C82C0900 v_interp_p2_f32 v11, [v11], v1, 1, 2, [m0] ; C82D0901 v_interp_p1_f32 v0, v0, 2, 2, [m0] ; C8000A00 v_interp_p2_f32 v0, [v0], v1, 2, 2, [m0] ; C8010A01 s_load_dwordx4 s[8:11], s[2:3], 0x4 ; C0840304 s_load_dwordx4 s[20:23], s[2:3], 0x10 ; C08A0310 s_load_dwordx4 s[56:59], s[4:5], 0x0 ; C09C0500 s_load_dwordx4 s[68:71], s[4:5], 0x4 ; C0A20504 s_load_dwordx4 s[36:39], s[4:5], 0x8 ; C0920508 s_load_dwordx4 s[16:19], s[4:5], 0xc ; C088050C s_load_dwordx4 s[12:15], s[4:5], 0x10 ; C0860510 s_load_dwordx8 s[60:67], s[6:7], 0x0 ; C0DE0700 s_load_dwordx8 s[72:79], s[6:7], 0x8 ; C0E40708 s_load_dwordx8 s[40:47], s[6:7], 0x10 ; C0D40710 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[8:11], 0x29 ; C2020929 s_buffer_load_dword s5, s[8:11], 0x2a ; C202892A s_buffer_load_dword s80, s[8:11], 0x2b ; C228092B s_buffer_load_dword s81, s[8:11], 0x2c ; C228892C s_buffer_load_dword s82, s[8:11], 0x2d ; C229092D s_buffer_load_dword s83, s[8:11], 0x2e ; C229892E s_buffer_load_dword s0, s[8:11], 0x33 ; C2000933 s_buffer_load_dword s3, s[8:11], 0x34 ; C2018934 s_buffer_load_dword s2, s[8:11], 0x35 ; C2010935 s_buffer_load_dword s1, s[8:11], 0x36 ; C2008936 s_buffer_load_dword s24, s[8:11], 0x28 ; C20C0928 s_buffer_load_dword s84, s[8:11], 0x27 ; C22A0927 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v12, s24, v4 ; 10180818 v_mul_f32_e32 v13, s24, v5 ; 101A0A18 s_buffer_load_dword s85, s[8:11], 0x26 ; C22A8926 s_buffer_load_dword s86, s[20:23], 0x57 ; C22B1557 s_load_dwordx8 s[48:55], s[6:7], 0x18 ; C0D80718 s_load_dwordx8 s[24:31], s[6:7], 0x20 ; C0CC0720 image_sample v[4:5], 3, 0, 0, 0, 0, 0, 0, 0, v[12:13], s[72:79], s[68:71] ; F0800300 0232040C v_mul_f32_e32 v1, s84, v6 ; 10020C54 v_mul_f32_e32 v12, s84, v7 ; 10180E54 v_mul_f32_e32 v13, s4, v1 ; 101A0204 v_mul_f32_e64 v14, s4, -v12 ; D210000E 40021804 v_add_f32_e64 v15, s82, s82 ; D206000F 0000A452 v_rcp_f32_e32 v15, v15 ; 7E1E550F image_sample v13, 2, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[60:67], s[56:59] ; F0800200 01CF0D0D s_buffer_load_dword s4, s[20:23], 0x64 ; C2021564 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v14, s86 ; 7E1C0256 v_mul_f32_e32 v14, s85, v14 ; 101C1C55 v_mul_f32_e32 v16, v15, v14 ; 10201D0F s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v16, s5, v13 ; 3E201A05 v_floor_f32_e32 v16, v16 ; 7E204910 v_mad_f32 v17, v14, v15, -v16 ; D2820011 84421F0E v_mad_f32 v14, v14, v15, 0.5 ; D282000E 03C21F0E v_mac_f32_e32 v17, s5, v13 ; 3E221A05 v_mac_f32_e32 v14, s5, v13 ; 3E1C1A05 v_mad_f32 v4, 2.0, v4, -1.0 ; D2820004 03CE08F4 v_mad_f32 v5, 2.0, v5, -1.0 ; D2820005 03CE0AF4 v_mul_f32_e32 v13, s83, v4 ; 101A0853 v_mul_f32_e32 v15, s83, v5 ; 101E0A53 v_mul_f32_e32 v18, s81, v12 ; 10241851 v_floor_f32_e32 v19, v14 ; 7E26490E v_mov_b32_e32 v20, 0x3e9f3b64 ; 7E2802FF 3E9F3B64 v_mad_f32 v21, v19, v20, 0.5 ; D2820015 03C22913 v_mul_f32_e32 v22, s81, v1 ; 102C0251 v_mad_f32 v24, s81, v1, v21 ; D2820018 04560251 v_mad_f32 v25, s81, -v12, v21 ; D2820019 44561851 v_mac_f32_e32 v22, v20, v16 ; 3E2C2114 v_mad_f32 v23, v16, v20, -v18 ; D2820017 844A2910 v_subrev_f32_e32 v1, v19, v14 ; 0A021D13 v_mac_f32_e32 v24, v1, v13 ; 3E301B01 v_mac_f32_e32 v22, v17, v13 ; 3E2C1B11 v_mac_f32_e32 v25, v1, v15 ; 3E321F01 v_mac_f32_e32 v23, v17, v15 ; 3E2E1F11 image_sample v[12:13], 10, 0, 0, 0, 0, 0, 0, 0, v[24:25], s[40:47], s[36:39] ; F0800A00 012A0C18 image_sample v[14:15], 10, 0, 0, 0, 0, 0, 0, 0, v[22:23], s[40:47], s[36:39] ; F0800A00 012A0E16 s_buffer_load_dword s5, s[32:35], 0x14 ; C202A114 s_buffer_load_dword s6, s[32:35], 0x15 ; C2032115 v_mad_f32 v1, 2.0, v1, -1.0 ; D2820001 03CE02F4 v_mov_b32_e32 v16, 0x7fffffff ; 7E2002FF 7FFFFFFF v_and_b32_e32 v1, v1, v16 ; 36022101 v_log_f32_e32 v1, v1 ; 7E024F01 v_mad_f32 v17, 2.0, v17, -1.0 ; D2820011 03CE22F4 v_and_b32_e32 v16, v17, v16 ; 36202111 v_log_f32_e32 v16, v16 ; 7E204F10 v_mul_legacy_f32_e32 v1, s80, v1 ; 0E020250 v_exp_f32_e32 v1, v1 ; 7E024B01 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v15, v15, v1 ; 101E030F v_mul_f32_e32 v1, v14, v1 ; 1002030E v_mul_legacy_f32_e32 v14, s80, v16 ; 0E1C2050 s_buffer_load_dword s7, s[8:11], 0x0 ; C2038900 v_exp_f32_e32 v14, v14 ; 7E1C4B0E v_mac_f32_e32 v15, v13, v14 ; 3E1E1D0D v_mac_f32_e32 v1, v12, v14 ; 3E021D0C v_madak_f32_e32 v5, v5, v5, 0x3dcccccd ; 420A0B05 3DCCCCCD v_mac_f32_e32 v5, v4, v4 ; 3E0A0904 s_buffer_load_dword s32, s[8:11], 0x8 ; C2100908 s_buffer_load_dword s33, s[8:11], 0x9 ; C2108909 s_buffer_load_dword s34, s[8:11], 0xa ; C211090A s_buffer_load_dword s35, s[8:11], 0xb ; C211890B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s7, v5 ; 10080A07 v_mad_f32 v5, 2.0, v15, -1.0 ; D2820005 03CE1EF4 v_mad_f32 v1, 2.0, v1, -1.0 ; D2820001 03CE02F4 v_mul_f32_e32 v5, v5, v4 ; 100A0905 v_mul_f32_e32 v1, v1, v4 ; 10020901 v_mul_f32_e32 v4, v1, v1 ; 10080301 v_mac_f32_e32 v4, v5, v5 ; 3E080B05 v_rsq_clamp_f32_e32 v12, v4 ; 7E185904 s_buffer_load_dword s7, s[20:23], 0x4c ; C203954C s_buffer_load_dword s36, s[20:23], 0x4d ; C212154D s_buffer_load_dword s37, s[20:23], 0x4e ; C212954E s_buffer_load_dword s38, s[20:23], 0x65 ; C2131565 s_buffer_load_dword s39, s[20:23], 0x70 ; C2139570 s_buffer_load_dword s40, s[20:23], 0x71 ; C2141571 s_buffer_load_dword s20, s[20:23], 0x72 ; C20A1572 v_mul_f32_e32 v13, v12, v5 ; 101A0B0C v_cmp_lt_f32_e32 vcc, 1.0, v4 ; 7C0208F2 v_cndmask_b32_e32 v4, v5, v13 ; 00081B05 v_mul_f32_e32 v5, v12, v1 ; 100A030C v_cndmask_b32_e32 v1, v1, v5 ; 00020B01 v_mov_b32_e32 v5, s6 ; 7E0A0206 v_mac_f32_e32 v5, s5, v3 ; 3E0A0605 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v3, s7, v6 ; 0A060C07 v_subrev_f32_e32 v6, s36, v7 ; 0A0C0E24 s_buffer_load_dword s5, s[8:11], 0xf ; C202890F v_subrev_f32_e32 v7, s37, v8 ; 0A0E1025 v_add_f32_e32 v5, 0.5, v5 ; 060A0AF0 v_mul_f32_e32 v12, s4, v2 ; 10180404 v_mul_f32_e32 v13, s38, v5 ; 101A0A26 v_mul_f32_e32 v8, v3, v3 ; 10100703 v_mac_f32_e32 v8, v6, v6 ; 3E100D06 v_mac_f32_e32 v8, v7, v7 ; 3E100F07 s_buffer_load_dword s6, s[8:11], 0x10 ; C2030910 image_sample v14, 8, 0, 0, 0, 0, 0, 0, 0, v[12:13], s[48:55], s[16:19] ; F0800800 008C0E0C s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v15, s5, v14 ; 101E1C05 s_buffer_load_dword s5, s[8:11], 0x45 ; C2028945 s_buffer_load_dword s7, s[8:11], 0x47 ; C2038947 v_rsq_f32_e32 v16, v8 ; 7E205D08 v_mov_b32_e32 v17, 0x448c0000 ; 7E2202FF 448C0000 v_mul_f32_e32 v18, v1, v15 ; 10241F01 v_mul_f32_e32 v18, v17, v18 ; 10242511 v_mul_f32_e32 v18, v16, v18 ; 10242510 v_mad_f32 v5, v5, s38, -v18 ; D2820005 84484D05 v_mul_f32_e32 v15, v4, v15 ; 101E1F04 v_mul_f32_e32 v15, v17, v15 ; 101E1F11 v_mul_f32_e32 v15, v16, v15 ; 101E1F10 v_mad_f32 v18, v2, s4, -v15 ; D2820012 843C0902 s_waitcnt lgkmcnt(0) ; BF8C007F v_max_f32_e32 v2, s5, v5 ; 20040A05 v_min_f32_e32 v19, s7, v2 ; 1E260407 image_sample v[18:20], 7, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[48:55], s[16:19] ; F0800700 008C1212 v_mul_f32_e32 v2, s35, v14 ; 10041C23 v_mul_f32_e32 v5, v4, v2 ; 100A0504 v_mul_f32_e32 v2, v1, v2 ; 10040501 v_mul_f32_e32 v5, v17, v5 ; 100A0B11 v_mul_f32_e32 v2, v17, v2 ; 10040511 v_mac_f32_e32 v12, v16, v5 ; 3E180B10 v_mac_f32_e32 v13, v16, v2 ; 3E1A0510 v_max_f32_e32 v2, s5, v13 ; 20041A05 v_min_f32_e32 v13, s7, v2 ; 1E1A0407 s_buffer_load_dword s4, s[8:11], 0xc ; C202090C image_sample v[15:17], 7, 0, 0, 0, 0, 0, 0, 0, v[12:13], s[24:31], s[12:15] ; F0800700 00660F0C v_mov_b32_e32 v2, s32 ; 7E040220 v_mul_f32_e32 v2, s39, v2 ; 10040427 v_mov_b32_e32 v5, s33 ; 7E0A0221 v_mul_f32_e32 v5, s40, v5 ; 100A0A28 v_mov_b32_e32 v12, s34 ; 7E180222 v_mul_f32_e32 v12, s20, v12 ; 10181814 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v2, v2, v15 ; 10041F02 v_mul_f32_e32 v5, v5, v16 ; 100A2105 v_mul_f32_e32 v12, v12, v17 ; 1018230C s_buffer_load_dword s5, s[8:11], 0xd ; C202890D s_buffer_load_dword s7, s[8:11], 0xe ; C203890E v_mul_f32_e32 v13, s4, v18 ; 101A2404 v_rsq_clamp_f32_e32 v8, v8 ; 7E105908 v_mul_f32_e32 v15, 0x40600000, v14 ; 101E1CFF 40600000 v_add_f32_e64 v15, 0, v15 clamp ; D206080F 00021E80 v_sub_f32_e32 v16, 1.0, v15 ; 08201EF2 v_mul_f32_e32 v17, v18, v16 ; 10222112 v_mul_f32_e32 v18, v19, v16 ; 10242113 v_mul_f32_e32 v16, v20, v16 ; 10202114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v19, s5, v19 ; 10262605 v_mul_f32_e32 v20, s7, v20 ; 10282807 v_mul_f32_e32 v21, v8, v3 ; 102A0708 v_mul_f32_e32 v21, v4, v21 ; 102A2B04 v_mul_f32_e32 v22, v8, v6 ; 102C0D08 v_mad_f32 v21, -v22, v1, -v21 ; D2820015 A4560316 v_mad_f32 v22, -v1, v1, 1.0 ; D2820016 23CA0301 v_mad_f32 v22, -v4, v4, v22 ; D2820016 245A0904 v_add_f32_e64 v22, 0, v22 clamp ; D2060816 00022C80 v_sqrt_f32_e32 v22, v22 ; 7E2C6716 v_mul_f32_e32 v23, v8, v7 ; 102E0F08 v_mad_f32 v21, -v23, v22, v21 ; D2820015 24562D17 v_mul_f32_e32 v23, v4, v21 ; 102E2B04 v_mac_f32_e32 v23, v4, v21 ; 3E2E2B04 v_mul_f32_e32 v4, v1, v21 ; 10082B01 v_mac_f32_e32 v4, v1, v21 ; 3E082B01 v_mul_f32_e32 v1, v22, v21 ; 10022B16 v_mac_f32_e32 v1, v22, v21 ; 3E022B16 v_mad_f32 v3, -v3, v8, -v23 ; D2820003 A45E1103 v_mad_f32 v4, -v6, v8, -v4 ; D2820004 A4121106 v_mad_f32 v1, -v7, v8, -v1 ; D2820001 A4061107 v_mul_f32_e32 v3, s3, v3 ; 10060603 v_mac_f32_e32 v3, s2, v4 ; 3E060802 s_buffer_load_dword s2, s[8:11], 0x11 ; C2010911 v_mac_f32_e32 v17, v13, v15 ; 3E221F0D s_buffer_load_dword s3, s[8:11], 0x12 ; C2018912 v_mac_f32_e32 v18, v19, v15 ; 3E241F13 v_mac_f32_e32 v16, v20, v15 ; 3E201F14 v_mac_f32_e32 v3, s1, v1 ; 3E060201 v_sub_f32_e32 v1, 1.0, v14 ; 08021CF2 v_mul_f32_e32 v4, v17, v1 ; 10080311 v_mac_f32_e32 v4, s6, v14 ; 3E081C06 v_mul_f32_e32 v6, v18, v1 ; 100C0312 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v6, s2, v14 ; 3E0C1C02 v_mul_f32_e32 v1, v16, v1 ; 10020310 v_mac_f32_e32 v1, s3, v14 ; 3E021C03 v_mov_b32_e32 v7, 0xbd4ccccd ; 7E0E02FF BD4CCCCD v_add_f32_e32 v7, v14, v7 ; 060E0F0E v_add_f32_e32 v8, v14, v14 ; 06101D0E v_add_f32_e64 v8, 0, v8 clamp ; D2060808 00021080 v_mul_f32_e32 v2, v2, v8 ; 10041102 v_mul_f32_e32 v5, v5, v8 ; 100A1105 v_mul_f32_e32 v8, v12, v8 ; 1010110C v_mul_f32_e32 v7, 0x41a00000, v7 ; 100E0EFF 41A00000 v_add_f32_e64 v7, 0, v7 clamp ; D2060807 00020E80 s_buffer_load_dword s1, s[8:11], 0x3b ; C200893B s_buffer_load_dword s2, s[8:11], 0x38 ; C2010938 s_buffer_load_dword s3, s[8:11], 0x39 ; C2018939 v_mul_f32_e32 v7, s0, v7 ; 100E0E00 v_mac_f32_e32 v4, v2, v7 ; 3E080F02 v_add_f32_e64 v2, 0, v3 clamp ; D2060802 00020680 s_buffer_load_dword s0, s[8:11], 0x3a ; C200093A v_log_f32_e32 v2, v2 ; 7E044F02 v_mac_f32_e32 v6, v5, v7 ; 3E0C0F05 v_mac_f32_e32 v1, v8, v7 ; 3E020F08 s_buffer_load_dword s4, s[8:11], 0x3c ; C202093C s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_legacy_f32_e32 v2, s1, v2 ; 0E040401 v_mov_b32_e32 v3, s2 ; 7E060202 v_mul_f32_e32 v3, s39, v3 ; 10060627 v_mov_b32_e32 v5, s3 ; 7E0A0203 v_mul_f32_e32 v5, s40, v5 ; 100A0A28 v_exp_f32_e32 v2, v2 ; 7E044B02 v_mac_f32_e32 v4, v3, v2 ; 3E080503 v_mac_f32_e32 v6, v5, v2 ; 3E0C0505 v_mov_b32_e32 v3, s0 ; 7E060200 v_mul_f32_e32 v3, s20, v3 ; 10060614 v_mac_f32_e32 v1, v3, v2 ; 3E020503 s_buffer_load_dword s0, s[8:11], 0x3d ; C200093D v_add_f32_e64 v3, 0, v4 clamp ; D2060803 00020880 v_add_f32_e64 v4, 0, v6 clamp ; D2060804 00020C80 v_cvt_pkrtz_f16_f32_e32 v3, v3, v4 ; 5E060903 v_mul_f32_e32 v4, v10, v10 ; 1008150A v_mac_f32_e32 v4, v11, v11 ; 3E08170B v_mac_f32_e32 v4, v0, v0 ; 3E080100 v_rsq_clamp_f32_e32 v4, v4 ; 7E085904 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_cvt_pkrtz_f16_f32_e64 v1, v1, 0 ; D25E0001 00010101 exp 15, 0, 1, 0, 0, v3, v1, v3, v1 ; F800040F 01030103 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v1, v4, v10 ; 10021504 v_mul_f32_e32 v3, v4, v11 ; 10061704 v_mul_f32_e32 v0, v4, v0 ; 10000104 v_log_f32_e32 v2, v2 ; 7E044F02 v_mad_f32 v1, 0.5, v1, 0.5 ; D2820001 03C202F0 v_mad_f32 v3, 0.5, v3, 0.5 ; D2820003 03C206F0 v_mad_f32 v0, 0.5, v0, 0.5 ; D2820000 03C200F0 v_mul_legacy_f32_e32 v2, s0, v2 ; 0E040400 v_exp_f32_e32 v2, v2 ; 7E044B02 v_cvt_pkrtz_f16_f32_e64 v4, 0, s4 ; D25E0004 00000880 v_cvt_pkrtz_f16_f32_e64 v2, v2, 0 ; D25E0002 00010102 exp 15, 1, 1, 0, 0, v4, v2, v4, v2 ; F800041F 02040204 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v2, 1.0 ; 7E0402F2 v_cvt_pkrtz_f16_f32_e32 v1, v1, v3 ; 5E020701 v_cvt_pkrtz_f16_f32_e64 v0, v0, 0 ; D25E0000 00010100 exp 15, 2, 0, 0, 0, v9, v9, v9, v2 ; F800002F 02090909 exp 15, 3, 1, 1, 1, v1, v0, v1, v0 ; F8001C3F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 96 VGPRS: 28 Code Size: 1436 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL CONST[1][0..41] DCL CONST[2][0..13] DCL CONST[3][0] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL DCL TEMP[2..3], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, -1.0000, 0.0000} IMM[1] UINT32 {0, 64, 80, 96} IMM[2] UINT32 {112, 68, 84, 100} IMM[3] UINT32 {116, 72, 88, 104} IMM[4] UINT32 {120, 76, 92, 108} IMM[5] UINT32 {124, 304, 320, 0} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MOV TEMP[0].zw, IMM[0].yyyy 4: MOV TEMP[0].x, IN[0].xxxx 5: MOV TEMP[0].y, IN[0].yyyy 6: MOV TEMP[2].x, CONST[1][4].xxxx 7: MOV TEMP[2].y, CONST[1][5].xxxx 8: MOV TEMP[2].z, CONST[1][6].xxxx 9: MOV TEMP[2].w, CONST[1][7].xxxx 10: DP4 TEMP[2].x, TEMP[0], TEMP[2] 11: MOV TEMP[3].x, CONST[1][4].yyyy 12: MOV TEMP[3].y, CONST[1][5].yyyy 13: MOV TEMP[3].z, CONST[1][6].yyyy 14: MOV TEMP[3].w, CONST[1][7].yyyy 15: DP4 TEMP[3].x, TEMP[0], TEMP[3] 16: MOV TEMP[2].y, TEMP[3].xxxx 17: MOV TEMP[3].x, CONST[1][4].zzzz 18: MOV TEMP[3].y, CONST[1][5].zzzz 19: MOV TEMP[3].z, CONST[1][6].zzzz 20: MOV TEMP[3].w, CONST[1][7].zzzz 21: DP4 TEMP[3].x, TEMP[0], TEMP[3] 22: MOV TEMP[2].z, TEMP[3].xxxx 23: MOV TEMP[3].x, CONST[1][4].wwww 24: MOV TEMP[3].y, CONST[1][5].wwww 25: MOV TEMP[3].z, CONST[1][6].wwww 26: MOV TEMP[3].w, CONST[1][7].wwww 27: DP4 TEMP[0].x, TEMP[0], TEMP[3] 28: RCP TEMP[0].xyz, TEMP[0].xxxx 29: MAD TEMP[0].xyz, TEMP[2].xyzz, TEMP[0].xyzz, -CONST[1][19].xyzz 30: DP3 TEMP[2].x, TEMP[0].xyzz, TEMP[0].xyzz 31: RSQ TEMP[2].x, TEMP[2].xxxx 32: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xxxx 33: DP3 TEMP[2].x, CONST[1][20].xyzz, TEMP[0].xyzz 34: RCP TEMP[2].x, TEMP[2].xxxx 35: MUL TEMP[0].xyz, TEMP[2].xxxx, TEMP[0].xyzz 36: MOV TEMP[2].zw, IMM[0].yyzy 37: MOV TEMP[2].x, IN[0].xxxx 38: MOV TEMP[2].y, -IN[0].yyyy 39: MOV OUT[1], TEMP[1] 40: MOV OUT[2].xy, IN[1].xyxx 41: MOV OUT[0], TEMP[2] 42: MOV OUT[3].xyz, TEMP[0].xyzx 43: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328) %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 %37 = add i32 %5, %7 %38 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %37) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 %43 = add i32 %5, %7 %44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %43) %45 = extractelement <4 x float> %44, i32 0 %46 = extractelement <4 x float> %44, i32 1 %47 = fmul float %39, %13 %48 = fmul float %40, %17 %49 = fadd float %47, %48 %50 = fadd float %49, %21 %51 = fadd float %50, %25 %52 = fmul float %39, %14 %53 = fmul float %40, %18 %54 = fadd float %52, %53 %55 = fadd float %54, %22 %56 = fadd float %55, %26 %57 = fmul float %39, %15 %58 = fmul float %40, %19 %59 = fadd float %57, %58 %60 = fadd float %59, %23 %61 = fadd float %60, %27 %62 = fmul float %39, %16 %63 = fmul float %40, %20 %64 = fadd float %62, %63 %65 = fadd float %64, %24 %66 = fadd float %65, %28 %67 = fdiv float 1.000000e+00, %66 %68 = fmul float %51, %67 %69 = fsub float %68, %29 %70 = fmul float %56, %67 %71 = fsub float %70, %30 %72 = fmul float %61, %67 %73 = fsub float %72, %31 %74 = fmul float %69, %69 %75 = fmul float %71, %71 %76 = fadd float %75, %74 %77 = fmul float %73, %73 %78 = fadd float %76, %77 %79 = call float @llvm.AMDGPU.rsq.clamped.f32(float %78) %80 = fmul float %69, %79 %81 = fmul float %71, %79 %82 = fmul float %73, %79 %83 = fmul float %32, %80 %84 = fmul float %33, %81 %85 = fadd float %84, %83 %86 = fmul float %34, %82 %87 = fadd float %85, %86 %88 = fdiv float 1.000000e+00, %87 %89 = fmul float %88, %80 %90 = fmul float %88, %81 %91 = fmul float %88, %82 %92 = fsub float -0.000000e+00, %40 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %45, float %46, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %89, float %90, float %91, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %39, float %92, float -1.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0x1a ; C206011A buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[3:6], v0, s[8:11], 0 idxen ; E00C2000 80020300 s_buffer_load_dword s5, s[0:3], 0x1c ; C202811C s_buffer_load_dword s6, s[0:3], 0x1d ; C203011D s_buffer_load_dword s7, s[0:3], 0x1e ; C203811E s_buffer_load_dword s8, s[0:3], 0x15 ; C2040115 s_buffer_load_dword s9, s[0:3], 0x16 ; C2048116 s_buffer_load_dword s10, s[0:3], 0x17 ; C2050117 v_mov_b32_e32 v0, s12 ; 7E00020C s_buffer_load_dword s11, s[0:3], 0x18 ; C2058118 s_buffer_load_dword s12, s[0:3], 0x19 ; C2060119 s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v5, s4 ; 7E0A0204 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_buffer_load_dword s13, s[0:3], 0x11 ; C2068111 s_buffer_load_dword s14, s[0:3], 0x12 ; C2070112 s_buffer_load_dword s15, s[0:3], 0x13 ; C2078113 s_buffer_load_dword s16, s[0:3], 0x14 ; C2080114 s_buffer_load_dword s17, s[0:3], 0x1f ; C208811F s_buffer_load_dword s18, s[0:3], 0x4c ; C209014C s_buffer_load_dword s19, s[0:3], 0x4d ; C209814D s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s11 ; 7E0C020B s_buffer_load_dword s11, s[0:3], 0x4e ; C205814E v_mov_b32_e32 v7, s12 ; 7E0E020C s_buffer_load_dword s12, s[0:3], 0x50 ; C2060150 s_buffer_load_dword s20, s[0:3], 0x51 ; C20A0151 s_buffer_load_dword s0, s[0:3], 0x52 ; C2000152 v_mac_f32_e32 v6, s16, v2 ; 3E0C0410 v_mac_f32_e32 v7, s8, v2 ; 3E0E0408 v_mac_f32_e32 v0, s9, v2 ; 3E000409 v_mac_f32_e32 v5, s10, v2 ; 3E0A040A v_mac_f32_e32 v6, s4, v1 ; 3E0C0204 v_mac_f32_e32 v7, s13, v1 ; 3E0E020D v_mac_f32_e32 v0, s14, v1 ; 3E00020E v_mac_f32_e32 v5, s15, v1 ; 3E0A020F v_add_f32_e32 v5, s17, v5 ; 060A0A11 v_rcp_f32_e32 v5, v5 ; 7E0A5505 v_add_f32_e32 v6, s5, v6 ; 060C0C05 v_add_f32_e32 v7, s6, v7 ; 060E0E06 v_add_f32_e32 v0, s7, v0 ; 06000007 v_mad_f32 v6, v6, v5, -s18 ; D2820006 804A0B06 v_mad_f32 v7, v7, v5, -s19 ; D2820007 804E0B07 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, v5, -s11 ; D2820000 802E0B00 v_mul_f32_e32 v5, v6, v6 ; 100A0D06 v_mac_f32_e32 v5, v7, v7 ; 3E0A0F07 v_mac_f32_e32 v5, v0, v0 ; 3E0A0100 v_rsq_clamp_f32_e32 v5, v5 ; 7E0A5905 v_mul_f32_e32 v6, v5, v6 ; 100C0D05 v_mul_f32_e32 v7, v5, v7 ; 100E0F05 v_mul_f32_e32 v0, v5, v0 ; 10000105 v_mul_f32_e32 v5, s12, v6 ; 100A0C0C v_mac_f32_e32 v5, s20, v7 ; 3E0A0E14 v_mac_f32_e32 v5, s0, v0 ; 3E0A0000 v_rcp_f32_e32 v5, v5 ; 7E0A5505 v_mov_b32_e32 v8, 0 ; 7E100280 exp 15, 32, 0, 0, 0, v8, v8, v8, v8 ; F800020F 08080808 exp 15, 33, 0, 0, 0, v3, v4, v8, v8 ; F800021F 08080403 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v3, v6, v5 ; 10060B06 v_mul_f32_e32 v4, v7, v5 ; 10080B07 v_mul_f32_e32 v0, v0, v5 ; 10000B00 exp 15, 34, 0, 0, 0, v3, v4, v0, v8 ; F800022F 08000403 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, 1.0 ; 7E0002F2 v_xor_b32_e32 v2, 0x80000000, v2 ; 3A0404FF 80000000 v_mov_b32_e32 v3, -1.0 ; 7E0602F3 exp 15, 12, 0, 0, 0, v1, v2, v3, v0 ; F80000CF 00030201 exp 15, 13, 0, 1, 0, v8, v8, v8, v8 ; F80008DF 08080808 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 360 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[1][0..4] DCL CONST[2][0..41] DCL CONST[3][0..13] DCL CONST[4][0] DCL TEMP[0..6], LOCAL IMM[0] UINT32 {0, 32, 16, 48} IMM[1] FLT32 { 0.5000, 0.0000, 65535.0000, 0.0039} IMM[2] UINT32 {64, 1, 368, 0} IMM[3] FLT32 { 0.0039, 256.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0].xy, TEMP[0], SAMP[0], 2D 2: MAD TEMP[1].xy, CONST[1][2].ywww, IMM[1].xxxx, IN[0].xyyy 3: MOV TEMP[2].x, IMM[1].yyyy 4: MOV TEMP[2].y, CONST[1][1].wwww 5: ADD TEMP[2].xy, TEMP[2].xyyy, TEMP[1].xyyy 6: MOV TEMP[2].xy, TEMP[2].xyyy 7: TEX TEMP[2].x, TEMP[2], SAMP[1], 2D 8: MOV TEMP[2].x, TEMP[2].xxxx 9: ADD TEMP[3].xy, CONST[1][1].xwww, TEMP[1].xyyy 10: MOV TEMP[3].xy, TEMP[3].xyyy 11: TEX TEMP[3].x, TEMP[3], SAMP[1], 2D 12: MOV TEMP[2].y, TEMP[3].xxxx 13: ADD TEMP[3].xy, CONST[1][1].ywww, TEMP[1].xyyy 14: MOV TEMP[3].xy, TEMP[3].xyyy 15: TEX TEMP[3].x, TEMP[3], SAMP[1], 2D 16: MOV TEMP[2].z, TEMP[3].xxxx 17: ADD TEMP[3].xy, CONST[1][1].zwww, TEMP[1].xyyy 18: MOV TEMP[3].xy, TEMP[3].xyyy 19: TEX TEMP[3].x, TEMP[3], SAMP[1], 2D 20: MOV TEMP[2].w, TEMP[3].xxxx 21: MOV TEMP[3].x, IMM[1].yyyy 22: MOV TEMP[3].y, CONST[1][2].wwww 23: ADD TEMP[3].xy, TEMP[3].xyyy, TEMP[1].xyyy 24: MOV TEMP[3].xy, TEMP[3].xyyy 25: TEX TEMP[3].x, TEMP[3], SAMP[1], 2D 26: MOV TEMP[3].x, TEMP[3].xxxx 27: ADD TEMP[4].xy, CONST[1][2].xwww, TEMP[1].xyyy 28: MOV TEMP[4].xy, TEMP[4].xyyy 29: TEX TEMP[4].x, TEMP[4], SAMP[1], 2D 30: MOV TEMP[3].y, TEMP[4].xxxx 31: ADD TEMP[4].xy, CONST[1][2].ywww, TEMP[1].xyyy 32: MOV TEMP[4].xy, TEMP[4].xyyy 33: TEX TEMP[4].x, TEMP[4], SAMP[1], 2D 34: MOV TEMP[3].z, TEMP[4].xxxx 35: ADD TEMP[4].xy, CONST[1][2].zwww, TEMP[1].xyyy 36: MOV TEMP[4].xy, TEMP[4].xyyy 37: TEX TEMP[4].x, TEMP[4], SAMP[1], 2D 38: MOV TEMP[3].w, TEMP[4].xxxx 39: MOV TEMP[4].xy, TEMP[1].xyyy 40: TEX TEMP[4].x, TEMP[4], SAMP[1], 2D 41: MOV TEMP[4].x, TEMP[4].xxxx 42: ADD TEMP[5].xy, CONST[1][3].xwww, TEMP[1].xyyy 43: MOV TEMP[5].xy, TEMP[5].xyyy 44: TEX TEMP[5].x, TEMP[5], SAMP[1], 2D 45: MOV TEMP[4].y, TEMP[5].xxxx 46: ADD TEMP[5].xy, CONST[1][3].ywww, TEMP[1].xyyy 47: MOV TEMP[5].xy, TEMP[5].xyyy 48: TEX TEMP[5].x, TEMP[5], SAMP[1], 2D 49: MOV TEMP[4].z, TEMP[5].xxxx 50: ADD TEMP[5].xy, CONST[1][3].zwww, TEMP[1].xyyy 51: MOV TEMP[5].xy, TEMP[5].xyyy 52: TEX TEMP[5].x, TEMP[5], SAMP[1], 2D 53: MOV TEMP[4].w, TEMP[5].xxxx 54: MOV TEMP[5].x, IMM[1].yyyy 55: MOV TEMP[5].y, CONST[1][4].wwww 56: ADD TEMP[5].xy, TEMP[5].xyyy, TEMP[1].xyyy 57: MOV TEMP[5].xy, TEMP[5].xyyy 58: TEX TEMP[5].x, TEMP[5], SAMP[1], 2D 59: MOV TEMP[5].x, TEMP[5].xxxx 60: ADD TEMP[6].xy, CONST[1][4].xwww, TEMP[1].xyyy 61: MOV TEMP[6].xy, TEMP[6].xyyy 62: TEX TEMP[6].x, TEMP[6], SAMP[1], 2D 63: MOV TEMP[5].y, TEMP[6].xxxx 64: ADD TEMP[6].xy, CONST[1][4].ywww, TEMP[1].xyyy 65: MOV TEMP[6].xy, TEMP[6].xyyy 66: TEX TEMP[6].x, TEMP[6], SAMP[1], 2D 67: MOV TEMP[5].z, TEMP[6].xxxx 68: ADD TEMP[1].xy, CONST[1][4].zwww, TEMP[1].xyyy 69: MOV TEMP[1].xy, TEMP[1].xyyy 70: TEX TEMP[1].x, TEMP[1], SAMP[1], 2D 71: MOV TEMP[5].w, TEMP[1].xxxx 72: MIN TEMP[1], TEMP[2], TEMP[3] 73: MIN TEMP[2], TEMP[4], TEMP[5] 74: MIN TEMP[1], TEMP[1], TEMP[2] 75: MIN TEMP[1].xy, TEMP[1].xyyy, TEMP[1].zwww 76: MIN TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy 77: RCP TEMP[2].x, CONST[2][23].xxxx 78: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx 79: MUL TEMP[1].x, TEMP[1].xxxx, IMM[1].zzzz 80: FLR TEMP[1].x, TEMP[1].xxxx 81: MUL TEMP[2].x, TEMP[1].xxxx, IMM[1].wwww 82: FLR TEMP[2].x, TEMP[2].xxxx 83: MUL TEMP[3].x, TEMP[2].xxxx, IMM[3].xxxx 84: MUL TEMP[2].x, TEMP[2].xxxx, IMM[3].yyyy 85: ADD TEMP[1].x, TEMP[1].xxxx, -TEMP[2].xxxx 86: MUL TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx 87: MOV TEMP[3].y, TEMP[1].xxxx 88: MOV TEMP[3].z, TEMP[0].xxxx 89: MOV TEMP[3].w, TEMP[0].yyyy 90: MOV OUT[0], TEMP[3] 91: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %40 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 %41 = load <16 x i8>, <16 x i8> addrspace(2)* %40, align 16, !tbaa !0 %42 = call float @llvm.SI.load.const(<16 x i8> %41, i32 368) %43 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %44 = load <32 x i8>, <32 x i8> addrspace(2)* %43, align 32, !tbaa !0 %45 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %48 = load <8 x i32>, <8 x i32> addrspace(2)* %47, align 32, !tbaa !0 %49 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %50 = load <4 x i32>, <4 x i32> addrspace(2)* %49, align 16, !tbaa !0 %51 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %52 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %53 = bitcast float %51 to i32 %54 = bitcast float %52 to i32 %55 = insertelement <2 x i32> undef, i32 %53, i32 0 %56 = insertelement <2 x i32> %55, i32 %54, i32 1 %57 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %56, <32 x i8> %44, <16 x i8> %46, i32 2) %58 = extractelement <4 x float> %57, i32 0 %59 = extractelement <4 x float> %57, i32 1 %60 = fmul float %29, 5.000000e-01 %61 = fadd float %60, %51 %62 = fmul float %31, 5.000000e-01 %63 = fadd float %62, %52 %64 = fadd float %61, 0.000000e+00 %65 = fadd float %27, %63 %66 = bitcast float %64 to i32 %67 = bitcast float %65 to i32 %68 = insertelement <2 x i32> undef, i32 %66, i32 0 %69 = insertelement <2 x i32> %68, i32 %67, i32 1 %70 = bitcast <8 x i32> %48 to <32 x i8> %71 = bitcast <4 x i32> %50 to <16 x i8> %72 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %69, <32 x i8> %70, <16 x i8> %71, i32 2) %73 = extractelement <4 x float> %72, i32 0 %74 = fadd float %24, %61 %75 = fadd float %27, %63 %76 = bitcast float %74 to i32 %77 = bitcast float %75 to i32 %78 = insertelement <2 x i32> undef, i32 %76, i32 0 %79 = insertelement <2 x i32> %78, i32 %77, i32 1 %80 = bitcast <8 x i32> %48 to <32 x i8> %81 = bitcast <4 x i32> %50 to <16 x i8> %82 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %79, <32 x i8> %80, <16 x i8> %81, i32 2) %83 = extractelement <4 x float> %82, i32 0 %84 = fadd float %25, %61 %85 = fadd float %27, %63 %86 = bitcast float %84 to i32 %87 = bitcast float %85 to i32 %88 = insertelement <2 x i32> undef, i32 %86, i32 0 %89 = insertelement <2 x i32> %88, i32 %87, i32 1 %90 = bitcast <8 x i32> %48 to <32 x i8> %91 = bitcast <4 x i32> %50 to <16 x i8> %92 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %89, <32 x i8> %90, <16 x i8> %91, i32 2) %93 = extractelement <4 x float> %92, i32 0 %94 = fadd float %26, %61 %95 = fadd float %27, %63 %96 = bitcast float %94 to i32 %97 = bitcast float %95 to i32 %98 = insertelement <2 x i32> undef, i32 %96, i32 0 %99 = insertelement <2 x i32> %98, i32 %97, i32 1 %100 = bitcast <8 x i32> %48 to <32 x i8> %101 = bitcast <4 x i32> %50 to <16 x i8> %102 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %99, <32 x i8> %100, <16 x i8> %101, i32 2) %103 = extractelement <4 x float> %102, i32 0 %104 = fadd float %61, 0.000000e+00 %105 = fadd float %31, %63 %106 = bitcast float %104 to i32 %107 = bitcast float %105 to i32 %108 = insertelement <2 x i32> undef, i32 %106, i32 0 %109 = insertelement <2 x i32> %108, i32 %107, i32 1 %110 = bitcast <8 x i32> %48 to <32 x i8> %111 = bitcast <4 x i32> %50 to <16 x i8> %112 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %109, <32 x i8> %110, <16 x i8> %111, i32 2) %113 = extractelement <4 x float> %112, i32 0 %114 = fadd float %28, %61 %115 = fadd float %31, %63 %116 = bitcast float %114 to i32 %117 = bitcast float %115 to i32 %118 = insertelement <2 x i32> undef, i32 %116, i32 0 %119 = insertelement <2 x i32> %118, i32 %117, i32 1 %120 = bitcast <8 x i32> %48 to <32 x i8> %121 = bitcast <4 x i32> %50 to <16 x i8> %122 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %119, <32 x i8> %120, <16 x i8> %121, i32 2) %123 = extractelement <4 x float> %122, i32 0 %124 = fadd float %29, %61 %125 = fadd float %31, %63 %126 = bitcast float %124 to i32 %127 = bitcast float %125 to i32 %128 = insertelement <2 x i32> undef, i32 %126, i32 0 %129 = insertelement <2 x i32> %128, i32 %127, i32 1 %130 = bitcast <8 x i32> %48 to <32 x i8> %131 = bitcast <4 x i32> %50 to <16 x i8> %132 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %129, <32 x i8> %130, <16 x i8> %131, i32 2) %133 = extractelement <4 x float> %132, i32 0 %134 = fadd float %30, %61 %135 = fadd float %31, %63 %136 = bitcast float %134 to i32 %137 = bitcast float %135 to i32 %138 = insertelement <2 x i32> undef, i32 %136, i32 0 %139 = insertelement <2 x i32> %138, i32 %137, i32 1 %140 = bitcast <8 x i32> %48 to <32 x i8> %141 = bitcast <4 x i32> %50 to <16 x i8> %142 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %139, <32 x i8> %140, <16 x i8> %141, i32 2) %143 = extractelement <4 x float> %142, i32 0 %144 = bitcast float %61 to i32 %145 = bitcast float %63 to i32 %146 = insertelement <2 x i32> undef, i32 %144, i32 0 %147 = insertelement <2 x i32> %146, i32 %145, i32 1 %148 = bitcast <8 x i32> %48 to <32 x i8> %149 = bitcast <4 x i32> %50 to <16 x i8> %150 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %147, <32 x i8> %148, <16 x i8> %149, i32 2) %151 = extractelement <4 x float> %150, i32 0 %152 = fadd float %32, %61 %153 = fadd float %35, %63 %154 = bitcast float %152 to i32 %155 = bitcast float %153 to i32 %156 = insertelement <2 x i32> undef, i32 %154, i32 0 %157 = insertelement <2 x i32> %156, i32 %155, i32 1 %158 = bitcast <8 x i32> %48 to <32 x i8> %159 = bitcast <4 x i32> %50 to <16 x i8> %160 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %157, <32 x i8> %158, <16 x i8> %159, i32 2) %161 = extractelement <4 x float> %160, i32 0 %162 = fadd float %33, %61 %163 = fadd float %35, %63 %164 = bitcast float %162 to i32 %165 = bitcast float %163 to i32 %166 = insertelement <2 x i32> undef, i32 %164, i32 0 %167 = insertelement <2 x i32> %166, i32 %165, i32 1 %168 = bitcast <8 x i32> %48 to <32 x i8> %169 = bitcast <4 x i32> %50 to <16 x i8> %170 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %167, <32 x i8> %168, <16 x i8> %169, i32 2) %171 = extractelement <4 x float> %170, i32 0 %172 = fadd float %34, %61 %173 = fadd float %35, %63 %174 = bitcast float %172 to i32 %175 = bitcast float %173 to i32 %176 = insertelement <2 x i32> undef, i32 %174, i32 0 %177 = insertelement <2 x i32> %176, i32 %175, i32 1 %178 = bitcast <8 x i32> %48 to <32 x i8> %179 = bitcast <4 x i32> %50 to <16 x i8> %180 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %177, <32 x i8> %178, <16 x i8> %179, i32 2) %181 = extractelement <4 x float> %180, i32 0 %182 = fadd float %61, 0.000000e+00 %183 = fadd float %39, %63 %184 = bitcast float %182 to i32 %185 = bitcast float %183 to i32 %186 = insertelement <2 x i32> undef, i32 %184, i32 0 %187 = insertelement <2 x i32> %186, i32 %185, i32 1 %188 = bitcast <8 x i32> %48 to <32 x i8> %189 = bitcast <4 x i32> %50 to <16 x i8> %190 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %187, <32 x i8> %188, <16 x i8> %189, i32 2) %191 = extractelement <4 x float> %190, i32 0 %192 = fadd float %36, %61 %193 = fadd float %39, %63 %194 = bitcast float %192 to i32 %195 = bitcast float %193 to i32 %196 = insertelement <2 x i32> undef, i32 %194, i32 0 %197 = insertelement <2 x i32> %196, i32 %195, i32 1 %198 = bitcast <8 x i32> %48 to <32 x i8> %199 = bitcast <4 x i32> %50 to <16 x i8> %200 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %197, <32 x i8> %198, <16 x i8> %199, i32 2) %201 = extractelement <4 x float> %200, i32 0 %202 = fadd float %37, %61 %203 = fadd float %39, %63 %204 = bitcast float %202 to i32 %205 = bitcast float %203 to i32 %206 = insertelement <2 x i32> undef, i32 %204, i32 0 %207 = insertelement <2 x i32> %206, i32 %205, i32 1 %208 = bitcast <8 x i32> %48 to <32 x i8> %209 = bitcast <4 x i32> %50 to <16 x i8> %210 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %207, <32 x i8> %208, <16 x i8> %209, i32 2) %211 = extractelement <4 x float> %210, i32 0 %212 = fadd float %38, %61 %213 = fadd float %39, %63 %214 = bitcast float %212 to i32 %215 = bitcast float %213 to i32 %216 = insertelement <2 x i32> undef, i32 %214, i32 0 %217 = insertelement <2 x i32> %216, i32 %215, i32 1 %218 = bitcast <8 x i32> %48 to <32 x i8> %219 = bitcast <4 x i32> %50 to <16 x i8> %220 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %217, <32 x i8> %218, <16 x i8> %219, i32 2) %221 = extractelement <4 x float> %220, i32 0 %222 = call float @llvm.minnum.f32(float %73, float %113) %223 = call float @llvm.minnum.f32(float %83, float %123) %224 = call float @llvm.minnum.f32(float %93, float %133) %225 = call float @llvm.minnum.f32(float %103, float %143) %226 = call float @llvm.minnum.f32(float %151, float %191) %227 = call float @llvm.minnum.f32(float %161, float %201) %228 = call float @llvm.minnum.f32(float %171, float %211) %229 = call float @llvm.minnum.f32(float %181, float %221) %230 = call float @llvm.minnum.f32(float %222, float %226) %231 = call float @llvm.minnum.f32(float %223, float %227) %232 = call float @llvm.minnum.f32(float %224, float %228) %233 = call float @llvm.minnum.f32(float %225, float %229) %234 = call float @llvm.minnum.f32(float %230, float %232) %235 = call float @llvm.minnum.f32(float %231, float %233) %236 = call float @llvm.minnum.f32(float %234, float %235) %237 = fdiv float 1.000000e+00, %42 %238 = fmul float %236, %237 %239 = fmul float %238, 6.553500e+04 %240 = call float @floor(float %239) %241 = fmul float %240, 3.906250e-03 %242 = call float @floor(float %241) %243 = fmul float %242, 0x3F70101020000000 %244 = fmul float %242, 2.560000e+02 %245 = fsub float %240, %244 %246 = fmul float %245, 0x3F70101020000000 %247 = call i32 @llvm.SI.packf16(float %243, float %246) %248 = bitcast i32 %247 to float %249 = call i32 @llvm.SI.packf16(float %58, float %59) %250 = bitcast i32 %249 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %248, float %250, float %248, float %250) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: readnone declare float @floor(float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[20:27], s[6:7], 0x0 ; C0CA0700 s_load_dwordx4 s[28:31], s[4:5], 0x0 ; C08E0500 s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[32:35], s[2:3], 0x4 ; C0900304 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_load_dwordx4 s[8:11], s[4:5], 0x4 ; C0840504 s_load_dwordx8 s[12:19], s[6:7], 0x8 ; C0C60708 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:1], 3, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[20:27], s[28:31] ; F0800300 00E50002 s_load_dwordx4 s[0:3], s[2:3], 0x8 ; C0800308 s_buffer_load_dword s4, s[32:35], 0x4 ; C2022104 s_buffer_load_dword s5, s[32:35], 0x5 ; C202A105 s_buffer_load_dword s6, s[32:35], 0x6 ; C2032106 s_buffer_load_dword s7, s[32:35], 0x7 ; C203A107 s_buffer_load_dword s20, s[32:35], 0x8 ; C20A2108 s_buffer_load_dword s21, s[32:35], 0x9 ; C20AA109 s_buffer_load_dword s22, s[32:35], 0xa ; C20B210A s_buffer_load_dword s23, s[32:35], 0xb ; C20BA10B s_buffer_load_dword s24, s[32:35], 0xc ; C20C210C s_buffer_load_dword s25, s[32:35], 0xd ; C20CA10D s_buffer_load_dword s26, s[32:35], 0xe ; C20D210E s_buffer_load_dword s27, s[32:35], 0xf ; C20DA10F s_buffer_load_dword s28, s[32:35], 0x10 ; C20E2110 s_buffer_load_dword s29, s[32:35], 0x11 ; C20EA111 s_buffer_load_dword s30, s[32:35], 0x12 ; C20F2112 s_buffer_load_dword s31, s[32:35], 0x13 ; C20FA113 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mac_f32_e64 v2, 0.5, s21 ; D23E0002 00002AF0 v_mac_f32_e64 v3, 0.5, s23 ; D23E0003 00002EF0 v_add_f32_e32 v4, 0, v2 ; 06080480 v_add_f32_e32 v5, s7, v3 ; 060A0607 v_add_f32_e32 v6, s4, v2 ; 060C0404 s_buffer_load_dword s0, s[0:3], 0x5c ; C200015C v_mov_b32_e32 v7, v5 ; 7E0E0305 image_sample v8, 1, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[12:19], s[8:11] ; F0800100 00430804 v_add_f32_e32 v9, s5, v2 ; 06120405 image_sample v6, 1, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[12:19], s[8:11] ; F0800100 00430606 v_mov_b32_e32 v10, v5 ; 7E140305 v_add_f32_e32 v11, s6, v2 ; 06160406 image_sample v7, 1, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[12:19], s[8:11] ; F0800100 00430709 v_mov_b32_e32 v12, v5 ; 7E180305 v_add_f32_e32 v10, s23, v3 ; 06140617 image_sample v11, 1, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[12:19], s[8:11] ; F0800100 00430B0B v_mov_b32_e32 v5, v10 ; 7E0A030A v_add_f32_e32 v9, s20, v2 ; 06120414 image_sample v12, 1, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[12:19], s[8:11] ; F0800100 00430C04 image_sample v13, 1, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[12:19], s[8:11] ; F0800100 00430D09 v_add_f32_e32 v9, s21, v2 ; 06120415 image_sample v14, 1, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[12:19], s[8:11] ; F0800100 00430E09 v_add_f32_e32 v9, s22, v2 ; 06120416 image_sample v9, 1, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[12:19], s[8:11] ; F0800100 00430909 v_add_f32_e32 v15, s24, v2 ; 061E0418 v_add_f32_e32 v16, s27, v3 ; 0620061B image_sample v10, 1, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800100 00430A02 image_sample v17, 1, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[12:19], s[8:11] ; F0800100 0043110F v_add_f32_e32 v15, s25, v2 ; 061E0419 image_sample v18, 1, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[12:19], s[8:11] ; F0800100 0043120F v_add_f32_e32 v15, s26, v2 ; 061E041A image_sample v15, 1, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[12:19], s[8:11] ; F0800100 00430F0F v_add_f32_e32 v5, s31, v3 ; 060A061F image_sample v3, 1, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[12:19], s[8:11] ; F0800100 00430304 v_add_f32_e32 v4, s28, v2 ; 0608041C image_sample v16, 1, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[12:19], s[8:11] ; F0800100 00431004 v_add_f32_e32 v4, s29, v2 ; 0608041D image_sample v19, 1, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[12:19], s[8:11] ; F0800100 00431304 v_add_f32_e32 v4, s30, v2 ; 0608041E image_sample v2, 1, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[12:19], s[8:11] ; F0800100 00430204 s_waitcnt vmcnt(3) lgkmcnt(0) ; BF8C0073 v_min_f32_e32 v3, v3, v10 ; 1E061503 v_min3_f32 v3, v8, v12, v3 ; D2A20003 040E1908 s_waitcnt vmcnt(1) ; BF8C0771 v_min_f32_e32 v4, v19, v18 ; 1E082513 v_min3_f32 v4, v7, v14, v4 ; D2A20004 04121D07 s_waitcnt vmcnt(0) ; BF8C0770 v_min_f32_e32 v2, v2, v15 ; 1E041F02 v_min3_f32 v2, v11, v9, v2 ; D2A20002 040A130B v_min_f32_e32 v5, v13, v6 ; 1E0A0D0D v_rcp_f32_e32 v6, s0 ; 7E0C5400 v_min_f32_e32 v7, v16, v17 ; 1E0E2310 v_min3_f32 v2, v5, v7, v2 ; D2A20002 040A0F05 v_min3_f32 v2, v3, v4, v2 ; D2A20002 040A0903 v_mul_f32_e32 v2, v6, v2 ; 10040506 v_mul_f32_e32 v2, 0x477fff00, v2 ; 100404FF 477FFF00 v_floor_f32_e32 v2, v2 ; 7E044902 v_mul_f32_e32 v3, 0x3b800000, v2 ; 100604FF 3B800000 v_floor_f32_e32 v3, v3 ; 7E064903 v_mov_b32_e32 v4, 0x3b808081 ; 7E0802FF 3B808081 v_mul_f32_e32 v5, v4, v3 ; 100A0704 v_madmk_f32_e32 v2, v3, v2, 0xc3800000 ; 40040503 C3800000 v_mul_f32_e32 v2, v4, v2 ; 10040504 v_cvt_pkrtz_f16_f32_e32 v2, v5, v2 ; 5E040505 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 exp 15, 0, 1, 1, 1, v2, v0, v2, v0 ; F8001C0F 00020002 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 20 Code Size: 508 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], POSITION, LINEAR DCL IN[1], GENERIC[0], PERSPECTIVE DCL IN[2], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL CONST[4] DCL CONST[1][0..3] DCL CONST[2][0..41] DCL CONST[3][0..13] DCL CONST[4][0] DCL TEMP[0] DCL TEMP[1..17], LOCAL IMM[0] FLT32 { 0.0000, -0.5000, 2.0000, 0.2500} IMM[1] FLT32 { -1.0000, 0.6152, 0.0992, 0.4686} IMM[2] UINT32 {1, 304, 0, 16} IMM[3] UINT32 {320, 368, 20, 12} IMM[4] UINT32 {28, 44, 60, 32} IMM[5] UINT32 {4, 36, 48, 52} IMM[6] FLT32 { 1.0000, 0.9961, 0.0039, -0.0015} IMM[7] FLT32 { -0.2467, 0.4956, 0.6367, 0.0625} IMM[8] FLT32 { -0.3027, 0.1974, 0.9290, 65535.0000} IMM[9] FLT32 { 0.3298, -0.2439, 0.4536, 0.0039} IMM[10] FLT32 { 0.2288, 0.2897, 0.1331, 7.0000} IMM[11] FLT32 { -0.2213, -0.1484, 0.1318, 8.0000} IMM[12] FLT32 { -0.4576, -0.6662, 0.2295, 0.0039} IMM[13] FLT32 { -0.0739, 0.0550, -0.0097, 256.0000} IMM[14] FLT32 { -0.0383, -0.0400, 0.0098, 0.0039} IMM[15] FLT32 { 0.7452, 0.3237, -0.4291, 0.0627} IMM[16] FLT32 { -0.0154, 0.2674, -0.0660, 0.2934} IMM[17] FLT32 { -0.0962, -0.5229, -0.2508, 0.2934} IMM[18] FLT32 { 0.4897, -0.4644, -0.1492, 0.2934} IMM[19] FLT32 { -0.1312, 0.1391, -0.7009, 0.2934} IMM[20] FLT32 { -0.3623, -0.1005, -0.3062, 0.2934} IMM[21] FLT32 { 0.2934, -0.1706, -0.6618, 0.0000} 0: MOV TEMP[0], IN[0] 1: MAD TEMP[0].y, IN[0], CONST[4].xxxx, CONST[4].yyyy 2: MOV TEMP[1].xy, IN[1].xyyy 3: MOV TEMP[1].w, IMM[0].xxxx 4: TXL TEMP[1].xyz, TEMP[1], SAMP[1], 2D 5: ADD TEMP[1].xyz, TEMP[1].xyzz, IMM[0].yyyy 6: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[0].zzzz 7: MOV TEMP[2].xy, IN[1].xyyy 8: MOV TEMP[2].w, IMM[0].xxxx 9: TXL TEMP[2].x, TEMP[2], SAMP[2], 2D 10: MUL TEMP[3].xy, TEMP[0].xyyy, IMM[0].wwww 11: MOV TEMP[3].xy, TEMP[3].xyyy 12: TEX TEMP[3], TEMP[3], SAMP[3], 2D 13: MAD TEMP[3], TEMP[3], IMM[0].zzzz, IMM[1].xxxx 14: DP4 TEMP[4].x, TEMP[3], TEMP[3] 15: RSQ TEMP[4].x, TEMP[4].xxxx 16: MUL TEMP[3].xyz, TEMP[3], TEMP[4].xxxx 17: MAD TEMP[4].xyz, IN[2].xyzz, TEMP[2].xxxx, CONST[2][19].xyzz 18: MAD TEMP[4].xyz, CONST[1][1].xxxx, TEMP[1].xyzz, TEMP[4].xyzz 19: RCP TEMP[5].xyz, CONST[2][23].xxxx 20: MUL TEMP[5].xyz, CONST[2][20].xyzz, TEMP[5].xyzz 21: MUL TEMP[6].xyz, CONST[1][1].yyyy, TEMP[5].xyzz 22: MOV TEMP[7].x, TEMP[6].xxxx 23: MOV TEMP[7].y, TEMP[6].yyyy 24: MOV TEMP[7].z, TEMP[6].zzzz 25: DP3 TEMP[6].x, TEMP[5].xyzz, TEMP[4].xyzz 26: DP3 TEMP[5].x, TEMP[5].xyzz, CONST[2][19].xyzz 27: ADD TEMP[5].x, TEMP[6].xxxx, -TEMP[5].xxxx 28: MOV TEMP[7].w, TEMP[5].xxxx 29: MUL TEMP[5].x, CONST[2][0].xxxx, CONST[1][2].xxxx 30: MAD TEMP[5].x, CONST[2][0].wwww, CONST[1][2].zzzz, TEMP[5].xxxx 31: MUL TEMP[6].x, CONST[2][1].xxxx, CONST[1][2].xxxx 32: MAD TEMP[6].x, CONST[2][1].wwww, CONST[1][2].zzzz, TEMP[6].xxxx 33: MOV TEMP[5].y, TEMP[6].xxxx 34: MUL TEMP[6].x, CONST[2][2].xxxx, CONST[1][2].xxxx 35: MAD TEMP[6].x, CONST[2][2].wwww, CONST[1][2].zzzz, TEMP[6].xxxx 36: MOV TEMP[5].z, TEMP[6].xxxx 37: MUL TEMP[6].x, CONST[2][0].yyyy, CONST[1][2].yyyy 38: MAD TEMP[6].x, CONST[2][0].wwww, CONST[1][2].wwww, TEMP[6].xxxx 39: MUL TEMP[8].x, CONST[2][1].yyyy, CONST[1][2].yyyy 40: MAD TEMP[8].x, CONST[2][1].wwww, CONST[1][2].wwww, TEMP[8].xxxx 41: MOV TEMP[6].y, TEMP[8].xxxx 42: MUL TEMP[8].x, CONST[2][2].yyyy, CONST[1][2].yyyy 43: MAD TEMP[8].x, CONST[2][2].wwww, CONST[1][2].wwww, TEMP[8].xxxx 44: MOV TEMP[6].z, TEMP[8].xxxx 45: MUL TEMP[8].xyz, TEMP[5].xyzz, CONST[1][1].yyyy 46: MOV TEMP[9].x, TEMP[8].xxxx 47: MOV TEMP[9].y, TEMP[8].yyyy 48: MOV TEMP[9].z, TEMP[8].zzzz 49: MUL TEMP[8].x, CONST[2][3].xxxx, CONST[1][2].xxxx 50: MAD TEMP[8].x, CONST[2][3].wwww, CONST[1][2].zzzz, TEMP[8].xxxx 51: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[4].xyzz 52: ADD TEMP[5].x, TEMP[8].xxxx, TEMP[5].xxxx 53: MOV TEMP[9].w, TEMP[5].xxxx 54: MUL TEMP[5].xyz, CONST[1][1].yyyy, TEMP[6].xyzz 55: MOV TEMP[8].x, TEMP[5].xxxx 56: MOV TEMP[8].y, TEMP[5].yyyy 57: MOV TEMP[8].z, TEMP[5].zzzz 58: MUL TEMP[5].x, CONST[2][3].yyyy, CONST[1][2].yyyy 59: MAD TEMP[5].x, CONST[2][3].wwww, CONST[1][2].wwww, TEMP[5].xxxx 60: DP3 TEMP[6].x, TEMP[6].xyzz, TEMP[4].xyzz 61: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx 62: MOV TEMP[8].w, TEMP[5].xxxx 63: MOV TEMP[5].x, CONST[2][0].wwww 64: MOV TEMP[5].y, CONST[2][1].wwww 65: MOV TEMP[5].z, CONST[2][2].wwww 66: MUL TEMP[5].xyz, CONST[1][1].yyyy, TEMP[5].xyzz 67: MOV TEMP[6].x, TEMP[5].xxxx 68: MOV TEMP[6].y, TEMP[5].yyyy 69: MOV TEMP[6].z, TEMP[5].zzzz 70: MOV TEMP[5].x, CONST[2][0].wwww 71: MOV TEMP[5].y, CONST[2][1].wwww 72: MOV TEMP[5].z, CONST[2][2].wwww 73: DP3 TEMP[4].x, TEMP[5].xyzz, TEMP[4].xyzz 74: ADD TEMP[4].x, CONST[2][3].wwww, TEMP[4].xxxx 75: MOV TEMP[6].w, TEMP[4].xxxx 76: DP3 TEMP[4].x, TEMP[3].xyzz, IMM[1].yzww 77: MUL TEMP[4].xyz, TEMP[4].xxxx, TEMP[3].xyzz 78: MUL TEMP[4].xyz, IMM[0].zzzz, TEMP[4].xyzz 79: ADD TEMP[4].xyz, IMM[1].yzww, -TEMP[4].xyzz 80: DP3 TEMP[5].x, TEMP[1].xyzz, TEMP[4].xyzz 81: MUL TEMP[5].xyz, TEMP[5].xxxx, TEMP[1].xyzz 82: MUL TEMP[5].xyz, IMM[0].zzzz, TEMP[5].xyzz 83: ADD TEMP[5].xyz, TEMP[4].xyzz, -TEMP[5].xyzz 84: DP3 TEMP[10].x, TEMP[4].xyzz, TEMP[1].xyzz 85: FSLT TEMP[10].x, TEMP[10].xxxx, IMM[0].xxxx 86: UIF TEMP[10].xxxx :2 87: MOV TEMP[5].xyz, TEMP[5].xyzx 88: ELSE :2 89: MOV TEMP[5].xyz, TEMP[4].xyzx 90: ENDIF 91: MOV TEMP[4].w, IMM[6].xxxx 92: MOV TEMP[4].x, TEMP[5].xxxx 93: MOV TEMP[4].y, TEMP[5].yyyy 94: MOV TEMP[4].z, TEMP[5].zzzz 95: DP4 TEMP[5].x, TEMP[7], TEMP[4] 96: DP4 TEMP[10].x, TEMP[4], TEMP[9] 97: DP4 TEMP[11].x, TEMP[4], TEMP[8] 98: MOV TEMP[10].y, TEMP[11].xxxx 99: DP4 TEMP[4].x, TEMP[4], TEMP[6] 100: RCP TEMP[4].xy, TEMP[4].xxxx 101: MUL TEMP[4].xy, TEMP[10].xyyy, TEMP[4].xyyy 102: MOV TEMP[4].xy, TEMP[4].xyyy 103: TEX TEMP[4].xy, TEMP[4], SAMP[0], 2D 104: DP2 TEMP[4].x, TEMP[4].xyyy, IMM[6].yzzz 105: DP3 TEMP[10].x, TEMP[3].xyzz, IMM[7].xyzz 106: MUL TEMP[10].xyz, TEMP[10].xxxx, TEMP[3].xyzz 107: MUL TEMP[10].xyz, IMM[0].zzzz, TEMP[10].xyzz 108: ADD TEMP[10].xyz, IMM[7].xyzz, -TEMP[10].xyzz 109: DP3 TEMP[11].x, TEMP[1].xyzz, TEMP[10].xyzz 110: MUL TEMP[11].xyz, TEMP[11].xxxx, TEMP[1].xyzz 111: MUL TEMP[11].xyz, IMM[0].zzzz, TEMP[11].xyzz 112: ADD TEMP[11].xyz, TEMP[10].xyzz, -TEMP[11].xyzz 113: DP3 TEMP[12].x, TEMP[10].xyzz, TEMP[1].xyzz 114: FSLT TEMP[12].x, TEMP[12].xxxx, IMM[0].xxxx 115: UIF TEMP[12].xxxx :2 116: MOV TEMP[11].xyz, TEMP[11].xyzx 117: ELSE :2 118: MOV TEMP[11].xyz, TEMP[10].xyzx 119: ENDIF 120: MOV TEMP[10].w, IMM[6].xxxx 121: MOV TEMP[10].x, TEMP[11].xxxx 122: MOV TEMP[10].y, TEMP[11].yyyy 123: MOV TEMP[10].z, TEMP[11].zzzz 124: DP4 TEMP[11].x, TEMP[7], TEMP[10] 125: DP4 TEMP[12].x, TEMP[10], TEMP[9] 126: DP4 TEMP[13].x, TEMP[10], TEMP[8] 127: MOV TEMP[12].y, TEMP[13].xxxx 128: DP4 TEMP[10].x, TEMP[10], TEMP[6] 129: RCP TEMP[10].xy, TEMP[10].xxxx 130: MUL TEMP[10].xy, TEMP[12].xyyy, TEMP[10].xyyy 131: MOV TEMP[10].xy, TEMP[10].xyyy 132: TEX TEMP[10].xy, TEMP[10], SAMP[0], 2D 133: DP2 TEMP[10].x, TEMP[10].xyyy, IMM[6].yzzz 134: DP3 TEMP[12].x, TEMP[3].xyzz, IMM[8].xyzz 135: MUL TEMP[12].xyz, TEMP[12].xxxx, TEMP[3].xyzz 136: MUL TEMP[12].xyz, IMM[0].zzzz, TEMP[12].xyzz 137: ADD TEMP[12].xyz, IMM[8].xyzz, -TEMP[12].xyzz 138: DP3 TEMP[13].x, TEMP[1].xyzz, TEMP[12].xyzz 139: MUL TEMP[13].xyz, TEMP[13].xxxx, TEMP[1].xyzz 140: MUL TEMP[13].xyz, IMM[0].zzzz, TEMP[13].xyzz 141: ADD TEMP[13].xyz, TEMP[12].xyzz, -TEMP[13].xyzz 142: DP3 TEMP[14].x, TEMP[12].xyzz, TEMP[1].xyzz 143: FSLT TEMP[14].x, TEMP[14].xxxx, IMM[0].xxxx 144: UIF TEMP[14].xxxx :2 145: MOV TEMP[13].xyz, TEMP[13].xyzx 146: ELSE :2 147: MOV TEMP[13].xyz, TEMP[12].xyzx 148: ENDIF 149: MOV TEMP[12].w, IMM[6].xxxx 150: MOV TEMP[12].x, TEMP[13].xxxx 151: MOV TEMP[12].y, TEMP[13].yyyy 152: MOV TEMP[12].z, TEMP[13].zzzz 153: DP4 TEMP[13].x, TEMP[7], TEMP[12] 154: DP4 TEMP[14].x, TEMP[12], TEMP[9] 155: DP4 TEMP[15].x, TEMP[12], TEMP[8] 156: MOV TEMP[14].y, TEMP[15].xxxx 157: DP4 TEMP[12].x, TEMP[12], TEMP[6] 158: RCP TEMP[12].xy, TEMP[12].xxxx 159: MUL TEMP[12].xy, TEMP[14].xyyy, TEMP[12].xyyy 160: MOV TEMP[12].xy, TEMP[12].xyyy 161: TEX TEMP[12].xy, TEMP[12], SAMP[0], 2D 162: DP2 TEMP[12].x, TEMP[12].xyyy, IMM[6].yzzz 163: DP3 TEMP[14].x, TEMP[3].xyzz, IMM[9].xyzz 164: MUL TEMP[14].xyz, TEMP[14].xxxx, TEMP[3].xyzz 165: MUL TEMP[14].xyz, IMM[0].zzzz, TEMP[14].xyzz 166: ADD TEMP[14].xyz, IMM[9].xyzz, -TEMP[14].xyzz 167: DP3 TEMP[15].x, TEMP[1].xyzz, TEMP[14].xyzz 168: MUL TEMP[15].xyz, TEMP[15].xxxx, TEMP[1].xyzz 169: MUL TEMP[15].xyz, IMM[0].zzzz, TEMP[15].xyzz 170: ADD TEMP[15].xyz, TEMP[14].xyzz, -TEMP[15].xyzz 171: DP3 TEMP[16].x, TEMP[14].xyzz, TEMP[1].xyzz 172: FSLT TEMP[16].x, TEMP[16].xxxx, IMM[0].xxxx 173: UIF TEMP[16].xxxx :2 174: MOV TEMP[15].xyz, TEMP[15].xyzx 175: ELSE :2 176: MOV TEMP[15].xyz, TEMP[14].xyzx 177: ENDIF 178: MOV TEMP[14].w, IMM[6].xxxx 179: MOV TEMP[14].x, TEMP[15].xxxx 180: MOV TEMP[14].y, TEMP[15].yyyy 181: MOV TEMP[14].z, TEMP[15].zzzz 182: MOV TEMP[5].x, TEMP[5].xxxx 183: MOV TEMP[5].y, TEMP[11].xxxx 184: MOV TEMP[5].z, TEMP[13].xxxx 185: DP4 TEMP[11].x, TEMP[7], TEMP[14] 186: MOV TEMP[5].w, TEMP[11].xxxx 187: DP4 TEMP[11].x, TEMP[14], TEMP[9] 188: DP4 TEMP[13].x, TEMP[14], TEMP[8] 189: MOV TEMP[11].y, TEMP[13].xxxx 190: MOV TEMP[4].x, TEMP[4].xxxx 191: MOV TEMP[4].y, TEMP[10].xxxx 192: MOV TEMP[4].z, TEMP[12].xxxx 193: DP4 TEMP[10].x, TEMP[14], TEMP[6] 194: RCP TEMP[10].xy, TEMP[10].xxxx 195: MUL TEMP[10].xy, TEMP[11].xyyy, TEMP[10].xyyy 196: MOV TEMP[10].xy, TEMP[10].xyyy 197: TEX TEMP[10].xy, TEMP[10], SAMP[0], 2D 198: DP2 TEMP[10].x, TEMP[10].xyyy, IMM[6].yzzz 199: MOV TEMP[4].w, TEMP[10].xxxx 200: ADD TEMP[4], TEMP[5], -TEMP[4] 201: ADD TEMP[4], TEMP[4], IMM[6].wwww 202: MUL TEMP[5].x, CONST[1][3].xxxx, TEMP[4].xxxx 203: ADD TEMP[5].x, IMM[6].xxxx, -TEMP[5].xxxx 204: MOV_SAT TEMP[5].x, TEMP[5].xxxx 205: FSLT TEMP[10].x, TEMP[4].xxxx, IMM[0].xxxx 206: UIF TEMP[10].xxxx :2 207: MOV TEMP[10].x, IMM[0].xxxx 208: ELSE :2 209: MOV TEMP[10].x, TEMP[5].xxxx 210: ENDIF 211: MUL TEMP[5].x, CONST[1][3].xxxx, TEMP[4].yyyy 212: ADD TEMP[5].x, IMM[6].xxxx, -TEMP[5].xxxx 213: MOV_SAT TEMP[5].x, TEMP[5].xxxx 214: FSLT TEMP[11].x, TEMP[4].yyyy, IMM[0].xxxx 215: UIF TEMP[11].xxxx :2 216: MOV TEMP[11].x, IMM[0].xxxx 217: ELSE :2 218: MOV TEMP[11].x, TEMP[5].xxxx 219: ENDIF 220: MUL TEMP[5].x, CONST[1][3].xxxx, TEMP[4].zzzz 221: ADD TEMP[5].x, IMM[6].xxxx, -TEMP[5].xxxx 222: MOV_SAT TEMP[5].x, TEMP[5].xxxx 223: FSLT TEMP[12].x, TEMP[4].zzzz, IMM[0].xxxx 224: UIF TEMP[12].xxxx :2 225: MOV TEMP[12].x, IMM[0].xxxx 226: ELSE :2 227: MOV TEMP[12].x, TEMP[5].xxxx 228: ENDIF 229: MUL TEMP[5].x, CONST[1][3].xxxx, TEMP[4].wwww 230: ADD TEMP[5].x, IMM[6].xxxx, -TEMP[5].xxxx 231: MOV_SAT TEMP[5].x, TEMP[5].xxxx 232: FSLT TEMP[4].x, TEMP[4].wwww, IMM[0].xxxx 233: UIF TEMP[4].xxxx :2 234: MOV TEMP[4].x, IMM[0].xxxx 235: ELSE :2 236: MOV TEMP[4].x, TEMP[5].xxxx 237: ENDIF 238: MOV TEMP[5].x, TEMP[10].xxxx 239: MOV TEMP[5].y, TEMP[11].xxxx 240: MOV TEMP[5].z, TEMP[12].xxxx 241: MOV TEMP[5].w, TEMP[4].xxxx 242: DP4 TEMP[4].x, TEMP[5], IMM[7].wwww 243: DP3 TEMP[5].x, TEMP[3].xyzz, IMM[10].xyzz 244: MUL TEMP[5].xyz, TEMP[5].xxxx, TEMP[3].xyzz 245: MUL TEMP[5].xyz, IMM[0].zzzz, TEMP[5].xyzz 246: ADD TEMP[5].xyz, IMM[10].xyzz, -TEMP[5].xyzz 247: DP3 TEMP[10].x, TEMP[1].xyzz, TEMP[5].xyzz 248: MUL TEMP[10].xyz, TEMP[10].xxxx, TEMP[1].xyzz 249: MUL TEMP[10].xyz, IMM[0].zzzz, TEMP[10].xyzz 250: ADD TEMP[10].xyz, TEMP[5].xyzz, -TEMP[10].xyzz 251: DP3 TEMP[11].x, TEMP[5].xyzz, TEMP[1].xyzz 252: FSLT TEMP[11].x, TEMP[11].xxxx, IMM[0].xxxx 253: UIF TEMP[11].xxxx :2 254: MOV TEMP[10].xyz, TEMP[10].xyzx 255: ELSE :2 256: MOV TEMP[10].xyz, TEMP[5].xyzx 257: ENDIF 258: MOV TEMP[5].w, IMM[6].xxxx 259: MOV TEMP[5].x, TEMP[10].xxxx 260: MOV TEMP[5].y, TEMP[10].yyyy 261: MOV TEMP[5].z, TEMP[10].zzzz 262: DP4 TEMP[10].x, TEMP[7], TEMP[5] 263: DP4 TEMP[11].x, TEMP[5], TEMP[9] 264: DP4 TEMP[12].x, TEMP[5], TEMP[8] 265: MOV TEMP[11].y, TEMP[12].xxxx 266: DP4 TEMP[5].x, TEMP[5], TEMP[6] 267: RCP TEMP[5].xy, TEMP[5].xxxx 268: MUL TEMP[5].xy, TEMP[11].xyyy, TEMP[5].xyyy 269: MOV TEMP[5].xy, TEMP[5].xyyy 270: TEX TEMP[5].xy, TEMP[5], SAMP[0], 2D 271: DP2 TEMP[5].x, TEMP[5].xyyy, IMM[6].yzzz 272: DP3 TEMP[11].x, TEMP[3].xyzz, IMM[11].xyzz 273: MUL TEMP[11].xyz, TEMP[11].xxxx, TEMP[3].xyzz 274: MUL TEMP[11].xyz, IMM[0].zzzz, TEMP[11].xyzz 275: ADD TEMP[11].xyz, IMM[11].xyzz, -TEMP[11].xyzz 276: DP3 TEMP[12].x, TEMP[1].xyzz, TEMP[11].xyzz 277: MUL TEMP[12].xyz, TEMP[12].xxxx, TEMP[1].xyzz 278: MUL TEMP[12].xyz, IMM[0].zzzz, TEMP[12].xyzz 279: ADD TEMP[12].xyz, TEMP[11].xyzz, -TEMP[12].xyzz 280: DP3 TEMP[13].x, TEMP[11].xyzz, TEMP[1].xyzz 281: FSLT TEMP[13].x, TEMP[13].xxxx, IMM[0].xxxx 282: UIF TEMP[13].xxxx :2 283: MOV TEMP[12].xyz, TEMP[12].xyzx 284: ELSE :2 285: MOV TEMP[12].xyz, TEMP[11].xyzx 286: ENDIF 287: MOV TEMP[11].w, IMM[6].xxxx 288: MOV TEMP[11].x, TEMP[12].xxxx 289: MOV TEMP[11].y, TEMP[12].yyyy 290: MOV TEMP[11].z, TEMP[12].zzzz 291: DP4 TEMP[12].x, TEMP[7], TEMP[11] 292: DP4 TEMP[13].x, TEMP[11], TEMP[9] 293: DP4 TEMP[14].x, TEMP[11], TEMP[8] 294: MOV TEMP[13].y, TEMP[14].xxxx 295: DP4 TEMP[11].x, TEMP[11], TEMP[6] 296: RCP TEMP[11].xy, TEMP[11].xxxx 297: MUL TEMP[11].xy, TEMP[13].xyyy, TEMP[11].xyyy 298: MOV TEMP[11].xy, TEMP[11].xyyy 299: TEX TEMP[11].xy, TEMP[11], SAMP[0], 2D 300: DP2 TEMP[11].x, TEMP[11].xyyy, IMM[6].yzzz 301: DP3 TEMP[13].x, TEMP[3].xyzz, IMM[12].xyzz 302: MUL TEMP[13].xyz, TEMP[13].xxxx, TEMP[3].xyzz 303: MUL TEMP[13].xyz, IMM[0].zzzz, TEMP[13].xyzz 304: ADD TEMP[13].xyz, IMM[12].xyzz, -TEMP[13].xyzz 305: DP3 TEMP[14].x, TEMP[1].xyzz, TEMP[13].xyzz 306: MUL TEMP[14].xyz, TEMP[14].xxxx, TEMP[1].xyzz 307: MUL TEMP[14].xyz, IMM[0].zzzz, TEMP[14].xyzz 308: ADD TEMP[14].xyz, TEMP[13].xyzz, -TEMP[14].xyzz 309: DP3 TEMP[15].x, TEMP[13].xyzz, TEMP[1].xyzz 310: FSLT TEMP[15].x, TEMP[15].xxxx, IMM[0].xxxx 311: UIF TEMP[15].xxxx :2 312: MOV TEMP[14].xyz, TEMP[14].xyzx 313: ELSE :2 314: MOV TEMP[14].xyz, TEMP[13].xyzx 315: ENDIF 316: MOV TEMP[13].w, IMM[6].xxxx 317: MOV TEMP[13].x, TEMP[14].xxxx 318: MOV TEMP[13].y, TEMP[14].yyyy 319: MOV TEMP[13].z, TEMP[14].zzzz 320: DP4 TEMP[14].x, TEMP[7], TEMP[13] 321: DP4 TEMP[15].x, TEMP[13], TEMP[9] 322: DP4 TEMP[16].x, TEMP[13], TEMP[8] 323: MOV TEMP[15].y, TEMP[16].xxxx 324: DP4 TEMP[13].x, TEMP[13], TEMP[6] 325: RCP TEMP[13].xy, TEMP[13].xxxx 326: MUL TEMP[13].xy, TEMP[15].xyyy, TEMP[13].xyyy 327: MOV TEMP[13].xy, TEMP[13].xyyy 328: TEX TEMP[13].xy, TEMP[13], SAMP[0], 2D 329: DP2 TEMP[13].x, TEMP[13].xyyy, IMM[6].yzzz 330: DP3 TEMP[15].x, TEMP[3].xyzz, IMM[13].xyzz 331: MUL TEMP[15].xyz, TEMP[15].xxxx, TEMP[3].xyzz 332: MUL TEMP[15].xyz, IMM[0].zzzz, TEMP[15].xyzz 333: ADD TEMP[15].xyz, IMM[13].xyzz, -TEMP[15].xyzz 334: DP3 TEMP[16].x, TEMP[1].xyzz, TEMP[15].xyzz 335: MUL TEMP[16].xyz, TEMP[16].xxxx, TEMP[1].xyzz 336: MUL TEMP[16].xyz, IMM[0].zzzz, TEMP[16].xyzz 337: ADD TEMP[16].xyz, TEMP[15].xyzz, -TEMP[16].xyzz 338: DP3 TEMP[17].x, TEMP[15].xyzz, TEMP[1].xyzz 339: FSLT TEMP[17].x, TEMP[17].xxxx, IMM[0].xxxx 340: UIF TEMP[17].xxxx :2 341: MOV TEMP[16].xyz, TEMP[16].xyzx 342: ELSE :2 343: MOV TEMP[16].xyz, TEMP[15].xyzx 344: ENDIF 345: MOV TEMP[15].w, IMM[6].xxxx 346: MOV TEMP[15].x, TEMP[16].xxxx 347: MOV TEMP[15].y, TEMP[16].yyyy 348: MOV TEMP[15].z, TEMP[16].zzzz 349: MOV TEMP[10].x, TEMP[10].xxxx 350: MOV TEMP[10].y, TEMP[12].xxxx 351: MOV TEMP[10].z, TEMP[14].xxxx 352: DP4 TEMP[12].x, TEMP[7], TEMP[15] 353: MOV TEMP[10].w, TEMP[12].xxxx 354: DP4 TEMP[12].x, TEMP[15], TEMP[9] 355: DP4 TEMP[14].x, TEMP[15], TEMP[8] 356: MOV TEMP[12].y, TEMP[14].xxxx 357: MOV TEMP[5].x, TEMP[5].xxxx 358: MOV TEMP[5].y, TEMP[11].xxxx 359: MOV TEMP[5].z, TEMP[13].xxxx 360: DP4 TEMP[11].x, TEMP[15], TEMP[6] 361: RCP TEMP[11].xy, TEMP[11].xxxx 362: MUL TEMP[11].xy, TEMP[12].xyyy, TEMP[11].xyyy 363: MOV TEMP[11].xy, TEMP[11].xyyy 364: TEX TEMP[11].xy, TEMP[11], SAMP[0], 2D 365: DP2 TEMP[11].x, TEMP[11].xyyy, IMM[6].yzzz 366: MOV TEMP[5].w, TEMP[11].xxxx 367: ADD TEMP[5], TEMP[10], -TEMP[5] 368: ADD TEMP[5], IMM[6].wwww, TEMP[5] 369: MUL TEMP[10].x, CONST[1][3].xxxx, TEMP[5].xxxx 370: ADD TEMP[10].x, IMM[6].xxxx, -TEMP[10].xxxx 371: MOV_SAT TEMP[10].x, TEMP[10].xxxx 372: FSLT TEMP[11].x, TEMP[5].xxxx, IMM[0].xxxx 373: UIF TEMP[11].xxxx :2 374: MOV TEMP[11].x, IMM[0].xxxx 375: ELSE :2 376: MOV TEMP[11].x, TEMP[10].xxxx 377: ENDIF 378: MUL TEMP[10].x, CONST[1][3].xxxx, TEMP[5].yyyy 379: ADD TEMP[10].x, IMM[6].xxxx, -TEMP[10].xxxx 380: MOV_SAT TEMP[10].x, TEMP[10].xxxx 381: FSLT TEMP[12].x, TEMP[5].yyyy, IMM[0].xxxx 382: UIF TEMP[12].xxxx :2 383: MOV TEMP[12].x, IMM[0].xxxx 384: ELSE :2 385: MOV TEMP[12].x, TEMP[10].xxxx 386: ENDIF 387: MUL TEMP[10].x, CONST[1][3].xxxx, TEMP[5].zzzz 388: ADD TEMP[10].x, IMM[6].xxxx, -TEMP[10].xxxx 389: MOV_SAT TEMP[10].x, TEMP[10].xxxx 390: FSLT TEMP[13].x, TEMP[5].zzzz, IMM[0].xxxx 391: UIF TEMP[13].xxxx :2 392: MOV TEMP[13].x, IMM[0].xxxx 393: ELSE :2 394: MOV TEMP[13].x, TEMP[10].xxxx 395: ENDIF 396: MUL TEMP[10].x, CONST[1][3].xxxx, TEMP[5].wwww 397: ADD TEMP[10].x, IMM[6].xxxx, -TEMP[10].xxxx 398: MOV_SAT TEMP[10].x, TEMP[10].xxxx 399: FSLT TEMP[5].x, TEMP[5].wwww, IMM[0].xxxx 400: UIF TEMP[5].xxxx :2 401: MOV TEMP[5].x, IMM[0].xxxx 402: ELSE :2 403: MOV TEMP[5].x, TEMP[10].xxxx 404: ENDIF 405: MOV TEMP[10].x, TEMP[11].xxxx 406: MOV TEMP[10].y, TEMP[12].xxxx 407: MOV TEMP[10].z, TEMP[13].xxxx 408: MOV TEMP[10].w, TEMP[5].xxxx 409: DP4 TEMP[5].x, TEMP[10], IMM[7].wwww 410: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx 411: DP3 TEMP[5].x, TEMP[3].xyzz, IMM[14].xyzz 412: MUL TEMP[5].xyz, TEMP[5].xxxx, TEMP[3].xyzz 413: MUL TEMP[5].xyz, IMM[0].zzzz, TEMP[5].xyzz 414: ADD TEMP[5].xyz, IMM[14].xyzz, -TEMP[5].xyzz 415: DP3 TEMP[10].x, TEMP[1].xyzz, TEMP[5].xyzz 416: MUL TEMP[10].xyz, TEMP[10].xxxx, TEMP[1].xyzz 417: MUL TEMP[10].xyz, IMM[0].zzzz, TEMP[10].xyzz 418: ADD TEMP[10].xyz, TEMP[5].xyzz, -TEMP[10].xyzz 419: DP3 TEMP[11].x, TEMP[5].xyzz, TEMP[1].xyzz 420: FSLT TEMP[11].x, TEMP[11].xxxx, IMM[0].xxxx 421: UIF TEMP[11].xxxx :2 422: MOV TEMP[10].xyz, TEMP[10].xyzx 423: ELSE :2 424: MOV TEMP[10].xyz, TEMP[5].xyzx 425: ENDIF 426: MOV TEMP[5].w, IMM[6].xxxx 427: MOV TEMP[5].x, TEMP[10].xxxx 428: MOV TEMP[5].y, TEMP[10].yyyy 429: MOV TEMP[5].z, TEMP[10].zzzz 430: DP4 TEMP[10].x, TEMP[7], TEMP[5] 431: DP4 TEMP[11].x, TEMP[5], TEMP[9] 432: DP4 TEMP[12].x, TEMP[5], TEMP[8] 433: MOV TEMP[11].y, TEMP[12].xxxx 434: DP4 TEMP[5].x, TEMP[5], TEMP[6] 435: RCP TEMP[5].xy, TEMP[5].xxxx 436: MUL TEMP[5].xy, TEMP[11].xyyy, TEMP[5].xyyy 437: MOV TEMP[5].xy, TEMP[5].xyyy 438: TEX TEMP[5].xy, TEMP[5], SAMP[0], 2D 439: DP2 TEMP[5].x, TEMP[5].xyyy, IMM[6].yzzz 440: DP3 TEMP[11].x, TEMP[3].xyzz, IMM[15].xyzz 441: MUL TEMP[11].xyz, TEMP[11].xxxx, TEMP[3].xyzz 442: MUL TEMP[11].xyz, IMM[0].zzzz, TEMP[11].xyzz 443: ADD TEMP[11].xyz, IMM[15].xyzz, -TEMP[11].xyzz 444: DP3 TEMP[12].x, TEMP[1].xyzz, TEMP[11].xyzz 445: MUL TEMP[12].xyz, TEMP[12].xxxx, TEMP[1].xyzz 446: MUL TEMP[12].xyz, IMM[0].zzzz, TEMP[12].xyzz 447: ADD TEMP[12].xyz, TEMP[11].xyzz, -TEMP[12].xyzz 448: DP3 TEMP[13].x, TEMP[11].xyzz, TEMP[1].xyzz 449: FSLT TEMP[13].x, TEMP[13].xxxx, IMM[0].xxxx 450: UIF TEMP[13].xxxx :2 451: MOV TEMP[12].xyz, TEMP[12].xyzx 452: ELSE :2 453: MOV TEMP[12].xyz, TEMP[11].xyzx 454: ENDIF 455: MOV TEMP[11].w, IMM[6].xxxx 456: MOV TEMP[11].x, TEMP[12].xxxx 457: MOV TEMP[11].y, TEMP[12].yyyy 458: MOV TEMP[11].z, TEMP[12].zzzz 459: DP4 TEMP[12].x, TEMP[7], TEMP[11] 460: DP4 TEMP[13].x, TEMP[11], TEMP[9] 461: DP4 TEMP[14].x, TEMP[11], TEMP[8] 462: MOV TEMP[13].y, TEMP[14].xxxx 463: DP4 TEMP[11].x, TEMP[11], TEMP[6] 464: RCP TEMP[11].xy, TEMP[11].xxxx 465: MUL TEMP[11].xy, TEMP[13].xyyy, TEMP[11].xyyy 466: MOV TEMP[11].xy, TEMP[11].xyyy 467: TEX TEMP[11].xy, TEMP[11], SAMP[0], 2D 468: DP2 TEMP[11].x, TEMP[11].xyyy, IMM[6].yzzz 469: DP3 TEMP[13].x, TEMP[3].xyzz, IMM[16].xyzz 470: MUL TEMP[13].xyz, TEMP[13].xxxx, TEMP[3].xyzz 471: MUL TEMP[13].xyz, IMM[0].zzzz, TEMP[13].xyzz 472: ADD TEMP[13].xyz, IMM[16].xyzz, -TEMP[13].xyzz 473: DP3 TEMP[14].x, TEMP[1].xyzz, TEMP[13].xyzz 474: MUL TEMP[14].xyz, TEMP[14].xxxx, TEMP[1].xyzz 475: MUL TEMP[14].xyz, IMM[0].zzzz, TEMP[14].xyzz 476: ADD TEMP[14].xyz, TEMP[13].xyzz, -TEMP[14].xyzz 477: DP3 TEMP[15].x, TEMP[13].xyzz, TEMP[1].xyzz 478: FSLT TEMP[15].x, TEMP[15].xxxx, IMM[0].xxxx 479: UIF TEMP[15].xxxx :2 480: MOV TEMP[14].xyz, TEMP[14].xyzx 481: ELSE :2 482: MOV TEMP[14].xyz, TEMP[13].xyzx 483: ENDIF 484: MOV TEMP[13].w, IMM[6].xxxx 485: MOV TEMP[13].x, TEMP[14].xxxx 486: MOV TEMP[13].y, TEMP[14].yyyy 487: MOV TEMP[13].z, TEMP[14].zzzz 488: DP4 TEMP[14].x, TEMP[7], TEMP[13] 489: DP4 TEMP[15].x, TEMP[13], TEMP[9] 490: DP4 TEMP[16].x, TEMP[13], TEMP[8] 491: MOV TEMP[15].y, TEMP[16].xxxx 492: DP4 TEMP[13].x, TEMP[13], TEMP[6] 493: RCP TEMP[13].xy, TEMP[13].xxxx 494: MUL TEMP[13].xy, TEMP[15].xyyy, TEMP[13].xyyy 495: MOV TEMP[13].xy, TEMP[13].xyyy 496: TEX TEMP[13].xy, TEMP[13], SAMP[0], 2D 497: DP2 TEMP[13].x, TEMP[13].xyyy, IMM[6].yzzz 498: DP3 TEMP[15].x, TEMP[3].xyzz, IMM[17].xyzz 499: MUL TEMP[15].xyz, TEMP[15].xxxx, TEMP[3].xyzz 500: MUL TEMP[15].xyz, IMM[0].zzzz, TEMP[15].xyzz 501: ADD TEMP[15].xyz, IMM[17].xyzz, -TEMP[15].xyzz 502: DP3 TEMP[16].x, TEMP[1].xyzz, TEMP[15].xyzz 503: MUL TEMP[16].xyz, TEMP[16].xxxx, TEMP[1].xyzz 504: MUL TEMP[16].xyz, IMM[0].zzzz, TEMP[16].xyzz 505: ADD TEMP[16].xyz, TEMP[15].xyzz, -TEMP[16].xyzz 506: DP3 TEMP[17].x, TEMP[15].xyzz, TEMP[1].xyzz 507: FSLT TEMP[17].x, TEMP[17].xxxx, IMM[0].xxxx 508: UIF TEMP[17].xxxx :2 509: MOV TEMP[16].xyz, TEMP[16].xyzx 510: ELSE :2 511: MOV TEMP[16].xyz, TEMP[15].xyzx 512: ENDIF 513: MOV TEMP[15].w, IMM[6].xxxx 514: MOV TEMP[15].x, TEMP[16].xxxx 515: MOV TEMP[15].y, TEMP[16].yyyy 516: MOV TEMP[15].z, TEMP[16].zzzz 517: MOV TEMP[10].x, TEMP[10].xxxx 518: MOV TEMP[10].y, TEMP[12].xxxx 519: MOV TEMP[10].z, TEMP[14].xxxx 520: DP4 TEMP[12].x, TEMP[7], TEMP[15] 521: MOV TEMP[10].w, TEMP[12].xxxx 522: DP4 TEMP[12].x, TEMP[15], TEMP[9] 523: DP4 TEMP[14].x, TEMP[15], TEMP[8] 524: MOV TEMP[12].y, TEMP[14].xxxx 525: MOV TEMP[5].x, TEMP[5].xxxx 526: MOV TEMP[5].y, TEMP[11].xxxx 527: MOV TEMP[5].z, TEMP[13].xxxx 528: DP4 TEMP[11].x, TEMP[15], TEMP[6] 529: RCP TEMP[11].xy, TEMP[11].xxxx 530: MUL TEMP[11].xy, TEMP[12].xyyy, TEMP[11].xyyy 531: MOV TEMP[11].xy, TEMP[11].xyyy 532: TEX TEMP[11].xy, TEMP[11], SAMP[0], 2D 533: DP2 TEMP[11].x, TEMP[11].xyyy, IMM[6].yzzz 534: MOV TEMP[5].w, TEMP[11].xxxx 535: ADD TEMP[5], TEMP[10], -TEMP[5] 536: ADD TEMP[5], IMM[6].wwww, TEMP[5] 537: MUL TEMP[10].x, CONST[1][3].xxxx, TEMP[5].xxxx 538: ADD TEMP[10].x, IMM[6].xxxx, -TEMP[10].xxxx 539: MOV_SAT TEMP[10].x, TEMP[10].xxxx 540: FSLT TEMP[11].x, TEMP[5].xxxx, IMM[0].xxxx 541: UIF TEMP[11].xxxx :2 542: MOV TEMP[11].x, IMM[0].xxxx 543: ELSE :2 544: MOV TEMP[11].x, TEMP[10].xxxx 545: ENDIF 546: MUL TEMP[10].x, CONST[1][3].xxxx, TEMP[5].yyyy 547: ADD TEMP[10].x, IMM[6].xxxx, -TEMP[10].xxxx 548: MOV_SAT TEMP[10].x, TEMP[10].xxxx 549: FSLT TEMP[12].x, TEMP[5].yyyy, IMM[0].xxxx 550: UIF TEMP[12].xxxx :2 551: MOV TEMP[12].x, IMM[0].xxxx 552: ELSE :2 553: MOV TEMP[12].x, TEMP[10].xxxx 554: ENDIF 555: MUL TEMP[10].x, CONST[1][3].xxxx, TEMP[5].zzzz 556: ADD TEMP[10].x, IMM[6].xxxx, -TEMP[10].xxxx 557: MOV_SAT TEMP[10].x, TEMP[10].xxxx 558: FSLT TEMP[13].x, TEMP[5].zzzz, IMM[0].xxxx 559: UIF TEMP[13].xxxx :2 560: MOV TEMP[13].x, IMM[0].xxxx 561: ELSE :2 562: MOV TEMP[13].x, TEMP[10].xxxx 563: ENDIF 564: MUL TEMP[10].x, CONST[1][3].xxxx, TEMP[5].wwww 565: ADD TEMP[10].x, IMM[6].xxxx, -TEMP[10].xxxx 566: MOV_SAT TEMP[10].x, TEMP[10].xxxx 567: FSLT TEMP[5].x, TEMP[5].wwww, IMM[0].xxxx 568: UIF TEMP[5].xxxx :2 569: MOV TEMP[5].x, IMM[0].xxxx 570: ELSE :2 571: MOV TEMP[5].x, TEMP[10].xxxx 572: ENDIF 573: MOV TEMP[10].x, TEMP[11].xxxx 574: MOV TEMP[10].y, TEMP[12].xxxx 575: MOV TEMP[10].z, TEMP[13].xxxx 576: MOV TEMP[10].w, TEMP[5].xxxx 577: DP4 TEMP[5].x, TEMP[10], IMM[7].wwww 578: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx 579: DP3 TEMP[5].x, TEMP[3].xyzz, IMM[18].xyzz 580: MUL TEMP[5].xyz, TEMP[5].xxxx, TEMP[3].xyzz 581: MUL TEMP[5].xyz, IMM[0].zzzz, TEMP[5].xyzz 582: ADD TEMP[5].xyz, IMM[18].xyzz, -TEMP[5].xyzz 583: DP3 TEMP[10].x, TEMP[1].xyzz, TEMP[5].xyzz 584: MUL TEMP[10].xyz, TEMP[10].xxxx, TEMP[1].xyzz 585: MUL TEMP[10].xyz, IMM[0].zzzz, TEMP[10].xyzz 586: ADD TEMP[10].xyz, TEMP[5].xyzz, -TEMP[10].xyzz 587: DP3 TEMP[11].x, TEMP[5].xyzz, TEMP[1].xyzz 588: FSLT TEMP[11].x, TEMP[11].xxxx, IMM[0].xxxx 589: UIF TEMP[11].xxxx :2 590: MOV TEMP[10].xyz, TEMP[10].xyzx 591: ELSE :2 592: MOV TEMP[10].xyz, TEMP[5].xyzx 593: ENDIF 594: MOV TEMP[5].w, IMM[6].xxxx 595: MOV TEMP[5].x, TEMP[10].xxxx 596: MOV TEMP[5].y, TEMP[10].yyyy 597: MOV TEMP[5].z, TEMP[10].zzzz 598: DP4 TEMP[10].x, TEMP[7], TEMP[5] 599: DP4 TEMP[11].x, TEMP[5], TEMP[9] 600: DP4 TEMP[12].x, TEMP[5], TEMP[8] 601: MOV TEMP[11].y, TEMP[12].xxxx 602: DP4 TEMP[5].x, TEMP[5], TEMP[6] 603: RCP TEMP[5].xy, TEMP[5].xxxx 604: MUL TEMP[5].xy, TEMP[11].xyyy, TEMP[5].xyyy 605: MOV TEMP[5].xy, TEMP[5].xyyy 606: TEX TEMP[5].xy, TEMP[5], SAMP[0], 2D 607: DP2 TEMP[5].x, TEMP[5].xyyy, IMM[6].yzzz 608: DP3 TEMP[11].x, TEMP[3].xyzz, IMM[19].xyzz 609: MUL TEMP[11].xyz, TEMP[11].xxxx, TEMP[3].xyzz 610: MUL TEMP[11].xyz, IMM[0].zzzz, TEMP[11].xyzz 611: ADD TEMP[11].xyz, IMM[19].xyzz, -TEMP[11].xyzz 612: DP3 TEMP[12].x, TEMP[1].xyzz, TEMP[11].xyzz 613: MUL TEMP[12].xyz, TEMP[12].xxxx, TEMP[1].xyzz 614: MUL TEMP[12].xyz, IMM[0].zzzz, TEMP[12].xyzz 615: ADD TEMP[12].xyz, TEMP[11].xyzz, -TEMP[12].xyzz 616: DP3 TEMP[13].x, TEMP[11].xyzz, TEMP[1].xyzz 617: FSLT TEMP[13].x, TEMP[13].xxxx, IMM[0].xxxx 618: UIF TEMP[13].xxxx :2 619: MOV TEMP[12].xyz, TEMP[12].xyzx 620: ELSE :2 621: MOV TEMP[12].xyz, TEMP[11].xyzx 622: ENDIF 623: MOV TEMP[11].w, IMM[6].xxxx 624: MOV TEMP[11].x, TEMP[12].xxxx 625: MOV TEMP[11].y, TEMP[12].yyyy 626: MOV TEMP[11].z, TEMP[12].zzzz 627: DP4 TEMP[12].x, TEMP[7], TEMP[11] 628: DP4 TEMP[13].x, TEMP[11], TEMP[9] 629: DP4 TEMP[14].x, TEMP[11], TEMP[8] 630: MOV TEMP[13].y, TEMP[14].xxxx 631: DP4 TEMP[11].x, TEMP[11], TEMP[6] 632: RCP TEMP[11].xy, TEMP[11].xxxx 633: MUL TEMP[11].xy, TEMP[13].xyyy, TEMP[11].xyyy 634: MOV TEMP[11].xy, TEMP[11].xyyy 635: TEX TEMP[11].xy, TEMP[11], SAMP[0], 2D 636: DP2 TEMP[11].x, TEMP[11].xyyy, IMM[6].yzzz 637: DP3 TEMP[13].x, TEMP[3].xyzz, IMM[20].xyzz 638: MUL TEMP[13].xyz, TEMP[13].xxxx, TEMP[3].xyzz 639: MUL TEMP[13].xyz, IMM[0].zzzz, TEMP[13].xyzz 640: ADD TEMP[13].xyz, IMM[20].xyzz, -TEMP[13].xyzz 641: DP3 TEMP[14].x, TEMP[1].xyzz, TEMP[13].xyzz 642: MUL TEMP[14].xyz, TEMP[14].xxxx, TEMP[1].xyzz 643: MUL TEMP[14].xyz, IMM[0].zzzz, TEMP[14].xyzz 644: ADD TEMP[14].xyz, TEMP[13].xyzz, -TEMP[14].xyzz 645: DP3 TEMP[15].x, TEMP[13].xyzz, TEMP[1].xyzz 646: FSLT TEMP[15].x, TEMP[15].xxxx, IMM[0].xxxx 647: UIF TEMP[15].xxxx :2 648: MOV TEMP[14].xyz, TEMP[14].xyzx 649: ELSE :2 650: MOV TEMP[14].xyz, TEMP[13].xyzx 651: ENDIF 652: MOV TEMP[13].w, IMM[6].xxxx 653: MOV TEMP[13].x, TEMP[14].xxxx 654: MOV TEMP[13].y, TEMP[14].yyyy 655: MOV TEMP[13].z, TEMP[14].zzzz 656: DP4 TEMP[14].x, TEMP[7], TEMP[13] 657: DP4 TEMP[15].x, TEMP[13], TEMP[9] 658: DP4 TEMP[16].x, TEMP[13], TEMP[8] 659: MOV TEMP[15].y, TEMP[16].xxxx 660: DP4 TEMP[13].x, TEMP[13], TEMP[6] 661: RCP TEMP[13].xy, TEMP[13].xxxx 662: MUL TEMP[13].xy, TEMP[15].xyyy, TEMP[13].xyyy 663: MOV TEMP[13].xy, TEMP[13].xyyy 664: TEX TEMP[13].xy, TEMP[13], SAMP[0], 2D 665: DP2 TEMP[13].x, TEMP[13].xyyy, IMM[6].yzzz 666: DP3 TEMP[15].x, TEMP[3].xyzz, IMM[21].xyzz 667: MUL TEMP[3].xyz, TEMP[15].xxxx, TEMP[3].xyzz 668: MUL TEMP[3].xyz, IMM[0].zzzz, TEMP[3].xyzz 669: ADD TEMP[3].xyz, IMM[21].xyzz, -TEMP[3].xyzz 670: DP3 TEMP[15].x, TEMP[1].xyzz, TEMP[3].xyzz 671: MUL TEMP[15].xyz, TEMP[15].xxxx, TEMP[1].xyzz 672: MUL TEMP[15].xyz, IMM[0].zzzz, TEMP[15].xyzz 673: ADD TEMP[15].xyz, TEMP[3].xyzz, -TEMP[15].xyzz 674: DP3 TEMP[16].x, TEMP[3].xyzz, TEMP[1].xyzz 675: FSLT TEMP[16].x, TEMP[16].xxxx, IMM[0].xxxx 676: UIF TEMP[16].xxxx :2 677: MOV TEMP[15].xyz, TEMP[15].xyzx 678: ELSE :2 679: MOV TEMP[15].xyz, TEMP[3].xyzx 680: ENDIF 681: MOV TEMP[3].w, IMM[6].xxxx 682: MOV TEMP[3].x, TEMP[15].xxxx 683: MOV TEMP[3].y, TEMP[15].yyyy 684: MOV TEMP[3].z, TEMP[15].zzzz 685: MOV TEMP[10].x, TEMP[10].xxxx 686: MOV TEMP[10].y, TEMP[12].xxxx 687: MOV TEMP[10].z, TEMP[14].xxxx 688: DP4 TEMP[7].x, TEMP[7], TEMP[3] 689: MOV TEMP[10].w, TEMP[7].xxxx 690: DP4 TEMP[7].x, TEMP[3], TEMP[9] 691: DP4 TEMP[8].x, TEMP[3], TEMP[8] 692: MOV TEMP[7].y, TEMP[8].xxxx 693: MOV TEMP[5].x, TEMP[5].xxxx 694: MOV TEMP[5].y, TEMP[11].xxxx 695: MOV TEMP[5].z, TEMP[13].xxxx 696: DP4 TEMP[3].x, TEMP[3], TEMP[6] 697: RCP TEMP[3].xy, TEMP[3].xxxx 698: MUL TEMP[3].xy, TEMP[7].xyyy, TEMP[3].xyyy 699: MOV TEMP[3].xy, TEMP[3].xyyy 700: TEX TEMP[3].xy, TEMP[3], SAMP[0], 2D 701: DP2 TEMP[3].x, TEMP[3].xyyy, IMM[6].yzzz 702: MOV TEMP[5].w, TEMP[3].xxxx 703: ADD TEMP[3], TEMP[10], -TEMP[5] 704: ADD TEMP[3], IMM[6].wwww, TEMP[3] 705: MUL TEMP[5].x, CONST[1][3].xxxx, TEMP[3].xxxx 706: ADD TEMP[5].x, IMM[6].xxxx, -TEMP[5].xxxx 707: MOV_SAT TEMP[5].x, TEMP[5].xxxx 708: FSLT TEMP[6].x, TEMP[3].xxxx, IMM[0].xxxx 709: UIF TEMP[6].xxxx :2 710: MOV TEMP[6].x, IMM[0].xxxx 711: ELSE :2 712: MOV TEMP[6].x, TEMP[5].xxxx 713: ENDIF 714: MUL TEMP[5].x, CONST[1][3].xxxx, TEMP[3].yyyy 715: ADD TEMP[5].x, IMM[6].xxxx, -TEMP[5].xxxx 716: MOV_SAT TEMP[5].x, TEMP[5].xxxx 717: FSLT TEMP[7].x, TEMP[3].yyyy, IMM[0].xxxx 718: UIF TEMP[7].xxxx :2 719: MOV TEMP[7].x, IMM[0].xxxx 720: ELSE :2 721: MOV TEMP[7].x, TEMP[5].xxxx 722: ENDIF 723: MUL TEMP[5].x, CONST[1][3].xxxx, TEMP[3].zzzz 724: ADD TEMP[5].x, IMM[6].xxxx, -TEMP[5].xxxx 725: MOV_SAT TEMP[5].x, TEMP[5].xxxx 726: FSLT TEMP[8].x, TEMP[3].zzzz, IMM[0].xxxx 727: UIF TEMP[8].xxxx :2 728: MOV TEMP[8].x, IMM[0].xxxx 729: ELSE :2 730: MOV TEMP[8].x, TEMP[5].xxxx 731: ENDIF 732: MUL TEMP[5].x, CONST[1][3].xxxx, TEMP[3].wwww 733: ADD TEMP[5].x, IMM[6].xxxx, -TEMP[5].xxxx 734: MOV_SAT TEMP[5].x, TEMP[5].xxxx 735: FSLT TEMP[3].x, TEMP[3].wwww, IMM[0].xxxx 736: UIF TEMP[3].xxxx :2 737: MOV TEMP[3].x, IMM[0].xxxx 738: ELSE :2 739: MOV TEMP[3].x, TEMP[5].xxxx 740: ENDIF 741: MOV TEMP[5].x, TEMP[6].xxxx 742: MOV TEMP[5].y, TEMP[7].xxxx 743: MOV TEMP[5].z, TEMP[8].xxxx 744: MOV TEMP[5].w, TEMP[3].xxxx 745: RCP TEMP[3].x, CONST[2][23].xxxx 746: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[3].xxxx 747: MUL TEMP[2].x, TEMP[2].xxxx, IMM[8].wwww 748: FLR TEMP[2].x, TEMP[2].xxxx 749: MUL TEMP[3].x, TEMP[2].xxxx, IMM[9].wwww 750: FLR TEMP[3].x, TEMP[3].xxxx 751: MAD TEMP[1].xy, TEMP[1].xyyy, IMM[10].wwww, IMM[11].wwww 752: FLR TEMP[1].xy, TEMP[1].xyyy 753: DP4 TEMP[5].x, TEMP[5], IMM[7].wwww 754: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx 755: MUL TEMP[5].x, TEMP[3].xxxx, IMM[12].wwww 756: MOV TEMP[4].y, TEMP[5].xxxx 757: MUL TEMP[3].x, TEMP[3].xxxx, IMM[13].wwww 758: ADD TEMP[2].x, TEMP[2].xxxx, -TEMP[3].xxxx 759: MUL TEMP[2].x, TEMP[2].xxxx, IMM[12].wwww 760: MOV TEMP[4].z, TEMP[2].xxxx 761: MUL TEMP[2].x, TEMP[1].xxxx, IMM[15].wwww 762: MAD TEMP[1].x, TEMP[1].yyyy, IMM[14].wwww, TEMP[2].xxxx 763: MOV TEMP[4].w, TEMP[1].xxxx 764: MOV OUT[0], TEMP[4] 765: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %26 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %27 = load <16 x i8>, <16 x i8> addrspace(2)* %26, align 16, !tbaa !0 %28 = call float @llvm.SI.load.const(<16 x i8> %27, i32 16) %29 = call float @llvm.SI.load.const(<16 x i8> %27, i32 20) %30 = call float @llvm.SI.load.const(<16 x i8> %27, i32 32) %31 = call float @llvm.SI.load.const(<16 x i8> %27, i32 36) %32 = call float @llvm.SI.load.const(<16 x i8> %27, i32 40) %33 = call float @llvm.SI.load.const(<16 x i8> %27, i32 44) %34 = call float @llvm.SI.load.const(<16 x i8> %27, i32 48) %35 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 %37 = call float @llvm.SI.load.const(<16 x i8> %36, i32 0) %38 = call float @llvm.SI.load.const(<16 x i8> %36, i32 4) %39 = call float @llvm.SI.load.const(<16 x i8> %36, i32 12) %40 = call float @llvm.SI.load.const(<16 x i8> %36, i32 16) %41 = call float @llvm.SI.load.const(<16 x i8> %36, i32 20) %42 = call float @llvm.SI.load.const(<16 x i8> %36, i32 28) %43 = call float @llvm.SI.load.const(<16 x i8> %36, i32 32) %44 = call float @llvm.SI.load.const(<16 x i8> %36, i32 36) %45 = call float @llvm.SI.load.const(<16 x i8> %36, i32 44) %46 = call float @llvm.SI.load.const(<16 x i8> %36, i32 48) %47 = call float @llvm.SI.load.const(<16 x i8> %36, i32 52) %48 = call float @llvm.SI.load.const(<16 x i8> %36, i32 60) %49 = call float @llvm.SI.load.const(<16 x i8> %36, i32 304) %50 = call float @llvm.SI.load.const(<16 x i8> %36, i32 308) %51 = call float @llvm.SI.load.const(<16 x i8> %36, i32 312) %52 = call float @llvm.SI.load.const(<16 x i8> %36, i32 320) %53 = call float @llvm.SI.load.const(<16 x i8> %36, i32 324) %54 = call float @llvm.SI.load.const(<16 x i8> %36, i32 328) %55 = call float @llvm.SI.load.const(<16 x i8> %36, i32 368) %56 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %57 = load <8 x i32>, <8 x i32> addrspace(2)* %56, align 32, !tbaa !0 %58 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %59 = load <4 x i32>, <4 x i32> addrspace(2)* %58, align 16, !tbaa !0 %60 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %61 = bitcast <8 x i32> addrspace(2)* %60 to <32 x i8> addrspace(2)* %62 = load <32 x i8>, <32 x i8> addrspace(2)* %61, align 32, !tbaa !0 %63 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %64 = bitcast <4 x i32> addrspace(2)* %63 to <16 x i8> addrspace(2)* %65 = load <16 x i8>, <16 x i8> addrspace(2)* %64, align 16, !tbaa !0 %66 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %67 = bitcast <8 x i32> addrspace(2)* %66 to <32 x i8> addrspace(2)* %68 = load <32 x i8>, <32 x i8> addrspace(2)* %67, align 32, !tbaa !0 %69 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %70 = bitcast <4 x i32> addrspace(2)* %69 to <16 x i8> addrspace(2)* %71 = load <16 x i8>, <16 x i8> addrspace(2)* %70, align 16, !tbaa !0 %72 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %73 = bitcast <8 x i32> addrspace(2)* %72 to <32 x i8> addrspace(2)* %74 = load <32 x i8>, <32 x i8> addrspace(2)* %73, align 32, !tbaa !0 %75 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %76 = bitcast <4 x i32> addrspace(2)* %75 to <16 x i8> addrspace(2)* %77 = load <16 x i8>, <16 x i8> addrspace(2)* %76, align 16, !tbaa !0 %78 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %79 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %81 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %82 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %83 = fmul float %24, %15 %84 = fadd float %83, %25 %85 = bitcast float %78 to i32 %86 = bitcast float %79 to i32 %87 = insertelement <4 x i32> undef, i32 %85, i32 0 %88 = insertelement <4 x i32> %87, i32 %86, i32 1 %89 = insertelement <4 x i32> %88, i32 0, i32 2 %90 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %89, <32 x i8> %62, <16 x i8> %65, i32 2) %91 = extractelement <4 x float> %90, i32 0 %92 = extractelement <4 x float> %90, i32 1 %93 = extractelement <4 x float> %90, i32 2 %94 = fadd float %91, -5.000000e-01 %95 = fadd float %92, -5.000000e-01 %96 = fadd float %93, -5.000000e-01 %97 = fmul float %94, 2.000000e+00 %98 = fmul float %95, 2.000000e+00 %99 = fmul float %96, 2.000000e+00 %100 = bitcast float %78 to i32 %101 = bitcast float %79 to i32 %102 = insertelement <4 x i32> undef, i32 %100, i32 0 %103 = insertelement <4 x i32> %102, i32 %101, i32 1 %104 = insertelement <4 x i32> %103, i32 0, i32 2 %105 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %104, <32 x i8> %68, <16 x i8> %71, i32 2) %106 = extractelement <4 x float> %105, i32 0 %107 = fmul float %14, 2.500000e-01 %108 = fmul float %84, 2.500000e-01 %109 = bitcast float %107 to i32 %110 = bitcast float %108 to i32 %111 = insertelement <2 x i32> undef, i32 %109, i32 0 %112 = insertelement <2 x i32> %111, i32 %110, i32 1 %113 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %112, <32 x i8> %74, <16 x i8> %77, i32 2) %114 = extractelement <4 x float> %113, i32 0 %115 = extractelement <4 x float> %113, i32 1 %116 = extractelement <4 x float> %113, i32 2 %117 = extractelement <4 x float> %113, i32 3 %118 = fmul float %114, 2.000000e+00 %119 = fadd float %118, -1.000000e+00 %120 = fmul float %115, 2.000000e+00 %121 = fadd float %120, -1.000000e+00 %122 = fmul float %116, 2.000000e+00 %123 = fadd float %122, -1.000000e+00 %124 = fmul float %117, 2.000000e+00 %125 = fadd float %124, -1.000000e+00 %126 = fmul float %119, %119 %127 = fmul float %121, %121 %128 = fadd float %126, %127 %129 = fmul float %123, %123 %130 = fadd float %128, %129 %131 = fmul float %125, %125 %132 = fadd float %130, %131 %133 = call float @llvm.AMDGPU.rsq.clamped.f32(float %132) %134 = fmul float %119, %133 %135 = fmul float %121, %133 %136 = fmul float %123, %133 %137 = fmul float %80, %106 %138 = fadd float %137, %49 %139 = fmul float %81, %106 %140 = fadd float %139, %50 %141 = fmul float %82, %106 %142 = fadd float %141, %51 %143 = fmul float %28, %97 %144 = fadd float %143, %138 %145 = fmul float %28, %98 %146 = fadd float %145, %140 %147 = fmul float %28, %99 %148 = fadd float %147, %142 %149 = fdiv float 1.000000e+00, %55 %150 = fmul float %52, %149 %151 = fmul float %53, %149 %152 = fmul float %54, %149 %153 = fmul float %29, %150 %154 = fmul float %29, %151 %155 = fmul float %29, %152 %156 = fmul float %150, %144 %157 = fmul float %151, %146 %158 = fadd float %157, %156 %159 = fmul float %152, %148 %160 = fadd float %158, %159 %161 = fmul float %150, %49 %162 = fmul float %151, %50 %163 = fadd float %162, %161 %164 = fmul float %152, %51 %165 = fadd float %163, %164 %166 = fsub float %160, %165 %167 = fmul float %37, %30 %168 = fmul float %39, %32 %169 = fadd float %168, %167 %170 = fmul float %40, %30 %171 = fmul float %42, %32 %172 = fadd float %171, %170 %173 = fmul float %43, %30 %174 = fmul float %45, %32 %175 = fadd float %174, %173 %176 = fmul float %38, %31 %177 = fmul float %39, %33 %178 = fadd float %177, %176 %179 = fmul float %41, %31 %180 = fmul float %42, %33 %181 = fadd float %180, %179 %182 = fmul float %44, %31 %183 = fmul float %45, %33 %184 = fadd float %183, %182 %185 = fmul float %169, %29 %186 = fmul float %172, %29 %187 = fmul float %175, %29 %188 = fmul float %46, %30 %189 = fmul float %48, %32 %190 = fadd float %189, %188 %191 = fmul float %169, %144 %192 = fmul float %172, %146 %193 = fadd float %192, %191 %194 = fmul float %175, %148 %195 = fadd float %193, %194 %196 = fadd float %190, %195 %197 = fmul float %29, %178 %198 = fmul float %29, %181 %199 = fmul float %29, %184 %200 = fmul float %47, %31 %201 = fmul float %48, %33 %202 = fadd float %201, %200 %203 = fmul float %178, %144 %204 = fmul float %181, %146 %205 = fadd float %204, %203 %206 = fmul float %184, %148 %207 = fadd float %205, %206 %208 = fadd float %202, %207 %209 = fmul float %29, %39 %210 = fmul float %29, %42 %211 = fmul float %29, %45 %212 = fmul float %39, %144 %213 = fmul float %42, %146 %214 = fadd float %213, %212 %215 = fmul float %45, %148 %216 = fadd float %214, %215 %217 = fadd float %48, %216 %218 = fmul float %134, 0x3FE3AFAD60000000 %219 = fmul float %135, 0x3FB9643020000000 %220 = fadd float %219, %218 %221 = fmul float %136, 0x3FDDFD1DC0000000 %222 = fadd float %220, %221 %223 = fmul float %222, %134 %224 = fmul float %222, %135 %225 = fmul float %222, %136 %226 = fmul float %223, 2.000000e+00 %227 = fmul float %224, 2.000000e+00 %228 = fmul float %225, 2.000000e+00 %229 = fsub float 0x3FE3AFAD60000000, %226 %230 = fsub float 0x3FB9643020000000, %227 %231 = fsub float 0x3FDDFD1DC0000000, %228 %232 = fmul float %97, %229 %233 = fmul float %98, %230 %234 = fadd float %233, %232 %235 = fmul float %99, %231 %236 = fadd float %234, %235 %237 = fmul float %236, %97 %238 = fmul float %236, %98 %239 = fmul float %236, %99 %240 = fmul float %237, 2.000000e+00 %241 = fmul float %238, 2.000000e+00 %242 = fmul float %239, 2.000000e+00 %243 = fsub float %229, %240 %244 = fsub float %230, %241 %245 = fsub float %231, %242 %246 = fmul float %229, %97 %247 = fmul float %230, %98 %248 = fadd float %247, %246 %249 = fmul float %231, %99 %250 = fadd float %248, %249 %251 = fcmp olt float %250, 0.000000e+00 %. = select i1 %251, float %243, float %229 %.165 = select i1 %251, float %244, float %230 %.166 = select i1 %251, float %245, float %231 %252 = fmul float %153, %. %253 = fmul float %154, %.165 %254 = fadd float %252, %253 %255 = fmul float %155, %.166 %256 = fadd float %254, %255 %257 = fadd float %256, %166 %258 = fmul float %., %185 %259 = fmul float %.165, %186 %260 = fadd float %258, %259 %261 = fmul float %.166, %187 %262 = fadd float %260, %261 %263 = fadd float %262, %196 %264 = fmul float %., %197 %265 = fmul float %.165, %198 %266 = fadd float %264, %265 %267 = fmul float %.166, %199 %268 = fadd float %266, %267 %269 = fadd float %268, %208 %270 = fmul float %., %209 %271 = fmul float %.165, %210 %272 = fadd float %270, %271 %273 = fmul float %.166, %211 %274 = fadd float %272, %273 %275 = fadd float %274, %217 %276 = fdiv float 1.000000e+00, %275 %277 = fmul float %263, %276 %278 = fmul float %269, %276 %279 = bitcast float %277 to i32 %280 = bitcast float %278 to i32 %281 = insertelement <2 x i32> undef, i32 %279, i32 0 %282 = insertelement <2 x i32> %281, i32 %280, i32 1 %283 = bitcast <8 x i32> %57 to <32 x i8> %284 = bitcast <4 x i32> %59 to <16 x i8> %285 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %282, <32 x i8> %283, <16 x i8> %284, i32 2) %286 = extractelement <4 x float> %285, i32 0 %287 = extractelement <4 x float> %285, i32 1 %288 = fmul float %286, 0x3FEFE02000000000 %289 = fmul float %287, 0x3F6FE01F80000000 %290 = fadd float %288, %289 %291 = fmul float %134, 0xBFCF92AFA0000000 %292 = fmul float %135, 0x3FDFB71B80000000 %293 = fadd float %292, %291 %294 = fmul float %136, 0x3FE45F8900000000 %295 = fadd float %293, %294 %296 = fmul float %295, %134 %297 = fmul float %295, %135 %298 = fmul float %295, %136 %299 = fmul float %296, 2.000000e+00 %300 = fmul float %297, 2.000000e+00 %301 = fmul float %298, 2.000000e+00 %302 = fsub float 0xBFCF92AFA0000000, %299 %303 = fsub float 0x3FDFB71B80000000, %300 %304 = fsub float 0x3FE45F8900000000, %301 %305 = fmul float %97, %302 %306 = fmul float %98, %303 %307 = fadd float %306, %305 %308 = fmul float %99, %304 %309 = fadd float %307, %308 %310 = fmul float %309, %97 %311 = fmul float %309, %98 %312 = fmul float %309, %99 %313 = fmul float %310, 2.000000e+00 %314 = fmul float %311, 2.000000e+00 %315 = fmul float %312, 2.000000e+00 %316 = fsub float %302, %313 %317 = fsub float %303, %314 %318 = fsub float %304, %315 %319 = fmul float %302, %97 %320 = fmul float %303, %98 %321 = fadd float %320, %319 %322 = fmul float %304, %99 %323 = fadd float %321, %322 %324 = fcmp olt float %323, 0.000000e+00 %temp44.0 = select i1 %324, float %316, float %302 %temp45.0 = select i1 %324, float %317, float %303 %temp46.0 = select i1 %324, float %318, float %304 %325 = fmul float %153, %temp44.0 %326 = fmul float %154, %temp45.0 %327 = fadd float %325, %326 %328 = fmul float %155, %temp46.0 %329 = fadd float %327, %328 %330 = fadd float %329, %166 %331 = fmul float %temp44.0, %185 %332 = fmul float %temp45.0, %186 %333 = fadd float %331, %332 %334 = fmul float %temp46.0, %187 %335 = fadd float %333, %334 %336 = fadd float %335, %196 %337 = fmul float %temp44.0, %197 %338 = fmul float %temp45.0, %198 %339 = fadd float %337, %338 %340 = fmul float %temp46.0, %199 %341 = fadd float %339, %340 %342 = fadd float %341, %208 %343 = fmul float %temp44.0, %209 %344 = fmul float %temp45.0, %210 %345 = fadd float %343, %344 %346 = fmul float %temp46.0, %211 %347 = fadd float %345, %346 %348 = fadd float %347, %217 %349 = fdiv float 1.000000e+00, %348 %350 = fmul float %336, %349 %351 = fmul float %342, %349 %352 = bitcast float %350 to i32 %353 = bitcast float %351 to i32 %354 = insertelement <2 x i32> undef, i32 %352, i32 0 %355 = insertelement <2 x i32> %354, i32 %353, i32 1 %356 = bitcast <8 x i32> %57 to <32 x i8> %357 = bitcast <4 x i32> %59 to <16 x i8> %358 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %355, <32 x i8> %356, <16 x i8> %357, i32 2) %359 = extractelement <4 x float> %358, i32 0 %360 = extractelement <4 x float> %358, i32 1 %361 = fmul float %359, 0x3FEFE02000000000 %362 = fmul float %360, 0x3F6FE01F80000000 %363 = fadd float %361, %362 %364 = fmul float %134, 0xBFD35F6BA0000000 %365 = fmul float %135, 0x3FC942F620000000 %366 = fadd float %365, %364 %367 = fmul float %136, 0x3FEDBA4D60000000 %368 = fadd float %366, %367 %369 = fmul float %368, %134 %370 = fmul float %368, %135 %371 = fmul float %368, %136 %372 = fmul float %369, 2.000000e+00 %373 = fmul float %370, 2.000000e+00 %374 = fmul float %371, 2.000000e+00 %375 = fsub float 0xBFD35F6BA0000000, %372 %376 = fsub float 0x3FC942F620000000, %373 %377 = fsub float 0x3FEDBA4D60000000, %374 %378 = fmul float %97, %375 %379 = fmul float %98, %376 %380 = fadd float %379, %378 %381 = fmul float %99, %377 %382 = fadd float %380, %381 %383 = fmul float %382, %97 %384 = fmul float %382, %98 %385 = fmul float %382, %99 %386 = fmul float %383, 2.000000e+00 %387 = fmul float %384, 2.000000e+00 %388 = fmul float %385, 2.000000e+00 %389 = fsub float %375, %386 %390 = fsub float %376, %387 %391 = fsub float %377, %388 %392 = fmul float %375, %97 %393 = fmul float %376, %98 %394 = fadd float %393, %392 %395 = fmul float %377, %99 %396 = fadd float %394, %395 %397 = fcmp olt float %396, 0.000000e+00 %.167 = select i1 %397, float %389, float %375 %.168 = select i1 %397, float %390, float %376 %.169 = select i1 %397, float %391, float %377 %398 = fmul float %153, %.167 %399 = fmul float %154, %.168 %400 = fadd float %398, %399 %401 = fmul float %155, %.169 %402 = fadd float %400, %401 %403 = fadd float %402, %166 %404 = fmul float %.167, %185 %405 = fmul float %.168, %186 %406 = fadd float %404, %405 %407 = fmul float %.169, %187 %408 = fadd float %406, %407 %409 = fadd float %408, %196 %410 = fmul float %.167, %197 %411 = fmul float %.168, %198 %412 = fadd float %410, %411 %413 = fmul float %.169, %199 %414 = fadd float %412, %413 %415 = fadd float %414, %208 %416 = fmul float %.167, %209 %417 = fmul float %.168, %210 %418 = fadd float %416, %417 %419 = fmul float %.169, %211 %420 = fadd float %418, %419 %421 = fadd float %420, %217 %422 = fdiv float 1.000000e+00, %421 %423 = fmul float %409, %422 %424 = fmul float %415, %422 %425 = bitcast float %423 to i32 %426 = bitcast float %424 to i32 %427 = insertelement <2 x i32> undef, i32 %425, i32 0 %428 = insertelement <2 x i32> %427, i32 %426, i32 1 %429 = bitcast <8 x i32> %57 to <32 x i8> %430 = bitcast <4 x i32> %59 to <16 x i8> %431 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %428, <32 x i8> %429, <16 x i8> %430, i32 2) %432 = extractelement <4 x float> %431, i32 0 %433 = extractelement <4 x float> %431, i32 1 %434 = fmul float %432, 0x3FEFE02000000000 %435 = fmul float %433, 0x3F6FE01F80000000 %436 = fadd float %434, %435 %437 = fmul float %134, 0x3FD51ACDE0000000 %438 = fmul float %135, 0xBFCF378680000000 %439 = fadd float %438, %437 %440 = fmul float %136, 0x3FDD070760000000 %441 = fadd float %439, %440 %442 = fmul float %441, %134 %443 = fmul float %441, %135 %444 = fmul float %441, %136 %445 = fmul float %442, 2.000000e+00 %446 = fmul float %443, 2.000000e+00 %447 = fmul float %444, 2.000000e+00 %448 = fsub float 0x3FD51ACDE0000000, %445 %449 = fsub float 0xBFCF378680000000, %446 %450 = fsub float 0x3FDD070760000000, %447 %451 = fmul float %97, %448 %452 = fmul float %98, %449 %453 = fadd float %452, %451 %454 = fmul float %99, %450 %455 = fadd float %453, %454 %456 = fmul float %455, %97 %457 = fmul float %455, %98 %458 = fmul float %455, %99 %459 = fmul float %456, 2.000000e+00 %460 = fmul float %457, 2.000000e+00 %461 = fmul float %458, 2.000000e+00 %462 = fsub float %448, %459 %463 = fsub float %449, %460 %464 = fsub float %450, %461 %465 = fmul float %448, %97 %466 = fmul float %449, %98 %467 = fadd float %466, %465 %468 = fmul float %450, %99 %469 = fadd float %467, %468 %470 = fcmp olt float %469, 0.000000e+00 %temp60.0 = select i1 %470, float %462, float %448 %temp61.0 = select i1 %470, float %463, float %449 %temp62.0 = select i1 %470, float %464, float %450 %471 = fmul float %153, %temp60.0 %472 = fmul float %154, %temp61.0 %473 = fadd float %471, %472 %474 = fmul float %155, %temp62.0 %475 = fadd float %473, %474 %476 = fadd float %475, %166 %477 = fmul float %temp60.0, %185 %478 = fmul float %temp61.0, %186 %479 = fadd float %477, %478 %480 = fmul float %temp62.0, %187 %481 = fadd float %479, %480 %482 = fadd float %481, %196 %483 = fmul float %temp60.0, %197 %484 = fmul float %temp61.0, %198 %485 = fadd float %483, %484 %486 = fmul float %temp62.0, %199 %487 = fadd float %485, %486 %488 = fadd float %487, %208 %489 = fmul float %temp60.0, %209 %490 = fmul float %temp61.0, %210 %491 = fadd float %489, %490 %492 = fmul float %temp62.0, %211 %493 = fadd float %491, %492 %494 = fadd float %493, %217 %495 = fdiv float 1.000000e+00, %494 %496 = fmul float %482, %495 %497 = fmul float %488, %495 %498 = bitcast float %496 to i32 %499 = bitcast float %497 to i32 %500 = insertelement <2 x i32> undef, i32 %498, i32 0 %501 = insertelement <2 x i32> %500, i32 %499, i32 1 %502 = bitcast <8 x i32> %57 to <32 x i8> %503 = bitcast <4 x i32> %59 to <16 x i8> %504 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %501, <32 x i8> %502, <16 x i8> %503, i32 2) %505 = extractelement <4 x float> %504, i32 0 %506 = extractelement <4 x float> %504, i32 1 %507 = fmul float %505, 0x3FEFE02000000000 %508 = fmul float %506, 0x3F6FE01F80000000 %509 = fadd float %507, %508 %510 = fsub float %257, %290 %511 = fsub float %330, %363 %512 = fsub float %403, %436 %513 = fsub float %476, %509 %514 = fadd float %510, 0xBF59000120000000 %515 = fadd float %511, 0xBF59000120000000 %516 = fadd float %512, 0xBF59000120000000 %517 = fadd float %513, 0xBF59000120000000 %518 = fmul float %34, %514 %519 = fsub float 1.000000e+00, %518 %520 = call float @llvm.AMDIL.clamp.(float %519, float 0.000000e+00, float 1.000000e+00) %521 = fcmp olt float %514, 0.000000e+00 %522 = fmul float %34, %515 %523 = fsub float 1.000000e+00, %522 %524 = call float @llvm.AMDIL.clamp.(float %523, float 0.000000e+00, float 1.000000e+00) %525 = fcmp olt float %515, 0.000000e+00 %526 = fmul float %34, %516 %527 = fsub float 1.000000e+00, %526 %528 = call float @llvm.AMDIL.clamp.(float %527, float 0.000000e+00, float 1.000000e+00) %529 = fcmp olt float %516, 0.000000e+00 %530 = fmul float %34, %517 %531 = fsub float 1.000000e+00, %530 %532 = call float @llvm.AMDIL.clamp.(float %531, float 0.000000e+00, float 1.000000e+00) %533 = fcmp olt float %517, 0.000000e+00 %.op = fmul float %520, 6.250000e-02 %534 = select i1 %521, float 0.000000e+00, float %.op %.op196 = fmul float %524, 6.250000e-02 %535 = select i1 %525, float 0.000000e+00, float %.op196 %536 = fadd float %534, %535 %.op197 = fmul float %528, 6.250000e-02 %537 = select i1 %529, float 0.000000e+00, float %.op197 %538 = fadd float %536, %537 %.op198 = fmul float %532, 6.250000e-02 %539 = select i1 %533, float 0.000000e+00, float %.op198 %540 = fadd float %538, %539 %541 = fmul float %134, 0x3FCD4A98A0000000 %542 = fmul float %135, 0x3FD28A04E0000000 %543 = fadd float %542, %541 %544 = fmul float %136, 0x3FC10B0F20000000 %545 = fadd float %543, %544 %546 = fmul float %545, %134 %547 = fmul float %545, %135 %548 = fmul float %545, %136 %549 = fmul float %546, 2.000000e+00 %550 = fmul float %547, 2.000000e+00 %551 = fmul float %548, 2.000000e+00 %552 = fsub float 0x3FCD4A98A0000000, %549 %553 = fsub float 0x3FD28A04E0000000, %550 %554 = fsub float 0x3FC10B0F20000000, %551 %555 = fmul float %97, %552 %556 = fmul float %98, %553 %557 = fadd float %556, %555 %558 = fmul float %99, %554 %559 = fadd float %557, %558 %560 = fmul float %559, %97 %561 = fmul float %559, %98 %562 = fmul float %559, %99 %563 = fmul float %560, 2.000000e+00 %564 = fmul float %561, 2.000000e+00 %565 = fmul float %562, 2.000000e+00 %566 = fsub float %552, %563 %567 = fsub float %553, %564 %568 = fsub float %554, %565 %569 = fmul float %552, %97 %570 = fmul float %553, %98 %571 = fadd float %570, %569 %572 = fmul float %554, %99 %573 = fadd float %571, %572 %574 = fcmp olt float %573, 0.000000e+00 %.172 = select i1 %574, float %566, float %552 %.173 = select i1 %574, float %567, float %553 %.174 = select i1 %574, float %568, float %554 %575 = fmul float %153, %.172 %576 = fmul float %154, %.173 %577 = fadd float %575, %576 %578 = fmul float %155, %.174 %579 = fadd float %577, %578 %580 = fadd float %579, %166 %581 = fmul float %.172, %185 %582 = fmul float %.173, %186 %583 = fadd float %581, %582 %584 = fmul float %.174, %187 %585 = fadd float %583, %584 %586 = fadd float %585, %196 %587 = fmul float %.172, %197 %588 = fmul float %.173, %198 %589 = fadd float %587, %588 %590 = fmul float %.174, %199 %591 = fadd float %589, %590 %592 = fadd float %591, %208 %593 = fmul float %.172, %209 %594 = fmul float %.173, %210 %595 = fadd float %593, %594 %596 = fmul float %.174, %211 %597 = fadd float %595, %596 %598 = fadd float %597, %217 %599 = fdiv float 1.000000e+00, %598 %600 = fmul float %586, %599 %601 = fmul float %592, %599 %602 = bitcast float %600 to i32 %603 = bitcast float %601 to i32 %604 = insertelement <2 x i32> undef, i32 %602, i32 0 %605 = insertelement <2 x i32> %604, i32 %603, i32 1 %606 = bitcast <8 x i32> %57 to <32 x i8> %607 = bitcast <4 x i32> %59 to <16 x i8> %608 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %605, <32 x i8> %606, <16 x i8> %607, i32 2) %609 = extractelement <4 x float> %608, i32 0 %610 = extractelement <4 x float> %608, i32 1 %611 = fmul float %609, 0x3FEFE02000000000 %612 = fmul float %610, 0x3F6FE01F80000000 %613 = fadd float %611, %612 %614 = fmul float %134, 0xBFCC54D620000000 %615 = fmul float %135, 0xBFC2FF10E0000000 %616 = fadd float %615, %614 %617 = fmul float %136, 0x3FC0DD6160000000 %618 = fadd float %616, %617 %619 = fmul float %618, %134 %620 = fmul float %618, %135 %621 = fmul float %618, %136 %622 = fmul float %619, 2.000000e+00 %623 = fmul float %620, 2.000000e+00 %624 = fmul float %621, 2.000000e+00 %625 = fsub float 0xBFCC54D620000000, %622 %626 = fsub float 0xBFC2FF10E0000000, %623 %627 = fsub float 0x3FC0DD6160000000, %624 %628 = fmul float %97, %625 %629 = fmul float %98, %626 %630 = fadd float %629, %628 %631 = fmul float %99, %627 %632 = fadd float %630, %631 %633 = fmul float %632, %97 %634 = fmul float %632, %98 %635 = fmul float %632, %99 %636 = fmul float %633, 2.000000e+00 %637 = fmul float %634, 2.000000e+00 %638 = fmul float %635, 2.000000e+00 %639 = fsub float %625, %636 %640 = fsub float %626, %637 %641 = fsub float %627, %638 %642 = fmul float %625, %97 %643 = fmul float %626, %98 %644 = fadd float %643, %642 %645 = fmul float %627, %99 %646 = fadd float %644, %645 %647 = fcmp olt float %646, 0.000000e+00 %temp48.1 = select i1 %647, float %639, float %625 %temp49.0 = select i1 %647, float %640, float %626 %temp50.0 = select i1 %647, float %641, float %627 %648 = fmul float %153, %temp48.1 %649 = fmul float %154, %temp49.0 %650 = fadd float %648, %649 %651 = fmul float %155, %temp50.0 %652 = fadd float %650, %651 %653 = fadd float %652, %166 %654 = fmul float %temp48.1, %185 %655 = fmul float %temp49.0, %186 %656 = fadd float %654, %655 %657 = fmul float %temp50.0, %187 %658 = fadd float %656, %657 %659 = fadd float %658, %196 %660 = fmul float %temp48.1, %197 %661 = fmul float %temp49.0, %198 %662 = fadd float %660, %661 %663 = fmul float %temp50.0, %199 %664 = fadd float %662, %663 %665 = fadd float %664, %208 %666 = fmul float %temp48.1, %209 %667 = fmul float %temp49.0, %210 %668 = fadd float %666, %667 %669 = fmul float %temp50.0, %211 %670 = fadd float %668, %669 %671 = fadd float %670, %217 %672 = fdiv float 1.000000e+00, %671 %673 = fmul float %659, %672 %674 = fmul float %665, %672 %675 = bitcast float %673 to i32 %676 = bitcast float %674 to i32 %677 = insertelement <2 x i32> undef, i32 %675, i32 0 %678 = insertelement <2 x i32> %677, i32 %676, i32 1 %679 = bitcast <8 x i32> %57 to <32 x i8> %680 = bitcast <4 x i32> %59 to <16 x i8> %681 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %678, <32 x i8> %679, <16 x i8> %680, i32 2) %682 = extractelement <4 x float> %681, i32 0 %683 = extractelement <4 x float> %681, i32 1 %684 = fmul float %682, 0x3FEFE02000000000 %685 = fmul float %683, 0x3F6FE01F80000000 %686 = fadd float %684, %685 %687 = fmul float %134, 0xBFDD48CF80000000 %688 = fmul float %135, 0xBFE5517C60000000 %689 = fadd float %688, %687 %690 = fmul float %136, 0x3FCD5F1380000000 %691 = fadd float %689, %690 %692 = fmul float %691, %134 %693 = fmul float %691, %135 %694 = fmul float %691, %136 %695 = fmul float %692, 2.000000e+00 %696 = fmul float %693, 2.000000e+00 %697 = fmul float %694, 2.000000e+00 %698 = fsub float 0xBFDD48CF80000000, %695 %699 = fsub float 0xBFE5517C60000000, %696 %700 = fsub float 0x3FCD5F1380000000, %697 %701 = fmul float %97, %698 %702 = fmul float %98, %699 %703 = fadd float %702, %701 %704 = fmul float %99, %700 %705 = fadd float %703, %704 %706 = fmul float %705, %97 %707 = fmul float %705, %98 %708 = fmul float %705, %99 %709 = fmul float %706, 2.000000e+00 %710 = fmul float %707, 2.000000e+00 %711 = fmul float %708, 2.000000e+00 %712 = fsub float %698, %709 %713 = fsub float %699, %710 %714 = fsub float %700, %711 %715 = fmul float %698, %97 %716 = fmul float %699, %98 %717 = fadd float %716, %715 %718 = fmul float %700, %99 %719 = fadd float %717, %718 %720 = fcmp olt float %719, 0.000000e+00 %.175 = select i1 %720, float %712, float %698 %.176 = select i1 %720, float %713, float %699 %.177 = select i1 %720, float %714, float %700 %721 = fmul float %153, %.175 %722 = fmul float %154, %.176 %723 = fadd float %721, %722 %724 = fmul float %155, %.177 %725 = fadd float %723, %724 %726 = fadd float %725, %166 %727 = fmul float %.175, %185 %728 = fmul float %.176, %186 %729 = fadd float %727, %728 %730 = fmul float %.177, %187 %731 = fadd float %729, %730 %732 = fadd float %731, %196 %733 = fmul float %.175, %197 %734 = fmul float %.176, %198 %735 = fadd float %733, %734 %736 = fmul float %.177, %199 %737 = fadd float %735, %736 %738 = fadd float %737, %208 %739 = fmul float %.175, %209 %740 = fmul float %.176, %210 %741 = fadd float %739, %740 %742 = fmul float %.177, %211 %743 = fadd float %741, %742 %744 = fadd float %743, %217 %745 = fdiv float 1.000000e+00, %744 %746 = fmul float %732, %745 %747 = fmul float %738, %745 %748 = bitcast float %746 to i32 %749 = bitcast float %747 to i32 %750 = insertelement <2 x i32> undef, i32 %748, i32 0 %751 = insertelement <2 x i32> %750, i32 %749, i32 1 %752 = bitcast <8 x i32> %57 to <32 x i8> %753 = bitcast <4 x i32> %59 to <16 x i8> %754 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %751, <32 x i8> %752, <16 x i8> %753, i32 2) %755 = extractelement <4 x float> %754, i32 0 %756 = extractelement <4 x float> %754, i32 1 %757 = fmul float %755, 0x3FEFE02000000000 %758 = fmul float %756, 0x3F6FE01F80000000 %759 = fadd float %757, %758 %760 = fmul float %134, 0xBFB2E9EE40000000 %761 = fmul float %135, 0x3FAC2ACB80000000 %762 = fadd float %761, %760 %763 = fmul float %136, 0xBF83DE1E20000000 %764 = fadd float %762, %763 %765 = fmul float %764, %134 %766 = fmul float %764, %135 %767 = fmul float %764, %136 %768 = fmul float %765, 2.000000e+00 %769 = fmul float %766, 2.000000e+00 %770 = fmul float %767, 2.000000e+00 %771 = fsub float 0xBFB2E9EE40000000, %768 %772 = fsub float 0x3FAC2ACB80000000, %769 %773 = fsub float 0xBF83DE1E20000000, %770 %774 = fmul float %97, %771 %775 = fmul float %98, %772 %776 = fadd float %775, %774 %777 = fmul float %99, %773 %778 = fadd float %776, %777 %779 = fmul float %778, %97 %780 = fmul float %778, %98 %781 = fmul float %778, %99 %782 = fmul float %779, 2.000000e+00 %783 = fmul float %780, 2.000000e+00 %784 = fmul float %781, 2.000000e+00 %785 = fsub float %771, %782 %786 = fsub float %772, %783 %787 = fsub float %773, %784 %788 = fmul float %771, %97 %789 = fmul float %772, %98 %790 = fadd float %789, %788 %791 = fmul float %773, %99 %792 = fadd float %790, %791 %793 = fcmp olt float %792, 0.000000e+00 %temp64.0 = select i1 %793, float %785, float %771 %temp65.0 = select i1 %793, float %786, float %772 %temp66.0 = select i1 %793, float %787, float %773 %794 = fmul float %153, %temp64.0 %795 = fmul float %154, %temp65.0 %796 = fadd float %794, %795 %797 = fmul float %155, %temp66.0 %798 = fadd float %796, %797 %799 = fadd float %798, %166 %800 = fmul float %temp64.0, %185 %801 = fmul float %temp65.0, %186 %802 = fadd float %800, %801 %803 = fmul float %temp66.0, %187 %804 = fadd float %802, %803 %805 = fadd float %804, %196 %806 = fmul float %temp64.0, %197 %807 = fmul float %temp65.0, %198 %808 = fadd float %806, %807 %809 = fmul float %temp66.0, %199 %810 = fadd float %808, %809 %811 = fadd float %810, %208 %812 = fmul float %temp64.0, %209 %813 = fmul float %temp65.0, %210 %814 = fadd float %812, %813 %815 = fmul float %temp66.0, %211 %816 = fadd float %814, %815 %817 = fadd float %816, %217 %818 = fdiv float 1.000000e+00, %817 %819 = fmul float %805, %818 %820 = fmul float %811, %818 %821 = bitcast float %819 to i32 %822 = bitcast float %820 to i32 %823 = insertelement <2 x i32> undef, i32 %821, i32 0 %824 = insertelement <2 x i32> %823, i32 %822, i32 1 %825 = bitcast <8 x i32> %57 to <32 x i8> %826 = bitcast <4 x i32> %59 to <16 x i8> %827 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %824, <32 x i8> %825, <16 x i8> %826, i32 2) %828 = extractelement <4 x float> %827, i32 0 %829 = extractelement <4 x float> %827, i32 1 %830 = fmul float %828, 0x3FEFE02000000000 %831 = fmul float %829, 0x3F6FE01F80000000 %832 = fadd float %830, %831 %833 = fsub float %580, %613 %834 = fsub float %653, %686 %835 = fsub float %726, %759 %836 = fsub float %799, %832 %837 = fadd float %833, 0xBF59000120000000 %838 = fadd float %834, 0xBF59000120000000 %839 = fadd float %835, 0xBF59000120000000 %840 = fadd float %836, 0xBF59000120000000 %841 = fmul float %34, %837 %842 = fsub float 1.000000e+00, %841 %843 = call float @llvm.AMDIL.clamp.(float %842, float 0.000000e+00, float 1.000000e+00) %844 = fcmp olt float %837, 0.000000e+00 %845 = fmul float %34, %838 %846 = fsub float 1.000000e+00, %845 %847 = call float @llvm.AMDIL.clamp.(float %846, float 0.000000e+00, float 1.000000e+00) %848 = fcmp olt float %838, 0.000000e+00 %849 = fmul float %34, %839 %850 = fsub float 1.000000e+00, %849 %851 = call float @llvm.AMDIL.clamp.(float %850, float 0.000000e+00, float 1.000000e+00) %852 = fcmp olt float %839, 0.000000e+00 %853 = fmul float %34, %840 %854 = fsub float 1.000000e+00, %853 %855 = call float @llvm.AMDIL.clamp.(float %854, float 0.000000e+00, float 1.000000e+00) %856 = fcmp olt float %840, 0.000000e+00 %.op199 = fmul float %843, 6.250000e-02 %857 = select i1 %844, float 0.000000e+00, float %.op199 %.op200 = fmul float %847, 6.250000e-02 %858 = select i1 %848, float 0.000000e+00, float %.op200 %859 = fadd float %857, %858 %.op201 = fmul float %851, 6.250000e-02 %860 = select i1 %852, float 0.000000e+00, float %.op201 %861 = fadd float %859, %860 %.op202 = fmul float %855, 6.250000e-02 %862 = select i1 %856, float 0.000000e+00, float %.op202 %863 = fadd float %861, %862 %864 = fadd float %540, %863 %865 = fmul float %134, 0xBFA398A660000000 %866 = fmul float %135, 0xBFA4790B80000000 %867 = fadd float %866, %865 %868 = fmul float %136, 0x3F841B75A0000000 %869 = fadd float %867, %868 %870 = fmul float %869, %134 %871 = fmul float %869, %135 %872 = fmul float %869, %136 %873 = fmul float %870, 2.000000e+00 %874 = fmul float %871, 2.000000e+00 %875 = fmul float %872, 2.000000e+00 %876 = fsub float 0xBFA398A660000000, %873 %877 = fsub float 0xBFA4790B80000000, %874 %878 = fsub float 0x3F841B75A0000000, %875 %879 = fmul float %97, %876 %880 = fmul float %98, %877 %881 = fadd float %880, %879 %882 = fmul float %99, %878 %883 = fadd float %881, %882 %884 = fmul float %883, %97 %885 = fmul float %883, %98 %886 = fmul float %883, %99 %887 = fmul float %884, 2.000000e+00 %888 = fmul float %885, 2.000000e+00 %889 = fmul float %886, 2.000000e+00 %890 = fsub float %876, %887 %891 = fsub float %877, %888 %892 = fsub float %878, %889 %893 = fmul float %876, %97 %894 = fmul float %877, %98 %895 = fadd float %894, %893 %896 = fmul float %878, %99 %897 = fadd float %895, %896 %898 = fcmp olt float %897, 0.000000e+00 %.180 = select i1 %898, float %890, float %876 %.181 = select i1 %898, float %891, float %877 %.182 = select i1 %898, float %892, float %878 %899 = fmul float %153, %.180 %900 = fmul float %154, %.181 %901 = fadd float %899, %900 %902 = fmul float %155, %.182 %903 = fadd float %901, %902 %904 = fadd float %903, %166 %905 = fmul float %.180, %185 %906 = fmul float %.181, %186 %907 = fadd float %905, %906 %908 = fmul float %.182, %187 %909 = fadd float %907, %908 %910 = fadd float %909, %196 %911 = fmul float %.180, %197 %912 = fmul float %.181, %198 %913 = fadd float %911, %912 %914 = fmul float %.182, %199 %915 = fadd float %913, %914 %916 = fadd float %915, %208 %917 = fmul float %.180, %209 %918 = fmul float %.181, %210 %919 = fadd float %917, %918 %920 = fmul float %.182, %211 %921 = fadd float %919, %920 %922 = fadd float %921, %217 %923 = fdiv float 1.000000e+00, %922 %924 = fmul float %910, %923 %925 = fmul float %916, %923 %926 = bitcast float %924 to i32 %927 = bitcast float %925 to i32 %928 = insertelement <2 x i32> undef, i32 %926, i32 0 %929 = insertelement <2 x i32> %928, i32 %927, i32 1 %930 = bitcast <8 x i32> %57 to <32 x i8> %931 = bitcast <4 x i32> %59 to <16 x i8> %932 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %929, <32 x i8> %930, <16 x i8> %931, i32 2) %933 = extractelement <4 x float> %932, i32 0 %934 = extractelement <4 x float> %932, i32 1 %935 = fmul float %933, 0x3FEFE02000000000 %936 = fmul float %934, 0x3F6FE01F80000000 %937 = fadd float %935, %936 %938 = fmul float %134, 0x3FE7D86220000000 %939 = fmul float %135, 0x3FD4B7CFE0000000 %940 = fadd float %939, %938 %941 = fmul float %136, 0xBFDB761880000000 %942 = fadd float %940, %941 %943 = fmul float %942, %134 %944 = fmul float %942, %135 %945 = fmul float %942, %136 %946 = fmul float %943, 2.000000e+00 %947 = fmul float %944, 2.000000e+00 %948 = fmul float %945, 2.000000e+00 %949 = fsub float 0x3FE7D86220000000, %946 %950 = fsub float 0x3FD4B7CFE0000000, %947 %951 = fsub float 0xBFDB761880000000, %948 %952 = fmul float %97, %949 %953 = fmul float %98, %950 %954 = fadd float %953, %952 %955 = fmul float %99, %951 %956 = fadd float %954, %955 %957 = fmul float %956, %97 %958 = fmul float %956, %98 %959 = fmul float %956, %99 %960 = fmul float %957, 2.000000e+00 %961 = fmul float %958, 2.000000e+00 %962 = fmul float %959, 2.000000e+00 %963 = fsub float %949, %960 %964 = fsub float %950, %961 %965 = fsub float %951, %962 %966 = fmul float %949, %97 %967 = fmul float %950, %98 %968 = fadd float %967, %966 %969 = fmul float %951, %99 %970 = fadd float %968, %969 %971 = fcmp olt float %970, 0.000000e+00 %temp48.3 = select i1 %971, float %963, float %949 %temp49.1 = select i1 %971, float %964, float %950 %temp50.1 = select i1 %971, float %965, float %951 %972 = fmul float %153, %temp48.3 %973 = fmul float %154, %temp49.1 %974 = fadd float %972, %973 %975 = fmul float %155, %temp50.1 %976 = fadd float %974, %975 %977 = fadd float %976, %166 %978 = fmul float %temp48.3, %185 %979 = fmul float %temp49.1, %186 %980 = fadd float %978, %979 %981 = fmul float %temp50.1, %187 %982 = fadd float %980, %981 %983 = fadd float %982, %196 %984 = fmul float %temp48.3, %197 %985 = fmul float %temp49.1, %198 %986 = fadd float %984, %985 %987 = fmul float %temp50.1, %199 %988 = fadd float %986, %987 %989 = fadd float %988, %208 %990 = fmul float %temp48.3, %209 %991 = fmul float %temp49.1, %210 %992 = fadd float %990, %991 %993 = fmul float %temp50.1, %211 %994 = fadd float %992, %993 %995 = fadd float %994, %217 %996 = fdiv float 1.000000e+00, %995 %997 = fmul float %983, %996 %998 = fmul float %989, %996 %999 = bitcast float %997 to i32 %1000 = bitcast float %998 to i32 %1001 = insertelement <2 x i32> undef, i32 %999, i32 0 %1002 = insertelement <2 x i32> %1001, i32 %1000, i32 1 %1003 = bitcast <8 x i32> %57 to <32 x i8> %1004 = bitcast <4 x i32> %59 to <16 x i8> %1005 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1002, <32 x i8> %1003, <16 x i8> %1004, i32 2) %1006 = extractelement <4 x float> %1005, i32 0 %1007 = extractelement <4 x float> %1005, i32 1 %1008 = fmul float %1006, 0x3FEFE02000000000 %1009 = fmul float %1007, 0x3F6FE01F80000000 %1010 = fadd float %1008, %1009 %1011 = fmul float %134, 0xBF8F82B200000000 %1012 = fmul float %135, 0x3FD11D7DC0000000 %1013 = fadd float %1012, %1011 %1014 = fmul float %136, 0xBFB0E7BC40000000 %1015 = fadd float %1013, %1014 %1016 = fmul float %1015, %134 %1017 = fmul float %1015, %135 %1018 = fmul float %1015, %136 %1019 = fmul float %1016, 2.000000e+00 %1020 = fmul float %1017, 2.000000e+00 %1021 = fmul float %1018, 2.000000e+00 %1022 = fsub float 0xBF8F82B200000000, %1019 %1023 = fsub float 0x3FD11D7DC0000000, %1020 %1024 = fsub float 0xBFB0E7BC40000000, %1021 %1025 = fmul float %97, %1022 %1026 = fmul float %98, %1023 %1027 = fadd float %1026, %1025 %1028 = fmul float %99, %1024 %1029 = fadd float %1027, %1028 %1030 = fmul float %1029, %97 %1031 = fmul float %1029, %98 %1032 = fmul float %1029, %99 %1033 = fmul float %1030, 2.000000e+00 %1034 = fmul float %1031, 2.000000e+00 %1035 = fmul float %1032, 2.000000e+00 %1036 = fsub float %1022, %1033 %1037 = fsub float %1023, %1034 %1038 = fsub float %1024, %1035 %1039 = fmul float %1022, %97 %1040 = fmul float %1023, %98 %1041 = fadd float %1040, %1039 %1042 = fmul float %1024, %99 %1043 = fadd float %1041, %1042 %1044 = fcmp olt float %1043, 0.000000e+00 %.183 = select i1 %1044, float %1036, float %1022 %.184 = select i1 %1044, float %1037, float %1023 %.185 = select i1 %1044, float %1038, float %1024 %1045 = fmul float %153, %.183 %1046 = fmul float %154, %.184 %1047 = fadd float %1045, %1046 %1048 = fmul float %155, %.185 %1049 = fadd float %1047, %1048 %1050 = fadd float %1049, %166 %1051 = fmul float %.183, %185 %1052 = fmul float %.184, %186 %1053 = fadd float %1051, %1052 %1054 = fmul float %.185, %187 %1055 = fadd float %1053, %1054 %1056 = fadd float %1055, %196 %1057 = fmul float %.183, %197 %1058 = fmul float %.184, %198 %1059 = fadd float %1057, %1058 %1060 = fmul float %.185, %199 %1061 = fadd float %1059, %1060 %1062 = fadd float %1061, %208 %1063 = fmul float %.183, %209 %1064 = fmul float %.184, %210 %1065 = fadd float %1063, %1064 %1066 = fmul float %.185, %211 %1067 = fadd float %1065, %1066 %1068 = fadd float %1067, %217 %1069 = fdiv float 1.000000e+00, %1068 %1070 = fmul float %1056, %1069 %1071 = fmul float %1062, %1069 %1072 = bitcast float %1070 to i32 %1073 = bitcast float %1071 to i32 %1074 = insertelement <2 x i32> undef, i32 %1072, i32 0 %1075 = insertelement <2 x i32> %1074, i32 %1073, i32 1 %1076 = bitcast <8 x i32> %57 to <32 x i8> %1077 = bitcast <4 x i32> %59 to <16 x i8> %1078 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1075, <32 x i8> %1076, <16 x i8> %1077, i32 2) %1079 = extractelement <4 x float> %1078, i32 0 %1080 = extractelement <4 x float> %1078, i32 1 %1081 = fmul float %1079, 0x3FEFE02000000000 %1082 = fmul float %1080, 0x3F6FE01F80000000 %1083 = fadd float %1081, %1082 %1084 = fmul float %134, 0xBFB89EFD80000000 %1085 = fmul float %135, 0xBFE0BBE660000000 %1086 = fadd float %1085, %1084 %1087 = fmul float %136, 0xBFD00DD820000000 %1088 = fadd float %1086, %1087 %1089 = fmul float %1088, %134 %1090 = fmul float %1088, %135 %1091 = fmul float %1088, %136 %1092 = fmul float %1089, 2.000000e+00 %1093 = fmul float %1090, 2.000000e+00 %1094 = fmul float %1091, 2.000000e+00 %1095 = fsub float 0xBFB89EFD80000000, %1092 %1096 = fsub float 0xBFE0BBE660000000, %1093 %1097 = fsub float 0xBFD00DD820000000, %1094 %1098 = fmul float %97, %1095 %1099 = fmul float %98, %1096 %1100 = fadd float %1099, %1098 %1101 = fmul float %99, %1097 %1102 = fadd float %1100, %1101 %1103 = fmul float %1102, %97 %1104 = fmul float %1102, %98 %1105 = fmul float %1102, %99 %1106 = fmul float %1103, 2.000000e+00 %1107 = fmul float %1104, 2.000000e+00 %1108 = fmul float %1105, 2.000000e+00 %1109 = fsub float %1095, %1106 %1110 = fsub float %1096, %1107 %1111 = fsub float %1097, %1108 %1112 = fmul float %1095, %97 %1113 = fmul float %1096, %98 %1114 = fadd float %1113, %1112 %1115 = fmul float %1097, %99 %1116 = fadd float %1114, %1115 %1117 = fcmp olt float %1116, 0.000000e+00 %temp64.1 = select i1 %1117, float %1109, float %1095 %temp65.1 = select i1 %1117, float %1110, float %1096 %temp66.1 = select i1 %1117, float %1111, float %1097 %1118 = fmul float %153, %temp64.1 %1119 = fmul float %154, %temp65.1 %1120 = fadd float %1118, %1119 %1121 = fmul float %155, %temp66.1 %1122 = fadd float %1120, %1121 %1123 = fadd float %1122, %166 %1124 = fmul float %temp64.1, %185 %1125 = fmul float %temp65.1, %186 %1126 = fadd float %1124, %1125 %1127 = fmul float %temp66.1, %187 %1128 = fadd float %1126, %1127 %1129 = fadd float %1128, %196 %1130 = fmul float %temp64.1, %197 %1131 = fmul float %temp65.1, %198 %1132 = fadd float %1130, %1131 %1133 = fmul float %temp66.1, %199 %1134 = fadd float %1132, %1133 %1135 = fadd float %1134, %208 %1136 = fmul float %temp64.1, %209 %1137 = fmul float %temp65.1, %210 %1138 = fadd float %1136, %1137 %1139 = fmul float %temp66.1, %211 %1140 = fadd float %1138, %1139 %1141 = fadd float %1140, %217 %1142 = fdiv float 1.000000e+00, %1141 %1143 = fmul float %1129, %1142 %1144 = fmul float %1135, %1142 %1145 = bitcast float %1143 to i32 %1146 = bitcast float %1144 to i32 %1147 = insertelement <2 x i32> undef, i32 %1145, i32 0 %1148 = insertelement <2 x i32> %1147, i32 %1146, i32 1 %1149 = bitcast <8 x i32> %57 to <32 x i8> %1150 = bitcast <4 x i32> %59 to <16 x i8> %1151 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1148, <32 x i8> %1149, <16 x i8> %1150, i32 2) %1152 = extractelement <4 x float> %1151, i32 0 %1153 = extractelement <4 x float> %1151, i32 1 %1154 = fmul float %1152, 0x3FEFE02000000000 %1155 = fmul float %1153, 0x3F6FE01F80000000 %1156 = fadd float %1154, %1155 %1157 = fsub float %904, %937 %1158 = fsub float %977, %1010 %1159 = fsub float %1050, %1083 %1160 = fsub float %1123, %1156 %1161 = fadd float %1157, 0xBF59000120000000 %1162 = fadd float %1158, 0xBF59000120000000 %1163 = fadd float %1159, 0xBF59000120000000 %1164 = fadd float %1160, 0xBF59000120000000 %1165 = fmul float %34, %1161 %1166 = fsub float 1.000000e+00, %1165 %1167 = call float @llvm.AMDIL.clamp.(float %1166, float 0.000000e+00, float 1.000000e+00) %1168 = fcmp olt float %1161, 0.000000e+00 %1169 = fmul float %34, %1162 %1170 = fsub float 1.000000e+00, %1169 %1171 = call float @llvm.AMDIL.clamp.(float %1170, float 0.000000e+00, float 1.000000e+00) %1172 = fcmp olt float %1162, 0.000000e+00 %1173 = fmul float %34, %1163 %1174 = fsub float 1.000000e+00, %1173 %1175 = call float @llvm.AMDIL.clamp.(float %1174, float 0.000000e+00, float 1.000000e+00) %1176 = fcmp olt float %1163, 0.000000e+00 %1177 = fmul float %34, %1164 %1178 = fsub float 1.000000e+00, %1177 %1179 = call float @llvm.AMDIL.clamp.(float %1178, float 0.000000e+00, float 1.000000e+00) %1180 = fcmp olt float %1164, 0.000000e+00 %.op203 = fmul float %1167, 6.250000e-02 %1181 = select i1 %1168, float 0.000000e+00, float %.op203 %.op204 = fmul float %1171, 6.250000e-02 %1182 = select i1 %1172, float 0.000000e+00, float %.op204 %1183 = fadd float %1181, %1182 %.op205 = fmul float %1175, 6.250000e-02 %1184 = select i1 %1176, float 0.000000e+00, float %.op205 %1185 = fadd float %1183, %1184 %.op206 = fmul float %1179, 6.250000e-02 %1186 = select i1 %1180, float 0.000000e+00, float %.op206 %1187 = fadd float %1185, %1186 %1188 = fadd float %864, %1187 %1189 = fmul float %134, 0x3FDF56FFC0000000 %1190 = fmul float %135, 0xBFDDB8C760000000 %1191 = fadd float %1190, %1189 %1192 = fmul float %136, 0xBFC31A4BE0000000 %1193 = fadd float %1191, %1192 %1194 = fmul float %1193, %134 %1195 = fmul float %1193, %135 %1196 = fmul float %1193, %136 %1197 = fmul float %1194, 2.000000e+00 %1198 = fmul float %1195, 2.000000e+00 %1199 = fmul float %1196, 2.000000e+00 %1200 = fsub float 0x3FDF56FFC0000000, %1197 %1201 = fsub float 0xBFDDB8C760000000, %1198 %1202 = fsub float 0xBFC31A4BE0000000, %1199 %1203 = fmul float %97, %1200 %1204 = fmul float %98, %1201 %1205 = fadd float %1204, %1203 %1206 = fmul float %99, %1202 %1207 = fadd float %1205, %1206 %1208 = fmul float %1207, %97 %1209 = fmul float %1207, %98 %1210 = fmul float %1207, %99 %1211 = fmul float %1208, 2.000000e+00 %1212 = fmul float %1209, 2.000000e+00 %1213 = fmul float %1210, 2.000000e+00 %1214 = fsub float %1200, %1211 %1215 = fsub float %1201, %1212 %1216 = fsub float %1202, %1213 %1217 = fmul float %1200, %97 %1218 = fmul float %1201, %98 %1219 = fadd float %1218, %1217 %1220 = fmul float %1202, %99 %1221 = fadd float %1219, %1220 %1222 = fcmp olt float %1221, 0.000000e+00 %.188 = select i1 %1222, float %1214, float %1200 %.189 = select i1 %1222, float %1215, float %1201 %.190 = select i1 %1222, float %1216, float %1202 %1223 = fmul float %153, %.188 %1224 = fmul float %154, %.189 %1225 = fadd float %1223, %1224 %1226 = fmul float %155, %.190 %1227 = fadd float %1225, %1226 %1228 = fadd float %1227, %166 %1229 = fmul float %.188, %185 %1230 = fmul float %.189, %186 %1231 = fadd float %1229, %1230 %1232 = fmul float %.190, %187 %1233 = fadd float %1231, %1232 %1234 = fadd float %1233, %196 %1235 = fmul float %.188, %197 %1236 = fmul float %.189, %198 %1237 = fadd float %1235, %1236 %1238 = fmul float %.190, %199 %1239 = fadd float %1237, %1238 %1240 = fadd float %1239, %208 %1241 = fmul float %.188, %209 %1242 = fmul float %.189, %210 %1243 = fadd float %1241, %1242 %1244 = fmul float %.190, %211 %1245 = fadd float %1243, %1244 %1246 = fadd float %1245, %217 %1247 = fdiv float 1.000000e+00, %1246 %1248 = fmul float %1234, %1247 %1249 = fmul float %1240, %1247 %1250 = bitcast float %1248 to i32 %1251 = bitcast float %1249 to i32 %1252 = insertelement <2 x i32> undef, i32 %1250, i32 0 %1253 = insertelement <2 x i32> %1252, i32 %1251, i32 1 %1254 = bitcast <8 x i32> %57 to <32 x i8> %1255 = bitcast <4 x i32> %59 to <16 x i8> %1256 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1253, <32 x i8> %1254, <16 x i8> %1255, i32 2) %1257 = extractelement <4 x float> %1256, i32 0 %1258 = extractelement <4 x float> %1256, i32 1 %1259 = fmul float %1257, 0x3FEFE02000000000 %1260 = fmul float %1258, 0x3F6FE01F80000000 %1261 = fadd float %1259, %1260 %1262 = fmul float %134, 0xBFC0C98E60000000 %1263 = fmul float %135, 0x3FC1CF3DC0000000 %1264 = fadd float %1263, %1262 %1265 = fmul float %136, 0xBFE66D9BE0000000 %1266 = fadd float %1264, %1265 %1267 = fmul float %1266, %134 %1268 = fmul float %1266, %135 %1269 = fmul float %1266, %136 %1270 = fmul float %1267, 2.000000e+00 %1271 = fmul float %1268, 2.000000e+00 %1272 = fmul float %1269, 2.000000e+00 %1273 = fsub float 0xBFC0C98E60000000, %1270 %1274 = fsub float 0x3FC1CF3DC0000000, %1271 %1275 = fsub float 0xBFE66D9BE0000000, %1272 %1276 = fmul float %97, %1273 %1277 = fmul float %98, %1274 %1278 = fadd float %1277, %1276 %1279 = fmul float %99, %1275 %1280 = fadd float %1278, %1279 %1281 = fmul float %1280, %97 %1282 = fmul float %1280, %98 %1283 = fmul float %1280, %99 %1284 = fmul float %1281, 2.000000e+00 %1285 = fmul float %1282, 2.000000e+00 %1286 = fmul float %1283, 2.000000e+00 %1287 = fsub float %1273, %1284 %1288 = fsub float %1274, %1285 %1289 = fsub float %1275, %1286 %1290 = fmul float %1273, %97 %1291 = fmul float %1274, %98 %1292 = fadd float %1291, %1290 %1293 = fmul float %1275, %99 %1294 = fadd float %1292, %1293 %1295 = fcmp olt float %1294, 0.000000e+00 %temp48.5 = select i1 %1295, float %1287, float %1273 %temp49.2 = select i1 %1295, float %1288, float %1274 %temp50.2 = select i1 %1295, float %1289, float %1275 %1296 = fmul float %153, %temp48.5 %1297 = fmul float %154, %temp49.2 %1298 = fadd float %1296, %1297 %1299 = fmul float %155, %temp50.2 %1300 = fadd float %1298, %1299 %1301 = fadd float %1300, %166 %1302 = fmul float %temp48.5, %185 %1303 = fmul float %temp49.2, %186 %1304 = fadd float %1302, %1303 %1305 = fmul float %temp50.2, %187 %1306 = fadd float %1304, %1305 %1307 = fadd float %1306, %196 %1308 = fmul float %temp48.5, %197 %1309 = fmul float %temp49.2, %198 %1310 = fadd float %1308, %1309 %1311 = fmul float %temp50.2, %199 %1312 = fadd float %1310, %1311 %1313 = fadd float %1312, %208 %1314 = fmul float %temp48.5, %209 %1315 = fmul float %temp49.2, %210 %1316 = fadd float %1314, %1315 %1317 = fmul float %temp50.2, %211 %1318 = fadd float %1316, %1317 %1319 = fadd float %1318, %217 %1320 = fdiv float 1.000000e+00, %1319 %1321 = fmul float %1307, %1320 %1322 = fmul float %1313, %1320 %1323 = bitcast float %1321 to i32 %1324 = bitcast float %1322 to i32 %1325 = insertelement <2 x i32> undef, i32 %1323, i32 0 %1326 = insertelement <2 x i32> %1325, i32 %1324, i32 1 %1327 = bitcast <8 x i32> %57 to <32 x i8> %1328 = bitcast <4 x i32> %59 to <16 x i8> %1329 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1326, <32 x i8> %1327, <16 x i8> %1328, i32 2) %1330 = extractelement <4 x float> %1329, i32 0 %1331 = extractelement <4 x float> %1329, i32 1 %1332 = fmul float %1330, 0x3FEFE02000000000 %1333 = fmul float %1331, 0x3F6FE01F80000000 %1334 = fadd float %1332, %1333 %1335 = fmul float %134, 0xBFD7307280000000 %1336 = fmul float %135, 0xBFB9BA7FC0000000 %1337 = fadd float %1336, %1335 %1338 = fmul float %136, 0xBFD3996320000000 %1339 = fadd float %1337, %1338 %1340 = fmul float %1339, %134 %1341 = fmul float %1339, %135 %1342 = fmul float %1339, %136 %1343 = fmul float %1340, 2.000000e+00 %1344 = fmul float %1341, 2.000000e+00 %1345 = fmul float %1342, 2.000000e+00 %1346 = fsub float 0xBFD7307280000000, %1343 %1347 = fsub float 0xBFB9BA7FC0000000, %1344 %1348 = fsub float 0xBFD3996320000000, %1345 %1349 = fmul float %97, %1346 %1350 = fmul float %98, %1347 %1351 = fadd float %1350, %1349 %1352 = fmul float %99, %1348 %1353 = fadd float %1351, %1352 %1354 = fmul float %1353, %97 %1355 = fmul float %1353, %98 %1356 = fmul float %1353, %99 %1357 = fmul float %1354, 2.000000e+00 %1358 = fmul float %1355, 2.000000e+00 %1359 = fmul float %1356, 2.000000e+00 %1360 = fsub float %1346, %1357 %1361 = fsub float %1347, %1358 %1362 = fsub float %1348, %1359 %1363 = fmul float %1346, %97 %1364 = fmul float %1347, %98 %1365 = fadd float %1364, %1363 %1366 = fmul float %1348, %99 %1367 = fadd float %1365, %1366 %1368 = fcmp olt float %1367, 0.000000e+00 %.191 = select i1 %1368, float %1360, float %1346 %.192 = select i1 %1368, float %1361, float %1347 %.193 = select i1 %1368, float %1362, float %1348 %1369 = fmul float %153, %.191 %1370 = fmul float %154, %.192 %1371 = fadd float %1369, %1370 %1372 = fmul float %155, %.193 %1373 = fadd float %1371, %1372 %1374 = fadd float %1373, %166 %1375 = fmul float %.191, %185 %1376 = fmul float %.192, %186 %1377 = fadd float %1375, %1376 %1378 = fmul float %.193, %187 %1379 = fadd float %1377, %1378 %1380 = fadd float %1379, %196 %1381 = fmul float %.191, %197 %1382 = fmul float %.192, %198 %1383 = fadd float %1381, %1382 %1384 = fmul float %.193, %199 %1385 = fadd float %1383, %1384 %1386 = fadd float %1385, %208 %1387 = fmul float %.191, %209 %1388 = fmul float %.192, %210 %1389 = fadd float %1387, %1388 %1390 = fmul float %.193, %211 %1391 = fadd float %1389, %1390 %1392 = fadd float %1391, %217 %1393 = fdiv float 1.000000e+00, %1392 %1394 = fmul float %1380, %1393 %1395 = fmul float %1386, %1393 %1396 = bitcast float %1394 to i32 %1397 = bitcast float %1395 to i32 %1398 = insertelement <2 x i32> undef, i32 %1396, i32 0 %1399 = insertelement <2 x i32> %1398, i32 %1397, i32 1 %1400 = bitcast <8 x i32> %57 to <32 x i8> %1401 = bitcast <4 x i32> %59 to <16 x i8> %1402 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1399, <32 x i8> %1400, <16 x i8> %1401, i32 2) %1403 = extractelement <4 x float> %1402, i32 0 %1404 = extractelement <4 x float> %1402, i32 1 %1405 = fmul float %1403, 0x3FEFE02000000000 %1406 = fmul float %1404, 0x3F6FE01F80000000 %1407 = fadd float %1405, %1406 %1408 = fmul float %134, 0x3FD2C69B60000000 %1409 = fmul float %135, 0xBFC5D74D60000000 %1410 = fadd float %1409, %1408 %1411 = fmul float %136, 0xBFE52D59E0000000 %1412 = fadd float %1410, %1411 %1413 = fmul float %1412, %134 %1414 = fmul float %1412, %135 %1415 = fmul float %1412, %136 %1416 = fmul float %1413, 2.000000e+00 %1417 = fmul float %1414, 2.000000e+00 %1418 = fmul float %1415, 2.000000e+00 %1419 = fsub float 0x3FD2C69B60000000, %1416 %1420 = fsub float 0xBFC5D74D60000000, %1417 %1421 = fsub float 0xBFE52D59E0000000, %1418 %1422 = fmul float %97, %1419 %1423 = fmul float %98, %1420 %1424 = fadd float %1423, %1422 %1425 = fmul float %99, %1421 %1426 = fadd float %1424, %1425 %1427 = fmul float %1426, %97 %1428 = fmul float %1426, %98 %1429 = fmul float %1426, %99 %1430 = fmul float %1427, 2.000000e+00 %1431 = fmul float %1428, 2.000000e+00 %1432 = fmul float %1429, 2.000000e+00 %1433 = fsub float %1419, %1430 %1434 = fsub float %1420, %1431 %1435 = fsub float %1421, %1432 %1436 = fmul float %1419, %97 %1437 = fmul float %1420, %98 %1438 = fadd float %1437, %1436 %1439 = fmul float %1421, %99 %1440 = fadd float %1438, %1439 %1441 = fcmp olt float %1440, 0.000000e+00 %temp60.1 = select i1 %1441, float %1433, float %1419 %temp61.1 = select i1 %1441, float %1434, float %1420 %temp62.1 = select i1 %1441, float %1435, float %1421 %1442 = fmul float %153, %temp60.1 %1443 = fmul float %154, %temp61.1 %1444 = fadd float %1442, %1443 %1445 = fmul float %155, %temp62.1 %1446 = fadd float %1444, %1445 %1447 = fadd float %1446, %166 %1448 = fmul float %temp60.1, %185 %1449 = fmul float %temp61.1, %186 %1450 = fadd float %1448, %1449 %1451 = fmul float %temp62.1, %187 %1452 = fadd float %1450, %1451 %1453 = fadd float %1452, %196 %1454 = fmul float %temp60.1, %197 %1455 = fmul float %temp61.1, %198 %1456 = fadd float %1454, %1455 %1457 = fmul float %temp62.1, %199 %1458 = fadd float %1456, %1457 %1459 = fadd float %1458, %208 %1460 = fmul float %temp60.1, %209 %1461 = fmul float %temp61.1, %210 %1462 = fadd float %1460, %1461 %1463 = fmul float %temp62.1, %211 %1464 = fadd float %1462, %1463 %1465 = fadd float %1464, %217 %1466 = fdiv float 1.000000e+00, %1465 %1467 = fmul float %1453, %1466 %1468 = fmul float %1459, %1466 %1469 = bitcast float %1467 to i32 %1470 = bitcast float %1468 to i32 %1471 = insertelement <2 x i32> undef, i32 %1469, i32 0 %1472 = insertelement <2 x i32> %1471, i32 %1470, i32 1 %1473 = bitcast <8 x i32> %57 to <32 x i8> %1474 = bitcast <4 x i32> %59 to <16 x i8> %1475 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1472, <32 x i8> %1473, <16 x i8> %1474, i32 2) %1476 = extractelement <4 x float> %1475, i32 0 %1477 = extractelement <4 x float> %1475, i32 1 %1478 = fmul float %1476, 0x3FEFE02000000000 %1479 = fmul float %1477, 0x3F6FE01F80000000 %1480 = fadd float %1478, %1479 %1481 = fsub float %1228, %1261 %1482 = fsub float %1301, %1334 %1483 = fsub float %1374, %1407 %1484 = fsub float %1447, %1480 %1485 = fadd float %1481, 0xBF59000120000000 %1486 = fadd float %1482, 0xBF59000120000000 %1487 = fadd float %1483, 0xBF59000120000000 %1488 = fadd float %1484, 0xBF59000120000000 %1489 = fmul float %34, %1485 %1490 = fsub float 1.000000e+00, %1489 %1491 = call float @llvm.AMDIL.clamp.(float %1490, float 0.000000e+00, float 1.000000e+00) %1492 = fcmp olt float %1485, 0.000000e+00 %1493 = fmul float %34, %1486 %1494 = fsub float 1.000000e+00, %1493 %1495 = call float @llvm.AMDIL.clamp.(float %1494, float 0.000000e+00, float 1.000000e+00) %1496 = fcmp olt float %1486, 0.000000e+00 %1497 = fmul float %34, %1487 %1498 = fsub float 1.000000e+00, %1497 %1499 = call float @llvm.AMDIL.clamp.(float %1498, float 0.000000e+00, float 1.000000e+00) %1500 = fcmp olt float %1487, 0.000000e+00 %1501 = fmul float %34, %1488 %1502 = fsub float 1.000000e+00, %1501 %1503 = call float @llvm.AMDIL.clamp.(float %1502, float 0.000000e+00, float 1.000000e+00) %1504 = fcmp olt float %1488, 0.000000e+00 %1505 = fdiv float 1.000000e+00, %55 %1506 = fmul float %106, %1505 %1507 = fmul float %1506, 6.553500e+04 %1508 = call float @floor(float %1507) %1509 = fmul float %1508, 3.906250e-03 %1510 = call float @floor(float %1509) %1511 = fmul float %97, 7.000000e+00 %1512 = fadd float %1511, 8.000000e+00 %1513 = fmul float %98, 7.000000e+00 %1514 = fadd float %1513, 8.000000e+00 %1515 = call float @floor(float %1512) %1516 = call float @floor(float %1514) %.op207 = fmul float %1491, 6.250000e-02 %1517 = select i1 %1492, float 0.000000e+00, float %.op207 %.op208 = fmul float %1495, 6.250000e-02 %1518 = select i1 %1496, float 0.000000e+00, float %.op208 %1519 = fadd float %1517, %1518 %.op209 = fmul float %1499, 6.250000e-02 %1520 = select i1 %1500, float 0.000000e+00, float %.op209 %1521 = fadd float %1519, %1520 %.op210 = fmul float %1503, 6.250000e-02 %1522 = select i1 %1504, float 0.000000e+00, float %.op210 %1523 = fadd float %1521, %1522 %1524 = fadd float %1188, %1523 %1525 = fmul float %1510, 0x3F70101020000000 %1526 = fmul float %1510, 2.560000e+02 %1527 = fsub float %1508, %1526 %1528 = fmul float %1527, 0x3F70101020000000 %1529 = fmul float %1515, 0x3FB0101020000000 %1530 = fmul float %1516, 0x3F70101060000000 %1531 = fadd float %1530, %1529 %1532 = call i32 @llvm.SI.packf16(float %1524, float %1525) %1533 = bitcast i32 %1532 to float %1534 = call i32 @llvm.SI.packf16(float %1528, float %1531) %1535 = bitcast i32 %1534 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %1533, float %1535, float %1533, float %1535) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @floor(float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_mov_b32_e32 v4, 0x3e800000 ; 7E0802FF 3E800000 v_mul_f32_e32 v5, v4, v2 ; 100A0504 v_mov_b32_e32 v9, 0x3f1d7d6b ; 7E1202FF 3F1D7D6B v_mov_b32_e32 v10, 0x3dcb2181 ; 7E1402FF 3DCB2181 v_mov_b32_e32 v11, 0x3eefe8ee ; 7E1602FF 3EEFE8EE v_mov_b32_e32 v19, 0xbe7c957d ; 7E2602FF BE7C957D v_mov_b32_e32 v20, 0x3efdb8dc ; 7E2802FF 3EFDB8DC v_mov_b32_e32 v21, 0x3f22fc48 ; 7E2A02FF 3F22FC48 v_mov_b32_e32 v22, 0xbe9afb5d ; 7E2C02FF BE9AFB5D v_mov_b32_e32 v23, 0x3e4a17b1 ; 7E2E02FF 3E4A17B1 v_mov_b32_e32 v24, 0x3f6dd26b ; 7E3002FF 3F6DD26B v_mov_b32_e32 v25, 0x3ea8d66f ; 7E3202FF 3EA8D66F v_mov_b32_e32 v26, 0xbe79bc34 ; 7E3402FF BE79BC34 v_mov_b32_e32 v27, 0x3ee8383b ; 7E3602FF 3EE8383B s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300 v_interp_p1_f32 v12, v0, 0, 0, [m0] ; C8300000 v_interp_p2_f32 v12, [v12], v1, 0, 0, [m0] ; C8310001 v_interp_p1_f32 v13, v0, 1, 0, [m0] ; C8340100 v_interp_p2_f32 v13, [v13], v1, 1, 0, [m0] ; C8350101 v_mov_b32_e32 v14, 0 ; 7E1C0280 s_load_dwordx4 s[12:15], s[2:3], 0x4 ; C0860304 s_load_dwordx4 s[0:3], s[2:3], 0x8 ; C0800308 s_load_dwordx4 s[20:23], s[4:5], 0x4 ; C08A0504 s_load_dwordx4 s[16:19], s[4:5], 0x8 ; C0880508 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s24, s[8:11], 0x10 ; C20C0910 s_buffer_load_dword s25, s[8:11], 0x11 ; C20C8911 s_load_dwordx4 s[8:11], s[4:5], 0xc ; C084050C s_load_dwordx8 s[28:35], s[6:7], 0x8 ; C0CE0708 s_load_dwordx8 s[36:43], s[6:7], 0x10 ; C0D20710 v_interp_p1_f32 v8, v0, 0, 1, [m0] ; C8200400 s_load_dwordx8 s[44:51], s[6:7], 0x18 ; C0D60718 v_interp_p2_f32 v8, [v8], v1, 0, 1, [m0] ; C8210401 v_interp_p1_f32 v15, v0, 1, 1, [m0] ; C83C0500 v_interp_p2_f32 v15, [v15], v1, 1, 1, [m0] ; C83D0501 v_interp_p1_f32 v16, v0, 2, 1, [m0] ; C8400600 v_interp_p2_f32 v16, [v16], v1, 2, 1, [m0] ; C8410601 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[28:30], 7, 0, 0, 0, 0, 0, 0, 0, v[12:15], s[28:35], s[20:23] ; F0900700 00A71C0C image_sample_l v0, 1, 0, 0, 0, 0, 0, 0, 0, v[12:15], s[36:43], s[16:19] ; F0900100 0089000C v_mov_b32_e32 v1, s25 ; 7E020219 v_mac_f32_e32 v1, s24, v3 ; 3E020618 v_mul_f32_e32 v6, v4, v1 ; 100C0304 image_sample v[1:4], 15, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[44:51], s[8:11] ; F0800F00 004B0105 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, 2.0, v1, -1.0 ; D2820001 03CE02F4 v_mad_f32 v2, 2.0, v2, -1.0 ; D2820002 03CE04F4 v_mad_f32 v3, 2.0, v3, -1.0 ; D2820003 03CE06F4 v_mad_f32 v4, 2.0, v4, -1.0 ; D2820004 03CE08F4 v_mul_f32_e32 v5, v2, v2 ; 100A0502 v_mac_f32_e32 v5, v1, v1 ; 3E0A0301 v_mac_f32_e32 v5, v3, v3 ; 3E0A0703 v_mac_f32_e32 v5, v4, v4 ; 3E0A0904 v_rsq_clamp_f32_e32 v6, v5 ; 7E0C5905 v_add_f32_e32 v7, -0.5, v28 ; 060E38F1 v_add_f32_e32 v12, -0.5, v29 ; 06183AF1 v_add_f32_e32 v13, -0.5, v30 ; 061A3CF1 v_mul_f32_e32 v5, v6, v1 ; 100A0306 v_mul_f32_e32 v4, v6, v2 ; 10080506 v_mul_f32_e32 v3, v6, v3 ; 10060706 s_buffer_load_dword s16, s[12:15], 0x8 ; C2080D08 s_buffer_load_dword s17, s[12:15], 0xa ; C2088D0A s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100 s_buffer_load_dword s18, s[0:3], 0x3 ; C2090103 s_buffer_load_dword s9, s[0:3], 0x4 ; C2048104 s_buffer_load_dword s19, s[12:15], 0x9 ; C2098D09 s_buffer_load_dword s10, s[0:3], 0x1 ; C2050101 s_buffer_load_dword s21, s[0:3], 0x7 ; C20A8107 s_buffer_load_dword s11, s[0:3], 0x8 ; C2058108 s_buffer_load_dword s22, s[0:3], 0xb ; C20B010B s_buffer_load_dword s23, s[12:15], 0xb ; C20B8D0B s_buffer_load_dword s20, s[0:3], 0x5 ; C20A0105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s16 ; 7E020210 v_mul_f32_e32 v14, s8, v1 ; 101C0208 v_mov_b32_e32 v1, s17 ; 7E020211 v_mac_f32_e32 v14, s18, v1 ; 3E1C0212 v_mov_b32_e32 v1, s16 ; 7E020210 v_mul_f32_e32 v18, s9, v1 ; 10240209 s_buffer_load_dword s24, s[0:3], 0x9 ; C20C0109 s_buffer_load_dword s25, s[0:3], 0xc ; C20C810C v_mov_b32_e32 v1, s17 ; 7E020211 v_mac_f32_e32 v18, s21, v1 ; 3E240215 v_mov_b32_e32 v1, s16 ; 7E020210 v_mul_f32_e32 v28, s11, v1 ; 1038020B v_mov_b32_e32 v1, s17 ; 7E020211 v_mac_f32_e32 v28, s22, v1 ; 3E380216 v_mov_b32_e32 v1, s19 ; 7E020213 v_mul_f32_e32 v29, s10, v1 ; 103A020A s_buffer_load_dword s8, s[12:15], 0xc ; C2040D0C v_mov_b32_e32 v1, s23 ; 7E020217 v_mac_f32_e32 v29, s18, v1 ; 3E3A0212 v_mov_b32_e32 v1, s19 ; 7E020213 v_mul_f32_e32 v30, s20, v1 ; 103C0214 v_mov_b32_e32 v1, s23 ; 7E020217 v_mac_f32_e32 v30, s21, v1 ; 3E3C0215 s_buffer_load_dword s11, s[0:3], 0x4c ; C205814C s_buffer_load_dword s9, s[0:3], 0x4d ; C204814D v_mov_b32_e32 v1, s19 ; 7E020213 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v31, s24, v1 ; 103E0218 s_buffer_load_dword s20, s[0:3], 0x4e ; C20A014E s_buffer_load_dword s24, s[12:15], 0x4 ; C20C0D04 v_mov_b32_e32 v1, s23 ; 7E020217 v_mac_f32_e32 v31, s22, v1 ; 3E3E0216 s_buffer_load_dword s10, s[12:15], 0x5 ; C2050D05 v_add_f32_e32 v2, v7, v7 ; 06040F07 v_add_f32_e32 v1, v12, v12 ; 0602190C v_add_f32_e32 v7, v13, v13 ; 060E1B0D v_mad_f32 v32, v8, v0, s11 ; D2820020 002E0108 v_mad_f32 v33, v15, v0, s9 ; D2820021 0026010F s_buffer_load_dword s12, s[0:3], 0xf ; C206010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v34, v16, v0, s20 ; D2820022 00520110 v_mac_f32_e32 v32, s24, v2 ; 3E400418 v_mac_f32_e32 v33, s24, v1 ; 3E420218 s_buffer_load_dword s13, s[0:3], 0xd ; C206810D v_mac_f32_e32 v34, s24, v7 ; 3E440E18 v_mul_f32_e32 v6, v32, v14 ; 100C1D20 v_mac_f32_e32 v6, v33, v18 ; 3E0C2521 v_mac_f32_e32 v6, v34, v28 ; 3E0C3922 v_mov_b32_e32 v8, s16 ; 7E100210 v_mac_f32_e32 v6, s25, v8 ; 3E0C1019 v_mov_b32_e32 v8, s17 ; 7E100211 v_mac_f32_e32 v6, s12, v8 ; 3E0C100C v_mul_f32_e32 v8, v32, v29 ; 10103B20 v_mac_f32_e32 v8, v33, v30 ; 3E103D21 v_mac_f32_e32 v8, v34, v31 ; 3E103F22 v_mov_b32_e32 v12, s19 ; 7E180213 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v8, s13, v12 ; 3E10180D v_mov_b32_e32 v12, s23 ; 7E180217 v_mac_f32_e32 v8, s12, v12 ; 3E10180C v_mul_f32_e32 v12, v9, v5 ; 10180B09 v_mac_f32_e32 v12, v10, v4 ; 3E18090A v_mac_f32_e32 v12, v11, v3 ; 3E18070B v_mul_f32_e32 v13, v5, v12 ; 101A1905 v_mac_f32_e32 v9, -2.0, v13 ; 3E121AF5 v_mul_f32_e32 v13, v4, v12 ; 101A1904 v_mac_f32_e32 v10, -2.0, v13 ; 3E141AF5 v_mul_f32_e32 v12, v3, v12 ; 10181903 v_mac_f32_e32 v11, -2.0, v12 ; 3E1618F5 v_mul_f32_e32 v12, v9, v2 ; 10180509 v_mac_f32_e32 v12, v10, v1 ; 3E18030A v_mac_f32_e32 v12, v11, v7 ; 3E180F0B v_mul_f32_e32 v13, v2, v12 ; 101A1902 v_mad_f32 v13, -2.0, v13, v9 ; D282000D 04261AF5 v_cmp_gt_f32_e32 vcc, 0, v12 ; 7C081880 v_cndmask_b32_e32 v35, v9, v13 ; 00461B09 v_mul_f32_e32 v9, v1, v12 ; 10121901 v_mad_f32 v9, -2.0, v9, v10 ; D2820009 042A12F5 v_cndmask_b32_e32 v36, v10, v9 ; 0048130A v_mul_f32_e32 v9, v7, v12 ; 10121907 v_mad_f32 v9, -2.0, v9, v11 ; D2820009 042E12F5 v_cndmask_b32_e32 v37, v11, v9 ; 004A130B v_mul_f32_e32 v9, s18, v32 ; 10124012 v_mac_f32_e32 v9, s21, v33 ; 3E124215 v_mac_f32_e32 v9, s22, v34 ; 3E124416 v_add_f32_e32 v9, s12, v9 ; 0612120C v_mul_f32_e32 v17, s10, v14 ; 10221C0A v_mul_f32_e32 v18, s10, v18 ; 1024240A v_mul_f32_e32 v11, s10, v28 ; 1016380A v_mul_f32_e32 v15, s10, v29 ; 101E3A0A v_mul_f32_e32 v16, s10, v30 ; 10203C0A v_mul_f32_e32 v10, s10, v31 ; 10143E0A v_mov_b32_e32 v12, s18 ; 7E180212 v_mul_f32_e32 v13, s10, v12 ; 101A180A v_mov_b32_e32 v12, s21 ; 7E180215 v_mul_f32_e32 v14, s10, v12 ; 101C180A v_mov_b32_e32 v12, s22 ; 7E180216 v_mul_f32_e32 v12, s10, v12 ; 1018180A v_mul_f32_e32 v28, v18, v36 ; 10384912 v_mac_f32_e32 v28, v17, v35 ; 3E384711 v_mac_f32_e32 v28, v11, v37 ; 3E384B0B v_add_f32_e32 v28, v6, v28 ; 06383906 v_mul_f32_e32 v29, v14, v36 ; 103A490E v_mac_f32_e32 v29, v13, v35 ; 3E3A470D v_mac_f32_e32 v29, v12, v37 ; 3E3A4B0C v_add_f32_e32 v29, v9, v29 ; 063A3B09 v_rcp_f32_e32 v29, v29 ; 7E3A551D v_mul_f32_e32 v30, v16, v36 ; 103C4910 v_mac_f32_e32 v30, v15, v35 ; 3E3C470F v_mac_f32_e32 v30, v10, v37 ; 3E3C4B0A v_add_f32_e32 v30, v8, v30 ; 063C3D08 v_mul_f32_e32 v38, v29, v28 ; 104C391D v_mul_f32_e32 v39, v29, v30 ; 104E3D1D v_mul_f32_e32 v28, v19, v5 ; 10380B13 v_mac_f32_e32 v28, v20, v4 ; 3E380914 v_mac_f32_e32 v28, v21, v3 ; 3E380715 v_mul_f32_e32 v29, v5, v28 ; 103A3905 v_mac_f32_e32 v19, -2.0, v29 ; 3E263AF5 v_mul_f32_e32 v29, v4, v28 ; 103A3904 v_mac_f32_e32 v20, -2.0, v29 ; 3E283AF5 v_mul_f32_e32 v28, v3, v28 ; 10383903 v_mac_f32_e32 v21, -2.0, v28 ; 3E2A38F5 v_mul_f32_e32 v28, v19, v2 ; 10380513 v_mac_f32_e32 v28, v20, v1 ; 3E380314 v_mac_f32_e32 v28, v21, v7 ; 3E380F15 v_mul_f32_e32 v29, v2, v28 ; 103A3902 v_mad_f32 v29, -2.0, v29, v19 ; D282001D 044E3AF5 v_cmp_gt_f32_e32 vcc, 0, v28 ; 7C083880 v_cndmask_b32_e32 v30, v19, v29 ; 003C3B13 v_mul_f32_e32 v19, v1, v28 ; 10263901 v_mad_f32 v19, -2.0, v19, v20 ; D2820013 045226F5 v_cndmask_b32_e32 v31, v20, v19 ; 003E2714 v_mul_f32_e32 v19, v7, v28 ; 10263907 v_mad_f32 v19, -2.0, v19, v21 ; D2820013 045626F5 v_cndmask_b32_e32 v40, v21, v19 ; 00502715 v_mul_f32_e32 v19, v18, v31 ; 10263F12 v_mac_f32_e32 v19, v17, v30 ; 3E263D11 v_mac_f32_e32 v19, v11, v40 ; 3E26510B v_add_f32_e32 v19, v6, v19 ; 06262706 v_mul_f32_e32 v20, v14, v31 ; 10283F0E v_mac_f32_e32 v20, v13, v30 ; 3E283D0D v_mac_f32_e32 v20, v12, v40 ; 3E28510C v_add_f32_e32 v20, v9, v20 ; 06282909 v_rcp_f32_e32 v20, v20 ; 7E285514 v_mul_f32_e32 v21, v16, v31 ; 102A3F10 v_mac_f32_e32 v21, v15, v30 ; 3E2A3D0F v_mac_f32_e32 v21, v10, v40 ; 3E2A510A v_add_f32_e32 v21, v8, v21 ; 062A2B08 v_mul_f32_e32 v41, v20, v19 ; 10522714 v_mul_f32_e32 v42, v20, v21 ; 10542B14 v_mul_f32_e32 v19, v22, v5 ; 10260B16 v_mac_f32_e32 v19, v23, v4 ; 3E260917 v_mac_f32_e32 v19, v24, v3 ; 3E260718 v_mul_f32_e32 v20, v5, v19 ; 10282705 v_mac_f32_e32 v22, -2.0, v20 ; 3E2C28F5 v_mul_f32_e32 v20, v4, v19 ; 10282704 v_mac_f32_e32 v23, -2.0, v20 ; 3E2E28F5 v_mul_f32_e32 v19, v3, v19 ; 10262703 v_mac_f32_e32 v24, -2.0, v19 ; 3E3026F5 v_mul_f32_e32 v19, v22, v2 ; 10260516 v_mac_f32_e32 v19, v23, v1 ; 3E260317 v_mac_f32_e32 v19, v24, v7 ; 3E260F18 v_mul_f32_e32 v20, v2, v19 ; 10282702 v_mad_f32 v20, -2.0, v20, v22 ; D2820014 045A28F5 v_cmp_gt_f32_e32 vcc, 0, v19 ; 7C082680 v_cndmask_b32_e32 v43, v22, v20 ; 00562916 v_mul_f32_e32 v20, v1, v19 ; 10282701 v_mad_f32 v20, -2.0, v20, v23 ; D2820014 045E28F5 v_cndmask_b32_e32 v44, v23, v20 ; 00582917 v_mul_f32_e32 v19, v7, v19 ; 10262707 v_mad_f32 v19, -2.0, v19, v24 ; D2820013 046226F5 v_cndmask_b32_e32 v45, v24, v19 ; 005A2718 v_mul_f32_e32 v19, v18, v44 ; 10265912 v_mac_f32_e32 v19, v17, v43 ; 3E265711 v_mac_f32_e32 v19, v11, v45 ; 3E265B0B v_add_f32_e32 v19, v6, v19 ; 06262706 v_mul_f32_e32 v20, v14, v44 ; 1028590E v_mac_f32_e32 v20, v13, v43 ; 3E28570D v_mac_f32_e32 v20, v12, v45 ; 3E285B0C v_add_f32_e32 v20, v9, v20 ; 06282909 v_rcp_f32_e32 v20, v20 ; 7E285514 v_mul_f32_e32 v21, v16, v44 ; 102A5910 v_mac_f32_e32 v21, v15, v43 ; 3E2A570F v_mac_f32_e32 v21, v10, v45 ; 3E2A5B0A v_add_f32_e32 v21, v8, v21 ; 062A2B08 v_mul_f32_e32 v46, v20, v19 ; 105C2714 v_mul_f32_e32 v47, v20, v21 ; 105E2B14 v_mul_f32_e32 v19, v25, v5 ; 10260B19 v_mac_f32_e32 v19, v26, v4 ; 3E26091A v_mac_f32_e32 v19, v27, v3 ; 3E26071B v_mul_f32_e32 v20, v5, v19 ; 10282705 v_mac_f32_e32 v25, -2.0, v20 ; 3E3228F5 v_mul_f32_e32 v20, v4, v19 ; 10282704 v_mac_f32_e32 v26, -2.0, v20 ; 3E3428F5 v_mul_f32_e32 v19, v3, v19 ; 10262703 v_mac_f32_e32 v27, -2.0, v19 ; 3E3626F5 v_mul_f32_e32 v19, v25, v2 ; 10260519 v_mac_f32_e32 v19, v26, v1 ; 3E26031A v_mac_f32_e32 v19, v27, v7 ; 3E260F1B v_mul_f32_e32 v20, v2, v19 ; 10282702 v_mad_f32 v20, -2.0, v20, v25 ; D2820014 046628F5 v_cmp_gt_f32_e32 vcc, 0, v19 ; 7C082680 v_cndmask_b32_e32 v48, v25, v20 ; 00602919 v_mul_f32_e32 v20, v1, v19 ; 10282701 v_mad_f32 v20, -2.0, v20, v26 ; D2820014 046A28F5 v_cndmask_b32_e32 v26, v26, v20 ; 0034291A v_mul_f32_e32 v19, v7, v19 ; 10262707 v_mad_f32 v19, -2.0, v19, v27 ; D2820013 046E26F5 v_cndmask_b32_e32 v49, v27, v19 ; 0062271B s_buffer_load_dword s12, s[0:3], 0x50 ; C2060150 s_buffer_load_dword s13, s[0:3], 0x51 ; C2068151 s_buffer_load_dword s14, s[0:3], 0x52 ; C2070152 s_buffer_load_dword s0, s[0:3], 0x5c ; C200015C v_mul_f32_e32 v19, v18, v26 ; 10263512 v_mac_f32_e32 v19, v17, v48 ; 3E266111 v_mac_f32_e32 v19, v11, v49 ; 3E26630B v_add_f32_e32 v20, v6, v19 ; 06282706 v_mul_f32_e32 v21, v16, v26 ; 102A3510 v_mac_f32_e32 v21, v15, v48 ; 3E2A610F v_mul_f32_e32 v19, v14, v26 ; 1026350E v_mac_f32_e32 v19, v13, v48 ; 3E26610D v_mac_f32_e32 v19, v12, v49 ; 3E26630C v_add_f32_e32 v19, v9, v19 ; 06262709 v_rcp_f32_e32 v22, v19 ; 7E2C5513 v_mac_f32_e32 v21, v10, v49 ; 3E2A630A s_waitcnt lgkmcnt(0) ; BF8C007F v_rcp_f32_e32 v19, s0 ; 7E265400 v_add_f32_e32 v21, v8, v21 ; 062A2B08 v_mul_f32_e32 v50, v22, v20 ; 10642916 v_mul_f32_e32 v51, v22, v21 ; 10662B16 v_mul_f32_e32 v21, s12, v19 ; 102A260C v_mul_f32_e32 v20, v32, v21 ; 10282B20 v_mul_f32_e32 v22, s13, v19 ; 102C260D v_mac_f32_e32 v20, v33, v22 ; 3E282D21 v_mul_f32_e32 v23, s14, v19 ; 102E260E v_mac_f32_e32 v20, v34, v23 ; 3E282F22 v_mul_f32_e32 v24, s11, v21 ; 10302A0B v_mac_f32_e32 v24, s9, v22 ; 3E302C09 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 v_mac_f32_e32 v24, s20, v23 ; 3E302E14 v_subrev_f32_e32 v20, v24, v20 ; 0A282918 v_mul_f32_e32 v28, s10, v21 ; 10382A0A v_mul_f32_e32 v29, s10, v22 ; 103A2C0A v_mul_f32_e32 v21, v36, v29 ; 102A3B24 v_mac_f32_e32 v21, v35, v28 ; 3E2A3923 v_mul_f32_e32 v27, s10, v23 ; 10362E0A v_mac_f32_e32 v21, v37, v27 ; 3E2A3725 v_add_f32_e32 v21, v20, v21 ; 062A2B14 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[32:33], 3, 0, 0, 0, 0, 0, 0, 0, v[38:39], s[12:19], s[0:3] ; F0800300 00032026 v_mov_b32_e32 v23, 0x3b7f00fc ; 7E2E02FF 3B7F00FC s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v21, -v33, v23, v21 ; D2820015 24562F21 v_mov_b32_e32 v24, 0x3f7f0100 ; 7E3002FF 3F7F0100 v_mad_f32 v21, -v32, v24, v21 ; D2820015 24563120 v_mul_f32_e32 v22, v31, v29 ; 102C3B1F v_mac_f32_e32 v22, v30, v28 ; 3E2C391E v_mac_f32_e32 v22, v40, v27 ; 3E2C3728 v_add_f32_e32 v22, v20, v22 ; 062C2D14 image_sample v[30:31], 3, 0, 0, 0, 0, 0, 0, 0, v[41:42], s[12:19], s[0:3] ; F0800300 00031E29 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v22, -v31, v23, v22 ; D2820016 245A2F1F v_mad_f32 v22, -v30, v24, v22 ; D2820016 245A311E v_mul_f32_e32 v25, v44, v29 ; 10323B2C v_mac_f32_e32 v25, v43, v28 ; 3E32392B v_mac_f32_e32 v25, v45, v27 ; 3E32372D v_add_f32_e32 v25, v20, v25 ; 06323314 image_sample v[30:31], 3, 0, 0, 0, 0, 0, 0, 0, v[46:47], s[12:19], s[0:3] ; F0800300 00031E2E s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v25, -v31, v23, v25 ; D2820019 24662F1F v_mad_f32 v25, -v30, v24, v25 ; D2820019 2466311E v_mul_f32_e32 v26, v26, v29 ; 10343B1A v_mac_f32_e32 v26, v48, v28 ; 3E343930 v_mac_f32_e32 v26, v49, v27 ; 3E343731 v_add_f32_e32 v26, v20, v26 ; 06343514 image_sample v[30:31], 3, 0, 0, 0, 0, 0, 0, 0, v[50:51], s[12:19], s[0:3] ; F0800300 00031E32 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v26, -v31, v23, v26 ; D282001A 246A2F1F v_mad_f32 v26, -v30, v24, v26 ; D282001A 246A311E v_mov_b32_e32 v30, 0x3e6a54c5 ; 7E3C02FF 3E6A54C5 v_mul_f32_e32 v31, v30, v5 ; 103E0B1E v_mov_b32_e32 v32, 0x3e945027 ; 7E4002FF 3E945027 v_mac_f32_e32 v31, v32, v4 ; 3E3E0920 v_mov_b32_e32 v33, 0x3e085879 ; 7E4202FF 3E085879 v_mac_f32_e32 v31, v33, v3 ; 3E3E0721 v_mul_f32_e32 v34, v5, v31 ; 10443F05 v_mac_f32_e32 v30, -2.0, v34 ; 3E3C44F5 v_mul_f32_e32 v34, v4, v31 ; 10443F04 v_mac_f32_e32 v32, -2.0, v34 ; 3E4044F5 v_mul_f32_e32 v31, v3, v31 ; 103E3F03 v_mac_f32_e32 v33, -2.0, v31 ; 3E423EF5 v_mul_f32_e32 v31, v30, v2 ; 103E051E v_mac_f32_e32 v31, v32, v1 ; 3E3E0320 v_mac_f32_e32 v31, v33, v7 ; 3E3E0F21 v_mul_f32_e32 v34, v2, v31 ; 10443F02 v_mad_f32 v34, -2.0, v34, v30 ; D2820022 047A44F5 v_cmp_gt_f32_e32 vcc, 0, v31 ; 7C083E80 v_cndmask_b32_e32 v30, v30, v34 ; 003C451E v_mul_f32_e32 v34, v1, v31 ; 10443F01 v_mad_f32 v34, -2.0, v34, v32 ; D2820022 048244F5 v_cndmask_b32_e32 v32, v32, v34 ; 00404520 v_mul_f32_e32 v31, v7, v31 ; 103E3F07 v_mad_f32 v31, -2.0, v31, v33 ; D282001F 04863EF5 v_cndmask_b32_e32 v31, v33, v31 ; 003E3F21 v_mul_f32_e32 v33, v18, v32 ; 10424112 v_mac_f32_e32 v33, v17, v30 ; 3E423D11 v_mac_f32_e32 v33, v11, v31 ; 3E423F0B v_add_f32_e32 v33, v6, v33 ; 06424306 v_mul_f32_e32 v34, v14, v32 ; 1044410E v_mac_f32_e32 v34, v13, v30 ; 3E443D0D v_mac_f32_e32 v34, v12, v31 ; 3E443F0C v_add_f32_e32 v34, v9, v34 ; 06444509 v_rcp_f32_e32 v34, v34 ; 7E445522 v_mul_f32_e32 v35, v16, v32 ; 10464110 v_mac_f32_e32 v35, v15, v30 ; 3E463D0F v_mac_f32_e32 v35, v10, v31 ; 3E463F0A v_add_f32_e32 v35, v8, v35 ; 06464708 v_mul_f32_e32 v36, v34, v33 ; 10484322 v_mul_f32_e32 v37, v34, v35 ; 104A4722 v_mov_b32_e32 v33, 0xbe62a6b1 ; 7E4202FF BE62A6B1 v_mul_f32_e32 v34, v33, v5 ; 10440B21 v_mov_b32_e32 v35, 0xbe17f887 ; 7E4602FF BE17F887 v_mac_f32_e32 v34, v35, v4 ; 3E440923 v_mov_b32_e32 v38, 0x3e06eb0b ; 7E4C02FF 3E06EB0B v_mac_f32_e32 v34, v38, v3 ; 3E440726 v_mul_f32_e32 v39, v5, v34 ; 104E4505 v_mac_f32_e32 v33, -2.0, v39 ; 3E424EF5 v_mul_f32_e32 v39, v4, v34 ; 104E4504 v_mac_f32_e32 v35, -2.0, v39 ; 3E464EF5 v_mul_f32_e32 v34, v3, v34 ; 10444503 v_mac_f32_e32 v38, -2.0, v34 ; 3E4C44F5 v_mul_f32_e32 v34, v33, v2 ; 10440521 v_mac_f32_e32 v34, v35, v1 ; 3E440323 v_mac_f32_e32 v34, v38, v7 ; 3E440F26 v_mul_f32_e32 v39, v2, v34 ; 104E4502 v_mad_f32 v39, -2.0, v39, v33 ; D2820027 04864EF5 v_cmp_gt_f32_e32 vcc, 0, v34 ; 7C084480 v_cndmask_b32_e32 v33, v33, v39 ; 00424F21 v_mul_f32_e32 v39, v1, v34 ; 104E4501 v_mad_f32 v39, -2.0, v39, v35 ; D2820027 048E4EF5 v_cndmask_b32_e32 v35, v35, v39 ; 00464F23 v_mul_f32_e32 v34, v7, v34 ; 10444507 v_mad_f32 v34, -2.0, v34, v38 ; D2820022 049A44F5 v_cndmask_b32_e32 v34, v38, v34 ; 00444526 v_mul_f32_e32 v38, v18, v35 ; 104C4712 v_mac_f32_e32 v38, v17, v33 ; 3E4C4311 v_mac_f32_e32 v38, v11, v34 ; 3E4C450B v_add_f32_e32 v38, v6, v38 ; 064C4D06 v_mul_f32_e32 v39, v14, v35 ; 104E470E v_mac_f32_e32 v39, v13, v33 ; 3E4E430D v_mac_f32_e32 v39, v12, v34 ; 3E4E450C v_add_f32_e32 v39, v9, v39 ; 064E4F09 v_rcp_f32_e32 v39, v39 ; 7E4E5527 v_mul_f32_e32 v40, v16, v35 ; 10504710 v_mac_f32_e32 v40, v15, v33 ; 3E50430F v_mac_f32_e32 v40, v10, v34 ; 3E50450A v_add_f32_e32 v40, v8, v40 ; 06505108 v_mul_f32_e32 v41, v39, v38 ; 10524D27 v_mul_f32_e32 v42, v39, v40 ; 10545127 v_mov_b32_e32 v38, 0xbeea467c ; 7E4C02FF BEEA467C v_mul_f32_e32 v39, v38, v5 ; 104E0B26 v_mov_b32_e32 v40, 0xbf2a8be3 ; 7E5002FF BF2A8BE3 v_mac_f32_e32 v39, v40, v4 ; 3E4E0928 v_mov_b32_e32 v43, 0x3e6af89c ; 7E5602FF 3E6AF89C v_mac_f32_e32 v39, v43, v3 ; 3E4E072B v_mul_f32_e32 v44, v5, v39 ; 10584F05 v_mac_f32_e32 v38, -2.0, v44 ; 3E4C58F5 v_mul_f32_e32 v44, v4, v39 ; 10584F04 v_mac_f32_e32 v40, -2.0, v44 ; 3E5058F5 v_mul_f32_e32 v39, v3, v39 ; 104E4F03 v_mac_f32_e32 v43, -2.0, v39 ; 3E564EF5 v_mul_f32_e32 v39, v38, v2 ; 104E0526 v_mac_f32_e32 v39, v40, v1 ; 3E4E0328 v_mac_f32_e32 v39, v43, v7 ; 3E4E0F2B v_mul_f32_e32 v44, v2, v39 ; 10584F02 v_mad_f32 v44, -2.0, v44, v38 ; D282002C 049A58F5 v_cmp_gt_f32_e32 vcc, 0, v39 ; 7C084E80 v_cndmask_b32_e32 v38, v38, v44 ; 004C5926 v_mul_f32_e32 v44, v1, v39 ; 10584F01 v_mad_f32 v44, -2.0, v44, v40 ; D282002C 04A258F5 v_cndmask_b32_e32 v40, v40, v44 ; 00505928 v_mul_f32_e32 v39, v7, v39 ; 104E4F07 v_mad_f32 v39, -2.0, v39, v43 ; D2820027 04AE4EF5 v_cndmask_b32_e32 v39, v43, v39 ; 004E4F2B v_mul_f32_e32 v43, v18, v40 ; 10565112 v_mac_f32_e32 v43, v17, v38 ; 3E564D11 v_mac_f32_e32 v43, v11, v39 ; 3E564F0B v_add_f32_e32 v43, v6, v43 ; 06565706 v_mul_f32_e32 v44, v14, v40 ; 1058510E v_mac_f32_e32 v44, v13, v38 ; 3E584D0D v_mac_f32_e32 v44, v12, v39 ; 3E584F0C v_add_f32_e32 v44, v9, v44 ; 06585909 v_rcp_f32_e32 v44, v44 ; 7E58552C v_mul_f32_e32 v45, v16, v40 ; 105A5110 v_mac_f32_e32 v45, v15, v38 ; 3E5A4D0F v_mac_f32_e32 v45, v10, v39 ; 3E5A4F0A v_add_f32_e32 v45, v8, v45 ; 065A5B08 v_mul_f32_e32 v46, v44, v43 ; 105C572C v_mul_f32_e32 v47, v44, v45 ; 105E5B2C v_mov_b32_e32 v43, 0xbd974f72 ; 7E5602FF BD974F72 v_mul_f32_e32 v44, v43, v5 ; 10580B2B v_mov_b32_e32 v45, 0x3d61565c ; 7E5A02FF 3D61565C v_mac_f32_e32 v44, v45, v4 ; 3E58092D v_mov_b32_e32 v48, 0xbc1ef0f1 ; 7E6002FF BC1EF0F1 v_mac_f32_e32 v44, v48, v3 ; 3E580730 v_mul_f32_e32 v49, v5, v44 ; 10625905 v_mac_f32_e32 v43, -2.0, v49 ; 3E5662F5 v_mul_f32_e32 v49, v4, v44 ; 10625904 v_mac_f32_e32 v45, -2.0, v49 ; 3E5A62F5 v_mul_f32_e32 v44, v3, v44 ; 10585903 v_mac_f32_e32 v48, -2.0, v44 ; 3E6058F5 v_mul_f32_e32 v44, v43, v2 ; 1058052B v_mac_f32_e32 v44, v45, v1 ; 3E58032D v_mac_f32_e32 v44, v48, v7 ; 3E580F30 v_mul_f32_e32 v49, v2, v44 ; 10625902 v_mad_f32 v49, -2.0, v49, v43 ; D2820031 04AE62F5 v_cmp_gt_f32_e32 vcc, 0, v44 ; 7C085880 v_cndmask_b32_e32 v43, v43, v49 ; 0056632B v_mul_f32_e32 v49, v1, v44 ; 10625901 v_mad_f32 v49, -2.0, v49, v45 ; D2820031 04B662F5 v_cndmask_b32_e32 v45, v45, v49 ; 005A632D v_mul_f32_e32 v44, v7, v44 ; 10585907 v_mad_f32 v44, -2.0, v44, v48 ; D282002C 04C258F5 v_cndmask_b32_e32 v44, v48, v44 ; 00585930 v_mul_f32_e32 v48, v18, v45 ; 10605B12 v_mac_f32_e32 v48, v17, v43 ; 3E605711 v_mac_f32_e32 v48, v11, v44 ; 3E60590B v_add_f32_e32 v48, v6, v48 ; 06606106 v_mul_f32_e32 v49, v14, v45 ; 10625B0E v_mac_f32_e32 v49, v13, v43 ; 3E62570D v_mac_f32_e32 v49, v12, v44 ; 3E62590C v_add_f32_e32 v49, v9, v49 ; 06626309 v_rcp_f32_e32 v49, v49 ; 7E625531 v_mul_f32_e32 v50, v16, v45 ; 10645B10 v_mac_f32_e32 v50, v15, v43 ; 3E64570F v_mac_f32_e32 v50, v10, v44 ; 3E64590A v_add_f32_e32 v50, v8, v50 ; 06646508 v_mul_f32_e32 v51, v49, v48 ; 10666131 v_mul_f32_e32 v52, v49, v50 ; 10686531 v_mul_f32_e32 v32, v32, v29 ; 10403B20 v_mac_f32_e32 v32, v30, v28 ; 3E40391E v_mac_f32_e32 v32, v31, v27 ; 3E40371F v_add_f32_e32 v30, v20, v32 ; 063C4114 image_sample v[31:32], 3, 0, 0, 0, 0, 0, 0, 0, v[36:37], s[12:19], s[0:3] ; F0800300 00031F24 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v30, -v32, v23, v30 ; D282001E 247A2F20 v_mad_f32 v30, -v31, v24, v30 ; D282001E 247A311F v_mul_f32_e32 v31, v35, v29 ; 103E3B23 v_mac_f32_e32 v31, v33, v28 ; 3E3E3921 v_mac_f32_e32 v31, v34, v27 ; 3E3E3722 v_add_f32_e32 v31, v20, v31 ; 063E3F14 image_sample v[32:33], 3, 0, 0, 0, 0, 0, 0, 0, v[41:42], s[12:19], s[0:3] ; F0800300 00032029 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v31, -v33, v23, v31 ; D282001F 247E2F21 v_mad_f32 v31, -v32, v24, v31 ; D282001F 247E3120 v_mul_f32_e32 v32, v40, v29 ; 10403B28 v_mac_f32_e32 v32, v38, v28 ; 3E403926 v_mac_f32_e32 v32, v39, v27 ; 3E403727 v_add_f32_e32 v32, v20, v32 ; 06404114 image_sample v[33:34], 3, 0, 0, 0, 0, 0, 0, 0, v[46:47], s[12:19], s[0:3] ; F0800300 0003212E s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v32, -v34, v23, v32 ; D2820020 24822F22 v_mad_f32 v32, -v33, v24, v32 ; D2820020 24823121 v_mul_f32_e32 v33, v45, v29 ; 10423B2D v_mac_f32_e32 v33, v43, v28 ; 3E42392B v_mac_f32_e32 v33, v44, v27 ; 3E42372C v_add_f32_e32 v33, v20, v33 ; 06424314 image_sample v[34:35], 3, 0, 0, 0, 0, 0, 0, 0, v[51:52], s[12:19], s[0:3] ; F0800300 00032233 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v33, -v35, v23, v33 ; D2820021 24862F23 v_mad_f32 v33, -v34, v24, v33 ; D2820021 24863122 v_mov_b32_e32 v34, 0xbd1cc533 ; 7E4402FF BD1CC533 v_mul_f32_e32 v35, v34, v5 ; 10460B22 v_mov_b32_e32 v36, 0xbd23c85c ; 7E4802FF BD23C85C v_mac_f32_e32 v35, v36, v4 ; 3E460924 v_mov_b32_e32 v37, 0x3c20dbad ; 7E4A02FF 3C20DBAD v_mac_f32_e32 v35, v37, v3 ; 3E460725 v_mul_f32_e32 v38, v5, v35 ; 104C4705 v_mac_f32_e32 v34, -2.0, v38 ; 3E444CF5 v_mul_f32_e32 v38, v4, v35 ; 104C4704 v_mac_f32_e32 v36, -2.0, v38 ; 3E484CF5 v_mul_f32_e32 v35, v3, v35 ; 10464703 v_mac_f32_e32 v37, -2.0, v35 ; 3E4A46F5 v_mul_f32_e32 v35, v34, v2 ; 10460522 v_mac_f32_e32 v35, v36, v1 ; 3E460324 v_mac_f32_e32 v35, v37, v7 ; 3E460F25 v_mul_f32_e32 v38, v2, v35 ; 104C4702 v_mad_f32 v38, -2.0, v38, v34 ; D2820026 048A4CF5 v_cmp_gt_f32_e32 vcc, 0, v35 ; 7C084680 v_cndmask_b32_e32 v34, v34, v38 ; 00444D22 v_mul_f32_e32 v38, v1, v35 ; 104C4701 v_mad_f32 v38, -2.0, v38, v36 ; D2820026 04924CF5 v_cndmask_b32_e32 v36, v36, v38 ; 00484D24 v_mul_f32_e32 v35, v7, v35 ; 10464707 v_mad_f32 v35, -2.0, v35, v37 ; D2820023 049646F5 v_cndmask_b32_e32 v35, v37, v35 ; 00464725 v_mul_f32_e32 v37, v18, v36 ; 104A4912 v_mac_f32_e32 v37, v17, v34 ; 3E4A4511 v_mac_f32_e32 v37, v11, v35 ; 3E4A470B v_add_f32_e32 v37, v6, v37 ; 064A4B06 v_mul_f32_e32 v38, v14, v36 ; 104C490E v_mac_f32_e32 v38, v13, v34 ; 3E4C450D v_mac_f32_e32 v38, v12, v35 ; 3E4C470C v_add_f32_e32 v38, v9, v38 ; 064C4D09 v_rcp_f32_e32 v38, v38 ; 7E4C5526 v_mul_f32_e32 v39, v16, v36 ; 104E4910 v_mac_f32_e32 v39, v15, v34 ; 3E4E450F v_mac_f32_e32 v39, v10, v35 ; 3E4E470A v_add_f32_e32 v39, v8, v39 ; 064E4F08 v_mul_f32_e32 v40, v38, v37 ; 10504B26 v_mul_f32_e32 v41, v38, v39 ; 10524F26 v_mov_b32_e32 v37, 0x3f3ec311 ; 7E4A02FF 3F3EC311 v_mul_f32_e32 v38, v37, v5 ; 104C0B25 v_mov_b32_e32 v39, 0x3ea5be7f ; 7E4E02FF 3EA5BE7F v_mac_f32_e32 v38, v39, v4 ; 3E4C0927 v_mov_b32_e32 v42, 0xbedbb0c4 ; 7E5402FF BEDBB0C4 v_mac_f32_e32 v38, v42, v3 ; 3E4C072A v_mul_f32_e32 v43, v5, v38 ; 10564D05 v_mac_f32_e32 v37, -2.0, v43 ; 3E4A56F5 v_mul_f32_e32 v43, v4, v38 ; 10564D04 v_mac_f32_e32 v39, -2.0, v43 ; 3E4E56F5 v_mul_f32_e32 v38, v3, v38 ; 104C4D03 v_mac_f32_e32 v42, -2.0, v38 ; 3E544CF5 v_mul_f32_e32 v38, v37, v2 ; 104C0525 v_mac_f32_e32 v38, v39, v1 ; 3E4C0327 v_mac_f32_e32 v38, v42, v7 ; 3E4C0F2A v_mul_f32_e32 v43, v2, v38 ; 10564D02 v_mad_f32 v43, -2.0, v43, v37 ; D282002B 049656F5 v_cmp_gt_f32_e32 vcc, 0, v38 ; 7C084C80 v_cndmask_b32_e32 v37, v37, v43 ; 004A5725 v_mul_f32_e32 v43, v1, v38 ; 10564D01 v_mad_f32 v43, -2.0, v43, v39 ; D282002B 049E56F5 v_cndmask_b32_e32 v39, v39, v43 ; 004E5727 v_mul_f32_e32 v38, v7, v38 ; 104C4D07 v_mad_f32 v38, -2.0, v38, v42 ; D2820026 04AA4CF5 v_cndmask_b32_e32 v38, v42, v38 ; 004C4D2A v_mul_f32_e32 v42, v18, v39 ; 10544F12 v_mac_f32_e32 v42, v17, v37 ; 3E544B11 v_mac_f32_e32 v42, v11, v38 ; 3E544D0B v_add_f32_e32 v42, v6, v42 ; 06545506 v_mul_f32_e32 v43, v14, v39 ; 10564F0E v_mac_f32_e32 v43, v13, v37 ; 3E564B0D v_mac_f32_e32 v43, v12, v38 ; 3E564D0C v_add_f32_e32 v43, v9, v43 ; 06565709 v_rcp_f32_e32 v43, v43 ; 7E56552B v_mul_f32_e32 v44, v16, v39 ; 10584F10 v_mac_f32_e32 v44, v15, v37 ; 3E584B0F v_mac_f32_e32 v44, v10, v38 ; 3E584D0A v_add_f32_e32 v44, v8, v44 ; 06585908 v_mul_f32_e32 v45, v43, v42 ; 105A552B v_mul_f32_e32 v46, v43, v44 ; 105C592B v_mov_b32_e32 v42, 0xbc7c1590 ; 7E5402FF BC7C1590 v_mul_f32_e32 v43, v42, v5 ; 10560B2A v_mov_b32_e32 v44, 0x3e88ebee ; 7E5802FF 3E88EBEE v_mac_f32_e32 v43, v44, v4 ; 3E56092C v_mov_b32_e32 v47, 0xbd873de2 ; 7E5E02FF BD873DE2 v_mac_f32_e32 v43, v47, v3 ; 3E56072F v_mul_f32_e32 v48, v5, v43 ; 10605705 v_mac_f32_e32 v42, -2.0, v48 ; 3E5460F5 v_mul_f32_e32 v48, v4, v43 ; 10605704 v_mac_f32_e32 v44, -2.0, v48 ; 3E5860F5 v_mul_f32_e32 v43, v3, v43 ; 10565703 v_mac_f32_e32 v47, -2.0, v43 ; 3E5E56F5 v_mul_f32_e32 v43, v42, v2 ; 1056052A v_mac_f32_e32 v43, v44, v1 ; 3E56032C v_mac_f32_e32 v43, v47, v7 ; 3E560F2F v_mul_f32_e32 v48, v2, v43 ; 10605702 v_mad_f32 v48, -2.0, v48, v42 ; D2820030 04AA60F5 v_cmp_gt_f32_e32 vcc, 0, v43 ; 7C085680 v_cndmask_b32_e32 v42, v42, v48 ; 0054612A v_mul_f32_e32 v48, v1, v43 ; 10605701 v_mad_f32 v48, -2.0, v48, v44 ; D2820030 04B260F5 v_cndmask_b32_e32 v44, v44, v48 ; 0058612C v_mul_f32_e32 v43, v7, v43 ; 10565707 v_mad_f32 v43, -2.0, v43, v47 ; D282002B 04BE56F5 v_cndmask_b32_e32 v43, v47, v43 ; 0056572F v_mul_f32_e32 v47, v18, v44 ; 105E5912 v_mac_f32_e32 v47, v17, v42 ; 3E5E5511 v_mac_f32_e32 v47, v11, v43 ; 3E5E570B v_add_f32_e32 v47, v6, v47 ; 065E5F06 v_mul_f32_e32 v48, v14, v44 ; 1060590E v_mac_f32_e32 v48, v13, v42 ; 3E60550D v_mac_f32_e32 v48, v12, v43 ; 3E60570C v_add_f32_e32 v48, v9, v48 ; 06606109 v_rcp_f32_e32 v48, v48 ; 7E605530 v_mul_f32_e32 v49, v16, v44 ; 10625910 v_mac_f32_e32 v49, v15, v42 ; 3E62550F v_mac_f32_e32 v49, v10, v43 ; 3E62570A v_add_f32_e32 v49, v8, v49 ; 06626308 v_mul_f32_e32 v50, v48, v47 ; 10645F30 v_mul_f32_e32 v51, v48, v49 ; 10666330 v_mov_b32_e32 v47, 0xbdc4f7ec ; 7E5E02FF BDC4F7EC v_mul_f32_e32 v48, v47, v5 ; 10600B2F v_mov_b32_e32 v49, 0xbf05df33 ; 7E6202FF BF05DF33 v_mac_f32_e32 v48, v49, v4 ; 3E600931 v_mov_b32_e32 v52, 0xbe806ec1 ; 7E6802FF BE806EC1 v_mac_f32_e32 v48, v52, v3 ; 3E600734 v_mul_f32_e32 v53, v5, v48 ; 106A6105 v_mac_f32_e32 v47, -2.0, v53 ; 3E5E6AF5 v_mul_f32_e32 v53, v4, v48 ; 106A6104 v_mac_f32_e32 v49, -2.0, v53 ; 3E626AF5 v_mul_f32_e32 v48, v3, v48 ; 10606103 v_mac_f32_e32 v52, -2.0, v48 ; 3E6860F5 v_mul_f32_e32 v48, v47, v2 ; 1060052F v_mac_f32_e32 v48, v49, v1 ; 3E600331 v_mac_f32_e32 v48, v52, v7 ; 3E600F34 v_mul_f32_e32 v53, v2, v48 ; 106A6102 v_mad_f32 v53, -2.0, v53, v47 ; D2820035 04BE6AF5 v_cmp_gt_f32_e32 vcc, 0, v48 ; 7C086080 v_cndmask_b32_e32 v47, v47, v53 ; 005E6B2F v_mul_f32_e32 v53, v1, v48 ; 106A6101 v_mad_f32 v53, -2.0, v53, v49 ; D2820035 04C66AF5 v_cndmask_b32_e32 v49, v49, v53 ; 00626B31 v_mul_f32_e32 v48, v7, v48 ; 10606107 v_mad_f32 v48, -2.0, v48, v52 ; D2820030 04D260F5 v_cndmask_b32_e32 v48, v52, v48 ; 00606134 v_mul_f32_e32 v52, v18, v49 ; 10686312 v_mac_f32_e32 v52, v17, v47 ; 3E685F11 v_mac_f32_e32 v52, v11, v48 ; 3E68610B v_add_f32_e32 v52, v6, v52 ; 06686906 v_mul_f32_e32 v53, v14, v49 ; 106A630E v_mac_f32_e32 v53, v13, v47 ; 3E6A5F0D v_mac_f32_e32 v53, v12, v48 ; 3E6A610C v_add_f32_e32 v53, v9, v53 ; 066A6B09 v_rcp_f32_e32 v53, v53 ; 7E6A5535 v_mul_f32_e32 v54, v16, v49 ; 106C6310 v_mac_f32_e32 v54, v15, v47 ; 3E6C5F0F v_mac_f32_e32 v54, v10, v48 ; 3E6C610A v_add_f32_e32 v54, v8, v54 ; 066C6D08 v_mul_f32_e32 v55, v53, v52 ; 106E6935 v_mul_f32_e32 v56, v53, v54 ; 10706D35 v_mul_f32_e32 v36, v36, v29 ; 10483B24 v_mac_f32_e32 v36, v34, v28 ; 3E483922 v_mac_f32_e32 v36, v35, v27 ; 3E483723 v_add_f32_e32 v34, v20, v36 ; 06444914 image_sample v[35:36], 3, 0, 0, 0, 0, 0, 0, 0, v[40:41], s[12:19], s[0:3] ; F0800300 00032328 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v34, -v36, v23, v34 ; D2820022 248A2F24 v_mad_f32 v34, -v35, v24, v34 ; D2820022 248A3123 v_mul_f32_e32 v35, v39, v29 ; 10463B27 v_mac_f32_e32 v35, v37, v28 ; 3E463925 v_mac_f32_e32 v35, v38, v27 ; 3E463726 v_add_f32_e32 v35, v20, v35 ; 06464714 image_sample v[36:37], 3, 0, 0, 0, 0, 0, 0, 0, v[45:46], s[12:19], s[0:3] ; F0800300 0003242D s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v35, -v37, v23, v35 ; D2820023 248E2F25 v_mad_f32 v35, -v36, v24, v35 ; D2820023 248E3124 v_mul_f32_e32 v36, v44, v29 ; 10483B2C v_mac_f32_e32 v36, v42, v28 ; 3E48392A v_mac_f32_e32 v36, v43, v27 ; 3E48372B v_add_f32_e32 v36, v20, v36 ; 06484914 image_sample v[37:38], 3, 0, 0, 0, 0, 0, 0, 0, v[50:51], s[12:19], s[0:3] ; F0800300 00032532 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v36, -v38, v23, v36 ; D2820024 24922F26 v_mad_f32 v36, -v37, v24, v36 ; D2820024 24923125 v_mul_f32_e32 v37, v49, v29 ; 104A3B31 v_mac_f32_e32 v37, v47, v28 ; 3E4A392F v_mac_f32_e32 v37, v48, v27 ; 3E4A3730 v_add_f32_e32 v37, v20, v37 ; 064A4B14 image_sample v[38:39], 3, 0, 0, 0, 0, 0, 0, 0, v[55:56], s[12:19], s[0:3] ; F0800300 00032637 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v37, -v39, v23, v37 ; D2820025 24962F27 v_mad_f32 v37, -v38, v24, v37 ; D2820025 24963126 v_mov_b32_e32 v38, 0x3efab7fe ; 7E4C02FF 3EFAB7FE v_mul_f32_e32 v39, v38, v5 ; 104E0B26 v_mov_b32_e32 v40, 0xbeedc63b ; 7E5002FF BEEDC63B v_mac_f32_e32 v39, v40, v4 ; 3E4E0928 v_mov_b32_e32 v41, 0xbe18d25f ; 7E5202FF BE18D25F v_mac_f32_e32 v39, v41, v3 ; 3E4E0729 v_mul_f32_e32 v42, v5, v39 ; 10544F05 v_mac_f32_e32 v38, -2.0, v42 ; 3E4C54F5 v_mul_f32_e32 v42, v4, v39 ; 10544F04 v_mac_f32_e32 v40, -2.0, v42 ; 3E5054F5 v_mul_f32_e32 v39, v3, v39 ; 104E4F03 v_mac_f32_e32 v41, -2.0, v39 ; 3E524EF5 v_mul_f32_e32 v39, v38, v2 ; 104E0526 v_mac_f32_e32 v39, v40, v1 ; 3E4E0328 v_mac_f32_e32 v39, v41, v7 ; 3E4E0F29 v_mul_f32_e32 v42, v2, v39 ; 10544F02 v_mad_f32 v42, -2.0, v42, v38 ; D282002A 049A54F5 v_cmp_gt_f32_e32 vcc, 0, v39 ; 7C084E80 v_cndmask_b32_e32 v38, v38, v42 ; 004C5526 v_mul_f32_e32 v42, v1, v39 ; 10544F01 v_mad_f32 v42, -2.0, v42, v40 ; D282002A 04A254F5 v_cndmask_b32_e32 v40, v40, v42 ; 00505528 v_mul_f32_e32 v39, v7, v39 ; 104E4F07 v_mad_f32 v39, -2.0, v39, v41 ; D2820027 04A64EF5 v_cndmask_b32_e32 v39, v41, v39 ; 004E4F29 v_mul_f32_e32 v41, v18, v40 ; 10525112 v_mac_f32_e32 v41, v17, v38 ; 3E524D11 v_mac_f32_e32 v41, v11, v39 ; 3E524F0B v_add_f32_e32 v41, v6, v41 ; 06525306 v_mul_f32_e32 v42, v14, v40 ; 1054510E v_mac_f32_e32 v42, v13, v38 ; 3E544D0D v_mac_f32_e32 v42, v12, v39 ; 3E544F0C v_add_f32_e32 v42, v9, v42 ; 06545509 v_rcp_f32_e32 v42, v42 ; 7E54552A v_mul_f32_e32 v43, v16, v40 ; 10565110 v_mac_f32_e32 v43, v15, v38 ; 3E564D0F v_mac_f32_e32 v43, v10, v39 ; 3E564F0A v_add_f32_e32 v43, v8, v43 ; 06565708 v_mul_f32_e32 v44, v42, v41 ; 1058532A v_mul_f32_e32 v45, v42, v43 ; 105A572A v_mov_b32_e32 v41, 0xbe064c73 ; 7E5202FF BE064C73 v_mul_f32_e32 v42, v41, v5 ; 10540B29 v_mov_b32_e32 v43, 0x3e0e79ee ; 7E5602FF 3E0E79EE v_mac_f32_e32 v42, v43, v4 ; 3E54092B v_mov_b32_e32 v46, 0xbf336cdf ; 7E5C02FF BF336CDF v_mac_f32_e32 v42, v46, v3 ; 3E54072E v_mul_f32_e32 v47, v5, v42 ; 105E5505 v_mac_f32_e32 v41, -2.0, v47 ; 3E525EF5 v_mul_f32_e32 v47, v4, v42 ; 105E5504 v_mac_f32_e32 v43, -2.0, v47 ; 3E565EF5 v_mul_f32_e32 v42, v3, v42 ; 10545503 v_mac_f32_e32 v46, -2.0, v42 ; 3E5C54F5 v_mul_f32_e32 v42, v41, v2 ; 10540529 v_mac_f32_e32 v42, v43, v1 ; 3E54032B v_mac_f32_e32 v42, v46, v7 ; 3E540F2E v_mul_f32_e32 v47, v2, v42 ; 105E5502 v_mad_f32 v47, -2.0, v47, v41 ; D282002F 04A65EF5 v_cmp_gt_f32_e32 vcc, 0, v42 ; 7C085480 v_cndmask_b32_e32 v41, v41, v47 ; 00525F29 v_mul_f32_e32 v47, v1, v42 ; 105E5501 v_mad_f32 v47, -2.0, v47, v43 ; D282002F 04AE5EF5 v_cndmask_b32_e32 v43, v43, v47 ; 00565F2B v_mul_f32_e32 v42, v7, v42 ; 10545507 v_mad_f32 v42, -2.0, v42, v46 ; D282002A 04BA54F5 v_cndmask_b32_e32 v42, v46, v42 ; 0054552E v_mul_f32_e32 v46, v18, v43 ; 105C5712 v_mac_f32_e32 v46, v17, v41 ; 3E5C5311 v_mac_f32_e32 v46, v11, v42 ; 3E5C550B v_add_f32_e32 v46, v6, v46 ; 065C5D06 v_mul_f32_e32 v47, v14, v43 ; 105E570E v_mac_f32_e32 v47, v13, v41 ; 3E5E530D v_mac_f32_e32 v47, v12, v42 ; 3E5E550C v_add_f32_e32 v47, v9, v47 ; 065E5F09 v_rcp_f32_e32 v47, v47 ; 7E5E552F v_mul_f32_e32 v48, v16, v43 ; 10605710 v_mac_f32_e32 v48, v15, v41 ; 3E60530F v_mac_f32_e32 v48, v10, v42 ; 3E60550A v_add_f32_e32 v48, v8, v48 ; 06606108 v_mul_f32_e32 v49, v47, v46 ; 10625D2F v_mul_f32_e32 v50, v47, v48 ; 1064612F v_mov_b32_e32 v46, 0xbeb98394 ; 7E5C02FF BEB98394 v_mul_f32_e32 v47, v46, v5 ; 105E0B2E v_mov_b32_e32 v48, 0xbdcdd3fe ; 7E6002FF BDCDD3FE v_mac_f32_e32 v47, v48, v4 ; 3E5E0930 v_mov_b32_e32 v51, 0xbe9ccb19 ; 7E6602FF BE9CCB19 v_mac_f32_e32 v47, v51, v3 ; 3E5E0733 v_mul_f32_e32 v52, v5, v47 ; 10685F05 v_mac_f32_e32 v46, -2.0, v52 ; 3E5C68F5 v_mul_f32_e32 v52, v4, v47 ; 10685F04 v_mac_f32_e32 v48, -2.0, v52 ; 3E6068F5 v_mul_f32_e32 v47, v3, v47 ; 105E5F03 v_mac_f32_e32 v51, -2.0, v47 ; 3E665EF5 v_mul_f32_e32 v47, v46, v2 ; 105E052E v_mac_f32_e32 v47, v48, v1 ; 3E5E0330 v_mac_f32_e32 v47, v51, v7 ; 3E5E0F33 v_mul_f32_e32 v52, v2, v47 ; 10685F02 v_mad_f32 v52, -2.0, v52, v46 ; D2820034 04BA68F5 v_cmp_gt_f32_e32 vcc, 0, v47 ; 7C085E80 v_cndmask_b32_e32 v46, v46, v52 ; 005C692E v_mul_f32_e32 v52, v1, v47 ; 10685F01 v_mad_f32 v52, -2.0, v52, v48 ; D2820034 04C268F5 v_cndmask_b32_e32 v48, v48, v52 ; 00606930 v_mul_f32_e32 v47, v7, v47 ; 105E5F07 v_mad_f32 v47, -2.0, v47, v51 ; D282002F 04CE5EF5 v_cndmask_b32_e32 v47, v51, v47 ; 005E5F33 v_mul_f32_e32 v51, v18, v48 ; 10666112 v_mac_f32_e32 v51, v17, v46 ; 3E665D11 v_mac_f32_e32 v51, v11, v47 ; 3E665F0B v_add_f32_e32 v51, v6, v51 ; 06666706 v_mul_f32_e32 v52, v14, v48 ; 1068610E v_mac_f32_e32 v52, v13, v46 ; 3E685D0D v_mac_f32_e32 v52, v12, v47 ; 3E685F0C v_add_f32_e32 v52, v9, v52 ; 06686909 v_rcp_f32_e32 v52, v52 ; 7E685534 v_mul_f32_e32 v53, v16, v48 ; 106A6110 v_mac_f32_e32 v53, v15, v46 ; 3E6A5D0F v_mac_f32_e32 v53, v10, v47 ; 3E6A5F0A v_add_f32_e32 v53, v8, v53 ; 066A6B08 v_mul_f32_e32 v54, v52, v51 ; 106C6734 v_mul_f32_e32 v55, v52, v53 ; 106E6B34 v_mul_f32_e32 v40, v40, v29 ; 10503B28 v_mac_f32_e32 v40, v38, v28 ; 3E503926 v_mul_f32_e32 v38, v43, v29 ; 104C3B2B v_mac_f32_e32 v38, v41, v28 ; 3E4C3929 v_mul_f32_e32 v41, v48, v29 ; 10523B30 v_mac_f32_e32 v41, v46, v28 ; 3E52392E v_mov_b32_e32 v43, 0x3e9634db ; 7E5602FF 3E9634DB v_mul_f32_e32 v46, v43, v5 ; 105C0B2B v_mov_b32_e32 v48, 0xbe2eba6b ; 7E6002FF BE2EBA6B v_mac_f32_e32 v46, v48, v4 ; 3E5C0930 v_mov_b32_e32 v51, 0xbf296acf ; 7E6602FF BF296ACF v_mac_f32_e32 v46, v51, v3 ; 3E5C0733 v_mul_f32_e32 v5, v5, v46 ; 100A5D05 v_mul_f32_e32 v4, v4, v46 ; 10085D04 v_mul_f32_e32 v3, v3, v46 ; 10065D03 v_mac_f32_e32 v43, -2.0, v5 ; 3E560AF5 v_mac_f32_e32 v48, -2.0, v4 ; 3E6008F5 v_mac_f32_e32 v51, -2.0, v3 ; 3E6606F5 v_mul_f32_e32 v3, v43, v2 ; 1006052B v_mac_f32_e32 v3, v48, v1 ; 3E060330 v_mac_f32_e32 v3, v51, v7 ; 3E060F33 v_mul_f32_e32 v4, v2, v3 ; 10080702 v_mad_f32 v4, -2.0, v4, v43 ; D2820004 04AE08F5 v_cmp_gt_f32_e32 vcc, 0, v3 ; 7C080680 v_cndmask_b32_e32 v4, v43, v4 ; 0008092B v_mul_f32_e32 v5, v1, v3 ; 100A0701 v_mad_f32 v5, -2.0, v5, v48 ; D2820005 04C20AF5 v_cndmask_b32_e32 v5, v48, v5 ; 000A0B30 v_mul_f32_e32 v29, v5, v29 ; 103A3B05 v_mac_f32_e32 v29, v4, v28 ; 3E3A3904 v_mul_f32_e32 v18, v18, v5 ; 10240B12 v_mac_f32_e32 v18, v17, v4 ; 3E240911 v_mul_f32_e32 v16, v16, v5 ; 10200B10 v_mac_f32_e32 v16, v15, v4 ; 3E20090F v_mul_f32_e32 v5, v14, v5 ; 100A0B0E v_mac_f32_e32 v5, v13, v4 ; 3E0A090D v_mul_f32_e32 v3, v7, v3 ; 10060707 v_mad_f32 v3, -2.0, v3, v51 ; D2820003 04CE06F5 v_cndmask_b32_e32 v3, v51, v3 ; 00060733 v_mac_f32_e32 v40, v39, v27 ; 3E503727 v_mac_f32_e32 v38, v42, v27 ; 3E4C372A v_mac_f32_e32 v41, v47, v27 ; 3E52372F v_mac_f32_e32 v29, v3, v27 ; 3E3A3703 v_mac_f32_e32 v18, v11, v3 ; 3E24070B v_mac_f32_e32 v5, v12, v3 ; 3E0A070C v_add_f32_e32 v4, v9, v5 ; 06080B09 v_rcp_f32_e32 v4, v4 ; 7E085504 v_mac_f32_e32 v16, v10, v3 ; 3E20070A v_add_f32_e32 v3, v6, v18 ; 06062506 v_add_f32_e32 v5, v8, v16 ; 060A2108 v_mul_f32_e32 v6, v4, v3 ; 100C0704 v_mul_f32_e32 v7, v4, v5 ; 100E0B04 v_add_f32_e32 v3, v20, v40 ; 06065114 image_sample v[4:5], 3, 0, 0, 0, 0, 0, 0, 0, v[44:45], s[12:19], s[0:3] ; F0800300 0003042C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v3, -v5, v23, v3 ; D2820003 240E2F05 v_mad_f32 v3, -v4, v24, v3 ; D2820003 240E3104 v_add_f32_e32 v4, v20, v38 ; 06084D14 image_sample v[8:9], 3, 0, 0, 0, 0, 0, 0, 0, v[49:50], s[12:19], s[0:3] ; F0800300 00030831 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v4, -v9, v23, v4 ; D2820004 24122F09 v_mad_f32 v4, -v8, v24, v4 ; D2820004 24123108 v_add_f32_e32 v5, v20, v41 ; 060A5314 image_sample v[8:9], 3, 0, 0, 0, 0, 0, 0, 0, v[54:55], s[12:19], s[0:3] ; F0800300 00030836 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v5, -v9, v23, v5 ; D2820005 24162F09 v_mad_f32 v5, -v8, v24, v5 ; D2820005 24163108 v_add_f32_e32 v8, v20, v29 ; 06103B14 image_sample v[6:7], 3, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[12:19], s[0:3] ; F0800300 00030606 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v7, -v7, v23, v8 ; D2820007 24222F07 v_mad_f32 v6, -v6, v24, v7 ; D2820006 241E3106 v_mov_b32_e32 v7, 0xbac80009 ; 7E0E02FF BAC80009 v_add_f32_e32 v8, v7, v21 ; 06102B07 v_mad_f32 v9, -s8, v8, 1.0 ; D2820009 23CA1008 v_add_f32_e64 v9, 0, v9 clamp ; D2060809 00021280 v_cmp_gt_f32_e32 vcc, 0, v8 ; 7C081080 v_mov_b32_e32 v8, 0x3d800000 ; 7E1002FF 3D800000 v_mul_f32_e32 v9, v8, v9 ; 10121308 v_cndmask_b32_e64 v9, v9, 0, vcc ; D2000009 01A90109 v_add_f32_e32 v10, v7, v22 ; 06142D07 v_cmp_gt_f32_e32 vcc, 0, v10 ; 7C081480 v_mad_f32 v10, -s8, v10, 1.0 ; D282000A 23CA1408 v_add_f32_e64 v10, 0, v10 clamp ; D206080A 00021480 v_mul_f32_e32 v10, v8, v10 ; 10141508 v_cndmask_b32_e64 v10, v10, 0, vcc ; D200000A 01A9010A v_add_f32_e32 v11, v7, v25 ; 06163307 v_cmp_gt_f32_e32 vcc, 0, v11 ; 7C081680 v_mad_f32 v11, -s8, v11, 1.0 ; D282000B 23CA1608 v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680 v_mul_f32_e32 v11, v8, v11 ; 10161708 v_cndmask_b32_e64 v11, v11, 0, vcc ; D200000B 01A9010B v_add_f32_e32 v12, v7, v26 ; 06183507 v_cmp_gt_f32_e32 vcc, 0, v12 ; 7C081880 v_mad_f32 v12, -s8, v12, 1.0 ; D282000C 23CA1808 v_add_f32_e64 v12, 0, v12 clamp ; D206080C 00021880 v_mul_f32_e32 v12, v8, v12 ; 10181908 v_cndmask_b32_e64 v12, v12, 0, vcc ; D200000C 01A9010C v_add_f32_e32 v13, v7, v30 ; 061A3D07 v_cmp_gt_f32_e32 vcc, 0, v13 ; 7C081A80 v_mad_f32 v13, -s8, v13, 1.0 ; D282000D 23CA1A08 v_add_f32_e64 v13, 0, v13 clamp ; D206080D 00021A80 v_mul_f32_e32 v13, v8, v13 ; 101A1B08 v_cndmask_b32_e64 v13, v13, 0, vcc ; D200000D 01A9010D v_add_f32_e32 v14, v7, v31 ; 061C3F07 v_cmp_gt_f32_e32 vcc, 0, v14 ; 7C081C80 v_mad_f32 v14, -s8, v14, 1.0 ; D282000E 23CA1C08 v_add_f32_e64 v14, 0, v14 clamp ; D206080E 00021C80 v_mul_f32_e32 v14, v8, v14 ; 101C1D08 v_cndmask_b32_e64 v14, v14, 0, vcc ; D200000E 01A9010E v_add_f32_e32 v15, v7, v32 ; 061E4107 v_cmp_gt_f32_e32 vcc, 0, v15 ; 7C081E80 v_mad_f32 v15, -s8, v15, 1.0 ; D282000F 23CA1E08 v_add_f32_e64 v15, 0, v15 clamp ; D206080F 00021E80 v_mul_f32_e32 v15, v8, v15 ; 101E1F08 v_cndmask_b32_e64 v15, v15, 0, vcc ; D200000F 01A9010F v_add_f32_e32 v16, v7, v33 ; 06204307 v_cmp_gt_f32_e32 vcc, 0, v16 ; 7C082080 v_mad_f32 v16, -s8, v16, 1.0 ; D2820010 23CA2008 v_add_f32_e64 v16, 0, v16 clamp ; D2060810 00022080 v_mul_f32_e32 v16, v8, v16 ; 10202108 v_cndmask_b32_e64 v16, v16, 0, vcc ; D2000010 01A90110 v_add_f32_e32 v9, v10, v9 ; 0612130A v_add_f32_e32 v9, v11, v9 ; 0612130B v_add_f32_e32 v9, v12, v9 ; 0612130C v_add_f32_e32 v10, v14, v13 ; 06141B0E v_add_f32_e32 v10, v15, v10 ; 0614150F v_add_f32_e32 v10, v16, v10 ; 06141510 v_add_f32_e32 v9, v10, v9 ; 0612130A v_add_f32_e32 v10, v7, v34 ; 06144507 v_add_f32_e32 v11, v7, v35 ; 06164707 v_cmp_gt_f32_e32 vcc, 0, v10 ; 7C081480 v_mad_f32 v10, -s8, v10, 1.0 ; D282000A 23CA1408 v_add_f32_e64 v10, 0, v10 clamp ; D206080A 00021480 v_mul_f32_e32 v10, v8, v10 ; 10141508 v_cndmask_b32_e64 v10, v10, 0, vcc ; D200000A 01A9010A v_cmp_gt_f32_e32 vcc, 0, v11 ; 7C081680 v_mad_f32 v11, -s8, v11, 1.0 ; D282000B 23CA1608 v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680 v_mul_f32_e32 v11, v8, v11 ; 10161708 v_cndmask_b32_e64 v11, v11, 0, vcc ; D200000B 01A9010B v_add_f32_e32 v10, v11, v10 ; 0614150B v_add_f32_e32 v11, v7, v36 ; 06164907 v_cmp_gt_f32_e32 vcc, 0, v11 ; 7C081680 v_mad_f32 v11, -s8, v11, 1.0 ; D282000B 23CA1608 v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680 v_mul_f32_e32 v11, v8, v11 ; 10161708 v_cndmask_b32_e64 v11, v11, 0, vcc ; D200000B 01A9010B v_add_f32_e32 v10, v11, v10 ; 0614150B v_add_f32_e32 v11, v7, v37 ; 06164B07 v_cmp_gt_f32_e32 vcc, 0, v11 ; 7C081680 v_mad_f32 v11, -s8, v11, 1.0 ; D282000B 23CA1608 v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680 v_mul_f32_e32 v11, v8, v11 ; 10161708 v_cndmask_b32_e64 v11, v11, 0, vcc ; D200000B 01A9010B v_add_f32_e32 v10, v11, v10 ; 0614150B v_add_f32_e32 v9, v10, v9 ; 0612130A v_add_f32_e32 v3, v7, v3 ; 06060707 v_add_f32_e32 v4, v7, v4 ; 06080907 v_add_f32_e32 v5, v7, v5 ; 060A0B07 v_add_f32_e32 v6, v7, v6 ; 060C0D07 v_mad_f32 v7, -s8, v3, 1.0 ; D2820007 23CA0608 v_add_f32_e64 v7, 0, v7 clamp ; D2060807 00020E80 v_cmp_gt_f32_e32 vcc, 0, v3 ; 7C080680 v_mad_f32 v3, -s8, v4, 1.0 ; D2820003 23CA0808 v_add_f32_e64 v3, 0, v3 clamp ; D2060803 00020680 v_cmp_gt_f32_e64 s[0:1], 0, v4 ; D0080000 00020880 v_mad_f32 v4, -s8, v5, 1.0 ; D2820004 23CA0A08 v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880 v_mov_b32_e32 v10, 0x41000000 ; 7E1402FF 41000000 v_mov_b32_e32 v11, 0x40e00000 ; 7E1602FF 40E00000 v_mad_f32 v2, v11, v2, v10 ; D2820002 042A050B v_mac_f32_e32 v10, v11, v1 ; 3E14030B v_cmp_gt_f32_e64 s[2:3], 0, v5 ; D0080002 00020A80 v_mad_f32 v1, -s8, v6, 1.0 ; D2820001 23CA0C08 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_cmp_gt_f32_e64 s[4:5], 0, v6 ; D0080004 00020C80 v_mul_f32_e32 v0, v19, v0 ; 10000113 v_mul_f32_e32 v0, 0x477fff00, v0 ; 100000FF 477FFF00 v_floor_f32_e32 v0, v0 ; 7E004900 v_mul_f32_e32 v5, 0x3b800000, v0 ; 100A00FF 3B800000 v_floor_f32_e32 v5, v5 ; 7E0A4905 v_mul_f32_e32 v6, v8, v7 ; 100C0F08 v_cndmask_b32_e64 v6, v6, 0, vcc ; D2000006 01A90106 v_mul_f32_e32 v3, v8, v3 ; 10060708 v_cndmask_b32_e64 v3, v3, 0, s[0:1] ; D2000003 00010103 v_add_f32_e32 v3, v3, v6 ; 06060D03 v_mul_f32_e32 v4, v8, v4 ; 10080908 v_cndmask_b32_e64 v4, v4, 0, s[2:3] ; D2000004 00090104 v_add_f32_e32 v3, v4, v3 ; 06060704 v_mul_f32_e32 v1, v8, v1 ; 10020308 v_cndmask_b32_e64 v1, v1, 0, s[4:5] ; D2000001 00110101 v_add_f32_e32 v1, v1, v3 ; 06020701 v_add_f32_e32 v1, v1, v9 ; 06021301 v_mov_b32_e32 v3, 0x3b808081 ; 7E0602FF 3B808081 v_madmk_f32_e32 v0, v5, v0, 0xc3800000 ; 40000105 C3800000 v_mul_f32_e32 v4, v3, v5 ; 10080B03 v_mul_f32_e32 v0, v3, v0 ; 10000103 v_floor_f32_e32 v2, v2 ; 7E044902 v_floor_f32_e32 v3, v10 ; 7E06490A v_mul_f32_e32 v2, 0x3d808081, v2 ; 100404FF 3D808081 v_madmk_f32_e32 v2, v3, v2, 0x3b808083 ; 40040503 3B808083 v_cvt_pkrtz_f16_f32_e32 v1, v1, v4 ; 5E020901 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 60 Code Size: 5140 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0] DCL CONST[2][0..41] DCL CONST[3][0..13] DCL CONST[4][0] DCL TEMP[0..11], LOCAL IMM[0] FLT32 { 0.9961, 0.0039, 255.0000, 0.0625} IMM[1] FLT32 { 16.0000, -8.0000, 0.1429, 0.0000} IMM[2] FLT32 { 1.0000, -2.0000, 0.0350, -0.7000} IMM[3] UINT32 {0, 0, 0, 0} IMM[4] FLT32 { 3.3333, 2.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0].yzw, TEMP[0], SAMP[0], 2D 2: DP2 TEMP[1].x, TEMP[0].yzzz, IMM[0].xyyy 3: MUL TEMP[2].x, TEMP[0].wwww, IMM[0].zzzz 4: MUL TEMP[3].x, TEMP[2].xxxx, IMM[0].wwww 5: FLR TEMP[3].x, TEMP[3].xxxx 6: MOV TEMP[4].x, TEMP[3].xxxx 7: MUL TEMP[3].x, TEMP[3].xxxx, IMM[1].xxxx 8: ADD TEMP[2].x, TEMP[2].xxxx, -TEMP[3].xxxx 9: MOV TEMP[4].y, TEMP[2].xxxx 10: ADD TEMP[2].xy, TEMP[4].xyyy, IMM[1].yyyy 11: MUL TEMP[2].xy, TEMP[2].xyyy, IMM[1].zzzz 12: MOV TEMP[3].x, TEMP[2].xxxx 13: MOV TEMP[3].y, TEMP[2].yyyy 14: MUL TEMP[4].x, TEMP[2].xxxx, TEMP[2].xxxx 15: ADD TEMP[4].x, IMM[2].xxxx, -TEMP[4].xxxx 16: MUL TEMP[2].x, TEMP[2].yyyy, TEMP[2].yyyy 17: ADD TEMP[2].x, TEMP[4].xxxx, -TEMP[2].xxxx 18: MAX TEMP[2].x, IMM[1].wwww, TEMP[2].xxxx 19: SQRT TEMP[2].x, TEMP[2].xxxx 20: MOV TEMP[3].z, TEMP[2].xxxx 21: MOV TEMP[2].y, IMM[1].wwww 22: MOV TEMP[2].x, CONST[1][0].xxxx 23: MAD TEMP[4].xy, IMM[2].yyyy, TEMP[2].xyyy, IN[0].xyyy 24: MOV TEMP[4].xy, TEMP[4].xyyy 25: TEX TEMP[4], TEMP[4], SAMP[0], 2D 26: DP2 TEMP[5].x, TEMP[4].yzzz, IMM[0].xyyy 27: ADD TEMP[6].x, TEMP[5].xxxx, -TEMP[1].xxxx 28: MUL TEMP[7].x, IMM[0].zzzz, TEMP[4].wwww 29: MUL TEMP[8].x, IMM[0].wwww, TEMP[7].xxxx 30: FLR TEMP[8].x, TEMP[8].xxxx 31: MOV TEMP[9].x, TEMP[8].xxxx 32: MUL TEMP[8].x, IMM[1].xxxx, TEMP[8].xxxx 33: ADD TEMP[7].x, TEMP[7].xxxx, -TEMP[8].xxxx 34: MOV TEMP[9].y, TEMP[7].xxxx 35: ADD TEMP[7].xy, IMM[1].yyyy, TEMP[9].xyyy 36: MUL TEMP[7].xy, TEMP[7].xyyy, IMM[1].zzzz 37: MOV TEMP[8].x, TEMP[7].xxxx 38: MOV TEMP[8].y, TEMP[7].yyyy 39: MUL TEMP[9].x, TEMP[7].xxxx, TEMP[7].xxxx 40: ADD TEMP[9].x, IMM[2].xxxx, -TEMP[9].xxxx 41: MUL TEMP[7].x, TEMP[7].yyyy, TEMP[7].yyyy 42: ADD TEMP[7].x, TEMP[9].xxxx, -TEMP[7].xxxx 43: MAX TEMP[7].x, IMM[1].wwww, TEMP[7].xxxx 44: SQRT TEMP[7].x, TEMP[7].xxxx 45: MOV TEMP[8].z, TEMP[7].xxxx 46: MOV TEMP[7].x, -TEMP[6].xxxx 47: FSLT TEMP[9].x, TEMP[6].xxxx, IMM[1].wwww 48: UIF TEMP[9].xxxx :0 49: MOV TEMP[7].x, TEMP[7].xxxx 50: ELSE :0 51: MOV TEMP[7].x, TEMP[6].xxxx 52: ENDIF 53: MUL TEMP[5].x, TEMP[5].xxxx, IMM[2].zzzz 54: RCP TEMP[5].x, TEMP[5].xxxx 55: MUL TEMP[5].x, TEMP[7].xxxx, TEMP[5].xxxx 56: MOV_SAT TEMP[5].x, TEMP[5].xxxx 57: ADD TEMP[5].x, IMM[2].xxxx, -TEMP[5].xxxx 58: DP3 TEMP[6].x, TEMP[8].xyzz, TEMP[3].xyzz 59: ADD TEMP[6].x, TEMP[6].xxxx, IMM[2].wwww 60: MUL TEMP[6].x, TEMP[6].xxxx, IMM[4].xxxx 61: MOV_SAT TEMP[6].x, TEMP[6].xxxx 62: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx 63: MUL TEMP[4].x, TEMP[5].xxxx, TEMP[4].xxxx 64: ADD TEMP[6].xy, -TEMP[2].xyyy, IN[0].xyyy 65: MOV TEMP[6].xy, TEMP[6].xyyy 66: TEX TEMP[6], TEMP[6], SAMP[0], 2D 67: DP2 TEMP[7].x, TEMP[6].yzzz, IMM[0].xyyy 68: ADD TEMP[8].x, TEMP[7].xxxx, -TEMP[1].xxxx 69: MUL TEMP[9].x, IMM[0].zzzz, TEMP[6].wwww 70: MUL TEMP[10].x, IMM[0].wwww, TEMP[9].xxxx 71: FLR TEMP[10].x, TEMP[10].xxxx 72: MOV TEMP[11].x, TEMP[10].xxxx 73: MUL TEMP[10].x, IMM[1].xxxx, TEMP[10].xxxx 74: ADD TEMP[9].x, TEMP[9].xxxx, -TEMP[10].xxxx 75: MOV TEMP[11].y, TEMP[9].xxxx 76: ADD TEMP[9].xy, IMM[1].yyyy, TEMP[11].xyyy 77: MUL TEMP[9].xy, TEMP[9].xyyy, IMM[1].zzzz 78: MOV TEMP[10].x, TEMP[9].xxxx 79: MOV TEMP[10].y, TEMP[9].yyyy 80: MUL TEMP[11].x, TEMP[9].xxxx, TEMP[9].xxxx 81: ADD TEMP[11].x, IMM[2].xxxx, -TEMP[11].xxxx 82: MUL TEMP[9].x, TEMP[9].yyyy, TEMP[9].yyyy 83: ADD TEMP[9].x, TEMP[11].xxxx, -TEMP[9].xxxx 84: MAX TEMP[9].x, IMM[1].wwww, TEMP[9].xxxx 85: SQRT TEMP[9].x, TEMP[9].xxxx 86: MOV TEMP[10].z, TEMP[9].xxxx 87: MOV TEMP[9].x, -TEMP[8].xxxx 88: FSLT TEMP[11].x, TEMP[8].xxxx, IMM[1].wwww 89: UIF TEMP[11].xxxx :0 90: MOV TEMP[9].x, TEMP[9].xxxx 91: ELSE :0 92: MOV TEMP[9].x, TEMP[8].xxxx 93: ENDIF 94: MUL TEMP[7].x, IMM[2].zzzz, TEMP[7].xxxx 95: RCP TEMP[7].x, TEMP[7].xxxx 96: MUL TEMP[7].x, TEMP[9].xxxx, TEMP[7].xxxx 97: MOV_SAT TEMP[7].x, TEMP[7].xxxx 98: ADD TEMP[7].x, IMM[2].xxxx, -TEMP[7].xxxx 99: DP3 TEMP[8].x, TEMP[10].xyzz, TEMP[3].xyzz 100: ADD TEMP[8].x, IMM[2].wwww, TEMP[8].xxxx 101: MUL TEMP[8].x, TEMP[8].xxxx, IMM[4].xxxx 102: MOV_SAT TEMP[8].x, TEMP[8].xxxx 103: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[8].xxxx 104: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[7].xxxx 105: ADD TEMP[5].x, IMM[2].xxxx, TEMP[5].xxxx 106: MAD TEMP[4].x, TEMP[7].xxxx, TEMP[6].xxxx, TEMP[4].xxxx 107: MOV TEMP[6].xy, IN[0].xyyy 108: TEX TEMP[6].x, TEMP[6], SAMP[0], 2D 109: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[6].xxxx 110: ADD TEMP[6].xy, TEMP[2].xyyy, IN[0].xyyy 111: MOV TEMP[6].xy, TEMP[6].xyyy 112: TEX TEMP[6], TEMP[6], SAMP[0], 2D 113: DP2 TEMP[7].x, TEMP[6].yzzz, IMM[0].xyyy 114: ADD TEMP[8].x, TEMP[7].xxxx, -TEMP[1].xxxx 115: MUL TEMP[9].x, IMM[0].zzzz, TEMP[6].wwww 116: MUL TEMP[10].x, IMM[0].wwww, TEMP[9].xxxx 117: FLR TEMP[10].x, TEMP[10].xxxx 118: MOV TEMP[11].x, TEMP[10].xxxx 119: MUL TEMP[10].x, IMM[1].xxxx, TEMP[10].xxxx 120: ADD TEMP[9].x, TEMP[9].xxxx, -TEMP[10].xxxx 121: MOV TEMP[11].y, TEMP[9].xxxx 122: ADD TEMP[9].xy, IMM[1].yyyy, TEMP[11].xyyy 123: MUL TEMP[9].xy, TEMP[9].xyyy, IMM[1].zzzz 124: MOV TEMP[10].x, TEMP[9].xxxx 125: MOV TEMP[10].y, TEMP[9].yyyy 126: MUL TEMP[11].x, TEMP[9].xxxx, TEMP[9].xxxx 127: ADD TEMP[11].x, IMM[2].xxxx, -TEMP[11].xxxx 128: MUL TEMP[9].x, TEMP[9].yyyy, TEMP[9].yyyy 129: ADD TEMP[9].x, TEMP[11].xxxx, -TEMP[9].xxxx 130: MAX TEMP[9].x, IMM[1].wwww, TEMP[9].xxxx 131: SQRT TEMP[9].x, TEMP[9].xxxx 132: MOV TEMP[10].z, TEMP[9].xxxx 133: MOV TEMP[9].x, -TEMP[8].xxxx 134: FSLT TEMP[11].x, TEMP[8].xxxx, IMM[1].wwww 135: UIF TEMP[11].xxxx :0 136: MOV TEMP[9].x, TEMP[9].xxxx 137: ELSE :0 138: MOV TEMP[9].x, TEMP[8].xxxx 139: ENDIF 140: MUL TEMP[7].x, IMM[2].zzzz, TEMP[7].xxxx 141: RCP TEMP[7].x, TEMP[7].xxxx 142: MUL TEMP[7].x, TEMP[9].xxxx, TEMP[7].xxxx 143: MOV_SAT TEMP[7].x, TEMP[7].xxxx 144: ADD TEMP[7].x, IMM[2].xxxx, -TEMP[7].xxxx 145: DP3 TEMP[8].x, TEMP[10].xyzz, TEMP[3].xyzz 146: ADD TEMP[8].x, IMM[2].wwww, TEMP[8].xxxx 147: MUL TEMP[8].x, TEMP[8].xxxx, IMM[4].xxxx 148: MOV_SAT TEMP[8].x, TEMP[8].xxxx 149: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[8].xxxx 150: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[7].xxxx 151: MAD TEMP[4].x, TEMP[7].xxxx, TEMP[6].xxxx, TEMP[4].xxxx 152: MAD TEMP[2].xy, TEMP[2].xyyy, IMM[4].yyyy, IN[0].xyyy 153: MOV TEMP[2].xy, TEMP[2].xyyy 154: TEX TEMP[2], TEMP[2], SAMP[0], 2D 155: DP2 TEMP[6].x, TEMP[2].yzzz, IMM[0].xyyy 156: ADD TEMP[1].x, TEMP[6].xxxx, -TEMP[1].xxxx 157: MUL TEMP[7].x, IMM[0].zzzz, TEMP[2].wwww 158: MUL TEMP[8].x, IMM[0].wwww, TEMP[7].xxxx 159: FLR TEMP[8].x, TEMP[8].xxxx 160: MOV TEMP[9].x, TEMP[8].xxxx 161: MUL TEMP[8].x, IMM[1].xxxx, TEMP[8].xxxx 162: ADD TEMP[7].x, TEMP[7].xxxx, -TEMP[8].xxxx 163: MOV TEMP[9].y, TEMP[7].xxxx 164: ADD TEMP[7].xy, IMM[1].yyyy, TEMP[9].xyyy 165: MUL TEMP[7].xy, TEMP[7].xyyy, IMM[1].zzzz 166: MOV TEMP[8].x, TEMP[7].xxxx 167: MOV TEMP[8].y, TEMP[7].yyyy 168: MUL TEMP[9].x, TEMP[7].xxxx, TEMP[7].xxxx 169: ADD TEMP[9].x, IMM[2].xxxx, -TEMP[9].xxxx 170: MUL TEMP[7].x, TEMP[7].yyyy, TEMP[7].yyyy 171: ADD TEMP[7].x, TEMP[9].xxxx, -TEMP[7].xxxx 172: MAX TEMP[7].x, IMM[1].wwww, TEMP[7].xxxx 173: SQRT TEMP[7].x, TEMP[7].xxxx 174: MOV TEMP[8].z, TEMP[7].xxxx 175: MOV TEMP[7].x, -TEMP[1].xxxx 176: FSLT TEMP[9].x, TEMP[1].xxxx, IMM[1].wwww 177: UIF TEMP[9].xxxx :0 178: MOV TEMP[7].x, TEMP[7].xxxx 179: ELSE :0 180: MOV TEMP[7].x, TEMP[1].xxxx 181: ENDIF 182: MUL TEMP[1].x, IMM[2].zzzz, TEMP[6].xxxx 183: RCP TEMP[1].x, TEMP[1].xxxx 184: MUL TEMP[1].x, TEMP[7].xxxx, TEMP[1].xxxx 185: MOV_SAT TEMP[1].x, TEMP[1].xxxx 186: ADD TEMP[1].x, IMM[2].xxxx, -TEMP[1].xxxx 187: DP3 TEMP[3].x, TEMP[8].xyzz, TEMP[3].xyzz 188: ADD TEMP[3].x, IMM[2].wwww, TEMP[3].xxxx 189: MUL TEMP[3].x, TEMP[3].xxxx, IMM[4].xxxx 190: MOV_SAT TEMP[3].x, TEMP[3].xxxx 191: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[3].xxxx 192: MAD TEMP[2].x, TEMP[1].xxxx, TEMP[2].xxxx, TEMP[4].xxxx 193: ADD TEMP[1].x, TEMP[5].xxxx, TEMP[1].xxxx 194: RCP TEMP[1].x, TEMP[1].xxxx 195: MUL TEMP[1].x, TEMP[2].xxxx, TEMP[1].xxxx 196: MOV TEMP[1].y, TEMP[0].yyyy 197: MOV TEMP[1].z, TEMP[0].zzzz 198: MOV TEMP[1].w, TEMP[0].wwww 199: MOV OUT[0], TEMP[1] 200: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %26 = load <8 x i32>, <8 x i32> addrspace(2)* %25, align 32, !tbaa !0 %27 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %28 = load <4 x i32>, <4 x i32> addrspace(2)* %27, align 16, !tbaa !0 %29 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %31 = bitcast float %29 to i32 %32 = bitcast float %30 to i32 %33 = insertelement <2 x i32> undef, i32 %31, i32 0 %34 = insertelement <2 x i32> %33, i32 %32, i32 1 %35 = bitcast <8 x i32> %26 to <32 x i8> %36 = bitcast <4 x i32> %28 to <16 x i8> %37 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %34, <32 x i8> %35, <16 x i8> %36, i32 2) %38 = extractelement <4 x float> %37, i32 1 %39 = extractelement <4 x float> %37, i32 2 %40 = extractelement <4 x float> %37, i32 3 %41 = fmul float %38, 0x3FEFE02000000000 %42 = fmul float %39, 0x3F6FE01F80000000 %43 = fadd float %41, %42 %44 = fmul float %40, 2.550000e+02 %45 = fmul float %44, 6.250000e-02 %46 = call float @floor(float %45) %47 = fmul float %46, 1.600000e+01 %48 = fsub float %44, %47 %49 = fadd float %46, -8.000000e+00 %50 = fadd float %48, -8.000000e+00 %51 = fmul float %49, 0x3FC24924A0000000 %52 = fmul float %50, 0x3FC24924A0000000 %53 = fmul float %51, %51 %54 = fsub float 1.000000e+00, %53 %55 = fmul float %52, %52 %56 = fsub float %54, %55 %57 = call float @llvm.maxnum.f32(float %56, float 0.000000e+00) %58 = call float @llvm.sqrt.f32(float %57) %59 = fmul float %24, -2.000000e+00 %60 = fadd float %59, %29 %61 = bitcast float %60 to i32 %62 = bitcast float %30 to i32 %63 = insertelement <2 x i32> undef, i32 %61, i32 0 %64 = insertelement <2 x i32> %63, i32 %62, i32 1 %65 = bitcast <8 x i32> %26 to <32 x i8> %66 = bitcast <4 x i32> %28 to <16 x i8> %67 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %64, <32 x i8> %65, <16 x i8> %66, i32 2) %68 = extractelement <4 x float> %67, i32 0 %69 = extractelement <4 x float> %67, i32 1 %70 = extractelement <4 x float> %67, i32 2 %71 = extractelement <4 x float> %67, i32 3 %72 = fmul float %69, 0x3FEFE02000000000 %73 = fmul float %70, 0x3F6FE01F80000000 %74 = fadd float %72, %73 %75 = fsub float %74, %43 %76 = fmul float %71, 2.550000e+02 %77 = fmul float %76, 6.250000e-02 %78 = call float @floor(float %77) %79 = fmul float %78, 1.600000e+01 %80 = fsub float %76, %79 %81 = fadd float %78, -8.000000e+00 %82 = fadd float %80, -8.000000e+00 %83 = fmul float %81, 0x3FC24924A0000000 %84 = fmul float %82, 0x3FC24924A0000000 %85 = fmul float %83, %83 %86 = fsub float 1.000000e+00, %85 %87 = fmul float %84, %84 %88 = fsub float %86, %87 %89 = call float @llvm.maxnum.f32(float %88, float 0.000000e+00) %90 = call float @llvm.sqrt.f32(float %89) %91 = fsub float -0.000000e+00, %75 %92 = fcmp olt float %75, 0.000000e+00 %. = select i1 %92, float %91, float %75 %93 = fmul float %74, 0x3FA1EB8520000000 %94 = fdiv float 1.000000e+00, %93 %95 = fmul float %., %94 %96 = call float @llvm.AMDIL.clamp.(float %95, float 0.000000e+00, float 1.000000e+00) %97 = fsub float 1.000000e+00, %96 %98 = fmul float %83, %51 %99 = fmul float %84, %52 %100 = fadd float %99, %98 %101 = fmul float %90, %58 %102 = fadd float %100, %101 %103 = fadd float %102, 0xBFE6666660000000 %104 = fmul float %103, 0x400AAAAAA0000000 %105 = call float @llvm.AMDIL.clamp.(float %104, float 0.000000e+00, float 1.000000e+00) %106 = fmul float %97, %105 %107 = fmul float %106, %68 %108 = fsub float %29, %24 %109 = bitcast float %108 to i32 %110 = bitcast float %30 to i32 %111 = insertelement <2 x i32> undef, i32 %109, i32 0 %112 = insertelement <2 x i32> %111, i32 %110, i32 1 %113 = bitcast <8 x i32> %26 to <32 x i8> %114 = bitcast <4 x i32> %28 to <16 x i8> %115 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %112, <32 x i8> %113, <16 x i8> %114, i32 2) %116 = extractelement <4 x float> %115, i32 0 %117 = extractelement <4 x float> %115, i32 1 %118 = extractelement <4 x float> %115, i32 2 %119 = extractelement <4 x float> %115, i32 3 %120 = fmul float %117, 0x3FEFE02000000000 %121 = fmul float %118, 0x3F6FE01F80000000 %122 = fadd float %120, %121 %123 = fsub float %122, %43 %124 = fmul float %119, 2.550000e+02 %125 = fmul float %124, 6.250000e-02 %126 = call float @floor(float %125) %127 = fmul float %126, 1.600000e+01 %128 = fsub float %124, %127 %129 = fadd float %126, -8.000000e+00 %130 = fadd float %128, -8.000000e+00 %131 = fmul float %129, 0x3FC24924A0000000 %132 = fmul float %130, 0x3FC24924A0000000 %133 = fmul float %131, %131 %134 = fsub float 1.000000e+00, %133 %135 = fmul float %132, %132 %136 = fsub float %134, %135 %137 = call float @llvm.maxnum.f32(float %136, float 0.000000e+00) %138 = call float @llvm.sqrt.f32(float %137) %139 = fsub float -0.000000e+00, %123 %140 = fcmp olt float %123, 0.000000e+00 %temp36.0 = select i1 %140, float %139, float %123 %141 = fmul float %122, 0x3FA1EB8520000000 %142 = fdiv float 1.000000e+00, %141 %143 = fmul float %temp36.0, %142 %144 = call float @llvm.AMDIL.clamp.(float %143, float 0.000000e+00, float 1.000000e+00) %145 = fsub float 1.000000e+00, %144 %146 = fmul float %131, %51 %147 = fmul float %132, %52 %148 = fadd float %147, %146 %149 = fmul float %138, %58 %150 = fadd float %148, %149 %151 = fadd float %150, 0xBFE6666660000000 %152 = fmul float %151, 0x400AAAAAA0000000 %153 = call float @llvm.AMDIL.clamp.(float %152, float 0.000000e+00, float 1.000000e+00) %154 = fmul float %145, %153 %155 = fadd float %106, %154 %156 = fadd float %155, 1.000000e+00 %157 = fmul float %154, %116 %158 = fadd float %157, %107 %159 = bitcast float %29 to i32 %160 = bitcast float %30 to i32 %161 = insertelement <2 x i32> undef, i32 %159, i32 0 %162 = insertelement <2 x i32> %161, i32 %160, i32 1 %163 = bitcast <8 x i32> %26 to <32 x i8> %164 = bitcast <4 x i32> %28 to <16 x i8> %165 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %162, <32 x i8> %163, <16 x i8> %164, i32 2) %166 = extractelement <4 x float> %165, i32 0 %167 = fadd float %158, %166 %168 = fadd float %24, %29 %169 = fadd float %30, 0.000000e+00 %170 = bitcast float %168 to i32 %171 = bitcast float %169 to i32 %172 = insertelement <2 x i32> undef, i32 %170, i32 0 %173 = insertelement <2 x i32> %172, i32 %171, i32 1 %174 = bitcast <8 x i32> %26 to <32 x i8> %175 = bitcast <4 x i32> %28 to <16 x i8> %176 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %173, <32 x i8> %174, <16 x i8> %175, i32 2) %177 = extractelement <4 x float> %176, i32 0 %178 = extractelement <4 x float> %176, i32 1 %179 = extractelement <4 x float> %176, i32 2 %180 = extractelement <4 x float> %176, i32 3 %181 = fmul float %178, 0x3FEFE02000000000 %182 = fmul float %179, 0x3F6FE01F80000000 %183 = fadd float %181, %182 %184 = fsub float %183, %43 %185 = fmul float %180, 2.550000e+02 %186 = fmul float %185, 6.250000e-02 %187 = call float @floor(float %186) %188 = fmul float %187, 1.600000e+01 %189 = fsub float %185, %188 %190 = fadd float %187, -8.000000e+00 %191 = fadd float %189, -8.000000e+00 %192 = fmul float %190, 0x3FC24924A0000000 %193 = fmul float %191, 0x3FC24924A0000000 %194 = fmul float %192, %192 %195 = fsub float 1.000000e+00, %194 %196 = fmul float %193, %193 %197 = fsub float %195, %196 %198 = call float @llvm.maxnum.f32(float %197, float 0.000000e+00) %199 = call float @llvm.sqrt.f32(float %198) %200 = fsub float -0.000000e+00, %184 %201 = fcmp olt float %184, 0.000000e+00 %.57 = select i1 %201, float %200, float %184 %202 = fmul float %183, 0x3FA1EB8520000000 %203 = fdiv float 1.000000e+00, %202 %204 = fmul float %.57, %203 %205 = call float @llvm.AMDIL.clamp.(float %204, float 0.000000e+00, float 1.000000e+00) %206 = fsub float 1.000000e+00, %205 %207 = fmul float %192, %51 %208 = fmul float %193, %52 %209 = fadd float %208, %207 %210 = fmul float %199, %58 %211 = fadd float %209, %210 %212 = fadd float %211, 0xBFE6666660000000 %213 = fmul float %212, 0x400AAAAAA0000000 %214 = call float @llvm.AMDIL.clamp.(float %213, float 0.000000e+00, float 1.000000e+00) %215 = fmul float %206, %214 %216 = fadd float %156, %215 %217 = fmul float %215, %177 %218 = fadd float %217, %167 %219 = fmul float %24, 2.000000e+00 %220 = fadd float %219, %29 %221 = fadd float %30, 0.000000e+00 %222 = bitcast float %220 to i32 %223 = bitcast float %221 to i32 %224 = insertelement <2 x i32> undef, i32 %222, i32 0 %225 = insertelement <2 x i32> %224, i32 %223, i32 1 %226 = bitcast <8 x i32> %26 to <32 x i8> %227 = bitcast <4 x i32> %28 to <16 x i8> %228 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %225, <32 x i8> %226, <16 x i8> %227, i32 2) %229 = extractelement <4 x float> %228, i32 0 %230 = extractelement <4 x float> %228, i32 1 %231 = extractelement <4 x float> %228, i32 2 %232 = extractelement <4 x float> %228, i32 3 %233 = fmul float %230, 0x3FEFE02000000000 %234 = fmul float %231, 0x3F6FE01F80000000 %235 = fadd float %233, %234 %236 = fsub float %235, %43 %237 = fmul float %232, 2.550000e+02 %238 = fmul float %237, 6.250000e-02 %239 = call float @floor(float %238) %240 = fmul float %239, 1.600000e+01 %241 = fsub float %237, %240 %242 = fadd float %239, -8.000000e+00 %243 = fadd float %241, -8.000000e+00 %244 = fmul float %242, 0x3FC24924A0000000 %245 = fmul float %243, 0x3FC24924A0000000 %246 = fmul float %244, %244 %247 = fsub float 1.000000e+00, %246 %248 = fmul float %245, %245 %249 = fsub float %247, %248 %250 = call float @llvm.maxnum.f32(float %249, float 0.000000e+00) %251 = call float @llvm.sqrt.f32(float %250) %252 = fsub float -0.000000e+00, %236 %253 = fcmp olt float %236, 0.000000e+00 %temp28.1 = select i1 %253, float %252, float %236 %254 = fmul float %235, 0x3FA1EB8520000000 %255 = fdiv float 1.000000e+00, %254 %256 = fmul float %temp28.1, %255 %257 = call float @llvm.AMDIL.clamp.(float %256, float 0.000000e+00, float 1.000000e+00) %258 = fsub float 1.000000e+00, %257 %259 = fmul float %244, %51 %260 = fmul float %245, %52 %261 = fadd float %260, %259 %262 = fmul float %251, %58 %263 = fadd float %261, %262 %264 = fadd float %263, 0xBFE6666660000000 %265 = fmul float %264, 0x400AAAAAA0000000 %266 = call float @llvm.AMDIL.clamp.(float %265, float 0.000000e+00, float 1.000000e+00) %267 = fmul float %258, %266 %268 = fmul float %267, %229 %269 = fadd float %268, %218 %270 = fadd float %216, %267 %271 = fdiv float 1.000000e+00, %270 %272 = fmul float %269, %271 %273 = call i32 @llvm.SI.packf16(float %272, float %38) %274 = bitcast i32 %273 to float %275 = call i32 @llvm.SI.packf16(float %39, float %40) %276 = bitcast i32 %275 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %274, float %276, float %274, float %276) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @floor(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[20:23], s[2:3], 0x4 ; C08A0304 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[20:23], 0x0 ; C2021500 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_mov_b32_e32 v0, 0x3b7f00fc ; 7E0002FF 3B7F00FC v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_mov_b32_e32 v1, 0x3f7f0100 ; 7E0202FF 3F7F0100 image_sample v[4:7], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00030402 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v8, v0, v6 ; 10100D00 v_mad_f32 v9, -2.0, s4, v2 ; D2820009 040808F5 v_mac_f32_e32 v8, v1, v5 ; 3E100B01 v_mov_b32_e32 v10, v3 ; 7E140303 image_sample v[9:12], 15, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[12:19], s[0:3] ; F0800F00 00030909 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v13, v0, v11 ; 101A1700 v_mac_f32_e32 v13, v1, v10 ; 3E1A1501 v_mad_f32 v11, v11, v0, -v8 ; D282000B 8422010B v_mac_f32_e32 v11, v1, v10 ; 3E161501 v_mov_b32_e32 v10, 0x3d0f5c29 ; 7E1402FF 3D0F5C29 v_mul_f32_e32 v13, v10, v13 ; 101A1B0A v_rcp_f32_e32 v13, v13 ; 7E1A550D v_mov_b32_e32 v14, 0x80000000 ; 7E1C02FF 80000000 v_xor_b32_e32 v15, v11, v14 ; 3A1E1D0B v_cmp_gt_f32_e32 vcc, 0, v11 ; 7C081680 v_cndmask_b32_e32 v11, v11, v15 ; 00161F0B v_mul_f32_e32 v11, v13, v11 ; 1016170D v_mov_b32_e32 v13, 0x437f0000 ; 7E1A02FF 437F0000 v_mul_f32_e32 v15, v13, v7 ; 101E0F0D v_mov_b32_e32 v16, 0x3d800000 ; 7E2002FF 3D800000 v_mul_f32_e32 v15, v16, v15 ; 101E1F10 v_floor_f32_e32 v15, v15 ; 7E1E490F v_mov_b32_e32 v17, 0xc1800000 ; 7E2202FF C1800000 v_mul_f32_e32 v18, v17, v15 ; 10241F11 v_mac_f32_e32 v18, v13, v7 ; 3E240F0D v_mov_b32_e32 v19, 0xc1000000 ; 7E2602FF C1000000 v_add_f32_e32 v15, v19, v15 ; 061E1F13 v_add_f32_e32 v18, v19, v18 ; 06242513 v_mov_b32_e32 v20, 0x3e124925 ; 7E2802FF 3E124925 v_mul_f32_e32 v15, v20, v15 ; 101E1F14 v_mul_f32_e32 v18, v20, v18 ; 10242514 v_mul_f32_e32 v21, v13, v12 ; 102A190D v_mul_f32_e32 v21, v16, v21 ; 102A2B10 v_floor_f32_e32 v21, v21 ; 7E2A4915 v_mul_f32_e32 v22, v17, v21 ; 102C2B11 v_mac_f32_e32 v22, v13, v12 ; 3E2C190D v_add_f32_e32 v12, v19, v21 ; 06182B13 v_add_f32_e32 v21, v19, v22 ; 062A2D13 v_mul_f32_e32 v12, v20, v12 ; 10181914 v_mul_f32_e32 v21, v20, v21 ; 102A2B14 v_mad_f32 v22, -v12, v12, 1.0 ; D2820016 23CA190C v_mad_f32 v22, -v21, v21, v22 ; D2820016 245A2B15 v_mul_f32_e32 v12, v15, v12 ; 1018190F v_mac_f32_e32 v12, v18, v21 ; 3E182B12 v_mad_f32 v21, -v15, v15, 1.0 ; D2820015 23CA1F0F v_mad_f32 v21, -v18, v18, v21 ; D2820015 24562512 v_max_f32_e32 v21, 0, v21 ; 202A2A80 v_sqrt_f32_e32 v21, v21 ; 7E2A6715 v_max_f32_e32 v22, 0, v22 ; 202C2C80 v_sqrt_f32_e32 v22, v22 ; 7E2C6716 v_mac_f32_e32 v12, v21, v22 ; 3E182D15 v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680 v_sub_f32_e32 v11, 1.0, v11 ; 081616F2 v_mov_b32_e32 v22, 0xbf333333 ; 7E2C02FF BF333333 v_add_f32_e32 v12, v22, v12 ; 06181916 v_mov_b32_e32 v23, 0x40555555 ; 7E2E02FF 40555555 v_mul_f32_e32 v12, v23, v12 ; 10181917 v_add_f32_e64 v12, 0, v12 clamp ; D206080C 00021880 v_mul_f32_e32 v24, v12, v11 ; 1030170C v_subrev_f32_e32 v25, s4, v2 ; 0A320404 v_mac_f32_e32 v4, v9, v24 ; 3E083109 v_mov_b32_e32 v26, v3 ; 7E340303 image_sample v[24:27], 15, 0, 0, 0, 0, 0, 0, 0, v[25:26], s[12:19], s[0:3] ; F0800F00 00031819 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v9, v0, v26 ; 10123500 v_mac_f32_e32 v9, v1, v25 ; 3E123301 v_mad_f32 v26, v26, v0, -v8 ; D282001A 8422011A v_mac_f32_e32 v26, v1, v25 ; 3E343301 v_mul_f32_e32 v9, v10, v9 ; 1012130A v_rcp_f32_e32 v9, v9 ; 7E125509 v_cmp_gt_f32_e32 vcc, 0, v26 ; 7C083480 v_xor_b32_e32 v25, v26, v14 ; 3A321D1A v_cndmask_b32_e32 v25, v26, v25 ; 0032331A v_mul_f32_e32 v9, v9, v25 ; 10123309 v_mul_f32_e32 v25, v13, v27 ; 1032370D v_mul_f32_e32 v25, v16, v25 ; 10323310 v_floor_f32_e32 v25, v25 ; 7E324919 v_mul_f32_e32 v26, v17, v25 ; 10343311 v_mac_f32_e32 v26, v13, v27 ; 3E34370D v_add_f32_e32 v25, v19, v25 ; 06323313 v_add_f32_e32 v26, v19, v26 ; 06343513 v_mul_f32_e32 v25, v20, v25 ; 10323314 v_mul_f32_e32 v26, v20, v26 ; 10343514 v_mad_f32 v27, -v25, v25, 1.0 ; D282001B 23CA3319 v_mad_f32 v27, -v26, v26, v27 ; D282001B 246E351A v_mul_f32_e32 v25, v15, v25 ; 1032330F v_mac_f32_e32 v25, v18, v26 ; 3E323512 v_max_f32_e32 v26, 0, v27 ; 20343680 v_sqrt_f32_e32 v26, v26 ; 7E34671A v_mac_f32_e32 v25, v21, v26 ; 3E323515 v_add_f32_e64 v9, 0, v9 clamp ; D2060809 00021280 v_sub_f32_e32 v9, 1.0, v9 ; 081212F2 v_add_f32_e32 v25, v22, v25 ; 06323316 v_mul_f32_e32 v25, v23, v25 ; 10323317 v_add_f32_e64 v25, 0, v25 clamp ; D2060819 00023280 v_mul_f32_e32 v26, v25, v9 ; 10341319 v_mac_f32_e32 v4, v24, v26 ; 3E083518 v_mad_f32 v9, v9, v25, 1.0 ; D2820009 03CA3309 v_add_f32_e32 v24, s4, v2 ; 06300404 v_add_f32_e32 v25, 0, v3 ; 06320680 v_mac_f32_e64 v2, 2.0, s4 ; D23E0002 000008F4 v_mac_f32_e32 v9, v12, v11 ; 3E12170C v_mov_b32_e32 v3, v25 ; 7E060319 image_sample v[24:27], 15, 0, 0, 0, 0, 0, 0, 0, v[24:25], s[12:19], s[0:3] ; F0800F00 00031818 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v11, v13, v27 ; 1016370D v_mul_f32_e32 v11, v16, v11 ; 10161710 v_floor_f32_e32 v11, v11 ; 7E16490B v_mul_f32_e32 v12, v17, v11 ; 10181711 v_mac_f32_e32 v12, v13, v27 ; 3E18370D v_add_f32_e32 v11, v19, v11 ; 06161713 v_add_f32_e32 v12, v19, v12 ; 06181913 v_mul_f32_e32 v11, v20, v11 ; 10161714 v_mul_f32_e32 v12, v20, v12 ; 10181914 v_mad_f32 v27, -v11, v11, 1.0 ; D282001B 23CA170B v_mad_f32 v27, -v12, v12, v27 ; D282001B 246E190C v_mul_f32_e32 v11, v15, v11 ; 1016170F v_mac_f32_e32 v11, v18, v12 ; 3E161912 v_max_f32_e32 v12, 0, v27 ; 20183680 v_sqrt_f32_e32 v12, v12 ; 7E18670C v_mac_f32_e32 v11, v21, v12 ; 3E161915 image_sample v[27:30], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00031B02 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, v13, v30 ; 10043D0D v_mul_f32_e32 v2, v16, v2 ; 10040510 v_floor_f32_e32 v2, v2 ; 7E044902 v_mul_f32_e32 v3, v17, v2 ; 10060511 v_mac_f32_e32 v3, v13, v30 ; 3E063D0D v_add_f32_e32 v2, v19, v2 ; 06040513 v_add_f32_e32 v3, v19, v3 ; 06060713 v_mul_f32_e32 v2, v20, v2 ; 10040514 v_mul_f32_e32 v3, v20, v3 ; 10060714 v_mul_f32_e32 v12, v15, v2 ; 1018050F v_mac_f32_e32 v12, v18, v3 ; 3E180712 v_mad_f32 v2, -v2, v2, 1.0 ; D2820002 23CA0502 v_mad_f32 v2, -v3, v3, v2 ; D2820002 240A0703 v_max_f32_e32 v2, 0, v2 ; 20040480 v_sqrt_f32_e32 v2, v2 ; 7E046702 v_mac_f32_e32 v12, v21, v2 ; 3E180515 v_mul_f32_e32 v2, v0, v26 ; 10043500 v_mac_f32_e32 v2, v1, v25 ; 3E043301 v_mad_f32 v3, v26, v0, -v8 ; D2820003 8422011A v_mac_f32_e32 v3, v1, v25 ; 3E063301 v_mul_f32_e32 v2, v10, v2 ; 1004050A v_rcp_f32_e32 v2, v2 ; 7E045502 v_cmp_gt_f32_e32 vcc, 0, v3 ; 7C080680 v_xor_b32_e32 v13, v3, v14 ; 3A1A1D03 v_cndmask_b32_e32 v3, v3, v13 ; 00061B03 v_mul_f32_e32 v2, v2, v3 ; 10040702 v_mad_f32 v3, v29, v0, -v8 ; D2820003 8422011D v_mac_f32_e32 v3, v1, v28 ; 3E063901 v_xor_b32_e32 v8, v3, v14 ; 3A101D03 v_cmp_gt_f32_e32 vcc, 0, v3 ; 7C080680 v_cndmask_b32_e32 v3, v3, v8 ; 00061103 v_mul_f32_e32 v0, v0, v29 ; 10003B00 v_mac_f32_e32 v0, v1, v28 ; 3E003901 v_mul_f32_e32 v0, v10, v0 ; 1000010A v_add_f32_e64 v1, 0, v2 clamp ; D2060801 00020480 v_sub_f32_e32 v1, 1.0, v1 ; 080202F2 v_add_f32_e32 v2, v22, v11 ; 06041716 v_mul_f32_e32 v2, v23, v2 ; 10040517 v_rcp_f32_e32 v0, v0 ; 7E005500 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_mac_f32_e32 v9, v2, v1 ; 3E120302 v_mul_f32_e32 v1, v2, v1 ; 10020302 v_mul_f32_e32 v0, v0, v3 ; 10000700 v_add_f32_e32 v2, v22, v12 ; 06041916 v_mul_f32_e32 v2, v23, v2 ; 10040517 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_sub_f32_e32 v0, 1.0, v0 ; 080000F2 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_mac_f32_e32 v9, v2, v0 ; 3E120102 v_rcp_f32_e32 v3, v9 ; 7E065509 v_mac_f32_e32 v4, v24, v1 ; 3E080318 v_mul_f32_e32 v0, v2, v0 ; 10000102 v_mac_f32_e32 v4, v27, v0 ; 3E08011B v_mul_f32_e32 v0, v3, v4 ; 10000903 v_cvt_pkrtz_f16_f32_e32 v0, v0, v5 ; 5E000B00 v_cvt_pkrtz_f16_f32_e32 v1, v6, v7 ; 5E020F06 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 32 Code Size: 956 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0] DCL CONST[2][0..41] DCL CONST[3][0..13] DCL CONST[4][0] DCL TEMP[0..11], LOCAL IMM[0] FLT32 { 0.9961, 0.0039, 255.0000, 0.0625} IMM[1] FLT32 { 16.0000, -8.0000, 0.1429, 0.0000} IMM[2] FLT32 { 1.0000, -2.0000, 0.0350, -0.7000} IMM[3] UINT32 {0, 0, 0, 0} IMM[4] FLT32 { 3.3333, 2.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0].yzw, TEMP[0], SAMP[0], 2D 2: DP2 TEMP[1].x, TEMP[0].yzzz, IMM[0].xyyy 3: MUL TEMP[2].x, TEMP[0].wwww, IMM[0].zzzz 4: MUL TEMP[3].x, TEMP[2].xxxx, IMM[0].wwww 5: FLR TEMP[3].x, TEMP[3].xxxx 6: MOV TEMP[4].x, TEMP[3].xxxx 7: MUL TEMP[3].x, TEMP[3].xxxx, IMM[1].xxxx 8: ADD TEMP[2].x, TEMP[2].xxxx, -TEMP[3].xxxx 9: MOV TEMP[4].y, TEMP[2].xxxx 10: ADD TEMP[2].xy, TEMP[4].xyyy, IMM[1].yyyy 11: MUL TEMP[2].xy, TEMP[2].xyyy, IMM[1].zzzz 12: MOV TEMP[3].x, TEMP[2].xxxx 13: MOV TEMP[3].y, TEMP[2].yyyy 14: MUL TEMP[4].x, TEMP[2].xxxx, TEMP[2].xxxx 15: ADD TEMP[4].x, IMM[2].xxxx, -TEMP[4].xxxx 16: MUL TEMP[2].x, TEMP[2].yyyy, TEMP[2].yyyy 17: ADD TEMP[2].x, TEMP[4].xxxx, -TEMP[2].xxxx 18: MAX TEMP[2].x, IMM[1].wwww, TEMP[2].xxxx 19: SQRT TEMP[2].x, TEMP[2].xxxx 20: MOV TEMP[3].z, TEMP[2].xxxx 21: MOV TEMP[2].x, IMM[1].wwww 22: MOV TEMP[2].y, CONST[1][0].yyyy 23: MAD TEMP[4].xy, IMM[2].yyyy, TEMP[2].xyyy, IN[0].xyyy 24: MOV TEMP[4].xy, TEMP[4].xyyy 25: TEX TEMP[4], TEMP[4], SAMP[0], 2D 26: DP2 TEMP[5].x, TEMP[4].yzzz, IMM[0].xyyy 27: ADD TEMP[6].x, TEMP[5].xxxx, -TEMP[1].xxxx 28: MUL TEMP[7].x, IMM[0].zzzz, TEMP[4].wwww 29: MUL TEMP[8].x, IMM[0].wwww, TEMP[7].xxxx 30: FLR TEMP[8].x, TEMP[8].xxxx 31: MOV TEMP[9].x, TEMP[8].xxxx 32: MUL TEMP[8].x, IMM[1].xxxx, TEMP[8].xxxx 33: ADD TEMP[7].x, TEMP[7].xxxx, -TEMP[8].xxxx 34: MOV TEMP[9].y, TEMP[7].xxxx 35: ADD TEMP[7].xy, IMM[1].yyyy, TEMP[9].xyyy 36: MUL TEMP[7].xy, TEMP[7].xyyy, IMM[1].zzzz 37: MOV TEMP[8].x, TEMP[7].xxxx 38: MOV TEMP[8].y, TEMP[7].yyyy 39: MUL TEMP[9].x, TEMP[7].xxxx, TEMP[7].xxxx 40: ADD TEMP[9].x, IMM[2].xxxx, -TEMP[9].xxxx 41: MUL TEMP[7].x, TEMP[7].yyyy, TEMP[7].yyyy 42: ADD TEMP[7].x, TEMP[9].xxxx, -TEMP[7].xxxx 43: MAX TEMP[7].x, IMM[1].wwww, TEMP[7].xxxx 44: SQRT TEMP[7].x, TEMP[7].xxxx 45: MOV TEMP[8].z, TEMP[7].xxxx 46: MOV TEMP[7].x, -TEMP[6].xxxx 47: FSLT TEMP[9].x, TEMP[6].xxxx, IMM[1].wwww 48: UIF TEMP[9].xxxx :0 49: MOV TEMP[7].x, TEMP[7].xxxx 50: ELSE :0 51: MOV TEMP[7].x, TEMP[6].xxxx 52: ENDIF 53: MUL TEMP[5].x, TEMP[5].xxxx, IMM[2].zzzz 54: RCP TEMP[5].x, TEMP[5].xxxx 55: MUL TEMP[5].x, TEMP[7].xxxx, TEMP[5].xxxx 56: MOV_SAT TEMP[5].x, TEMP[5].xxxx 57: ADD TEMP[5].x, IMM[2].xxxx, -TEMP[5].xxxx 58: DP3 TEMP[6].x, TEMP[8].xyzz, TEMP[3].xyzz 59: ADD TEMP[6].x, TEMP[6].xxxx, IMM[2].wwww 60: MUL TEMP[6].x, TEMP[6].xxxx, IMM[4].xxxx 61: MOV_SAT TEMP[6].x, TEMP[6].xxxx 62: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx 63: MUL TEMP[4].x, TEMP[5].xxxx, TEMP[4].xxxx 64: ADD TEMP[6].xy, -TEMP[2].xyyy, IN[0].xyyy 65: MOV TEMP[6].xy, TEMP[6].xyyy 66: TEX TEMP[6], TEMP[6], SAMP[0], 2D 67: DP2 TEMP[7].x, TEMP[6].yzzz, IMM[0].xyyy 68: ADD TEMP[8].x, TEMP[7].xxxx, -TEMP[1].xxxx 69: MUL TEMP[9].x, IMM[0].zzzz, TEMP[6].wwww 70: MUL TEMP[10].x, IMM[0].wwww, TEMP[9].xxxx 71: FLR TEMP[10].x, TEMP[10].xxxx 72: MOV TEMP[11].x, TEMP[10].xxxx 73: MUL TEMP[10].x, IMM[1].xxxx, TEMP[10].xxxx 74: ADD TEMP[9].x, TEMP[9].xxxx, -TEMP[10].xxxx 75: MOV TEMP[11].y, TEMP[9].xxxx 76: ADD TEMP[9].xy, IMM[1].yyyy, TEMP[11].xyyy 77: MUL TEMP[9].xy, TEMP[9].xyyy, IMM[1].zzzz 78: MOV TEMP[10].x, TEMP[9].xxxx 79: MOV TEMP[10].y, TEMP[9].yyyy 80: MUL TEMP[11].x, TEMP[9].xxxx, TEMP[9].xxxx 81: ADD TEMP[11].x, IMM[2].xxxx, -TEMP[11].xxxx 82: MUL TEMP[9].x, TEMP[9].yyyy, TEMP[9].yyyy 83: ADD TEMP[9].x, TEMP[11].xxxx, -TEMP[9].xxxx 84: MAX TEMP[9].x, IMM[1].wwww, TEMP[9].xxxx 85: SQRT TEMP[9].x, TEMP[9].xxxx 86: MOV TEMP[10].z, TEMP[9].xxxx 87: MOV TEMP[9].x, -TEMP[8].xxxx 88: FSLT TEMP[11].x, TEMP[8].xxxx, IMM[1].wwww 89: UIF TEMP[11].xxxx :0 90: MOV TEMP[9].x, TEMP[9].xxxx 91: ELSE :0 92: MOV TEMP[9].x, TEMP[8].xxxx 93: ENDIF 94: MUL TEMP[7].x, IMM[2].zzzz, TEMP[7].xxxx 95: RCP TEMP[7].x, TEMP[7].xxxx 96: MUL TEMP[7].x, TEMP[9].xxxx, TEMP[7].xxxx 97: MOV_SAT TEMP[7].x, TEMP[7].xxxx 98: ADD TEMP[7].x, IMM[2].xxxx, -TEMP[7].xxxx 99: DP3 TEMP[8].x, TEMP[10].xyzz, TEMP[3].xyzz 100: ADD TEMP[8].x, IMM[2].wwww, TEMP[8].xxxx 101: MUL TEMP[8].x, TEMP[8].xxxx, IMM[4].xxxx 102: MOV_SAT TEMP[8].x, TEMP[8].xxxx 103: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[8].xxxx 104: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[7].xxxx 105: ADD TEMP[5].x, IMM[2].xxxx, TEMP[5].xxxx 106: MAD TEMP[4].x, TEMP[7].xxxx, TEMP[6].xxxx, TEMP[4].xxxx 107: MOV TEMP[6].xy, IN[0].xyyy 108: TEX TEMP[6].x, TEMP[6], SAMP[0], 2D 109: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[6].xxxx 110: ADD TEMP[6].xy, TEMP[2].xyyy, IN[0].xyyy 111: MOV TEMP[6].xy, TEMP[6].xyyy 112: TEX TEMP[6], TEMP[6], SAMP[0], 2D 113: DP2 TEMP[7].x, TEMP[6].yzzz, IMM[0].xyyy 114: ADD TEMP[8].x, TEMP[7].xxxx, -TEMP[1].xxxx 115: MUL TEMP[9].x, IMM[0].zzzz, TEMP[6].wwww 116: MUL TEMP[10].x, IMM[0].wwww, TEMP[9].xxxx 117: FLR TEMP[10].x, TEMP[10].xxxx 118: MOV TEMP[11].x, TEMP[10].xxxx 119: MUL TEMP[10].x, IMM[1].xxxx, TEMP[10].xxxx 120: ADD TEMP[9].x, TEMP[9].xxxx, -TEMP[10].xxxx 121: MOV TEMP[11].y, TEMP[9].xxxx 122: ADD TEMP[9].xy, IMM[1].yyyy, TEMP[11].xyyy 123: MUL TEMP[9].xy, TEMP[9].xyyy, IMM[1].zzzz 124: MOV TEMP[10].x, TEMP[9].xxxx 125: MOV TEMP[10].y, TEMP[9].yyyy 126: MUL TEMP[11].x, TEMP[9].xxxx, TEMP[9].xxxx 127: ADD TEMP[11].x, IMM[2].xxxx, -TEMP[11].xxxx 128: MUL TEMP[9].x, TEMP[9].yyyy, TEMP[9].yyyy 129: ADD TEMP[9].x, TEMP[11].xxxx, -TEMP[9].xxxx 130: MAX TEMP[9].x, IMM[1].wwww, TEMP[9].xxxx 131: SQRT TEMP[9].x, TEMP[9].xxxx 132: MOV TEMP[10].z, TEMP[9].xxxx 133: MOV TEMP[9].x, -TEMP[8].xxxx 134: FSLT TEMP[11].x, TEMP[8].xxxx, IMM[1].wwww 135: UIF TEMP[11].xxxx :0 136: MOV TEMP[9].x, TEMP[9].xxxx 137: ELSE :0 138: MOV TEMP[9].x, TEMP[8].xxxx 139: ENDIF 140: MUL TEMP[7].x, IMM[2].zzzz, TEMP[7].xxxx 141: RCP TEMP[7].x, TEMP[7].xxxx 142: MUL TEMP[7].x, TEMP[9].xxxx, TEMP[7].xxxx 143: MOV_SAT TEMP[7].x, TEMP[7].xxxx 144: ADD TEMP[7].x, IMM[2].xxxx, -TEMP[7].xxxx 145: DP3 TEMP[8].x, TEMP[10].xyzz, TEMP[3].xyzz 146: ADD TEMP[8].x, IMM[2].wwww, TEMP[8].xxxx 147: MUL TEMP[8].x, TEMP[8].xxxx, IMM[4].xxxx 148: MOV_SAT TEMP[8].x, TEMP[8].xxxx 149: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[8].xxxx 150: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[7].xxxx 151: MAD TEMP[4].x, TEMP[7].xxxx, TEMP[6].xxxx, TEMP[4].xxxx 152: MAD TEMP[2].xy, TEMP[2].xyyy, IMM[4].yyyy, IN[0].xyyy 153: MOV TEMP[2].xy, TEMP[2].xyyy 154: TEX TEMP[2], TEMP[2], SAMP[0], 2D 155: DP2 TEMP[6].x, TEMP[2].yzzz, IMM[0].xyyy 156: ADD TEMP[1].x, TEMP[6].xxxx, -TEMP[1].xxxx 157: MUL TEMP[7].x, IMM[0].zzzz, TEMP[2].wwww 158: MUL TEMP[8].x, IMM[0].wwww, TEMP[7].xxxx 159: FLR TEMP[8].x, TEMP[8].xxxx 160: MOV TEMP[9].x, TEMP[8].xxxx 161: MUL TEMP[8].x, IMM[1].xxxx, TEMP[8].xxxx 162: ADD TEMP[7].x, TEMP[7].xxxx, -TEMP[8].xxxx 163: MOV TEMP[9].y, TEMP[7].xxxx 164: ADD TEMP[7].xy, IMM[1].yyyy, TEMP[9].xyyy 165: MUL TEMP[7].xy, TEMP[7].xyyy, IMM[1].zzzz 166: MOV TEMP[8].x, TEMP[7].xxxx 167: MOV TEMP[8].y, TEMP[7].yyyy 168: MUL TEMP[9].x, TEMP[7].xxxx, TEMP[7].xxxx 169: ADD TEMP[9].x, IMM[2].xxxx, -TEMP[9].xxxx 170: MUL TEMP[7].x, TEMP[7].yyyy, TEMP[7].yyyy 171: ADD TEMP[7].x, TEMP[9].xxxx, -TEMP[7].xxxx 172: MAX TEMP[7].x, IMM[1].wwww, TEMP[7].xxxx 173: SQRT TEMP[7].x, TEMP[7].xxxx 174: MOV TEMP[8].z, TEMP[7].xxxx 175: MOV TEMP[7].x, -TEMP[1].xxxx 176: FSLT TEMP[9].x, TEMP[1].xxxx, IMM[1].wwww 177: UIF TEMP[9].xxxx :0 178: MOV TEMP[7].x, TEMP[7].xxxx 179: ELSE :0 180: MOV TEMP[7].x, TEMP[1].xxxx 181: ENDIF 182: MUL TEMP[1].x, IMM[2].zzzz, TEMP[6].xxxx 183: RCP TEMP[1].x, TEMP[1].xxxx 184: MUL TEMP[1].x, TEMP[7].xxxx, TEMP[1].xxxx 185: MOV_SAT TEMP[1].x, TEMP[1].xxxx 186: ADD TEMP[1].x, IMM[2].xxxx, -TEMP[1].xxxx 187: DP3 TEMP[3].x, TEMP[8].xyzz, TEMP[3].xyzz 188: ADD TEMP[3].x, IMM[2].wwww, TEMP[3].xxxx 189: MUL TEMP[3].x, TEMP[3].xxxx, IMM[4].xxxx 190: MOV_SAT TEMP[3].x, TEMP[3].xxxx 191: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[3].xxxx 192: MAD TEMP[2].x, TEMP[1].xxxx, TEMP[2].xxxx, TEMP[4].xxxx 193: ADD TEMP[1].x, TEMP[5].xxxx, TEMP[1].xxxx 194: RCP TEMP[1].x, TEMP[1].xxxx 195: MUL TEMP[1].x, TEMP[2].xxxx, TEMP[1].xxxx 196: MOV TEMP[1].y, TEMP[0].yyyy 197: MOV TEMP[1].z, TEMP[0].zzzz 198: MOV TEMP[1].w, TEMP[0].wwww 199: MOV OUT[0], TEMP[1] 200: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %25 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %26 = load <8 x i32>, <8 x i32> addrspace(2)* %25, align 32, !tbaa !0 %27 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %28 = load <4 x i32>, <4 x i32> addrspace(2)* %27, align 16, !tbaa !0 %29 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %31 = bitcast float %29 to i32 %32 = bitcast float %30 to i32 %33 = insertelement <2 x i32> undef, i32 %31, i32 0 %34 = insertelement <2 x i32> %33, i32 %32, i32 1 %35 = bitcast <8 x i32> %26 to <32 x i8> %36 = bitcast <4 x i32> %28 to <16 x i8> %37 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %34, <32 x i8> %35, <16 x i8> %36, i32 2) %38 = extractelement <4 x float> %37, i32 1 %39 = extractelement <4 x float> %37, i32 2 %40 = extractelement <4 x float> %37, i32 3 %41 = fmul float %38, 0x3FEFE02000000000 %42 = fmul float %39, 0x3F6FE01F80000000 %43 = fadd float %41, %42 %44 = fmul float %40, 2.550000e+02 %45 = fmul float %44, 6.250000e-02 %46 = call float @floor(float %45) %47 = fmul float %46, 1.600000e+01 %48 = fsub float %44, %47 %49 = fadd float %46, -8.000000e+00 %50 = fadd float %48, -8.000000e+00 %51 = fmul float %49, 0x3FC24924A0000000 %52 = fmul float %50, 0x3FC24924A0000000 %53 = fmul float %51, %51 %54 = fsub float 1.000000e+00, %53 %55 = fmul float %52, %52 %56 = fsub float %54, %55 %57 = call float @llvm.maxnum.f32(float %56, float 0.000000e+00) %58 = call float @llvm.sqrt.f32(float %57) %59 = fmul float %24, -2.000000e+00 %60 = fadd float %59, %30 %61 = bitcast float %29 to i32 %62 = bitcast float %60 to i32 %63 = insertelement <2 x i32> undef, i32 %61, i32 0 %64 = insertelement <2 x i32> %63, i32 %62, i32 1 %65 = bitcast <8 x i32> %26 to <32 x i8> %66 = bitcast <4 x i32> %28 to <16 x i8> %67 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %64, <32 x i8> %65, <16 x i8> %66, i32 2) %68 = extractelement <4 x float> %67, i32 0 %69 = extractelement <4 x float> %67, i32 1 %70 = extractelement <4 x float> %67, i32 2 %71 = extractelement <4 x float> %67, i32 3 %72 = fmul float %69, 0x3FEFE02000000000 %73 = fmul float %70, 0x3F6FE01F80000000 %74 = fadd float %72, %73 %75 = fsub float %74, %43 %76 = fmul float %71, 2.550000e+02 %77 = fmul float %76, 6.250000e-02 %78 = call float @floor(float %77) %79 = fmul float %78, 1.600000e+01 %80 = fsub float %76, %79 %81 = fadd float %78, -8.000000e+00 %82 = fadd float %80, -8.000000e+00 %83 = fmul float %81, 0x3FC24924A0000000 %84 = fmul float %82, 0x3FC24924A0000000 %85 = fmul float %83, %83 %86 = fsub float 1.000000e+00, %85 %87 = fmul float %84, %84 %88 = fsub float %86, %87 %89 = call float @llvm.maxnum.f32(float %88, float 0.000000e+00) %90 = call float @llvm.sqrt.f32(float %89) %91 = fsub float -0.000000e+00, %75 %92 = fcmp olt float %75, 0.000000e+00 %. = select i1 %92, float %91, float %75 %93 = fmul float %74, 0x3FA1EB8520000000 %94 = fdiv float 1.000000e+00, %93 %95 = fmul float %., %94 %96 = call float @llvm.AMDIL.clamp.(float %95, float 0.000000e+00, float 1.000000e+00) %97 = fsub float 1.000000e+00, %96 %98 = fmul float %83, %51 %99 = fmul float %84, %52 %100 = fadd float %99, %98 %101 = fmul float %90, %58 %102 = fadd float %100, %101 %103 = fadd float %102, 0xBFE6666660000000 %104 = fmul float %103, 0x400AAAAAA0000000 %105 = call float @llvm.AMDIL.clamp.(float %104, float 0.000000e+00, float 1.000000e+00) %106 = fmul float %97, %105 %107 = fmul float %106, %68 %108 = fsub float %30, %24 %109 = bitcast float %29 to i32 %110 = bitcast float %108 to i32 %111 = insertelement <2 x i32> undef, i32 %109, i32 0 %112 = insertelement <2 x i32> %111, i32 %110, i32 1 %113 = bitcast <8 x i32> %26 to <32 x i8> %114 = bitcast <4 x i32> %28 to <16 x i8> %115 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %112, <32 x i8> %113, <16 x i8> %114, i32 2) %116 = extractelement <4 x float> %115, i32 0 %117 = extractelement <4 x float> %115, i32 1 %118 = extractelement <4 x float> %115, i32 2 %119 = extractelement <4 x float> %115, i32 3 %120 = fmul float %117, 0x3FEFE02000000000 %121 = fmul float %118, 0x3F6FE01F80000000 %122 = fadd float %120, %121 %123 = fsub float %122, %43 %124 = fmul float %119, 2.550000e+02 %125 = fmul float %124, 6.250000e-02 %126 = call float @floor(float %125) %127 = fmul float %126, 1.600000e+01 %128 = fsub float %124, %127 %129 = fadd float %126, -8.000000e+00 %130 = fadd float %128, -8.000000e+00 %131 = fmul float %129, 0x3FC24924A0000000 %132 = fmul float %130, 0x3FC24924A0000000 %133 = fmul float %131, %131 %134 = fsub float 1.000000e+00, %133 %135 = fmul float %132, %132 %136 = fsub float %134, %135 %137 = call float @llvm.maxnum.f32(float %136, float 0.000000e+00) %138 = call float @llvm.sqrt.f32(float %137) %139 = fsub float -0.000000e+00, %123 %140 = fcmp olt float %123, 0.000000e+00 %temp36.0 = select i1 %140, float %139, float %123 %141 = fmul float %122, 0x3FA1EB8520000000 %142 = fdiv float 1.000000e+00, %141 %143 = fmul float %temp36.0, %142 %144 = call float @llvm.AMDIL.clamp.(float %143, float 0.000000e+00, float 1.000000e+00) %145 = fsub float 1.000000e+00, %144 %146 = fmul float %131, %51 %147 = fmul float %132, %52 %148 = fadd float %147, %146 %149 = fmul float %138, %58 %150 = fadd float %148, %149 %151 = fadd float %150, 0xBFE6666660000000 %152 = fmul float %151, 0x400AAAAAA0000000 %153 = call float @llvm.AMDIL.clamp.(float %152, float 0.000000e+00, float 1.000000e+00) %154 = fmul float %145, %153 %155 = fadd float %106, %154 %156 = fadd float %155, 1.000000e+00 %157 = fmul float %154, %116 %158 = fadd float %157, %107 %159 = bitcast float %29 to i32 %160 = bitcast float %30 to i32 %161 = insertelement <2 x i32> undef, i32 %159, i32 0 %162 = insertelement <2 x i32> %161, i32 %160, i32 1 %163 = bitcast <8 x i32> %26 to <32 x i8> %164 = bitcast <4 x i32> %28 to <16 x i8> %165 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %162, <32 x i8> %163, <16 x i8> %164, i32 2) %166 = extractelement <4 x float> %165, i32 0 %167 = fadd float %158, %166 %168 = fadd float %29, 0.000000e+00 %169 = fadd float %24, %30 %170 = bitcast float %168 to i32 %171 = bitcast float %169 to i32 %172 = insertelement <2 x i32> undef, i32 %170, i32 0 %173 = insertelement <2 x i32> %172, i32 %171, i32 1 %174 = bitcast <8 x i32> %26 to <32 x i8> %175 = bitcast <4 x i32> %28 to <16 x i8> %176 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %173, <32 x i8> %174, <16 x i8> %175, i32 2) %177 = extractelement <4 x float> %176, i32 0 %178 = extractelement <4 x float> %176, i32 1 %179 = extractelement <4 x float> %176, i32 2 %180 = extractelement <4 x float> %176, i32 3 %181 = fmul float %178, 0x3FEFE02000000000 %182 = fmul float %179, 0x3F6FE01F80000000 %183 = fadd float %181, %182 %184 = fsub float %183, %43 %185 = fmul float %180, 2.550000e+02 %186 = fmul float %185, 6.250000e-02 %187 = call float @floor(float %186) %188 = fmul float %187, 1.600000e+01 %189 = fsub float %185, %188 %190 = fadd float %187, -8.000000e+00 %191 = fadd float %189, -8.000000e+00 %192 = fmul float %190, 0x3FC24924A0000000 %193 = fmul float %191, 0x3FC24924A0000000 %194 = fmul float %192, %192 %195 = fsub float 1.000000e+00, %194 %196 = fmul float %193, %193 %197 = fsub float %195, %196 %198 = call float @llvm.maxnum.f32(float %197, float 0.000000e+00) %199 = call float @llvm.sqrt.f32(float %198) %200 = fsub float -0.000000e+00, %184 %201 = fcmp olt float %184, 0.000000e+00 %.57 = select i1 %201, float %200, float %184 %202 = fmul float %183, 0x3FA1EB8520000000 %203 = fdiv float 1.000000e+00, %202 %204 = fmul float %.57, %203 %205 = call float @llvm.AMDIL.clamp.(float %204, float 0.000000e+00, float 1.000000e+00) %206 = fsub float 1.000000e+00, %205 %207 = fmul float %192, %51 %208 = fmul float %193, %52 %209 = fadd float %208, %207 %210 = fmul float %199, %58 %211 = fadd float %209, %210 %212 = fadd float %211, 0xBFE6666660000000 %213 = fmul float %212, 0x400AAAAAA0000000 %214 = call float @llvm.AMDIL.clamp.(float %213, float 0.000000e+00, float 1.000000e+00) %215 = fmul float %206, %214 %216 = fadd float %156, %215 %217 = fmul float %215, %177 %218 = fadd float %217, %167 %219 = fadd float %29, 0.000000e+00 %220 = fmul float %24, 2.000000e+00 %221 = fadd float %220, %30 %222 = bitcast float %219 to i32 %223 = bitcast float %221 to i32 %224 = insertelement <2 x i32> undef, i32 %222, i32 0 %225 = insertelement <2 x i32> %224, i32 %223, i32 1 %226 = bitcast <8 x i32> %26 to <32 x i8> %227 = bitcast <4 x i32> %28 to <16 x i8> %228 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %225, <32 x i8> %226, <16 x i8> %227, i32 2) %229 = extractelement <4 x float> %228, i32 0 %230 = extractelement <4 x float> %228, i32 1 %231 = extractelement <4 x float> %228, i32 2 %232 = extractelement <4 x float> %228, i32 3 %233 = fmul float %230, 0x3FEFE02000000000 %234 = fmul float %231, 0x3F6FE01F80000000 %235 = fadd float %233, %234 %236 = fsub float %235, %43 %237 = fmul float %232, 2.550000e+02 %238 = fmul float %237, 6.250000e-02 %239 = call float @floor(float %238) %240 = fmul float %239, 1.600000e+01 %241 = fsub float %237, %240 %242 = fadd float %239, -8.000000e+00 %243 = fadd float %241, -8.000000e+00 %244 = fmul float %242, 0x3FC24924A0000000 %245 = fmul float %243, 0x3FC24924A0000000 %246 = fmul float %244, %244 %247 = fsub float 1.000000e+00, %246 %248 = fmul float %245, %245 %249 = fsub float %247, %248 %250 = call float @llvm.maxnum.f32(float %249, float 0.000000e+00) %251 = call float @llvm.sqrt.f32(float %250) %252 = fsub float -0.000000e+00, %236 %253 = fcmp olt float %236, 0.000000e+00 %temp28.1 = select i1 %253, float %252, float %236 %254 = fmul float %235, 0x3FA1EB8520000000 %255 = fdiv float 1.000000e+00, %254 %256 = fmul float %temp28.1, %255 %257 = call float @llvm.AMDIL.clamp.(float %256, float 0.000000e+00, float 1.000000e+00) %258 = fsub float 1.000000e+00, %257 %259 = fmul float %244, %51 %260 = fmul float %245, %52 %261 = fadd float %260, %259 %262 = fmul float %251, %58 %263 = fadd float %261, %262 %264 = fadd float %263, 0xBFE6666660000000 %265 = fmul float %264, 0x400AAAAAA0000000 %266 = call float @llvm.AMDIL.clamp.(float %265, float 0.000000e+00, float 1.000000e+00) %267 = fmul float %258, %266 %268 = fmul float %267, %229 %269 = fadd float %268, %218 %270 = fadd float %216, %267 %271 = fdiv float 1.000000e+00, %270 %272 = fmul float %269, %271 %273 = call i32 @llvm.SI.packf16(float %272, float %38) %274 = bitcast i32 %273 to float %275 = call i32 @llvm.SI.packf16(float %39, float %40) %276 = bitcast i32 %275 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %274, float %276, float %274, float %276) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @floor(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x1 ; C2040101 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_mov_b32_e32 v0, v2 ; 7E000302 v_mov_b32_e32 v1, v3 ; 7E020303 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, -2.0, s8, v3 ; D2820001 040C10F5 v_mov_b32_e32 v4, 0x3b7f00fc ; 7E0802FF 3B7F00FC v_mov_b32_e32 v5, 0x3f7f0100 ; 7E0A02FF 3F7F0100 image_sample v[6:9], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00030602 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v10, v4, v8 ; 10141104 v_mac_f32_e32 v10, v5, v7 ; 3E140F05 image_sample v[11:14], 15, 0, 0, 0, 0, 0, 0, 0, v[0:1], s[12:19], s[0:3] ; F0800F00 00030B00 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v4, v13 ; 10001B04 v_mac_f32_e32 v0, v5, v12 ; 3E001905 v_mad_f32 v1, v13, v4, -v10 ; D2820001 842A090D v_mac_f32_e32 v1, v5, v12 ; 3E021905 v_mov_b32_e32 v12, 0x3d0f5c29 ; 7E1802FF 3D0F5C29 v_mul_f32_e32 v0, v12, v0 ; 1000010C v_rcp_f32_e32 v0, v0 ; 7E005500 v_mov_b32_e32 v13, 0x80000000 ; 7E1A02FF 80000000 v_xor_b32_e32 v15, v1, v13 ; 3A1E1B01 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e32 v1, v1, v15 ; 00021F01 v_mul_f32_e32 v0, v0, v1 ; 10000300 v_mov_b32_e32 v1, 0x437f0000 ; 7E0202FF 437F0000 v_mul_f32_e32 v15, v1, v9 ; 101E1301 v_mov_b32_e32 v16, 0x3d800000 ; 7E2002FF 3D800000 v_mul_f32_e32 v15, v16, v15 ; 101E1F10 v_floor_f32_e32 v15, v15 ; 7E1E490F v_mov_b32_e32 v17, 0xc1800000 ; 7E2202FF C1800000 v_mul_f32_e32 v18, v17, v15 ; 10241F11 v_mac_f32_e32 v18, v1, v9 ; 3E241301 v_mov_b32_e32 v19, 0xc1000000 ; 7E2602FF C1000000 v_add_f32_e32 v15, v19, v15 ; 061E1F13 v_add_f32_e32 v18, v19, v18 ; 06242513 v_mov_b32_e32 v20, 0x3e124925 ; 7E2802FF 3E124925 v_mul_f32_e32 v15, v20, v15 ; 101E1F14 v_mul_f32_e32 v18, v20, v18 ; 10242514 v_mul_f32_e32 v21, v1, v14 ; 102A1D01 v_mul_f32_e32 v21, v16, v21 ; 102A2B10 v_floor_f32_e32 v21, v21 ; 7E2A4915 v_mul_f32_e32 v22, v17, v21 ; 102C2B11 v_mac_f32_e32 v22, v1, v14 ; 3E2C1D01 v_add_f32_e32 v14, v19, v21 ; 061C2B13 v_add_f32_e32 v21, v19, v22 ; 062A2D13 v_mul_f32_e32 v14, v20, v14 ; 101C1D14 v_mul_f32_e32 v21, v20, v21 ; 102A2B14 v_mad_f32 v22, -v14, v14, 1.0 ; D2820016 23CA1D0E v_mad_f32 v22, -v21, v21, v22 ; D2820016 245A2B15 v_mul_f32_e32 v14, v15, v14 ; 101C1D0F v_mac_f32_e32 v14, v18, v21 ; 3E1C2B12 v_mad_f32 v21, -v15, v15, 1.0 ; D2820015 23CA1F0F v_mad_f32 v21, -v18, v18, v21 ; D2820015 24562512 v_max_f32_e32 v21, 0, v21 ; 202A2A80 v_sqrt_f32_e32 v21, v21 ; 7E2A6715 v_max_f32_e32 v22, 0, v22 ; 202C2C80 v_sqrt_f32_e32 v22, v22 ; 7E2C6716 v_mac_f32_e32 v14, v21, v22 ; 3E1C2D15 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_sub_f32_e32 v0, 1.0, v0 ; 080000F2 v_mov_b32_e32 v22, 0xbf333333 ; 7E2C02FF BF333333 v_add_f32_e32 v14, v22, v14 ; 061C1D16 v_mov_b32_e32 v23, 0x40555555 ; 7E2E02FF 40555555 v_mul_f32_e32 v14, v23, v14 ; 101C1D17 v_add_f32_e64 v14, 0, v14 clamp ; D206080E 00021C80 v_mul_f32_e32 v24, v14, v0 ; 1030010E v_subrev_f32_e32 v25, s8, v3 ; 0A320608 v_mov_b32_e32 v26, v2 ; 7E340302 v_mov_b32_e32 v27, v3 ; 7E360303 v_mac_f32_e32 v6, v11, v24 ; 3E0C310B v_mov_b32_e32 v27, v25 ; 7E360319 image_sample v[24:27], 15, 0, 0, 0, 0, 0, 0, 0, v[26:27], s[12:19], s[0:3] ; F0800F00 0003181A s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v11, v4, v26 ; 10163504 v_mac_f32_e32 v11, v5, v25 ; 3E163305 v_mad_f32 v26, v26, v4, -v10 ; D282001A 842A091A v_mac_f32_e32 v26, v5, v25 ; 3E343305 v_mul_f32_e32 v11, v12, v11 ; 1016170C v_rcp_f32_e32 v11, v11 ; 7E16550B v_cmp_gt_f32_e32 vcc, 0, v26 ; 7C083480 v_xor_b32_e32 v25, v26, v13 ; 3A321B1A v_cndmask_b32_e32 v25, v26, v25 ; 0032331A v_mul_f32_e32 v11, v11, v25 ; 1016330B v_mul_f32_e32 v25, v1, v27 ; 10323701 v_mul_f32_e32 v25, v16, v25 ; 10323310 v_floor_f32_e32 v25, v25 ; 7E324919 v_mul_f32_e32 v26, v17, v25 ; 10343311 v_mac_f32_e32 v26, v1, v27 ; 3E343701 v_add_f32_e32 v25, v19, v25 ; 06323313 v_add_f32_e32 v26, v19, v26 ; 06343513 v_mul_f32_e32 v25, v20, v25 ; 10323314 v_mul_f32_e32 v26, v20, v26 ; 10343514 v_mad_f32 v27, -v25, v25, 1.0 ; D282001B 23CA3319 v_mad_f32 v27, -v26, v26, v27 ; D282001B 246E351A v_mul_f32_e32 v25, v15, v25 ; 1032330F v_mac_f32_e32 v25, v18, v26 ; 3E323512 v_max_f32_e32 v26, 0, v27 ; 20343680 v_sqrt_f32_e32 v26, v26 ; 7E34671A v_mac_f32_e32 v25, v21, v26 ; 3E323515 v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680 v_sub_f32_e32 v11, 1.0, v11 ; 081616F2 v_add_f32_e32 v25, v22, v25 ; 06323316 v_mul_f32_e32 v25, v23, v25 ; 10323317 v_add_f32_e64 v25, 0, v25 clamp ; D2060819 00023280 v_mul_f32_e32 v26, v25, v11 ; 10341719 v_mac_f32_e32 v6, v24, v26 ; 3E0C3518 v_mad_f32 v11, v11, v25, 1.0 ; D282000B 03CA330B v_mac_f32_e32 v11, v14, v0 ; 3E16010E v_add_f32_e32 v24, 0, v2 ; 06300480 v_add_f32_e32 v25, s8, v3 ; 06320608 v_mac_f32_e64 v3, 2.0, s8 ; D23E0003 000010F4 image_sample v[26:29], 15, 0, 0, 0, 0, 0, 0, 0, v[24:25], s[12:19], s[0:3] ; F0800F00 00031A18 v_mov_b32_e32 v25, v3 ; 7E320303 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v1, v29 ; 10003B01 v_mul_f32_e32 v0, v16, v0 ; 10000110 v_floor_f32_e32 v0, v0 ; 7E004900 v_mul_f32_e32 v2, v17, v0 ; 10040111 v_mac_f32_e32 v2, v1, v29 ; 3E043B01 v_add_f32_e32 v0, v19, v0 ; 06000113 v_add_f32_e32 v2, v19, v2 ; 06040513 v_mul_f32_e32 v0, v20, v0 ; 10000114 v_mul_f32_e32 v2, v20, v2 ; 10040514 v_mad_f32 v3, -v0, v0, 1.0 ; D2820003 23CA0100 v_mad_f32 v3, -v2, v2, v3 ; D2820003 240E0502 v_mul_f32_e32 v0, v15, v0 ; 1000010F v_mac_f32_e32 v0, v18, v2 ; 3E000512 v_max_f32_e32 v2, 0, v3 ; 20040680 v_sqrt_f32_e32 v2, v2 ; 7E046702 v_mac_f32_e32 v0, v21, v2 ; 3E000515 image_sample v[29:32], 15, 0, 0, 0, 0, 0, 0, 0, v[24:25], s[12:19], s[0:3] ; F0800F00 00031D18 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, v1, v32 ; 10044101 v_mul_f32_e32 v2, v16, v2 ; 10040510 v_floor_f32_e32 v2, v2 ; 7E044902 v_mul_f32_e32 v3, v17, v2 ; 10060511 v_mac_f32_e32 v3, v1, v32 ; 3E064101 v_add_f32_e32 v1, v19, v2 ; 06020513 v_add_f32_e32 v2, v19, v3 ; 06040713 v_mul_f32_e32 v1, v20, v1 ; 10020314 v_mul_f32_e32 v2, v20, v2 ; 10040514 v_mul_f32_e32 v3, v15, v1 ; 1006030F v_mac_f32_e32 v3, v18, v2 ; 3E060512 v_mad_f32 v1, -v1, v1, 1.0 ; D2820001 23CA0301 v_mad_f32 v1, -v2, v2, v1 ; D2820001 24060502 v_max_f32_e32 v1, 0, v1 ; 20020280 v_sqrt_f32_e32 v1, v1 ; 7E026701 v_mac_f32_e32 v3, v21, v1 ; 3E060315 v_mul_f32_e32 v1, v4, v28 ; 10023904 v_mac_f32_e32 v1, v5, v27 ; 3E023705 v_mad_f32 v2, v28, v4, -v10 ; D2820002 842A091C v_mac_f32_e32 v2, v5, v27 ; 3E043705 v_mul_f32_e32 v1, v12, v1 ; 1002030C v_rcp_f32_e32 v1, v1 ; 7E025501 v_cmp_gt_f32_e32 vcc, 0, v2 ; 7C080480 v_xor_b32_e32 v14, v2, v13 ; 3A1C1B02 v_cndmask_b32_e32 v2, v2, v14 ; 00041D02 v_mul_f32_e32 v1, v1, v2 ; 10020501 v_mad_f32 v2, v31, v4, -v10 ; D2820002 842A091F v_mac_f32_e32 v2, v5, v30 ; 3E043D05 v_xor_b32_e32 v10, v2, v13 ; 3A141B02 v_cmp_gt_f32_e32 vcc, 0, v2 ; 7C080480 v_cndmask_b32_e32 v2, v2, v10 ; 00041502 v_mul_f32_e32 v4, v4, v31 ; 10083F04 v_mac_f32_e32 v4, v5, v30 ; 3E083D05 v_mul_f32_e32 v4, v12, v4 ; 1008090C v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_sub_f32_e32 v1, 1.0, v1 ; 080202F2 v_add_f32_e32 v0, v22, v0 ; 06000116 v_mul_f32_e32 v0, v23, v0 ; 10000117 v_rcp_f32_e32 v4, v4 ; 7E085504 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_mac_f32_e32 v11, v0, v1 ; 3E160300 v_mul_f32_e32 v0, v0, v1 ; 10000300 v_mul_f32_e32 v1, v4, v2 ; 10020504 v_add_f32_e32 v2, v22, v3 ; 06040716 v_mul_f32_e32 v2, v23, v2 ; 10040517 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_sub_f32_e32 v1, 1.0, v1 ; 080202F2 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_mac_f32_e32 v11, v2, v1 ; 3E160302 v_rcp_f32_e32 v3, v11 ; 7E06550B v_mac_f32_e32 v6, v26, v0 ; 3E0C011A v_mul_f32_e32 v0, v2, v1 ; 10000302 v_mac_f32_e32 v6, v29, v0 ; 3E0C011D v_mul_f32_e32 v0, v3, v6 ; 10000D03 v_cvt_pkrtz_f16_f32_e32 v0, v0, v7 ; 5E000F00 v_cvt_pkrtz_f16_f32_e32 v1, v8, v9 ; 5E021308 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 36 Code Size: 972 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL CONST[1][0..41] DCL CONST[2][0..13] DCL CONST[3][0] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL DCL TEMP[2..3], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, -1.0000, 0.0000} IMM[1] UINT32 {0, 64, 80, 96} IMM[2] UINT32 {112, 68, 84, 100} IMM[3] UINT32 {116, 72, 88, 104} IMM[4] UINT32 {120, 76, 92, 108} IMM[5] UINT32 {124, 304, 320, 0} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MOV TEMP[0].zw, IMM[0].yyyy 4: MOV TEMP[0].x, IN[0].xxxx 5: MOV TEMP[0].y, IN[0].yyyy 6: MOV TEMP[2].x, CONST[1][4].xxxx 7: MOV TEMP[2].y, CONST[1][5].xxxx 8: MOV TEMP[2].z, CONST[1][6].xxxx 9: MOV TEMP[2].w, CONST[1][7].xxxx 10: DP4 TEMP[2].x, TEMP[0], TEMP[2] 11: MOV TEMP[3].x, CONST[1][4].yyyy 12: MOV TEMP[3].y, CONST[1][5].yyyy 13: MOV TEMP[3].z, CONST[1][6].yyyy 14: MOV TEMP[3].w, CONST[1][7].yyyy 15: DP4 TEMP[3].x, TEMP[0], TEMP[3] 16: MOV TEMP[2].y, TEMP[3].xxxx 17: MOV TEMP[3].x, CONST[1][4].zzzz 18: MOV TEMP[3].y, CONST[1][5].zzzz 19: MOV TEMP[3].z, CONST[1][6].zzzz 20: MOV TEMP[3].w, CONST[1][7].zzzz 21: DP4 TEMP[3].x, TEMP[0], TEMP[3] 22: MOV TEMP[2].z, TEMP[3].xxxx 23: MOV TEMP[3].x, CONST[1][4].wwww 24: MOV TEMP[3].y, CONST[1][5].wwww 25: MOV TEMP[3].z, CONST[1][6].wwww 26: MOV TEMP[3].w, CONST[1][7].wwww 27: DP4 TEMP[0].x, TEMP[0], TEMP[3] 28: RCP TEMP[0].xyz, TEMP[0].xxxx 29: MAD TEMP[0].xyz, TEMP[2].xyzz, TEMP[0].xyzz, -CONST[1][19].xyzz 30: DP3 TEMP[2].x, TEMP[0].xyzz, TEMP[0].xyzz 31: RSQ TEMP[2].x, TEMP[2].xxxx 32: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xxxx 33: DP3 TEMP[2].x, CONST[1][20].xyzz, TEMP[0].xyzz 34: RCP TEMP[2].x, TEMP[2].xxxx 35: MUL TEMP[0].xyz, TEMP[2].xxxx, TEMP[0].xyzz 36: MOV TEMP[2].zw, IMM[0].yyzy 37: MOV TEMP[2].x, IN[0].xxxx 38: MOV TEMP[2].y, -IN[0].yyyy 39: MOV OUT[1], TEMP[1] 40: MOV OUT[2].xy, IN[1].xyxx 41: MOV OUT[0], TEMP[2] 42: MOV OUT[3].xyz, TEMP[0].xyzx 43: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328) %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 %37 = add i32 %5, %7 %38 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %37) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 %43 = add i32 %5, %7 %44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %43) %45 = extractelement <4 x float> %44, i32 0 %46 = extractelement <4 x float> %44, i32 1 %47 = fmul float %39, %13 %48 = fmul float %40, %17 %49 = fadd float %47, %48 %50 = fadd float %49, %21 %51 = fadd float %50, %25 %52 = fmul float %39, %14 %53 = fmul float %40, %18 %54 = fadd float %52, %53 %55 = fadd float %54, %22 %56 = fadd float %55, %26 %57 = fmul float %39, %15 %58 = fmul float %40, %19 %59 = fadd float %57, %58 %60 = fadd float %59, %23 %61 = fadd float %60, %27 %62 = fmul float %39, %16 %63 = fmul float %40, %20 %64 = fadd float %62, %63 %65 = fadd float %64, %24 %66 = fadd float %65, %28 %67 = fdiv float 1.000000e+00, %66 %68 = fmul float %51, %67 %69 = fsub float %68, %29 %70 = fmul float %56, %67 %71 = fsub float %70, %30 %72 = fmul float %61, %67 %73 = fsub float %72, %31 %74 = fmul float %69, %69 %75 = fmul float %71, %71 %76 = fadd float %75, %74 %77 = fmul float %73, %73 %78 = fadd float %76, %77 %79 = call float @llvm.AMDGPU.rsq.clamped.f32(float %78) %80 = fmul float %69, %79 %81 = fmul float %71, %79 %82 = fmul float %73, %79 %83 = fmul float %32, %80 %84 = fmul float %33, %81 %85 = fadd float %84, %83 %86 = fmul float %34, %82 %87 = fadd float %85, %86 %88 = fdiv float 1.000000e+00, %87 %89 = fmul float %88, %80 %90 = fmul float %88, %81 %91 = fmul float %88, %82 %92 = fsub float -0.000000e+00, %40 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %45, float %46, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %89, float %90, float %91, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %39, float %92, float -1.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0x1a ; C206011A buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[3:6], v0, s[8:11], 0 idxen ; E00C2000 80020300 s_buffer_load_dword s5, s[0:3], 0x1c ; C202811C s_buffer_load_dword s6, s[0:3], 0x1d ; C203011D s_buffer_load_dword s7, s[0:3], 0x1e ; C203811E s_buffer_load_dword s8, s[0:3], 0x15 ; C2040115 s_buffer_load_dword s9, s[0:3], 0x16 ; C2048116 s_buffer_load_dword s10, s[0:3], 0x17 ; C2050117 v_mov_b32_e32 v0, s12 ; 7E00020C s_buffer_load_dword s11, s[0:3], 0x18 ; C2058118 s_buffer_load_dword s12, s[0:3], 0x19 ; C2060119 s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v5, s4 ; 7E0A0204 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_buffer_load_dword s13, s[0:3], 0x11 ; C2068111 s_buffer_load_dword s14, s[0:3], 0x12 ; C2070112 s_buffer_load_dword s15, s[0:3], 0x13 ; C2078113 s_buffer_load_dword s16, s[0:3], 0x14 ; C2080114 s_buffer_load_dword s17, s[0:3], 0x1f ; C208811F s_buffer_load_dword s18, s[0:3], 0x4c ; C209014C s_buffer_load_dword s19, s[0:3], 0x4d ; C209814D s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s11 ; 7E0C020B s_buffer_load_dword s11, s[0:3], 0x4e ; C205814E v_mov_b32_e32 v7, s12 ; 7E0E020C s_buffer_load_dword s12, s[0:3], 0x50 ; C2060150 s_buffer_load_dword s20, s[0:3], 0x51 ; C20A0151 s_buffer_load_dword s0, s[0:3], 0x52 ; C2000152 v_mac_f32_e32 v6, s16, v2 ; 3E0C0410 v_mac_f32_e32 v7, s8, v2 ; 3E0E0408 v_mac_f32_e32 v0, s9, v2 ; 3E000409 v_mac_f32_e32 v5, s10, v2 ; 3E0A040A v_mac_f32_e32 v6, s4, v1 ; 3E0C0204 v_mac_f32_e32 v7, s13, v1 ; 3E0E020D v_mac_f32_e32 v0, s14, v1 ; 3E00020E v_mac_f32_e32 v5, s15, v1 ; 3E0A020F v_add_f32_e32 v5, s17, v5 ; 060A0A11 v_rcp_f32_e32 v5, v5 ; 7E0A5505 v_add_f32_e32 v6, s5, v6 ; 060C0C05 v_add_f32_e32 v7, s6, v7 ; 060E0E06 v_add_f32_e32 v0, s7, v0 ; 06000007 v_mad_f32 v6, v6, v5, -s18 ; D2820006 804A0B06 v_mad_f32 v7, v7, v5, -s19 ; D2820007 804E0B07 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, v5, -s11 ; D2820000 802E0B00 v_mul_f32_e32 v5, v6, v6 ; 100A0D06 v_mac_f32_e32 v5, v7, v7 ; 3E0A0F07 v_mac_f32_e32 v5, v0, v0 ; 3E0A0100 v_rsq_clamp_f32_e32 v5, v5 ; 7E0A5905 v_mul_f32_e32 v6, v5, v6 ; 100C0D05 v_mul_f32_e32 v7, v5, v7 ; 100E0F05 v_mul_f32_e32 v0, v5, v0 ; 10000105 v_mul_f32_e32 v5, s12, v6 ; 100A0C0C v_mac_f32_e32 v5, s20, v7 ; 3E0A0E14 v_mac_f32_e32 v5, s0, v0 ; 3E0A0000 v_rcp_f32_e32 v5, v5 ; 7E0A5505 v_mov_b32_e32 v8, 0 ; 7E100280 exp 15, 32, 0, 0, 0, v8, v8, v8, v8 ; F800020F 08080808 exp 15, 33, 0, 0, 0, v3, v4, v8, v8 ; F800021F 08080403 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v3, v6, v5 ; 10060B06 v_mul_f32_e32 v4, v7, v5 ; 10080B07 v_mul_f32_e32 v0, v0, v5 ; 10000B00 exp 15, 34, 0, 0, 0, v3, v4, v0, v8 ; F800022F 08000403 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, 1.0 ; 7E0002F2 v_xor_b32_e32 v2, 0x80000000, v2 ; 3A0404FF 80000000 v_mov_b32_e32 v3, -1.0 ; 7E0602F3 exp 15, 12, 0, 0, 0, v1, v2, v3, v0 ; F80000CF 00030201 exp 15, 13, 0, 1, 0, v8, v8, v8, v8 ; F80008DF 08080808 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 360 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0..1] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: MOV TEMP[0].w, IMM[0].xxxx 2: TXL TEMP[0], TEMP[0], SAMP[0], 2D 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = bitcast float %26 to i32 %29 = bitcast float %27 to i32 %30 = insertelement <4 x i32> undef, i32 %28, i32 0 %31 = insertelement <4 x i32> %30, i32 %29, i32 1 %32 = insertelement <4 x i32> %31, i32 0, i32 2 %33 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %32, <32 x i8> %23, <16 x i8> %25, i32 2) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = extractelement <4 x float> %33, i32 2 %37 = extractelement <4 x float> %33, i32 3 %38 = call i32 @llvm.SI.packf16(float %34, float %35) %39 = bitcast i32 %38 to float %40 = call i32 @llvm.SI.packf16(float %36, float %37) %41 = bitcast i32 %40 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %39, float %41, float %39, float %41) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_mov_b32_e32 v4, 0 ; 7E080280 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:5], s[12:19], s[0:3] ; F0900F00 00030002 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 68 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL CONST[1][0..41] DCL CONST[2][0..13] DCL CONST[3][0] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL DCL TEMP[2..3], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, -1.0000, 0.0000} IMM[1] UINT32 {0, 64, 80, 96} IMM[2] UINT32 {112, 68, 84, 100} IMM[3] UINT32 {116, 72, 88, 104} IMM[4] UINT32 {120, 76, 92, 108} IMM[5] UINT32 {124, 304, 320, 0} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MOV TEMP[0].zw, IMM[0].yyyy 4: MOV TEMP[0].x, IN[0].xxxx 5: MOV TEMP[0].y, IN[0].yyyy 6: MOV TEMP[2].x, CONST[1][4].xxxx 7: MOV TEMP[2].y, CONST[1][5].xxxx 8: MOV TEMP[2].z, CONST[1][6].xxxx 9: MOV TEMP[2].w, CONST[1][7].xxxx 10: DP4 TEMP[2].x, TEMP[0], TEMP[2] 11: MOV TEMP[3].x, CONST[1][4].yyyy 12: MOV TEMP[3].y, CONST[1][5].yyyy 13: MOV TEMP[3].z, CONST[1][6].yyyy 14: MOV TEMP[3].w, CONST[1][7].yyyy 15: DP4 TEMP[3].x, TEMP[0], TEMP[3] 16: MOV TEMP[2].y, TEMP[3].xxxx 17: MOV TEMP[3].x, CONST[1][4].zzzz 18: MOV TEMP[3].y, CONST[1][5].zzzz 19: MOV TEMP[3].z, CONST[1][6].zzzz 20: MOV TEMP[3].w, CONST[1][7].zzzz 21: DP4 TEMP[3].x, TEMP[0], TEMP[3] 22: MOV TEMP[2].z, TEMP[3].xxxx 23: MOV TEMP[3].x, CONST[1][4].wwww 24: MOV TEMP[3].y, CONST[1][5].wwww 25: MOV TEMP[3].z, CONST[1][6].wwww 26: MOV TEMP[3].w, CONST[1][7].wwww 27: DP4 TEMP[0].x, TEMP[0], TEMP[3] 28: RCP TEMP[0].xyz, TEMP[0].xxxx 29: MAD TEMP[0].xyz, TEMP[2].xyzz, TEMP[0].xyzz, -CONST[1][19].xyzz 30: DP3 TEMP[2].x, TEMP[0].xyzz, TEMP[0].xyzz 31: RSQ TEMP[2].x, TEMP[2].xxxx 32: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xxxx 33: DP3 TEMP[2].x, CONST[1][20].xyzz, TEMP[0].xyzz 34: RCP TEMP[2].x, TEMP[2].xxxx 35: MUL TEMP[0].xyz, TEMP[2].xxxx, TEMP[0].xyzz 36: MOV TEMP[2].zw, IMM[0].yyzy 37: MOV TEMP[2].x, IN[0].xxxx 38: MOV TEMP[2].y, -IN[0].yyyy 39: MOV OUT[1], TEMP[1] 40: MOV OUT[2].xy, IN[1].xyxx 41: MOV OUT[0], TEMP[2] 42: MOV OUT[3].xyz, TEMP[0].xyzx 43: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328) %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 %37 = add i32 %5, %7 %38 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %37) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 %43 = add i32 %5, %7 %44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %43) %45 = extractelement <4 x float> %44, i32 0 %46 = extractelement <4 x float> %44, i32 1 %47 = fmul float %39, %13 %48 = fmul float %40, %17 %49 = fadd float %47, %48 %50 = fadd float %49, %21 %51 = fadd float %50, %25 %52 = fmul float %39, %14 %53 = fmul float %40, %18 %54 = fadd float %52, %53 %55 = fadd float %54, %22 %56 = fadd float %55, %26 %57 = fmul float %39, %15 %58 = fmul float %40, %19 %59 = fadd float %57, %58 %60 = fadd float %59, %23 %61 = fadd float %60, %27 %62 = fmul float %39, %16 %63 = fmul float %40, %20 %64 = fadd float %62, %63 %65 = fadd float %64, %24 %66 = fadd float %65, %28 %67 = fdiv float 1.000000e+00, %66 %68 = fmul float %51, %67 %69 = fsub float %68, %29 %70 = fmul float %56, %67 %71 = fsub float %70, %30 %72 = fmul float %61, %67 %73 = fsub float %72, %31 %74 = fmul float %69, %69 %75 = fmul float %71, %71 %76 = fadd float %75, %74 %77 = fmul float %73, %73 %78 = fadd float %76, %77 %79 = call float @llvm.AMDGPU.rsq.clamped.f32(float %78) %80 = fmul float %69, %79 %81 = fmul float %71, %79 %82 = fmul float %73, %79 %83 = fmul float %32, %80 %84 = fmul float %33, %81 %85 = fadd float %84, %83 %86 = fmul float %34, %82 %87 = fadd float %85, %86 %88 = fdiv float 1.000000e+00, %87 %89 = fmul float %88, %80 %90 = fmul float %88, %81 %91 = fmul float %88, %82 %92 = fsub float -0.000000e+00, %40 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %45, float %46, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %89, float %90, float %91, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %39, float %92, float -1.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0x1a ; C206011A buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[3:6], v0, s[8:11], 0 idxen ; E00C2000 80020300 s_buffer_load_dword s5, s[0:3], 0x1c ; C202811C s_buffer_load_dword s6, s[0:3], 0x1d ; C203011D s_buffer_load_dword s7, s[0:3], 0x1e ; C203811E s_buffer_load_dword s8, s[0:3], 0x15 ; C2040115 s_buffer_load_dword s9, s[0:3], 0x16 ; C2048116 s_buffer_load_dword s10, s[0:3], 0x17 ; C2050117 v_mov_b32_e32 v0, s12 ; 7E00020C s_buffer_load_dword s11, s[0:3], 0x18 ; C2058118 s_buffer_load_dword s12, s[0:3], 0x19 ; C2060119 s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v5, s4 ; 7E0A0204 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_buffer_load_dword s13, s[0:3], 0x11 ; C2068111 s_buffer_load_dword s14, s[0:3], 0x12 ; C2070112 s_buffer_load_dword s15, s[0:3], 0x13 ; C2078113 s_buffer_load_dword s16, s[0:3], 0x14 ; C2080114 s_buffer_load_dword s17, s[0:3], 0x1f ; C208811F s_buffer_load_dword s18, s[0:3], 0x4c ; C209014C s_buffer_load_dword s19, s[0:3], 0x4d ; C209814D s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s11 ; 7E0C020B s_buffer_load_dword s11, s[0:3], 0x4e ; C205814E v_mov_b32_e32 v7, s12 ; 7E0E020C s_buffer_load_dword s12, s[0:3], 0x50 ; C2060150 s_buffer_load_dword s20, s[0:3], 0x51 ; C20A0151 s_buffer_load_dword s0, s[0:3], 0x52 ; C2000152 v_mac_f32_e32 v6, s16, v2 ; 3E0C0410 v_mac_f32_e32 v7, s8, v2 ; 3E0E0408 v_mac_f32_e32 v0, s9, v2 ; 3E000409 v_mac_f32_e32 v5, s10, v2 ; 3E0A040A v_mac_f32_e32 v6, s4, v1 ; 3E0C0204 v_mac_f32_e32 v7, s13, v1 ; 3E0E020D v_mac_f32_e32 v0, s14, v1 ; 3E00020E v_mac_f32_e32 v5, s15, v1 ; 3E0A020F v_add_f32_e32 v5, s17, v5 ; 060A0A11 v_rcp_f32_e32 v5, v5 ; 7E0A5505 v_add_f32_e32 v6, s5, v6 ; 060C0C05 v_add_f32_e32 v7, s6, v7 ; 060E0E06 v_add_f32_e32 v0, s7, v0 ; 06000007 v_mad_f32 v6, v6, v5, -s18 ; D2820006 804A0B06 v_mad_f32 v7, v7, v5, -s19 ; D2820007 804E0B07 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, v5, -s11 ; D2820000 802E0B00 v_mul_f32_e32 v5, v6, v6 ; 100A0D06 v_mac_f32_e32 v5, v7, v7 ; 3E0A0F07 v_mac_f32_e32 v5, v0, v0 ; 3E0A0100 v_rsq_clamp_f32_e32 v5, v5 ; 7E0A5905 v_mul_f32_e32 v6, v5, v6 ; 100C0D05 v_mul_f32_e32 v7, v5, v7 ; 100E0F05 v_mul_f32_e32 v0, v5, v0 ; 10000105 v_mul_f32_e32 v5, s12, v6 ; 100A0C0C v_mac_f32_e32 v5, s20, v7 ; 3E0A0E14 v_mac_f32_e32 v5, s0, v0 ; 3E0A0000 v_rcp_f32_e32 v5, v5 ; 7E0A5505 v_mov_b32_e32 v8, 0 ; 7E100280 exp 15, 32, 0, 0, 0, v8, v8, v8, v8 ; F800020F 08080808 exp 15, 33, 0, 0, 0, v3, v4, v8, v8 ; F800021F 08080403 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v3, v6, v5 ; 10060B06 v_mul_f32_e32 v4, v7, v5 ; 10080B07 v_mul_f32_e32 v0, v0, v5 ; 10000B00 exp 15, 34, 0, 0, 0, v3, v4, v0, v8 ; F800022F 08000403 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, 1.0 ; 7E0002F2 v_xor_b32_e32 v2, 0x80000000, v2 ; 3A0404FF 80000000 v_mov_b32_e32 v3, -1.0 ; 7E0602F3 exp 15, 12, 0, 0, 0, v1, v2, v3, v0 ; F80000CF 00030201 exp 15, 13, 0, 1, 0, v8, v8, v8, v8 ; F80008DF 08080808 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 360 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SVIEW[0], SHADOW2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL SVIEW[5], 2D, FLOAT DCL CONST[1][0..1] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0..16], LOCAL IMM[0] FLT32 { 0.0000, -0.5000, 2.0000, 0.5000} IMM[1] FLT32 { 1.0000, 255.0000, 0.0625, 16.0000} IMM[2] UINT32 {0, 16, 3, 304} IMM[3] FLT32 { -8.0000, 0.1429, 0.9961, 0.0039} IMM[4] FLT32 { 0.0010, 120.0000, -0.3000, 1.4286} IMM[5] FLT32 { 0.0000, 2.1000, 0.2060, 0.0749} IMM[6] UINT32 {4, 32, 44, 48} IMM[7] UINT32 {200, 196, 192, 204} IMM[8] UINT32 {216, 212, 208, 220} IMM[9] UINT32 {232, 228, 224, 236} IMM[10] UINT32 {248, 244, 240, 252} IMM[11] UINT32 {256, 76, 80, 92} IMM[12] UINT32 {96, 64, 0, 0} IMM[13] FLT32 { 0.1236, 0.2125, 0.7154, 0.0721} IMM[14] FLT32 { 0.4500, 1.8500, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: MOV TEMP[0].w, IMM[0].xxxx 2: TXL TEMP[0].xyz, TEMP[0], SAMP[2], 2D 3: ADD TEMP[0].xyz, TEMP[0].xyzz, IMM[0].yyyy 4: MUL TEMP[0].xyz, TEMP[0].xyzz, IMM[0].zzzz 5: MOV TEMP[1].xy, IN[0].xyyy 6: MOV TEMP[1].w, IMM[0].xxxx 7: TXL TEMP[1].xyz, TEMP[1], SAMP[3], 2D 8: MOV TEMP[2].xy, IN[0].xyyy 9: MOV TEMP[2].w, IMM[0].xxxx 10: TXL TEMP[2].x, TEMP[2], SAMP[4], 2D 11: MUL TEMP[3].xy, IN[0].xyyy, IMM[0].wwww 12: MUL TEMP[4].xy, CONST[1][0].xyyy, TEMP[3].xyyy 13: FRC TEMP[4].xy, TEMP[4].xyyy 14: ADD TEMP[4].xy, IMM[1].xxxx, -TEMP[4].xyyy 15: MUL TEMP[5].xy, CONST[1][1].xyyy, TEMP[4].xyyy 16: ADD TEMP[3].xy, TEMP[3].xyyy, -TEMP[5].xyyy 17: MOV TEMP[5].xy, TEMP[3].xyyy 18: TEX TEMP[5], TEMP[5], SAMP[1], 2D 19: MOV TEMP[6].y, IMM[0].xxxx 20: MOV TEMP[6].x, CONST[1][1].xxxx 21: ADD TEMP[6].xy, TEMP[6].xyyy, TEMP[3].xyyy 22: MOV TEMP[6].xy, TEMP[6].xyyy 23: TEX TEMP[6], TEMP[6], SAMP[1], 2D 24: MOV TEMP[7].x, IMM[0].xxxx 25: MOV TEMP[7].y, CONST[1][1].yyyy 26: ADD TEMP[7].xy, TEMP[7].xyyy, TEMP[3].xyyy 27: MOV TEMP[7].xy, TEMP[7].xyyy 28: TEX TEMP[7], TEMP[7], SAMP[1], 2D 29: ADD TEMP[3].xy, CONST[1][1].xyyy, TEMP[3].xyyy 30: MOV TEMP[3].xy, TEMP[3].xyyy 31: TEX TEMP[3], TEMP[3], SAMP[1], 2D 32: ADD TEMP[8].x, IMM[1].xxxx, -TEMP[4].xxxx 33: ADD TEMP[9].x, IMM[1].xxxx, -TEMP[4].yyyy 34: MUL TEMP[10].x, TEMP[9].xxxx, TEMP[8].xxxx 35: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[4].xxxx 36: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[4].yyyy 37: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[4].yyyy 38: MOV TEMP[11].x, TEMP[10].xxxx 39: MOV TEMP[11].y, TEMP[9].xxxx 40: MOV TEMP[11].z, TEMP[8].xxxx 41: MOV TEMP[11].w, TEMP[4].xxxx 42: MUL TEMP[12].x, TEMP[5].wwww, IMM[1].yyyy 43: MUL TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz 44: FLR TEMP[13].x, TEMP[13].xxxx 45: MOV TEMP[14].x, TEMP[13].xxxx 46: MUL TEMP[13].x, TEMP[13].xxxx, IMM[1].wwww 47: ADD TEMP[12].x, TEMP[12].xxxx, -TEMP[13].xxxx 48: MOV TEMP[14].y, TEMP[12].xxxx 49: ADD TEMP[12].xy, TEMP[14].xyyy, IMM[3].xxxx 50: MUL TEMP[12].xy, TEMP[12].xyyy, IMM[3].yyyy 51: MOV TEMP[13].x, TEMP[12].xxxx 52: MOV TEMP[13].y, TEMP[12].yyyy 53: MUL TEMP[14].x, TEMP[12].xxxx, TEMP[12].xxxx 54: ADD TEMP[14].x, IMM[1].xxxx, -TEMP[14].xxxx 55: MUL TEMP[12].x, TEMP[12].yyyy, TEMP[12].yyyy 56: ADD TEMP[12].x, TEMP[14].xxxx, -TEMP[12].xxxx 57: MAX TEMP[12].x, IMM[0].xxxx, TEMP[12].xxxx 58: SQRT TEMP[12].x, TEMP[12].xxxx 59: MOV TEMP[13].z, TEMP[12].xxxx 60: DP2 TEMP[12].x, TEMP[5].yzzz, IMM[3].zwww 61: ADD TEMP[12].x, TEMP[12].xxxx, -TEMP[2].xxxx 62: ADD TEMP[12].x, TEMP[12].xxxx, IMM[4].xxxx 63: MOV TEMP[14].x, -TEMP[12].xxxx 64: FSLT TEMP[15].x, TEMP[12].xxxx, IMM[0].xxxx 65: UIF TEMP[15].xxxx :0 66: MOV TEMP[14].x, TEMP[14].xxxx 67: ELSE :0 68: MOV TEMP[14].x, TEMP[12].xxxx 69: ENDIF 70: MUL TEMP[12].x, TEMP[14].xxxx, IMM[4].yyyy 71: MOV_SAT TEMP[12].x, TEMP[12].xxxx 72: ADD TEMP[12].x, IMM[1].xxxx, -TEMP[12].xxxx 73: DP3 TEMP[13].x, TEMP[13].xyzz, TEMP[0].xyzz 74: ADD TEMP[13].x, TEMP[13].xxxx, IMM[4].zzzz 75: MUL TEMP[13].x, TEMP[13].xxxx, IMM[4].wwww 76: MOV_SAT TEMP[13].x, TEMP[13].xxxx 77: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[13].xxxx 78: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[12].xxxx 79: MUL TEMP[12].x, TEMP[10].xxxx, TEMP[5].xxxx 80: MUL TEMP[13].x, IMM[1].yyyy, TEMP[6].wwww 81: MUL TEMP[14].x, IMM[1].zzzz, TEMP[13].xxxx 82: FLR TEMP[14].x, TEMP[14].xxxx 83: MOV TEMP[15].x, TEMP[14].xxxx 84: MUL TEMP[14].x, IMM[1].wwww, TEMP[14].xxxx 85: ADD TEMP[13].x, TEMP[13].xxxx, -TEMP[14].xxxx 86: MOV TEMP[15].y, TEMP[13].xxxx 87: ADD TEMP[13].xy, IMM[3].xxxx, TEMP[15].xyyy 88: MUL TEMP[13].xy, TEMP[13].xyyy, IMM[3].yyyy 89: MOV TEMP[14].x, TEMP[13].xxxx 90: MOV TEMP[14].y, TEMP[13].yyyy 91: MUL TEMP[15].x, TEMP[13].xxxx, TEMP[13].xxxx 92: ADD TEMP[15].x, IMM[1].xxxx, -TEMP[15].xxxx 93: MUL TEMP[13].x, TEMP[13].yyyy, TEMP[13].yyyy 94: ADD TEMP[13].x, TEMP[15].xxxx, -TEMP[13].xxxx 95: MAX TEMP[13].x, IMM[0].xxxx, TEMP[13].xxxx 96: SQRT TEMP[13].x, TEMP[13].xxxx 97: MOV TEMP[14].z, TEMP[13].xxxx 98: DP2 TEMP[13].x, TEMP[6].yzzz, IMM[3].zwww 99: ADD TEMP[13].x, TEMP[13].xxxx, -TEMP[2].xxxx 100: ADD TEMP[13].x, IMM[4].xxxx, TEMP[13].xxxx 101: MOV TEMP[15].x, -TEMP[13].xxxx 102: FSLT TEMP[16].x, TEMP[13].xxxx, IMM[0].xxxx 103: UIF TEMP[16].xxxx :0 104: MOV TEMP[15].x, TEMP[15].xxxx 105: ELSE :0 106: MOV TEMP[15].x, TEMP[13].xxxx 107: ENDIF 108: MUL TEMP[13].x, IMM[4].yyyy, TEMP[15].xxxx 109: MOV_SAT TEMP[13].x, TEMP[13].xxxx 110: ADD TEMP[13].x, IMM[1].xxxx, -TEMP[13].xxxx 111: DP3 TEMP[14].x, TEMP[14].xyzz, TEMP[0].xyzz 112: ADD TEMP[14].x, IMM[4].zzzz, TEMP[14].xxxx 113: MUL TEMP[14].x, TEMP[14].xxxx, IMM[4].wwww 114: MOV_SAT TEMP[14].x, TEMP[14].xxxx 115: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[14].xxxx 116: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[13].xxxx 117: ADD TEMP[10].x, TEMP[10].xxxx, TEMP[9].xxxx 118: MAD TEMP[9].x, TEMP[9].xxxx, TEMP[6].xxxx, TEMP[12].xxxx 119: MUL TEMP[12].x, IMM[1].yyyy, TEMP[7].wwww 120: MUL TEMP[13].x, IMM[1].zzzz, TEMP[12].xxxx 121: FLR TEMP[13].x, TEMP[13].xxxx 122: MOV TEMP[14].x, TEMP[13].xxxx 123: MUL TEMP[13].x, IMM[1].wwww, TEMP[13].xxxx 124: ADD TEMP[12].x, TEMP[12].xxxx, -TEMP[13].xxxx 125: MOV TEMP[14].y, TEMP[12].xxxx 126: ADD TEMP[12].xy, IMM[3].xxxx, TEMP[14].xyyy 127: MUL TEMP[12].xy, TEMP[12].xyyy, IMM[3].yyyy 128: MOV TEMP[13].x, TEMP[12].xxxx 129: MOV TEMP[13].y, TEMP[12].yyyy 130: MUL TEMP[14].x, TEMP[12].xxxx, TEMP[12].xxxx 131: ADD TEMP[14].x, IMM[1].xxxx, -TEMP[14].xxxx 132: MUL TEMP[12].x, TEMP[12].yyyy, TEMP[12].yyyy 133: ADD TEMP[12].x, TEMP[14].xxxx, -TEMP[12].xxxx 134: MAX TEMP[12].x, IMM[0].xxxx, TEMP[12].xxxx 135: SQRT TEMP[12].x, TEMP[12].xxxx 136: MOV TEMP[13].z, TEMP[12].xxxx 137: DP2 TEMP[12].x, TEMP[7].yzzz, IMM[3].zwww 138: ADD TEMP[12].x, TEMP[12].xxxx, -TEMP[2].xxxx 139: ADD TEMP[12].x, IMM[4].xxxx, TEMP[12].xxxx 140: MOV TEMP[14].x, -TEMP[12].xxxx 141: FSLT TEMP[15].x, TEMP[12].xxxx, IMM[0].xxxx 142: UIF TEMP[15].xxxx :0 143: MOV TEMP[14].x, TEMP[14].xxxx 144: ELSE :0 145: MOV TEMP[14].x, TEMP[12].xxxx 146: ENDIF 147: MUL TEMP[12].x, IMM[4].yyyy, TEMP[14].xxxx 148: MOV_SAT TEMP[12].x, TEMP[12].xxxx 149: ADD TEMP[12].x, IMM[1].xxxx, -TEMP[12].xxxx 150: DP3 TEMP[13].x, TEMP[13].xyzz, TEMP[0].xyzz 151: ADD TEMP[13].x, IMM[4].zzzz, TEMP[13].xxxx 152: MUL TEMP[13].x, TEMP[13].xxxx, IMM[4].wwww 153: MOV_SAT TEMP[13].x, TEMP[13].xxxx 154: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[13].xxxx 155: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[12].xxxx 156: ADD TEMP[10].x, TEMP[10].xxxx, TEMP[8].xxxx 157: MAD TEMP[8].x, TEMP[8].xxxx, TEMP[7].xxxx, TEMP[9].xxxx 158: MUL TEMP[9].x, IMM[1].yyyy, TEMP[3].wwww 159: MUL TEMP[12].x, IMM[1].zzzz, TEMP[9].xxxx 160: FLR TEMP[12].x, TEMP[12].xxxx 161: MOV TEMP[13].x, TEMP[12].xxxx 162: MUL TEMP[12].x, IMM[1].wwww, TEMP[12].xxxx 163: ADD TEMP[9].x, TEMP[9].xxxx, -TEMP[12].xxxx 164: MOV TEMP[13].y, TEMP[9].xxxx 165: ADD TEMP[9].xy, IMM[3].xxxx, TEMP[13].xyyy 166: MUL TEMP[9].xy, TEMP[9].xyyy, IMM[3].yyyy 167: MOV TEMP[12].x, TEMP[9].xxxx 168: MOV TEMP[12].y, TEMP[9].yyyy 169: MUL TEMP[13].x, TEMP[9].xxxx, TEMP[9].xxxx 170: ADD TEMP[13].x, IMM[1].xxxx, -TEMP[13].xxxx 171: MUL TEMP[9].x, TEMP[9].yyyy, TEMP[9].yyyy 172: ADD TEMP[9].x, TEMP[13].xxxx, -TEMP[9].xxxx 173: MAX TEMP[9].x, IMM[0].xxxx, TEMP[9].xxxx 174: SQRT TEMP[9].x, TEMP[9].xxxx 175: MOV TEMP[12].z, TEMP[9].xxxx 176: DP2 TEMP[9].x, TEMP[3].yzzz, IMM[3].zwww 177: ADD TEMP[9].x, TEMP[9].xxxx, -TEMP[2].xxxx 178: ADD TEMP[9].x, IMM[4].xxxx, TEMP[9].xxxx 179: MOV TEMP[13].x, -TEMP[9].xxxx 180: FSLT TEMP[14].x, TEMP[9].xxxx, IMM[0].xxxx 181: UIF TEMP[14].xxxx :0 182: MOV TEMP[13].x, TEMP[13].xxxx 183: ELSE :0 184: MOV TEMP[13].x, TEMP[9].xxxx 185: ENDIF 186: MUL TEMP[9].x, IMM[4].yyyy, TEMP[13].xxxx 187: MOV_SAT TEMP[9].x, TEMP[9].xxxx 188: ADD TEMP[9].x, IMM[1].xxxx, -TEMP[9].xxxx 189: DP3 TEMP[12].x, TEMP[12].xyzz, TEMP[0].xyzz 190: ADD TEMP[12].x, IMM[4].zzzz, TEMP[12].xxxx 191: MUL TEMP[12].x, TEMP[12].xxxx, IMM[4].wwww 192: MOV_SAT TEMP[12].x, TEMP[12].xxxx 193: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[12].xxxx 194: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[9].xxxx 195: ADD TEMP[9].x, TEMP[10].xxxx, TEMP[4].xxxx 196: MOV TEMP[5].x, TEMP[5].xxxx 197: MOV TEMP[5].y, TEMP[6].xxxx 198: MOV TEMP[5].z, TEMP[7].xxxx 199: MOV TEMP[5].w, TEMP[3].xxxx 200: DP4 TEMP[5].x, TEMP[11], TEMP[5] 201: MAD TEMP[3].x, TEMP[4].xxxx, TEMP[3].xxxx, TEMP[8].xxxx 202: RCP TEMP[4].x, TEMP[9].xxxx 203: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[4].xxxx 204: FSLT TEMP[4].x, TEMP[9].xxxx, IMM[5].xxxx 205: UIF TEMP[4].xxxx :0 206: MOV TEMP[4].x, TEMP[5].xxxx 207: ELSE :0 208: MOV TEMP[4].x, TEMP[3].xxxx 209: ENDIF 210: MAD TEMP[2].xyz, IN[1].xyzz, TEMP[2].xxxx, CONST[4][19].xyzz 211: MOV TEMP[3].xy, IN[0].xyyy 212: MOV TEMP[3].w, IMM[0].xxxx 213: TXL TEMP[3].xyz, TEMP[3], SAMP[5], 2D 214: ADD TEMP[5].xyz, TEMP[2].xyzz, -CONST[4][19].xyzz 215: DP3 TEMP[6].x, TEMP[5].xyzz, TEMP[5].xyzz 216: RSQ TEMP[6].x, TEMP[6].xxxx 217: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[6].xxxx 218: MOV TEMP[5].xyz, -TEMP[5].xyzx 219: DP3 TEMP[6].x, TEMP[0].xyzz, TEMP[5].xyzz 220: MUL TEMP[6].xyz, TEMP[6].xxxx, TEMP[0].xyzz 221: MUL TEMP[6].xyz, IMM[0].zzzz, TEMP[6].xyzz 222: ADD TEMP[5].xyz, TEMP[5].xyzz, -TEMP[6].xyzz 223: DP3 TEMP[5].x, CONST[5][2].xyzz, TEMP[5].xyzz 224: MOV_SAT TEMP[5].x, TEMP[5].xxxx 225: POW TEMP[5].x, TEMP[5].xxxx, CONST[5][2].wwww 226: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[3].xxxx 227: MAD TEMP[1].xyz, TEMP[5].xxxx, CONST[5][3].xyzz, TEMP[1].xyzz 228: MUL TEMP[6].x, CONST[5][12].xxxx, TEMP[2].xxxx 229: MAD TEMP[6].x, CONST[5][12].yyyy, TEMP[2].yyyy, TEMP[6].xxxx 230: MAD TEMP[6].x, CONST[5][12].zzzz, TEMP[2].zzzz, TEMP[6].xxxx 231: ADD TEMP[6].x, TEMP[6].xxxx, CONST[5][12].wwww 232: MUL TEMP[7].x, CONST[5][13].xxxx, TEMP[2].xxxx 233: MAD TEMP[7].x, CONST[5][13].yyyy, TEMP[2].yyyy, TEMP[7].xxxx 234: MAD TEMP[7].x, CONST[5][13].zzzz, TEMP[2].zzzz, TEMP[7].xxxx 235: ADD TEMP[7].x, TEMP[7].xxxx, CONST[5][13].wwww 236: MOV TEMP[6].y, TEMP[7].xxxx 237: MUL TEMP[7].x, CONST[5][14].xxxx, TEMP[2].xxxx 238: MAD TEMP[7].x, CONST[5][14].yyyy, TEMP[2].yyyy, TEMP[7].xxxx 239: MAD TEMP[7].x, CONST[5][14].zzzz, TEMP[2].zzzz, TEMP[7].xxxx 240: ADD TEMP[7].x, TEMP[7].xxxx, CONST[5][14].wwww 241: MOV TEMP[6].z, TEMP[7].xxxx 242: MUL TEMP[7].x, CONST[5][15].xxxx, TEMP[2].xxxx 243: MAD TEMP[7].x, CONST[5][15].yyyy, TEMP[2].yyyy, TEMP[7].xxxx 244: MAD TEMP[2].x, CONST[5][15].zzzz, TEMP[2].zzzz, TEMP[7].xxxx 245: ADD TEMP[2].x, TEMP[2].xxxx, CONST[5][15].wwww 246: RCP TEMP[2].xyz, TEMP[2].xxxx 247: MUL TEMP[2].xyz, TEMP[6].xyzz, TEMP[2].xyzz 248: MOV_SAT TEMP[6].x, -TEMP[2].zzzz 249: MOV TEMP[7].x, -CONST[5][16].xxxx 250: MOV TEMP[8].x, TEMP[7].xxxx 251: MOV TEMP[8].y, CONST[5][16].xxxx 252: MOV TEMP[9].x, CONST[5][16].xxxx 253: MOV TEMP[9].y, TEMP[7].xxxx 254: ADD TEMP[10].xy, TEMP[2].xyyy, CONST[5][16].xxxx 255: MOV TEMP[10].xy, TEMP[10].xyyy 256: MOV TEMP[10].z, TEMP[6].xxxx 257: MOV TEMP[10].w, IMM[0].xxxx 258: TXL TEMP[10].x, TEMP[10], SAMP[0], SHADOW2D 259: MOV TEMP[10].x, TEMP[10].xxxx 260: ADD TEMP[8].xy, TEMP[8].xyyy, TEMP[2].xyyy 261: MOV TEMP[8].xy, TEMP[8].xyyy 262: MOV TEMP[8].z, TEMP[6].xxxx 263: MOV TEMP[8].w, IMM[0].xxxx 264: TXL TEMP[8].x, TEMP[8], SAMP[0], SHADOW2D 265: MOV TEMP[10].y, TEMP[8].xxxx 266: ADD TEMP[8].xy, TEMP[2].xyyy, TEMP[9].xyyy 267: MOV TEMP[8].xy, TEMP[8].xyyy 268: MOV TEMP[8].z, TEMP[6].xxxx 269: MOV TEMP[8].w, IMM[0].xxxx 270: TXL TEMP[8].x, TEMP[8], SAMP[0], SHADOW2D 271: MOV TEMP[10].z, TEMP[8].xxxx 272: ADD TEMP[8].xy, TEMP[2].xyyy, TEMP[7].xxxx 273: MOV TEMP[8].xy, TEMP[8].xyyy 274: MOV TEMP[8].z, TEMP[6].xxxx 275: MOV TEMP[8].w, IMM[0].xxxx 276: TXL TEMP[8].x, TEMP[8], SAMP[0], SHADOW2D 277: MOV TEMP[10].w, TEMP[8].xxxx 278: MOV TEMP[8].y, IMM[0].xxxx 279: MOV TEMP[8].x, CONST[5][16].xxxx 280: MOV TEMP[9].y, IMM[0].xxxx 281: MOV TEMP[9].x, TEMP[7].xxxx 282: MOV TEMP[11].x, IMM[0].xxxx 283: MOV TEMP[11].y, TEMP[7].xxxx 284: MOV TEMP[7].x, IMM[0].xxxx 285: MOV TEMP[7].y, CONST[5][16].xxxx 286: ADD TEMP[8].xy, TEMP[8].xyyy, TEMP[2].xyyy 287: MOV TEMP[8].xy, TEMP[8].xyyy 288: MOV TEMP[8].z, TEMP[6].xxxx 289: MOV TEMP[8].w, IMM[0].xxxx 290: TXL TEMP[8].x, TEMP[8], SAMP[0], SHADOW2D 291: MOV TEMP[8].x, TEMP[8].xxxx 292: ADD TEMP[9].xy, TEMP[9].xyyy, TEMP[2].xyyy 293: MOV TEMP[9].xy, TEMP[9].xyyy 294: MOV TEMP[9].z, TEMP[6].xxxx 295: MOV TEMP[9].w, IMM[0].xxxx 296: TXL TEMP[9].x, TEMP[9], SAMP[0], SHADOW2D 297: MOV TEMP[8].y, TEMP[9].xxxx 298: ADD TEMP[9].xy, TEMP[11].xyyy, TEMP[2].xyyy 299: MOV TEMP[9].xy, TEMP[9].xyyy 300: MOV TEMP[9].z, TEMP[6].xxxx 301: MOV TEMP[9].w, IMM[0].xxxx 302: TXL TEMP[9].x, TEMP[9], SAMP[0], SHADOW2D 303: MOV TEMP[8].z, TEMP[9].xxxx 304: ADD TEMP[7].xy, TEMP[7].xyyy, TEMP[2].xyyy 305: MOV TEMP[7].xy, TEMP[7].xyyy 306: MOV TEMP[7].z, TEMP[6].xxxx 307: MOV TEMP[7].w, IMM[0].xxxx 308: TXL TEMP[7].x, TEMP[7], SAMP[0], SHADOW2D 309: MOV TEMP[8].w, TEMP[7].xxxx 310: ADD TEMP[4].x, IMM[1].xxxx, -TEMP[4].xxxx 311: POW TEMP[7].x, TEMP[4].xxxx, IMM[5].yyyy 312: MUL TEMP[9].xyz, CONST[5][4].wwww, CONST[5][5].xyzz 313: MUL TEMP[11].xyz, CONST[5][5].wwww, CONST[5][6].xyzz 314: MOV TEMP[2].xy, TEMP[2].xyyy 315: MOV TEMP[2].z, TEMP[6].xxxx 316: MOV TEMP[2].w, IMM[0].xxxx 317: TXL TEMP[2].x, TEMP[2], SAMP[0], SHADOW2D 318: DP4 TEMP[6].x, TEMP[10], IMM[5].wwww 319: DP4 TEMP[8].x, TEMP[8], IMM[13].xxxx 320: ADD TEMP[6].x, TEMP[6].xxxx, TEMP[8].xxxx 321: MAD TEMP[2].x, TEMP[2].xxxx, IMM[5].zzzz, TEMP[6].xxxx 322: DP3 TEMP[6].x, -CONST[5][0].xyzz, TEMP[0].xyzz 323: MOV_SAT TEMP[6].x, TEMP[6].xxxx 324: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[6].xxxx 325: ADD TEMP[6].x, IMM[1].xxxx, -TEMP[2].xxxx 326: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].xxxx 327: DP3 TEMP[8].x, TEMP[11].xyzz, IMM[13].yzww 328: LRP TEMP[8].xyz, TEMP[3].zzzz, TEMP[8].xxxx, TEMP[11].xyzz 329: DP3 TEMP[0].x, CONST[5][4].xyzz, TEMP[0].xyzz 330: MOV_SAT TEMP[0].x, TEMP[0].xxxx 331: MUL TEMP[0].x, TEMP[7].xxxx, TEMP[0].xxxx 332: DP3 TEMP[7].x, TEMP[9].xyzz, IMM[13].yzww 333: LRP TEMP[7].xyz, TEMP[3].zzzz, TEMP[7].xxxx, TEMP[9].xyzz 334: POW TEMP[4].x, TEMP[4].xxxx, IMM[14].yyyy 335: LRP TEMP[4].x, TEMP[4].xxxx, IMM[1].xxxx, IMM[14].xxxx 336: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[4].xxxx 337: MUL TEMP[2].xyz, TEMP[2].xxxx, CONST[5][1].xyzz 338: MAD TEMP[0].xyz, TEMP[0].xxxx, TEMP[7].xyzz, TEMP[2].xyzz 339: MAD TEMP[0].xyz, TEMP[6].xxxx, TEMP[8].xyzz, TEMP[0].xyzz 340: MUL TEMP[0].xyz, TEMP[1].xyzz, TEMP[0].xyzz 341: LRP TEMP[0].xyz, TEMP[3].yyyy, TEMP[1].xyzz, TEMP[0].xyzz 342: MOV TEMP[1].x, TEMP[0].xxxx 343: MOV TEMP[1].y, TEMP[0].yyyy 344: MOV TEMP[1].z, TEMP[0].zzzz 345: MOV_SAT TEMP[0].x, TEMP[5].xxxx 346: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[3].zzzz 347: MOV TEMP[1].w, TEMP[0].xxxx 348: MOV OUT[0], TEMP[1] 349: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %28 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4 %29 = load <16 x i8>, <16 x i8> addrspace(2)* %28, align 16, !tbaa !0 %30 = call float @llvm.SI.load.const(<16 x i8> %29, i32 304) %31 = call float @llvm.SI.load.const(<16 x i8> %29, i32 308) %32 = call float @llvm.SI.load.const(<16 x i8> %29, i32 312) %33 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 5 %34 = load <16 x i8>, <16 x i8> addrspace(2)* %33, align 16, !tbaa !0 %35 = call float @llvm.SI.load.const(<16 x i8> %34, i32 0) %36 = call float @llvm.SI.load.const(<16 x i8> %34, i32 4) %37 = call float @llvm.SI.load.const(<16 x i8> %34, i32 8) %38 = call float @llvm.SI.load.const(<16 x i8> %34, i32 16) %39 = call float @llvm.SI.load.const(<16 x i8> %34, i32 20) %40 = call float @llvm.SI.load.const(<16 x i8> %34, i32 24) %41 = call float @llvm.SI.load.const(<16 x i8> %34, i32 32) %42 = call float @llvm.SI.load.const(<16 x i8> %34, i32 36) %43 = call float @llvm.SI.load.const(<16 x i8> %34, i32 40) %44 = call float @llvm.SI.load.const(<16 x i8> %34, i32 44) %45 = call float @llvm.SI.load.const(<16 x i8> %34, i32 48) %46 = call float @llvm.SI.load.const(<16 x i8> %34, i32 52) %47 = call float @llvm.SI.load.const(<16 x i8> %34, i32 56) %48 = call float @llvm.SI.load.const(<16 x i8> %34, i32 64) %49 = call float @llvm.SI.load.const(<16 x i8> %34, i32 68) %50 = call float @llvm.SI.load.const(<16 x i8> %34, i32 72) %51 = call float @llvm.SI.load.const(<16 x i8> %34, i32 76) %52 = call float @llvm.SI.load.const(<16 x i8> %34, i32 80) %53 = call float @llvm.SI.load.const(<16 x i8> %34, i32 84) %54 = call float @llvm.SI.load.const(<16 x i8> %34, i32 88) %55 = call float @llvm.SI.load.const(<16 x i8> %34, i32 92) %56 = call float @llvm.SI.load.const(<16 x i8> %34, i32 96) %57 = call float @llvm.SI.load.const(<16 x i8> %34, i32 100) %58 = call float @llvm.SI.load.const(<16 x i8> %34, i32 104) %59 = call float @llvm.SI.load.const(<16 x i8> %34, i32 192) %60 = call float @llvm.SI.load.const(<16 x i8> %34, i32 196) %61 = call float @llvm.SI.load.const(<16 x i8> %34, i32 200) %62 = call float @llvm.SI.load.const(<16 x i8> %34, i32 204) %63 = call float @llvm.SI.load.const(<16 x i8> %34, i32 208) %64 = call float @llvm.SI.load.const(<16 x i8> %34, i32 212) %65 = call float @llvm.SI.load.const(<16 x i8> %34, i32 216) %66 = call float @llvm.SI.load.const(<16 x i8> %34, i32 220) %67 = call float @llvm.SI.load.const(<16 x i8> %34, i32 224) %68 = call float @llvm.SI.load.const(<16 x i8> %34, i32 228) %69 = call float @llvm.SI.load.const(<16 x i8> %34, i32 232) %70 = call float @llvm.SI.load.const(<16 x i8> %34, i32 236) %71 = call float @llvm.SI.load.const(<16 x i8> %34, i32 240) %72 = call float @llvm.SI.load.const(<16 x i8> %34, i32 244) %73 = call float @llvm.SI.load.const(<16 x i8> %34, i32 248) %74 = call float @llvm.SI.load.const(<16 x i8> %34, i32 252) %75 = call float @llvm.SI.load.const(<16 x i8> %34, i32 256) %76 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %77 = load <8 x i32>, <8 x i32> addrspace(2)* %76, align 32, !tbaa !0 %78 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %79 = load <4 x i32>, <4 x i32> addrspace(2)* %78, align 16, !tbaa !0 %80 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %81 = load <8 x i32>, <8 x i32> addrspace(2)* %80, align 32, !tbaa !0 %82 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %83 = load <4 x i32>, <4 x i32> addrspace(2)* %82, align 16, !tbaa !0 %84 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %85 = bitcast <8 x i32> addrspace(2)* %84 to <32 x i8> addrspace(2)* %86 = load <32 x i8>, <32 x i8> addrspace(2)* %85, align 32, !tbaa !0 %87 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %88 = bitcast <4 x i32> addrspace(2)* %87 to <16 x i8> addrspace(2)* %89 = load <16 x i8>, <16 x i8> addrspace(2)* %88, align 16, !tbaa !0 %90 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %91 = bitcast <8 x i32> addrspace(2)* %90 to <32 x i8> addrspace(2)* %92 = load <32 x i8>, <32 x i8> addrspace(2)* %91, align 32, !tbaa !0 %93 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %94 = bitcast <4 x i32> addrspace(2)* %93 to <16 x i8> addrspace(2)* %95 = load <16 x i8>, <16 x i8> addrspace(2)* %94, align 16, !tbaa !0 %96 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %97 = bitcast <8 x i32> addrspace(2)* %96 to <32 x i8> addrspace(2)* %98 = load <32 x i8>, <32 x i8> addrspace(2)* %97, align 32, !tbaa !0 %99 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %100 = bitcast <4 x i32> addrspace(2)* %99 to <16 x i8> addrspace(2)* %101 = load <16 x i8>, <16 x i8> addrspace(2)* %100, align 16, !tbaa !0 %102 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %103 = bitcast <8 x i32> addrspace(2)* %102 to <32 x i8> addrspace(2)* %104 = load <32 x i8>, <32 x i8> addrspace(2)* %103, align 32, !tbaa !0 %105 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %106 = bitcast <4 x i32> addrspace(2)* %105 to <16 x i8> addrspace(2)* %107 = load <16 x i8>, <16 x i8> addrspace(2)* %106, align 16, !tbaa !0 %108 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %109 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %110 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %111 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %112 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %113 = bitcast float %108 to i32 %114 = bitcast float %109 to i32 %115 = insertelement <4 x i32> undef, i32 %113, i32 0 %116 = insertelement <4 x i32> %115, i32 %114, i32 1 %117 = insertelement <4 x i32> %116, i32 0, i32 2 %118 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %117, <32 x i8> %86, <16 x i8> %89, i32 2) %119 = extractelement <4 x float> %118, i32 0 %120 = extractelement <4 x float> %118, i32 1 %121 = extractelement <4 x float> %118, i32 2 %122 = fadd float %119, -5.000000e-01 %123 = fadd float %120, -5.000000e-01 %124 = fadd float %121, -5.000000e-01 %125 = fmul float %122, 2.000000e+00 %126 = fmul float %123, 2.000000e+00 %127 = fmul float %124, 2.000000e+00 %128 = bitcast float %108 to i32 %129 = bitcast float %109 to i32 %130 = insertelement <4 x i32> undef, i32 %128, i32 0 %131 = insertelement <4 x i32> %130, i32 %129, i32 1 %132 = insertelement <4 x i32> %131, i32 0, i32 2 %133 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %132, <32 x i8> %92, <16 x i8> %95, i32 2) %134 = extractelement <4 x float> %133, i32 0 %135 = extractelement <4 x float> %133, i32 1 %136 = extractelement <4 x float> %133, i32 2 %137 = bitcast float %108 to i32 %138 = bitcast float %109 to i32 %139 = insertelement <4 x i32> undef, i32 %137, i32 0 %140 = insertelement <4 x i32> %139, i32 %138, i32 1 %141 = insertelement <4 x i32> %140, i32 0, i32 2 %142 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %141, <32 x i8> %98, <16 x i8> %101, i32 2) %143 = extractelement <4 x float> %142, i32 0 %144 = fmul float %108, 5.000000e-01 %145 = fmul float %109, 5.000000e-01 %146 = fmul float %24, %144 %147 = fmul float %25, %145 %148 = call float @llvm.AMDIL.fraction.(float %146) %149 = call float @llvm.AMDIL.fraction.(float %147) %150 = fsub float 1.000000e+00, %148 %151 = fsub float 1.000000e+00, %149 %152 = fmul float %26, %150 %153 = fmul float %27, %151 %154 = fsub float %144, %152 %155 = fsub float %145, %153 %156 = bitcast float %154 to i32 %157 = bitcast float %155 to i32 %158 = insertelement <2 x i32> undef, i32 %156, i32 0 %159 = insertelement <2 x i32> %158, i32 %157, i32 1 %160 = bitcast <8 x i32> %81 to <32 x i8> %161 = bitcast <4 x i32> %83 to <16 x i8> %162 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %159, <32 x i8> %160, <16 x i8> %161, i32 2) %163 = extractelement <4 x float> %162, i32 0 %164 = extractelement <4 x float> %162, i32 1 %165 = extractelement <4 x float> %162, i32 2 %166 = extractelement <4 x float> %162, i32 3 %167 = fadd float %26, %154 %168 = fadd float %155, 0.000000e+00 %169 = bitcast float %167 to i32 %170 = bitcast float %168 to i32 %171 = insertelement <2 x i32> undef, i32 %169, i32 0 %172 = insertelement <2 x i32> %171, i32 %170, i32 1 %173 = bitcast <8 x i32> %81 to <32 x i8> %174 = bitcast <4 x i32> %83 to <16 x i8> %175 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %172, <32 x i8> %173, <16 x i8> %174, i32 2) %176 = extractelement <4 x float> %175, i32 0 %177 = extractelement <4 x float> %175, i32 1 %178 = extractelement <4 x float> %175, i32 2 %179 = extractelement <4 x float> %175, i32 3 %180 = fadd float %154, 0.000000e+00 %181 = fadd float %27, %155 %182 = bitcast float %180 to i32 %183 = bitcast float %181 to i32 %184 = insertelement <2 x i32> undef, i32 %182, i32 0 %185 = insertelement <2 x i32> %184, i32 %183, i32 1 %186 = bitcast <8 x i32> %81 to <32 x i8> %187 = bitcast <4 x i32> %83 to <16 x i8> %188 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %185, <32 x i8> %186, <16 x i8> %187, i32 2) %189 = extractelement <4 x float> %188, i32 0 %190 = extractelement <4 x float> %188, i32 1 %191 = extractelement <4 x float> %188, i32 2 %192 = extractelement <4 x float> %188, i32 3 %193 = fadd float %26, %154 %194 = fadd float %27, %155 %195 = bitcast float %193 to i32 %196 = bitcast float %194 to i32 %197 = insertelement <2 x i32> undef, i32 %195, i32 0 %198 = insertelement <2 x i32> %197, i32 %196, i32 1 %199 = bitcast <8 x i32> %81 to <32 x i8> %200 = bitcast <4 x i32> %83 to <16 x i8> %201 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %198, <32 x i8> %199, <16 x i8> %200, i32 2) %202 = extractelement <4 x float> %201, i32 0 %203 = extractelement <4 x float> %201, i32 1 %204 = extractelement <4 x float> %201, i32 2 %205 = extractelement <4 x float> %201, i32 3 %206 = fsub float 1.000000e+00, %150 %207 = fsub float 1.000000e+00, %151 %208 = fmul float %207, %206 %209 = fmul float %207, %150 %210 = fmul float %206, %151 %211 = fmul float %150, %151 %212 = fmul float %166, 2.550000e+02 %213 = fmul float %212, 6.250000e-02 %214 = call float @floor(float %213) %215 = fmul float %214, 1.600000e+01 %216 = fsub float %212, %215 %217 = fadd float %214, -8.000000e+00 %218 = fadd float %216, -8.000000e+00 %219 = fmul float %217, 0x3FC24924A0000000 %220 = fmul float %218, 0x3FC24924A0000000 %221 = fmul float %219, %219 %222 = fsub float 1.000000e+00, %221 %223 = fmul float %220, %220 %224 = fsub float %222, %223 %225 = call float @llvm.maxnum.f32(float %224, float 0.000000e+00) %226 = call float @llvm.sqrt.f32(float %225) %227 = fmul float %164, 0x3FEFE02000000000 %228 = fmul float %165, 0x3F6FE01F80000000 %229 = fadd float %227, %228 %230 = fsub float %229, %143 %231 = fadd float %230, 0x3F50000FA0000000 %232 = fsub float -0.000000e+00, %231 %233 = fcmp olt float %231, 0.000000e+00 %. = select i1 %233, float %232, float %231 %234 = fmul float %., 1.200000e+02 %235 = call float @llvm.AMDIL.clamp.(float %234, float 0.000000e+00, float 1.000000e+00) %236 = fsub float 1.000000e+00, %235 %237 = fmul float %219, %125 %238 = fmul float %220, %126 %239 = fadd float %238, %237 %240 = fmul float %226, %127 %241 = fadd float %239, %240 %242 = fadd float %241, 0xBFD3333340000000 %243 = fmul float %242, 0x3FF6DB6DC0000000 %244 = call float @llvm.AMDIL.clamp.(float %243, float 0.000000e+00, float 1.000000e+00) %245 = fmul float %236, %244 %246 = fmul float %208, %245 %247 = fmul float %246, %163 %248 = fmul float %179, 2.550000e+02 %249 = fmul float %248, 6.250000e-02 %250 = call float @floor(float %249) %251 = fmul float %250, 1.600000e+01 %252 = fsub float %248, %251 %253 = fadd float %250, -8.000000e+00 %254 = fadd float %252, -8.000000e+00 %255 = fmul float %253, 0x3FC24924A0000000 %256 = fmul float %254, 0x3FC24924A0000000 %257 = fmul float %255, %255 %258 = fsub float 1.000000e+00, %257 %259 = fmul float %256, %256 %260 = fsub float %258, %259 %261 = call float @llvm.maxnum.f32(float %260, float 0.000000e+00) %262 = call float @llvm.sqrt.f32(float %261) %263 = fmul float %177, 0x3FEFE02000000000 %264 = fmul float %178, 0x3F6FE01F80000000 %265 = fadd float %263, %264 %266 = fsub float %265, %143 %267 = fadd float %266, 0x3F50000FA0000000 %268 = fsub float -0.000000e+00, %267 %269 = fcmp olt float %267, 0.000000e+00 %temp60.0 = select i1 %269, float %268, float %267 %270 = fmul float %temp60.0, 1.200000e+02 %271 = call float @llvm.AMDIL.clamp.(float %270, float 0.000000e+00, float 1.000000e+00) %272 = fsub float 1.000000e+00, %271 %273 = fmul float %255, %125 %274 = fmul float %256, %126 %275 = fadd float %274, %273 %276 = fmul float %262, %127 %277 = fadd float %275, %276 %278 = fadd float %277, 0xBFD3333340000000 %279 = fmul float %278, 0x3FF6DB6DC0000000 %280 = call float @llvm.AMDIL.clamp.(float %279, float 0.000000e+00, float 1.000000e+00) %281 = fmul float %272, %280 %282 = fmul float %209, %281 %283 = fadd float %246, %282 %284 = fmul float %282, %176 %285 = fadd float %284, %247 %286 = fmul float %192, 2.550000e+02 %287 = fmul float %286, 6.250000e-02 %288 = call float @floor(float %287) %289 = fmul float %288, 1.600000e+01 %290 = fsub float %286, %289 %291 = fadd float %288, -8.000000e+00 %292 = fadd float %290, -8.000000e+00 %293 = fmul float %291, 0x3FC24924A0000000 %294 = fmul float %292, 0x3FC24924A0000000 %295 = fmul float %293, %293 %296 = fsub float 1.000000e+00, %295 %297 = fmul float %294, %294 %298 = fsub float %296, %297 %299 = call float @llvm.maxnum.f32(float %298, float 0.000000e+00) %300 = call float @llvm.sqrt.f32(float %299) %301 = fmul float %190, 0x3FEFE02000000000 %302 = fmul float %191, 0x3F6FE01F80000000 %303 = fadd float %301, %302 %304 = fsub float %303, %143 %305 = fadd float %304, 0x3F50000FA0000000 %306 = fsub float -0.000000e+00, %305 %307 = fcmp olt float %305, 0.000000e+00 %.80 = select i1 %307, float %306, float %305 %308 = fmul float %.80, 1.200000e+02 %309 = call float @llvm.AMDIL.clamp.(float %308, float 0.000000e+00, float 1.000000e+00) %310 = fsub float 1.000000e+00, %309 %311 = fmul float %293, %125 %312 = fmul float %294, %126 %313 = fadd float %312, %311 %314 = fmul float %300, %127 %315 = fadd float %313, %314 %316 = fadd float %315, 0xBFD3333340000000 %317 = fmul float %316, 0x3FF6DB6DC0000000 %318 = call float @llvm.AMDIL.clamp.(float %317, float 0.000000e+00, float 1.000000e+00) %319 = fmul float %310, %318 %320 = fmul float %210, %319 %321 = fadd float %283, %320 %322 = fmul float %320, %189 %323 = fadd float %322, %285 %324 = fmul float %205, 2.550000e+02 %325 = fmul float %324, 6.250000e-02 %326 = call float @floor(float %325) %327 = fmul float %326, 1.600000e+01 %328 = fsub float %324, %327 %329 = fadd float %326, -8.000000e+00 %330 = fadd float %328, -8.000000e+00 %331 = fmul float %329, 0x3FC24924A0000000 %332 = fmul float %330, 0x3FC24924A0000000 %333 = fmul float %331, %331 %334 = fsub float 1.000000e+00, %333 %335 = fmul float %332, %332 %336 = fsub float %334, %335 %337 = call float @llvm.maxnum.f32(float %336, float 0.000000e+00) %338 = call float @llvm.sqrt.f32(float %337) %339 = fmul float %203, 0x3FEFE02000000000 %340 = fmul float %204, 0x3F6FE01F80000000 %341 = fadd float %339, %340 %342 = fsub float %341, %143 %343 = fadd float %342, 0x3F50000FA0000000 %344 = fsub float -0.000000e+00, %343 %345 = fcmp olt float %343, 0.000000e+00 %temp52.0 = select i1 %345, float %344, float %343 %346 = fmul float %temp52.0, 1.200000e+02 %347 = call float @llvm.AMDIL.clamp.(float %346, float 0.000000e+00, float 1.000000e+00) %348 = fsub float 1.000000e+00, %347 %349 = fmul float %331, %125 %350 = fmul float %332, %126 %351 = fadd float %350, %349 %352 = fmul float %338, %127 %353 = fadd float %351, %352 %354 = fadd float %353, 0xBFD3333340000000 %355 = fmul float %354, 0x3FF6DB6DC0000000 %356 = call float @llvm.AMDIL.clamp.(float %355, float 0.000000e+00, float 1.000000e+00) %357 = fmul float %348, %356 %358 = fmul float %211, %357 %359 = fadd float %321, %358 %360 = fmul float %208, %163 %361 = fmul float %209, %176 %362 = fadd float %360, %361 %363 = fmul float %210, %189 %364 = fadd float %362, %363 %365 = fmul float %211, %202 %366 = fadd float %364, %365 %367 = fmul float %358, %202 %368 = fadd float %367, %323 %369 = fdiv float 1.000000e+00, %359 %370 = fmul float %368, %369 %371 = fcmp olt float %359, 0x3E80C6F7A0000000 %.81 = select i1 %371, float %366, float %370 %372 = fmul float %110, %143 %373 = fadd float %372, %30 %374 = fmul float %111, %143 %375 = fadd float %374, %31 %376 = fmul float %112, %143 %377 = fadd float %376, %32 %378 = bitcast float %108 to i32 %379 = bitcast float %109 to i32 %380 = insertelement <4 x i32> undef, i32 %378, i32 0 %381 = insertelement <4 x i32> %380, i32 %379, i32 1 %382 = insertelement <4 x i32> %381, i32 0, i32 2 %383 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %382, <32 x i8> %104, <16 x i8> %107, i32 2) %384 = extractelement <4 x float> %383, i32 0 %385 = extractelement <4 x float> %383, i32 1 %386 = extractelement <4 x float> %383, i32 2 %387 = fsub float %373, %30 %388 = fsub float %375, %31 %389 = fsub float %377, %32 %390 = fmul float %387, %387 %391 = fmul float %388, %388 %392 = fadd float %391, %390 %393 = fmul float %389, %389 %394 = fadd float %392, %393 %395 = call float @llvm.AMDGPU.rsq.clamped.f32(float %394) %396 = fmul float %387, %395 %397 = fmul float %388, %395 %398 = fmul float %389, %395 %399 = fmul float %396, %125 %400 = fsub float -0.000000e+00, %399 %401 = fmul float %397, %126 %402 = fsub float %400, %401 %403 = fmul float %398, %127 %404 = fsub float %402, %403 %405 = fmul float %404, %125 %406 = fmul float %404, %126 %407 = fmul float %404, %127 %408 = fmul float %405, 2.000000e+00 %409 = fmul float %406, 2.000000e+00 %410 = fmul float %407, 2.000000e+00 %411 = fsub float -0.000000e+00, %408 %412 = fsub float %411, %396 %413 = fsub float -0.000000e+00, %409 %414 = fsub float %413, %397 %415 = fsub float -0.000000e+00, %410 %416 = fsub float %415, %398 %417 = fmul float %41, %412 %418 = fmul float %42, %414 %419 = fadd float %418, %417 %420 = fmul float %43, %416 %421 = fadd float %419, %420 %422 = call float @llvm.AMDIL.clamp.(float %421, float 0.000000e+00, float 1.000000e+00) %423 = call float @llvm.pow.f32(float %422, float %44) %424 = fmul float %423, %384 %425 = fmul float %424, %45 %426 = fadd float %425, %134 %427 = fmul float %424, %46 %428 = fadd float %427, %135 %429 = fmul float %424, %47 %430 = fadd float %429, %136 %431 = fmul float %59, %373 %432 = fmul float %60, %375 %433 = fadd float %432, %431 %434 = fmul float %61, %377 %435 = fadd float %434, %433 %436 = fadd float %435, %62 %437 = fmul float %63, %373 %438 = fmul float %64, %375 %439 = fadd float %438, %437 %440 = fmul float %65, %377 %441 = fadd float %440, %439 %442 = fadd float %441, %66 %443 = fmul float %67, %373 %444 = fmul float %68, %375 %445 = fadd float %444, %443 %446 = fmul float %69, %377 %447 = fadd float %446, %445 %448 = fadd float %447, %70 %449 = fmul float %71, %373 %450 = fmul float %72, %375 %451 = fadd float %450, %449 %452 = fmul float %73, %377 %453 = fadd float %452, %451 %454 = fadd float %453, %74 %455 = fdiv float 1.000000e+00, %454 %456 = fmul float %436, %455 %457 = fmul float %442, %455 %458 = fmul float %448, %455 %459 = fsub float -0.000000e+00, %458 %460 = call float @llvm.AMDIL.clamp.(float %459, float 0.000000e+00, float 1.000000e+00) %461 = fadd float %456, %75 %462 = fadd float %457, %75 %463 = bitcast float %460 to i32 %464 = bitcast float %461 to i32 %465 = bitcast float %462 to i32 %466 = insertelement <4 x i32> undef, i32 %463, i32 0 %467 = insertelement <4 x i32> %466, i32 %464, i32 1 %468 = insertelement <4 x i32> %467, i32 %465, i32 2 %469 = insertelement <4 x i32> %468, i32 0, i32 3 %470 = bitcast <8 x i32> %77 to <32 x i8> %471 = bitcast <4 x i32> %79 to <16 x i8> %472 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %469, <32 x i8> %470, <16 x i8> %471, i32 7) %473 = extractelement <4 x float> %472, i32 0 %474 = fsub float %456, %75 %475 = fadd float %75, %457 %476 = bitcast float %460 to i32 %477 = bitcast float %474 to i32 %478 = bitcast float %475 to i32 %479 = insertelement <4 x i32> undef, i32 %476, i32 0 %480 = insertelement <4 x i32> %479, i32 %477, i32 1 %481 = insertelement <4 x i32> %480, i32 %478, i32 2 %482 = insertelement <4 x i32> %481, i32 0, i32 3 %483 = bitcast <8 x i32> %77 to <32 x i8> %484 = bitcast <4 x i32> %79 to <16 x i8> %485 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %482, <32 x i8> %483, <16 x i8> %484, i32 7) %486 = extractelement <4 x float> %485, i32 0 %487 = fadd float %456, %75 %488 = fsub float %457, %75 %489 = bitcast float %460 to i32 %490 = bitcast float %487 to i32 %491 = bitcast float %488 to i32 %492 = insertelement <4 x i32> undef, i32 %489, i32 0 %493 = insertelement <4 x i32> %492, i32 %490, i32 1 %494 = insertelement <4 x i32> %493, i32 %491, i32 2 %495 = insertelement <4 x i32> %494, i32 0, i32 3 %496 = bitcast <8 x i32> %77 to <32 x i8> %497 = bitcast <4 x i32> %79 to <16 x i8> %498 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %495, <32 x i8> %496, <16 x i8> %497, i32 7) %499 = extractelement <4 x float> %498, i32 0 %500 = fsub float %456, %75 %501 = fsub float %457, %75 %502 = bitcast float %460 to i32 %503 = bitcast float %500 to i32 %504 = bitcast float %501 to i32 %505 = insertelement <4 x i32> undef, i32 %502, i32 0 %506 = insertelement <4 x i32> %505, i32 %503, i32 1 %507 = insertelement <4 x i32> %506, i32 %504, i32 2 %508 = insertelement <4 x i32> %507, i32 0, i32 3 %509 = bitcast <8 x i32> %77 to <32 x i8> %510 = bitcast <4 x i32> %79 to <16 x i8> %511 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %508, <32 x i8> %509, <16 x i8> %510, i32 7) %512 = extractelement <4 x float> %511, i32 0 %513 = fadd float %75, %456 %514 = fadd float %457, 0.000000e+00 %515 = bitcast float %460 to i32 %516 = bitcast float %513 to i32 %517 = bitcast float %514 to i32 %518 = insertelement <4 x i32> undef, i32 %515, i32 0 %519 = insertelement <4 x i32> %518, i32 %516, i32 1 %520 = insertelement <4 x i32> %519, i32 %517, i32 2 %521 = insertelement <4 x i32> %520, i32 0, i32 3 %522 = bitcast <8 x i32> %77 to <32 x i8> %523 = bitcast <4 x i32> %79 to <16 x i8> %524 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %521, <32 x i8> %522, <16 x i8> %523, i32 7) %525 = extractelement <4 x float> %524, i32 0 %526 = fsub float %456, %75 %527 = fadd float %457, 0.000000e+00 %528 = bitcast float %460 to i32 %529 = bitcast float %526 to i32 %530 = bitcast float %527 to i32 %531 = insertelement <4 x i32> undef, i32 %528, i32 0 %532 = insertelement <4 x i32> %531, i32 %529, i32 1 %533 = insertelement <4 x i32> %532, i32 %530, i32 2 %534 = insertelement <4 x i32> %533, i32 0, i32 3 %535 = bitcast <8 x i32> %77 to <32 x i8> %536 = bitcast <4 x i32> %79 to <16 x i8> %537 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %534, <32 x i8> %535, <16 x i8> %536, i32 7) %538 = extractelement <4 x float> %537, i32 0 %539 = fadd float %456, 0.000000e+00 %540 = fsub float %457, %75 %541 = bitcast float %460 to i32 %542 = bitcast float %539 to i32 %543 = bitcast float %540 to i32 %544 = insertelement <4 x i32> undef, i32 %541, i32 0 %545 = insertelement <4 x i32> %544, i32 %542, i32 1 %546 = insertelement <4 x i32> %545, i32 %543, i32 2 %547 = insertelement <4 x i32> %546, i32 0, i32 3 %548 = bitcast <8 x i32> %77 to <32 x i8> %549 = bitcast <4 x i32> %79 to <16 x i8> %550 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %547, <32 x i8> %548, <16 x i8> %549, i32 7) %551 = extractelement <4 x float> %550, i32 0 %552 = fadd float %456, 0.000000e+00 %553 = fadd float %75, %457 %554 = bitcast float %460 to i32 %555 = bitcast float %552 to i32 %556 = bitcast float %553 to i32 %557 = insertelement <4 x i32> undef, i32 %554, i32 0 %558 = insertelement <4 x i32> %557, i32 %555, i32 1 %559 = insertelement <4 x i32> %558, i32 %556, i32 2 %560 = insertelement <4 x i32> %559, i32 0, i32 3 %561 = bitcast <8 x i32> %77 to <32 x i8> %562 = bitcast <4 x i32> %79 to <16 x i8> %563 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %560, <32 x i8> %561, <16 x i8> %562, i32 7) %564 = extractelement <4 x float> %563, i32 0 %565 = fsub float 1.000000e+00, %.81 %566 = call float @llvm.pow.f32(float %565, float 0x4000CCCCC0000000) %567 = fmul float %51, %52 %568 = fmul float %51, %53 %569 = fmul float %51, %54 %570 = fmul float %55, %56 %571 = fmul float %55, %57 %572 = fmul float %55, %58 %573 = bitcast float %460 to i32 %574 = bitcast float %456 to i32 %575 = bitcast float %457 to i32 %576 = insertelement <4 x i32> undef, i32 %573, i32 0 %577 = insertelement <4 x i32> %576, i32 %574, i32 1 %578 = insertelement <4 x i32> %577, i32 %575, i32 2 %579 = insertelement <4 x i32> %578, i32 0, i32 3 %580 = bitcast <8 x i32> %77 to <32 x i8> %581 = bitcast <4 x i32> %79 to <16 x i8> %582 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %579, <32 x i8> %580, <16 x i8> %581, i32 7) %583 = extractelement <4 x float> %582, i32 0 %584 = fmul float %473, 0x3FB32D10E0000000 %585 = fmul float %486, 0x3FB32D10E0000000 %586 = fadd float %584, %585 %587 = fmul float %499, 0x3FB32D10E0000000 %588 = fadd float %586, %587 %589 = fmul float %512, 0x3FB32D10E0000000 %590 = fadd float %588, %589 %591 = fmul float %525, 0x3FBFA3FCC0000000 %592 = fmul float %538, 0x3FBFA3FCC0000000 %593 = fadd float %591, %592 %594 = fmul float %551, 0x3FBFA3FCC0000000 %595 = fadd float %593, %594 %596 = fmul float %564, 0x3FBFA3FCC0000000 %597 = fadd float %595, %596 %598 = fadd float %590, %597 %599 = fmul float %583, 0x3FCA5DFA80000000 %600 = fadd float %599, %598 %601 = fmul float %35, %125 %602 = fsub float -0.000000e+00, %601 %603 = fmul float %36, %126 %604 = fsub float %602, %603 %605 = fmul float %37, %127 %606 = fsub float %604, %605 %607 = call float @llvm.AMDIL.clamp.(float %606, float 0.000000e+00, float 1.000000e+00) %608 = fmul float %600, %607 %609 = fsub float 1.000000e+00, %608 %610 = fmul float %609, %566 %611 = fmul float %570, 0x3FCB333340000000 %612 = fmul float %571, 0x3FE6E48E80000000 %613 = fadd float %612, %611 %614 = fmul float %572, 0x3FB2752540000000 %615 = fadd float %613, %614 %616 = call float @llvm.AMDGPU.lrp(float %386, float %615, float %570) %617 = call float @llvm.AMDGPU.lrp(float %386, float %615, float %571) %618 = call float @llvm.AMDGPU.lrp(float %386, float %615, float %572) %619 = fmul float %48, %125 %620 = fmul float %49, %126 %621 = fadd float %620, %619 %622 = fmul float %50, %127 %623 = fadd float %621, %622 %624 = call float @llvm.AMDIL.clamp.(float %623, float 0.000000e+00, float 1.000000e+00) %625 = fmul float %566, %624 %626 = fmul float %567, 0x3FCB333340000000 %627 = fmul float %568, 0x3FE6E48E80000000 %628 = fadd float %627, %626 %629 = fmul float %569, 0x3FB2752540000000 %630 = fadd float %628, %629 %631 = call float @llvm.AMDGPU.lrp(float %386, float %630, float %567) %632 = call float @llvm.AMDGPU.lrp(float %386, float %630, float %568) %633 = call float @llvm.AMDGPU.lrp(float %386, float %630, float %569) %634 = call float @llvm.pow.f32(float %565, float 0x3FFD9999A0000000) %635 = call float @llvm.AMDGPU.lrp(float %634, float 1.000000e+00, float 0x3FDCCCCCC0000000) %636 = fmul float %608, %635 %637 = fmul float %636, %38 %638 = fmul float %636, %39 %639 = fmul float %636, %40 %640 = fmul float %625, %631 %641 = fadd float %640, %637 %642 = fmul float %625, %632 %643 = fadd float %642, %638 %644 = fmul float %625, %633 %645 = fadd float %644, %639 %646 = fmul float %610, %616 %647 = fadd float %646, %641 %648 = fmul float %610, %617 %649 = fadd float %648, %643 %650 = fmul float %610, %618 %651 = fadd float %650, %645 %652 = fmul float %426, %647 %653 = fmul float %428, %649 %654 = fmul float %430, %651 %655 = call float @llvm.AMDGPU.lrp(float %385, float %426, float %652) %656 = call float @llvm.AMDGPU.lrp(float %385, float %428, float %653) %657 = call float @llvm.AMDGPU.lrp(float %385, float %430, float %654) %658 = call float @llvm.AMDIL.clamp.(float %424, float 0.000000e+00, float 1.000000e+00) %659 = fmul float %658, %386 %660 = call i32 @llvm.SI.packf16(float %655, float %656) %661 = bitcast i32 %660 to float %662 = call i32 @llvm.SI.packf16(float %657, float %659) %663 = bitcast i32 %662 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %661, float %663, float %661, float %663) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @floor(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_mov_b32_e32 v16, 0x437f0000 ; 7E2002FF 437F0000 v_mov_b32_e32 v19, 0x3d800000 ; 7E2602FF 3D800000 v_mov_b32_e32 v18, 0xc1800000 ; 7E2402FF C1800000 v_mov_b32_e32 v17, 0xc1000000 ; 7E2202FF C1000000 v_mov_b32_e32 v15, 0x3e124925 ; 7E1E02FF 3E124925 v_mov_b32_e32 v13, 0x3b7f00fc ; 7E1A02FF 3B7F00FC v_mov_b32_e32 v14, 0x3f7f0100 ; 7E1C02FF 3F7F0100 v_mov_b32_e32 v11, 0x3a80007d ; 7E1602FF 3A80007D v_mov_b32_e32 v12, 0x80000000 ; 7E1802FF 80000000 v_mov_b32_e32 v10, 0x42f00000 ; 7E1402FF 42F00000 v_mov_b32_e32 v9, 0xbe99999a ; 7E1202FF BE99999A v_mov_b32_e32 v8, 0x3fb6db6e ; 7E1002FF 3FB6DB6E v_mov_b32_e32 v7, 0x340637bd ; 7E0E02FF 340637BD v_mov_b32_e32 v3, 0x3d996887 ; 7E0602FF 3D996887 v_mov_b32_e32 v2, 0x3dfd1fe6 ; 7E0402FF 3DFD1FE6 s_load_dwordx4 s[12:15], s[2:3], 0x4 ; C0860304 s_load_dwordx4 s[32:35], s[2:3], 0x10 ; C0900310 s_load_dwordx4 s[8:11], s[2:3], 0x14 ; C0840314 s_load_dwordx4 s[16:19], s[4:5], 0x0 ; C0880500 s_load_dwordx4 s[24:27], s[4:5], 0x4 ; C08C0504 s_load_dwordx4 s[44:47], s[4:5], 0x8 ; C0960508 s_load_dwordx4 s[40:43], s[4:5], 0xc ; C094050C s_load_dwordx4 s[48:51], s[4:5], 0x10 ; C0980510 s_load_dwordx4 s[20:23], s[4:5], 0x14 ; C08A0514 v_interp_p1_f32 v24, v0, 0, 0, [m0] ; C8600000 v_interp_p2_f32 v24, [v24], v1, 0, 0, [m0] ; C8610001 v_interp_p1_f32 v25, v0, 1, 0, [m0] ; C8640100 v_interp_p2_f32 v25, [v25], v1, 1, 0, [m0] ; C8650101 v_mov_b32_e32 v26, 0 ; 7E340280 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s28, s[12:15], 0x0 ; C20E0D00 s_buffer_load_dword s29, s[12:15], 0x1 ; C20E8D01 s_buffer_load_dword s30, s[12:15], 0x4 ; C20F0D04 s_buffer_load_dword s31, s[12:15], 0x5 ; C20F8D05 s_buffer_load_dword s5, s[32:35], 0x4c ; C202A14C s_buffer_load_dword s12, s[32:35], 0x4d ; C206214D s_buffer_load_dword s13, s[32:35], 0x4e ; C206A14E s_buffer_load_dword s3, s[8:11], 0x0 ; C2018900 s_buffer_load_dword s4, s[8:11], 0x1 ; C2020901 s_buffer_load_dword s2, s[8:11], 0x2 ; C2010902 s_buffer_load_dword s0, s[8:11], 0x4 ; C2000904 s_buffer_load_dword s1, s[8:11], 0x5 ; C2008905 v_interp_p1_f32 v20, v0, 0, 1, [m0] ; C8500400 v_interp_p2_f32 v20, [v20], v1, 0, 1, [m0] ; C8510401 v_interp_p1_f32 v21, v0, 1, 1, [m0] ; C8540500 v_interp_p2_f32 v21, [v21], v1, 1, 1, [m0] ; C8550501 v_interp_p1_f32 v0, v0, 2, 1, [m0] ; C8000600 v_interp_p2_f32 v0, [v0], v1, 2, 1, [m0] ; C8010601 s_buffer_load_dword s14, s[8:11], 0x31 ; C2070931 s_buffer_load_dword s15, s[8:11], 0x32 ; C2078932 v_mul_f32_e32 v1, 0.5, v24 ; 100230F0 v_mul_f32_e32 v4, 0.5, v25 ; 100832F0 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s28, v1 ; 100A021C v_floor_f32_e32 v5, v5 ; 7E0A4905 v_mad_f32 v1, s28, v1, -v5 ; D2820001 8416021C v_sub_f32_e32 v1, 1.0, v1 ; 080202F2 s_buffer_load_dword s28, s[8:11], 0x33 ; C20E0933 v_mul_f32_e32 v5, s29, v4 ; 100A081D v_floor_f32_e32 v5, v5 ; 7E0A4905 v_mad_f32 v4, s29, v4, -v5 ; D2820004 8416081D v_sub_f32_e32 v22, 1.0, v4 ; 082C08F2 s_buffer_load_dword s29, s[8:11], 0x34 ; C20E8934 v_mul_f32_e32 v4, s30, v1 ; 1008021E v_mul_f32_e32 v5, s31, v22 ; 100A2C1F v_mad_f32 v27, 0.5, v24, -v4 ; D282001B 841230F0 v_mad_f32 v28, 0.5, v25, -v5 ; D282001C 841632F0 v_add_f32_e32 v29, 0, v27 ; 063A3680 v_add_f32_e32 v30, s31, v28 ; 063C381F s_buffer_load_dword s37, s[8:11], 0x38 ; C2128938 s_buffer_load_dword s36, s[8:11], 0x39 ; C2120939 s_buffer_load_dword s31, s[8:11], 0x3a ; C20F893A s_buffer_load_dword s35, s[8:11], 0x3c ; C211893C s_buffer_load_dword s34, s[8:11], 0x3d ; C211093D s_buffer_load_dword s32, s[8:11], 0x3e ; C210093E s_buffer_load_dword s33, s[8:11], 0x3f ; C210893F s_load_dwordx8 s[52:59], s[6:7], 0x10 ; C0DA0710 s_load_dwordx8 s[60:67], s[6:7], 0x18 ; C0DE0718 s_load_dwordx8 s[68:75], s[6:7], 0x20 ; C0E20720 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[31:33], 7, 0, 0, 0, 0, 0, 0, 0, v[24:27], s[52:59], s[44:47] ; F0900700 016D1F18 image_sample_l v[4:6], 7, 0, 0, 0, 0, 0, 0, 0, v[24:27], s[60:67], s[40:43] ; F0900700 014F0418 s_load_dwordx8 s[40:47], s[6:7], 0x8 ; C0D40708 image_sample_l v34, 1, 0, 0, 0, 0, 0, 0, 0, v[24:27], s[68:75], s[48:51] ; F0900100 01912218 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mad_f32 v20, v20, v34, s5 ; D2820014 00164514 v_mul_f32_e32 v23, s37, v20 ; 102E2825 v_mad_f32 v21, v21, v34, s12 ; D2820015 00324515 v_mac_f32_e32 v23, s36, v21 ; 3E2E2A24 v_mul_f32_e32 v35, s35, v20 ; 10462823 v_mac_f32_e32 v35, s34, v21 ; 3E462A22 v_add_f32_e32 v36, s30, v27 ; 0648361E v_add_f32_e32 v37, 0, v28 ; 064A3880 image_sample v[38:41], 15, 0, 0, 0, 0, 0, 0, 0, v[27:28], s[40:47], s[24:27] ; F0800F00 00CA261B image_sample v[42:45], 15, 0, 0, 0, 0, 0, 0, 0, v[36:37], s[40:47], s[24:27] ; F0800F00 00CA2A24 v_mov_b32_e32 v37, v30 ; 7E4A031E s_buffer_load_dword s30, s[8:11], 0x3b ; C20F093B image_sample v[27:30], 15, 0, 0, 0, 0, 0, 0, 0, v[29:30], s[40:47], s[24:27] ; F0800F00 00CA1B1D image_sample v[46:49], 15, 0, 0, 0, 0, 0, 0, 0, v[36:37], s[40:47], s[24:27] ; F0800F00 00CA2E24 s_load_dwordx8 s[36:43], s[6:7], 0x28 ; C0D20728 v_mad_f32 v0, v0, v34, s13 ; D2820000 00364500 v_mac_f32_e32 v35, s32, v0 ; 3E460020 v_add_f32_e32 v35, s33, v35 ; 06464621 v_rcp_f32_e32 v35, v35 ; 7E465523 s_buffer_load_dword s24, s[8:11], 0x30 ; C20C0930 v_mac_f32_e32 v23, s31, v0 ; 3E2E001F s_waitcnt vmcnt(2) lgkmcnt(0) ; BF8C0072 v_add_f32_e32 v23, s30, v23 ; 062E2E1E v_mul_f32_e32 v23, v35, v23 ; 102E2F23 s_buffer_load_dword s25, s[8:11], 0x35 ; C20C8935 s_waitcnt vmcnt(0) ; BF8C0770 image_sample_l v[50:52], 7, 0, 0, 0, 0, 0, 0, 0, v[24:27], s[36:43], s[20:23] ; F0900700 00A93218 v_add_f32_e64 v23, 0, -v23 clamp ; D2060817 40022E80 s_buffer_load_dword s30, s[8:11], 0x36 ; C20F0936 s_buffer_load_dword s31, s[8:11], 0x37 ; C20F8937 s_buffer_load_dword s32, s[8:11], 0x40 ; C2100940 v_mul_f32_e32 v24, s24, v20 ; 10302818 v_mac_f32_e32 v24, s14, v21 ; 3E302A0E v_mac_f32_e32 v24, s15, v0 ; 3E30000F v_add_f32_e32 v36, s28, v24 ; 0648301C v_mul_f32_e32 v24, s29, v20 ; 1030281D s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v24, s25, v21 ; 3E302A19 s_load_dwordx8 s[20:27], s[6:7], 0x0 ; C0CA0700 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v24, s30, v0 ; 3E30001E v_add_f32_e32 v37, s31, v24 ; 064A301F v_mad_f32 v24, v36, v35, s32 ; D2820018 00824724 v_mad_f32 v25, v37, v35, s32 ; D2820019 00824725 v_mad_f32 v53, v36, v35, -s32 ; D2820035 80824724 v_mov_b32_e32 v54, v23 ; 7E6C0317 v_mov_b32_e32 v55, v24 ; 7E6E0318 v_mov_b32_e32 v56, v25 ; 7E700319 v_mov_b32_e32 v57, v26 ; 7E72031A v_mov_b32_e32 v55, v53 ; 7E6E0335 v_mad_f32 v53, v37, v35, -s32 ; D2820035 80824725 v_mov_b32_e32 v58, v23 ; 7E740317 v_mov_b32_e32 v59, v24 ; 7E760318 v_mov_b32_e32 v60, v25 ; 7E780319 v_mov_b32_e32 v61, v26 ; 7E7A031A v_mov_b32_e32 v56, v25 ; 7E700319 v_mov_b32_e32 v60, v53 ; 7E780335 v_mov_b32_e32 v57, v26 ; 7E72031A v_mov_b32_e32 v61, v26 ; 7E7A031A s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_c_l v62, 1, 0, 0, 0, 0, 0, 0, 0, v[23:26], s[20:27], s[16:19] ; F0B00100 00853E17 image_sample_c_l v63, 1, 0, 0, 0, 0, 0, 0, 0, v[54:57], s[20:27], s[16:19] ; F0B00100 00853F36 v_mov_b32_e32 v56, v53 ; 7E700335 image_sample_c_l v58, 1, 0, 0, 0, 0, 0, 0, 0, v[58:61], s[20:27], s[16:19] ; F0B00100 00853A3A v_mov_b32_e32 v57, v26 ; 7E72031A image_sample_c_l v59, 1, 0, 0, 0, 0, 0, 0, 0, v[54:57], s[20:27], s[16:19] ; F0B00100 00853B36 v_mad_f32 v56, v37, v35, 0 ; D2820038 02024725 v_mov_b32_e32 v64, v23 ; 7E800317 v_mov_b32_e32 v65, v24 ; 7E820318 v_mov_b32_e32 v66, v25 ; 7E840319 v_mov_b32_e32 v67, v26 ; 7E86031A v_mov_b32_e32 v66, v56 ; 7E840338 v_mov_b32_e32 v67, v26 ; 7E86031A v_mov_b32_e32 v57, v26 ; 7E72031A v_mad_f32 v24, v36, v35, 0 ; D2820018 02024724 v_mov_b32_e32 v68, v23 ; 7E880317 v_mov_b32_e32 v69, v24 ; 7E8A0318 v_mov_b32_e32 v70, v25 ; 7E8C0319 v_mov_b32_e32 v71, v26 ; 7E8E031A image_sample_c_l v60, 1, 0, 0, 0, 0, 0, 0, 0, v[64:67], s[20:27], s[16:19] ; F0B00100 00853C40 v_mov_b32_e32 v70, v53 ; 7E8C0335 image_sample_c_l v53, 1, 0, 0, 0, 0, 0, 0, 0, v[54:57], s[20:27], s[16:19] ; F0B00100 00853536 v_mov_b32_e32 v71, v26 ; 7E8E031A image_sample_c_l v54, 1, 0, 0, 0, 0, 0, 0, 0, v[68:71], s[20:27], s[16:19] ; F0B00100 00853644 v_mul_f32_e32 v36, v35, v36 ; 10484923 v_mul_f32_e32 v35, v35, v37 ; 10464B23 image_sample_c_l v37, 1, 0, 0, 0, 0, 0, 0, 0, v[23:26], s[20:27], s[16:19] ; F0B00100 00852517 v_mov_b32_e32 v24, v36 ; 7E300324 v_mov_b32_e32 v25, v35 ; 7E320323 image_sample_c_l v23, 1, 0, 0, 0, 0, 0, 0, 0, v[23:26], s[20:27], s[16:19] ; F0B00100 00851717 v_add_f32_e32 v24, -0.5, v31 ; 06303EF1 v_add_f32_e32 v25, -0.5, v32 ; 063240F1 v_add_f32_e32 v26, -0.5, v33 ; 063442F1 v_mad_f32 v31, v40, v13, -v34 ; D282001F 848A1B28 v_mac_f32_e32 v31, v14, v39 ; 3E3E4F0E v_add_f32_e32 v31, v11, v31 ; 063E3F0B v_cmp_gt_f32_e32 vcc, 0, v31 ; 7C083E80 v_xor_b32_e32 v32, v31, v12 ; 3A40191F v_cndmask_b32_e32 v31, v31, v32 ; 003E411F v_add_f32_e32 v24, v24, v24 ; 06303118 v_add_f32_e32 v25, v25, v25 ; 06323319 v_mul_f32_e32 v32, v16, v41 ; 10405310 v_mul_f32_e32 v32, v19, v32 ; 10404113 v_floor_f32_e32 v32, v32 ; 7E404920 v_mul_f32_e32 v33, v18, v32 ; 10424112 v_mac_f32_e32 v33, v16, v41 ; 3E425310 v_add_f32_e32 v32, v17, v32 ; 06404111 v_add_f32_e32 v33, v17, v33 ; 06424311 v_mul_f32_e32 v32, v15, v32 ; 1040410F v_mul_f32_e32 v33, v15, v33 ; 1042430F v_mad_f32 v35, -v32, v32, 1.0 ; D2820023 23CA4120 v_mad_f32 v35, -v33, v33, v35 ; D2820023 248E4321 v_mul_f32_e32 v32, v24, v32 ; 10404118 v_mac_f32_e32 v32, v25, v33 ; 3E404319 v_add_f32_e32 v26, v26, v26 ; 0634351A v_max_f32_e32 v33, 0, v35 ; 20424680 v_sqrt_f32_e32 v33, v33 ; 7E426721 v_mac_f32_e32 v32, v26, v33 ; 3E40431A v_mul_f32_e32 v31, v10, v31 ; 103E3F0A v_add_f32_e64 v31, 0, v31 clamp ; D206081F 00023E80 v_sub_f32_e32 v31, 1.0, v31 ; 083E3EF2 v_add_f32_e32 v32, v9, v32 ; 06404109 v_mul_f32_e32 v32, v8, v32 ; 10404108 v_add_f32_e64 v32, 0, v32 clamp ; D2060820 00024080 v_mul_f32_e32 v31, v32, v31 ; 103E3F20 v_mad_f32 v32, v44, v13, -v34 ; D2820020 848A1B2C v_mac_f32_e32 v32, v14, v43 ; 3E40570E v_add_f32_e32 v32, v11, v32 ; 0640410B v_cmp_gt_f32_e32 vcc, 0, v32 ; 7C084080 v_xor_b32_e32 v33, v32, v12 ; 3A421920 v_cndmask_b32_e32 v32, v32, v33 ; 00404320 v_mul_f32_e32 v33, v16, v45 ; 10425B10 v_mul_f32_e32 v33, v19, v33 ; 10424313 v_floor_f32_e32 v33, v33 ; 7E424921 v_mul_f32_e32 v35, v18, v33 ; 10464312 v_mac_f32_e32 v35, v16, v45 ; 3E465B10 v_add_f32_e32 v33, v17, v33 ; 06424311 v_add_f32_e32 v35, v17, v35 ; 06464711 v_mul_f32_e32 v33, v15, v33 ; 1042430F v_mul_f32_e32 v35, v15, v35 ; 1046470F v_mad_f32 v36, -v33, v33, 1.0 ; D2820024 23CA4321 v_mad_f32 v36, -v35, v35, v36 ; D2820024 24924723 v_mul_f32_e32 v33, v24, v33 ; 10424318 v_mac_f32_e32 v33, v25, v35 ; 3E424719 v_max_f32_e32 v35, 0, v36 ; 20464880 v_sqrt_f32_e32 v35, v35 ; 7E466723 v_mac_f32_e32 v33, v26, v35 ; 3E42471A v_mul_f32_e32 v32, v10, v32 ; 1040410A v_add_f32_e64 v32, 0, v32 clamp ; D2060820 00024080 v_sub_f32_e32 v32, 1.0, v32 ; 084040F2 v_add_f32_e32 v33, v9, v33 ; 06424309 v_mul_f32_e32 v33, v8, v33 ; 10424308 v_add_f32_e64 v33, 0, v33 clamp ; D2060821 00024280 v_mul_f32_e32 v32, v33, v32 ; 10404121 v_sub_f32_e32 v33, 1.0, v1 ; 084202F2 v_sub_f32_e32 v35, 1.0, v22 ; 08462CF2 v_mul_f32_e32 v36, v33, v35 ; 10484721 v_mul_f32_e32 v35, v1, v35 ; 10464701 v_mul_f32_e32 v39, v31, v36 ; 104E491F v_mul_f32_e32 v39, v38, v39 ; 104E4F26 v_mul_f32_e32 v32, v32, v35 ; 10404720 v_mac_f32_e32 v39, v42, v32 ; 3E4E412A v_mul_f32_e32 v35, v42, v35 ; 1046472A v_mac_f32_e32 v35, v38, v36 ; 3E464926 v_mad_f32 v29, v29, v13, -v34 ; D282001D 848A1B1D v_mac_f32_e32 v29, v14, v28 ; 3E3A390E v_add_f32_e32 v28, v11, v29 ; 06383B0B v_cmp_gt_f32_e32 vcc, 0, v28 ; 7C083880 v_xor_b32_e32 v29, v28, v12 ; 3A3A191C v_cndmask_b32_e32 v28, v28, v29 ; 00383B1C v_mul_f32_e32 v29, v16, v30 ; 103A3D10 v_mul_f32_e32 v29, v19, v29 ; 103A3B13 v_floor_f32_e32 v29, v29 ; 7E3A491D v_mul_f32_e32 v38, v18, v29 ; 104C3B12 v_mac_f32_e32 v38, v16, v30 ; 3E4C3D10 v_add_f32_e32 v29, v17, v29 ; 063A3B11 v_add_f32_e32 v30, v17, v38 ; 063C4D11 v_mul_f32_e32 v29, v15, v29 ; 103A3B0F v_mul_f32_e32 v30, v15, v30 ; 103C3D0F v_mad_f32 v38, -v29, v29, 1.0 ; D2820026 23CA3B1D v_mad_f32 v38, -v30, v30, v38 ; D2820026 249A3D1E v_mul_f32_e32 v29, v24, v29 ; 103A3B18 v_mac_f32_e32 v29, v25, v30 ; 3E3A3D19 v_max_f32_e32 v30, 0, v38 ; 203C4C80 v_sqrt_f32_e32 v30, v30 ; 7E3C671E v_mac_f32_e32 v29, v26, v30 ; 3E3A3D1A v_mul_f32_e32 v28, v10, v28 ; 1038390A v_add_f32_e64 v28, 0, v28 clamp ; D206081C 00023880 v_sub_f32_e32 v28, 1.0, v28 ; 083838F2 v_add_f32_e32 v29, v9, v29 ; 063A3B09 v_mul_f32_e32 v29, v8, v29 ; 103A3B08 v_add_f32_e64 v29, 0, v29 clamp ; D206081D 00023A80 v_mul_f32_e32 v28, v29, v28 ; 1038391D v_mul_f32_e32 v29, v22, v33 ; 103A4316 v_mul_f32_e32 v30, v28, v29 ; 103C3B1C v_mac_f32_e32 v39, v27, v30 ; 3E4E3D1B v_mac_f32_e32 v35, v27, v29 ; 3E463B1B v_mul_f32_e32 v1, v22, v1 ; 10020316 v_mul_f32_e32 v22, v16, v49 ; 102C6310 v_mul_f32_e32 v19, v19, v22 ; 10262D13 v_floor_f32_e32 v19, v19 ; 7E264913 v_mul_f32_e32 v18, v18, v19 ; 10242712 v_mac_f32_e32 v18, v16, v49 ; 3E246310 v_add_f32_e32 v16, v17, v19 ; 06202711 v_add_f32_e32 v17, v17, v18 ; 06222511 v_mul_f32_e32 v16, v15, v16 ; 1020210F v_mul_f32_e32 v15, v15, v17 ; 101E230F v_mad_f32 v13, v48, v13, -v34 ; D282000D 848A1B30 v_mac_f32_e32 v13, v14, v47 ; 3E1A5F0E v_add_f32_e32 v11, v11, v13 ; 06161B0B v_xor_b32_e32 v12, v11, v12 ; 3A18190B v_cmp_gt_f32_e32 vcc, 0, v11 ; 7C081680 v_cndmask_b32_e32 v11, v11, v12 ; 0016190B v_mul_f32_e32 v10, v10, v11 ; 1014170A v_mad_f32 v11, -v16, v16, 1.0 ; D282000B 23CA2110 v_mad_f32 v11, -v15, v15, v11 ; D282000B 242E1F0F v_mul_f32_e32 v12, v24, v16 ; 10182118 v_mac_f32_e32 v12, v25, v15 ; 3E181F19 v_max_f32_e32 v11, 0, v11 ; 20161680 v_sqrt_f32_e32 v11, v11 ; 7E16670B v_mac_f32_e32 v12, v26, v11 ; 3E18171A v_add_f32_e32 v9, v9, v12 ; 06121909 v_mul_f32_e32 v8, v8, v9 ; 10101308 v_add_f32_e64 v9, 0, v10 clamp ; D2060809 00021480 v_sub_f32_e32 v9, 1.0, v9 ; 081212F2 v_add_f32_e64 v8, 0, v8 clamp ; D2060808 00021080 v_mul_f32_e32 v8, v8, v9 ; 10101308 v_mul_f32_e32 v9, v8, v1 ; 10120308 v_mac_f32_e32 v39, v46, v9 ; 3E4E132E v_mac_f32_e32 v35, v46, v1 ; 3E46032E v_mac_f32_e32 v32, v31, v36 ; 3E40491F v_subrev_f32_e32 v9, s5, v20 ; 0A122805 v_subrev_f32_e32 v10, s12, v21 ; 0A142A0C v_subrev_f32_e32 v0, s13, v0 ; 0A00000D v_mul_f32_e32 v11, v9, v9 ; 10161309 v_mac_f32_e32 v11, v10, v10 ; 3E16150A v_mac_f32_e32 v11, v0, v0 ; 3E160100 v_rsq_clamp_f32_e32 v11, v11 ; 7E16590B v_mac_f32_e32 v32, v28, v29 ; 3E403B1C v_mac_f32_e32 v32, v8, v1 ; 3E400308 v_cmp_gt_f32_e32 vcc, v7, v32 ; 7C084107 v_mul_f32_e32 v1, v11, v9 ; 1002130B v_mul_f32_e32 v1, v24, v1 ; 10020318 v_mul_f32_e32 v7, v11, v10 ; 100E150B v_mad_f32 v1, -v7, v25, -v1 ; D2820001 A4063307 v_mul_f32_e32 v7, v11, v0 ; 100E010B v_mad_f32 v1, -v7, v26, v1 ; D2820001 24063507 v_mul_f32_e32 v7, v24, v1 ; 100E0318 v_mac_f32_e32 v7, v24, v1 ; 3E0E0318 v_mul_f32_e32 v8, v25, v1 ; 10100319 v_mac_f32_e32 v8, v25, v1 ; 3E100319 v_mul_f32_e32 v12, v26, v1 ; 1018031A v_mac_f32_e32 v12, v26, v1 ; 3E18031A s_buffer_load_dword s5, s[8:11], 0x8 ; C2028908 s_buffer_load_dword s6, s[8:11], 0x9 ; C2030909 s_buffer_load_dword s7, s[8:11], 0xa ; C203890A v_rcp_f32_e32 v1, v32 ; 7E025520 v_mad_f32 v7, -v9, v11, -v7 ; D2820007 A41E1709 v_mad_f32 v8, -v10, v11, -v8 ; D2820008 A422170A v_mad_f32 v0, -v0, v11, -v12 ; D2820000 A4321700 v_mul_f32_e32 v1, v1, v39 ; 10024F01 v_cndmask_b32_e32 v1, v1, v35 ; 00024701 s_buffer_load_dword s12, s[8:11], 0xb ; C206090B s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v7, s5, v7 ; 100E0E05 v_mac_f32_e32 v7, s6, v8 ; 3E0E1006 v_mac_f32_e32 v7, s7, v0 ; 3E0E0007 s_buffer_load_dword s5, s[8:11], 0x6 ; C2028906 s_buffer_load_dword s6, s[8:11], 0x17 ; C2030917 s_buffer_load_dword s7, s[8:11], 0x18 ; C2038918 s_buffer_load_dword s13, s[8:11], 0x19 ; C2068919 s_buffer_load_dword s14, s[8:11], 0x1a ; C207091A s_buffer_load_dword s15, s[8:11], 0xc ; C207890C s_buffer_load_dword s16, s[8:11], 0xd ; C208090D s_buffer_load_dword s17, s[8:11], 0xe ; C208890E s_buffer_load_dword s18, s[8:11], 0x10 ; C2090910 s_buffer_load_dword s19, s[8:11], 0x11 ; C2098911 s_buffer_load_dword s20, s[8:11], 0x12 ; C20A0912 s_buffer_load_dword s21, s[8:11], 0x13 ; C20A8913 s_buffer_load_dword s22, s[8:11], 0x14 ; C20B0914 s_buffer_load_dword s23, s[8:11], 0x15 ; C20B8915 s_buffer_load_dword s8, s[8:11], 0x16 ; C2040916 v_mul_f32_e32 v0, v3, v63 ; 10007F03 v_mac_f32_e32 v0, v3, v62 ; 3E007D03 v_mac_f32_e32 v0, v3, v58 ; 3E007503 v_mac_f32_e32 v0, v3, v59 ; 3E007703 v_mul_f32_e32 v3, v2, v53 ; 10066B02 v_mac_f32_e32 v3, v2, v60 ; 3E067902 v_mac_f32_e32 v3, v2, v54 ; 3E066D02 v_mac_f32_e32 v3, v2, v37 ; 3E064B02 v_mul_f32_e32 v2, s3, v24 ; 10043003 v_mad_f32 v2, -s4, v25, -v2 ; D2820002 A40A3204 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v8, s18, v24 ; 10103012 v_mac_f32_e32 v8, s19, v25 ; 3E103213 v_mad_f32 v2, -s2, v26, v2 ; D2820002 240A3402 v_mac_f32_e32 v8, s20, v26 ; 3E103414 v_sub_f32_e32 v1, 1.0, v1 ; 080202F2 v_log_f32_e32 v1, v1 ; 7E024F01 v_mov_b32_e32 v9, s22 ; 7E120216 v_mul_f32_e32 v9, s21, v9 ; 10121215 v_mov_b32_e32 v10, s23 ; 7E140217 v_mul_f32_e32 v10, s21, v10 ; 10141415 v_mov_b32_e32 v11, s8 ; 7E160208 v_mul_f32_e32 v11, s21, v11 ; 10161615 v_add_f32_e32 v0, v3, v0 ; 06000103 v_madmk_f32_e32 v0, v23, v0, 0x3e52efd4 ; 40000117 3E52EFD4 v_mul_legacy_f32_e32 v3, 0x3feccccd, v1 ; 0E0602FF 3FECCCCD v_exp_f32_e32 v3, v3 ; 7E064B03 v_sub_f32_e32 v12, 1.0, v3 ; 081806F2 v_mul_f32_e32 v12, 0x3ee66666, v12 ; 101818FF 3EE66666 v_mac_f32_e32 v12, 1.0, v3 ; 3E1806F2 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_mul_f32_e32 v3, v2, v0 ; 10060102 v_mul_f32_e32 v3, v12, v3 ; 1006070C v_mul_f32_e32 v12, s0, v3 ; 10180600 v_mul_f32_e32 v13, s1, v3 ; 101A0601 v_mul_f32_e32 v3, s5, v3 ; 10060605 v_mov_b32_e32 v14, 0x3e59999a ; 7E1C02FF 3E59999A v_mov_b32_e32 v15, 0x3f372474 ; 7E1E02FF 3F372474 v_mul_f32_e32 v16, v14, v9 ; 1020130E v_mac_f32_e32 v16, v15, v10 ; 3E20150F v_mov_b32_e32 v17, 0x3d93a92a ; 7E2202FF 3D93A92A v_mac_f32_e32 v16, v17, v11 ; 3E201711 v_sub_f32_e32 v18, 1.0, v52 ; 082468F2 v_mul_f32_e32 v9, v9, v18 ; 10122509 v_mac_f32_e32 v9, v16, v52 ; 3E126910 v_mul_f32_e32 v10, v10, v18 ; 1014250A v_mac_f32_e32 v10, v16, v52 ; 3E146910 v_mul_f32_e32 v11, v11, v18 ; 1016250B v_mac_f32_e32 v11, v16, v52 ; 3E166910 v_mul_legacy_f32_e32 v1, 0x40066666, v1 ; 0E0202FF 40066666 v_exp_f32_e32 v1, v1 ; 7E024B01 v_add_f32_e64 v8, 0, v8 clamp ; D2060808 00021080 v_mul_f32_e32 v8, v8, v1 ; 10100308 v_mac_f32_e32 v12, v9, v8 ; 3E181109 v_mac_f32_e32 v13, v10, v8 ; 3E1A110A v_mac_f32_e32 v3, v11, v8 ; 3E06110B v_mad_f32 v0, -v0, v2, 1.0 ; D2820000 23CA0500 v_mul_f32_e32 v0, v1, v0 ; 10000101 v_mov_b32_e32 v1, s7 ; 7E020207 v_mul_f32_e32 v1, s6, v1 ; 10020206 v_mov_b32_e32 v2, s13 ; 7E04020D v_mul_f32_e32 v2, s6, v2 ; 10040406 v_mul_f32_e32 v8, v14, v1 ; 1010030E v_mac_f32_e32 v8, v15, v2 ; 3E10050F v_mov_b32_e32 v9, s14 ; 7E12020E v_mul_f32_e32 v9, s6, v9 ; 10121206 v_mac_f32_e32 v8, v17, v9 ; 3E101311 v_add_f32_e64 v7, 0, v7 clamp ; D2060807 00020E80 v_log_f32_e32 v7, v7 ; 7E0E4F07 v_mul_f32_e32 v1, v1, v18 ; 10022501 v_mul_f32_e32 v2, v2, v18 ; 10042502 v_mul_f32_e32 v9, v9, v18 ; 10122509 v_mul_legacy_f32_e32 v7, s12, v7 ; 0E0E0E0C v_exp_f32_e32 v7, v7 ; 7E0E4B07 v_mul_f32_e32 v7, v50, v7 ; 100E0F32 v_mac_f32_e32 v1, v8, v52 ; 3E026908 v_mac_f32_e32 v2, v8, v52 ; 3E046908 v_mac_f32_e32 v9, v8, v52 ; 3E126908 v_mad_f32 v4, s15, v7, v4 ; D2820004 04120E0F v_mac_f32_e32 v12, v1, v0 ; 3E180101 v_mad_f32 v1, s16, v7, v5 ; D2820001 04160E10 v_mac_f32_e32 v6, s17, v7 ; 3E0C0E11 v_mac_f32_e32 v13, v2, v0 ; 3E1A0102 v_mac_f32_e32 v3, v9, v0 ; 3E060109 v_mul_f32_e32 v0, v12, v4 ; 1000090C v_mul_f32_e32 v2, v13, v1 ; 1004030D v_mul_f32_e32 v3, v3, v6 ; 10060D03 v_sub_f32_e32 v5, 1.0, v51 ; 080A66F2 v_mul_f32_e32 v0, v0, v5 ; 10000B00 v_mac_f32_e32 v0, v4, v51 ; 3E006704 v_mul_f32_e32 v2, v2, v5 ; 10040B02 v_mac_f32_e32 v2, v1, v51 ; 3E046701 v_mul_f32_e32 v1, v3, v5 ; 10020B03 v_mac_f32_e32 v1, v6, v51 ; 3E026706 v_add_f32_e64 v3, 0, v7 clamp ; D2060803 00020E80 v_mul_f32_e32 v3, v52, v3 ; 10060734 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 v_cvt_pkrtz_f16_f32_e32 v1, v1, v3 ; 5E020701 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 80 VGPRS: 72 Code Size: 2228 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL CONST[1][0..41] DCL CONST[2][0..13] DCL CONST[3][0] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL IMM[0] FLT32 { 0.0000, -1.0000, 1.0000, 0.0000} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MOV TEMP[0].zw, IMM[0].zzyz 4: MOV TEMP[0].x, IN[0].xxxx 5: MOV TEMP[0].y, -IN[0].yyyy 6: MOV OUT[1], TEMP[1] 7: MOV OUT[2].xy, IN[1].xyxx 8: MOV OUT[0], TEMP[0] 9: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = add i32 %5, %7 %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13) %15 = extractelement <4 x float> %14, i32 0 %16 = extractelement <4 x float> %14, i32 1 %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 %19 = add i32 %5, %7 %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19) %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = fsub float -0.000000e+00, %16 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %21, float %22, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %15, float %23, float -1.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 v_mov_b32_e32 v1, 0 ; 7E020280 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[4:7], v0, s[4:7], 0 idxen ; E00C2000 80010400 v_mov_b32_e32 v0, 1.0 ; 7E0002F2 exp 15, 32, 0, 0, 0, v1, v1, v1, v1 ; F800020F 01010101 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 33, 0, 0, 0, v4, v5, v1, v1 ; F800021F 01010504 v_xor_b32_e32 v3, 0x80000000, v3 ; 3A0606FF 80000000 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v4, -1.0 ; 7E0802F3 exp 15, 12, 0, 0, 0, v2, v3, v4, v0 ; F80000CF 00040302 exp 15, 13, 0, 1, 0, v1, v1, v1, v1 ; F80008DF 01010101 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 100 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL CONST[1][0..2] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0..6], LOCAL IMM[0] UINT32 {0, 32, 16, 4} IMM[1] FLT32 { 0.0000, -2.0000, 0.5000, -1.0000} IMM[2] UINT32 {48, 0, 0, 0} IMM[3] FLT32 { 0.7500, 1.0000, -1.0000, 0.0000} IMM[4] FLT32 { 2.0000, 0.0000, 0.1250, 0.0000} 0: MUL TEMP[0], CONST[1][0].xyxy, CONST[1][2] 1: MAD TEMP[1].xy, CONST[1][0].xyyy, IMM[1].xyyy, IN[0].xyyy 2: MAX TEMP[1].xy, TEMP[1].xyyy, TEMP[0].xyyy 3: MIN TEMP[1].xy, TEMP[1].xyyy, TEMP[0].zwww 4: MOV TEMP[2].xy, TEMP[1].xyyy 5: MOV TEMP[2].w, IMM[1].xxxx 6: TXL TEMP[2].w, TEMP[2], SAMP[0], 2D 7: MUL TEMP[2].x, TEMP[2].wwww, CONST[1][1].xxxx 8: MOV_SAT TEMP[2].x, TEMP[2].xxxx 9: MUL TEMP[2].xyz, TEMP[2].xxxx, CONST[5][3].xyzz 10: MOV TEMP[3].xy, TEMP[1].xyyy 11: MOV TEMP[3].w, IMM[1].xxxx 12: TXL TEMP[3].yz, TEMP[3], SAMP[1], 2D 13: MOV TEMP[4].xyz, TEMP[2].xyzx 14: FSLT TEMP[5].x, IMM[1].xxxx, TEMP[3].yyyy 15: UIF TEMP[5].xxxx :0 16: MUL TEMP[3].x, TEMP[3].yyyy, TEMP[3].zzzz 17: MOV_SAT TEMP[3].x, TEMP[3].xxxx 18: MOV TEMP[1].xy, TEMP[1].xyyy 19: MOV TEMP[1].w, IMM[1].xxxx 20: TXL TEMP[1].xyz, TEMP[1], SAMP[2], 2D 21: MAD TEMP[4].xyz, TEMP[3].xxxx, TEMP[1].xyzz, TEMP[2].xyzz 22: ENDIF 23: MUL TEMP[1].xyz, TEMP[4].xyzz, IMM[1].zzzz 24: ADD TEMP[2].xy, -CONST[1][0].xyyy, IN[0].xyyy 25: MAX TEMP[2].xy, TEMP[2].xyyy, TEMP[0].xyyy 26: MIN TEMP[2].xy, TEMP[2].xyyy, TEMP[0].zwww 27: MOV TEMP[3].xy, TEMP[2].xyyy 28: MOV TEMP[3].w, IMM[1].xxxx 29: TXL TEMP[3].w, TEMP[3], SAMP[0], 2D 30: MUL TEMP[3].x, TEMP[3].wwww, CONST[1][1].xxxx 31: MOV_SAT TEMP[3].x, TEMP[3].xxxx 32: MUL TEMP[3].xyz, TEMP[3].xxxx, CONST[5][3].xyzz 33: MOV TEMP[4].xy, TEMP[2].xyyy 34: MOV TEMP[4].w, IMM[1].xxxx 35: TXL TEMP[4].yz, TEMP[4], SAMP[1], 2D 36: MOV TEMP[5].xyz, TEMP[3].xyzx 37: FSLT TEMP[6].x, IMM[1].xxxx, TEMP[4].yyyy 38: UIF TEMP[6].xxxx :0 39: MUL TEMP[4].x, TEMP[4].yyyy, TEMP[4].zzzz 40: MOV_SAT TEMP[4].x, TEMP[4].xxxx 41: MOV TEMP[2].xy, TEMP[2].xyyy 42: MOV TEMP[2].w, IMM[1].xxxx 43: TXL TEMP[2].xyz, TEMP[2], SAMP[2], 2D 44: MAD TEMP[5].xyz, TEMP[4].xxxx, TEMP[2].xyzz, TEMP[3].xyzz 45: ENDIF 46: MAD TEMP[1].xyz, IMM[1].zzzz, TEMP[5].xyzz, TEMP[1].xyzz 47: MAD TEMP[2].xy, CONST[1][0].xyyy, IMM[1].xwww, IN[0].xyyy 48: MAX TEMP[2].xy, TEMP[2].xyyy, TEMP[0].xyyy 49: MIN TEMP[2].xy, TEMP[2].xyyy, TEMP[0].zwww 50: MOV TEMP[3].xy, TEMP[2].xyyy 51: MOV TEMP[3].w, IMM[1].xxxx 52: TXL TEMP[3].w, TEMP[3], SAMP[0], 2D 53: MUL TEMP[3].x, TEMP[3].wwww, CONST[1][1].xxxx 54: MOV_SAT TEMP[3].x, TEMP[3].xxxx 55: MUL TEMP[3].xyz, TEMP[3].xxxx, CONST[5][3].xyzz 56: MOV TEMP[4].xy, TEMP[2].xyyy 57: MOV TEMP[4].w, IMM[1].xxxx 58: TXL TEMP[4].yz, TEMP[4], SAMP[1], 2D 59: MOV TEMP[5].xyz, TEMP[3].xyzx 60: FSLT TEMP[6].x, IMM[1].xxxx, TEMP[4].yyyy 61: UIF TEMP[6].xxxx :0 62: MUL TEMP[4].x, TEMP[4].yyyy, TEMP[4].zzzz 63: MOV_SAT TEMP[4].x, TEMP[4].xxxx 64: MOV TEMP[2].xy, TEMP[2].xyyy 65: MOV TEMP[2].w, IMM[1].xxxx 66: TXL TEMP[2].xyz, TEMP[2], SAMP[2], 2D 67: MAD TEMP[5].xyz, TEMP[4].xxxx, TEMP[2].xyzz, TEMP[3].xyzz 68: ENDIF 69: MAD TEMP[1].xyz, TEMP[5].xyzz, IMM[3].xxxx, TEMP[1].xyzz 70: MAD TEMP[2].xy, CONST[1][0].xyyy, IMM[3].yzzz, IN[0].xyyy 71: MAX TEMP[2].xy, TEMP[2].xyyy, TEMP[0].xyyy 72: MIN TEMP[2].xy, TEMP[2].xyyy, TEMP[0].zwww 73: MOV TEMP[3].xy, TEMP[2].xyyy 74: MOV TEMP[3].w, IMM[1].xxxx 75: TXL TEMP[3].w, TEMP[3], SAMP[0], 2D 76: MUL TEMP[3].x, TEMP[3].wwww, CONST[1][1].xxxx 77: MOV_SAT TEMP[3].x, TEMP[3].xxxx 78: MUL TEMP[3].xyz, TEMP[3].xxxx, CONST[5][3].xyzz 79: MOV TEMP[4].xy, TEMP[2].xyyy 80: MOV TEMP[4].w, IMM[1].xxxx 81: TXL TEMP[4].yz, TEMP[4], SAMP[1], 2D 82: MOV TEMP[5].xyz, TEMP[3].xyzx 83: FSLT TEMP[6].x, IMM[1].xxxx, TEMP[4].yyyy 84: UIF TEMP[6].xxxx :0 85: MUL TEMP[4].x, TEMP[4].yyyy, TEMP[4].zzzz 86: MOV_SAT TEMP[4].x, TEMP[4].xxxx 87: MOV TEMP[2].xy, TEMP[2].xyyy 88: MOV TEMP[2].w, IMM[1].xxxx 89: TXL TEMP[2].xyz, TEMP[2], SAMP[2], 2D 90: MAD TEMP[5].xyz, TEMP[4].xxxx, TEMP[2].xyzz, TEMP[3].xyzz 91: ENDIF 92: MAD TEMP[1].xyz, IMM[1].zzzz, TEMP[5].xyzz, TEMP[1].xyzz 93: MAD TEMP[2].xy, CONST[1][0].xyyy, IMM[1].yxxx, IN[0].xyyy 94: MAX TEMP[2].xy, TEMP[2].xyyy, TEMP[0].xyyy 95: MIN TEMP[2].xy, TEMP[2].xyyy, TEMP[0].zwww 96: MOV TEMP[3].xy, TEMP[2].xyyy 97: MOV TEMP[3].w, IMM[1].xxxx 98: TXL TEMP[3].w, TEMP[3], SAMP[0], 2D 99: MUL TEMP[3].x, TEMP[3].wwww, CONST[1][1].xxxx 100: MOV_SAT TEMP[3].x, TEMP[3].xxxx 101: MUL TEMP[3].xyz, TEMP[3].xxxx, CONST[5][3].xyzz 102: MOV TEMP[4].xy, TEMP[2].xyyy 103: MOV TEMP[4].w, IMM[1].xxxx 104: TXL TEMP[4].yz, TEMP[4], SAMP[1], 2D 105: MOV TEMP[5].xyz, TEMP[3].xyzx 106: FSLT TEMP[6].x, IMM[1].xxxx, TEMP[4].yyyy 107: UIF TEMP[6].xxxx :0 108: MUL TEMP[4].x, TEMP[4].yyyy, TEMP[4].zzzz 109: MOV_SAT TEMP[4].x, TEMP[4].xxxx 110: MOV TEMP[2].xy, TEMP[2].xyyy 111: MOV TEMP[2].w, IMM[1].xxxx 112: TXL TEMP[2].xyz, TEMP[2], SAMP[2], 2D 113: MAD TEMP[5].xyz, TEMP[4].xxxx, TEMP[2].xyzz, TEMP[3].xyzz 114: ENDIF 115: MAD TEMP[1].xyz, IMM[1].zzzz, TEMP[5].xyzz, TEMP[1].xyzz 116: MAD TEMP[2].xy, CONST[1][0].xyyy, IMM[1].wxxx, IN[0].xyyy 117: MAX TEMP[2].xy, TEMP[2].xyyy, TEMP[0].xyyy 118: MIN TEMP[2].xy, TEMP[2].xyyy, TEMP[0].zwww 119: MOV TEMP[3].xy, TEMP[2].xyyy 120: MOV TEMP[3].w, IMM[1].xxxx 121: TXL TEMP[3].w, TEMP[3], SAMP[0], 2D 122: MUL TEMP[3].x, TEMP[3].wwww, CONST[1][1].xxxx 123: MOV_SAT TEMP[3].x, TEMP[3].xxxx 124: MUL TEMP[3].xyz, TEMP[3].xxxx, CONST[5][3].xyzz 125: MOV TEMP[4].xy, TEMP[2].xyyy 126: MOV TEMP[4].w, IMM[1].xxxx 127: TXL TEMP[4].yz, TEMP[4], SAMP[1], 2D 128: MOV TEMP[5].xyz, TEMP[3].xyzx 129: FSLT TEMP[6].x, IMM[1].xxxx, TEMP[4].yyyy 130: UIF TEMP[6].xxxx :0 131: MUL TEMP[4].x, TEMP[4].yyyy, TEMP[4].zzzz 132: MOV_SAT TEMP[4].x, TEMP[4].xxxx 133: MOV TEMP[2].xy, TEMP[2].xyyy 134: MOV TEMP[2].w, IMM[1].xxxx 135: TXL TEMP[2].xyz, TEMP[2], SAMP[2], 2D 136: MAD TEMP[5].xyz, TEMP[4].xxxx, TEMP[2].xyzz, TEMP[3].xyzz 137: ENDIF 138: MAD TEMP[1].xyz, IMM[3].xxxx, TEMP[5].xyzz, TEMP[1].xyzz 139: MAX TEMP[2].xy, IN[0].xyyy, TEMP[0].xyyy 140: MIN TEMP[2].xy, TEMP[2].xyyy, TEMP[0].zwww 141: MOV TEMP[3].xy, TEMP[2].xyyy 142: MOV TEMP[3].w, IMM[1].xxxx 143: TXL TEMP[3].w, TEMP[3], SAMP[0], 2D 144: MUL TEMP[3].x, TEMP[3].wwww, CONST[1][1].xxxx 145: MOV_SAT TEMP[3].x, TEMP[3].xxxx 146: MUL TEMP[3].xyz, TEMP[3].xxxx, CONST[5][3].xyzz 147: MOV TEMP[4].xy, TEMP[2].xyyy 148: MOV TEMP[4].w, IMM[1].xxxx 149: TXL TEMP[4].yz, TEMP[4], SAMP[1], 2D 150: MOV TEMP[5].xyz, TEMP[3].xyzx 151: FSLT TEMP[6].x, IMM[1].xxxx, TEMP[4].yyyy 152: UIF TEMP[6].xxxx :0 153: MUL TEMP[4].x, TEMP[4].yyyy, TEMP[4].zzzz 154: MOV_SAT TEMP[4].x, TEMP[4].xxxx 155: MOV TEMP[2].xy, TEMP[2].xyyy 156: MOV TEMP[2].w, IMM[1].xxxx 157: TXL TEMP[2].xyz, TEMP[2], SAMP[2], 2D 158: MAD TEMP[5].xyz, TEMP[4].xxxx, TEMP[2].xyzz, TEMP[3].xyzz 159: ENDIF 160: ADD TEMP[1].xyz, TEMP[1].xyzz, TEMP[5].xyzz 161: MAD TEMP[2].xy, CONST[1][0].xyyy, IMM[3].ywww, IN[0].xyyy 162: MAX TEMP[2].xy, TEMP[2].xyyy, TEMP[0].xyyy 163: MIN TEMP[2].xy, TEMP[2].xyyy, TEMP[0].zwww 164: MOV TEMP[3].xy, TEMP[2].xyyy 165: MOV TEMP[3].w, IMM[1].xxxx 166: TXL TEMP[3].w, TEMP[3], SAMP[0], 2D 167: MUL TEMP[3].x, TEMP[3].wwww, CONST[1][1].xxxx 168: MOV_SAT TEMP[3].x, TEMP[3].xxxx 169: MUL TEMP[3].xyz, TEMP[3].xxxx, CONST[5][3].xyzz 170: MOV TEMP[4].xy, TEMP[2].xyyy 171: MOV TEMP[4].w, IMM[1].xxxx 172: TXL TEMP[4].yz, TEMP[4], SAMP[1], 2D 173: MOV TEMP[5].xyz, TEMP[3].xyzx 174: FSLT TEMP[6].x, IMM[1].xxxx, TEMP[4].yyyy 175: UIF TEMP[6].xxxx :0 176: MUL TEMP[4].x, TEMP[4].yyyy, TEMP[4].zzzz 177: MOV_SAT TEMP[4].x, TEMP[4].xxxx 178: MOV TEMP[2].xy, TEMP[2].xyyy 179: MOV TEMP[2].w, IMM[1].xxxx 180: TXL TEMP[2].xyz, TEMP[2], SAMP[2], 2D 181: MAD TEMP[5].xyz, TEMP[4].xxxx, TEMP[2].xyzz, TEMP[3].xyzz 182: ENDIF 183: MAD TEMP[1].xyz, IMM[3].xxxx, TEMP[5].xyzz, TEMP[1].xyzz 184: MAD TEMP[2].xy, CONST[1][0].xyyy, IMM[4].xyyy, IN[0].xyyy 185: MAX TEMP[2].xy, TEMP[2].xyyy, TEMP[0].xyyy 186: MIN TEMP[2].xy, TEMP[2].xyyy, TEMP[0].zwww 187: MOV TEMP[3].xy, TEMP[2].xyyy 188: MOV TEMP[3].w, IMM[1].xxxx 189: TXL TEMP[3].w, TEMP[3], SAMP[0], 2D 190: MUL TEMP[3].x, TEMP[3].wwww, CONST[1][1].xxxx 191: MOV_SAT TEMP[3].x, TEMP[3].xxxx 192: MUL TEMP[3].xyz, TEMP[3].xxxx, CONST[5][3].xyzz 193: MOV TEMP[4].xy, TEMP[2].xyyy 194: MOV TEMP[4].w, IMM[1].xxxx 195: TXL TEMP[4].yz, TEMP[4], SAMP[1], 2D 196: MOV TEMP[5].xyz, TEMP[3].xyzx 197: FSLT TEMP[6].x, IMM[1].xxxx, TEMP[4].yyyy 198: UIF TEMP[6].xxxx :0 199: MUL TEMP[4].x, TEMP[4].yyyy, TEMP[4].zzzz 200: MOV_SAT TEMP[4].x, TEMP[4].xxxx 201: MOV TEMP[2].xy, TEMP[2].xyyy 202: MOV TEMP[2].w, IMM[1].xxxx 203: TXL TEMP[2].xyz, TEMP[2], SAMP[2], 2D 204: MAD TEMP[5].xyz, TEMP[4].xxxx, TEMP[2].xyzz, TEMP[3].xyzz 205: ENDIF 206: MAD TEMP[1].xyz, IMM[1].zzzz, TEMP[5].xyzz, TEMP[1].xyzz 207: MAD TEMP[2].xy, CONST[1][0].xyyy, IMM[3].zyyy, IN[0].xyyy 208: MAX TEMP[2].xy, TEMP[2].xyyy, TEMP[0].xyyy 209: MIN TEMP[2].xy, TEMP[2].xyyy, TEMP[0].zwww 210: MOV TEMP[3].xy, TEMP[2].xyyy 211: MOV TEMP[3].w, IMM[1].xxxx 212: TXL TEMP[3].w, TEMP[3], SAMP[0], 2D 213: MUL TEMP[3].x, TEMP[3].wwww, CONST[1][1].xxxx 214: MOV_SAT TEMP[3].x, TEMP[3].xxxx 215: MUL TEMP[3].xyz, TEMP[3].xxxx, CONST[5][3].xyzz 216: MOV TEMP[4].xy, TEMP[2].xyyy 217: MOV TEMP[4].w, IMM[1].xxxx 218: TXL TEMP[4].yz, TEMP[4], SAMP[1], 2D 219: MOV TEMP[5].xyz, TEMP[3].xyzx 220: FSLT TEMP[6].x, IMM[1].xxxx, TEMP[4].yyyy 221: UIF TEMP[6].xxxx :0 222: MUL TEMP[4].x, TEMP[4].yyyy, TEMP[4].zzzz 223: MOV_SAT TEMP[4].x, TEMP[4].xxxx 224: MOV TEMP[2].xy, TEMP[2].xyyy 225: MOV TEMP[2].w, IMM[1].xxxx 226: TXL TEMP[2].xyz, TEMP[2], SAMP[2], 2D 227: MAD TEMP[5].xyz, TEMP[4].xxxx, TEMP[2].xyzz, TEMP[3].xyzz 228: ENDIF 229: MAD TEMP[1].xyz, IMM[1].zzzz, TEMP[5].xyzz, TEMP[1].xyzz 230: MAD TEMP[2].xy, CONST[1][0].xyyy, IMM[3].wyyy, IN[0].xyyy 231: MAX TEMP[2].xy, TEMP[2].xyyy, TEMP[0].xyyy 232: MIN TEMP[2].xy, TEMP[2].xyyy, TEMP[0].zwww 233: MOV TEMP[3].xy, TEMP[2].xyyy 234: MOV TEMP[3].w, IMM[1].xxxx 235: TXL TEMP[3].w, TEMP[3], SAMP[0], 2D 236: MUL TEMP[3].x, TEMP[3].wwww, CONST[1][1].xxxx 237: MOV_SAT TEMP[3].x, TEMP[3].xxxx 238: MUL TEMP[3].xyz, TEMP[3].xxxx, CONST[5][3].xyzz 239: MOV TEMP[4].xy, TEMP[2].xyyy 240: MOV TEMP[4].w, IMM[1].xxxx 241: TXL TEMP[4].yz, TEMP[4], SAMP[1], 2D 242: MOV TEMP[5].xyz, TEMP[3].xyzx 243: FSLT TEMP[6].x, IMM[1].xxxx, TEMP[4].yyyy 244: UIF TEMP[6].xxxx :0 245: MUL TEMP[4].x, TEMP[4].yyyy, TEMP[4].zzzz 246: MOV_SAT TEMP[4].x, TEMP[4].xxxx 247: MOV TEMP[2].xy, TEMP[2].xyyy 248: MOV TEMP[2].w, IMM[1].xxxx 249: TXL TEMP[2].xyz, TEMP[2], SAMP[2], 2D 250: MAD TEMP[5].xyz, TEMP[4].xxxx, TEMP[2].xyzz, TEMP[3].xyzz 251: ENDIF 252: MAD TEMP[1].xyz, IMM[3].xxxx, TEMP[5].xyzz, TEMP[1].xyzz 253: ADD TEMP[2].xy, IN[0].xyyy, CONST[1][0].xyyy 254: MAX TEMP[2].xy, TEMP[2].xyyy, TEMP[0].xyyy 255: MIN TEMP[2].xy, TEMP[2].xyyy, TEMP[0].zwww 256: MOV TEMP[3].xy, TEMP[2].xyyy 257: MOV TEMP[3].w, IMM[1].xxxx 258: TXL TEMP[3].w, TEMP[3], SAMP[0], 2D 259: MUL TEMP[3].x, TEMP[3].wwww, CONST[1][1].xxxx 260: MOV_SAT TEMP[3].x, TEMP[3].xxxx 261: MUL TEMP[3].xyz, TEMP[3].xxxx, CONST[5][3].xyzz 262: MOV TEMP[4].xy, TEMP[2].xyyy 263: MOV TEMP[4].w, IMM[1].xxxx 264: TXL TEMP[4].yz, TEMP[4], SAMP[1], 2D 265: MOV TEMP[5].xyz, TEMP[3].xyzx 266: FSLT TEMP[6].x, IMM[1].xxxx, TEMP[4].yyyy 267: UIF TEMP[6].xxxx :0 268: MUL TEMP[4].x, TEMP[4].yyyy, TEMP[4].zzzz 269: MOV_SAT TEMP[4].x, TEMP[4].xxxx 270: MOV TEMP[2].xy, TEMP[2].xyyy 271: MOV TEMP[2].w, IMM[1].xxxx 272: TXL TEMP[2].xyz, TEMP[2], SAMP[2], 2D 273: MAD TEMP[5].xyz, TEMP[4].xxxx, TEMP[2].xyzz, TEMP[3].xyzz 274: ENDIF 275: MAD TEMP[1].xyz, IMM[1].zzzz, TEMP[5].xyzz, TEMP[1].xyzz 276: MAD TEMP[2].xy, CONST[1][0].xyyy, IMM[4].yxxx, IN[0].xyyy 277: MAX TEMP[2].xy, TEMP[2].xyyy, TEMP[0].xyyy 278: MIN TEMP[0].xy, TEMP[2].xyyy, TEMP[0].zwww 279: MOV TEMP[2].xy, TEMP[0].xyyy 280: MOV TEMP[2].w, IMM[1].xxxx 281: TXL TEMP[2].w, TEMP[2], SAMP[0], 2D 282: MUL TEMP[2].x, TEMP[2].wwww, CONST[1][1].xxxx 283: MOV_SAT TEMP[2].x, TEMP[2].xxxx 284: MUL TEMP[2].xyz, TEMP[2].xxxx, CONST[5][3].xyzz 285: MOV TEMP[3].xy, TEMP[0].xyyy 286: MOV TEMP[3].w, IMM[1].xxxx 287: TXL TEMP[3].yz, TEMP[3], SAMP[1], 2D 288: MOV TEMP[4].xyz, TEMP[2].xyzx 289: FSLT TEMP[5].x, IMM[1].xxxx, TEMP[3].yyyy 290: UIF TEMP[5].xxxx :0 291: MUL TEMP[3].x, TEMP[3].yyyy, TEMP[3].zzzz 292: MOV_SAT TEMP[3].x, TEMP[3].xxxx 293: MOV TEMP[0].xy, TEMP[0].xyyy 294: MOV TEMP[0].w, IMM[1].xxxx 295: TXL TEMP[0].xyz, TEMP[0], SAMP[2], 2D 296: MAD TEMP[4].xyz, TEMP[3].xxxx, TEMP[0].xyzz, TEMP[2].xyzz 297: ENDIF 298: MAD TEMP[0].xyz, IMM[1].zzzz, TEMP[4].xyzz, TEMP[1].xyzz 299: MUL TEMP[0].xyz, TEMP[0].xyzz, IMM[4].zzzz 300: MOV TEMP[1].w, IMM[3].yyyy 301: MOV TEMP[1].x, TEMP[0].xxxx 302: MOV TEMP[1].y, TEMP[0].yyyy 303: MOV TEMP[1].z, TEMP[0].zzzz 304: MOV OUT[0], TEMP[1] 305: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %31 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 5 %32 = load <16 x i8>, <16 x i8> addrspace(2)* %31, align 16, !tbaa !0 %33 = call float @llvm.SI.load.const(<16 x i8> %32, i32 48) %34 = call float @llvm.SI.load.const(<16 x i8> %32, i32 52) %35 = call float @llvm.SI.load.const(<16 x i8> %32, i32 56) %36 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %37 = load <8 x i32>, <8 x i32> addrspace(2)* %36, align 32, !tbaa !0 %38 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %39 = load <4 x i32>, <4 x i32> addrspace(2)* %38, align 16, !tbaa !0 %40 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %41 = load <8 x i32>, <8 x i32> addrspace(2)* %40, align 32, !tbaa !0 %42 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %43 = load <4 x i32>, <4 x i32> addrspace(2)* %42, align 16, !tbaa !0 %44 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %45 = load <8 x i32>, <8 x i32> addrspace(2)* %44, align 32, !tbaa !0 %46 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %47 = load <4 x i32>, <4 x i32> addrspace(2)* %46, align 16, !tbaa !0 %48 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %49 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %50 = fmul float %24, %27 %51 = fmul float %25, %28 %52 = fmul float %24, %29 %53 = fmul float %25, %30 %54 = fmul float %24, 0.000000e+00 %55 = fadd float %54, %48 %56 = fmul float %25, -2.000000e+00 %57 = fadd float %56, %49 %58 = call float @llvm.maxnum.f32(float %55, float %50) %59 = call float @llvm.maxnum.f32(float %57, float %51) %60 = call float @llvm.minnum.f32(float %58, float %52) %61 = call float @llvm.minnum.f32(float %59, float %53) %62 = bitcast float %60 to i32 %63 = bitcast float %61 to i32 %64 = insertelement <4 x i32> undef, i32 %62, i32 0 %65 = insertelement <4 x i32> %64, i32 %63, i32 1 %66 = insertelement <4 x i32> %65, i32 0, i32 2 %67 = bitcast <8 x i32> %37 to <32 x i8> %68 = bitcast <4 x i32> %39 to <16 x i8> %69 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %66, <32 x i8> %67, <16 x i8> %68, i32 2) %70 = extractelement <4 x float> %69, i32 3 %71 = fmul float %70, %26 %72 = call float @llvm.AMDIL.clamp.(float %71, float 0.000000e+00, float 1.000000e+00) %73 = fmul float %72, %33 %74 = fmul float %72, %34 %75 = fmul float %72, %35 %76 = bitcast float %60 to i32 %77 = bitcast float %61 to i32 %78 = insertelement <4 x i32> undef, i32 %76, i32 0 %79 = insertelement <4 x i32> %78, i32 %77, i32 1 %80 = insertelement <4 x i32> %79, i32 0, i32 2 %81 = bitcast <8 x i32> %41 to <32 x i8> %82 = bitcast <4 x i32> %43 to <16 x i8> %83 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %80, <32 x i8> %81, <16 x i8> %82, i32 2) %84 = extractelement <4 x float> %83, i32 1 %85 = fcmp ogt float %84, 0.000000e+00 br i1 %85, label %IF, label %ENDIF IF: ; preds = %main_body %86 = extractelement <4 x float> %83, i32 2 %87 = fmul float %84, %86 %88 = call float @llvm.AMDIL.clamp.(float %87, float 0.000000e+00, float 1.000000e+00) %89 = bitcast float %60 to i32 %90 = bitcast float %61 to i32 %91 = insertelement <4 x i32> undef, i32 %89, i32 0 %92 = insertelement <4 x i32> %91, i32 %90, i32 1 %93 = insertelement <4 x i32> %92, i32 0, i32 2 %94 = bitcast <8 x i32> %45 to <32 x i8> %95 = bitcast <4 x i32> %47 to <16 x i8> %96 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %93, <32 x i8> %94, <16 x i8> %95, i32 2) %97 = extractelement <4 x float> %96, i32 0 %98 = extractelement <4 x float> %96, i32 1 %99 = extractelement <4 x float> %96, i32 2 %100 = fmul float %88, %97 %101 = fadd float %100, %73 %102 = fmul float %88, %98 %103 = fadd float %102, %74 %104 = fmul float %88, %99 %105 = fadd float %104, %75 br label %ENDIF ENDIF: ; preds = %main_body, %IF %temp16.0 = phi float [ %101, %IF ], [ %73, %main_body ] %temp17.0 = phi float [ %103, %IF ], [ %74, %main_body ] %temp18.0 = phi float [ %105, %IF ], [ %75, %main_body ] %106 = fmul float %temp16.0, 5.000000e-01 %107 = fmul float %temp17.0, 5.000000e-01 %108 = fmul float %temp18.0, 5.000000e-01 %109 = fsub float %48, %24 %110 = fsub float %49, %25 %111 = call float @llvm.maxnum.f32(float %109, float %50) %112 = call float @llvm.maxnum.f32(float %110, float %51) %113 = call float @llvm.minnum.f32(float %111, float %52) %114 = call float @llvm.minnum.f32(float %112, float %53) %115 = bitcast float %113 to i32 %116 = bitcast float %114 to i32 %117 = insertelement <4 x i32> undef, i32 %115, i32 0 %118 = insertelement <4 x i32> %117, i32 %116, i32 1 %119 = insertelement <4 x i32> %118, i32 0, i32 2 %120 = bitcast <8 x i32> %37 to <32 x i8> %121 = bitcast <4 x i32> %39 to <16 x i8> %122 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %119, <32 x i8> %120, <16 x i8> %121, i32 2) %123 = extractelement <4 x float> %122, i32 3 %124 = fmul float %123, %26 %125 = call float @llvm.AMDIL.clamp.(float %124, float 0.000000e+00, float 1.000000e+00) %126 = fmul float %125, %33 %127 = fmul float %125, %34 %128 = fmul float %125, %35 %129 = bitcast float %113 to i32 %130 = bitcast float %114 to i32 %131 = insertelement <4 x i32> undef, i32 %129, i32 0 %132 = insertelement <4 x i32> %131, i32 %130, i32 1 %133 = insertelement <4 x i32> %132, i32 0, i32 2 %134 = bitcast <8 x i32> %41 to <32 x i8> %135 = bitcast <4 x i32> %43 to <16 x i8> %136 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %133, <32 x i8> %134, <16 x i8> %135, i32 2) %137 = extractelement <4 x float> %136, i32 1 %138 = fcmp ogt float %137, 0.000000e+00 br i1 %138, label %IF29, label %ENDIF28 IF29: ; preds = %ENDIF %139 = extractelement <4 x float> %136, i32 2 %140 = fmul float %137, %139 %141 = call float @llvm.AMDIL.clamp.(float %140, float 0.000000e+00, float 1.000000e+00) %142 = bitcast float %113 to i32 %143 = bitcast float %114 to i32 %144 = insertelement <4 x i32> undef, i32 %142, i32 0 %145 = insertelement <4 x i32> %144, i32 %143, i32 1 %146 = insertelement <4 x i32> %145, i32 0, i32 2 %147 = bitcast <8 x i32> %45 to <32 x i8> %148 = bitcast <4 x i32> %47 to <16 x i8> %149 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %146, <32 x i8> %147, <16 x i8> %148, i32 2) %150 = extractelement <4 x float> %149, i32 0 %151 = extractelement <4 x float> %149, i32 1 %152 = extractelement <4 x float> %149, i32 2 %153 = fmul float %141, %150 %154 = fadd float %153, %126 %155 = fmul float %141, %151 %156 = fadd float %155, %127 %157 = fmul float %141, %152 %158 = fadd float %157, %128 br label %ENDIF28 ENDIF28: ; preds = %ENDIF, %IF29 %temp20.0 = phi float [ %154, %IF29 ], [ %126, %ENDIF ] %temp21.0 = phi float [ %156, %IF29 ], [ %127, %ENDIF ] %temp22.0 = phi float [ %158, %IF29 ], [ %128, %ENDIF ] %159 = fmul float %temp20.0, 5.000000e-01 %160 = fadd float %159, %106 %161 = fmul float %temp21.0, 5.000000e-01 %162 = fadd float %161, %107 %163 = fmul float %temp22.0, 5.000000e-01 %164 = fadd float %163, %108 %165 = fmul float %24, 0.000000e+00 %166 = fadd float %165, %48 %167 = fsub float %49, %25 %168 = call float @llvm.maxnum.f32(float %166, float %50) %169 = call float @llvm.maxnum.f32(float %167, float %51) %170 = call float @llvm.minnum.f32(float %168, float %52) %171 = call float @llvm.minnum.f32(float %169, float %53) %172 = bitcast float %170 to i32 %173 = bitcast float %171 to i32 %174 = insertelement <4 x i32> undef, i32 %172, i32 0 %175 = insertelement <4 x i32> %174, i32 %173, i32 1 %176 = insertelement <4 x i32> %175, i32 0, i32 2 %177 = bitcast <8 x i32> %37 to <32 x i8> %178 = bitcast <4 x i32> %39 to <16 x i8> %179 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %176, <32 x i8> %177, <16 x i8> %178, i32 2) %180 = extractelement <4 x float> %179, i32 3 %181 = fmul float %180, %26 %182 = call float @llvm.AMDIL.clamp.(float %181, float 0.000000e+00, float 1.000000e+00) %183 = fmul float %182, %33 %184 = fmul float %182, %34 %185 = fmul float %182, %35 %186 = bitcast float %170 to i32 %187 = bitcast float %171 to i32 %188 = insertelement <4 x i32> undef, i32 %186, i32 0 %189 = insertelement <4 x i32> %188, i32 %187, i32 1 %190 = insertelement <4 x i32> %189, i32 0, i32 2 %191 = bitcast <8 x i32> %41 to <32 x i8> %192 = bitcast <4 x i32> %43 to <16 x i8> %193 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %190, <32 x i8> %191, <16 x i8> %192, i32 2) %194 = extractelement <4 x float> %193, i32 1 %195 = fcmp ogt float %194, 0.000000e+00 br i1 %195, label %IF32, label %ENDIF31 IF32: ; preds = %ENDIF28 %196 = extractelement <4 x float> %193, i32 2 %197 = fmul float %194, %196 %198 = call float @llvm.AMDIL.clamp.(float %197, float 0.000000e+00, float 1.000000e+00) %199 = bitcast float %170 to i32 %200 = bitcast float %171 to i32 %201 = insertelement <4 x i32> undef, i32 %199, i32 0 %202 = insertelement <4 x i32> %201, i32 %200, i32 1 %203 = insertelement <4 x i32> %202, i32 0, i32 2 %204 = bitcast <8 x i32> %45 to <32 x i8> %205 = bitcast <4 x i32> %47 to <16 x i8> %206 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %203, <32 x i8> %204, <16 x i8> %205, i32 2) %207 = extractelement <4 x float> %206, i32 0 %208 = extractelement <4 x float> %206, i32 1 %209 = extractelement <4 x float> %206, i32 2 %210 = fmul float %198, %207 %211 = fadd float %210, %183 %212 = fmul float %198, %208 %213 = fadd float %212, %184 %214 = fmul float %198, %209 %215 = fadd float %214, %185 br label %ENDIF31 ENDIF31: ; preds = %ENDIF28, %IF32 %temp20.1 = phi float [ %211, %IF32 ], [ %183, %ENDIF28 ] %temp21.1 = phi float [ %213, %IF32 ], [ %184, %ENDIF28 ] %temp22.1 = phi float [ %215, %IF32 ], [ %185, %ENDIF28 ] %216 = fmul float %temp20.1, 7.500000e-01 %217 = fadd float %216, %160 %218 = fmul float %temp21.1, 7.500000e-01 %219 = fadd float %218, %162 %220 = fmul float %temp22.1, 7.500000e-01 %221 = fadd float %220, %164 %222 = fadd float %24, %48 %223 = fsub float %49, %25 %224 = call float @llvm.maxnum.f32(float %222, float %50) %225 = call float @llvm.maxnum.f32(float %223, float %51) %226 = call float @llvm.minnum.f32(float %224, float %52) %227 = call float @llvm.minnum.f32(float %225, float %53) %228 = bitcast float %226 to i32 %229 = bitcast float %227 to i32 %230 = insertelement <4 x i32> undef, i32 %228, i32 0 %231 = insertelement <4 x i32> %230, i32 %229, i32 1 %232 = insertelement <4 x i32> %231, i32 0, i32 2 %233 = bitcast <8 x i32> %37 to <32 x i8> %234 = bitcast <4 x i32> %39 to <16 x i8> %235 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %232, <32 x i8> %233, <16 x i8> %234, i32 2) %236 = extractelement <4 x float> %235, i32 3 %237 = fmul float %236, %26 %238 = call float @llvm.AMDIL.clamp.(float %237, float 0.000000e+00, float 1.000000e+00) %239 = fmul float %238, %33 %240 = fmul float %238, %34 %241 = fmul float %238, %35 %242 = bitcast float %226 to i32 %243 = bitcast float %227 to i32 %244 = insertelement <4 x i32> undef, i32 %242, i32 0 %245 = insertelement <4 x i32> %244, i32 %243, i32 1 %246 = insertelement <4 x i32> %245, i32 0, i32 2 %247 = bitcast <8 x i32> %41 to <32 x i8> %248 = bitcast <4 x i32> %43 to <16 x i8> %249 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %246, <32 x i8> %247, <16 x i8> %248, i32 2) %250 = extractelement <4 x float> %249, i32 1 %251 = fcmp ogt float %250, 0.000000e+00 br i1 %251, label %IF35, label %ENDIF34 IF35: ; preds = %ENDIF31 %252 = extractelement <4 x float> %249, i32 2 %253 = fmul float %250, %252 %254 = call float @llvm.AMDIL.clamp.(float %253, float 0.000000e+00, float 1.000000e+00) %255 = bitcast float %226 to i32 %256 = bitcast float %227 to i32 %257 = insertelement <4 x i32> undef, i32 %255, i32 0 %258 = insertelement <4 x i32> %257, i32 %256, i32 1 %259 = insertelement <4 x i32> %258, i32 0, i32 2 %260 = bitcast <8 x i32> %45 to <32 x i8> %261 = bitcast <4 x i32> %47 to <16 x i8> %262 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %259, <32 x i8> %260, <16 x i8> %261, i32 2) %263 = extractelement <4 x float> %262, i32 0 %264 = extractelement <4 x float> %262, i32 1 %265 = extractelement <4 x float> %262, i32 2 %266 = fmul float %254, %263 %267 = fadd float %266, %239 %268 = fmul float %254, %264 %269 = fadd float %268, %240 %270 = fmul float %254, %265 %271 = fadd float %270, %241 br label %ENDIF34 ENDIF34: ; preds = %ENDIF31, %IF35 %temp20.2 = phi float [ %267, %IF35 ], [ %239, %ENDIF31 ] %temp21.2 = phi float [ %269, %IF35 ], [ %240, %ENDIF31 ] %temp22.2 = phi float [ %271, %IF35 ], [ %241, %ENDIF31 ] %272 = fmul float %temp20.2, 5.000000e-01 %273 = fadd float %272, %217 %274 = fmul float %temp21.2, 5.000000e-01 %275 = fadd float %274, %219 %276 = fmul float %temp22.2, 5.000000e-01 %277 = fadd float %276, %221 %278 = fmul float %24, -2.000000e+00 %279 = fadd float %278, %48 %280 = fmul float %25, 0.000000e+00 %281 = fadd float %280, %49 %282 = call float @llvm.maxnum.f32(float %279, float %50) %283 = call float @llvm.maxnum.f32(float %281, float %51) %284 = call float @llvm.minnum.f32(float %282, float %52) %285 = call float @llvm.minnum.f32(float %283, float %53) %286 = bitcast float %284 to i32 %287 = bitcast float %285 to i32 %288 = insertelement <4 x i32> undef, i32 %286, i32 0 %289 = insertelement <4 x i32> %288, i32 %287, i32 1 %290 = insertelement <4 x i32> %289, i32 0, i32 2 %291 = bitcast <8 x i32> %37 to <32 x i8> %292 = bitcast <4 x i32> %39 to <16 x i8> %293 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %290, <32 x i8> %291, <16 x i8> %292, i32 2) %294 = extractelement <4 x float> %293, i32 3 %295 = fmul float %294, %26 %296 = call float @llvm.AMDIL.clamp.(float %295, float 0.000000e+00, float 1.000000e+00) %297 = fmul float %296, %33 %298 = fmul float %296, %34 %299 = fmul float %296, %35 %300 = bitcast float %284 to i32 %301 = bitcast float %285 to i32 %302 = insertelement <4 x i32> undef, i32 %300, i32 0 %303 = insertelement <4 x i32> %302, i32 %301, i32 1 %304 = insertelement <4 x i32> %303, i32 0, i32 2 %305 = bitcast <8 x i32> %41 to <32 x i8> %306 = bitcast <4 x i32> %43 to <16 x i8> %307 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %304, <32 x i8> %305, <16 x i8> %306, i32 2) %308 = extractelement <4 x float> %307, i32 1 %309 = fcmp ogt float %308, 0.000000e+00 br i1 %309, label %IF38, label %ENDIF37 IF38: ; preds = %ENDIF34 %310 = extractelement <4 x float> %307, i32 2 %311 = fmul float %308, %310 %312 = call float @llvm.AMDIL.clamp.(float %311, float 0.000000e+00, float 1.000000e+00) %313 = bitcast float %284 to i32 %314 = bitcast float %285 to i32 %315 = insertelement <4 x i32> undef, i32 %313, i32 0 %316 = insertelement <4 x i32> %315, i32 %314, i32 1 %317 = insertelement <4 x i32> %316, i32 0, i32 2 %318 = bitcast <8 x i32> %45 to <32 x i8> %319 = bitcast <4 x i32> %47 to <16 x i8> %320 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %317, <32 x i8> %318, <16 x i8> %319, i32 2) %321 = extractelement <4 x float> %320, i32 0 %322 = extractelement <4 x float> %320, i32 1 %323 = extractelement <4 x float> %320, i32 2 %324 = fmul float %312, %321 %325 = fadd float %324, %297 %326 = fmul float %312, %322 %327 = fadd float %326, %298 %328 = fmul float %312, %323 %329 = fadd float %328, %299 br label %ENDIF37 ENDIF37: ; preds = %ENDIF34, %IF38 %temp20.3 = phi float [ %325, %IF38 ], [ %297, %ENDIF34 ] %temp21.3 = phi float [ %327, %IF38 ], [ %298, %ENDIF34 ] %temp22.3 = phi float [ %329, %IF38 ], [ %299, %ENDIF34 ] %330 = fmul float %temp20.3, 5.000000e-01 %331 = fadd float %330, %273 %332 = fmul float %temp21.3, 5.000000e-01 %333 = fadd float %332, %275 %334 = fmul float %temp22.3, 5.000000e-01 %335 = fadd float %334, %277 %336 = fsub float %48, %24 %337 = fmul float %25, 0.000000e+00 %338 = fadd float %337, %49 %339 = call float @llvm.maxnum.f32(float %336, float %50) %340 = call float @llvm.maxnum.f32(float %338, float %51) %341 = call float @llvm.minnum.f32(float %339, float %52) %342 = call float @llvm.minnum.f32(float %340, float %53) %343 = bitcast float %341 to i32 %344 = bitcast float %342 to i32 %345 = insertelement <4 x i32> undef, i32 %343, i32 0 %346 = insertelement <4 x i32> %345, i32 %344, i32 1 %347 = insertelement <4 x i32> %346, i32 0, i32 2 %348 = bitcast <8 x i32> %37 to <32 x i8> %349 = bitcast <4 x i32> %39 to <16 x i8> %350 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %347, <32 x i8> %348, <16 x i8> %349, i32 2) %351 = extractelement <4 x float> %350, i32 3 %352 = fmul float %351, %26 %353 = call float @llvm.AMDIL.clamp.(float %352, float 0.000000e+00, float 1.000000e+00) %354 = fmul float %353, %33 %355 = fmul float %353, %34 %356 = fmul float %353, %35 %357 = bitcast float %341 to i32 %358 = bitcast float %342 to i32 %359 = insertelement <4 x i32> undef, i32 %357, i32 0 %360 = insertelement <4 x i32> %359, i32 %358, i32 1 %361 = insertelement <4 x i32> %360, i32 0, i32 2 %362 = bitcast <8 x i32> %41 to <32 x i8> %363 = bitcast <4 x i32> %43 to <16 x i8> %364 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %361, <32 x i8> %362, <16 x i8> %363, i32 2) %365 = extractelement <4 x float> %364, i32 1 %366 = fcmp ogt float %365, 0.000000e+00 br i1 %366, label %IF41, label %ENDIF40 IF41: ; preds = %ENDIF37 %367 = extractelement <4 x float> %364, i32 2 %368 = fmul float %365, %367 %369 = call float @llvm.AMDIL.clamp.(float %368, float 0.000000e+00, float 1.000000e+00) %370 = bitcast float %341 to i32 %371 = bitcast float %342 to i32 %372 = insertelement <4 x i32> undef, i32 %370, i32 0 %373 = insertelement <4 x i32> %372, i32 %371, i32 1 %374 = insertelement <4 x i32> %373, i32 0, i32 2 %375 = bitcast <8 x i32> %45 to <32 x i8> %376 = bitcast <4 x i32> %47 to <16 x i8> %377 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %374, <32 x i8> %375, <16 x i8> %376, i32 2) %378 = extractelement <4 x float> %377, i32 0 %379 = extractelement <4 x float> %377, i32 1 %380 = extractelement <4 x float> %377, i32 2 %381 = fmul float %369, %378 %382 = fadd float %381, %354 %383 = fmul float %369, %379 %384 = fadd float %383, %355 %385 = fmul float %369, %380 %386 = fadd float %385, %356 br label %ENDIF40 ENDIF40: ; preds = %ENDIF37, %IF41 %temp20.4 = phi float [ %382, %IF41 ], [ %354, %ENDIF37 ] %temp21.4 = phi float [ %384, %IF41 ], [ %355, %ENDIF37 ] %temp22.4 = phi float [ %386, %IF41 ], [ %356, %ENDIF37 ] %387 = fmul float %temp20.4, 7.500000e-01 %388 = fadd float %387, %331 %389 = fmul float %temp21.4, 7.500000e-01 %390 = fadd float %389, %333 %391 = fmul float %temp22.4, 7.500000e-01 %392 = fadd float %391, %335 %393 = call float @llvm.maxnum.f32(float %48, float %50) %394 = call float @llvm.maxnum.f32(float %49, float %51) %395 = call float @llvm.minnum.f32(float %393, float %52) %396 = call float @llvm.minnum.f32(float %394, float %53) %397 = bitcast float %395 to i32 %398 = bitcast float %396 to i32 %399 = insertelement <4 x i32> undef, i32 %397, i32 0 %400 = insertelement <4 x i32> %399, i32 %398, i32 1 %401 = insertelement <4 x i32> %400, i32 0, i32 2 %402 = bitcast <8 x i32> %37 to <32 x i8> %403 = bitcast <4 x i32> %39 to <16 x i8> %404 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %401, <32 x i8> %402, <16 x i8> %403, i32 2) %405 = extractelement <4 x float> %404, i32 3 %406 = fmul float %405, %26 %407 = call float @llvm.AMDIL.clamp.(float %406, float 0.000000e+00, float 1.000000e+00) %408 = fmul float %407, %33 %409 = fmul float %407, %34 %410 = fmul float %407, %35 %411 = bitcast float %395 to i32 %412 = bitcast float %396 to i32 %413 = insertelement <4 x i32> undef, i32 %411, i32 0 %414 = insertelement <4 x i32> %413, i32 %412, i32 1 %415 = insertelement <4 x i32> %414, i32 0, i32 2 %416 = bitcast <8 x i32> %41 to <32 x i8> %417 = bitcast <4 x i32> %43 to <16 x i8> %418 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %415, <32 x i8> %416, <16 x i8> %417, i32 2) %419 = extractelement <4 x float> %418, i32 1 %420 = fcmp ogt float %419, 0.000000e+00 br i1 %420, label %IF44, label %ENDIF43 IF44: ; preds = %ENDIF40 %421 = extractelement <4 x float> %418, i32 2 %422 = fmul float %419, %421 %423 = call float @llvm.AMDIL.clamp.(float %422, float 0.000000e+00, float 1.000000e+00) %424 = bitcast float %395 to i32 %425 = bitcast float %396 to i32 %426 = insertelement <4 x i32> undef, i32 %424, i32 0 %427 = insertelement <4 x i32> %426, i32 %425, i32 1 %428 = insertelement <4 x i32> %427, i32 0, i32 2 %429 = bitcast <8 x i32> %45 to <32 x i8> %430 = bitcast <4 x i32> %47 to <16 x i8> %431 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %428, <32 x i8> %429, <16 x i8> %430, i32 2) %432 = extractelement <4 x float> %431, i32 0 %433 = extractelement <4 x float> %431, i32 1 %434 = extractelement <4 x float> %431, i32 2 %435 = fmul float %423, %432 %436 = fadd float %435, %408 %437 = fmul float %423, %433 %438 = fadd float %437, %409 %439 = fmul float %423, %434 %440 = fadd float %439, %410 br label %ENDIF43 ENDIF43: ; preds = %ENDIF40, %IF44 %temp20.5 = phi float [ %436, %IF44 ], [ %408, %ENDIF40 ] %temp21.5 = phi float [ %438, %IF44 ], [ %409, %ENDIF40 ] %temp22.5 = phi float [ %440, %IF44 ], [ %410, %ENDIF40 ] %441 = fadd float %388, %temp20.5 %442 = fadd float %390, %temp21.5 %443 = fadd float %392, %temp22.5 %444 = fadd float %24, %48 %445 = fmul float %25, 0.000000e+00 %446 = fadd float %445, %49 %447 = call float @llvm.maxnum.f32(float %444, float %50) %448 = call float @llvm.maxnum.f32(float %446, float %51) %449 = call float @llvm.minnum.f32(float %447, float %52) %450 = call float @llvm.minnum.f32(float %448, float %53) %451 = bitcast float %449 to i32 %452 = bitcast float %450 to i32 %453 = insertelement <4 x i32> undef, i32 %451, i32 0 %454 = insertelement <4 x i32> %453, i32 %452, i32 1 %455 = insertelement <4 x i32> %454, i32 0, i32 2 %456 = bitcast <8 x i32> %37 to <32 x i8> %457 = bitcast <4 x i32> %39 to <16 x i8> %458 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %455, <32 x i8> %456, <16 x i8> %457, i32 2) %459 = extractelement <4 x float> %458, i32 3 %460 = fmul float %459, %26 %461 = call float @llvm.AMDIL.clamp.(float %460, float 0.000000e+00, float 1.000000e+00) %462 = fmul float %461, %33 %463 = fmul float %461, %34 %464 = fmul float %461, %35 %465 = bitcast float %449 to i32 %466 = bitcast float %450 to i32 %467 = insertelement <4 x i32> undef, i32 %465, i32 0 %468 = insertelement <4 x i32> %467, i32 %466, i32 1 %469 = insertelement <4 x i32> %468, i32 0, i32 2 %470 = bitcast <8 x i32> %41 to <32 x i8> %471 = bitcast <4 x i32> %43 to <16 x i8> %472 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %469, <32 x i8> %470, <16 x i8> %471, i32 2) %473 = extractelement <4 x float> %472, i32 1 %474 = fcmp ogt float %473, 0.000000e+00 br i1 %474, label %IF47, label %ENDIF46 IF47: ; preds = %ENDIF43 %475 = extractelement <4 x float> %472, i32 2 %476 = fmul float %473, %475 %477 = call float @llvm.AMDIL.clamp.(float %476, float 0.000000e+00, float 1.000000e+00) %478 = bitcast float %449 to i32 %479 = bitcast float %450 to i32 %480 = insertelement <4 x i32> undef, i32 %478, i32 0 %481 = insertelement <4 x i32> %480, i32 %479, i32 1 %482 = insertelement <4 x i32> %481, i32 0, i32 2 %483 = bitcast <8 x i32> %45 to <32 x i8> %484 = bitcast <4 x i32> %47 to <16 x i8> %485 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %482, <32 x i8> %483, <16 x i8> %484, i32 2) %486 = extractelement <4 x float> %485, i32 0 %487 = extractelement <4 x float> %485, i32 1 %488 = extractelement <4 x float> %485, i32 2 %489 = fmul float %477, %486 %490 = fadd float %489, %462 %491 = fmul float %477, %487 %492 = fadd float %491, %463 %493 = fmul float %477, %488 %494 = fadd float %493, %464 br label %ENDIF46 ENDIF46: ; preds = %ENDIF43, %IF47 %temp20.6 = phi float [ %490, %IF47 ], [ %462, %ENDIF43 ] %temp21.6 = phi float [ %492, %IF47 ], [ %463, %ENDIF43 ] %temp22.6 = phi float [ %494, %IF47 ], [ %464, %ENDIF43 ] %495 = fmul float %temp20.6, 7.500000e-01 %496 = fadd float %495, %441 %497 = fmul float %temp21.6, 7.500000e-01 %498 = fadd float %497, %442 %499 = fmul float %temp22.6, 7.500000e-01 %500 = fadd float %499, %443 %501 = fmul float %24, 2.000000e+00 %502 = fadd float %501, %48 %503 = fmul float %25, 0.000000e+00 %504 = fadd float %503, %49 %505 = call float @llvm.maxnum.f32(float %502, float %50) %506 = call float @llvm.maxnum.f32(float %504, float %51) %507 = call float @llvm.minnum.f32(float %505, float %52) %508 = call float @llvm.minnum.f32(float %506, float %53) %509 = bitcast float %507 to i32 %510 = bitcast float %508 to i32 %511 = insertelement <4 x i32> undef, i32 %509, i32 0 %512 = insertelement <4 x i32> %511, i32 %510, i32 1 %513 = insertelement <4 x i32> %512, i32 0, i32 2 %514 = bitcast <8 x i32> %37 to <32 x i8> %515 = bitcast <4 x i32> %39 to <16 x i8> %516 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %513, <32 x i8> %514, <16 x i8> %515, i32 2) %517 = extractelement <4 x float> %516, i32 3 %518 = fmul float %517, %26 %519 = call float @llvm.AMDIL.clamp.(float %518, float 0.000000e+00, float 1.000000e+00) %520 = fmul float %519, %33 %521 = fmul float %519, %34 %522 = fmul float %519, %35 %523 = bitcast float %507 to i32 %524 = bitcast float %508 to i32 %525 = insertelement <4 x i32> undef, i32 %523, i32 0 %526 = insertelement <4 x i32> %525, i32 %524, i32 1 %527 = insertelement <4 x i32> %526, i32 0, i32 2 %528 = bitcast <8 x i32> %41 to <32 x i8> %529 = bitcast <4 x i32> %43 to <16 x i8> %530 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %527, <32 x i8> %528, <16 x i8> %529, i32 2) %531 = extractelement <4 x float> %530, i32 1 %532 = fcmp ogt float %531, 0.000000e+00 br i1 %532, label %IF50, label %ENDIF49 IF50: ; preds = %ENDIF46 %533 = extractelement <4 x float> %530, i32 2 %534 = fmul float %531, %533 %535 = call float @llvm.AMDIL.clamp.(float %534, float 0.000000e+00, float 1.000000e+00) %536 = bitcast float %507 to i32 %537 = bitcast float %508 to i32 %538 = insertelement <4 x i32> undef, i32 %536, i32 0 %539 = insertelement <4 x i32> %538, i32 %537, i32 1 %540 = insertelement <4 x i32> %539, i32 0, i32 2 %541 = bitcast <8 x i32> %45 to <32 x i8> %542 = bitcast <4 x i32> %47 to <16 x i8> %543 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %540, <32 x i8> %541, <16 x i8> %542, i32 2) %544 = extractelement <4 x float> %543, i32 0 %545 = extractelement <4 x float> %543, i32 1 %546 = extractelement <4 x float> %543, i32 2 %547 = fmul float %535, %544 %548 = fadd float %547, %520 %549 = fmul float %535, %545 %550 = fadd float %549, %521 %551 = fmul float %535, %546 %552 = fadd float %551, %522 br label %ENDIF49 ENDIF49: ; preds = %ENDIF46, %IF50 %temp20.7 = phi float [ %548, %IF50 ], [ %520, %ENDIF46 ] %temp21.7 = phi float [ %550, %IF50 ], [ %521, %ENDIF46 ] %temp22.7 = phi float [ %552, %IF50 ], [ %522, %ENDIF46 ] %553 = fmul float %temp20.7, 5.000000e-01 %554 = fadd float %553, %496 %555 = fmul float %temp21.7, 5.000000e-01 %556 = fadd float %555, %498 %557 = fmul float %temp22.7, 5.000000e-01 %558 = fadd float %557, %500 %559 = fsub float %48, %24 %560 = fadd float %25, %49 %561 = call float @llvm.maxnum.f32(float %559, float %50) %562 = call float @llvm.maxnum.f32(float %560, float %51) %563 = call float @llvm.minnum.f32(float %561, float %52) %564 = call float @llvm.minnum.f32(float %562, float %53) %565 = bitcast float %563 to i32 %566 = bitcast float %564 to i32 %567 = insertelement <4 x i32> undef, i32 %565, i32 0 %568 = insertelement <4 x i32> %567, i32 %566, i32 1 %569 = insertelement <4 x i32> %568, i32 0, i32 2 %570 = bitcast <8 x i32> %37 to <32 x i8> %571 = bitcast <4 x i32> %39 to <16 x i8> %572 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %569, <32 x i8> %570, <16 x i8> %571, i32 2) %573 = extractelement <4 x float> %572, i32 3 %574 = fmul float %573, %26 %575 = call float @llvm.AMDIL.clamp.(float %574, float 0.000000e+00, float 1.000000e+00) %576 = fmul float %575, %33 %577 = fmul float %575, %34 %578 = fmul float %575, %35 %579 = bitcast float %563 to i32 %580 = bitcast float %564 to i32 %581 = insertelement <4 x i32> undef, i32 %579, i32 0 %582 = insertelement <4 x i32> %581, i32 %580, i32 1 %583 = insertelement <4 x i32> %582, i32 0, i32 2 %584 = bitcast <8 x i32> %41 to <32 x i8> %585 = bitcast <4 x i32> %43 to <16 x i8> %586 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %583, <32 x i8> %584, <16 x i8> %585, i32 2) %587 = extractelement <4 x float> %586, i32 1 %588 = fcmp ogt float %587, 0.000000e+00 br i1 %588, label %IF53, label %ENDIF52 IF53: ; preds = %ENDIF49 %589 = extractelement <4 x float> %586, i32 2 %590 = fmul float %587, %589 %591 = call float @llvm.AMDIL.clamp.(float %590, float 0.000000e+00, float 1.000000e+00) %592 = bitcast float %563 to i32 %593 = bitcast float %564 to i32 %594 = insertelement <4 x i32> undef, i32 %592, i32 0 %595 = insertelement <4 x i32> %594, i32 %593, i32 1 %596 = insertelement <4 x i32> %595, i32 0, i32 2 %597 = bitcast <8 x i32> %45 to <32 x i8> %598 = bitcast <4 x i32> %47 to <16 x i8> %599 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %596, <32 x i8> %597, <16 x i8> %598, i32 2) %600 = extractelement <4 x float> %599, i32 0 %601 = extractelement <4 x float> %599, i32 1 %602 = extractelement <4 x float> %599, i32 2 %603 = fmul float %591, %600 %604 = fadd float %603, %576 %605 = fmul float %591, %601 %606 = fadd float %605, %577 %607 = fmul float %591, %602 %608 = fadd float %607, %578 br label %ENDIF52 ENDIF52: ; preds = %ENDIF49, %IF53 %temp20.8 = phi float [ %604, %IF53 ], [ %576, %ENDIF49 ] %temp21.8 = phi float [ %606, %IF53 ], [ %577, %ENDIF49 ] %temp22.8 = phi float [ %608, %IF53 ], [ %578, %ENDIF49 ] %609 = fmul float %temp20.8, 5.000000e-01 %610 = fadd float %609, %554 %611 = fmul float %temp21.8, 5.000000e-01 %612 = fadd float %611, %556 %613 = fmul float %temp22.8, 5.000000e-01 %614 = fadd float %613, %558 %615 = fmul float %24, 0.000000e+00 %616 = fadd float %615, %48 %617 = fadd float %25, %49 %618 = call float @llvm.maxnum.f32(float %616, float %50) %619 = call float @llvm.maxnum.f32(float %617, float %51) %620 = call float @llvm.minnum.f32(float %618, float %52) %621 = call float @llvm.minnum.f32(float %619, float %53) %622 = bitcast float %620 to i32 %623 = bitcast float %621 to i32 %624 = insertelement <4 x i32> undef, i32 %622, i32 0 %625 = insertelement <4 x i32> %624, i32 %623, i32 1 %626 = insertelement <4 x i32> %625, i32 0, i32 2 %627 = bitcast <8 x i32> %37 to <32 x i8> %628 = bitcast <4 x i32> %39 to <16 x i8> %629 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %626, <32 x i8> %627, <16 x i8> %628, i32 2) %630 = extractelement <4 x float> %629, i32 3 %631 = fmul float %630, %26 %632 = call float @llvm.AMDIL.clamp.(float %631, float 0.000000e+00, float 1.000000e+00) %633 = fmul float %632, %33 %634 = fmul float %632, %34 %635 = fmul float %632, %35 %636 = bitcast float %620 to i32 %637 = bitcast float %621 to i32 %638 = insertelement <4 x i32> undef, i32 %636, i32 0 %639 = insertelement <4 x i32> %638, i32 %637, i32 1 %640 = insertelement <4 x i32> %639, i32 0, i32 2 %641 = bitcast <8 x i32> %41 to <32 x i8> %642 = bitcast <4 x i32> %43 to <16 x i8> %643 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %640, <32 x i8> %641, <16 x i8> %642, i32 2) %644 = extractelement <4 x float> %643, i32 1 %645 = fcmp ogt float %644, 0.000000e+00 br i1 %645, label %IF56, label %ENDIF55 IF56: ; preds = %ENDIF52 %646 = extractelement <4 x float> %643, i32 2 %647 = fmul float %644, %646 %648 = call float @llvm.AMDIL.clamp.(float %647, float 0.000000e+00, float 1.000000e+00) %649 = bitcast float %620 to i32 %650 = bitcast float %621 to i32 %651 = insertelement <4 x i32> undef, i32 %649, i32 0 %652 = insertelement <4 x i32> %651, i32 %650, i32 1 %653 = insertelement <4 x i32> %652, i32 0, i32 2 %654 = bitcast <8 x i32> %45 to <32 x i8> %655 = bitcast <4 x i32> %47 to <16 x i8> %656 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %653, <32 x i8> %654, <16 x i8> %655, i32 2) %657 = extractelement <4 x float> %656, i32 0 %658 = extractelement <4 x float> %656, i32 1 %659 = extractelement <4 x float> %656, i32 2 %660 = fmul float %648, %657 %661 = fadd float %660, %633 %662 = fmul float %648, %658 %663 = fadd float %662, %634 %664 = fmul float %648, %659 %665 = fadd float %664, %635 br label %ENDIF55 ENDIF55: ; preds = %ENDIF52, %IF56 %temp20.9 = phi float [ %661, %IF56 ], [ %633, %ENDIF52 ] %temp21.9 = phi float [ %663, %IF56 ], [ %634, %ENDIF52 ] %temp22.9 = phi float [ %665, %IF56 ], [ %635, %ENDIF52 ] %666 = fmul float %temp20.9, 7.500000e-01 %667 = fadd float %666, %610 %668 = fmul float %temp21.9, 7.500000e-01 %669 = fadd float %668, %612 %670 = fmul float %temp22.9, 7.500000e-01 %671 = fadd float %670, %614 %672 = fadd float %48, %24 %673 = fadd float %49, %25 %674 = call float @llvm.maxnum.f32(float %672, float %50) %675 = call float @llvm.maxnum.f32(float %673, float %51) %676 = call float @llvm.minnum.f32(float %674, float %52) %677 = call float @llvm.minnum.f32(float %675, float %53) %678 = bitcast float %676 to i32 %679 = bitcast float %677 to i32 %680 = insertelement <4 x i32> undef, i32 %678, i32 0 %681 = insertelement <4 x i32> %680, i32 %679, i32 1 %682 = insertelement <4 x i32> %681, i32 0, i32 2 %683 = bitcast <8 x i32> %37 to <32 x i8> %684 = bitcast <4 x i32> %39 to <16 x i8> %685 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %682, <32 x i8> %683, <16 x i8> %684, i32 2) %686 = extractelement <4 x float> %685, i32 3 %687 = fmul float %686, %26 %688 = call float @llvm.AMDIL.clamp.(float %687, float 0.000000e+00, float 1.000000e+00) %689 = fmul float %688, %33 %690 = fmul float %688, %34 %691 = fmul float %688, %35 %692 = bitcast float %676 to i32 %693 = bitcast float %677 to i32 %694 = insertelement <4 x i32> undef, i32 %692, i32 0 %695 = insertelement <4 x i32> %694, i32 %693, i32 1 %696 = insertelement <4 x i32> %695, i32 0, i32 2 %697 = bitcast <8 x i32> %41 to <32 x i8> %698 = bitcast <4 x i32> %43 to <16 x i8> %699 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %696, <32 x i8> %697, <16 x i8> %698, i32 2) %700 = extractelement <4 x float> %699, i32 1 %701 = fcmp ogt float %700, 0.000000e+00 br i1 %701, label %IF59, label %ENDIF58 IF59: ; preds = %ENDIF55 %702 = extractelement <4 x float> %699, i32 2 %703 = fmul float %700, %702 %704 = call float @llvm.AMDIL.clamp.(float %703, float 0.000000e+00, float 1.000000e+00) %705 = bitcast float %676 to i32 %706 = bitcast float %677 to i32 %707 = insertelement <4 x i32> undef, i32 %705, i32 0 %708 = insertelement <4 x i32> %707, i32 %706, i32 1 %709 = insertelement <4 x i32> %708, i32 0, i32 2 %710 = bitcast <8 x i32> %45 to <32 x i8> %711 = bitcast <4 x i32> %47 to <16 x i8> %712 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %709, <32 x i8> %710, <16 x i8> %711, i32 2) %713 = extractelement <4 x float> %712, i32 0 %714 = extractelement <4 x float> %712, i32 1 %715 = extractelement <4 x float> %712, i32 2 %716 = fmul float %704, %713 %717 = fadd float %716, %689 %718 = fmul float %704, %714 %719 = fadd float %718, %690 %720 = fmul float %704, %715 %721 = fadd float %720, %691 br label %ENDIF58 ENDIF58: ; preds = %ENDIF55, %IF59 %temp20.10 = phi float [ %717, %IF59 ], [ %689, %ENDIF55 ] %temp21.10 = phi float [ %719, %IF59 ], [ %690, %ENDIF55 ] %temp22.10 = phi float [ %721, %IF59 ], [ %691, %ENDIF55 ] %722 = fmul float %temp20.10, 5.000000e-01 %723 = fadd float %722, %667 %724 = fmul float %temp21.10, 5.000000e-01 %725 = fadd float %724, %669 %726 = fmul float %temp22.10, 5.000000e-01 %727 = fadd float %726, %671 %728 = fmul float %24, 0.000000e+00 %729 = fadd float %728, %48 %730 = fmul float %25, 2.000000e+00 %731 = fadd float %730, %49 %732 = call float @llvm.maxnum.f32(float %729, float %50) %733 = call float @llvm.maxnum.f32(float %731, float %51) %734 = call float @llvm.minnum.f32(float %732, float %52) %735 = call float @llvm.minnum.f32(float %733, float %53) %736 = bitcast float %734 to i32 %737 = bitcast float %735 to i32 %738 = insertelement <4 x i32> undef, i32 %736, i32 0 %739 = insertelement <4 x i32> %738, i32 %737, i32 1 %740 = insertelement <4 x i32> %739, i32 0, i32 2 %741 = bitcast <8 x i32> %37 to <32 x i8> %742 = bitcast <4 x i32> %39 to <16 x i8> %743 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %740, <32 x i8> %741, <16 x i8> %742, i32 2) %744 = extractelement <4 x float> %743, i32 3 %745 = fmul float %744, %26 %746 = call float @llvm.AMDIL.clamp.(float %745, float 0.000000e+00, float 1.000000e+00) %747 = fmul float %746, %33 %748 = fmul float %746, %34 %749 = fmul float %746, %35 %750 = bitcast float %734 to i32 %751 = bitcast float %735 to i32 %752 = insertelement <4 x i32> undef, i32 %750, i32 0 %753 = insertelement <4 x i32> %752, i32 %751, i32 1 %754 = insertelement <4 x i32> %753, i32 0, i32 2 %755 = bitcast <8 x i32> %41 to <32 x i8> %756 = bitcast <4 x i32> %43 to <16 x i8> %757 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %754, <32 x i8> %755, <16 x i8> %756, i32 2) %758 = extractelement <4 x float> %757, i32 1 %759 = fcmp ogt float %758, 0.000000e+00 br i1 %759, label %IF62, label %ENDIF61 IF62: ; preds = %ENDIF58 %760 = extractelement <4 x float> %757, i32 2 %761 = fmul float %758, %760 %762 = call float @llvm.AMDIL.clamp.(float %761, float 0.000000e+00, float 1.000000e+00) %763 = bitcast float %734 to i32 %764 = bitcast float %735 to i32 %765 = insertelement <4 x i32> undef, i32 %763, i32 0 %766 = insertelement <4 x i32> %765, i32 %764, i32 1 %767 = insertelement <4 x i32> %766, i32 0, i32 2 %768 = bitcast <8 x i32> %45 to <32 x i8> %769 = bitcast <4 x i32> %47 to <16 x i8> %770 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %767, <32 x i8> %768, <16 x i8> %769, i32 2) %771 = extractelement <4 x float> %770, i32 0 %772 = extractelement <4 x float> %770, i32 1 %773 = extractelement <4 x float> %770, i32 2 %774 = fmul float %762, %771 %775 = fadd float %774, %747 %776 = fmul float %762, %772 %777 = fadd float %776, %748 %778 = fmul float %762, %773 %779 = fadd float %778, %749 br label %ENDIF61 ENDIF61: ; preds = %ENDIF58, %IF62 %temp16.1 = phi float [ %775, %IF62 ], [ %747, %ENDIF58 ] %temp17.1 = phi float [ %777, %IF62 ], [ %748, %ENDIF58 ] %temp18.1 = phi float [ %779, %IF62 ], [ %749, %ENDIF58 ] %780 = fmul float %temp16.1, 5.000000e-01 %781 = fadd float %780, %723 %782 = fmul float %temp17.1, 5.000000e-01 %783 = fadd float %782, %725 %784 = fmul float %temp18.1, 5.000000e-01 %785 = fadd float %784, %727 %786 = fmul float %781, 1.250000e-01 %787 = fmul float %783, 1.250000e-01 %788 = fmul float %785, 1.250000e-01 %789 = call i32 @llvm.SI.packf16(float %786, float %787) %790 = bitcast i32 %789 to float %791 = call i32 @llvm.SI.packf16(float %788, float 1.000000e+00) %792 = bitcast i32 %791 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %790, float %792, float %790, float %792) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[12:15], s[2:3], 0x4 ; C0860304 s_load_dwordx4 s[20:23], s[2:3], 0x14 ; C08A0314 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s17, s[12:15], 0x0 ; C2088D00 s_buffer_load_dword s16, s[12:15], 0x1 ; C2080D01 s_buffer_load_dword s0, s[12:15], 0x4 ; C2000D04 s_buffer_load_dword s8, s[12:15], 0x8 ; C2040D08 s_buffer_load_dword s10, s[12:15], 0x9 ; C2050D09 s_buffer_load_dword s11, s[12:15], 0xa ; C2058D0A s_buffer_load_dword s12, s[12:15], 0xb ; C2060D0B s_mov_b32 m0, s9 ; BEFC0309 s_buffer_load_dword s1, s[20:23], 0xc ; C200950C s_buffer_load_dword s2, s[20:23], 0xd ; C201150D s_buffer_load_dword s3, s[20:23], 0xe ; C201950E v_interp_p1_f32 v3, v0, 0, 0, [m0] ; C80C0000 v_interp_p2_f32 v3, [v3], v1, 0, 0, [m0] ; C80D0001 v_interp_p1_f32 v4, v0, 1, 0, [m0] ; C8100100 v_interp_p2_f32 v4, [v4], v1, 1, 0, [m0] ; C8110101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s8 ; 7E000208 v_mul_f32_e32 v5, s17, v0 ; 100A0011 v_mov_b32_e32 v0, s10 ; 7E00020A v_mul_f32_e32 v6, s16, v0 ; 100C0010 v_mov_b32_e32 v0, s11 ; 7E00020B v_mul_f32_e32 v7, s17, v0 ; 100E0011 v_mov_b32_e32 v0, s12 ; 7E00020C v_mul_f32_e32 v8, s16, v0 ; 10100010 v_mad_f32 v0, 0, s17, v3 ; D2820000 040C2280 v_max_f32_e32 v0, v5, v0 ; 20000105 s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 s_load_dwordx8 s[36:43], s[6:7], 0x0 ; C0D20700 v_mad_f32 v1, -2.0, s16, v4 ; D2820001 041020F5 v_max_f32_e32 v1, v6, v1 ; 20020306 v_min_f32_e32 v9, v7, v0 ; 1E120107 v_min_f32_e32 v10, v8, v1 ; 1E140308 s_load_dwordx4 s[20:23], s[4:5], 0x4 ; C08A0504 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 s_load_dwordx4 s[12:15], s[4:5], 0x8 ; C0860508 s_load_dwordx8 s[4:11], s[6:7], 0x10 ; C0C20710 v_mov_b32_e32 v11, 0 ; 7E160280 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v0, 8, 0, 0, 0, 0, 0, 0, 0, v[9:12], s[36:43], s[32:35] ; F0900800 01090009 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, s0, v0 ; 10000000 v_add_f32_e64 v2, 0, v0 clamp ; D2060802 00020080 v_mul_f32_e32 v0, s1, v2 ; 10000401 v_mul_f32_e32 v1, s2, v2 ; 10020402 v_mul_f32_e32 v2, s3, v2 ; 10040403 image_sample_l v[11:14], 15, 0, 0, 0, 0, 0, 0, 0, v[9:12], s[24:31], s[20:23] ; F0900F00 00A60B09 s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_lt_f32_e32 vcc, 0, v12 ; 7C021880 s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_mul_f32_e32 v12, v13, v12 ; 1018190D v_mov_b32_e32 v11, 0 ; 7E160280 image_sample_l v[9:11], 7, 0, 0, 0, 0, 0, 0, 0, v[9:12], s[4:11], s[12:15] ; F0900700 00610909 v_add_f32_e64 v12, 0, v12 clamp ; D206080C 00021880 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v0, v9, v12 ; 3E001909 v_mac_f32_e32 v1, v10, v12 ; 3E02190A v_mac_f32_e32 v2, v11, v12 ; 3E04190B s_or_b64 exec, exec, s[18:19] ; 88FE127E v_subrev_f32_e32 v9, s17, v3 ; 0A120611 v_subrev_f32_e32 v10, s16, v4 ; 0A140810 v_max_f32_e32 v9, v5, v9 ; 20121305 v_max_f32_e32 v10, v6, v10 ; 20141506 v_min_f32_e32 v12, v7, v9 ; 1E181307 v_min_f32_e32 v13, v8, v10 ; 1E1A1508 v_mov_b32_e32 v14, 0 ; 7E1C0280 image_sample_l v9, 8, 0, 0, 0, 0, 0, 0, 0, v[12:15], s[36:43], s[32:35] ; F0900800 0109090C s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v9, s0, v9 ; 10121200 v_add_f32_e64 v9, 0, v9 clamp ; D2060809 00021280 v_mul_f32_e32 v10, s1, v9 ; 10141201 v_mul_f32_e32 v11, s2, v9 ; 10161202 v_mul_f32_e32 v9, s3, v9 ; 10121203 image_sample_l v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[12:15], s[24:31], s[20:23] ; F0900F00 00A60E0C s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_lt_f32_e32 vcc, 0, v15 ; 7C021E80 s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_mul_f32_e32 v15, v16, v15 ; 101E1F10 v_mov_b32_e32 v14, 0 ; 7E1C0280 image_sample_l v[12:14], 7, 0, 0, 0, 0, 0, 0, 0, v[12:15], s[4:11], s[12:15] ; F0900700 00610C0C v_add_f32_e64 v15, 0, v15 clamp ; D206080F 00021E80 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v10, v12, v15 ; 3E141F0C v_mac_f32_e32 v11, v13, v15 ; 3E161F0D v_mac_f32_e32 v9, v14, v15 ; 3E121F0E s_or_b64 exec, exec, s[18:19] ; 88FE127E v_mad_f32 v12, 0, s17, v3 ; D282000C 040C2280 v_subrev_f32_e32 v13, s16, v4 ; 0A1A0810 v_max_f32_e32 v12, v5, v12 ; 20181905 v_max_f32_e32 v13, v6, v13 ; 201A1B06 v_min_f32_e32 v15, v7, v12 ; 1E1E1907 v_min_f32_e32 v16, v8, v13 ; 1E201B08 v_mov_b32_e32 v17, 0 ; 7E220280 image_sample_l v12, 8, 0, 0, 0, 0, 0, 0, 0, v[15:18], s[36:43], s[32:35] ; F0900800 01090C0F s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v12, s0, v12 ; 10181800 v_add_f32_e64 v12, 0, v12 clamp ; D206080C 00021880 v_mul_f32_e32 v14, s1, v12 ; 101C1801 v_mul_f32_e32 v13, s2, v12 ; 101A1802 v_mul_f32_e32 v12, s3, v12 ; 10181803 image_sample_l v[17:20], 15, 0, 0, 0, 0, 0, 0, 0, v[15:18], s[24:31], s[20:23] ; F0900F00 00A6110F s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_lt_f32_e32 vcc, 0, v18 ; 7C022480 s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_mul_f32_e32 v18, v19, v18 ; 10242513 v_mov_b32_e32 v17, 0 ; 7E220280 image_sample_l v[15:17], 7, 0, 0, 0, 0, 0, 0, 0, v[15:18], s[4:11], s[12:15] ; F0900700 00610F0F v_add_f32_e64 v18, 0, v18 clamp ; D2060812 00022480 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v14, v15, v18 ; 3E1C250F v_mac_f32_e32 v13, v16, v18 ; 3E1A2510 v_mac_f32_e32 v12, v17, v18 ; 3E182511 s_or_b64 exec, exec, s[18:19] ; 88FE127E v_add_f32_e32 v15, s17, v3 ; 061E0611 v_subrev_f32_e32 v16, s16, v4 ; 0A200810 v_max_f32_e32 v15, v5, v15 ; 201E1F05 v_max_f32_e32 v16, v6, v16 ; 20202106 v_min_f32_e32 v18, v7, v15 ; 1E241F07 v_min_f32_e32 v19, v8, v16 ; 1E262108 v_mov_b32_e32 v20, 0 ; 7E280280 image_sample_l v15, 8, 0, 0, 0, 0, 0, 0, 0, v[18:21], s[36:43], s[32:35] ; F0900800 01090F12 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v15, s0, v15 ; 101E1E00 v_add_f32_e64 v15, 0, v15 clamp ; D206080F 00021E80 v_mul_f32_e32 v17, s1, v15 ; 10221E01 v_mul_f32_e32 v16, s2, v15 ; 10201E02 v_mul_f32_e32 v15, s3, v15 ; 101E1E03 image_sample_l v[20:23], 15, 0, 0, 0, 0, 0, 0, 0, v[18:21], s[24:31], s[20:23] ; F0900F00 00A61412 s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_lt_f32_e32 vcc, 0, v21 ; 7C022A80 s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_mul_f32_e32 v21, v22, v21 ; 102A2B16 v_mov_b32_e32 v20, 0 ; 7E280280 image_sample_l v[18:20], 7, 0, 0, 0, 0, 0, 0, 0, v[18:21], s[4:11], s[12:15] ; F0900700 00611212 v_add_f32_e64 v21, 0, v21 clamp ; D2060815 00022A80 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v17, v18, v21 ; 3E222B12 v_mac_f32_e32 v16, v19, v21 ; 3E202B13 v_mac_f32_e32 v15, v20, v21 ; 3E1E2B14 s_or_b64 exec, exec, s[18:19] ; 88FE127E v_mad_f32 v18, -2.0, s17, v3 ; D2820012 040C22F5 v_mad_f32 v19, 0, s16, v4 ; D2820013 04102080 v_max_f32_e32 v18, v5, v18 ; 20242505 v_max_f32_e32 v19, v6, v19 ; 20262706 v_min_f32_e32 v21, v7, v18 ; 1E2A2507 v_min_f32_e32 v22, v8, v19 ; 1E2C2708 v_mov_b32_e32 v23, 0 ; 7E2E0280 image_sample_l v18, 8, 0, 0, 0, 0, 0, 0, 0, v[21:24], s[36:43], s[32:35] ; F0900800 01091215 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v18, s0, v18 ; 10242400 v_add_f32_e64 v18, 0, v18 clamp ; D2060812 00022480 v_mul_f32_e32 v20, s1, v18 ; 10282401 v_mul_f32_e32 v19, s2, v18 ; 10262402 v_mul_f32_e32 v18, s3, v18 ; 10242403 image_sample_l v[23:26], 15, 0, 0, 0, 0, 0, 0, 0, v[21:24], s[24:31], s[20:23] ; F0900F00 00A61715 s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_lt_f32_e32 vcc, 0, v24 ; 7C023080 s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_mul_f32_e32 v24, v25, v24 ; 10303119 v_mov_b32_e32 v23, 0 ; 7E2E0280 image_sample_l v[21:23], 7, 0, 0, 0, 0, 0, 0, 0, v[21:24], s[4:11], s[12:15] ; F0900700 00611515 v_add_f32_e64 v24, 0, v24 clamp ; D2060818 00023080 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v20, v21, v24 ; 3E283115 v_mac_f32_e32 v19, v22, v24 ; 3E263116 v_mac_f32_e32 v18, v23, v24 ; 3E243117 s_or_b64 exec, exec, s[18:19] ; 88FE127E v_subrev_f32_e32 v21, s17, v3 ; 0A2A0611 v_mad_f32 v22, 0, s16, v4 ; D2820016 04102080 v_max_f32_e32 v21, v5, v21 ; 202A2B05 v_max_f32_e32 v22, v6, v22 ; 202C2D06 v_min_f32_e32 v24, v7, v21 ; 1E302B07 v_min_f32_e32 v25, v8, v22 ; 1E322D08 v_mov_b32_e32 v26, 0 ; 7E340280 image_sample_l v21, 8, 0, 0, 0, 0, 0, 0, 0, v[24:27], s[36:43], s[32:35] ; F0900800 01091518 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v21, s0, v21 ; 102A2A00 v_add_f32_e64 v21, 0, v21 clamp ; D2060815 00022A80 v_mul_f32_e32 v23, s1, v21 ; 102E2A01 v_mul_f32_e32 v22, s2, v21 ; 102C2A02 v_mul_f32_e32 v21, s3, v21 ; 102A2A03 image_sample_l v[26:29], 15, 0, 0, 0, 0, 0, 0, 0, v[24:27], s[24:31], s[20:23] ; F0900F00 00A61A18 s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_lt_f32_e32 vcc, 0, v27 ; 7C023680 s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_mul_f32_e32 v27, v28, v27 ; 1036371C v_mov_b32_e32 v26, 0 ; 7E340280 image_sample_l v[24:26], 7, 0, 0, 0, 0, 0, 0, 0, v[24:27], s[4:11], s[12:15] ; F0900700 00611818 v_add_f32_e64 v27, 0, v27 clamp ; D206081B 00023680 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v23, v24, v27 ; 3E2E3718 v_mac_f32_e32 v22, v25, v27 ; 3E2C3719 v_mac_f32_e32 v21, v26, v27 ; 3E2A371A s_or_b64 exec, exec, s[18:19] ; 88FE127E v_max_f32_e32 v24, v5, v3 ; 20300705 v_max_f32_e32 v25, v6, v4 ; 20320906 v_min_f32_e32 v27, v7, v24 ; 1E363107 v_min_f32_e32 v28, v8, v25 ; 1E383308 v_mov_b32_e32 v29, 0 ; 7E3A0280 image_sample_l v24, 8, 0, 0, 0, 0, 0, 0, 0, v[27:30], s[36:43], s[32:35] ; F0900800 0109181B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v24, s0, v24 ; 10303000 v_add_f32_e64 v26, 0, v24 clamp ; D206081A 00023080 v_mul_f32_e32 v24, s1, v26 ; 10303401 v_mul_f32_e32 v25, s2, v26 ; 10323402 v_mul_f32_e32 v26, s3, v26 ; 10343403 image_sample_l v[29:32], 15, 0, 0, 0, 0, 0, 0, 0, v[27:30], s[24:31], s[20:23] ; F0900F00 00A61D1B s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_lt_f32_e32 vcc, 0, v30 ; 7C023C80 s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_mul_f32_e32 v30, v31, v30 ; 103C3D1F v_mov_b32_e32 v29, 0 ; 7E3A0280 image_sample_l v[27:29], 7, 0, 0, 0, 0, 0, 0, 0, v[27:30], s[4:11], s[12:15] ; F0900700 00611B1B v_add_f32_e64 v30, 0, v30 clamp ; D206081E 00023C80 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v24, v27, v30 ; 3E303D1B v_mac_f32_e32 v25, v28, v30 ; 3E323D1C v_mac_f32_e32 v26, v29, v30 ; 3E343D1D s_or_b64 exec, exec, s[18:19] ; 88FE127E v_add_f32_e32 v27, s17, v3 ; 06360611 v_mad_f32 v28, 0, s16, v4 ; D282001C 04102080 v_max_f32_e32 v27, v5, v27 ; 20363705 v_max_f32_e32 v28, v6, v28 ; 20383906 v_min_f32_e32 v30, v7, v27 ; 1E3C3707 v_min_f32_e32 v31, v8, v28 ; 1E3E3908 v_mov_b32_e32 v32, 0 ; 7E400280 image_sample_l v27, 8, 0, 0, 0, 0, 0, 0, 0, v[30:33], s[36:43], s[32:35] ; F0900800 01091B1E s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v27, s0, v27 ; 10363600 v_add_f32_e64 v27, 0, v27 clamp ; D206081B 00023680 v_mul_f32_e32 v29, s1, v27 ; 103A3601 v_mul_f32_e32 v28, s2, v27 ; 10383602 v_mul_f32_e32 v27, s3, v27 ; 10363603 image_sample_l v[32:35], 15, 0, 0, 0, 0, 0, 0, 0, v[30:33], s[24:31], s[20:23] ; F0900F00 00A6201E s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_lt_f32_e32 vcc, 0, v33 ; 7C024280 s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_mul_f32_e32 v33, v34, v33 ; 10424322 v_mov_b32_e32 v32, 0 ; 7E400280 image_sample_l v[30:32], 7, 0, 0, 0, 0, 0, 0, 0, v[30:33], s[4:11], s[12:15] ; F0900700 00611E1E v_add_f32_e64 v33, 0, v33 clamp ; D2060821 00024280 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v29, v30, v33 ; 3E3A431E v_mac_f32_e32 v28, v31, v33 ; 3E38431F v_mac_f32_e32 v27, v32, v33 ; 3E364320 s_or_b64 exec, exec, s[18:19] ; 88FE127E v_mad_f32 v30, 2.0, s17, v3 ; D282001E 040C22F4 v_mad_f32 v31, 0, s16, v4 ; D282001F 04102080 v_max_f32_e32 v30, v5, v30 ; 203C3D05 v_max_f32_e32 v31, v6, v31 ; 203E3F06 v_min_f32_e32 v33, v7, v30 ; 1E423D07 v_min_f32_e32 v34, v8, v31 ; 1E443F08 v_mov_b32_e32 v35, 0 ; 7E460280 image_sample_l v30, 8, 0, 0, 0, 0, 0, 0, 0, v[33:36], s[36:43], s[32:35] ; F0900800 01091E21 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v30, s0, v30 ; 103C3C00 v_add_f32_e64 v30, 0, v30 clamp ; D206081E 00023C80 v_mul_f32_e32 v32, s1, v30 ; 10403C01 v_mul_f32_e32 v31, s2, v30 ; 103E3C02 v_mul_f32_e32 v30, s3, v30 ; 103C3C03 image_sample_l v[35:38], 15, 0, 0, 0, 0, 0, 0, 0, v[33:36], s[24:31], s[20:23] ; F0900F00 00A62321 s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_lt_f32_e32 vcc, 0, v36 ; 7C024880 s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_mul_f32_e32 v36, v37, v36 ; 10484925 v_mov_b32_e32 v35, 0 ; 7E460280 image_sample_l v[33:35], 7, 0, 0, 0, 0, 0, 0, 0, v[33:36], s[4:11], s[12:15] ; F0900700 00612121 v_add_f32_e64 v36, 0, v36 clamp ; D2060824 00024880 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v32, v33, v36 ; 3E404921 v_mac_f32_e32 v31, v34, v36 ; 3E3E4922 v_mac_f32_e32 v30, v35, v36 ; 3E3C4923 s_or_b64 exec, exec, s[18:19] ; 88FE127E v_subrev_f32_e32 v33, s17, v3 ; 0A420611 v_add_f32_e32 v34, s16, v4 ; 06440810 v_max_f32_e32 v33, v5, v33 ; 20424305 v_max_f32_e32 v34, v6, v34 ; 20444506 v_min_f32_e32 v36, v7, v33 ; 1E484307 v_min_f32_e32 v37, v8, v34 ; 1E4A4508 v_mov_b32_e32 v38, 0 ; 7E4C0280 image_sample_l v33, 8, 0, 0, 0, 0, 0, 0, 0, v[36:39], s[36:43], s[32:35] ; F0900800 01092124 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v33, s0, v33 ; 10424200 v_add_f32_e64 v33, 0, v33 clamp ; D2060821 00024280 v_mul_f32_e32 v35, s1, v33 ; 10464201 v_mul_f32_e32 v34, s2, v33 ; 10444202 v_mul_f32_e32 v33, s3, v33 ; 10424203 image_sample_l v[38:41], 15, 0, 0, 0, 0, 0, 0, 0, v[36:39], s[24:31], s[20:23] ; F0900F00 00A62624 s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_lt_f32_e32 vcc, 0, v39 ; 7C024E80 s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_mul_f32_e32 v39, v40, v39 ; 104E4F28 v_mov_b32_e32 v38, 0 ; 7E4C0280 image_sample_l v[36:38], 7, 0, 0, 0, 0, 0, 0, 0, v[36:39], s[4:11], s[12:15] ; F0900700 00612424 v_add_f32_e64 v39, 0, v39 clamp ; D2060827 00024E80 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v35, v36, v39 ; 3E464F24 v_mac_f32_e32 v34, v37, v39 ; 3E444F25 v_mac_f32_e32 v33, v38, v39 ; 3E424F26 s_or_b64 exec, exec, s[18:19] ; 88FE127E v_mad_f32 v36, 0, s17, v3 ; D2820024 040C2280 v_add_f32_e32 v37, s16, v4 ; 064A0810 v_max_f32_e32 v36, v5, v36 ; 20484905 v_max_f32_e32 v37, v6, v37 ; 204A4B06 v_min_f32_e32 v39, v7, v36 ; 1E4E4907 v_min_f32_e32 v40, v8, v37 ; 1E504B08 v_mov_b32_e32 v41, 0 ; 7E520280 image_sample_l v36, 8, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[36:43], s[32:35] ; F0900800 01092427 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v36, s0, v36 ; 10484800 v_add_f32_e64 v36, 0, v36 clamp ; D2060824 00024880 v_mul_f32_e32 v38, s1, v36 ; 104C4801 v_mul_f32_e32 v37, s2, v36 ; 104A4802 v_mul_f32_e32 v36, s3, v36 ; 10484803 image_sample_l v[41:44], 15, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[24:31], s[20:23] ; F0900F00 00A62927 s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_lt_f32_e32 vcc, 0, v42 ; 7C025480 s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_mul_f32_e32 v42, v43, v42 ; 1054552B v_mov_b32_e32 v41, 0 ; 7E520280 image_sample_l v[39:41], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[4:11], s[12:15] ; F0900700 00612727 v_add_f32_e64 v42, 0, v42 clamp ; D206082A 00025480 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v38, v39, v42 ; 3E4C5527 v_mac_f32_e32 v37, v40, v42 ; 3E4A5528 v_mac_f32_e32 v36, v41, v42 ; 3E485529 s_or_b64 exec, exec, s[18:19] ; 88FE127E v_add_f32_e32 v39, s17, v3 ; 064E0611 v_add_f32_e32 v40, s16, v4 ; 06500810 v_max_f32_e32 v39, v5, v39 ; 204E4F05 v_max_f32_e32 v40, v6, v40 ; 20505106 v_min_f32_e32 v42, v7, v39 ; 1E544F07 v_min_f32_e32 v43, v8, v40 ; 1E565108 v_mov_b32_e32 v44, 0 ; 7E580280 image_sample_l v39, 8, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[36:43], s[32:35] ; F0900800 0109272A s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v39, s0, v39 ; 104E4E00 v_add_f32_e64 v39, 0, v39 clamp ; D2060827 00024E80 v_mul_f32_e32 v41, s1, v39 ; 10524E01 v_mul_f32_e32 v40, s2, v39 ; 10504E02 v_mul_f32_e32 v39, s3, v39 ; 104E4E03 image_sample_l v[44:47], 15, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[24:31], s[20:23] ; F0900F00 00A62C2A s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_lt_f32_e32 vcc, 0, v45 ; 7C025A80 s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_mul_f32_e32 v45, v46, v45 ; 105A5B2E v_mov_b32_e32 v44, 0 ; 7E580280 image_sample_l v[42:44], 7, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[4:11], s[12:15] ; F0900700 00612A2A v_add_f32_e64 v45, 0, v45 clamp ; D206082D 00025A80 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v41, v42, v45 ; 3E525B2A v_mac_f32_e32 v40, v43, v45 ; 3E505B2B v_mac_f32_e32 v39, v44, v45 ; 3E4E5B2C s_or_b64 exec, exec, s[18:19] ; 88FE127E v_mac_f32_e64 v3, 0, s17 ; D23E0003 00002280 v_mac_f32_e64 v4, 2.0, s16 ; D23E0004 000020F4 v_max_f32_e32 v3, v5, v3 ; 20060705 v_max_f32_e32 v4, v6, v4 ; 20080906 v_min_f32_e32 v42, v7, v3 ; 1E540707 v_min_f32_e32 v43, v8, v4 ; 1E560908 v_mov_b32_e32 v44, 0 ; 7E580280 image_sample_l v3, 8, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[36:43], s[32:35] ; F0900800 0109032A image_sample_l v[44:47], 15, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[24:31], s[20:23] ; F0900F00 00A62C2A s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v3, s0, v3 ; 10060600 v_add_f32_e64 v3, 0, v3 clamp ; D2060803 00020680 v_mul_f32_e32 v5, s1, v3 ; 100A0601 v_mul_f32_e32 v4, s2, v3 ; 10080602 v_mul_f32_e32 v3, s3, v3 ; 10060603 s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_lt_f32_e32 vcc, 0, v45 ; 7C025A80 s_and_saveexec_b64 s[0:1], vcc ; BE80246A s_xor_b64 s[0:1], exec, s[0:1] ; 8980007E v_mul_f32_e32 v6, v46, v45 ; 100C5B2E v_mov_b32_e32 v44, 0 ; 7E580280 image_sample_l v[42:44], 7, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[4:11], s[12:15] ; F0900700 00612A2A v_add_f32_e64 v6, 0, v6 clamp ; D2060806 00020C80 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v5, v42, v6 ; 3E0A0D2A v_mac_f32_e32 v4, v43, v6 ; 3E080D2B v_mac_f32_e32 v3, v44, v6 ; 3E060D2C s_or_b64 exec, exec, s[0:1] ; 88FE007E v_mul_f32_e32 v0, 0.5, v0 ; 100000F0 v_mac_f32_e32 v0, 0.5, v10 ; 3E0014F0 v_mul_f32_e32 v1, 0.5, v1 ; 100202F0 v_mac_f32_e32 v1, 0.5, v11 ; 3E0216F0 v_mul_f32_e32 v2, 0.5, v2 ; 100404F0 v_mac_f32_e32 v2, 0.5, v9 ; 3E0412F0 v_mov_b32_e32 v6, 0x3f400000 ; 7E0C02FF 3F400000 v_mac_f32_e32 v0, v6, v14 ; 3E001D06 v_mac_f32_e32 v1, v6, v13 ; 3E021B06 v_mac_f32_e32 v2, v6, v12 ; 3E041906 v_mac_f32_e32 v0, 0.5, v17 ; 3E0022F0 v_mac_f32_e32 v1, 0.5, v16 ; 3E0220F0 v_mac_f32_e32 v2, 0.5, v15 ; 3E041EF0 v_mac_f32_e32 v0, 0.5, v20 ; 3E0028F0 v_mac_f32_e32 v1, 0.5, v19 ; 3E0226F0 v_mac_f32_e32 v2, 0.5, v18 ; 3E0424F0 v_mac_f32_e32 v0, v6, v23 ; 3E002F06 v_mac_f32_e32 v1, v6, v22 ; 3E022D06 v_mac_f32_e32 v2, v6, v21 ; 3E042B06 v_add_f32_e32 v0, v24, v0 ; 06000118 v_add_f32_e32 v1, v25, v1 ; 06020319 v_add_f32_e32 v2, v26, v2 ; 0604051A v_mac_f32_e32 v0, v6, v29 ; 3E003B06 v_mac_f32_e32 v1, v6, v28 ; 3E023906 v_mac_f32_e32 v2, v6, v27 ; 3E043706 v_mac_f32_e32 v0, 0.5, v32 ; 3E0040F0 v_mac_f32_e32 v1, 0.5, v31 ; 3E023EF0 v_mac_f32_e32 v2, 0.5, v30 ; 3E043CF0 v_mac_f32_e32 v0, 0.5, v35 ; 3E0046F0 v_mac_f32_e32 v1, 0.5, v34 ; 3E0244F0 v_mac_f32_e32 v2, 0.5, v33 ; 3E0442F0 v_mac_f32_e32 v0, v6, v38 ; 3E004D06 v_mac_f32_e32 v1, v6, v37 ; 3E024B06 v_mac_f32_e32 v2, v6, v36 ; 3E044906 v_mac_f32_e32 v0, 0.5, v41 ; 3E0052F0 v_mac_f32_e32 v1, 0.5, v40 ; 3E0250F0 v_mac_f32_e32 v2, 0.5, v39 ; 3E044EF0 v_mac_f32_e32 v0, 0.5, v5 ; 3E000AF0 v_mac_f32_e32 v1, 0.5, v4 ; 3E0208F0 v_mac_f32_e32 v2, 0.5, v3 ; 3E0406F0 v_mov_b32_e32 v3, 0x3e000000 ; 7E0602FF 3E000000 v_mul_f32_e32 v0, v3, v0 ; 10000103 v_mul_f32_e32 v1, v3, v1 ; 10020303 v_mul_f32_e32 v2, v3, v2 ; 10040503 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e64 v1, v2, 1.0 ; D25E0001 0001E502 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 48 Code Size: 2100 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0] DCL CONST[2][0..41] DCL CONST[3][0..13] DCL CONST[4][0] DCL TEMP[0..6], LOCAL IMM[0] UINT32 {1, 416, 424, 0} IMM[1] FLT32 { 0.0000, -3.0962, -1.2769, 1.2769} IMM[2] FLT32 { 3.0962, 0.0198, 0.3206, 0.3192} 0: ADD TEMP[0].xy, CONST[2][26].xyyy, CONST[2][26].zwww 1: MOV TEMP[1].x, CONST[2][26].xxxx 2: MOV TEMP[1].y, CONST[2][26].yyyy 3: MOV TEMP[1].z, TEMP[0].xxxx 4: MOV TEMP[1].w, TEMP[0].yyyy 5: MUL TEMP[0].xz, TEMP[1], CONST[1][0].xyxy 6: MOV TEMP[1].y, IMM[1].xxxx 7: MUL TEMP[1].x, CONST[1][0].xxxx, IMM[1].yyyy 8: ADD TEMP[1].xy, TEMP[1].xyyy, IN[0].xyyy 9: MOV TEMP[2].y, TEMP[1].yyyy 10: MAX TEMP[1].x, TEMP[1].xxxx, TEMP[0].xxxx 11: MIN TEMP[2].x, TEMP[1].xxxx, TEMP[0].zzzz 12: MOV TEMP[1].y, IMM[1].xxxx 13: MUL TEMP[1].x, CONST[1][0].xxxx, IMM[1].zzzz 14: ADD TEMP[1].xy, TEMP[1].xyyy, IN[0].xyyy 15: MOV TEMP[3].y, TEMP[1].yyyy 16: MAX TEMP[1].x, TEMP[1].xxxx, TEMP[0].xxxx 17: MIN TEMP[3].x, TEMP[1].xxxx, TEMP[0].zzzz 18: MOV TEMP[1].y, IN[0].yyyy 19: MAX TEMP[4].x, IN[0].xxxx, TEMP[0].xxxx 20: MIN TEMP[1].x, TEMP[4].xxxx, TEMP[0].zzzz 21: MOV TEMP[4].y, IMM[1].xxxx 22: MUL TEMP[4].x, CONST[1][0].xxxx, IMM[1].wwww 23: ADD TEMP[4].xy, TEMP[4].xyyy, IN[0].xyyy 24: MOV TEMP[5].y, TEMP[4].yyyy 25: MAX TEMP[4].x, TEMP[4].xxxx, TEMP[0].xxxx 26: MIN TEMP[5].x, TEMP[4].xxxx, TEMP[0].zzzz 27: MOV TEMP[4].y, IMM[1].xxxx 28: MUL TEMP[4].x, CONST[1][0].xxxx, IMM[2].xxxx 29: ADD TEMP[4].xy, TEMP[4].xyyy, IN[0].xyyy 30: MOV TEMP[6].y, TEMP[4].yyyy 31: MAX TEMP[4].x, TEMP[4].xxxx, TEMP[0].xxxx 32: MIN TEMP[6].x, TEMP[4].xxxx, TEMP[0].zzzz 33: MOV TEMP[0].xy, TEMP[6].xyyy 34: TEX TEMP[0], TEMP[0], SAMP[0], 2D 35: MOV TEMP[4].xy, TEMP[5].xyyy 36: TEX TEMP[4], TEMP[4], SAMP[0], 2D 37: MOV TEMP[1].xy, TEMP[1].xyyy 38: TEX TEMP[1], TEMP[1], SAMP[0], 2D 39: MOV TEMP[2].xy, TEMP[2].xyyy 40: TEX TEMP[2], TEMP[2], SAMP[0], 2D 41: MOV TEMP[3].xy, TEMP[3].xyyy 42: TEX TEMP[3], TEMP[3], SAMP[0], 2D 43: MUL TEMP[3], IMM[2].zzzz, TEMP[3] 44: MAD TEMP[2], IMM[2].yyyy, TEMP[2], TEMP[3] 45: MAD TEMP[1], IMM[2].wwww, TEMP[1], TEMP[2] 46: MAD TEMP[1], IMM[2].zzzz, TEMP[4], TEMP[1] 47: MAD TEMP[0], IMM[2].yyyy, TEMP[0], TEMP[1] 48: MOV OUT[0], TEMP[0] 49: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 %26 = load <16 x i8>, <16 x i8> addrspace(2)* %25, align 16, !tbaa !0 %27 = call float @llvm.SI.load.const(<16 x i8> %26, i32 416) %28 = call float @llvm.SI.load.const(<16 x i8> %26, i32 424) %29 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %30 = load <8 x i32>, <8 x i32> addrspace(2)* %29, align 32, !tbaa !0 %31 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %32 = load <4 x i32>, <4 x i32> addrspace(2)* %31, align 16, !tbaa !0 %33 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %35 = fadd float %27, %28 %36 = fmul float %27, %24 %37 = fmul float %35, %24 %38 = fmul float %24, 0xC008C50F00000000 %39 = fadd float %38, %33 %40 = fadd float %34, 0.000000e+00 %41 = call float @llvm.maxnum.f32(float %39, float %36) %42 = call float @llvm.minnum.f32(float %41, float %37) %43 = fmul float %24, 0xBFF46E19C0000000 %44 = fadd float %43, %33 %45 = fadd float %34, 0.000000e+00 %46 = call float @llvm.maxnum.f32(float %44, float %36) %47 = call float @llvm.minnum.f32(float %46, float %37) %48 = call float @llvm.maxnum.f32(float %33, float %36) %49 = call float @llvm.minnum.f32(float %48, float %37) %50 = fmul float %24, 0x3FF46E19C0000000 %51 = fadd float %50, %33 %52 = fadd float %34, 0.000000e+00 %53 = call float @llvm.maxnum.f32(float %51, float %36) %54 = call float @llvm.minnum.f32(float %53, float %37) %55 = fmul float %24, 0x4008C50F00000000 %56 = fadd float %55, %33 %57 = fadd float %34, 0.000000e+00 %58 = call float @llvm.maxnum.f32(float %56, float %36) %59 = call float @llvm.minnum.f32(float %58, float %37) %60 = bitcast float %59 to i32 %61 = bitcast float %57 to i32 %62 = insertelement <2 x i32> undef, i32 %60, i32 0 %63 = insertelement <2 x i32> %62, i32 %61, i32 1 %64 = bitcast <8 x i32> %30 to <32 x i8> %65 = bitcast <4 x i32> %32 to <16 x i8> %66 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %63, <32 x i8> %64, <16 x i8> %65, i32 2) %67 = extractelement <4 x float> %66, i32 0 %68 = extractelement <4 x float> %66, i32 1 %69 = extractelement <4 x float> %66, i32 2 %70 = extractelement <4 x float> %66, i32 3 %71 = bitcast float %54 to i32 %72 = bitcast float %52 to i32 %73 = insertelement <2 x i32> undef, i32 %71, i32 0 %74 = insertelement <2 x i32> %73, i32 %72, i32 1 %75 = bitcast <8 x i32> %30 to <32 x i8> %76 = bitcast <4 x i32> %32 to <16 x i8> %77 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %74, <32 x i8> %75, <16 x i8> %76, i32 2) %78 = extractelement <4 x float> %77, i32 0 %79 = extractelement <4 x float> %77, i32 1 %80 = extractelement <4 x float> %77, i32 2 %81 = extractelement <4 x float> %77, i32 3 %82 = bitcast float %49 to i32 %83 = bitcast float %34 to i32 %84 = insertelement <2 x i32> undef, i32 %82, i32 0 %85 = insertelement <2 x i32> %84, i32 %83, i32 1 %86 = bitcast <8 x i32> %30 to <32 x i8> %87 = bitcast <4 x i32> %32 to <16 x i8> %88 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %85, <32 x i8> %86, <16 x i8> %87, i32 2) %89 = extractelement <4 x float> %88, i32 0 %90 = extractelement <4 x float> %88, i32 1 %91 = extractelement <4 x float> %88, i32 2 %92 = extractelement <4 x float> %88, i32 3 %93 = bitcast float %42 to i32 %94 = bitcast float %40 to i32 %95 = insertelement <2 x i32> undef, i32 %93, i32 0 %96 = insertelement <2 x i32> %95, i32 %94, i32 1 %97 = bitcast <8 x i32> %30 to <32 x i8> %98 = bitcast <4 x i32> %32 to <16 x i8> %99 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %96, <32 x i8> %97, <16 x i8> %98, i32 2) %100 = extractelement <4 x float> %99, i32 0 %101 = extractelement <4 x float> %99, i32 1 %102 = extractelement <4 x float> %99, i32 2 %103 = extractelement <4 x float> %99, i32 3 %104 = bitcast float %47 to i32 %105 = bitcast float %45 to i32 %106 = insertelement <2 x i32> undef, i32 %104, i32 0 %107 = insertelement <2 x i32> %106, i32 %105, i32 1 %108 = bitcast <8 x i32> %30 to <32 x i8> %109 = bitcast <4 x i32> %32 to <16 x i8> %110 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %107, <32 x i8> %108, <16 x i8> %109, i32 2) %111 = extractelement <4 x float> %110, i32 0 %112 = extractelement <4 x float> %110, i32 1 %113 = extractelement <4 x float> %110, i32 2 %114 = extractelement <4 x float> %110, i32 3 %115 = fmul float %111, 0x3FD4841240000000 %116 = fmul float %112, 0x3FD4841240000000 %117 = fmul float %113, 0x3FD4841240000000 %118 = fmul float %114, 0x3FD4841240000000 %119 = fmul float %100, 0x3F944D8780000000 %120 = fadd float %119, %115 %121 = fmul float %101, 0x3F944D8780000000 %122 = fadd float %121, %116 %123 = fmul float %102, 0x3F944D8780000000 %124 = fadd float %123, %117 %125 = fmul float %103, 0x3F944D8780000000 %126 = fadd float %125, %118 %127 = fmul float %89, 0x3FD46E2A80000000 %128 = fadd float %127, %120 %129 = fmul float %90, 0x3FD46E2A80000000 %130 = fadd float %129, %122 %131 = fmul float %91, 0x3FD46E2A80000000 %132 = fadd float %131, %124 %133 = fmul float %92, 0x3FD46E2A80000000 %134 = fadd float %133, %126 %135 = fmul float %78, 0x3FD4841240000000 %136 = fadd float %135, %128 %137 = fmul float %79, 0x3FD4841240000000 %138 = fadd float %137, %130 %139 = fmul float %80, 0x3FD4841240000000 %140 = fadd float %139, %132 %141 = fmul float %81, 0x3FD4841240000000 %142 = fadd float %141, %134 %143 = fmul float %67, 0x3F944D8780000000 %144 = fadd float %143, %136 %145 = fmul float %68, 0x3F944D8780000000 %146 = fadd float %145, %138 %147 = fmul float %69, 0x3F944D8780000000 %148 = fadd float %147, %140 %149 = fmul float %70, 0x3F944D8780000000 %150 = fadd float %149, %142 %151 = call i32 @llvm.SI.packf16(float %144, float %146) %152 = bitcast i32 %151 to float %153 = call i32 @llvm.SI.packf16(float %148, float %150) %154 = bitcast i32 %153 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %152, float %154, float %152, float %154) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx4 s[12:15], s[2:3], 0x4 ; C0860304 s_load_dwordx4 s[0:3], s[2:3], 0x8 ; C0800308 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[12:15], 0x0 ; C2020D00 s_buffer_load_dword s5, s[0:3], 0x6a ; C202816A s_buffer_load_dword s0, s[0:3], 0x68 ; C2000168 v_mov_b32_e32 v2, 0xc0462878 ; 7E0402FF C0462878 v_mov_b32_e32 v3, 0xbfa370ce ; 7E0602FF BFA370CE v_mov_b32_e32 v4, 0x3fa370ce ; 7E0802FF 3FA370CE v_interp_p1_f32 v5, v0, 0, 0, [m0] ; C8140000 v_interp_p2_f32 v5, [v5], v1, 0, 0, [m0] ; C8150001 v_interp_p1_f32 v7, v0, 1, 0, [m0] ; C81C0100 v_interp_p2_f32 v7, [v7], v1, 1, 0, [m0] ; C81D0101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s5 ; 7E000205 v_add_f32_e32 v0, s0, v0 ; 06000000 v_mov_b32_e32 v1, s4 ; 7E020204 v_mul_f32_e32 v1, s0, v1 ; 10020200 v_mad_f32 v2, s4, v2, v5 ; D2820002 04160404 v_mad_f32 v3, s4, v3, v5 ; D2820003 04160604 v_mad_f32 v4, s4, v4, v5 ; D2820004 04160804 v_mul_f32_e32 v0, s4, v0 ; 10000004 v_max_f32_e32 v2, v1, v2 ; 20040501 v_max_f32_e32 v6, v1, v5 ; 200C0B01 v_mov_b32_e32 v8, 0x40462878 ; 7E1002FF 40462878 v_mac_f32_e32 v5, s4, v8 ; 3E0A1004 s_load_dwordx8 s[0:7], s[6:7], 0x0 ; C0C00700 v_max_f32_e32 v3, v1, v3 ; 20060701 v_max_f32_e32 v4, v1, v4 ; 20080901 v_max_f32_e32 v1, v1, v5 ; 20020B01 v_add_f32_e32 v9, 0, v7 ; 06120E80 v_min_f32_e32 v8, v0, v1 ; 1E100300 v_min_f32_e32 v1, v0, v2 ; 1E020500 v_min_f32_e32 v10, v0, v3 ; 1E140700 v_min_f32_e32 v3, v0, v4 ; 1E060900 v_min_f32_e32 v6, v0, v6 ; 1E0C0D00 v_mov_b32_e32 v4, v9 ; 7E080309 v_mov_b32_e32 v2, v9 ; 7E040309 v_mov_b32_e32 v11, v9 ; 7E160309 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[12:15], 15, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[0:7], s[8:11] ; F0800F00 00400C08 image_sample v[16:19], 15, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[0:7], s[8:11] ; F0800F00 00401003 image_sample v[3:6], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[0:7], s[8:11] ; F0800F00 00400306 image_sample v[20:23], 15, 0, 0, 0, 0, 0, 0, 0, v[1:2], s[0:7], s[8:11] ; F0800F00 00401401 image_sample v[7:10], 15, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[0:7], s[8:11] ; F0800F00 0040070A v_mov_b32_e32 v0, 0x3ea42092 ; 7E0002FF 3EA42092 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, v0, v7 ; 10020F00 v_mul_f32_e32 v2, v0, v8 ; 10041100 v_mul_f32_e32 v7, v0, v9 ; 100E1300 v_mul_f32_e32 v8, v0, v10 ; 10101500 v_mov_b32_e32 v9, 0x3ca26c3c ; 7E1202FF 3CA26C3C v_mac_f32_e32 v1, v9, v20 ; 3E022909 v_mac_f32_e32 v2, v9, v21 ; 3E042B09 v_mac_f32_e32 v7, v9, v22 ; 3E0E2D09 v_mac_f32_e32 v8, v9, v23 ; 3E102F09 v_mov_b32_e32 v10, 0x3ea37154 ; 7E1402FF 3EA37154 v_mac_f32_e32 v1, v10, v3 ; 3E02070A v_mac_f32_e32 v2, v10, v4 ; 3E04090A v_mac_f32_e32 v7, v10, v5 ; 3E0E0B0A v_mac_f32_e32 v8, v10, v6 ; 3E100D0A v_mac_f32_e32 v1, v0, v16 ; 3E022100 v_mac_f32_e32 v2, v0, v17 ; 3E042300 v_mac_f32_e32 v7, v0, v18 ; 3E0E2500 v_mac_f32_e32 v8, v0, v19 ; 3E102700 v_mac_f32_e32 v1, v9, v12 ; 3E021909 v_mac_f32_e32 v2, v9, v13 ; 3E041B09 v_mac_f32_e32 v7, v9, v14 ; 3E0E1D09 v_mac_f32_e32 v8, v9, v15 ; 3E101F09 v_cvt_pkrtz_f16_f32_e32 v0, v1, v2 ; 5E000501 v_cvt_pkrtz_f16_f32_e32 v1, v7, v8 ; 5E021107 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 24 Code Size: 368 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0] DCL CONST[2][0..41] DCL CONST[3][0..13] DCL CONST[4][0] DCL TEMP[0..6], LOCAL IMM[0] UINT32 {1, 416, 424, 0} IMM[1] FLT32 { 0.0000, -3.0962, -1.2769, 1.2769} IMM[2] FLT32 { 3.0962, 0.0198, 0.3206, 0.3192} 0: ADD TEMP[0].xy, CONST[2][26].xyyy, CONST[2][26].zwww 1: MOV TEMP[1].x, CONST[2][26].xxxx 2: MOV TEMP[1].y, CONST[2][26].yyyy 3: MOV TEMP[1].z, TEMP[0].xxxx 4: MOV TEMP[1].w, TEMP[0].yyyy 5: MUL TEMP[0].yw, TEMP[1], CONST[1][0].xyxy 6: MOV TEMP[1].x, IMM[1].xxxx 7: MUL TEMP[2].x, CONST[1][0].yyyy, IMM[1].yyyy 8: MOV TEMP[1].y, TEMP[2].xxxx 9: ADD TEMP[1].xy, TEMP[1].xyyy, IN[0].xyyy 10: MOV TEMP[2].x, TEMP[1].xxxx 11: MAX TEMP[1].x, TEMP[1].yyyy, TEMP[0].yyyy 12: MIN TEMP[1].x, TEMP[1].xxxx, TEMP[0].wwww 13: MOV TEMP[2].y, TEMP[1].xxxx 14: MOV TEMP[1].x, IMM[1].xxxx 15: MUL TEMP[3].x, CONST[1][0].yyyy, IMM[1].zzzz 16: MOV TEMP[1].y, TEMP[3].xxxx 17: ADD TEMP[1].xy, TEMP[1].xyyy, IN[0].xyyy 18: MOV TEMP[3].x, TEMP[1].xxxx 19: MAX TEMP[1].x, TEMP[1].yyyy, TEMP[0].yyyy 20: MIN TEMP[1].x, TEMP[1].xxxx, TEMP[0].wwww 21: MOV TEMP[3].y, TEMP[1].xxxx 22: MOV TEMP[1].x, IN[0].xxxx 23: MAX TEMP[4].x, IN[0].yyyy, TEMP[0].yyyy 24: MIN TEMP[4].x, TEMP[4].xxxx, TEMP[0].wwww 25: MOV TEMP[1].y, TEMP[4].xxxx 26: MOV TEMP[4].x, IMM[1].xxxx 27: MUL TEMP[5].x, CONST[1][0].yyyy, IMM[1].wwww 28: MOV TEMP[4].y, TEMP[5].xxxx 29: ADD TEMP[4].xy, TEMP[4].xyyy, IN[0].xyyy 30: MOV TEMP[5].x, TEMP[4].xxxx 31: MAX TEMP[4].x, TEMP[4].yyyy, TEMP[0].yyyy 32: MIN TEMP[4].x, TEMP[4].xxxx, TEMP[0].wwww 33: MOV TEMP[5].y, TEMP[4].xxxx 34: MOV TEMP[4].x, IMM[1].xxxx 35: MUL TEMP[6].x, CONST[1][0].yyyy, IMM[2].xxxx 36: MOV TEMP[4].y, TEMP[6].xxxx 37: ADD TEMP[4].xy, TEMP[4].xyyy, IN[0].xyyy 38: MOV TEMP[6].x, TEMP[4].xxxx 39: MAX TEMP[4].x, TEMP[4].yyyy, TEMP[0].yyyy 40: MIN TEMP[0].x, TEMP[4].xxxx, TEMP[0].wwww 41: MOV TEMP[6].y, TEMP[0].xxxx 42: MOV TEMP[0].xy, TEMP[6].xyyy 43: TEX TEMP[0], TEMP[0], SAMP[0], 2D 44: MOV TEMP[4].xy, TEMP[5].xyyy 45: TEX TEMP[4], TEMP[4], SAMP[0], 2D 46: MOV TEMP[1].xy, TEMP[1].xyyy 47: TEX TEMP[1], TEMP[1], SAMP[0], 2D 48: MOV TEMP[2].xy, TEMP[2].xyyy 49: TEX TEMP[2], TEMP[2], SAMP[0], 2D 50: MOV TEMP[3].xy, TEMP[3].xyyy 51: TEX TEMP[3], TEMP[3], SAMP[0], 2D 52: MUL TEMP[3], IMM[2].zzzz, TEMP[3] 53: MAD TEMP[2], IMM[2].yyyy, TEMP[2], TEMP[3] 54: MAD TEMP[1], IMM[2].wwww, TEMP[1], TEMP[2] 55: MAD TEMP[1], IMM[2].zzzz, TEMP[4], TEMP[1] 56: MAD TEMP[0], IMM[2].yyyy, TEMP[0], TEMP[1] 57: MOV OUT[0], TEMP[0] 58: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %25 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 %26 = load <16 x i8>, <16 x i8> addrspace(2)* %25, align 16, !tbaa !0 %27 = call float @llvm.SI.load.const(<16 x i8> %26, i32 420) %28 = call float @llvm.SI.load.const(<16 x i8> %26, i32 428) %29 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %30 = load <8 x i32>, <8 x i32> addrspace(2)* %29, align 32, !tbaa !0 %31 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %32 = load <4 x i32>, <4 x i32> addrspace(2)* %31, align 16, !tbaa !0 %33 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %35 = fadd float %27, %28 %36 = fmul float %27, %24 %37 = fmul float %35, %24 %38 = fmul float %24, 0xC008C50F00000000 %39 = fadd float %33, 0.000000e+00 %40 = fadd float %38, %34 %41 = call float @llvm.maxnum.f32(float %40, float %36) %42 = call float @llvm.minnum.f32(float %41, float %37) %43 = fmul float %24, 0xBFF46E19C0000000 %44 = fadd float %33, 0.000000e+00 %45 = fadd float %43, %34 %46 = call float @llvm.maxnum.f32(float %45, float %36) %47 = call float @llvm.minnum.f32(float %46, float %37) %48 = call float @llvm.maxnum.f32(float %34, float %36) %49 = call float @llvm.minnum.f32(float %48, float %37) %50 = fmul float %24, 0x3FF46E19C0000000 %51 = fadd float %33, 0.000000e+00 %52 = fadd float %50, %34 %53 = call float @llvm.maxnum.f32(float %52, float %36) %54 = call float @llvm.minnum.f32(float %53, float %37) %55 = fmul float %24, 0x4008C50F00000000 %56 = fadd float %33, 0.000000e+00 %57 = fadd float %55, %34 %58 = call float @llvm.maxnum.f32(float %57, float %36) %59 = call float @llvm.minnum.f32(float %58, float %37) %60 = bitcast float %56 to i32 %61 = bitcast float %59 to i32 %62 = insertelement <2 x i32> undef, i32 %60, i32 0 %63 = insertelement <2 x i32> %62, i32 %61, i32 1 %64 = bitcast <8 x i32> %30 to <32 x i8> %65 = bitcast <4 x i32> %32 to <16 x i8> %66 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %63, <32 x i8> %64, <16 x i8> %65, i32 2) %67 = extractelement <4 x float> %66, i32 0 %68 = extractelement <4 x float> %66, i32 1 %69 = extractelement <4 x float> %66, i32 2 %70 = extractelement <4 x float> %66, i32 3 %71 = bitcast float %51 to i32 %72 = bitcast float %54 to i32 %73 = insertelement <2 x i32> undef, i32 %71, i32 0 %74 = insertelement <2 x i32> %73, i32 %72, i32 1 %75 = bitcast <8 x i32> %30 to <32 x i8> %76 = bitcast <4 x i32> %32 to <16 x i8> %77 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %74, <32 x i8> %75, <16 x i8> %76, i32 2) %78 = extractelement <4 x float> %77, i32 0 %79 = extractelement <4 x float> %77, i32 1 %80 = extractelement <4 x float> %77, i32 2 %81 = extractelement <4 x float> %77, i32 3 %82 = bitcast float %33 to i32 %83 = bitcast float %49 to i32 %84 = insertelement <2 x i32> undef, i32 %82, i32 0 %85 = insertelement <2 x i32> %84, i32 %83, i32 1 %86 = bitcast <8 x i32> %30 to <32 x i8> %87 = bitcast <4 x i32> %32 to <16 x i8> %88 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %85, <32 x i8> %86, <16 x i8> %87, i32 2) %89 = extractelement <4 x float> %88, i32 0 %90 = extractelement <4 x float> %88, i32 1 %91 = extractelement <4 x float> %88, i32 2 %92 = extractelement <4 x float> %88, i32 3 %93 = bitcast float %39 to i32 %94 = bitcast float %42 to i32 %95 = insertelement <2 x i32> undef, i32 %93, i32 0 %96 = insertelement <2 x i32> %95, i32 %94, i32 1 %97 = bitcast <8 x i32> %30 to <32 x i8> %98 = bitcast <4 x i32> %32 to <16 x i8> %99 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %96, <32 x i8> %97, <16 x i8> %98, i32 2) %100 = extractelement <4 x float> %99, i32 0 %101 = extractelement <4 x float> %99, i32 1 %102 = extractelement <4 x float> %99, i32 2 %103 = extractelement <4 x float> %99, i32 3 %104 = bitcast float %44 to i32 %105 = bitcast float %47 to i32 %106 = insertelement <2 x i32> undef, i32 %104, i32 0 %107 = insertelement <2 x i32> %106, i32 %105, i32 1 %108 = bitcast <8 x i32> %30 to <32 x i8> %109 = bitcast <4 x i32> %32 to <16 x i8> %110 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %107, <32 x i8> %108, <16 x i8> %109, i32 2) %111 = extractelement <4 x float> %110, i32 0 %112 = extractelement <4 x float> %110, i32 1 %113 = extractelement <4 x float> %110, i32 2 %114 = extractelement <4 x float> %110, i32 3 %115 = fmul float %111, 0x3FD4841240000000 %116 = fmul float %112, 0x3FD4841240000000 %117 = fmul float %113, 0x3FD4841240000000 %118 = fmul float %114, 0x3FD4841240000000 %119 = fmul float %100, 0x3F944D8780000000 %120 = fadd float %119, %115 %121 = fmul float %101, 0x3F944D8780000000 %122 = fadd float %121, %116 %123 = fmul float %102, 0x3F944D8780000000 %124 = fadd float %123, %117 %125 = fmul float %103, 0x3F944D8780000000 %126 = fadd float %125, %118 %127 = fmul float %89, 0x3FD46E2A80000000 %128 = fadd float %127, %120 %129 = fmul float %90, 0x3FD46E2A80000000 %130 = fadd float %129, %122 %131 = fmul float %91, 0x3FD46E2A80000000 %132 = fadd float %131, %124 %133 = fmul float %92, 0x3FD46E2A80000000 %134 = fadd float %133, %126 %135 = fmul float %78, 0x3FD4841240000000 %136 = fadd float %135, %128 %137 = fmul float %79, 0x3FD4841240000000 %138 = fadd float %137, %130 %139 = fmul float %80, 0x3FD4841240000000 %140 = fadd float %139, %132 %141 = fmul float %81, 0x3FD4841240000000 %142 = fadd float %141, %134 %143 = fmul float %67, 0x3F944D8780000000 %144 = fadd float %143, %136 %145 = fmul float %68, 0x3F944D8780000000 %146 = fadd float %145, %138 %147 = fmul float %69, 0x3F944D8780000000 %148 = fadd float %147, %140 %149 = fmul float %70, 0x3F944D8780000000 %150 = fadd float %149, %142 %151 = call i32 @llvm.SI.packf16(float %144, float %146) %152 = bitcast i32 %151 to float %153 = call i32 @llvm.SI.packf16(float %148, float %150) %154 = bitcast i32 %153 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %152, float %154, float %152, float %154) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx4 s[12:15], s[2:3], 0x4 ; C0860304 s_load_dwordx4 s[0:3], s[2:3], 0x8 ; C0800308 v_mov_b32_e32 v2, 0xc0462878 ; 7E0402FF C0462878 v_mov_b32_e32 v3, 0xbfa370ce ; 7E0602FF BFA370CE v_mov_b32_e32 v4, 0x3fa370ce ; 7E0802FF 3FA370CE v_mov_b32_e32 v5, 0x40462878 ; 7E0A02FF 40462878 v_interp_p1_f32 v6, v0, 0, 0, [m0] ; C8180000 v_interp_p2_f32 v6, [v6], v1, 0, 0, [m0] ; C8190001 v_interp_p1_f32 v0, v0, 1, 0, [m0] ; C8000100 v_interp_p2_f32 v0, [v0], v1, 1, 0, [m0] ; C8010101 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[12:15], 0x1 ; C2060D01 s_buffer_load_dword s13, s[0:3], 0x69 ; C2068169 s_buffer_load_dword s14, s[0:3], 0x6b ; C207016B v_add_f32_e32 v8, 0, v6 ; 06100C80 s_load_dwordx8 s[0:7], s[6:7], 0x0 ; C0C00700 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, s12, v2, v0 ; D2820001 0402040C v_mov_b32_e32 v2, s12 ; 7E04020C v_mul_f32_e32 v2, s13, v2 ; 1004040D v_mov_b32_e32 v7, s14 ; 7E0E020E v_add_f32_e32 v7, s13, v7 ; 060E0E0D v_mad_f32 v3, s12, v3, v0 ; D2820003 0402060C v_mad_f32 v4, s12, v4, v0 ; D2820004 0402080C v_max_f32_e32 v9, v2, v0 ; 20120102 v_mac_f32_e32 v0, s12, v5 ; 3E000A0C v_mul_f32_e32 v5, s12, v7 ; 100A0E0C v_min_f32_e32 v7, v5, v9 ; 1E0E1305 v_max_f32_e32 v1, v2, v1 ; 20020302 v_max_f32_e32 v3, v2, v3 ; 20060702 v_max_f32_e32 v4, v2, v4 ; 20080902 v_max_f32_e32 v0, v2, v0 ; 20000102 v_min_f32_e32 v9, v5, v0 ; 1E120105 v_min_f32_e32 v0, v5, v1 ; 1E000305 v_min_f32_e32 v1, v5, v3 ; 1E020705 v_min_f32_e32 v2, v5, v4 ; 1E040905 image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[0:7], s[8:11] ; F0800F00 00400A08 v_mov_b32_e32 v9, v2 ; 7E120302 image_sample v[2:5], 15, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[0:7], s[8:11] ; F0800F00 00400208 v_mov_b32_e32 v9, v0 ; 7E120300 image_sample v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[0:7], s[8:11] ; F0800F00 00400E06 image_sample v[18:21], 15, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[0:7], s[8:11] ; F0800F00 00401208 v_mov_b32_e32 v9, v1 ; 7E120301 image_sample v[6:9], 15, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[0:7], s[8:11] ; F0800F00 00400608 v_mov_b32_e32 v0, 0x3ea42092 ; 7E0002FF 3EA42092 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, v0, v6 ; 10020D00 v_mul_f32_e32 v6, v0, v7 ; 100C0F00 v_mul_f32_e32 v7, v0, v8 ; 100E1100 v_mul_f32_e32 v8, v0, v9 ; 10101300 v_mov_b32_e32 v9, 0x3ca26c3c ; 7E1202FF 3CA26C3C v_mac_f32_e32 v1, v9, v18 ; 3E022509 v_mac_f32_e32 v6, v9, v19 ; 3E0C2709 v_mac_f32_e32 v7, v9, v20 ; 3E0E2909 v_mac_f32_e32 v8, v9, v21 ; 3E102B09 v_mov_b32_e32 v18, 0x3ea37154 ; 7E2402FF 3EA37154 v_mac_f32_e32 v1, v18, v14 ; 3E021D12 v_mac_f32_e32 v6, v18, v15 ; 3E0C1F12 v_mac_f32_e32 v7, v18, v16 ; 3E0E2112 v_mac_f32_e32 v8, v18, v17 ; 3E102312 v_mac_f32_e32 v1, v0, v2 ; 3E020500 v_mac_f32_e32 v6, v0, v3 ; 3E0C0700 v_mac_f32_e32 v7, v0, v4 ; 3E0E0900 v_mac_f32_e32 v8, v0, v5 ; 3E100B00 v_mac_f32_e32 v1, v9, v10 ; 3E021509 v_mac_f32_e32 v6, v9, v11 ; 3E0C1709 v_mac_f32_e32 v7, v9, v12 ; 3E0E1909 v_mac_f32_e32 v8, v9, v13 ; 3E101B09 v_cvt_pkrtz_f16_f32_e32 v0, v1, v6 ; 5E000D01 v_cvt_pkrtz_f16_f32_e32 v1, v7, v8 ; 5E021107 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 24 Code Size: 364 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0..1] DCL CONST[2][0..41] DCL CONST[3][0..13] DCL CONST[4][0] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL DCL TEMP[2..6], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 2.0000, 0.0000} IMM[1] INT32 {1, 0, 2, 0} IMM[2] UINT32 {1, 0, 16, 32} IMM[3] UINT32 {48, 4, 20, 36} IMM[4] UINT32 {52, 8, 24, 40} IMM[5] UINT32 {56, 12, 28, 44} IMM[6] UINT32 {60, 0, 0, 0} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MOV TEMP[0].w, IMM[0].yyyy 4: MOV TEMP[0].x, IN[0].xxxx 5: MOV TEMP[0].y, IN[0].yyyy 6: MOV TEMP[0].z, IN[0].zzzz 7: MOV TEMP[2].xy, IN[1].xyyy 8: MOV TEMP[2].w, IMM[0].xxxx 9: TXL TEMP[2], TEMP[2], SAMP[0], 2D 10: DP4 TEMP[2].x, TEMP[0], TEMP[2] 11: MOV TEMP[3].xy, IN[1].xyyy 12: MOV TEMP[3].w, IMM[0].xxxx 13: TXL TEMP[3], TEMP[3], SAMP[0], 2D, IMM[1].xyx 14: DP4 TEMP[3].x, TEMP[0], TEMP[3] 15: MOV TEMP[4].xy, IN[1].xyyy 16: MOV TEMP[4].w, IMM[0].xxxx 17: TXL TEMP[4], TEMP[4], SAMP[0], 2D, IMM[1].zyz 18: DP4 TEMP[0].x, TEMP[0], TEMP[4] 19: MOV TEMP[4].x, TEMP[2].xxxx 20: MOV TEMP[4].y, TEMP[3].xxxx 21: MOV TEMP[4].z, TEMP[0].xxxx 22: MOV TEMP[5].w, IMM[0].yyyy 23: MOV TEMP[5].x, TEMP[2].xxxx 24: MOV TEMP[5].y, TEMP[3].xxxx 25: MOV TEMP[5].z, TEMP[0].xxxx 26: MOV TEMP[0].x, CONST[2][0].xxxx 27: MOV TEMP[0].y, CONST[2][1].xxxx 28: MOV TEMP[0].z, CONST[2][2].xxxx 29: MOV TEMP[0].w, CONST[2][3].xxxx 30: DP4 TEMP[0].x, TEMP[5], TEMP[0] 31: MOV TEMP[2].x, CONST[2][0].yyyy 32: MOV TEMP[2].y, CONST[2][1].yyyy 33: MOV TEMP[2].z, CONST[2][2].yyyy 34: MOV TEMP[2].w, CONST[2][3].yyyy 35: DP4 TEMP[2].x, TEMP[5], TEMP[2] 36: MOV TEMP[3].x, CONST[2][0].zzzz 37: MOV TEMP[3].y, CONST[2][1].zzzz 38: MOV TEMP[3].z, CONST[2][2].zzzz 39: MOV TEMP[3].w, CONST[2][3].zzzz 40: DP4 TEMP[3].x, TEMP[5], TEMP[3] 41: MOV TEMP[6].x, CONST[2][0].wwww 42: MOV TEMP[6].y, CONST[2][1].wwww 43: MOV TEMP[6].z, CONST[2][2].wwww 44: MOV TEMP[6].w, CONST[2][3].wwww 45: DP4 TEMP[5].x, TEMP[5], TEMP[6] 46: MOV TEMP[6].x, TEMP[0].xxxx 47: MOV TEMP[6].y, TEMP[2].xxxx 48: MOV TEMP[6].z, TEMP[3].xxxx 49: MOV TEMP[6].w, TEMP[5].xxxx 50: MOV TEMP[0].x, TEMP[0].xxxx 51: MOV TEMP[0].y, TEMP[2].xxxx 52: MOV TEMP[0].z, TEMP[5].xxxx 53: MOV TEMP[0].w, TEMP[5].xxxx 54: RCP TEMP[2].x, TEMP[5].xxxx 55: MUL TEMP[2].x, TEMP[3].xxxx, TEMP[2].xxxx 56: FSLT TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx 57: UIF TEMP[2].xxxx :0 58: MOV TEMP[0], TEMP[0] 59: ELSE :0 60: MOV TEMP[0], TEMP[6] 61: ENDIF 62: MOV TEMP[2].xw, TEMP[0].xxxw 63: MOV TEMP[3].x, -TEMP[0].yyyy 64: MAD TEMP[0].x, TEMP[0].zzzz, IMM[0].zzzz, -TEMP[0].wwww 65: MOV TEMP[3].y, TEMP[0].xxxx 66: MOV TEMP[2].yz, TEMP[3].yxyy 67: MOV OUT[1], TEMP[1] 68: MOV OUT[0], TEMP[2] 69: MOV OUT[2].xyz, TEMP[4].xyzx 70: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %30 = load <8 x i32>, <8 x i32> addrspace(2)* %29, align 32, !tbaa !0 %31 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %32 = load <4 x i32>, <4 x i32> addrspace(2)* %31, align 16, !tbaa !0 %33 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %34 = load <16 x i8>, <16 x i8> addrspace(2)* %33, align 16, !tbaa !0 %35 = add i32 %5, %7 %36 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %34, i32 0, i32 %35) %37 = extractelement <4 x float> %36, i32 0 %38 = extractelement <4 x float> %36, i32 1 %39 = extractelement <4 x float> %36, i32 2 %40 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %41 = load <16 x i8>, <16 x i8> addrspace(2)* %40, align 16, !tbaa !0 %42 = add i32 %10, %6 %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %41, i32 0, i32 %42) %44 = extractelement <4 x float> %43, i32 0 %45 = extractelement <4 x float> %43, i32 1 %46 = bitcast float %44 to i32 %47 = bitcast float %45 to i32 %48 = insertelement <4 x i32> undef, i32 %46, i32 0 %49 = insertelement <4 x i32> %48, i32 %47, i32 1 %50 = insertelement <4 x i32> %49, i32 0, i32 2 %51 = bitcast <8 x i32> %30 to <32 x i8> %52 = bitcast <4 x i32> %32 to <16 x i8> %53 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %50, <32 x i8> %51, <16 x i8> %52, i32 2) %54 = extractelement <4 x float> %53, i32 0 %55 = extractelement <4 x float> %53, i32 1 %56 = extractelement <4 x float> %53, i32 2 %57 = extractelement <4 x float> %53, i32 3 %58 = fmul float %37, %54 %59 = fmul float %38, %55 %60 = fadd float %58, %59 %61 = fmul float %39, %56 %62 = fadd float %60, %61 %63 = fadd float %62, %57 %64 = bitcast float %44 to i32 %65 = bitcast float %45 to i32 %66 = insertelement <4 x i32> , i32 %64, i32 1 %67 = insertelement <4 x i32> %66, i32 %65, i32 2 %68 = insertelement <4 x i32> %67, i32 0, i32 3 %69 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %68, <8 x i32> %30, <4 x i32> %32, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %70 = extractelement <4 x float> %69, i32 0 %71 = extractelement <4 x float> %69, i32 1 %72 = extractelement <4 x float> %69, i32 2 %73 = extractelement <4 x float> %69, i32 3 %74 = fmul float %37, %70 %75 = fmul float %38, %71 %76 = fadd float %74, %75 %77 = fmul float %39, %72 %78 = fadd float %76, %77 %79 = fadd float %78, %73 %80 = bitcast float %44 to i32 %81 = bitcast float %45 to i32 %82 = insertelement <4 x i32> , i32 %80, i32 1 %83 = insertelement <4 x i32> %82, i32 %81, i32 2 %84 = insertelement <4 x i32> %83, i32 0, i32 3 %85 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %84, <8 x i32> %30, <4 x i32> %32, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %86 = extractelement <4 x float> %85, i32 0 %87 = extractelement <4 x float> %85, i32 1 %88 = extractelement <4 x float> %85, i32 2 %89 = extractelement <4 x float> %85, i32 3 %90 = fmul float %37, %86 %91 = fmul float %38, %87 %92 = fadd float %90, %91 %93 = fmul float %39, %88 %94 = fadd float %92, %93 %95 = fadd float %94, %89 %96 = fmul float %63, %13 %97 = fmul float %79, %17 %98 = fadd float %96, %97 %99 = fmul float %95, %21 %100 = fadd float %98, %99 %101 = fadd float %100, %25 %102 = fmul float %63, %14 %103 = fmul float %79, %18 %104 = fadd float %102, %103 %105 = fmul float %95, %22 %106 = fadd float %104, %105 %107 = fadd float %106, %26 %108 = fmul float %63, %15 %109 = fmul float %79, %19 %110 = fadd float %108, %109 %111 = fmul float %95, %23 %112 = fadd float %110, %111 %113 = fadd float %112, %27 %114 = fmul float %63, %16 %115 = fmul float %79, %20 %116 = fadd float %114, %115 %117 = fmul float %95, %24 %118 = fadd float %116, %117 %119 = fadd float %118, %28 %120 = fdiv float 1.000000e+00, %119 %121 = fmul float %113, %120 %122 = fcmp ogt float %121, 1.000000e+00 %. = select i1 %122, float %119, float %113 %123 = fsub float -0.000000e+00, %107 %124 = fmul float %., 2.000000e+00 %125 = fsub float %124, %119 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %63, float %79, float %95, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %101, float %123, float %125, float %119) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[4:7], v0, s[12:15], 0 idxen ; E00C2000 80030400 v_add_i32_e32 v0, s11, v3 ; 4A00060B buffer_load_format_xyzw v[11:14], v0, s[16:19], 0 idxen ; E00C2000 80040B00 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v13, 0 ; 7E1A0280 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[11:14], s[12:19], s[8:11] ; F0900F00 0043000B v_mov_b32_e32 v10, 0x10001 ; 7E1402FF 00010001 image_sample_l_o v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[10:13], s[12:19], s[8:11] ; F0D00F00 00430E0A v_mov_b32_e32 v10, 0x20002 ; 7E1402FF 00020002 s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v1, v1, v5 ; 10020B01 v_mac_f32_e32 v1, v0, v4 ; 3E020900 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v15, v5 ; 10000B0F v_mac_f32_e32 v0, v14, v4 ; 3E00090E s_load_dwordx4 s[0:3], s[2:3], 0x8 ; C0800308 image_sample_l_o v[7:10], 15, 0, 0, 0, 0, 0, 0, 0, v[10:13], s[12:19], s[8:11] ; F0D00F00 0043070A s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v5, v8, v5 ; 100A0B08 v_mac_f32_e32 v5, v7, v4 ; 3E0A0907 v_mac_f32_e32 v1, v2, v6 ; 3E020D02 v_mac_f32_e32 v0, v16, v6 ; 3E000D10 v_mac_f32_e32 v5, v9, v6 ; 3E0A0D09 v_add_f32_e32 v1, v3, v1 ; 06020303 v_add_f32_e32 v0, v17, v0 ; 06000111 v_add_f32_e32 v2, v10, v5 ; 06040B0A s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F exp 15, 32, 0, 0, 0, v13, v13, v13, v13 ; F800020F 0D0D0D0D s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s8, v0 ; 10060008 v_mul_f32_e32 v4, s9, v0 ; 10080009 v_mul_f32_e32 v5, s10, v0 ; 100A000A v_mul_f32_e32 v6, s11, v0 ; 100C000B exp 15, 33, 0, 0, 0, v1, v0, v2, v13 ; F800021F 0D020001 v_mac_f32_e32 v3, s4, v1 ; 3E060204 v_mac_f32_e32 v4, s5, v1 ; 3E080205 v_mac_f32_e32 v5, s6, v1 ; 3E0A0206 v_mac_f32_e32 v6, s7, v1 ; 3E0C0207 v_mac_f32_e32 v3, s12, v2 ; 3E06040C v_mac_f32_e32 v6, s15, v2 ; 3E0C040F s_waitcnt expcnt(0) ; BF8C070F v_add_f32_e32 v0, s0, v6 ; 06000C00 v_rcp_f32_e32 v1, v0 ; 7E025500 v_mac_f32_e32 v4, s13, v2 ; 3E08040D v_mac_f32_e32 v5, s14, v2 ; 3E0A040E v_add_f32_e32 v2, s18, v5 ; 06040A12 v_mul_f32_e32 v1, v1, v2 ; 10020501 v_cmp_lt_f32_e32 vcc, 1.0, v1 ; 7C0202F2 v_cndmask_b32_e32 v1, v2, v0 ; 00020102 v_add_f32_e32 v2, s16, v3 ; 06040610 v_add_f32_e32 v3, s17, v4 ; 06060811 v_xor_b32_e32 v3, 0x80000000, v3 ; 3A0606FF 80000000 v_mad_f32 v1, 2.0, v1, -v0 ; D2820001 840202F4 exp 15, 12, 0, 0, 0, v2, v3, v1, v0 ; F80000CF 00010302 exp 15, 13, 0, 1, 0, v13, v13, v13, v13 ; F80008DF 0D0D0D0D s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 20 Code Size: 364 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], POSITION, LINEAR DCL IN[1], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL CONST[3] DCL CONST[1][0..8] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0] DCL TEMP[1..5], LOCAL IMM[0] FLT32 { 0.5000, 0.0000, -0.5000, 2.0000} IMM[1] UINT32 {3, 400, 304, 0} IMM[2] UINT32 {320, 12, 36, 16} IMM[3] FLT32 { 1.0000, 3.0000, 0.0000, 0.0000} IMM[4] UINT32 {28, 0, 0, 0} 0: MOV TEMP[0], IN[0] 1: MAD TEMP[0].y, IN[0], CONST[3].xxxx, CONST[3].yyyy 2: ADD TEMP[1].xy, TEMP[0].xyyy, IMM[0].xxxx 3: MUL TEMP[1].xy, TEMP[1].xyyy, CONST[4][25].xyyy 4: ADD TEMP[2].xyz, IN[1].xyzz, -CONST[4][19].xyzz 5: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 6: RSQ TEMP[3].x, TEMP[3].xxxx 7: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx 8: MOV TEMP[3].xy, TEMP[1].xyyy 9: MOV TEMP[3].w, IMM[0].yyyy 10: TXL TEMP[3].xyz, TEMP[3], SAMP[1], 2D 11: MOV TEMP[4].xy, TEMP[1].xyyy 12: MOV TEMP[4].w, IMM[0].yyyy 13: TXL TEMP[4].xyz, TEMP[4], SAMP[2], 2D 14: ADD TEMP[4].xyz, TEMP[4].xyzz, IMM[0].zzzz 15: MUL TEMP[4].xyz, TEMP[4].xyzz, IMM[0].wwww 16: MOV TEMP[1].xy, TEMP[1].xyyy 17: MOV TEMP[1].w, IMM[0].yyyy 18: TXL TEMP[1].x, TEMP[1], SAMP[0], 2D 19: DP3 TEMP[5].x, CONST[4][20].xyzz, TEMP[2].xyzz 20: RCP TEMP[5].xyz, TEMP[5].xxxx 21: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xyzz 22: MAD TEMP[1].xyz, TEMP[1].xxxx, TEMP[2].xyzz, CONST[4][19].xyzz 23: ADD TEMP[1].xyz, CONST[1][0].xyzz, -TEMP[1].xyzz 24: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz 25: RSQ TEMP[2].x, TEMP[2].xxxx 26: MUL TEMP[2].xyz, TEMP[1].xyzz, TEMP[2].xxxx 27: DP3 TEMP[1].x, TEMP[1].xyzz, TEMP[1].xyzz 28: SQRT TEMP[1].x, TEMP[1].xxxx 29: ADD TEMP[1].x, CONST[1][0].wwww, -TEMP[1].xxxx 30: FSLT TEMP[5].x, TEMP[1].xxxx, IMM[0].yyyy 31: AND TEMP[5].x, TEMP[5].xxxx, IMM[3].xxxx 32: KILL_IF -TEMP[5].xxxx 33: DP3 TEMP[2].x, TEMP[4].xyzz, TEMP[2].xyzz 34: FSLT TEMP[4].x, TEMP[2].xxxx, IMM[0].yyyy 35: AND TEMP[4].x, TEMP[4].xxxx, IMM[3].xxxx 36: KILL_IF -TEMP[4].xxxx 37: RCP TEMP[4].x, CONST[1][0].wwww 38: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[4].xxxx 39: RCP TEMP[4].x, CONST[1][2].yyyy 40: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[4].xxxx 41: MOV_SAT TEMP[1].x, TEMP[1].xxxx 42: MUL TEMP[2].xyz, TEMP[2].xxxx, CONST[1][1].xyzz 43: MUL TEMP[4].x, IMM[0].wwww, TEMP[1].xxxx 44: ADD TEMP[4].x, IMM[3].yyyy, -TEMP[4].xxxx 45: MUL TEMP[4].x, TEMP[1].xxxx, TEMP[4].xxxx 46: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[4].xxxx 47: MUL TEMP[1].xyz, TEMP[3].xyzz, TEMP[1].xxxx 48: MUL TEMP[1].xyz, TEMP[1].xyzz, CONST[1][1].wwww 49: MUL TEMP[1].xyz, TEMP[2].xyzz, TEMP[1].xyzz 50: MOV TEMP[2].w, IMM[3].xxxx 51: MOV TEMP[2].x, TEMP[1].xxxx 52: MOV TEMP[2].y, TEMP[1].yyyy 53: MOV TEMP[2].z, TEMP[1].zzzz 54: MOV OUT[0], TEMP[2] 55: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %26 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %27 = load <16 x i8>, <16 x i8> addrspace(2)* %26, align 16, !tbaa !0 %28 = call float @llvm.SI.load.const(<16 x i8> %27, i32 0) %29 = call float @llvm.SI.load.const(<16 x i8> %27, i32 4) %30 = call float @llvm.SI.load.const(<16 x i8> %27, i32 8) %31 = call float @llvm.SI.load.const(<16 x i8> %27, i32 12) %32 = call float @llvm.SI.load.const(<16 x i8> %27, i32 16) %33 = call float @llvm.SI.load.const(<16 x i8> %27, i32 20) %34 = call float @llvm.SI.load.const(<16 x i8> %27, i32 24) %35 = call float @llvm.SI.load.const(<16 x i8> %27, i32 28) %36 = call float @llvm.SI.load.const(<16 x i8> %27, i32 36) %37 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = call float @llvm.SI.load.const(<16 x i8> %38, i32 304) %40 = call float @llvm.SI.load.const(<16 x i8> %38, i32 308) %41 = call float @llvm.SI.load.const(<16 x i8> %38, i32 312) %42 = call float @llvm.SI.load.const(<16 x i8> %38, i32 320) %43 = call float @llvm.SI.load.const(<16 x i8> %38, i32 324) %44 = call float @llvm.SI.load.const(<16 x i8> %38, i32 328) %45 = call float @llvm.SI.load.const(<16 x i8> %38, i32 400) %46 = call float @llvm.SI.load.const(<16 x i8> %38, i32 404) %47 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %48 = load <32 x i8>, <32 x i8> addrspace(2)* %47, align 32, !tbaa !0 %49 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %50 = load <16 x i8>, <16 x i8> addrspace(2)* %49, align 16, !tbaa !0 %51 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %52 = bitcast <8 x i32> addrspace(2)* %51 to <32 x i8> addrspace(2)* %53 = load <32 x i8>, <32 x i8> addrspace(2)* %52, align 32, !tbaa !0 %54 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %55 = bitcast <4 x i32> addrspace(2)* %54 to <16 x i8> addrspace(2)* %56 = load <16 x i8>, <16 x i8> addrspace(2)* %55, align 16, !tbaa !0 %57 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %58 = bitcast <8 x i32> addrspace(2)* %57 to <32 x i8> addrspace(2)* %59 = load <32 x i8>, <32 x i8> addrspace(2)* %58, align 32, !tbaa !0 %60 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %61 = bitcast <4 x i32> addrspace(2)* %60 to <16 x i8> addrspace(2)* %62 = load <16 x i8>, <16 x i8> addrspace(2)* %61, align 16, !tbaa !0 %63 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %64 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %65 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %66 = fmul float %24, %15 %67 = fadd float %66, %25 %68 = fadd float %14, 5.000000e-01 %69 = fadd float %67, 5.000000e-01 %70 = fmul float %68, %45 %71 = fmul float %69, %46 %72 = fsub float %63, %39 %73 = fsub float %64, %40 %74 = fsub float %65, %41 %75 = fmul float %72, %72 %76 = fmul float %73, %73 %77 = fadd float %76, %75 %78 = fmul float %74, %74 %79 = fadd float %77, %78 %80 = call float @llvm.AMDGPU.rsq.clamped.f32(float %79) %81 = fmul float %72, %80 %82 = fmul float %73, %80 %83 = fmul float %74, %80 %84 = bitcast float %70 to i32 %85 = bitcast float %71 to i32 %86 = insertelement <4 x i32> undef, i32 %84, i32 0 %87 = insertelement <4 x i32> %86, i32 %85, i32 1 %88 = insertelement <4 x i32> %87, i32 0, i32 2 %89 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %88, <32 x i8> %53, <16 x i8> %56, i32 2) %90 = extractelement <4 x float> %89, i32 0 %91 = extractelement <4 x float> %89, i32 1 %92 = extractelement <4 x float> %89, i32 2 %93 = bitcast float %70 to i32 %94 = bitcast float %71 to i32 %95 = insertelement <4 x i32> undef, i32 %93, i32 0 %96 = insertelement <4 x i32> %95, i32 %94, i32 1 %97 = insertelement <4 x i32> %96, i32 0, i32 2 %98 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %97, <32 x i8> %59, <16 x i8> %62, i32 2) %99 = extractelement <4 x float> %98, i32 0 %100 = extractelement <4 x float> %98, i32 1 %101 = extractelement <4 x float> %98, i32 2 %102 = fadd float %99, -5.000000e-01 %103 = fadd float %100, -5.000000e-01 %104 = fadd float %101, -5.000000e-01 %105 = fmul float %102, 2.000000e+00 %106 = fmul float %103, 2.000000e+00 %107 = fmul float %104, 2.000000e+00 %108 = bitcast float %70 to i32 %109 = bitcast float %71 to i32 %110 = insertelement <4 x i32> undef, i32 %108, i32 0 %111 = insertelement <4 x i32> %110, i32 %109, i32 1 %112 = insertelement <4 x i32> %111, i32 0, i32 2 %113 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %112, <32 x i8> %48, <16 x i8> %50, i32 2) %114 = extractelement <4 x float> %113, i32 0 %115 = fmul float %42, %81 %116 = fmul float %43, %82 %117 = fadd float %116, %115 %118 = fmul float %44, %83 %119 = fadd float %117, %118 %120 = fdiv float 1.000000e+00, %119 %121 = fmul float %81, %120 %122 = fmul float %82, %120 %123 = fmul float %83, %120 %124 = fmul float %114, %121 %125 = fadd float %124, %39 %126 = fmul float %114, %122 %127 = fadd float %126, %40 %128 = fmul float %114, %123 %129 = fadd float %128, %41 %130 = fsub float %28, %125 %131 = fsub float %29, %127 %132 = fsub float %30, %129 %133 = fmul float %130, %130 %134 = fmul float %131, %131 %135 = fadd float %134, %133 %136 = fmul float %132, %132 %137 = fadd float %135, %136 %138 = call float @llvm.AMDGPU.rsq.clamped.f32(float %137) %139 = fmul float %130, %138 %140 = fmul float %131, %138 %141 = fmul float %132, %138 %142 = fmul float %130, %130 %143 = fmul float %131, %131 %144 = fadd float %143, %142 %145 = fmul float %132, %132 %146 = fadd float %144, %145 %147 = call float @llvm.sqrt.f32(float %146) %148 = fsub float %31, %147 %149 = fcmp olt float %148, 0.000000e+00 %150 = select i1 %149, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %150) %151 = fmul float %105, %139 %152 = fmul float %106, %140 %153 = fadd float %152, %151 %154 = fmul float %107, %141 %155 = fadd float %153, %154 %156 = fcmp olt float %155, 0.000000e+00 %157 = select i1 %156, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %157) %158 = fdiv float 1.000000e+00, %31 %159 = fmul float %148, %158 %160 = fdiv float 1.000000e+00, %36 %161 = fmul float %159, %160 %162 = call float @llvm.AMDIL.clamp.(float %161, float 0.000000e+00, float 1.000000e+00) %163 = fmul float %155, %32 %164 = fmul float %155, %33 %165 = fmul float %155, %34 %166 = fmul float %162, 2.000000e+00 %167 = fsub float 3.000000e+00, %166 %168 = fmul float %162, %167 %169 = fmul float %162, %168 %170 = fmul float %90, %169 %171 = fmul float %91, %169 %172 = fmul float %92, %169 %173 = fmul float %170, %35 %174 = fmul float %171, %35 %175 = fmul float %172, %35 %176 = fmul float %163, %173 %177 = fmul float %164, %174 %178 = fmul float %165, %175 %179 = call i32 @llvm.SI.packf16(float %176, float %177) %180 = bitcast i32 %179 to float %181 = call i32 @llvm.SI.packf16(float %178, float 1.000000e+00) %182 = bitcast i32 %181 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %180, float %182, float %180, float %182) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300 s_load_dwordx4 s[16:19], s[2:3], 0x10 ; C0880310 s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[12:15], 0xc ; C2040D0C s_buffer_load_dword s10, s[12:15], 0xd ; C2050D0D s_mov_b32 m0, s9 ; BEFC0309 s_buffer_load_dword s9, s[16:19], 0x4c ; C204914C s_buffer_load_dword s11, s[16:19], 0x4d ; C205914D s_buffer_load_dword s12, s[16:19], 0x4e ; C206114E s_buffer_load_dword s13, s[16:19], 0x50 ; C2069150 s_buffer_load_dword s14, s[16:19], 0x51 ; C2071151 s_buffer_load_dword s15, s[16:19], 0x52 ; C2079152 s_buffer_load_dword s20, s[16:19], 0x64 ; C20A1164 s_buffer_load_dword s16, s[16:19], 0x65 ; C2081165 v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 v_interp_p1_f32 v5, v0, 1, 0, [m0] ; C8140100 v_interp_p2_f32 v5, [v5], v1, 1, 0, [m0] ; C8150101 v_interp_p1_f32 v0, v0, 2, 0, [m0] ; C8000200 v_interp_p2_f32 v0, [v0], v1, 2, 0, [m0] ; C8010201 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s10 ; 7E02020A v_mac_f32_e32 v1, s8, v3 ; 3E020608 v_add_f32_e32 v2, 0.5, v2 ; 060404F0 v_add_f32_e32 v1, 0.5, v1 ; 060202F0 v_mul_f32_e32 v6, s20, v2 ; 100C0414 v_mul_f32_e32 v7, s16, v1 ; 100E0210 v_subrev_f32_e32 v1, s9, v4 ; 0A020809 s_load_dwordx4 s[16:19], s[4:5], 0x4 ; C0880504 s_load_dwordx4 s[20:23], s[4:5], 0x8 ; C08A0508 s_load_dwordx8 s[24:31], s[6:7], 0x0 ; C0CC0700 s_load_dwordx8 s[32:39], s[6:7], 0x8 ; C0D00708 s_load_dwordx8 s[40:47], s[6:7], 0x10 ; C0D40710 s_load_dwordx4 s[4:7], s[4:5], 0x0 ; C0820500 v_subrev_f32_e32 v2, s11, v5 ; 0A040A0B v_subrev_f32_e32 v0, s12, v0 ; 0A00000C v_mul_f32_e32 v3, v1, v1 ; 10060301 v_mac_f32_e32 v3, v2, v2 ; 3E060502 v_mac_f32_e32 v3, v0, v0 ; 3E060100 v_rsq_clamp_f32_e32 v3, v3 ; 7E065903 v_mov_b32_e32 v8, 0 ; 7E100280 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[9:11], 7, 0, 0, 0, 0, 0, 0, 0, v[6:9], s[32:39], s[16:19] ; F0900700 00880906 s_waitcnt vmcnt(0) ; BF8C0770 image_sample_l v[12:14], 7, 0, 0, 0, 0, 0, 0, 0, v[6:9], s[40:47], s[20:23] ; F0900700 00AA0C06 image_sample_l v4, 1, 0, 0, 0, 0, 0, 0, 0, v[6:9], s[24:31], s[4:7] ; F0900100 00260406 v_mul_f32_e32 v1, v3, v1 ; 10020303 v_mul_f32_e32 v2, v3, v2 ; 10040503 v_mul_f32_e32 v0, v3, v0 ; 10000103 v_mul_f32_e32 v3, s13, v1 ; 1006020D v_mac_f32_e32 v3, s14, v2 ; 3E06040E v_mac_f32_e32 v3, s15, v0 ; 3E06000F v_rcp_f32_e32 v3, v3 ; 7E065503 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s10, s[0:3], 0x5 ; C2050105 s_buffer_load_dword s13, s[0:3], 0x6 ; C2068106 s_buffer_load_dword s14, s[0:3], 0x7 ; C2070107 s_buffer_load_dword s0, s[0:3], 0x9 ; C2000109 v_mul_f32_e32 v1, v3, v1 ; 10020303 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v4, v1, s9 ; D2820001 00260304 v_mul_f32_e32 v2, v3, v2 ; 10040503 v_mad_f32 v2, v4, v2, s11 ; D2820002 002E0504 v_mul_f32_e32 v0, v3, v0 ; 10000103 v_mad_f32 v0, v4, v0, s12 ; D2820000 00320104 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v1, s4, v1 ; 08020204 v_sub_f32_e32 v2, s5, v2 ; 08040405 v_sub_f32_e32 v0, s6, v0 ; 08000006 v_add_f32_e32 v3, -0.5, v12 ; 060618F1 v_add_f32_e32 v4, -0.5, v13 ; 06081AF1 v_add_f32_e32 v5, -0.5, v14 ; 060A1CF1 v_mul_f32_e32 v6, v1, v1 ; 100C0301 v_mac_f32_e32 v6, v2, v2 ; 3E0C0502 v_mac_f32_e32 v6, v0, v0 ; 3E0C0100 v_rsq_clamp_f32_e32 v7, v6 ; 7E0E5906 v_add_f32_e32 v3, v3, v3 ; 06060703 v_add_f32_e32 v4, v4, v4 ; 06080904 v_add_f32_e32 v5, v5, v5 ; 060A0B05 v_mul_f32_e32 v1, v7, v1 ; 10020307 v_mul_f32_e32 v2, v7, v2 ; 10040507 v_mul_f32_e32 v0, v7, v0 ; 10000107 v_sqrt_f32_e32 v6, v6 ; 7E0C6706 v_sub_f32_e32 v6, s7, v6 ; 080C0C07 v_cmp_gt_f32_e32 vcc, 0, v6 ; 7C080C80 v_cndmask_b32_e64 v7, 0, -1.0, vcc ; D2000007 01A9E680 v_cmpx_le_f32_e32 vcc, 0, v7 ; 7C260E80 v_mul_f32_e32 v1, v1, v3 ; 10020701 v_mac_f32_e32 v1, v2, v4 ; 3E020902 v_mac_f32_e32 v1, v0, v5 ; 3E020B00 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v0, 0, -1.0, vcc ; D2000000 01A9E680 v_cmpx_le_f32_e32 vcc, 0, v0 ; 7C260080 v_rcp_f32_e32 v0, s7 ; 7E005407 v_rcp_f32_e32 v2, s0 ; 7E045400 v_mul_f32_e32 v3, s8, v1 ; 10060208 v_mul_f32_e32 v4, s10, v1 ; 1008020A v_mul_f32_e32 v1, s13, v1 ; 1002020D v_mul_f32_e32 v0, v0, v6 ; 10000D00 v_mul_f32_e32 v0, v2, v0 ; 10000102 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_madak_f32_e32 v2, -2.0, v0, 0x40400000 ; 420400F5 40400000 v_mul_f32_e32 v2, v2, v0 ; 10040102 v_mul_f32_e32 v0, v2, v0 ; 10000102 v_mul_f32_e32 v2, v0, v9 ; 10041300 v_mul_f32_e32 v5, v0, v10 ; 100A1500 v_mul_f32_e32 v0, v0, v11 ; 10001700 v_mul_f32_e32 v2, s14, v2 ; 1004040E v_mul_f32_e32 v5, s14, v5 ; 100A0A0E v_mul_f32_e32 v0, s14, v0 ; 1000000E v_mul_f32_e32 v2, v2, v3 ; 10040702 v_mul_f32_e32 v3, v5, v4 ; 10060905 v_mul_f32_e32 v0, v0, v1 ; 10000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 v_cvt_pkrtz_f16_f32_e64 v0, v0, 1.0 ; D25E0000 0001E500 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 16 Code Size: 536 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL CONST[1][0..41] DCL CONST[2][0..13] DCL CONST[3][0] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL DCL TEMP[2..3], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, -1.0000, 0.0000} IMM[1] UINT32 {0, 64, 80, 96} IMM[2] UINT32 {112, 68, 84, 100} IMM[3] UINT32 {116, 72, 88, 104} IMM[4] UINT32 {120, 76, 92, 108} IMM[5] UINT32 {124, 304, 320, 0} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MOV TEMP[0].zw, IMM[0].yyyy 4: MOV TEMP[0].x, IN[0].xxxx 5: MOV TEMP[0].y, IN[0].yyyy 6: MOV TEMP[2].x, CONST[1][4].xxxx 7: MOV TEMP[2].y, CONST[1][5].xxxx 8: MOV TEMP[2].z, CONST[1][6].xxxx 9: MOV TEMP[2].w, CONST[1][7].xxxx 10: DP4 TEMP[2].x, TEMP[0], TEMP[2] 11: MOV TEMP[3].x, CONST[1][4].yyyy 12: MOV TEMP[3].y, CONST[1][5].yyyy 13: MOV TEMP[3].z, CONST[1][6].yyyy 14: MOV TEMP[3].w, CONST[1][7].yyyy 15: DP4 TEMP[3].x, TEMP[0], TEMP[3] 16: MOV TEMP[2].y, TEMP[3].xxxx 17: MOV TEMP[3].x, CONST[1][4].zzzz 18: MOV TEMP[3].y, CONST[1][5].zzzz 19: MOV TEMP[3].z, CONST[1][6].zzzz 20: MOV TEMP[3].w, CONST[1][7].zzzz 21: DP4 TEMP[3].x, TEMP[0], TEMP[3] 22: MOV TEMP[2].z, TEMP[3].xxxx 23: MOV TEMP[3].x, CONST[1][4].wwww 24: MOV TEMP[3].y, CONST[1][5].wwww 25: MOV TEMP[3].z, CONST[1][6].wwww 26: MOV TEMP[3].w, CONST[1][7].wwww 27: DP4 TEMP[0].x, TEMP[0], TEMP[3] 28: RCP TEMP[0].xyz, TEMP[0].xxxx 29: MAD TEMP[0].xyz, TEMP[2].xyzz, TEMP[0].xyzz, -CONST[1][19].xyzz 30: DP3 TEMP[2].x, TEMP[0].xyzz, TEMP[0].xyzz 31: RSQ TEMP[2].x, TEMP[2].xxxx 32: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xxxx 33: DP3 TEMP[2].x, CONST[1][20].xyzz, TEMP[0].xyzz 34: RCP TEMP[2].x, TEMP[2].xxxx 35: MUL TEMP[0].xyz, TEMP[2].xxxx, TEMP[0].xyzz 36: MOV TEMP[2].zw, IMM[0].yyzy 37: MOV TEMP[2].x, IN[0].xxxx 38: MOV TEMP[2].y, -IN[0].yyyy 39: MOV OUT[1], TEMP[1] 40: MOV OUT[2].xy, IN[1].xyxx 41: MOV OUT[0], TEMP[2] 42: MOV OUT[3].xyz, TEMP[0].xyzx 43: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328) %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 %37 = add i32 %5, %7 %38 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %37) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 %43 = add i32 %5, %7 %44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %43) %45 = extractelement <4 x float> %44, i32 0 %46 = extractelement <4 x float> %44, i32 1 %47 = fmul float %39, %13 %48 = fmul float %40, %17 %49 = fadd float %47, %48 %50 = fadd float %49, %21 %51 = fadd float %50, %25 %52 = fmul float %39, %14 %53 = fmul float %40, %18 %54 = fadd float %52, %53 %55 = fadd float %54, %22 %56 = fadd float %55, %26 %57 = fmul float %39, %15 %58 = fmul float %40, %19 %59 = fadd float %57, %58 %60 = fadd float %59, %23 %61 = fadd float %60, %27 %62 = fmul float %39, %16 %63 = fmul float %40, %20 %64 = fadd float %62, %63 %65 = fadd float %64, %24 %66 = fadd float %65, %28 %67 = fdiv float 1.000000e+00, %66 %68 = fmul float %51, %67 %69 = fsub float %68, %29 %70 = fmul float %56, %67 %71 = fsub float %70, %30 %72 = fmul float %61, %67 %73 = fsub float %72, %31 %74 = fmul float %69, %69 %75 = fmul float %71, %71 %76 = fadd float %75, %74 %77 = fmul float %73, %73 %78 = fadd float %76, %77 %79 = call float @llvm.AMDGPU.rsq.clamped.f32(float %78) %80 = fmul float %69, %79 %81 = fmul float %71, %79 %82 = fmul float %73, %79 %83 = fmul float %32, %80 %84 = fmul float %33, %81 %85 = fadd float %84, %83 %86 = fmul float %34, %82 %87 = fadd float %85, %86 %88 = fdiv float 1.000000e+00, %87 %89 = fmul float %88, %80 %90 = fmul float %88, %81 %91 = fmul float %88, %82 %92 = fsub float -0.000000e+00, %40 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %45, float %46, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %89, float %90, float %91, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %39, float %92, float -1.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0x1a ; C206011A buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[3:6], v0, s[8:11], 0 idxen ; E00C2000 80020300 s_buffer_load_dword s5, s[0:3], 0x1c ; C202811C s_buffer_load_dword s6, s[0:3], 0x1d ; C203011D s_buffer_load_dword s7, s[0:3], 0x1e ; C203811E s_buffer_load_dword s8, s[0:3], 0x15 ; C2040115 s_buffer_load_dword s9, s[0:3], 0x16 ; C2048116 s_buffer_load_dword s10, s[0:3], 0x17 ; C2050117 v_mov_b32_e32 v0, s12 ; 7E00020C s_buffer_load_dword s11, s[0:3], 0x18 ; C2058118 s_buffer_load_dword s12, s[0:3], 0x19 ; C2060119 s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v5, s4 ; 7E0A0204 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_buffer_load_dword s13, s[0:3], 0x11 ; C2068111 s_buffer_load_dword s14, s[0:3], 0x12 ; C2070112 s_buffer_load_dword s15, s[0:3], 0x13 ; C2078113 s_buffer_load_dword s16, s[0:3], 0x14 ; C2080114 s_buffer_load_dword s17, s[0:3], 0x1f ; C208811F s_buffer_load_dword s18, s[0:3], 0x4c ; C209014C s_buffer_load_dword s19, s[0:3], 0x4d ; C209814D s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s11 ; 7E0C020B s_buffer_load_dword s11, s[0:3], 0x4e ; C205814E v_mov_b32_e32 v7, s12 ; 7E0E020C s_buffer_load_dword s12, s[0:3], 0x50 ; C2060150 s_buffer_load_dword s20, s[0:3], 0x51 ; C20A0151 s_buffer_load_dword s0, s[0:3], 0x52 ; C2000152 v_mac_f32_e32 v6, s16, v2 ; 3E0C0410 v_mac_f32_e32 v7, s8, v2 ; 3E0E0408 v_mac_f32_e32 v0, s9, v2 ; 3E000409 v_mac_f32_e32 v5, s10, v2 ; 3E0A040A v_mac_f32_e32 v6, s4, v1 ; 3E0C0204 v_mac_f32_e32 v7, s13, v1 ; 3E0E020D v_mac_f32_e32 v0, s14, v1 ; 3E00020E v_mac_f32_e32 v5, s15, v1 ; 3E0A020F v_add_f32_e32 v5, s17, v5 ; 060A0A11 v_rcp_f32_e32 v5, v5 ; 7E0A5505 v_add_f32_e32 v6, s5, v6 ; 060C0C05 v_add_f32_e32 v7, s6, v7 ; 060E0E06 v_add_f32_e32 v0, s7, v0 ; 06000007 v_mad_f32 v6, v6, v5, -s18 ; D2820006 804A0B06 v_mad_f32 v7, v7, v5, -s19 ; D2820007 804E0B07 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, v5, -s11 ; D2820000 802E0B00 v_mul_f32_e32 v5, v6, v6 ; 100A0D06 v_mac_f32_e32 v5, v7, v7 ; 3E0A0F07 v_mac_f32_e32 v5, v0, v0 ; 3E0A0100 v_rsq_clamp_f32_e32 v5, v5 ; 7E0A5905 v_mul_f32_e32 v6, v5, v6 ; 100C0D05 v_mul_f32_e32 v7, v5, v7 ; 100E0F05 v_mul_f32_e32 v0, v5, v0 ; 10000105 v_mul_f32_e32 v5, s12, v6 ; 100A0C0C v_mac_f32_e32 v5, s20, v7 ; 3E0A0E14 v_mac_f32_e32 v5, s0, v0 ; 3E0A0000 v_rcp_f32_e32 v5, v5 ; 7E0A5505 v_mov_b32_e32 v8, 0 ; 7E100280 exp 15, 32, 0, 0, 0, v8, v8, v8, v8 ; F800020F 08080808 exp 15, 33, 0, 0, 0, v3, v4, v8, v8 ; F800021F 08080403 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v3, v6, v5 ; 10060B06 v_mul_f32_e32 v4, v7, v5 ; 10080B07 v_mul_f32_e32 v0, v0, v5 ; 10000B00 exp 15, 34, 0, 0, 0, v3, v4, v0, v8 ; F800022F 08000403 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, 1.0 ; 7E0002F2 v_xor_b32_e32 v2, 0x80000000, v2 ; 3A0404FF 80000000 v_mov_b32_e32 v3, -1.0 ; 7E0602F3 exp 15, 12, 0, 0, 0, v1, v2, v3, v0 ; F80000CF 00030201 exp 15, 13, 0, 1, 0, v8, v8, v8, v8 ; F80008DF 08080808 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 360 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL CONST[1][0..5] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0..6], LOCAL IMM[0] UINT32 {3, 304, 4, 160} IMM[1] FLT32 { 1.0000, 0.3000, 0.5900, 0.1100} IMM[2] UINT32 {176, 128, 156, 152} IMM[3] FLT32 { 0.5000, 0.2500, 0.0000, 0.0000} IMM[4] UINT32 {448, 468, 464, 460} IMM[5] UINT32 {472, 0, 0, 0} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0].x, TEMP[0], SAMP[1], 2D 2: MAD TEMP[0].xyz, IN[1].xyzz, TEMP[0].xxxx, CONST[4][19].xyzz 3: MOV TEMP[1].xy, IN[0].xyyy 4: TEX TEMP[1].xyz, TEMP[1], SAMP[3], 2D 5: MOV TEMP[2].xy, IN[0].xyyy 6: TEX TEMP[2].xyz, TEMP[2], SAMP[2], 2D 7: ADD TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xyzz 8: MAD TEMP[2].xy, CONST[5][10].zwww, TEMP[0].xyyy, CONST[5][10].xyyy 9: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[4][19].xyzz 10: MOV TEMP[3].w, IMM[1].xxxx 11: DP3 TEMP[4].x, TEMP[1].xyzz, IMM[1].yzww 12: POW TEMP[4].x, TEMP[4].xxxx, CONST[5][9].wwww 13: MAX TEMP[4].xyz, TEMP[4].xxxx, IMM[3].xxxx 14: MUL TEMP[5].xyz, CONST[5][9].zzzz, TEMP[1].xyzz 15: MAD TEMP[4].xyz, CONST[5][8].xyzz, TEMP[4].xyzz, TEMP[5].xyzz 16: ADD TEMP[5].xy, TEMP[2].xyyy, CONST[5][11].xyyy 17: MOV TEMP[5].xy, TEMP[5].xyyy 18: TEX TEMP[5].x, TEMP[5], SAMP[0], 2D 19: ADD TEMP[6].xy, TEMP[2].xyyy, CONST[5][11].zwww 20: MOV TEMP[6].xy, TEMP[6].xyyy 21: TEX TEMP[6].x, TEMP[6], SAMP[0], 2D 22: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx 23: ADD TEMP[6].xy, TEMP[2].xyyy, -CONST[5][11].xyyy 24: MOV TEMP[6].xy, TEMP[6].xyyy 25: TEX TEMP[6].x, TEMP[6], SAMP[0], 2D 26: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx 27: ADD TEMP[2].xy, TEMP[2].xyyy, -CONST[5][11].zwww 28: MOV TEMP[2].xy, TEMP[2].xyyy 29: TEX TEMP[2].x, TEMP[2], SAMP[0], 2D 30: ADD TEMP[2].x, TEMP[5].xxxx, TEMP[2].xxxx 31: MUL TEMP[2].x, TEMP[2].xxxx, IMM[3].yyyy 32: LRP TEMP[1].xyz, TEMP[2].xxxx, TEMP[1].xyzz, TEMP[4].xyzz 33: DP3 TEMP[0].x, TEMP[0].xyzz, TEMP[0].xyzz 34: SQRT TEMP[0].x, TEMP[0].xxxx 35: MAD TEMP[0].x, TEMP[0].xxxx, CONST[4][29].xxxx, CONST[4][28].wwww 36: MOV_SAT TEMP[0].x, TEMP[0].xxxx 37: POW TEMP[0].x, TEMP[0].xxxx, CONST[4][29].zzzz 38: MIN TEMP[0].x, CONST[4][29].yyyy, TEMP[0].xxxx 39: MOV_SAT TEMP[0].x, TEMP[0].xxxx 40: LRP TEMP[3].xyz, TEMP[0].xxxx, CONST[4][28].xyzz, TEMP[1].xyzz 41: MOV OUT[0], TEMP[3] 42: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 308) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 312) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 448) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 452) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 456) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 460) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 464) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 468) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 472) %34 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 5 %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 %36 = call float @llvm.SI.load.const(<16 x i8> %35, i32 128) %37 = call float @llvm.SI.load.const(<16 x i8> %35, i32 132) %38 = call float @llvm.SI.load.const(<16 x i8> %35, i32 136) %39 = call float @llvm.SI.load.const(<16 x i8> %35, i32 152) %40 = call float @llvm.SI.load.const(<16 x i8> %35, i32 156) %41 = call float @llvm.SI.load.const(<16 x i8> %35, i32 160) %42 = call float @llvm.SI.load.const(<16 x i8> %35, i32 164) %43 = call float @llvm.SI.load.const(<16 x i8> %35, i32 168) %44 = call float @llvm.SI.load.const(<16 x i8> %35, i32 172) %45 = call float @llvm.SI.load.const(<16 x i8> %35, i32 176) %46 = call float @llvm.SI.load.const(<16 x i8> %35, i32 180) %47 = call float @llvm.SI.load.const(<16 x i8> %35, i32 184) %48 = call float @llvm.SI.load.const(<16 x i8> %35, i32 188) %49 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %50 = load <8 x i32>, <8 x i32> addrspace(2)* %49, align 32, !tbaa !0 %51 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %52 = load <4 x i32>, <4 x i32> addrspace(2)* %51, align 16, !tbaa !0 %53 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %54 = bitcast <8 x i32> addrspace(2)* %53 to <32 x i8> addrspace(2)* %55 = load <32 x i8>, <32 x i8> addrspace(2)* %54, align 32, !tbaa !0 %56 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %57 = bitcast <4 x i32> addrspace(2)* %56 to <16 x i8> addrspace(2)* %58 = load <16 x i8>, <16 x i8> addrspace(2)* %57, align 16, !tbaa !0 %59 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %60 = bitcast <8 x i32> addrspace(2)* %59 to <32 x i8> addrspace(2)* %61 = load <32 x i8>, <32 x i8> addrspace(2)* %60, align 32, !tbaa !0 %62 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %63 = bitcast <4 x i32> addrspace(2)* %62 to <16 x i8> addrspace(2)* %64 = load <16 x i8>, <16 x i8> addrspace(2)* %63, align 16, !tbaa !0 %65 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %66 = bitcast <8 x i32> addrspace(2)* %65 to <32 x i8> addrspace(2)* %67 = load <32 x i8>, <32 x i8> addrspace(2)* %66, align 32, !tbaa !0 %68 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %69 = bitcast <4 x i32> addrspace(2)* %68 to <16 x i8> addrspace(2)* %70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !tbaa !0 %71 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %72 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %73 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %74 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %75 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %76 = bitcast float %71 to i32 %77 = bitcast float %72 to i32 %78 = insertelement <2 x i32> undef, i32 %76, i32 0 %79 = insertelement <2 x i32> %78, i32 %77, i32 1 %80 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %79, <32 x i8> %55, <16 x i8> %58, i32 2) %81 = extractelement <4 x float> %80, i32 0 %82 = fmul float %73, %81 %83 = fadd float %82, %24 %84 = fmul float %74, %81 %85 = fadd float %84, %25 %86 = fmul float %75, %81 %87 = fadd float %86, %26 %88 = bitcast float %71 to i32 %89 = bitcast float %72 to i32 %90 = insertelement <2 x i32> undef, i32 %88, i32 0 %91 = insertelement <2 x i32> %90, i32 %89, i32 1 %92 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %91, <32 x i8> %67, <16 x i8> %70, i32 2) %93 = extractelement <4 x float> %92, i32 0 %94 = extractelement <4 x float> %92, i32 1 %95 = extractelement <4 x float> %92, i32 2 %96 = bitcast float %71 to i32 %97 = bitcast float %72 to i32 %98 = insertelement <2 x i32> undef, i32 %96, i32 0 %99 = insertelement <2 x i32> %98, i32 %97, i32 1 %100 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %99, <32 x i8> %61, <16 x i8> %64, i32 2) %101 = extractelement <4 x float> %100, i32 0 %102 = extractelement <4 x float> %100, i32 1 %103 = extractelement <4 x float> %100, i32 2 %104 = fadd float %93, %101 %105 = fadd float %94, %102 %106 = fadd float %95, %103 %107 = fmul float %43, %83 %108 = fadd float %107, %41 %109 = fmul float %44, %85 %110 = fadd float %109, %42 %111 = fsub float %83, %24 %112 = fsub float %85, %25 %113 = fsub float %87, %26 %114 = fmul float %104, 0x3FD3333340000000 %115 = fmul float %105, 0x3FE2E147A0000000 %116 = fadd float %115, %114 %117 = fmul float %106, 0x3FBC28F5C0000000 %118 = fadd float %116, %117 %119 = call float @llvm.pow.f32(float %118, float %40) %120 = call float @llvm.maxnum.f32(float %119, float 5.000000e-01) %121 = call float @llvm.maxnum.f32(float %119, float 5.000000e-01) %122 = call float @llvm.maxnum.f32(float %119, float 5.000000e-01) %123 = fmul float %39, %104 %124 = fmul float %39, %105 %125 = fmul float %39, %106 %126 = fmul float %36, %120 %127 = fadd float %126, %123 %128 = fmul float %37, %121 %129 = fadd float %128, %124 %130 = fmul float %38, %122 %131 = fadd float %130, %125 %132 = fadd float %108, %45 %133 = fadd float %110, %46 %134 = bitcast float %132 to i32 %135 = bitcast float %133 to i32 %136 = insertelement <2 x i32> undef, i32 %134, i32 0 %137 = insertelement <2 x i32> %136, i32 %135, i32 1 %138 = bitcast <8 x i32> %50 to <32 x i8> %139 = bitcast <4 x i32> %52 to <16 x i8> %140 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %137, <32 x i8> %138, <16 x i8> %139, i32 2) %141 = extractelement <4 x float> %140, i32 0 %142 = fadd float %108, %47 %143 = fadd float %110, %48 %144 = bitcast float %142 to i32 %145 = bitcast float %143 to i32 %146 = insertelement <2 x i32> undef, i32 %144, i32 0 %147 = insertelement <2 x i32> %146, i32 %145, i32 1 %148 = bitcast <8 x i32> %50 to <32 x i8> %149 = bitcast <4 x i32> %52 to <16 x i8> %150 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %147, <32 x i8> %148, <16 x i8> %149, i32 2) %151 = extractelement <4 x float> %150, i32 0 %152 = fadd float %141, %151 %153 = fsub float %108, %45 %154 = fsub float %110, %46 %155 = bitcast float %153 to i32 %156 = bitcast float %154 to i32 %157 = insertelement <2 x i32> undef, i32 %155, i32 0 %158 = insertelement <2 x i32> %157, i32 %156, i32 1 %159 = bitcast <8 x i32> %50 to <32 x i8> %160 = bitcast <4 x i32> %52 to <16 x i8> %161 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %158, <32 x i8> %159, <16 x i8> %160, i32 2) %162 = extractelement <4 x float> %161, i32 0 %163 = fadd float %152, %162 %164 = fsub float %108, %47 %165 = fsub float %110, %48 %166 = bitcast float %164 to i32 %167 = bitcast float %165 to i32 %168 = insertelement <2 x i32> undef, i32 %166, i32 0 %169 = insertelement <2 x i32> %168, i32 %167, i32 1 %170 = bitcast <8 x i32> %50 to <32 x i8> %171 = bitcast <4 x i32> %52 to <16 x i8> %172 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %169, <32 x i8> %170, <16 x i8> %171, i32 2) %173 = extractelement <4 x float> %172, i32 0 %174 = fadd float %163, %173 %175 = fmul float %174, 2.500000e-01 %176 = call float @llvm.AMDGPU.lrp(float %175, float %104, float %127) %177 = call float @llvm.AMDGPU.lrp(float %175, float %105, float %129) %178 = call float @llvm.AMDGPU.lrp(float %175, float %106, float %131) %179 = fmul float %111, %111 %180 = fmul float %112, %112 %181 = fadd float %180, %179 %182 = fmul float %113, %113 %183 = fadd float %181, %182 %184 = call float @llvm.sqrt.f32(float %183) %185 = fmul float %184, %31 %186 = fadd float %185, %30 %187 = call float @llvm.AMDIL.clamp.(float %186, float 0.000000e+00, float 1.000000e+00) %188 = call float @llvm.pow.f32(float %187, float %33) %189 = call float @llvm.minnum.f32(float %32, float %188) %190 = call float @llvm.AMDIL.clamp.(float %189, float 0.000000e+00, float 1.000000e+00) %191 = call float @llvm.AMDGPU.lrp(float %190, float %27, float %176) %192 = call float @llvm.AMDGPU.lrp(float %190, float %28, float %177) %193 = call float @llvm.AMDGPU.lrp(float %190, float %29, float %178) %194 = call i32 @llvm.SI.packf16(float %191, float %192) %195 = bitcast i32 %194 to float %196 = call i32 @llvm.SI.packf16(float %193, float 1.000000e+00) %197 = bitcast i32 %196 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %195, float %197, float %195, float %197) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx4 s[16:19], s[4:5], 0x8 ; C0880508 s_load_dwordx4 s[20:23], s[4:5], 0xc ; C08A050C s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 s_load_dwordx8 s[32:39], s[6:7], 0x18 ; C0D00718 s_load_dwordx8 s[40:47], s[6:7], 0x10 ; C0D40710 s_load_dwordx4 s[8:11], s[2:3], 0x10 ; C0840310 s_load_dwordx4 s[0:3], s[2:3], 0x14 ; C0800314 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500 v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501 v_interp_p1_f32 v0, v0, 2, 1, [m0] ; C8000600 v_interp_p2_f32 v0, [v0], v1, 2, 1, [m0] ; C8010601 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v1, 1, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[12:15] ; F0800100 00660102 image_sample v[6:8], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[32:39], s[20:23] ; F0800700 00A80602 image_sample v[9:11], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[40:47], s[16:19] ; F0800700 008A0902 s_buffer_load_dword s12, s[8:11], 0x4c ; C206094C s_buffer_load_dword s13, s[0:3], 0x28 ; C2068128 s_buffer_load_dword s14, s[0:3], 0x2a ; C207012A s_buffer_load_dword s15, s[0:3], 0x29 ; C2078129 s_buffer_load_dword s16, s[8:11], 0x4d ; C208094D s_buffer_load_dword s17, s[8:11], 0x4e ; C208894E s_buffer_load_dword s18, s[8:11], 0x70 ; C2090970 s_buffer_load_dword s19, s[8:11], 0x71 ; C2098971 s_buffer_load_dword s20, s[0:3], 0x2b ; C20A012B s_buffer_load_dword s21, s[0:3], 0x2c ; C20A812C s_buffer_load_dword s22, s[0:3], 0x2d ; C20B012D s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mad_f32 v2, v4, v1, s12 ; D2820002 00320304 v_mov_b32_e32 v3, s13 ; 7E06020D v_mac_f32_e32 v3, s14, v2 ; 3E06040E s_buffer_load_dword s13, s[0:3], 0x2e ; C206812E v_mad_f32 v4, v5, v1, s16 ; D2820004 00420305 v_mov_b32_e32 v5, s15 ; 7E0A020F s_buffer_load_dword s14, s[0:3], 0x2f ; C207012F s_load_dwordx4 s[24:27], s[4:5], 0x0 ; C08C0500 s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700 v_mac_f32_e32 v5, s20, v4 ; 3E0A0814 v_add_f32_e32 v12, s21, v3 ; 06180615 v_add_f32_e32 v13, s22, v5 ; 061A0A16 v_subrev_f32_e32 v14, s21, v3 ; 0A1C0615 v_subrev_f32_e32 v15, s22, v5 ; 0A1E0A16 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v16, s13, v3 ; 0620060D v_subrev_f32_e32 v18, s13, v3 ; 0A24060D v_add_f32_e32 v17, s14, v5 ; 06220A0E v_subrev_f32_e32 v19, s14, v5 ; 0A260A0E image_sample v3, 1, 0, 0, 0, 0, 0, 0, 0, v[12:13], s[28:35], s[24:27] ; F0800100 00C7030C image_sample v5, 1, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[28:35], s[24:27] ; F0800100 00C70510 image_sample v12, 1, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[28:35], s[24:27] ; F0800100 00C70C0E image_sample v13, 1, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[28:35], s[24:27] ; F0800100 00C70D12 v_add_f32_e32 v6, v9, v6 ; 060C0D09 v_add_f32_e32 v7, v10, v7 ; 060E0F0A v_add_f32_e32 v8, v11, v8 ; 0610110B s_buffer_load_dword s4, s[8:11], 0x72 ; C2020972 s_buffer_load_dword s5, s[8:11], 0x73 ; C2028973 s_buffer_load_dword s6, s[8:11], 0x74 ; C2030974 s_buffer_load_dword s7, s[8:11], 0x75 ; C2038975 s_buffer_load_dword s8, s[8:11], 0x76 ; C2040976 s_buffer_load_dword s9, s[0:3], 0x20 ; C2048120 s_buffer_load_dword s10, s[0:3], 0x21 ; C2050121 s_buffer_load_dword s11, s[0:3], 0x22 ; C2058122 s_buffer_load_dword s13, s[0:3], 0x26 ; C2068126 s_buffer_load_dword s0, s[0:3], 0x27 ; C2000127 v_subrev_f32_e32 v2, s12, v2 ; 0A04040C v_subrev_f32_e32 v4, s16, v4 ; 0A080810 v_mad_f32 v0, v0, v1, s17 ; D2820000 00460300 v_subrev_f32_e32 v0, s17, v0 ; 0A000011 v_mul_f32_e32 v1, v2, v2 ; 10020502 v_mac_f32_e32 v1, v4, v4 ; 3E020904 v_mul_f32_e32 v2, 0x3e99999a, v6 ; 10040CFF 3E99999A v_madmk_f32_e32 v2, v7, v2, 0x3f170a3d ; 40040507 3F170A3D v_madmk_f32_e32 v2, v8, v2, 0x3de147ae ; 40040508 3DE147AE v_log_f32_e32 v2, v2 ; 7E044F02 v_mac_f32_e32 v1, v0, v0 ; 3E020100 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v0, s5 ; 7E000205 v_sqrt_f32_e32 v1, v1 ; 7E026701 v_mac_f32_e32 v0, s6, v1 ; 3E000206 v_mul_legacy_f32_e32 v1, s0, v2 ; 0E020400 v_exp_f32_e32 v1, v1 ; 7E024B01 v_max_f32_e32 v1, 0.5, v1 ; 200202F0 v_mul_f32_e32 v2, s13, v6 ; 10040C0D v_mac_f32_e32 v2, s9, v1 ; 3E040209 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_log_f32_e32 v0, v0 ; 7E004F00 v_mul_f32_e32 v4, s13, v7 ; 10080E0D v_mac_f32_e32 v4, s10, v1 ; 3E08020A v_mul_f32_e32 v9, s13, v8 ; 1012100D v_mac_f32_e32 v9, s11, v1 ; 3E12020B v_mul_legacy_f32_e32 v0, s8, v0 ; 0E000008 v_exp_f32_e32 v0, v0 ; 7E004B00 v_min_f32_e32 v0, s7, v0 ; 1E000007 v_add_f32_e32 v1, v5, v3 ; 06020705 v_add_f32_e32 v1, v12, v1 ; 0602030C v_add_f32_e32 v1, v13, v1 ; 0602030D v_mov_b32_e32 v3, 0x3e800000 ; 7E0602FF 3E800000 v_mul_f32_e32 v5, v3, v1 ; 100A0303 v_mad_f32 v1, -v1, v3, 1.0 ; D2820001 23CA0701 v_mul_f32_e32 v2, v2, v1 ; 10040302 v_mac_f32_e32 v2, v6, v5 ; 3E040B06 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_sub_f32_e32 v3, 1.0, v0 ; 080600F2 v_mul_f32_e32 v2, v2, v3 ; 10040702 v_mac_f32_e32 v2, s18, v0 ; 3E040012 v_mul_f32_e32 v4, v4, v1 ; 10080304 v_mac_f32_e32 v4, v7, v5 ; 3E080B07 v_mul_f32_e32 v4, v4, v3 ; 10080704 v_mac_f32_e32 v4, s19, v0 ; 3E080013 v_mul_f32_e32 v1, v9, v1 ; 10020309 v_mac_f32_e32 v1, v8, v5 ; 3E020B08 v_mul_f32_e32 v1, v1, v3 ; 10020701 v_mac_f32_e32 v1, s4, v0 ; 3E020004 v_cvt_pkrtz_f16_f32_e32 v0, v2, v4 ; 5E000902 v_cvt_pkrtz_f16_f32_e64 v1, v1, 1.0 ; D25E0001 0001E501 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 20 Code Size: 576 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL CONST[1][0..4] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0..3], LOCAL IMM[0] UINT32 {0, 64, 56, 3} IMM[1] UINT32 {348, 16, 4, 160} IMM[2] UINT32 {128, 156, 152, 0} IMM[3] FLT32 { 0.3000, 0.5900, 0.1100, 0.5000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0].yw, TEMP[0], SAMP[1], 2D 2: MUL TEMP[1].xy, CONST[1][4].xxxx, TEMP[0].yyyy 3: MUL TEMP[2].xy, CONST[1][3].zwww, CONST[4][21].wwww 4: MAD TEMP[1].xy, TEMP[1].xyyy, TEMP[2].xyyy, IN[0].xyyy 5: MOV TEMP[1].xy, TEMP[1].xyyy 6: TEX TEMP[1], TEMP[1], SAMP[2], 2D 7: MOV TEMP[2].w, TEMP[1].wwww 8: MUL TEMP[2].xyz, CONST[1][1].xyzz, TEMP[1].xyzz 9: MUL TEMP[1].xyz, TEMP[2], IN[2] 10: DP3 TEMP[2].x, TEMP[1].xyzz, IMM[3].xyzz 11: POW TEMP[2].x, TEMP[2].xxxx, CONST[5][9].wwww 12: MAX TEMP[2].xyz, TEMP[2].xxxx, IMM[3].wwww 13: MUL TEMP[3].xyz, CONST[5][9].zzzz, TEMP[1].xyzz 14: MAD TEMP[2].xyz, CONST[5][8].xyzz, TEMP[2].xyzz, TEMP[3].xyzz 15: MAD TEMP[3].xy, IN[1].xyyy, CONST[5][10].zwww, CONST[5][10].xyyy 16: MOV TEMP[3].xy, TEMP[3].xyyy 17: TEX TEMP[3].x, TEMP[3], SAMP[0], 2D 18: LRP TEMP[1].xyz, TEMP[3].xxxx, TEMP[1].xyzz, TEMP[2].xyzz 19: MOV TEMP[2].x, TEMP[1].xxxx 20: MOV TEMP[2].y, TEMP[1].yyyy 21: MOV TEMP[2].z, TEMP[1].zzzz 22: MUL TEMP[0].x, IN[2].wwww, TEMP[0].wwww 23: MUL TEMP[0].x, TEMP[0].xxxx, CONST[1][0].yyyy 24: MOV TEMP[2].w, TEMP[0].xxxx 25: MOV OUT[1], IN[1].wwww 26: MOV OUT[0], TEMP[2] 27: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %31 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4 %32 = load <16 x i8>, <16 x i8> addrspace(2)* %31, align 16, !tbaa !0 %33 = call float @llvm.SI.load.const(<16 x i8> %32, i32 348) %34 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 5 %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 %36 = call float @llvm.SI.load.const(<16 x i8> %35, i32 128) %37 = call float @llvm.SI.load.const(<16 x i8> %35, i32 132) %38 = call float @llvm.SI.load.const(<16 x i8> %35, i32 136) %39 = call float @llvm.SI.load.const(<16 x i8> %35, i32 152) %40 = call float @llvm.SI.load.const(<16 x i8> %35, i32 156) %41 = call float @llvm.SI.load.const(<16 x i8> %35, i32 160) %42 = call float @llvm.SI.load.const(<16 x i8> %35, i32 164) %43 = call float @llvm.SI.load.const(<16 x i8> %35, i32 168) %44 = call float @llvm.SI.load.const(<16 x i8> %35, i32 172) %45 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %46 = load <32 x i8>, <32 x i8> addrspace(2)* %45, align 32, !tbaa !0 %47 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %48 = load <16 x i8>, <16 x i8> addrspace(2)* %47, align 16, !tbaa !0 %49 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %50 = bitcast <8 x i32> addrspace(2)* %49 to <32 x i8> addrspace(2)* %51 = load <32 x i8>, <32 x i8> addrspace(2)* %50, align 32, !tbaa !0 %52 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %53 = bitcast <4 x i32> addrspace(2)* %52 to <16 x i8> addrspace(2)* %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !tbaa !0 %55 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %56 = bitcast <8 x i32> addrspace(2)* %55 to <32 x i8> addrspace(2)* %57 = load <32 x i8>, <32 x i8> addrspace(2)* %56, align 32, !tbaa !0 %58 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %59 = bitcast <4 x i32> addrspace(2)* %58 to <16 x i8> addrspace(2)* %60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !tbaa !0 %61 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %62 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %63 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %64 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %65 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %66 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %67 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %68 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %69 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %70 = bitcast float %61 to i32 %71 = bitcast float %62 to i32 %72 = insertelement <2 x i32> undef, i32 %70, i32 0 %73 = insertelement <2 x i32> %72, i32 %71, i32 1 %74 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %73, <32 x i8> %51, <16 x i8> %54, i32 2) %75 = extractelement <4 x float> %74, i32 1 %76 = extractelement <4 x float> %74, i32 3 %77 = fmul float %30, %75 %78 = fmul float %30, %75 %79 = fmul float %28, %33 %80 = fmul float %29, %33 %81 = fmul float %77, %79 %82 = fadd float %81, %61 %83 = fmul float %78, %80 %84 = fadd float %83, %62 %85 = bitcast float %82 to i32 %86 = bitcast float %84 to i32 %87 = insertelement <2 x i32> undef, i32 %85, i32 0 %88 = insertelement <2 x i32> %87, i32 %86, i32 1 %89 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %88, <32 x i8> %57, <16 x i8> %60, i32 2) %90 = extractelement <4 x float> %89, i32 0 %91 = extractelement <4 x float> %89, i32 1 %92 = extractelement <4 x float> %89, i32 2 %93 = fmul float %25, %90 %94 = fmul float %26, %91 %95 = fmul float %27, %92 %96 = fmul float %93, %66 %97 = fmul float %94, %67 %98 = fmul float %95, %68 %99 = fmul float %96, 0x3FD3333340000000 %100 = fmul float %97, 0x3FE2E147A0000000 %101 = fadd float %100, %99 %102 = fmul float %98, 0x3FBC28F5C0000000 %103 = fadd float %101, %102 %104 = call float @llvm.pow.f32(float %103, float %40) %105 = call float @llvm.maxnum.f32(float %104, float 5.000000e-01) %106 = call float @llvm.maxnum.f32(float %104, float 5.000000e-01) %107 = call float @llvm.maxnum.f32(float %104, float 5.000000e-01) %108 = fmul float %39, %96 %109 = fmul float %39, %97 %110 = fmul float %39, %98 %111 = fmul float %36, %105 %112 = fadd float %111, %108 %113 = fmul float %37, %106 %114 = fadd float %113, %109 %115 = fmul float %38, %107 %116 = fadd float %115, %110 %117 = fmul float %63, %43 %118 = fadd float %117, %41 %119 = fmul float %64, %44 %120 = fadd float %119, %42 %121 = bitcast float %118 to i32 %122 = bitcast float %120 to i32 %123 = insertelement <2 x i32> undef, i32 %121, i32 0 %124 = insertelement <2 x i32> %123, i32 %122, i32 1 %125 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %124, <32 x i8> %46, <16 x i8> %48, i32 2) %126 = extractelement <4 x float> %125, i32 0 %127 = call float @llvm.AMDGPU.lrp(float %126, float %96, float %112) %128 = call float @llvm.AMDGPU.lrp(float %126, float %97, float %114) %129 = call float @llvm.AMDGPU.lrp(float %126, float %98, float %116) %130 = fmul float %69, %76 %131 = fmul float %130, %24 %132 = call i32 @llvm.SI.packf16(float %127, float %128) %133 = bitcast i32 %132 to float %134 = call i32 @llvm.SI.packf16(float %129, float %131) %135 = bitcast i32 %134 to float %136 = call i32 @llvm.SI.packf16(float %65, float %65) %137 = bitcast i32 %136 to float %138 = call i32 @llvm.SI.packf16(float %65, float %65) %139 = bitcast i32 %138 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %133, float %135, float %133, float %135) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 1, i32 1, float %137, float %139, float %137, float %139) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[16:19], s[2:3], 0x10 ; C0880310 s_load_dwordx4 s[12:15], s[2:3], 0x14 ; C0860314 s_mov_b32 m0, s9 ; BEFC0309 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[16:19], 0x57 ; C2041157 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500 v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501 v_interp_p1_f32 v6, v0, 3, 1, [m0] ; C8180700 v_interp_p2_f32 v6, [v6], v1, 3, 1, [m0] ; C8190701 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 s_load_dwordx4 s[24:27], s[4:5], 0x4 ; C08C0504 s_load_dwordx8 s[28:35], s[6:7], 0x8 ; C0CE0708 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 v_interp_p1_f32 v0, v0, 3, 2, [m0] ; C8000B00 s_buffer_load_dword s9, s[12:15], 0x28 ; C2048D28 s_buffer_load_dword s10, s[12:15], 0x2a ; C2050D2A s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s11, s[0:3], 0xe ; C205810E v_interp_p2_f32 v0, [v0], v1, 3, 2, [m0] ; C8010B01 s_load_dwordx4 s[36:39], s[4:5], 0x8 ; C0920508 s_load_dwordx8 s[16:23], s[6:7], 0x10 ; C0C80710 image_sample v[10:11], 10, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[28:35], s[24:27] ; F0800A00 00C70A02 s_buffer_load_dword s24, s[12:15], 0x29 ; C20C0D29 s_buffer_load_dword s25, s[0:3], 0xf ; C20C810F s_buffer_load_dword s26, s[0:3], 0x10 ; C20D0110 s_buffer_load_dword s27, s[12:15], 0x2b ; C20D8D2B v_mov_b32_e32 v12, s9 ; 7E180209 v_mac_f32_e32 v12, s10, v4 ; 3E18080A v_mov_b32_e32 v1, s8 ; 7E020208 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s11, v1 ; 1002020B v_mov_b32_e32 v4, s8 ; 7E080208 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_buffer_load_dword s6, s[0:3], 0x6 ; C2030106 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v4, s25, v4 ; 10080819 v_mul_f32_e32 v10, s26, v10 ; 1014141A v_mac_f32_e32 v2, v1, v10 ; 3E041501 v_mac_f32_e32 v3, v4, v10 ; 3E061504 image_sample v[1:3], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[36:39] ; F0800700 01240102 v_mov_b32_e32 v13, s24 ; 7E1A0218 v_mac_f32_e32 v13, s27, v5 ; 3E1A0A1B s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v4, 1, 0, 0, 0, 0, 0, 0, 0, v[12:13], s[28:35], s[8:11] ; F0800100 0047040C s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v1, s4, v1 ; 10020204 v_mul_f32_e32 v2, s5, v2 ; 10040405 v_mul_f32_e32 v3, s6, v3 ; 10060606 v_mul_f32_e32 v1, v7, v1 ; 10020307 v_mul_f32_e32 v2, v8, v2 ; 10040508 s_buffer_load_dword s4, s[12:15], 0x27 ; C2020D27 v_mul_f32_e32 v3, v9, v3 ; 10060709 s_buffer_load_dword s5, s[12:15], 0x26 ; C2028D26 v_mul_f32_e32 v5, 0x3e99999a, v1 ; 100A02FF 3E99999A v_madmk_f32_e32 v5, v2, v5, 0x3f170a3d ; 400A0B02 3F170A3D v_madmk_f32_e32 v5, v3, v5, 0x3de147ae ; 400A0B03 3DE147AE v_log_f32_e32 v5, v5 ; 7E0A4F05 s_buffer_load_dword s6, s[12:15], 0x20 ; C2030D20 s_buffer_load_dword s7, s[12:15], 0x21 ; C2038D21 s_buffer_load_dword s8, s[12:15], 0x22 ; C2040D22 s_buffer_load_dword s0, s[0:3], 0x1 ; C2000101 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_legacy_f32_e32 v5, s4, v5 ; 0E0A0A04 v_mul_f32_e32 v7, s5, v1 ; 100E0205 v_mul_f32_e32 v8, s5, v2 ; 10100405 v_mul_f32_e32 v9, s5, v3 ; 10120605 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_max_f32_e32 v5, 0.5, v5 ; 200A0AF0 v_mac_f32_e32 v7, s6, v5 ; 3E0E0A06 v_mac_f32_e32 v8, s7, v5 ; 3E100A07 v_mac_f32_e32 v9, s8, v5 ; 3E120A08 v_mul_f32_e32 v0, v11, v0 ; 1000010B v_mul_f32_e32 v0, s0, v0 ; 10000000 v_sub_f32_e32 v5, 1.0, v4 ; 080A08F2 v_mul_f32_e32 v7, v7, v5 ; 100E0B07 v_mac_f32_e32 v7, v1, v4 ; 3E0E0901 v_mul_f32_e32 v1, v8, v5 ; 10020B08 v_mac_f32_e32 v1, v2, v4 ; 3E020902 v_mul_f32_e32 v2, v9, v5 ; 10040B09 v_mac_f32_e32 v2, v3, v4 ; 3E040903 v_cvt_pkrtz_f16_f32_e32 v1, v7, v1 ; 5E020307 v_cvt_pkrtz_f16_f32_e32 v0, v2, v0 ; 5E000102 exp 15, 0, 1, 0, 0, v1, v0, v1, v0 ; F800040F 00010001 s_waitcnt expcnt(0) ; BF8C070F v_cvt_pkrtz_f16_f32_e32 v0, v6, v6 ; 5E000D06 exp 15, 1, 1, 1, 1, v0, v0, v0, v0 ; F8001C1F 00000000 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 16 Code Size: 440 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL CONST[1][0..41] DCL CONST[2][0..13] DCL CONST[3][0] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL IMM[0] FLT32 { 0.0000, -1.0000, 1.0000, 0.0000} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MOV TEMP[0].zw, IMM[0].zzyz 4: MOV TEMP[0].x, IN[0].xxxx 5: MOV TEMP[0].y, -IN[0].yyyy 6: MOV OUT[1], TEMP[1] 7: MOV OUT[2].xy, IN[1].xyxx 8: MOV OUT[0], TEMP[0] 9: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = add i32 %5, %7 %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13) %15 = extractelement <4 x float> %14, i32 0 %16 = extractelement <4 x float> %14, i32 1 %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 %19 = add i32 %5, %7 %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19) %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = fsub float -0.000000e+00, %16 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %21, float %22, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %15, float %23, float -1.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 v_mov_b32_e32 v1, 0 ; 7E020280 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[4:7], v0, s[4:7], 0 idxen ; E00C2000 80010400 v_mov_b32_e32 v0, 1.0 ; 7E0002F2 exp 15, 32, 0, 0, 0, v1, v1, v1, v1 ; F800020F 01010101 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 33, 0, 0, 0, v4, v5, v1, v1 ; F800021F 01010504 v_xor_b32_e32 v3, 0x80000000, v3 ; 3A0606FF 80000000 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v4, -1.0 ; 7E0802F3 exp 15, 12, 0, 0, 0, v2, v3, v4, v0 ; F80000CF 00040302 exp 15, 13, 0, 1, 0, v1, v1, v1, v1 ; F80008DF 01010101 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 100 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0] DCL CONST[2][0..41] DCL CONST[3][0..13] DCL CONST[4][0] DCL TEMP[0..47], LOCAL IMM[0] FLT32 { 0.0000, 1.9632, 0.0417, 0.1250} IMM[1] INT32 {0, -1, 1, 15} IMM[2] FLT32 { 0.7500, 0.2500, -0.2500, 1.3333} IMM[3] FLT32 { 0.1111, -0.5000, 0.5000, 1.0000} IMM[4] UINT32 {0, 0, 0, 0} 0: MOV TEMP[0].xy, IN[0].xyyy 1: MOV TEMP[0].w, IMM[0].xxxx 2: TXL TEMP[0], TEMP[0], SAMP[0], 2D, IMM[1].xyx 3: MOV TEMP[1], TEMP[0] 4: MOV TEMP[2].xy, IN[0].xyyy 5: MOV TEMP[2].w, IMM[0].xxxx 6: TXL TEMP[2], TEMP[2], SAMP[0], 2D, IMM[1].yxy 7: MOV TEMP[3], TEMP[2] 8: MOV TEMP[4].xy, IN[0].xyyy 9: MOV TEMP[4].w, IMM[0].xxxx 10: TXL TEMP[4], TEMP[4], SAMP[0], 2D, IMM[1].xxx 11: MOV TEMP[5], TEMP[4] 12: MOV TEMP[1].xy, IN[0].xyyy 13: MOV TEMP[1].w, IMM[0].xxxx 14: TXL TEMP[1], TEMP[1], SAMP[0], 2D, IMM[1].zxz 15: MOV TEMP[6], TEMP[1] 16: MOV TEMP[7].xy, IN[0].xyyy 17: MOV TEMP[7].w, IMM[0].xxxx 18: TXL TEMP[7], TEMP[7], SAMP[0], 2D, IMM[1].xzx 19: MOV TEMP[8], TEMP[7] 20: MAD TEMP[9].x, TEMP[0].yyyy, IMM[0].yyyy, TEMP[0].xxxx 21: MAD TEMP[3].x, IMM[0].yyyy, TEMP[2].yyyy, TEMP[2].xxxx 22: MAD TEMP[10].x, IMM[0].yyyy, TEMP[4].yyyy, TEMP[4].xxxx 23: MAD TEMP[11].x, IMM[0].yyyy, TEMP[1].yyyy, TEMP[1].xxxx 24: MAD TEMP[5].x, IMM[0].yyyy, TEMP[7].yyyy, TEMP[7].xxxx 25: MAX TEMP[12].x, TEMP[10].xxxx, TEMP[9].xxxx 26: MAX TEMP[13].x, TEMP[3].xxxx, TEMP[5].xxxx 27: MAX TEMP[13].x, TEMP[13].xxxx, TEMP[11].xxxx 28: MAX TEMP[12].x, TEMP[12].xxxx, TEMP[13].xxxx 29: MIN TEMP[13].x, TEMP[10].xxxx, TEMP[9].xxxx 30: MIN TEMP[6].x, TEMP[3].xxxx, TEMP[5].xxxx 31: MIN TEMP[6].x, TEMP[6].xxxx, TEMP[11].xxxx 32: MIN TEMP[13].x, TEMP[13].xxxx, TEMP[6].xxxx 33: ADD TEMP[13].x, TEMP[12].xxxx, -TEMP[13].xxxx 34: MUL TEMP[12].x, TEMP[12].xxxx, IMM[0].wwww 35: MAX TEMP[12].x, IMM[0].zzzz, TEMP[12].xxxx 36: FSLT TEMP[12].x, TEMP[13].xxxx, TEMP[12].xxxx 37: UIF TEMP[12].xxxx :0 38: MOV TEMP[12].xyz, TEMP[4].xyzx 39: ELSE :0 40: ADD TEMP[6].x, TEMP[5].xxxx, TEMP[9].xxxx 41: ADD TEMP[14].x, TEMP[3].xxxx, TEMP[11].xxxx 42: ADD TEMP[6].x, TEMP[6].xxxx, TEMP[14].xxxx 43: MAD TEMP[6].x, TEMP[6].xxxx, IMM[2].yyyy, -TEMP[10].xxxx 44: ABS TEMP[6].x, TEMP[6].xxxx 45: RCP TEMP[13].x, TEMP[13].xxxx 46: MAD TEMP[13].x, TEMP[6].xxxx, TEMP[13].xxxx, IMM[2].zzzz 47: MAX TEMP[13].x, IMM[0].xxxx, TEMP[13].xxxx 48: MUL TEMP[13].x, TEMP[13].xxxx, IMM[2].wwww 49: MIN TEMP[13].x, IMM[2].xxxx, TEMP[13].xxxx 50: MOV TEMP[6].xy, IN[0].xyyy 51: MOV TEMP[6].w, IMM[0].xxxx 52: TXL TEMP[6], TEMP[6], SAMP[0], 2D, IMM[1].yyy 53: MOV TEMP[15], TEMP[6] 54: MOV TEMP[14].xy, IN[0].xyyy 55: MOV TEMP[14].w, IMM[0].xxxx 56: TXL TEMP[14], TEMP[14], SAMP[0], 2D, IMM[1].zyz 57: MOV TEMP[16], TEMP[14] 58: MOV TEMP[17].xy, IN[0].xyyy 59: MOV TEMP[17].w, IMM[0].xxxx 60: TXL TEMP[17], TEMP[17], SAMP[0], 2D, IMM[1].yzy 61: MOV TEMP[18], TEMP[17] 62: MOV TEMP[8].xy, IN[0].xyyy 63: MOV TEMP[8].w, IMM[0].xxxx 64: TXL TEMP[8], TEMP[8], SAMP[0], 2D, IMM[1].zzz 65: MOV TEMP[19], TEMP[8] 66: ADD TEMP[20].xyz, TEMP[14].xyzz, TEMP[6].xyzz 67: ADD TEMP[21].xyz, TEMP[17].xyzz, TEMP[8].xyzz 68: ADD TEMP[20].xyz, TEMP[20].xyzz, TEMP[21].xyzz 69: ADD TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xyzz 70: ADD TEMP[2].xyz, TEMP[4].xyzz, TEMP[1].xyzz 71: ADD TEMP[2].xyz, TEMP[2].xyzz, TEMP[7].xyzz 72: ADD TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xyzz 73: ADD TEMP[0].xyz, TEMP[20].xyzz, TEMP[0].xyzz 74: MUL TEMP[0].xyz, TEMP[0].xyzz, IMM[3].xxxx 75: MAD TEMP[2].x, IMM[0].yyyy, TEMP[6].yyyy, TEMP[6].xxxx 76: MUL TEMP[2].x, IMM[2].yyyy, TEMP[2].xxxx 77: MAD TEMP[4].x, IMM[0].yyyy, TEMP[14].yyyy, TEMP[14].xxxx 78: MUL TEMP[4].x, IMM[2].yyyy, TEMP[4].xxxx 79: MOV TEMP[1].x, -TEMP[10].xxxx 80: MAD TEMP[7].x, IMM[0].yyyy, TEMP[17].yyyy, TEMP[17].xxxx 81: MUL TEMP[7].x, IMM[2].yyyy, TEMP[7].xxxx 82: MAD TEMP[6].x, IMM[0].yyyy, TEMP[8].yyyy, TEMP[8].xxxx 83: MUL TEMP[6].x, IMM[2].yyyy, TEMP[6].xxxx 84: MAD TEMP[14].x, IMM[3].yyyy, TEMP[3].xxxx, TEMP[2].xxxx 85: ADD TEMP[14].x, TEMP[14].xxxx, TEMP[7].xxxx 86: ABS TEMP[14].x, TEMP[14].xxxx 87: MAD TEMP[17].x, IMM[3].zzzz, TEMP[9].xxxx, TEMP[1].xxxx 88: MAD TEMP[17].x, IMM[3].zzzz, TEMP[5].xxxx, TEMP[17].xxxx 89: ABS TEMP[17].x, TEMP[17].xxxx 90: ADD TEMP[14].x, TEMP[14].xxxx, TEMP[17].xxxx 91: MAD TEMP[17].x, IMM[3].yyyy, TEMP[11].xxxx, TEMP[4].xxxx 92: ADD TEMP[17].x, TEMP[6].xxxx, TEMP[17].xxxx 93: ABS TEMP[17].x, TEMP[17].xxxx 94: ADD TEMP[14].x, TEMP[14].xxxx, TEMP[17].xxxx 95: MAD TEMP[2].x, TEMP[9].xxxx, IMM[3].yyyy, TEMP[2].xxxx 96: ADD TEMP[2].x, TEMP[2].xxxx, TEMP[4].xxxx 97: ABS TEMP[2].x, TEMP[2].xxxx 98: MAD TEMP[4].x, TEMP[3].xxxx, IMM[3].zzzz, TEMP[1].xxxx 99: MAD TEMP[4].x, IMM[3].zzzz, TEMP[11].xxxx, TEMP[4].xxxx 100: ABS TEMP[4].x, TEMP[4].xxxx 101: ADD TEMP[2].x, TEMP[2].xxxx, TEMP[4].xxxx 102: MAD TEMP[4].x, IMM[3].yyyy, TEMP[5].xxxx, TEMP[7].xxxx 103: ADD TEMP[4].x, TEMP[6].xxxx, TEMP[4].xxxx 104: ABS TEMP[4].x, TEMP[4].xxxx 105: ADD TEMP[2].x, TEMP[2].xxxx, TEMP[4].xxxx 106: FSGE TEMP[2].x, TEMP[14].xxxx, TEMP[2].xxxx 107: UIF TEMP[2].xxxx :0 108: MOV TEMP[22], CONST[1][0] 109: MOV TEMP[4].x, CONST[1][0].yyyy 110: ELSE :0 111: MOV TEMP[23], CONST[1][0] 112: MOV TEMP[4].x, CONST[1][0].xxxx 113: ENDIF 114: MOV TEMP[1].x, -TEMP[4].xxxx 115: UIF TEMP[2].xxxx :0 116: MOV TEMP[7].x, TEMP[9].xxxx 117: ELSE :0 118: MOV TEMP[7].x, TEMP[3].xxxx 119: ENDIF 120: UIF TEMP[2].xxxx :0 121: MOV TEMP[9].x, TEMP[5].xxxx 122: ELSE :0 123: MOV TEMP[9].x, TEMP[11].xxxx 124: ENDIF 125: ADD TEMP[3].x, TEMP[7].xxxx, -TEMP[10].xxxx 126: ABS TEMP[3].x, TEMP[3].xxxx 127: ADD TEMP[11].x, TEMP[9].xxxx, -TEMP[10].xxxx 128: ABS TEMP[11].x, TEMP[11].xxxx 129: FSGE TEMP[5].x, TEMP[3].xxxx, TEMP[11].xxxx 130: UIF TEMP[5].xxxx :0 131: MOV TEMP[7].x, TEMP[7].xxxx 132: ELSE :0 133: MOV TEMP[7].x, TEMP[9].xxxx 134: ENDIF 135: ADD TEMP[7].x, TEMP[10].xxxx, TEMP[7].xxxx 136: MUL TEMP[7].x, IMM[3].zzzz, TEMP[7].xxxx 137: MOV TEMP[9].x, TEMP[7].xxxx 138: UIF TEMP[5].xxxx :0 139: MOV TEMP[3].x, TEMP[3].xxxx 140: ELSE :0 141: MOV TEMP[3].x, TEMP[11].xxxx 142: ENDIF 143: UIF TEMP[5].xxxx :0 144: MOV TEMP[1].x, TEMP[1].xxxx 145: ELSE :0 146: MOV TEMP[1].x, TEMP[4].xxxx 147: ENDIF 148: MUL TEMP[4].x, IMM[3].zzzz, TEMP[1].xxxx 149: UIF TEMP[2].xxxx :0 150: MOV TEMP[11].x, IMM[0].xxxx 151: ELSE :0 152: MOV TEMP[11].x, TEMP[4].xxxx 153: ENDIF 154: ADD TEMP[11].x, IN[0].xxxx, TEMP[11].xxxx 155: UIF TEMP[2].xxxx :0 156: MOV TEMP[4].x, TEMP[4].xxxx 157: ELSE :0 158: MOV TEMP[4].x, IMM[0].xxxx 159: ENDIF 160: MOV TEMP[11].x, TEMP[11].xxxx 161: ADD TEMP[4].x, IN[0].yyyy, TEMP[4].xxxx 162: MOV TEMP[11].y, TEMP[4].xxxx 163: MUL TEMP[4].x, IMM[2].yyyy, TEMP[3].xxxx 164: MOV TEMP[3].y, IMM[0].xxxx 165: MOV TEMP[24], CONST[1][0] 166: MOV TEMP[3].x, CONST[1][0].xxxx 167: MOV TEMP[5].x, IMM[0].xxxx 168: MOV TEMP[5].y, CONST[1][0].yyyy 169: UIF TEMP[2].xxxx :0 170: MOV TEMP[3].xy, TEMP[3].xyxx 171: ELSE :0 172: MOV TEMP[3].xy, TEMP[5].xyxx 173: ENDIF 174: MOV TEMP[5].xy, TEMP[3].xyxx 175: MOV TEMP[6].x, IMM[1].xxxx 176: MOV TEMP[14].x, IMM[4].xxxx 177: MOV TEMP[17].x, IMM[4].xxxx 178: MOV TEMP[8].x, TEMP[7].xxxx 179: MOV TEMP[20].x, TEMP[7].xxxx 180: ADD TEMP[21].xy, TEMP[11].xyyy, TEMP[3].xyyy 181: ADD TEMP[3].xy, TEMP[11].xyyy, -TEMP[3].xyyy 182: BGNLOOP :0 183: ISLT TEMP[11].x, IMM[1].wwww, TEMP[6].xxxx 184: UIF TEMP[11].xxxx :0 185: BRK 186: ENDIF 187: MOV TEMP[25].x, TEMP[20].xxxx 188: NOT TEMP[26].x, TEMP[17].xxxx 189: UIF TEMP[26].xxxx :0 190: MOV TEMP[27].xy, TEMP[3].xyyy 191: MOV TEMP[27].w, IMM[0].xxxx 192: TXL TEMP[28], TEMP[27], SAMP[0], 2D 193: MOV TEMP[29], TEMP[28] 194: MAD TEMP[25].x, IMM[0].yyyy, TEMP[28].yyyy, TEMP[28].xxxx 195: ENDIF 196: MOV TEMP[30].x, TEMP[8].xxxx 197: NOT TEMP[31].x, TEMP[14].xxxx 198: UIF TEMP[31].xxxx :0 199: MOV TEMP[32].xy, TEMP[21].xyyy 200: MOV TEMP[32].w, IMM[0].xxxx 201: TXL TEMP[33], TEMP[32], SAMP[0], 2D 202: MOV TEMP[34], TEMP[33] 203: MAD TEMP[30].x, IMM[0].yyyy, TEMP[33].yyyy, TEMP[33].xxxx 204: ENDIF 205: ADD TEMP[35].x, TEMP[25].xxxx, -TEMP[9].xxxx 206: ABS TEMP[36].x, TEMP[35].xxxx 207: FSGE TEMP[37].x, TEMP[36].xxxx, TEMP[4].xxxx 208: OR TEMP[38].x, TEMP[17].xxxx, TEMP[37].xxxx 209: ADD TEMP[39].x, TEMP[30].xxxx, -TEMP[9].xxxx 210: ABS TEMP[40].x, TEMP[39].xxxx 211: FSGE TEMP[41].x, TEMP[40].xxxx, TEMP[4].xxxx 212: OR TEMP[42].x, TEMP[14].xxxx, TEMP[41].xxxx 213: AND TEMP[43].x, TEMP[38].xxxx, TEMP[42].xxxx 214: UIF TEMP[43].xxxx :0 215: BRK 216: ENDIF 217: ADD TEMP[44].xy, TEMP[3].xyyy, -TEMP[5].xyyy 218: UIF TEMP[38].xxxx :0 219: MOV TEMP[45].xy, TEMP[3].xyxx 220: ELSE :0 221: MOV TEMP[45].xy, TEMP[44].xyxx 222: ENDIF 223: ADD TEMP[46].xy, TEMP[21].xyyy, TEMP[5].xyyy 224: UIF TEMP[42].xxxx :0 225: MOV TEMP[47].xy, TEMP[21].xyxx 226: ELSE :0 227: MOV TEMP[47].xy, TEMP[46].xyxx 228: ENDIF 229: UADD TEMP[6].x, TEMP[6].xxxx, IMM[1].zzzz 230: MOV TEMP[14].x, TEMP[42].xxxx 231: MOV TEMP[17].x, TEMP[38].xxxx 232: MOV TEMP[8].x, TEMP[30].xxxx 233: MOV TEMP[20].x, TEMP[25].xxxx 234: MOV TEMP[21].xy, TEMP[47].xyxx 235: MOV TEMP[3].xy, TEMP[45].xyxx 236: ENDLOOP :0 237: ADD TEMP[4].x, IN[0].xxxx, -TEMP[3].xxxx 238: ADD TEMP[9].x, IN[0].yyyy, -TEMP[3].yyyy 239: UIF TEMP[2].xxxx :0 240: MOV TEMP[4].x, TEMP[4].xxxx 241: ELSE :0 242: MOV TEMP[4].x, TEMP[9].xxxx 243: ENDIF 244: ADD TEMP[9].x, TEMP[21].xxxx, -IN[0].xxxx 245: ADD TEMP[3].x, TEMP[21].yyyy, -IN[0].yyyy 246: UIF TEMP[2].xxxx :0 247: MOV TEMP[9].x, TEMP[9].xxxx 248: ELSE :0 249: MOV TEMP[9].x, TEMP[3].xxxx 250: ENDIF 251: FSLT TEMP[3].x, TEMP[4].xxxx, TEMP[9].xxxx 252: UIF TEMP[3].xxxx :0 253: MOV TEMP[11].x, TEMP[20].xxxx 254: ELSE :0 255: MOV TEMP[11].x, TEMP[8].xxxx 256: ENDIF 257: FSLT TEMP[10].x, TEMP[10].xxxx, TEMP[7].xxxx 258: FSLT TEMP[7].x, TEMP[11].xxxx, TEMP[7].xxxx 259: XOR TEMP[7].x, TEMP[10].xxxx, TEMP[7].xxxx 260: UIF TEMP[7].xxxx :0 261: MOV TEMP[1].x, TEMP[1].xxxx 262: ELSE :0 263: MOV TEMP[1].x, IMM[0].xxxx 264: ENDIF 265: ADD TEMP[7].x, TEMP[4].xxxx, TEMP[9].xxxx 266: UIF TEMP[3].xxxx :0 267: MOV TEMP[4].x, TEMP[4].xxxx 268: ELSE :0 269: MOV TEMP[4].x, TEMP[9].xxxx 270: ENDIF 271: RCP TEMP[7].x, TEMP[7].xxxx 272: MAD TEMP[4].x, -TEMP[7].xxxx, TEMP[4].xxxx, IMM[3].zzzz 273: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[1].xxxx 274: UIF TEMP[2].xxxx :0 275: MOV TEMP[1].x, IMM[0].xxxx 276: ELSE :0 277: MOV TEMP[1].x, TEMP[4].xxxx 278: ENDIF 279: ADD TEMP[1].x, IN[0].xxxx, TEMP[1].xxxx 280: UIF TEMP[2].xxxx :0 281: MOV TEMP[2].x, TEMP[4].xxxx 282: ELSE :0 283: MOV TEMP[2].x, IMM[0].xxxx 284: ENDIF 285: MOV TEMP[4].x, TEMP[1].xxxx 286: ADD TEMP[2].x, IN[0].yyyy, TEMP[2].xxxx 287: MOV TEMP[4].y, TEMP[2].xxxx 288: MOV TEMP[2].xy, TEMP[4].xyyy 289: MOV TEMP[2].w, IMM[0].xxxx 290: TXL TEMP[2].xyz, TEMP[2], SAMP[0], 2D 291: MAD TEMP[0].xyz, TEMP[13].xxxx, TEMP[0].xyzz, TEMP[2].xyzz 292: MAD TEMP[12].xyz, -TEMP[13].xxxx, TEMP[2].xyzz, TEMP[0].xyzz 293: ENDIF 294: MOV TEMP[0].w, IMM[3].wwww 295: MOV TEMP[0].x, TEMP[12].xxxx 296: MOV TEMP[0].y, TEMP[12].yyyy 297: MOV TEMP[0].z, TEMP[12].zzzz 298: MOV OUT[0], TEMP[0] 299: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %27 = load <8 x i32>, <8 x i32> addrspace(2)* %26, align 32, !tbaa !0 %28 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %29 = load <4 x i32>, <4 x i32> addrspace(2)* %28, align 16, !tbaa !0 %30 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %32 = bitcast float %30 to i32 %33 = bitcast float %31 to i32 %34 = insertelement <4 x i32> , i32 %32, i32 1 %35 = insertelement <4 x i32> %34, i32 %33, i32 2 %36 = insertelement <4 x i32> %35, i32 0, i32 3 %37 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %36, <8 x i32> %27, <4 x i32> %29, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %38 = extractelement <4 x float> %37, i32 0 %39 = extractelement <4 x float> %37, i32 1 %40 = bitcast float %30 to i32 %41 = bitcast float %31 to i32 %42 = insertelement <4 x i32> , i32 %40, i32 1 %43 = insertelement <4 x i32> %42, i32 %41, i32 2 %44 = insertelement <4 x i32> %43, i32 0, i32 3 %45 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %44, <8 x i32> %27, <4 x i32> %29, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %46 = extractelement <4 x float> %45, i32 0 %47 = extractelement <4 x float> %45, i32 1 %48 = bitcast float %30 to i32 %49 = bitcast float %31 to i32 %50 = insertelement <4 x i32> , i32 %48, i32 1 %51 = insertelement <4 x i32> %50, i32 %49, i32 2 %52 = insertelement <4 x i32> %51, i32 0, i32 3 %53 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %52, <8 x i32> %27, <4 x i32> %29, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %54 = extractelement <4 x float> %53, i32 0 %55 = extractelement <4 x float> %53, i32 1 %56 = extractelement <4 x float> %53, i32 2 %57 = bitcast float %30 to i32 %58 = bitcast float %31 to i32 %59 = insertelement <4 x i32> , i32 %57, i32 1 %60 = insertelement <4 x i32> %59, i32 %58, i32 2 %61 = insertelement <4 x i32> %60, i32 0, i32 3 %62 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %61, <8 x i32> %27, <4 x i32> %29, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %63 = extractelement <4 x float> %62, i32 0 %64 = extractelement <4 x float> %62, i32 1 %65 = bitcast float %30 to i32 %66 = bitcast float %31 to i32 %67 = insertelement <4 x i32> , i32 %65, i32 1 %68 = insertelement <4 x i32> %67, i32 %66, i32 2 %69 = insertelement <4 x i32> %68, i32 0, i32 3 %70 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %69, <8 x i32> %27, <4 x i32> %29, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %71 = extractelement <4 x float> %70, i32 0 %72 = extractelement <4 x float> %70, i32 1 %73 = fmul float %39, 0x3FFF694EE0000000 %74 = fadd float %73, %38 %75 = fmul float %47, 0x3FFF694EE0000000 %76 = fadd float %75, %46 %77 = fmul float %55, 0x3FFF694EE0000000 %78 = fadd float %77, %54 %79 = fmul float %64, 0x3FFF694EE0000000 %80 = fadd float %79, %63 %81 = fmul float %72, 0x3FFF694EE0000000 %82 = fadd float %81, %71 %83 = call float @llvm.maxnum.f32(float %78, float %74) %84 = call float @llvm.maxnum.f32(float %76, float %82) %85 = call float @llvm.maxnum.f32(float %84, float %80) %86 = call float @llvm.maxnum.f32(float %83, float %85) %87 = call float @llvm.minnum.f32(float %78, float %74) %88 = call float @llvm.minnum.f32(float %76, float %82) %89 = call float @llvm.minnum.f32(float %88, float %80) %90 = call float @llvm.minnum.f32(float %87, float %89) %91 = fsub float %86, %90 %92 = fmul float %86, 1.250000e-01 %93 = call float @llvm.maxnum.f32(float %92, float 0x3FA5555680000000) %94 = fcmp olt float %91, %93 br i1 %94, label %ENDIF, label %ELSE ELSE: ; preds = %main_body %95 = extractelement <4 x float> %70, i32 2 %96 = extractelement <4 x float> %62, i32 2 %97 = extractelement <4 x float> %45, i32 2 %98 = extractelement <4 x float> %37, i32 2 %99 = fadd float %82, %74 %100 = fadd float %76, %80 %101 = fadd float %99, %100 %102 = fmul float %101, 2.500000e-01 %103 = fsub float %102, %78 %104 = call float @fabs(float %103) %105 = fdiv float 1.000000e+00, %91 %106 = fmul float %104, %105 %107 = fadd float %106, -2.500000e-01 %108 = call float @llvm.maxnum.f32(float %107, float 0.000000e+00) %109 = fmul float %108, 0x3FF55551E0000000 %110 = call float @llvm.minnum.f32(float %109, float 7.500000e-01) %111 = bitcast float %30 to i32 %112 = bitcast float %31 to i32 %113 = insertelement <4 x i32> , i32 %111, i32 1 %114 = insertelement <4 x i32> %113, i32 %112, i32 2 %115 = insertelement <4 x i32> %114, i32 0, i32 3 %116 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %115, <8 x i32> %27, <4 x i32> %29, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %117 = extractelement <4 x float> %116, i32 0 %118 = extractelement <4 x float> %116, i32 1 %119 = extractelement <4 x float> %116, i32 2 %120 = bitcast float %30 to i32 %121 = bitcast float %31 to i32 %122 = insertelement <4 x i32> , i32 %120, i32 1 %123 = insertelement <4 x i32> %122, i32 %121, i32 2 %124 = insertelement <4 x i32> %123, i32 0, i32 3 %125 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %124, <8 x i32> %27, <4 x i32> %29, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %126 = extractelement <4 x float> %125, i32 0 %127 = extractelement <4 x float> %125, i32 1 %128 = extractelement <4 x float> %125, i32 2 %129 = bitcast float %30 to i32 %130 = bitcast float %31 to i32 %131 = insertelement <4 x i32> , i32 %129, i32 1 %132 = insertelement <4 x i32> %131, i32 %130, i32 2 %133 = insertelement <4 x i32> %132, i32 0, i32 3 %134 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %133, <8 x i32> %27, <4 x i32> %29, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %135 = extractelement <4 x float> %134, i32 0 %136 = extractelement <4 x float> %134, i32 1 %137 = extractelement <4 x float> %134, i32 2 %138 = bitcast float %30 to i32 %139 = bitcast float %31 to i32 %140 = insertelement <4 x i32> , i32 %138, i32 1 %141 = insertelement <4 x i32> %140, i32 %139, i32 2 %142 = insertelement <4 x i32> %141, i32 0, i32 3 %143 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %142, <8 x i32> %27, <4 x i32> %29, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %144 = extractelement <4 x float> %143, i32 0 %145 = extractelement <4 x float> %143, i32 1 %146 = extractelement <4 x float> %143, i32 2 %147 = fadd float %126, %117 %148 = fadd float %127, %118 %149 = fadd float %128, %119 %150 = fadd float %135, %144 %151 = fadd float %136, %145 %152 = fadd float %137, %146 %153 = fadd float %147, %150 %154 = fadd float %148, %151 %155 = fadd float %149, %152 %156 = fadd float %38, %46 %157 = fadd float %39, %47 %158 = fadd float %98, %97 %159 = fadd float %54, %63 %160 = fadd float %55, %64 %161 = fadd float %56, %96 %162 = fadd float %159, %71 %163 = fadd float %160, %72 %164 = fadd float %161, %95 %165 = fadd float %156, %162 %166 = fadd float %157, %163 %167 = fadd float %158, %164 %168 = fadd float %153, %165 %169 = fadd float %154, %166 %170 = fadd float %155, %167 %171 = fmul float %168, 0x3FBC71C540000000 %172 = fmul float %169, 0x3FBC71C540000000 %173 = fmul float %170, 0x3FBC71C540000000 %174 = fmul float %118, 0x3FFF694EE0000000 %175 = fadd float %174, %117 %176 = fmul float %175, 2.500000e-01 %177 = fmul float %127, 0x3FFF694EE0000000 %178 = fadd float %177, %126 %179 = fmul float %178, 2.500000e-01 %180 = fmul float %136, 0x3FFF694EE0000000 %181 = fadd float %180, %135 %182 = fmul float %181, 2.500000e-01 %183 = fmul float %145, 0x3FFF694EE0000000 %184 = fadd float %183, %144 %185 = fmul float %184, 2.500000e-01 %186 = fmul float %76, -5.000000e-01 %187 = fadd float %186, %176 %188 = fadd float %187, %182 %189 = call float @fabs(float %188) %190 = fmul float %74, 5.000000e-01 %191 = fsub float %190, %78 %192 = fmul float %82, 5.000000e-01 %193 = fadd float %192, %191 %194 = call float @fabs(float %193) %195 = fadd float %189, %194 %196 = fmul float %80, -5.000000e-01 %197 = fadd float %196, %179 %198 = fadd float %185, %197 %199 = call float @fabs(float %198) %200 = fadd float %195, %199 %201 = fmul float %74, -5.000000e-01 %202 = fadd float %201, %176 %203 = fadd float %202, %179 %204 = call float @fabs(float %203) %205 = fmul float %76, 5.000000e-01 %206 = fsub float %205, %78 %207 = fmul float %80, 5.000000e-01 %208 = fadd float %207, %206 %209 = call float @fabs(float %208) %210 = fadd float %204, %209 %211 = fmul float %82, -5.000000e-01 %212 = fadd float %211, %182 %213 = fadd float %185, %212 %214 = call float @fabs(float %213) %215 = fadd float %210, %214 %216 = fcmp oge float %200, %215 %. = select i1 %216, float %25, float %24 %217 = fsub float -0.000000e+00, %. %temp28.0 = select i1 %216, float %74, float %76 %.270 = select i1 %216, float %82, float %80 %218 = fsub float %temp28.0, %78 %219 = call float @fabs(float %218) %220 = fsub float %.270, %78 %221 = call float @fabs(float %220) %222 = fcmp oge float %219, %221 %temp28.1 = select i1 %222, float %temp28.0, float %.270 %223 = fadd float %78, %temp28.1 %224 = fmul float %223, 5.000000e-01 %.271 = select i1 %222, float %219, float %221 %temp4.0 = select i1 %222, float %217, float %. %225 = fmul float %temp4.0, 5.000000e-01 %.272 = select i1 %216, float 0.000000e+00, float %225 %226 = fadd float %30, %.272 %temp16.1 = select i1 %216, float %225, float 0.000000e+00 %227 = fadd float %31, %temp16.1 %228 = fmul float %.271, 2.500000e-01 %.273 = select i1 %216, float 0.000000e+00, float %25 %.274 = select i1 %216, float %24, float 0.000000e+00 %229 = fadd float %226, %.274 %230 = fadd float %227, %.273 %231 = fsub float %226, %.274 %232 = fsub float %227, %.273 %233 = bitcast <8 x i32> %27 to <32 x i8> %234 = bitcast <4 x i32> %29 to <16 x i8> %235 = bitcast <8 x i32> %27 to <32 x i8> %236 = bitcast <4 x i32> %29 to <16 x i8> br label %LOOP ENDIF: ; preds = %main_body, %ENDLOOP %temp50.0 = phi float [ %280, %ENDLOOP ], [ %56, %main_body ] %temp49.0 = phi float [ %278, %ENDLOOP ], [ %55, %main_body ] %temp48.0 = phi float [ %276, %ENDLOOP ], [ %54, %main_body ] %237 = call i32 @llvm.SI.packf16(float %temp48.0, float %temp49.0) %238 = bitcast i32 %237 to float %239 = call i32 @llvm.SI.packf16(float %temp50.0, float 1.000000e+00) %240 = bitcast i32 %239 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %238, float %240, float %238, float %240) ret void LOOP: ; preds = %ENDIF228, %ELSE %temp68.0 = phi float [ 0.000000e+00, %ELSE ], [ %308, %ENDIF228 ] %temp80.0 = phi float [ %224, %ELSE ], [ %temp100.0, %ENDIF228 ] %temp84.0 = phi float [ %229, %ELSE ], [ %temp188.0, %ENDIF228 ] %temp85.0 = phi float [ %230, %ELSE ], [ %temp189.0, %ENDIF228 ] %temp56.0 = phi float [ 0.000000e+00, %ELSE ], [ %312, %ENDIF228 ] %temp32.0 = phi float [ %224, %ELSE ], [ %temp120.0, %ENDIF228 ] %temp24.0 = phi float [ 0.000000e+00, %ELSE ], [ %327, %ENDIF228 ] %temp13.1 = phi float [ %232, %ELSE ], [ %temp13.1., %ENDIF228 ] %temp12.2 = phi float [ %231, %ELSE ], [ %temp12.2., %ENDIF228 ] %241 = bitcast float %temp24.0 to i32 %242 = icmp sgt i32 %241, 15 br i1 %242, label %ENDLOOP, label %ENDIF219 ENDLOOP: ; preds = %ENDIF225, %LOOP %243 = fsub float %30, %temp12.2 %244 = fsub float %31, %temp13.1 %.275 = select i1 %216, float %243, float %244 %245 = fsub float %temp84.0, %30 %246 = fsub float %temp85.0, %31 %temp36.1 = select i1 %216, float %245, float %246 %247 = fcmp olt float %.275, %temp36.1 %temp80.0259.temp32.0265 = select i1 %247, float %temp80.0, float %temp32.0 %248 = fcmp olt float %78, %224 %249 = fcmp olt float %temp80.0259.temp32.0265, %224 %250 = xor i1 %248, %249 %temp4.1 = select i1 %250, float %temp4.0, float 0.000000e+00 %251 = fadd float %.275, %temp36.1 %.275.temp36.1 = select i1 %247, float %.275, float %temp36.1 %252 = fdiv float 1.000000e+00, %251 %253 = fmul float %252, %.275.temp36.1 %254 = fsub float 5.000000e-01, %253 %255 = fmul float %254, %temp4.1 %temp4.2 = select i1 %216, float 0.000000e+00, float %255 %256 = fadd float %30, %temp4.2 %.276 = select i1 %216, float %255, float 0.000000e+00 %257 = fadd float %31, %.276 %258 = bitcast float %256 to i32 %259 = bitcast float %257 to i32 %260 = insertelement <4 x i32> undef, i32 %258, i32 0 %261 = insertelement <4 x i32> %260, i32 %259, i32 1 %262 = insertelement <4 x i32> %261, i32 0, i32 2 %263 = bitcast <8 x i32> %27 to <32 x i8> %264 = bitcast <4 x i32> %29 to <16 x i8> %265 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %262, <32 x i8> %263, <16 x i8> %264, i32 2) %266 = extractelement <4 x float> %265, i32 0 %267 = extractelement <4 x float> %265, i32 1 %268 = extractelement <4 x float> %265, i32 2 %269 = fmul float %110, %171 %270 = fadd float %269, %266 %271 = fmul float %110, %172 %272 = fadd float %271, %267 %273 = fmul float %110, %173 %274 = fadd float %273, %268 %275 = fmul float %110, %266 %276 = fsub float %270, %275 %277 = fmul float %110, %267 %278 = fsub float %272, %277 %279 = fmul float %110, %268 %280 = fsub float %274, %279 br label %ENDIF ENDIF219: ; preds = %LOOP %281 = bitcast float %temp68.0 to i32 %282 = icmp eq i32 %281, -1 br i1 %282, label %ENDIF222, label %IF223 IF223: ; preds = %ENDIF219 %283 = bitcast float %temp12.2 to i32 %284 = bitcast float %temp13.1 to i32 %285 = insertelement <4 x i32> undef, i32 %283, i32 0 %286 = insertelement <4 x i32> %285, i32 %284, i32 1 %287 = insertelement <4 x i32> %286, i32 0, i32 2 %288 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %287, <32 x i8> %233, <16 x i8> %234, i32 2) %289 = extractelement <4 x float> %288, i32 0 %290 = extractelement <4 x float> %288, i32 1 %291 = fmul float %290, 0x3FFF694EE0000000 %292 = fadd float %291, %289 br label %ENDIF222 ENDIF222: ; preds = %ENDIF219, %IF223 %temp100.0 = phi float [ %292, %IF223 ], [ %temp80.0, %ENDIF219 ] %293 = bitcast float %temp56.0 to i32 %294 = icmp eq i32 %293, -1 br i1 %294, label %ENDIF225, label %IF226 IF226: ; preds = %ENDIF222 %295 = bitcast float %temp84.0 to i32 %296 = bitcast float %temp85.0 to i32 %297 = insertelement <4 x i32> undef, i32 %295, i32 0 %298 = insertelement <4 x i32> %297, i32 %296, i32 1 %299 = insertelement <4 x i32> %298, i32 0, i32 2 %300 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %299, <32 x i8> %235, <16 x i8> %236, i32 2) %301 = extractelement <4 x float> %300, i32 0 %302 = extractelement <4 x float> %300, i32 1 %303 = fmul float %302, 0x3FFF694EE0000000 %304 = fadd float %303, %301 br label %ENDIF225 ENDIF225: ; preds = %ENDIF222, %IF226 %temp120.0 = phi float [ %304, %IF226 ], [ %temp32.0, %ENDIF222 ] %305 = fsub float %temp100.0, %224 %306 = call float @fabs(float %305) %307 = fcmp oge float %306, %228 %308 = select i1 %307, float 0xFFFFFFFFE0000000, float %temp68.0 %309 = fsub float %temp120.0, %224 %310 = call float @fabs(float %309) %311 = fcmp oge float %310, %228 %312 = select i1 %311, float 0xFFFFFFFFE0000000, float %temp56.0 %313 = bitcast float %308 to i32 %314 = bitcast float %312 to i32 %315 = and i32 %313, %314 %316 = icmp eq i32 %315, 0 br i1 %316, label %ENDIF228, label %ENDLOOP ENDIF228: ; preds = %ENDIF225 %317 = fsub float %temp12.2, %.274 %318 = fsub float %temp13.1, %.273 %319 = bitcast float %308 to i32 %320 = icmp ne i32 %319, 0 %temp12.2. = select i1 %320, float %temp12.2, float %317 %temp13.1. = select i1 %320, float %temp13.1, float %318 %321 = fadd float %temp84.0, %.274 %322 = fadd float %temp85.0, %.273 %323 = bitcast float %312 to i32 %324 = icmp ne i32 %323, 0 %temp188.0 = select i1 %324, float %temp84.0, float %321 %temp189.0 = select i1 %324, float %temp85.0, float %322 %325 = bitcast float %temp24.0 to i32 %326 = add i32 %325, 1 %327 = bitcast i32 %326 to float br label %LOOP } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 v_interp_p1_f32 v3, v0, 0, 0, [m0] ; C80C0000 v_interp_p2_f32 v3, [v3], v1, 0, 0, [m0] ; C80D0001 v_interp_p1_f32 v4, v0, 1, 0, [m0] ; C8100100 v_interp_p2_f32 v4, [v4], v1, 1, 0, [m0] ; C8110101 v_mov_b32_e32 v5, 0 ; 7E0A0280 v_mov_b32_e32 v2, 0x3f00 ; 7E0402FF 00003F00 v_mov_b32_e32 v9, 0x3f003f ; 7E1202FF 003F003F v_mov_b32_e32 v10, v3 ; 7E140303 v_mov_b32_e32 v11, v4 ; 7E160304 v_mov_b32_e32 v12, v5 ; 7E180305 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l_o v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[2:5], s[12:19], s[8:11] ; F0D00F00 00430E02 v_mov_b32_e32 v6, v3 ; 7E0C0303 v_mov_b32_e32 v7, v4 ; 7E0E0304 v_mov_b32_e32 v24, 0x10001 ; 7E3002FF 00010001 v_mov_b32_e32 v8, v5 ; 7E100305 v_mov_b32_e32 v25, v3 ; 7E320303 v_mov_b32_e32 v28, 0x100 ; 7E3802FF 00000100 v_mov_b32_e32 v29, v3 ; 7E3A0303 v_mov_b32_e32 v26, v4 ; 7E340304 image_sample_l_o v[20:23], 15, 0, 0, 0, 0, 0, 0, 0, v[9:12], s[12:19], s[8:11] ; F0D00F00 00431409 v_mov_b32_e32 v30, v4 ; 7E3C0304 v_mov_b32_e32 v27, v5 ; 7E360305 s_waitcnt vmcnt(1) ; BF8C0771 image_sample_l_o v[17:19], 7, 0, 0, 0, 0, 0, 0, 0, v[5:8], s[12:19], s[8:11] ; F0D00700 00431105 s_waitcnt vmcnt(1) ; BF8C0771 image_sample_l_o v[23:26], 15, 0, 0, 0, 0, 0, 0, 0, v[24:27], s[12:19], s[8:11] ; F0D00F00 00431718 v_mov_b32_e32 v31, v5 ; 7E3E0305 image_sample_l_o v[5:8], 15, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[12:19], s[8:11] ; F0D00F00 0043051C v_mov_b32_e32 v0, 0x3ffb4a77 ; 7E0002FF 3FFB4A77 v_mad_f32 v10, v0, v15, v14 ; D282000A 043A1F00 v_mad_f32 v11, v0, v21, v20 ; D282000B 04522B00 s_waitcnt vmcnt(2) ; BF8C0772 v_mad_f32 v1, v0, v18, v17 ; D2820001 04462500 s_waitcnt vmcnt(1) ; BF8C0771 v_mad_f32 v12, v0, v24, v23 ; D282000C 045E3100 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v13, v0, v6, v5 ; D282000D 04160D00 v_max3_f32 v0, v11, v13, v12 ; D2A80000 04321B0B v_max3_f32 v2, v1, v10, v0 ; D2A80002 04021501 v_min3_f32 v0, v11, v13, v12 ; D2A20000 04321B0B v_min3_f32 v0, v1, v10, v0 ; D2A20000 04021501 v_subrev_f32_e32 v0, v0, v2 ; 0A000500 v_mul_f32_e32 v2, 0x3e000000, v2 ; 100404FF 3E000000 v_max_f32_e32 v2, 0x3d2aaab4, v2 ; 200404FF 3D2AAAB4 v_cmp_nlt_f32_e32 vcc, v0, v2 ; 7C1C0500 s_and_saveexec_b64 s[6:7], vcc ; BE86246A s_xor_b64 s[6:7], exec, s[6:7] ; 8986067E s_cbranch_execz BB0_3 ; BF880000 v_rcp_f32_e32 v0, v0 ; 7E005500 s_load_dwordx4 s[20:23], s[2:3], 0x4 ; C08A0304 v_add_f32_e32 v2, v10, v13 ; 06041B0A v_add_f32_e32 v8, v12, v11 ; 0610170C v_add_f32_e32 v2, v8, v2 ; 06040508 v_mov_b32_e32 v26, 0x3e800000 ; 7E3402FF 3E800000 v_mad_f32 v2, v2, v26, -v1 ; D2820002 84063502 v_mov_b32_e32 v8, 0xbe800000 ; 7E1002FF BE800000 v_mad_f32 v0, |v2|, v0, v8 ; D2820100 04220102 v_max_f32_e32 v0, 0, v0 ; 20000080 v_mul_f32_e32 v0, 0x3faaaa8f, v0 ; 100000FF 3FAAAA8F v_min_f32_e32 v0, 0x3f400000, v0 ; 1E0000FF 3F400000 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[20:23], 0x0 ; C2021500 s_buffer_load_dword s5, s[20:23], 0x1 ; C2029501 v_mov_b32_e32 v2, 0 ; 7E040280 v_add_f32_e32 v8, v20, v14 ; 06101D14 v_add_f32_e32 v9, v21, v15 ; 06121F15 v_add_f32_e32 v14, v22, v16 ; 061C2116 v_add_f32_e32 v15, v23, v17 ; 061E2317 v_add_f32_e32 v5, v5, v15 ; 060A1F05 v_add_f32_e32 v15, v24, v18 ; 061E2518 v_add_f32_e32 v16, v25, v19 ; 06202719 v_add_f32_e32 v6, v6, v15 ; 060C1F06 v_mov_b32_e32 v20, 0 ; 7E280280 v_mov_b32_e32 v17, 0x3f3f3f ; 7E2202FF 003F3F3F v_mov_b32_e32 v18, v3 ; 7E240303 v_mov_b32_e32 v19, v4 ; 7E260304 image_sample_l_o v[21:23], 7, 0, 0, 0, 0, 0, 0, 0, v[17:20], s[12:19], s[8:11] ; F0D00700 00431511 v_mov_b32_e32 v17, 0x13f01 ; 7E2202FF 00013F01 v_mov_b32_e32 v18, v3 ; 7E240303 v_mov_b32_e32 v19, v4 ; 7E260304 image_sample_l_o v[27:29], 7, 0, 0, 0, 0, 0, 0, 0, v[17:20], s[12:19], s[8:11] ; F0D00700 00431B11 v_mov_b32_e32 v17, 0x3f013f ; 7E2202FF 003F013F v_mov_b32_e32 v18, v3 ; 7E240303 v_add_f32_e32 v7, v7, v16 ; 060E2107 v_mov_b32_e32 v19, v4 ; 7E260304 image_sample_l_o v[30:32], 7, 0, 0, 0, 0, 0, 0, 0, v[17:20], s[12:19], s[8:11] ; F0D00700 00431E11 v_mov_b32_e32 v17, 0x10101 ; 7E2202FF 00010101 s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_add_f32_e32 v15, v21, v27 ; 061E3715 v_add_f32_e32 v16, v22, v28 ; 06203916 v_add_f32_e32 v23, v23, v29 ; 062E3B17 v_mov_b32_e32 v24, 0x3ffb4a77 ; 7E3002FF 3FFB4A77 v_mad_f32 v21, v24, v22, v21 ; D2820015 04562D18 v_mov_b32_e32 v18, v3 ; 7E240303 v_mad_f32 v22, v24, v28, v27 ; D2820016 046E3918 v_mov_b32_e32 v19, v4 ; 7E260304 image_sample_l_o v[17:19], 7, 0, 0, 0, 0, 0, 0, 0, v[17:20], s[12:19], s[8:11] ; F0D00700 00431111 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v20, v17, v30 ; 06283D11 v_add_f32_e32 v25, v18, v31 ; 06323F12 v_add_f32_e32 v19, v19, v32 ; 06264113 v_mad_f32 v27, v24, v31, v30 ; D282001B 047A3F18 v_mad_f32 v17, v24, v18, v17 ; D2820011 04462518 v_add_f32_e32 v15, v20, v15 ; 061E1F14 v_add_f32_e32 v16, v25, v16 ; 06202119 v_add_f32_e32 v18, v19, v23 ; 06242F13 v_add_f32_e32 v5, v5, v8 ; 060A1105 v_add_f32_e32 v6, v6, v9 ; 060C1306 v_add_f32_e32 v7, v7, v14 ; 060E1D07 v_add_f32_e32 v5, v5, v15 ; 060A1F05 v_add_f32_e32 v6, v6, v16 ; 060C2106 v_add_f32_e32 v8, v7, v18 ; 06102507 v_mov_b32_e32 v9, 0x3de38e2a ; 7E1202FF 3DE38E2A v_mul_f32_e32 v7, v9, v5 ; 100E0B09 v_mul_f32_e32 v6, v9, v6 ; 100C0D09 v_mul_f32_e32 v5, v9, v8 ; 100A1109 v_mul_f32_e32 v9, v26, v21 ; 10122B1A v_mad_f32 v8, -0.5, v11, v9 ; D2820008 042616F1 v_mac_f32_e32 v8, v26, v27 ; 3E10371A v_mad_f32 v14, 0.5, v10, -v1 ; D282000E 840614F0 v_mac_f32_e32 v14, 0.5, v13 ; 3E1C1AF0 v_add_f32_e64 v8, |v8|, |v14| ; D2060308 00021D08 v_mul_f32_e32 v14, v26, v22 ; 101C2D1A v_mul_f32_e32 v15, v26, v27 ; 101E371A v_mac_f32_e32 v14, -0.5, v12 ; 3E1C18F1 v_mac_f32_e32 v14, v26, v17 ; 3E1C231A v_add_f32_e64 v8, v8, |v14| ; D2060208 00021D08 v_mac_f32_e32 v9, -0.5, v10 ; 3E1214F1 v_mac_f32_e32 v9, v26, v22 ; 3E122D1A v_mad_f32 v14, 0.5, v11, -v1 ; D282000E 840616F0 v_mac_f32_e32 v14, 0.5, v12 ; 3E1C18F0 v_add_f32_e64 v9, |v9|, |v14| ; D2060309 00021D09 v_mac_f32_e32 v15, -0.5, v13 ; 3E1E1AF1 v_mac_f32_e32 v15, v26, v17 ; 3E1E231A v_add_f32_e64 v9, v9, |v15| ; D2060209 00021F09 v_cmp_ge_f32_e32 vcc, v8, v9 ; 7C0C1308 v_mov_b32_e32 v14, s4 ; 7E1C0204 v_mov_b32_e32 v15, s5 ; 7E1E0205 v_cndmask_b32_e32 v14, v14, v15 ; 001C1F0E v_cndmask_b32_e32 v10, v11, v10 ; 0014150B v_cndmask_b32_e32 v11, v12, v13 ; 00161B0C v_subrev_f32_e32 v12, v1, v10 ; 0A181501 v_mov_b32_e32 v13, 0x7fffffff ; 7E1A02FF 7FFFFFFF v_and_b32_e32 v15, v12, v13 ; 361E1B0C v_subrev_f32_e32 v16, v1, v11 ; 0A201701 v_and_b32_e32 v13, v16, v13 ; 361A1B10 v_cmp_ge_f32_e64 s[0:1], |v12|, |v16| ; D00C0300 0002210C v_cndmask_b32_e64 v10, v11, v10, s[0:1] ; D200000A 0002150B v_mov_b32_e32 v16, s5 ; 7E200205 v_mov_b32_e32 v17, s4 ; 7E220204 v_cndmask_b32_e64 v12, v13, v15, s[0:1] ; D200000C 00021F0D v_xor_b32_e32 v13, 0x80000000, v14 ; 3A1A1CFF 80000000 v_add_f32_e32 v10, v10, v1 ; 0614030A v_mul_f32_e32 v11, 0.5, v10 ; 101614F0 v_cndmask_b32_e64 v10, v14, v13, s[0:1] ; D200000A 00021B0E v_mul_f32_e32 v13, 0.5, v10 ; 101A14F0 v_cndmask_b32_e64 v14, v13, 0, vcc ; D200000E 01A9010D v_add_f32_e32 v15, v14, v3 ; 061E070E v_cndmask_b32_e32 v13, 0, v13 ; 001A1A80 v_add_f32_e32 v13, v13, v4 ; 061A090D v_mul_f32_e32 v12, v26, v12 ; 1018191A v_cndmask_b32_e64 v14, v16, 0, vcc ; D200000E 01A90110 v_cndmask_b32_e32 v16, 0, v17 ; 00202280 v_add_f32_e32 v27, v16, v15 ; 06361F10 v_add_f32_e32 v28, v14, v13 ; 06381B0E v_subrev_f32_e32 v26, v16, v15 ; 0A341F10 v_subrev_f32_e32 v25, v14, v13 ; 0A321B0E v_mov_b32_e32 v20, v11 ; 7E28030B v_mov_b32_e32 v17, 0 ; 7E220280 v_mov_b32_e32 v23, v11 ; 7E2E030B v_mov_b32_e32 v24, 0 ; 7E300280 s_mov_b64 s[0:1], 0 ; BE800480 v_mov_b32_e32 v13, v23 ; 7E1A0317 v_mov_b32_e32 v15, v20 ; 7E1E0314 v_cmp_gt_i32_e32 vcc, 16, v24 ; 7D083090 v_mov_b32_e32 v18, v26 ; 7E24031A v_mov_b32_e32 v22, v28 ; 7E2C031C v_mov_b32_e32 v19, v25 ; 7E260319 v_mov_b32_e32 v21, v27 ; 7E2A031B s_and_saveexec_b64 s[4:5], vcc ; BE84246A s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E s_cbranch_execz BB0_9 ; BF880000 v_cmp_ne_i32_e32 vcc, -1, v2 ; 7D0A04C1 v_mov_b32_e32 v20, v15 ; 7E28030F s_and_saveexec_b64 s[20:21], vcc ; BE94246A s_xor_b64 s[20:21], exec, s[20:21] ; 8994147E v_mov_b32_e32 v20, 0 ; 7E280280 image_sample_l v[25:26], 3, 0, 0, 0, 0, 0, 0, 0, v[18:21], s[12:19], s[8:11] ; F0900300 00431912 s_waitcnt vmcnt(0) ; BF8C0770 v_madmk_f32_e32 v20, v26, v25, 0x3ffb4a77 ; 4028331A 3FFB4A77 s_or_b64 exec, exec, s[20:21] ; 88FE147E v_cmp_ne_i32_e32 vcc, -1, v17 ; 7D0A22C1 v_mov_b32_e32 v23, v13 ; 7E2E030D s_and_saveexec_b64 s[20:21], vcc ; BE94246A s_xor_b64 s[20:21], exec, s[20:21] ; 8994147E v_mov_b32_e32 v23, 0 ; 7E2E0280 image_sample_l v[25:26], 3, 0, 0, 0, 0, 0, 0, 0, v[21:24], s[12:19], s[8:11] ; F0900300 00431915 s_waitcnt vmcnt(0) ; BF8C0770 v_madmk_f32_e32 v23, v26, v25, 0x3ffb4a77 ; 402E331A 3FFB4A77 s_or_b64 exec, exec, s[20:21] ; 88FE147E v_subrev_f32_e32 v25, v11, v20 ; 0A32290B v_cmp_ge_f32_e64 s[20:21], |v25|, v12 ; D00C0114 00021919 v_cndmask_b32_e64 v2, v2, -1, s[20:21] ; D2000002 00518302 v_subrev_f32_e32 v25, v11, v23 ; 0A322F0B v_cmp_ge_f32_e64 s[20:21], |v25|, v12 ; D00C0114 00021919 v_cndmask_b32_e64 v17, v17, -1, s[20:21] ; D2000011 00518311 v_and_b32_e32 v25, v17, v2 ; 36320511 v_cmp_eq_i32_e32 vcc, 0, v25 ; 7D043280 s_and_saveexec_b64 s[20:21], vcc ; BE94246A s_xor_b64 s[20:21], exec, s[20:21] ; 8994147E s_cbranch_execz BB0_5 ; BF880000 v_subrev_f32_e32 v25, v16, v18 ; 0A322510 v_subrev_f32_e32 v27, v14, v19 ; 0A36270E v_cmp_ne_i32_e32 vcc, 0, v2 ; 7D0A0480 v_cndmask_b32_e32 v26, v25, v18 ; 00342519 v_cndmask_b32_e32 v25, v27, v19 ; 0032271B v_add_f32_e32 v27, v16, v21 ; 06362B10 v_add_f32_e32 v28, v14, v22 ; 06382D0E v_cmp_ne_i32_e32 vcc, 0, v17 ; 7D0A2280 v_cndmask_b32_e32 v27, v27, v21 ; 00362B1B v_cndmask_b32_e32 v28, v28, v22 ; 00382D1C v_add_i32_e32 v24, 1, v24 ; 4A303081 s_or_b64 exec, exec, s[20:21] ; 88FE147E s_or_b64 s[0:1], s[20:21], s[0:1] ; 88800014 s_or_b64 exec, exec, s[4:5] ; 88FE047E s_or_b64 s[0:1], s[4:5], s[0:1] ; 88800004 s_andn2_b64 exec, exec, s[0:1] ; 8AFE007E s_cbranch_execnz BB0_4 ; BF890000 s_or_b64 exec, exec, s[0:1] ; 88FE007E v_cmp_ge_f32_e32 vcc, v8, v9 ; 7C0C1308 v_cmp_lt_f32_e64 s[0:1], v1, v11 ; D0020000 00021701 v_subrev_f32_e32 v1, v18, v3 ; 0A020712 v_subrev_f32_e32 v2, v19, v4 ; 0A040913 v_subrev_f32_e32 v8, v3, v21 ; 0A102B03 v_subrev_f32_e32 v9, v4, v22 ; 0A122D04 v_cndmask_b32_e32 v1, v2, v1 ; 00020302 v_cndmask_b32_e32 v2, v9, v8 ; 00041109 v_cmp_lt_f32_e64 s[4:5], v1, v2 ; D0020004 00020501 v_cndmask_b32_e64 v8, v13, v15, s[4:5] ; D2000008 00121F0D v_cndmask_b32_e64 v9, v2, v1, s[4:5] ; D2000009 00120302 v_add_f32_e32 v1, v2, v1 ; 06020302 v_mov_b32_e32 v2, 0x6f800000 ; 7E0402FF 6F800000 v_cmp_gt_f32_e64 s[4:5], |v1|, v2 ; D0080104 00020501 v_mov_b32_e32 v2, 0x2f800000 ; 7E0402FF 2F800000 v_cndmask_b32_e64 v2, 1.0, v2, s[4:5] ; D2000002 001204F2 v_mul_f32_e32 v1, v2, v1 ; 10020302 v_rcp_f32_e32 v1, v1 ; 7E025501 v_cmp_lt_f32_e64 s[4:5], v8, v11 ; D0020004 00021708 s_xor_b64 s[0:1], s[0:1], s[4:5] ; 89800400 v_cndmask_b32_e64 v8, 0, v10, s[0:1] ; D2000008 00021480 v_mul_f32_e32 v1, v1, v2 ; 10020501 v_mad_f32 v1, -v1, v9, 0.5 ; D2820001 23C21301 v_mul_f32_e32 v1, v8, v1 ; 10020308 v_cndmask_b32_e64 v2, v1, 0, vcc ; D2000002 01A90101 v_cndmask_b32_e32 v1, 0, v1 ; 00020280 v_add_f32_e32 v8, v2, v3 ; 06100702 v_add_f32_e32 v9, v1, v4 ; 06120901 v_mov_b32_e32 v10, 0 ; 7E140280 image_sample_l v[1:3], 7, 0, 0, 0, 0, 0, 0, 0, v[8:11], s[12:19], s[8:11] ; F0900700 00430108 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v4, v7, v0, v1 ; D2820004 04060107 v_mad_f32 v6, v6, v0, v2 ; D2820006 040A0106 v_mad_f32 v5, v5, v0, v3 ; D2820005 040E0105 v_mad_f32 v17, -v0, v1, v4 ; D2820011 24120300 v_mad_f32 v18, -v0, v2, v6 ; D2820012 241A0500 v_mad_f32 v19, -v0, v3, v5 ; D2820013 24160700 s_or_b64 exec, exec, s[6:7] ; 88FE067E v_cvt_pkrtz_f16_f32_e32 v0, v17, v18 ; 5E002511 v_cvt_pkrtz_f16_f32_e64 v1, v19, 1.0 ; D25E0001 0001E513 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 36 Code Size: 1440 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..47] DCL TEMP[0..2], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 510.0200, 0.1000} IMM[1] FLT32 { 1.1000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: MUL TEMP[1].x, IMM[0].zzzz, IN[1].zzzz 2: ADD TEMP[2].x, TEMP[1].xxxx, IMM[0].wwww 3: F2I TEMP[2].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: UARL ADDR[0].x, TEMP[2].xxxx 6: DP4 TEMP[0].x, IN[2], CONST[ADDR[0].x] 7: ADD TEMP[1].x, IMM[1].xxxx, TEMP[1].xxxx 8: F2I TEMP[1].x, TEMP[1].xxxx 9: UARL ADDR[0].x, TEMP[1].xxxx 10: DP4 TEMP[1].x, IN[2], CONST[ADDR[0].x] 11: MOV TEMP[0].y, TEMP[1].xxxx 12: MOV OUT[2], IN[1] 13: MOV OUT[1], IN[0] 14: MOV OUT[0], TEMP[0] 15: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %14 = load <16 x i8>, <16 x i8> addrspace(2)* %13, align 16, !tbaa !0 %15 = add i32 %5, %7 %16 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %14, i32 0, i32 %15) %17 = extractelement <4 x float> %16, i32 0 %18 = extractelement <4 x float> %16, i32 1 %19 = extractelement <4 x float> %16, i32 2 %20 = extractelement <4 x float> %16, i32 3 %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = extractelement <4 x float> %24, i32 1 %27 = extractelement <4 x float> %24, i32 2 %28 = extractelement <4 x float> %24, i32 3 %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = fmul float %27, 0x407FE051E0000000 %38 = fadd float %37, 0x3FB99999A0000000 %39 = fptosi float %38 to i32 %40 = shl i32 %39, 4 %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %40) %42 = shl i32 %39, 4 %43 = or i32 %42, 4 %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %43) %45 = shl i32 %39, 4 %46 = or i32 %45, 8 %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %46) %48 = shl i32 %39, 4 %49 = or i32 %48, 12 %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %49) %51 = fmul float %33, %41 %52 = fmul float %34, %44 %53 = fadd float %51, %52 %54 = fmul float %35, %47 %55 = fadd float %53, %54 %56 = fmul float %36, %50 %57 = fadd float %55, %56 %58 = fadd float %37, 0x3FF19999A0000000 %59 = fptosi float %58 to i32 %60 = shl i32 %59, 4 %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %60) %62 = shl i32 %59, 4 %63 = or i32 %62, 4 %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %63) %65 = shl i32 %59, 4 %66 = or i32 %65, 8 %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %66) %68 = shl i32 %59, 4 %69 = or i32 %68, 12 %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %69) %71 = fmul float %33, %61 %72 = fmul float %34, %64 %73 = fadd float %71, %72 %74 = fmul float %35, %67 %75 = fadd float %73, %74 %76 = fmul float %36, %70 %77 = fadd float %75, %76 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %25, float %26, float %27, float %28) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %57, float %77, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_add_i32_e32 v0, s10, v0 ; 4A00000A v_mov_b32_e32 v1, 0x43ff028f ; 7E0202FF 43FF028F s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[12:15], 0 idxen ; E00C2000 80030600 buffer_load_format_xyzw v[10:13], v0, s[8:11], 0 idxen ; E00C2000 80020A00 s_waitcnt vmcnt(1) ; BF8C0771 v_madak_f32_e32 v0, v8, v1, 0x3dcccccd ; 42000308 3DCCCCCD v_madak_f32_e32 v1, v8, v1, 0x3f8ccccd ; 42020308 3F8CCCCD v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_lshlrev_b32_e32 v0, 4, v0 ; 34000084 v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 buffer_load_dword v14, v0, s[0:3], 0 offen ; E0301000 80000E00 v_or_b32_e32 v15, 4, v0 ; 381E0084 v_or_b32_e32 v16, 8, v0 ; 38200088 v_or_b32_e32 v0, 12, v0 ; 3800008C buffer_load_dword v15, v15, s[0:3], 0 offen ; E0301000 80000F0F v_or_b32_e32 v17, 4, v1 ; 38220284 buffer_load_dword v17, v17, s[0:3], 0 offen ; E0301000 80001111 buffer_load_dword v18, v1, s[0:3], 0 offen ; E0301000 80001201 v_or_b32_e32 v19, 8, v1 ; 38260288 v_or_b32_e32 v1, 12, v1 ; 3802028C buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 buffer_load_dword v19, v19, s[0:3], 0 offen ; E0301000 80001313 buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 s_waitcnt vmcnt(6) ; BF8C0776 v_mul_f32_e32 v15, v15, v11 ; 101E170F v_mac_f32_e32 v15, v14, v10 ; 3E1E150E s_waitcnt vmcnt(5) ; BF8C0775 v_mul_f32_e32 v11, v17, v11 ; 10161711 s_waitcnt vmcnt(4) ; BF8C0774 v_mac_f32_e32 v11, v18, v10 ; 3E161512 s_waitcnt vmcnt(3) ; BF8C0773 v_mac_f32_e32 v15, v16, v12 ; 3E1E1910 s_waitcnt vmcnt(2) ; BF8C0772 v_mac_f32_e32 v11, v19, v12 ; 3E161913 s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v15, v0, v13 ; 3E1E1B00 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v11, v1, v13 ; 3E161B01 exp 15, 32, 0, 0, 0, v2, v3, v4, v5 ; F800020F 05040302 exp 15, 33, 0, 0, 0, v6, v7, v8, v9 ; F800021F 09080706 v_mov_b32_e32 v0, 1.0 ; 7E0002F2 v_mov_b32_e32 v1, 0 ; 7E020280 exp 15, 12, 0, 1, 0, v15, v11, v1, v0 ; F80008CF 00010B0F s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 20 Code Size: 276 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xyz, IN[0].xyzx 1: MUL TEMP[1].x, IN[0].wwww, IN[1].wwww 2: MOV TEMP[0].w, TEMP[1].xxxx 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %23 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %24 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %25 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %26 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %27 = fmul float %25, %26 %28 = call i32 @llvm.SI.packf16(float %22, float %23) %29 = bitcast i32 %28 to float %30 = call i32 @llvm.SI.packf16(float %24, float %27) %31 = bitcast i32 %30 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %29, float %31, float %29, float %31) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v0, v0, 3, 1, [m0] ; C8000700 v_interp_p2_f32 v0, [v0], v1, 3, 1, [m0] ; C8010701 v_mul_f32_e32 v0, v0, v5 ; 10000B00 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 v_cvt_pkrtz_f16_f32_e32 v0, v4, v0 ; 5E000104 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 68 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL CONST[0..95] DCL TEMP[0..2], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 1020.0400, 2.1000} IMM[1] FLT32 { 3.1000, 255.0100, 4.0000, 0.1000} IMM[2] FLT32 { 1.1000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: MUL TEMP[1].x, IMM[0].zzzz, IN[1].zzzz 2: ADD TEMP[2].x, IMM[0].wwww, TEMP[1].xxxx 3: F2I TEMP[2].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: UARL ADDR[0].x, TEMP[2].xxxx 6: DP4 TEMP[0].x, IN[2], CONST[ADDR[0].x] 7: ADD TEMP[1].x, IMM[1].xxxx, TEMP[1].xxxx 8: F2I TEMP[1].x, TEMP[1].xxxx 9: UARL ADDR[0].x, TEMP[1].xxxx 10: DP4 TEMP[1].x, IN[2], CONST[ADDR[0].x] 11: MOV TEMP[0].y, TEMP[1].xxxx 12: MUL TEMP[1].x, IN[1].zzzz, IMM[1].yyyy 13: MAD TEMP[2].x, TEMP[1].xxxx, IMM[1].zzzz, IMM[1].wwww 14: F2I TEMP[2].x, TEMP[2].xxxx 15: UARL ADDR[0].x, TEMP[2].xxxx 16: MOV TEMP[2], CONST[ADDR[0].x] 17: MAD TEMP[1].x, TEMP[1].xxxx, IMM[1].zzzz, IMM[2].xxxx 18: F2I TEMP[1].x, TEMP[1].xxxx 19: UARL ADDR[0].x, TEMP[1].xxxx 20: MOV TEMP[1], CONST[ADDR[0].x] 21: MOV OUT[2], IN[1] 22: MOV OUT[3], TEMP[2] 23: MOV OUT[1], IN[0] 24: MOV OUT[4], TEMP[1] 25: MOV OUT[0], TEMP[0] 26: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %14 = load <16 x i8>, <16 x i8> addrspace(2)* %13, align 16, !tbaa !0 %15 = add i32 %5, %7 %16 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %14, i32 0, i32 %15) %17 = extractelement <4 x float> %16, i32 0 %18 = extractelement <4 x float> %16, i32 1 %19 = extractelement <4 x float> %16, i32 2 %20 = extractelement <4 x float> %16, i32 3 %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = extractelement <4 x float> %24, i32 1 %27 = extractelement <4 x float> %24, i32 2 %28 = extractelement <4 x float> %24, i32 3 %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = fmul float %27, 0x408FE051E0000000 %38 = fadd float %37, 0x4000CCCCC0000000 %39 = fptosi float %38 to i32 %40 = shl i32 %39, 4 %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %40) %42 = shl i32 %39, 4 %43 = or i32 %42, 4 %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %43) %45 = shl i32 %39, 4 %46 = or i32 %45, 8 %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %46) %48 = shl i32 %39, 4 %49 = or i32 %48, 12 %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %49) %51 = fmul float %33, %41 %52 = fmul float %34, %44 %53 = fadd float %51, %52 %54 = fmul float %35, %47 %55 = fadd float %53, %54 %56 = fmul float %36, %50 %57 = fadd float %55, %56 %58 = fadd float %37, 0x4008CCCCC0000000 %59 = fptosi float %58 to i32 %60 = shl i32 %59, 4 %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %60) %62 = shl i32 %59, 4 %63 = or i32 %62, 4 %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %63) %65 = shl i32 %59, 4 %66 = or i32 %65, 8 %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %66) %68 = shl i32 %59, 4 %69 = or i32 %68, 12 %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %69) %71 = fmul float %33, %61 %72 = fmul float %34, %64 %73 = fadd float %71, %72 %74 = fmul float %35, %67 %75 = fadd float %73, %74 %76 = fmul float %36, %70 %77 = fadd float %75, %76 %78 = fmul float %27, 0x406FE051E0000000 %79 = fmul float %78, 4.000000e+00 %80 = fadd float %79, 0x3FB99999A0000000 %81 = fptosi float %80 to i32 %82 = shl i32 %81, 4 %83 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %82) %84 = shl i32 %81, 4 %85 = or i32 %84, 4 %86 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %85) %87 = shl i32 %81, 4 %88 = or i32 %87, 8 %89 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %88) %90 = shl i32 %81, 4 %91 = or i32 %90, 12 %92 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %91) %93 = fmul float %78, 4.000000e+00 %94 = fadd float %93, 0x3FF19999A0000000 %95 = fptosi float %94 to i32 %96 = shl i32 %95, 4 %97 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %96) %98 = shl i32 %95, 4 %99 = or i32 %98, 4 %100 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %99) %101 = shl i32 %95, 4 %102 = or i32 %101, 8 %103 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %102) %104 = shl i32 %95, 4 %105 = or i32 %104, 12 %106 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %105) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %25, float %26, float %27, float %28) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %83, float %86, float %89, float %92) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %97, float %100, float %103, float %106) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %57, float %77, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 v_add_i32_e32 v0, s10, v0 ; 4A00000A v_mov_b32_e32 v1, 0x447f028f ; 7E0202FF 447F028F s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[12:15], 0 idxen ; E00C2000 80030600 buffer_load_format_xyzw v[10:13], v0, s[16:19], 0 idxen ; E00C2000 80040A00 s_waitcnt vmcnt(1) ; BF8C0771 v_madak_f32_e32 v0, v8, v1, 0x40066666 ; 42000308 40066666 v_madak_f32_e32 v1, v8, v1, 0x40466666 ; 42020308 40466666 v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_mul_f32_e32 v14, 0x437f028f, v8 ; 101C10FF 437F028F v_madak_f32_e32 v15, 4.0, v14, 0x3dcccccd ; 421E1CF6 3DCCCCCD v_cvt_i32_f32_e32 v15, v15 ; 7E1E110F v_madak_f32_e32 v14, 4.0, v14, 0x3f8ccccd ; 421C1CF6 3F8CCCCD v_cvt_i32_f32_e32 v14, v14 ; 7E1C110E v_lshlrev_b32_e32 v0, 4, v0 ; 34000084 v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 buffer_load_dword v16, v0, s[0:3], 0 offen ; E0301000 80001000 v_or_b32_e32 v17, 4, v0 ; 38220084 v_or_b32_e32 v18, 8, v0 ; 38240088 v_or_b32_e32 v0, 12, v0 ; 3800008C buffer_load_dword v19, v1, s[0:3], 0 offen ; E0301000 80001301 v_or_b32_e32 v20, 4, v1 ; 38280284 v_or_b32_e32 v21, 8, v1 ; 382A0288 v_or_b32_e32 v1, 12, v1 ; 3802028C v_lshlrev_b32_e32 v15, 4, v15 ; 341E1E84 v_lshlrev_b32_e32 v14, 4, v14 ; 341C1C84 buffer_load_dword v17, v17, s[0:3], 0 offen ; E0301000 80001111 buffer_load_dword v18, v18, s[0:3], 0 offen ; E0301000 80001212 buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 buffer_load_dword v20, v20, s[0:3], 0 offen ; E0301000 80001414 buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 buffer_load_dword v22, v15, s[0:3], 0 offen ; E0301000 8000160F v_or_b32_e32 v23, 4, v15 ; 382E1E84 v_or_b32_e32 v24, 8, v15 ; 38301E88 v_or_b32_e32 v15, 12, v15 ; 381E1E8C buffer_load_dword v25, v14, s[0:3], 0 offen ; E0301000 8000190E v_or_b32_e32 v26, 4, v14 ; 38341C84 v_or_b32_e32 v27, 8, v14 ; 38361C88 v_or_b32_e32 v14, 12, v14 ; 381C1C8C buffer_load_dword v23, v23, s[0:3], 0 offen ; E0301000 80001717 buffer_load_dword v24, v24, s[0:3], 0 offen ; E0301000 80001818 buffer_load_dword v15, v15, s[0:3], 0 offen ; E0301000 80000F0F buffer_load_dword v26, v26, s[0:3], 0 offen ; E0301000 80001A1A buffer_load_dword v27, v27, s[0:3], 0 offen ; E0301000 80001B1B buffer_load_dword v14, v14, s[0:3], 0 offen ; E0301000 80000E0E s_waitcnt vmcnt(13) ; BF8C077D v_mul_f32_e32 v17, v17, v11 ; 10221711 v_mac_f32_e32 v17, v16, v10 ; 3E221510 s_waitcnt vmcnt(10) ; BF8C077A v_mul_f32_e32 v11, v20, v11 ; 10161714 v_mac_f32_e32 v11, v19, v10 ; 3E161513 v_mac_f32_e32 v17, v18, v12 ; 3E221912 s_waitcnt vmcnt(9) ; BF8C0779 v_mac_f32_e32 v11, v21, v12 ; 3E161915 v_mac_f32_e32 v17, v0, v13 ; 3E221B00 s_waitcnt vmcnt(8) ; BF8C0778 v_mac_f32_e32 v11, v1, v13 ; 3E161B01 exp 15, 32, 0, 0, 0, v2, v3, v4, v5 ; F800020F 05040302 exp 15, 33, 0, 0, 0, v6, v7, v8, v9 ; F800021F 09080706 s_waitcnt vmcnt(3) ; BF8C0773 exp 15, 34, 0, 0, 0, v22, v23, v24, v15 ; F800022F 0F181716 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 35, 0, 0, 0, v25, v26, v27, v14 ; F800023F 0E1B1A19 v_mov_b32_e32 v0, 1.0 ; 7E0002F2 v_mov_b32_e32 v1, 0 ; 7E020280 exp 15, 12, 0, 1, 0, v17, v11, v1, v0 ; F80008CF 00010B11 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 28 Code Size: 416 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL TEMP[0..1], LOCAL 0: MAD TEMP[0], IN[0], IN[3], IN[2] 1: MUL TEMP[1].x, TEMP[0].wwww, IN[1].wwww 2: MOV TEMP[0].w, TEMP[1].xxxx 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %23 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %24 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %25 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %26 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %35 = fmul float %22, %31 %36 = fadd float %35, %27 %37 = fmul float %23, %32 %38 = fadd float %37, %28 %39 = fmul float %24, %33 %40 = fadd float %39, %29 %41 = fmul float %25, %34 %42 = fadd float %41, %30 %43 = fmul float %42, %26 %44 = call i32 @llvm.SI.packf16(float %36, float %38) %45 = bitcast i32 %44 to float %46 = call i32 @llvm.SI.packf16(float %40, float %43) %47 = bitcast i32 %46 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %45, float %47, float %45, float %47) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 3, 1, [m0] ; C8180700 v_interp_p2_f32 v6, [v6], v1, 3, 1, [m0] ; C8190701 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 v_interp_p1_f32 v10, v0, 3, 2, [m0] ; C8280B00 v_interp_p2_f32 v10, [v10], v1, 3, 2, [m0] ; C8290B01 v_interp_p1_f32 v11, v0, 0, 3, [m0] ; C82C0C00 v_interp_p2_f32 v11, [v11], v1, 0, 3, [m0] ; C82D0C01 v_interp_p1_f32 v12, v0, 1, 3, [m0] ; C8300D00 v_interp_p2_f32 v12, [v12], v1, 1, 3, [m0] ; C8310D01 v_interp_p1_f32 v13, v0, 2, 3, [m0] ; C8340E00 v_interp_p2_f32 v13, [v13], v1, 2, 3, [m0] ; C8350E01 v_interp_p1_f32 v0, v0, 3, 3, [m0] ; C8000F00 v_interp_p2_f32 v0, [v0], v1, 3, 3, [m0] ; C8010F01 v_mac_f32_e32 v7, v11, v2 ; 3E0E050B v_mac_f32_e32 v8, v12, v3 ; 3E10070C v_mac_f32_e32 v9, v13, v4 ; 3E12090D v_mac_f32_e32 v10, v0, v5 ; 3E140B00 v_mul_f32_e32 v0, v6, v10 ; 10001506 v_cvt_pkrtz_f16_f32_e32 v1, v7, v8 ; 5E021107 v_cvt_pkrtz_f16_f32_e32 v0, v9, v0 ; 5E000109 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Code Size: 148 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..3] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[1], CONST[0] 2: DP4 TEMP[1].x, IN[1], CONST[1] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: DP4 TEMP[1].x, IN[1], CONST[2] 5: DP4 TEMP[2].x, IN[1], CONST[3] 6: MOV TEMP[1].y, TEMP[2].xxxx 7: MOV TEMP[1].xy, TEMP[1].xyxx 8: MOV OUT[1], IN[0] 9: MOV OUT[0], TEMP[0] 10: MOV OUT[2], TEMP[1] 11: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = fmul float %41, %13 %46 = fmul float %42, %14 %47 = fadd float %45, %46 %48 = fmul float %43, %15 %49 = fadd float %47, %48 %50 = fmul float %44, %16 %51 = fadd float %49, %50 %52 = fmul float %41, %17 %53 = fmul float %42, %18 %54 = fadd float %52, %53 %55 = fmul float %43, %19 %56 = fadd float %54, %55 %57 = fmul float %44, %20 %58 = fadd float %56, %57 %59 = fmul float %41, %21 %60 = fmul float %42, %22 %61 = fadd float %59, %60 %62 = fmul float %43, %23 %63 = fadd float %61, %62 %64 = fmul float %44, %24 %65 = fadd float %63, %64 %66 = fmul float %41, %25 %67 = fmul float %42, %26 %68 = fadd float %66, %67 %69 = fmul float %43, %27 %70 = fadd float %68, %69 %71 = fmul float %44, %28 %72 = fadd float %70, %71 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %33, float %34, float %35, float %36) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %65, float %72, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %51, float %58, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0x5 ; C2060105 s_buffer_load_dword s13, s[0:3], 0x6 ; C2068106 buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_buffer_load_dword s5, s[0:3], 0x8 ; C2028108 s_buffer_load_dword s6, s[0:3], 0x9 ; C2030109 s_buffer_load_dword s7, s[0:3], 0xa ; C203810A s_buffer_load_dword s8, s[0:3], 0xb ; C204010B s_buffer_load_dword s9, s[0:3], 0xc ; C204810C s_buffer_load_dword s10, s[0:3], 0xd ; C205010D s_buffer_load_dword s11, s[0:3], 0xe ; C205810E s_buffer_load_dword s14, s[0:3], 0x0 ; C2070100 s_buffer_load_dword s15, s[0:3], 0x1 ; C2078101 s_buffer_load_dword s16, s[0:3], 0x2 ; C2080102 s_buffer_load_dword s17, s[0:3], 0x3 ; C2088103 s_buffer_load_dword s18, s[0:3], 0x4 ; C2090104 s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s15, v6 ; 10000C0F v_mac_f32_e32 v0, s14, v5 ; 3E000A0E v_mul_f32_e32 v9, s12, v6 ; 10120C0C v_mac_f32_e32 v9, s18, v5 ; 3E120A12 v_mul_f32_e32 v10, s6, v6 ; 10140C06 v_mac_f32_e32 v10, s5, v5 ; 3E140A05 v_mul_f32_e32 v6, s10, v6 ; 100C0C0A v_mac_f32_e32 v6, s9, v5 ; 3E0C0A09 v_mac_f32_e32 v0, s16, v7 ; 3E000E10 v_mac_f32_e32 v9, s13, v7 ; 3E120E0D v_mac_f32_e32 v10, s7, v7 ; 3E140E07 v_mac_f32_e32 v6, s11, v7 ; 3E0C0E0B v_mac_f32_e32 v0, s17, v8 ; 3E001011 v_mac_f32_e32 v9, s4, v8 ; 3E121004 v_mac_f32_e32 v10, s8, v8 ; 3E141008 v_mac_f32_e32 v6, s0, v8 ; 3E0C1000 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 0 ; 7E020280 exp 15, 33, 0, 0, 0, v10, v6, v1, v1 ; F800021F 0101060A v_mov_b32_e32 v2, 1.0 ; 7E0402F2 exp 15, 12, 0, 1, 0, v0, v9, v1, v2 ; F80008CF 02010900 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 208 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV TEMP[1].xyz, TEMP[0].xyzx 3: MUL TEMP[0].x, TEMP[0].wwww, IN[0].wwww 4: MOV TEMP[1].w, TEMP[0].xxxx 5: MOV OUT[0], TEMP[1] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %29 = bitcast float %27 to i32 %30 = bitcast float %28 to i32 %31 = insertelement <2 x i32> undef, i32 %29, i32 0 %32 = insertelement <2 x i32> %31, i32 %30, i32 1 %33 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %32, <32 x i8> %23, <16 x i8> %25, i32 2) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = extractelement <4 x float> %33, i32 2 %37 = extractelement <4 x float> %33, i32 3 %38 = fmul float %37, %26 %39 = call i32 @llvm.SI.packf16(float %34, float %35) %40 = bitcast i32 %39 to float %41 = call i32 @llvm.SI.packf16(float %36, float %38) %42 = bitcast i32 %41 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %40, float %42, float %40, float %42) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400 v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[3:6], 15, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[12:19], s[0:3] ; F0800F00 00030303 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v2, v6 ; 10000D02 v_cvt_pkrtz_f16_f32_e32 v0, v5, v0 ; 5E000105 v_cvt_pkrtz_f16_f32_e32 v1, v3, v4 ; 5E020903 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 80 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL CONST[0..5] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[0], CONST[2] 2: DP4 TEMP[1].x, IN[0], CONST[3] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: DP4 TEMP[1].x, IN[0], CONST[4] 5: DP4 TEMP[2].x, IN[0], CONST[5] 6: MOV TEMP[1].y, TEMP[2].xxxx 7: MOV TEMP[1].xy, TEMP[1].xyxx 8: MOV OUT[1], CONST[0] 9: MOV OUT[2], CONST[1] 10: MOV OUT[0], TEMP[0] 11: MOV OUT[3], TEMP[1] 12: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = fmul float %41, %21 %46 = fmul float %42, %22 %47 = fadd float %45, %46 %48 = fmul float %43, %23 %49 = fadd float %47, %48 %50 = fmul float %44, %24 %51 = fadd float %49, %50 %52 = fmul float %41, %25 %53 = fmul float %42, %26 %54 = fadd float %52, %53 %55 = fmul float %43, %27 %56 = fadd float %54, %55 %57 = fmul float %44, %28 %58 = fadd float %56, %57 %59 = fmul float %41, %29 %60 = fmul float %42, %30 %61 = fadd float %59, %60 %62 = fmul float %43, %31 %63 = fadd float %61, %62 %64 = fmul float %44, %32 %65 = fadd float %63, %64 %66 = fmul float %41, %33 %67 = fmul float %42, %34 %68 = fadd float %66, %67 %69 = fmul float %43, %35 %70 = fadd float %68, %69 %71 = fmul float %44, %36 %72 = fadd float %70, %71 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %13, float %14, float %15, float %16) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %65, float %72, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %51, float %58, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s19, s[0:3], 0xf ; C209810F s_buffer_load_dword s20, s[0:3], 0x10 ; C20A0110 s_buffer_load_dword s21, s[0:3], 0x11 ; C20A8111 s_buffer_load_dword s22, s[0:3], 0x14 ; C20B0114 s_buffer_load_dword s23, s[0:3], 0x15 ; C20B8115 s_buffer_load_dword s24, s[0:3], 0x12 ; C20C0112 s_buffer_load_dword s25, s[0:3], 0x13 ; C20C8113 s_buffer_load_dword s26, s[0:3], 0x16 ; C20D0116 s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s13, v1 ; 1008020D v_mac_f32_e32 v4, s12, v0 ; 3E08000C v_mul_f32_e32 v5, s17, v1 ; 100A0211 v_mac_f32_e32 v5, s16, v0 ; 3E0A0010 v_mul_f32_e32 v6, s21, v1 ; 100C0215 v_mac_f32_e32 v6, s20, v0 ; 3E0C0014 v_mul_f32_e32 v1, s23, v1 ; 10020217 v_mac_f32_e32 v1, s22, v0 ; 3E020016 v_mac_f32_e32 v4, s14, v2 ; 3E08040E v_mac_f32_e32 v5, s18, v2 ; 3E0A0412 v_mac_f32_e32 v6, s24, v2 ; 3E0C0418 v_mac_f32_e32 v1, s26, v2 ; 3E02041A v_mac_f32_e32 v4, s15, v3 ; 3E08060F v_mac_f32_e32 v5, s19, v3 ; 3E0A0613 v_mac_f32_e32 v6, s25, v3 ; 3E0C0619 v_mac_f32_e32 v1, s0, v3 ; 3E020600 v_mov_b32_e32 v0, s4 ; 7E000204 v_mov_b32_e32 v2, s5 ; 7E040205 v_mov_b32_e32 v3, s6 ; 7E060206 v_mov_b32_e32 v7, s7 ; 7E0E0207 exp 15, 32, 0, 0, 0, v0, v2, v3, v7 ; F800020F 07030200 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, s8 ; 7E000208 v_mov_b32_e32 v2, s9 ; 7E040209 v_mov_b32_e32 v3, s10 ; 7E06020A v_mov_b32_e32 v7, s11 ; 7E0E020B exp 15, 33, 0, 0, 0, v0, v2, v3, v7 ; F800021F 07030200 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 34, 0, 0, 0, v6, v1, v0, v0 ; F800022F 00000106 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp 15, 12, 0, 1, 0, v4, v5, v0, v1 ; F80008CF 01000504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 8 Code Size: 280 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[0] DCL CONST[2] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[2].xyxx 1: MOV TEMP[1], IMM[0].xxxx 2: MOV TEMP[2], IMM[0].xxxx 3: MOV TEMP[3].x, -CONST[0].xxxx 4: BGNLOOP :0 5: FSLT TEMP[4].x, CONST[0].xxxx, TEMP[3].xxxx 6: UIF TEMP[4].xxxx :0 7: BRK 8: ENDIF 9: MAD TEMP[5].xy, TEMP[3].xxxx, CONST[2].xyyy, TEMP[0].xyyy 10: MOV TEMP[6].xy, TEMP[5].xyyy 11: MOV TEMP[6].w, IMM[0].xxxx 12: TXL TEMP[7], TEMP[6], SAMP[0], 2D 13: ADD TEMP[2], TEMP[2], TEMP[7] 14: ADD TEMP[3].x, TEMP[3].xxxx, IMM[0].yyyy 15: ENDLOOP :0 16: MUL TEMP[1], TEMP[2], CONST[0].wwww 17: MOV TEMP[0].w, IMM[0].yyyy 18: MOV TEMP[0].xyz, IN[1].xyzx 19: MUL TEMP[0], TEMP[1], TEMP[0] 20: MUL TEMP[1], TEMP[0], IN[1].wwww 21: MAD TEMP[1], IN[0], TEMP[1].wwww, TEMP[1] 22: MOV OUT[0], TEMP[1] 23: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %28 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %29 = load <32 x i8>, <32 x i8> addrspace(2)* %28, align 32, !tbaa !0 %30 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 %32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %37 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %38 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %42 = fsub float -0.000000e+00, %24 br label %LOOP LOOP: ; preds = %ENDIF, %main_body %temp8.0 = phi float [ 0.000000e+00, %main_body ], [ %81, %ENDIF ] %temp9.0 = phi float [ 0.000000e+00, %main_body ], [ %82, %ENDIF ] %temp10.0 = phi float [ 0.000000e+00, %main_body ], [ %83, %ENDIF ] %temp11.0 = phi float [ 0.000000e+00, %main_body ], [ %84, %ENDIF ] %temp12.0 = phi float [ %42, %main_body ], [ %85, %ENDIF ] %43 = fcmp olt float %24, %temp12.0 br i1 %43, label %IF, label %ENDIF IF: ; preds = %LOOP %44 = fmul float %temp8.0, %25 %45 = fmul float %temp9.0, %25 %46 = fmul float %temp10.0, %25 %47 = fmul float %temp11.0, %25 %48 = fmul float %44, %36 %49 = fmul float %45, %37 %50 = fmul float %46, %38 %51 = fmul float %48, %39 %52 = fmul float %49, %39 %53 = fmul float %50, %39 %54 = fmul float %47, %39 %55 = fmul float %32, %54 %56 = fadd float %55, %51 %57 = fmul float %33, %54 %58 = fadd float %57, %52 %59 = fmul float %34, %54 %60 = fadd float %59, %53 %61 = fmul float %35, %54 %62 = fadd float %61, %54 %63 = call i32 @llvm.SI.packf16(float %56, float %58) %64 = bitcast i32 %63 to float %65 = call i32 @llvm.SI.packf16(float %60, float %62) %66 = bitcast i32 %65 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %64, float %66, float %64, float %66) ret void ENDIF: ; preds = %LOOP %67 = fmul float %temp12.0, %26 %68 = fadd float %67, %40 %69 = fmul float %temp12.0, %27 %70 = fadd float %69, %41 %71 = bitcast float %68 to i32 %72 = bitcast float %70 to i32 %73 = insertelement <4 x i32> undef, i32 %71, i32 0 %74 = insertelement <4 x i32> %73, i32 %72, i32 1 %75 = insertelement <4 x i32> %74, i32 0, i32 2 %76 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %75, <32 x i8> %29, <16 x i8> %31, i32 2) %77 = extractelement <4 x float> %76, i32 0 %78 = extractelement <4 x float> %76, i32 1 %79 = extractelement <4 x float> %76, i32 2 %80 = extractelement <4 x float> %76, i32 3 %81 = fadd float %temp8.0, %77 %82 = fadd float %temp9.0, %78 %83 = fadd float %temp10.0, %79 %84 = fadd float %temp11.0, %80 %85 = fadd float %temp12.0, 1.000000e+00 br label %LOOP } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s1, s[12:15], 0x0 ; C2008D00 s_buffer_load_dword s0, s[12:15], 0x3 ; C2000D03 s_buffer_load_dword s2, s[12:15], 0x8 ; C2010D08 s_buffer_load_dword s3, s[12:15], 0x9 ; C2018D09 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700 v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701 v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800 v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801 s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 v_interp_p1_f32 v0, v0, 1, 2, [m0] ; C8000900 v_interp_p2_f32 v0, [v0], v1, 1, 2, [m0] ; C8010901 v_mov_b32_e32 v1, 0x80000000 ; 7E0202FF 80000000 s_waitcnt lgkmcnt(0) ; BF8C007F v_xor_b32_e32 v11, s1, v1 ; 3A160201 v_mov_b32_e32 v15, 0 ; 7E1E0280 v_mov_b32_e32 v16, 0 ; 7E200280 v_mov_b32_e32 v17, 0 ; 7E220280 v_mov_b32_e32 v18, 0 ; 7E240280 s_mov_b64 s[16:17], 0 ; BE900480 v_mov_b32_e32 v1, v18 ; 7E020312 v_mov_b32_e32 v12, v17 ; 7E180311 v_mov_b32_e32 v13, v16 ; 7E1A0310 v_mov_b32_e32 v14, v15 ; 7E1C030F v_cmp_nlt_f32_e32 vcc, s1, v11 ; 7C1C1601 s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_mad_f32 v15, s2, v11, v10 ; D282000F 042A1602 v_mad_f32 v16, s3, v11, v0 ; D2820010 04021603 v_mov_b32_e32 v17, 0 ; 7E220280 image_sample_l v[18:21], 15, 0, 0, 0, 0, 0, 0, 0, v[15:18], s[4:11], s[12:15] ; F0900F00 0061120F s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v15, v18, v14 ; 061E1D12 v_add_f32_e32 v16, v19, v13 ; 06201B13 v_add_f32_e32 v17, v20, v12 ; 06221914 v_add_f32_e32 v18, v21, v1 ; 06240315 v_add_f32_e32 v11, 1.0, v11 ; 061616F2 s_or_b64 exec, exec, s[18:19] ; 88FE127E s_or_b64 s[16:17], s[18:19], s[16:17] ; 88901012 s_andn2_b64 exec, exec, s[16:17] ; 8AFE107E s_cbranch_execnz BB0_1 ; BF890000 s_or_b64 exec, exec, s[16:17] ; 88FE107E v_mul_f32_e32 v0, s0, v14 ; 10001C00 v_mul_f32_e32 v10, s0, v13 ; 10141A00 v_mul_f32_e32 v11, s0, v12 ; 10161800 v_mul_f32_e32 v1, s0, v1 ; 10020200 v_mul_f32_e32 v0, v6, v0 ; 10000106 v_mul_f32_e32 v6, v7, v10 ; 100C1507 v_mul_f32_e32 v7, v8, v11 ; 100E1708 v_mul_f32_e32 v0, v9, v0 ; 10000109 v_mul_f32_e32 v6, v9, v6 ; 100C0D09 v_mul_f32_e32 v7, v9, v7 ; 100E0F09 v_mul_f32_e32 v1, v9, v1 ; 10020309 v_mac_f32_e32 v0, v1, v2 ; 3E000501 v_mac_f32_e32 v6, v1, v3 ; 3E0C0701 v_mac_f32_e32 v7, v1, v4 ; 3E0E0901 v_mac_f32_e32 v1, v1, v5 ; 3E020B01 v_cvt_pkrtz_f16_f32_e32 v0, v0, v6 ; 5E000D00 v_cvt_pkrtz_f16_f32_e32 v1, v7, v1 ; 5E020307 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 24 Code Size: 332 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL CONST[0..5] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[0], CONST[2] 2: DP4 TEMP[1].x, IN[0], CONST[3] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: DP4 TEMP[1].x, IN[0], CONST[4] 5: DP4 TEMP[2].x, IN[0], CONST[5] 6: MOV TEMP[1].y, TEMP[2].xxxx 7: MOV TEMP[1].xy, TEMP[1].xyxx 8: MOV OUT[1], CONST[0] 9: MOV OUT[2], CONST[1] 10: MOV OUT[0], TEMP[0] 11: MOV OUT[3], TEMP[1] 12: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = fmul float %41, %21 %46 = fmul float %42, %22 %47 = fadd float %45, %46 %48 = fmul float %43, %23 %49 = fadd float %47, %48 %50 = fmul float %44, %24 %51 = fadd float %49, %50 %52 = fmul float %41, %25 %53 = fmul float %42, %26 %54 = fadd float %52, %53 %55 = fmul float %43, %27 %56 = fadd float %54, %55 %57 = fmul float %44, %28 %58 = fadd float %56, %57 %59 = fmul float %41, %29 %60 = fmul float %42, %30 %61 = fadd float %59, %60 %62 = fmul float %43, %31 %63 = fadd float %61, %62 %64 = fmul float %44, %32 %65 = fadd float %63, %64 %66 = fmul float %41, %33 %67 = fmul float %42, %34 %68 = fadd float %66, %67 %69 = fmul float %43, %35 %70 = fadd float %68, %69 %71 = fmul float %44, %36 %72 = fadd float %70, %71 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %13, float %14, float %15, float %16) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %65, float %72, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %51, float %58, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s19, s[0:3], 0xf ; C209810F s_buffer_load_dword s20, s[0:3], 0x10 ; C20A0110 s_buffer_load_dword s21, s[0:3], 0x11 ; C20A8111 s_buffer_load_dword s22, s[0:3], 0x14 ; C20B0114 s_buffer_load_dword s23, s[0:3], 0x15 ; C20B8115 s_buffer_load_dword s24, s[0:3], 0x12 ; C20C0112 s_buffer_load_dword s25, s[0:3], 0x13 ; C20C8113 s_buffer_load_dword s26, s[0:3], 0x16 ; C20D0116 s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s13, v1 ; 1008020D v_mac_f32_e32 v4, s12, v0 ; 3E08000C v_mul_f32_e32 v5, s17, v1 ; 100A0211 v_mac_f32_e32 v5, s16, v0 ; 3E0A0010 v_mul_f32_e32 v6, s21, v1 ; 100C0215 v_mac_f32_e32 v6, s20, v0 ; 3E0C0014 v_mul_f32_e32 v1, s23, v1 ; 10020217 v_mac_f32_e32 v1, s22, v0 ; 3E020016 v_mac_f32_e32 v4, s14, v2 ; 3E08040E v_mac_f32_e32 v5, s18, v2 ; 3E0A0412 v_mac_f32_e32 v6, s24, v2 ; 3E0C0418 v_mac_f32_e32 v1, s26, v2 ; 3E02041A v_mac_f32_e32 v4, s15, v3 ; 3E08060F v_mac_f32_e32 v5, s19, v3 ; 3E0A0613 v_mac_f32_e32 v6, s25, v3 ; 3E0C0619 v_mac_f32_e32 v1, s0, v3 ; 3E020600 v_mov_b32_e32 v0, s4 ; 7E000204 v_mov_b32_e32 v2, s5 ; 7E040205 v_mov_b32_e32 v3, s6 ; 7E060206 v_mov_b32_e32 v7, s7 ; 7E0E0207 exp 15, 32, 0, 0, 0, v0, v2, v3, v7 ; F800020F 07030200 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, s8 ; 7E000208 v_mov_b32_e32 v2, s9 ; 7E040209 v_mov_b32_e32 v3, s10 ; 7E06020A v_mov_b32_e32 v7, s11 ; 7E0E020B exp 15, 33, 0, 0, 0, v0, v2, v3, v7 ; F800021F 07030200 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 34, 0, 0, 0, v6, v1, v0, v0 ; F800022F 00000106 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp 15, 12, 0, 1, 0, v4, v5, v0, v1 ; F80008CF 01000504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 8 Code Size: 280 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[0..2] DCL CONST[4] DCL CONST[6] DCL TEMP[0..10], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[2].xyxx 1: MOV TEMP[1], IMM[0].xxxx 2: MOV TEMP[2], IMM[0].xxxx 3: MOV TEMP[3].y, IMM[0].xxxx 4: MOV TEMP[3].x, -CONST[0].xxxx 5: BGNLOOP :0 6: FSLT TEMP[4].x, CONST[0].xxxx, TEMP[3].xxxx 7: UIF TEMP[4].xxxx :0 8: BRK 9: ENDIF 10: MOV TEMP[3].y, -CONST[0].yyyy 11: BGNLOOP :0 12: FSLT TEMP[5].x, CONST[0].yyyy, TEMP[3].yyyy 13: UIF TEMP[5].xxxx :0 14: BRK 15: ENDIF 16: ADD TEMP[6].xy, CONST[1].xyyy, TEMP[3].xyyy 17: MAD TEMP[7].xy, TEMP[6].xyyy, CONST[6].xyyy, TEMP[0].xyyy 18: MOV TEMP[8].xy, TEMP[7].xyyy 19: MOV TEMP[8].w, IMM[0].xxxx 20: TXL TEMP[9], TEMP[8], SAMP[1], 2D 21: ADD TEMP[2], TEMP[2], TEMP[9] 22: ADD TEMP[10].x, TEMP[3].yyyy, IMM[0].yyyy 23: MOV TEMP[3].y, TEMP[10].xxxx 24: ENDLOOP :0 25: ADD TEMP[3].x, TEMP[3].xxxx, IMM[0].yyyy 26: ENDLOOP :0 27: MUL TEMP[1].w, TEMP[2], CONST[0].wwww 28: MUL TEMP[0].xy, IN[2].xyyy, CONST[4].xyyy 29: MOV TEMP[0].xy, TEMP[0].xyyy 30: MOV TEMP[0].w, IMM[0].xxxx 31: TXL TEMP[0], TEMP[0], SAMP[0], 2D 32: ADD TEMP[2].x, IMM[0].yyyy, -TEMP[0].wwww 33: MUL TEMP[2].x, TEMP[1].wwww, TEMP[2].xxxx 34: MUL TEMP[2].x, TEMP[2].xxxx, CONST[0].zzzz 35: MOV_SAT TEMP[2].x, TEMP[2].xxxx 36: MAD TEMP[1], CONST[2], TEMP[2].xxxx, TEMP[0] 37: MOV TEMP[0].w, IMM[0].yyyy 38: MOV TEMP[0].xyz, IN[1].xyzx 39: MUL TEMP[0], TEMP[1], TEMP[0] 40: MUL TEMP[1], TEMP[0], IN[1].wwww 41: MAD TEMP[1], IN[0], TEMP[1].wwww, TEMP[1] 42: MOV OUT[0], TEMP[1] 43: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %38 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %39 = load <32 x i8>, <32 x i8> addrspace(2)* %38, align 32, !tbaa !0 %40 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %41 = load <16 x i8>, <16 x i8> addrspace(2)* %40, align 16, !tbaa !0 %42 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %43 = bitcast <8 x i32> addrspace(2)* %42 to <32 x i8> addrspace(2)* %44 = load <32 x i8>, <32 x i8> addrspace(2)* %43, align 32, !tbaa !0 %45 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %46 = bitcast <4 x i32> addrspace(2)* %45 to <16 x i8> addrspace(2)* %47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0 %48 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %49 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %50 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %51 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %52 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %53 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %54 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %55 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %56 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %57 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %58 = fsub float -0.000000e+00, %24 %59 = fsub float -0.000000e+00, %25 br label %LOOP LOOP: ; preds = %IF47, %main_body %temp12.0 = phi float [ %58, %main_body ], [ %111, %IF47 ] %temp11.0 = phi float [ 0.000000e+00, %main_body ], [ %temp11.1, %IF47 ] %60 = fcmp olt float %24, %temp12.0 br i1 %60, label %IF, label %ENDIF IF: ; preds = %LOOP %61 = fmul float %temp11.0, %27 %62 = fmul float %56, %34 %63 = fmul float %57, %35 %64 = bitcast float %62 to i32 %65 = bitcast float %63 to i32 %66 = insertelement <4 x i32> undef, i32 %64, i32 0 %67 = insertelement <4 x i32> %66, i32 %65, i32 1 %68 = insertelement <4 x i32> %67, i32 0, i32 2 %69 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %68, <32 x i8> %39, <16 x i8> %41, i32 2) %70 = extractelement <4 x float> %69, i32 0 %71 = extractelement <4 x float> %69, i32 1 %72 = extractelement <4 x float> %69, i32 2 %73 = extractelement <4 x float> %69, i32 3 %74 = fsub float 1.000000e+00, %73 %75 = fmul float %61, %74 %76 = fmul float %75, %26 %77 = call float @llvm.AMDIL.clamp.(float %76, float 0.000000e+00, float 1.000000e+00) %78 = fmul float %30, %77 %79 = fadd float %78, %70 %80 = fmul float %31, %77 %81 = fadd float %80, %71 %82 = fmul float %32, %77 %83 = fadd float %82, %72 %84 = fmul float %33, %77 %85 = fadd float %84, %73 %86 = fmul float %79, %52 %87 = fmul float %81, %53 %88 = fmul float %83, %54 %89 = fmul float %86, %55 %90 = fmul float %87, %55 %91 = fmul float %88, %55 %92 = fmul float %85, %55 %93 = fmul float %48, %92 %94 = fadd float %93, %89 %95 = fmul float %49, %92 %96 = fadd float %95, %90 %97 = fmul float %50, %92 %98 = fadd float %97, %91 %99 = fmul float %51, %92 %100 = fadd float %99, %92 %101 = call i32 @llvm.SI.packf16(float %94, float %96) %102 = bitcast i32 %101 to float %103 = call i32 @llvm.SI.packf16(float %98, float %100) %104 = bitcast i32 %103 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %102, float %104, float %102, float %104) ret void ENDIF: ; preds = %LOOP %105 = fadd float %28, %temp12.0 %106 = fmul float %105, %36 %107 = fadd float %106, %56 %108 = bitcast float %107 to i32 %109 = insertelement <4 x i32> undef, i32 %108, i32 0 br label %LOOP45 LOOP45: ; preds = %ENDIF46, %ENDIF %temp13.0 = phi float [ %59, %ENDIF ], [ %121, %ENDIF46 ] %temp11.1 = phi float [ %temp11.0, %ENDIF ], [ %120, %ENDIF46 ] %110 = fcmp olt float %25, %temp13.0 br i1 %110, label %IF47, label %ENDIF46 IF47: ; preds = %LOOP45 %111 = fadd float %temp12.0, 1.000000e+00 br label %LOOP ENDIF46: ; preds = %LOOP45 %112 = fadd float %29, %temp13.0 %113 = fmul float %112, %37 %114 = fadd float %113, %57 %115 = bitcast float %114 to i32 %116 = insertelement <4 x i32> %109, i32 %115, i32 1 %117 = insertelement <4 x i32> %116, i32 0, i32 2 %118 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %117, <32 x i8> %44, <16 x i8> %47, i32 2) %119 = extractelement <4 x float> %118, i32 3 %120 = fadd float %temp11.1, %119 %121 = fadd float %temp13.0, 1.000000e+00 br label %LOOP45 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[20:23], s[2:3], 0x0 ; C08A0300 s_mov_b32 m0, s9 ; BEFC0309 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s10, s[20:23], 0x0 ; C2051500 s_buffer_load_dword s11, s[20:23], 0x1 ; C2059501 s_buffer_load_dword s3, s[20:23], 0x2 ; C2019502 s_buffer_load_dword s9, s[20:23], 0x3 ; C2049503 s_buffer_load_dword s14, s[20:23], 0x4 ; C2071504 s_buffer_load_dword s15, s[20:23], 0x5 ; C2079505 s_buffer_load_dword s8, s[20:23], 0x8 ; C2041508 s_buffer_load_dword s2, s[20:23], 0x9 ; C2011509 s_buffer_load_dword s1, s[20:23], 0xa ; C200950A s_buffer_load_dword s0, s[20:23], 0xb ; C200150B s_buffer_load_dword s13, s[20:23], 0x10 ; C2069510 s_buffer_load_dword s12, s[20:23], 0x11 ; C2061511 s_buffer_load_dword s16, s[20:23], 0x18 ; C2081518 s_buffer_load_dword s17, s[20:23], 0x19 ; C2089519 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700 v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701 s_load_dwordx4 s[20:23], s[4:5], 0x0 ; C08A0500 s_load_dwordx4 s[40:43], s[4:5], 0x4 ; C0940504 s_load_dwordx8 s[24:31], s[6:7], 0x0 ; C0CC0700 s_load_dwordx8 s[32:39], s[6:7], 0x8 ; C0D00708 v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800 v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801 v_interp_p1_f32 v0, v0, 1, 2, [m0] ; C8000900 v_interp_p2_f32 v0, [v0], v1, 1, 2, [m0] ; C8010901 v_mov_b32_e32 v1, 0x80000000 ; 7E0202FF 80000000 s_waitcnt lgkmcnt(0) ; BF8C007F v_xor_b32_e32 v11, s10, v1 ; 3A16020A v_xor_b32_e32 v12, s11, v1 ; 3A18020B v_mov_b32_e32 v13, 0 ; 7E1A0280 s_mov_b64 s[4:5], 0 ; BE840480 v_mov_b32_e32 v1, v13 ; 7E02030D v_cmp_nlt_f32_e32 vcc, s10, v11 ; 7C1C160A s_and_saveexec_b64 s[6:7], vcc ; BE86246A s_xor_b64 s[6:7], exec, s[6:7] ; 8986067E s_cbranch_execz BB0_4 ; BF880000 v_add_f32_e32 v13, s14, v11 ; 061A160E v_mad_f32 v14, s16, v13, v10 ; D282000E 042A1A10 s_mov_b64 s[18:19], 0 ; BE920480 v_mov_b32_e32 v16, v12 ; 7E20030C v_mov_b32_e32 v15, v1 ; 7E1E0301 v_mov_b32_e32 v13, v15 ; 7E1A030F v_cmp_nlt_f32_e32 vcc, s11, v16 ; 7C1C200B s_and_saveexec_b64 s[44:45], vcc ; BEAC246A s_xor_b64 s[44:45], exec, s[44:45] ; 89AC2C7E s_cbranch_execz BB0_6 ; BF880000 v_add_f32_e32 v15, s15, v16 ; 061E200F v_mad_f32 v15, s17, v15, v0 ; D282000F 04021E11 v_mov_b32_e32 v17, 0 ; 7E220280 v_mov_b32_e32 v18, v14 ; 7E24030E v_mov_b32_e32 v19, v15 ; 7E26030F v_mov_b32_e32 v20, v16 ; 7E280310 v_mov_b32_e32 v21, v17 ; 7E2A0311 v_mov_b32_e32 v20, v17 ; 7E280311 image_sample_l v15, 8, 0, 0, 0, 0, 0, 0, 0, v[18:21], s[32:39], s[40:43] ; F0900800 01480F12 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v15, v15, v13 ; 061E1B0F v_add_f32_e32 v16, 1.0, v16 ; 062020F2 s_or_b64 exec, exec, s[44:45] ; 88FE2C7E s_or_b64 s[18:19], s[44:45], s[18:19] ; 8892122C s_andn2_b64 exec, exec, s[18:19] ; 8AFE127E s_cbranch_execnz BB0_5 ; BF890000 s_or_b64 exec, exec, s[18:19] ; 88FE127E v_add_f32_e32 v11, 1.0, v11 ; 061616F2 s_or_b64 exec, exec, s[6:7] ; 88FE067E s_or_b64 s[4:5], s[6:7], s[4:5] ; 88840406 s_andn2_b64 exec, exec, s[4:5] ; 8AFE047E s_cbranch_execnz BB0_1 ; BF890000 s_or_b64 exec, exec, s[4:5] ; 88FE047E v_mul_f32_e32 v1, s9, v1 ; 10020209 v_mul_f32_e32 v10, s13, v10 ; 1014140D v_mul_f32_e32 v11, s12, v0 ; 1016000C v_mov_b32_e32 v12, 0 ; 7E180280 image_sample_l v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[10:13], s[24:31], s[20:23] ; F0900F00 00A60A0A s_waitcnt vmcnt(0) ; BF8C0770 v_sub_f32_e32 v0, 1.0, v13 ; 08001AF2 v_mul_f32_e32 v0, v0, v1 ; 10000300 v_mul_f32_e32 v0, s3, v0 ; 10000003 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_mad_f32 v1, s8, v0, v10 ; D2820001 042A0008 v_mad_f32 v10, s2, v0, v11 ; D282000A 042E0002 v_mad_f32 v11, s1, v0, v12 ; D282000B 04320001 v_mac_f32_e32 v13, s0, v0 ; 3E1A0000 v_mul_f32_e32 v0, v6, v1 ; 10000306 v_mul_f32_e32 v1, v7, v10 ; 10021507 v_mul_f32_e32 v6, v8, v11 ; 100C1708 v_mul_f32_e32 v0, v9, v0 ; 10000109 v_mul_f32_e32 v1, v9, v1 ; 10020309 v_mul_f32_e32 v6, v9, v6 ; 100C0D09 v_mul_f32_e32 v7, v9, v13 ; 100E1B09 v_mac_f32_e32 v0, v7, v2 ; 3E000507 v_mac_f32_e32 v1, v7, v3 ; 3E020707 v_mac_f32_e32 v6, v7, v4 ; 3E0C0907 v_mac_f32_e32 v7, v7, v5 ; 3E0E0B07 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v6, v7 ; 5E020F06 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 24 Code Size: 496 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..1] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[2], CONST[0] 2: DP4 TEMP[1].x, IN[2], CONST[1] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: MOV OUT[2], IN[1] 5: MOV OUT[1], IN[0] 6: MOV OUT[0], TEMP[0] 7: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = extractelement <4 x float> %24, i32 1 %27 = extractelement <4 x float> %24, i32 2 %28 = extractelement <4 x float> %24, i32 3 %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = fmul float %41, %13 %46 = fmul float %42, %14 %47 = fadd float %45, %46 %48 = fmul float %43, %15 %49 = fadd float %47, %48 %50 = fmul float %44, %16 %51 = fadd float %49, %50 %52 = fmul float %41, %17 %53 = fmul float %42, %18 %54 = fadd float %52, %53 %55 = fmul float %43, %19 %56 = fadd float %54, %55 %57 = fmul float %44, %20 %58 = fadd float %56, %57 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %25, float %26, float %27, float %28) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %33, float %34, float %35, float %36) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %51, float %58, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s16, s[0:3], 0x0 ; C2080100 s_buffer_load_dword s17, s[0:3], 0x1 ; C2088101 buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 buffer_load_format_xyzw v[9:12], v0, s[8:11], 0 idxen ; E00C2000 80020900 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103 s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104 s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 s_buffer_load_dword s0, s[0:3], 0x7 ; C2000107 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s17, v10 ; 10001411 v_mac_f32_e32 v0, s16, v9 ; 3E001210 v_mul_f32_e32 v10, s7, v10 ; 10141407 v_mac_f32_e32 v10, s6, v9 ; 3E141206 v_mac_f32_e32 v0, s4, v11 ; 3E001604 v_mac_f32_e32 v10, s8, v11 ; 3E141608 v_mac_f32_e32 v0, s5, v12 ; 3E001805 v_mac_f32_e32 v10, s0, v12 ; 3E141800 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 exp 15, 33, 0, 0, 0, v5, v6, v7, v8 ; F800021F 08070605 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 1.0 ; 7E0202F2 v_mov_b32_e32 v2, 0 ; 7E040280 exp 15, 12, 0, 1, 0, v0, v10, v2, v1 ; F80008CF 01020A00 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Code Size: 156 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xyz, IN[0].xyzx 1: MUL TEMP[1].x, IN[0].wwww, IN[1].wwww 2: MOV TEMP[0].w, TEMP[1].xxxx 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %23 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %24 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %25 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %26 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %27 = fmul float %25, %26 %28 = call i32 @llvm.SI.packf16(float %22, float %23) %29 = bitcast i32 %28 to float %30 = call i32 @llvm.SI.packf16(float %24, float %27) %31 = bitcast i32 %30 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %29, float %31, float %29, float %31) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v0, v0, 3, 1, [m0] ; C8000700 v_interp_p2_f32 v0, [v0], v1, 3, 1, [m0] ; C8010701 v_mul_f32_e32 v0, v0, v5 ; 10000B00 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 v_cvt_pkrtz_f16_f32_e32 v0, v4, v0 ; 5E000104 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 68 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..95] DCL TEMP[0..2], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 1020.0400, 0.1000} IMM[1] FLT32 { 1.1000, 2.1000, 3.1000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: MUL TEMP[1].x, IMM[0].zzzz, IN[0].zzzz 2: ADD TEMP[2].x, TEMP[1].xxxx, IMM[0].wwww 3: F2I TEMP[2].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: UARL ADDR[0].x, TEMP[2].xxxx 6: DP4 TEMP[0].x, IN[1], CONST[ADDR[0].x] 7: ADD TEMP[2].x, IMM[1].xxxx, TEMP[1].xxxx 8: F2I TEMP[2].x, TEMP[2].xxxx 9: UARL ADDR[0].x, TEMP[2].xxxx 10: DP4 TEMP[2].x, IN[1], CONST[ADDR[0].x] 11: MOV TEMP[0].y, TEMP[2].xxxx 12: ADD TEMP[2].x, IMM[1].yyyy, TEMP[1].xxxx 13: F2I TEMP[2].x, TEMP[2].xxxx 14: UARL ADDR[0].x, TEMP[2].xxxx 15: UARL ADDR[0].x, TEMP[2].xxxx 16: DP4 TEMP[2].x, IN[1], CONST[ADDR[0].x] 17: ADD TEMP[1].x, IMM[1].zzzz, TEMP[1].xxxx 18: F2I TEMP[1].x, TEMP[1].xxxx 19: UARL ADDR[0].x, TEMP[1].xxxx 20: DP4 TEMP[1].x, IN[1], CONST[ADDR[0].x] 21: MOV TEMP[2].y, TEMP[1].xxxx 22: MOV TEMP[1].xy, TEMP[2].xyxx 23: MOV OUT[1], IN[0] 24: MOV OUT[0], TEMP[0] 25: MOV OUT[2], TEMP[1] 26: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %14 = load <16 x i8>, <16 x i8> addrspace(2)* %13, align 16, !tbaa !0 %15 = add i32 %5, %7 %16 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %14, i32 0, i32 %15) %17 = extractelement <4 x float> %16, i32 0 %18 = extractelement <4 x float> %16, i32 1 %19 = extractelement <4 x float> %16, i32 2 %20 = extractelement <4 x float> %16, i32 3 %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = extractelement <4 x float> %24, i32 1 %27 = extractelement <4 x float> %24, i32 2 %28 = extractelement <4 x float> %24, i32 3 %29 = fmul float %19, 0x408FE051E0000000 %30 = fadd float %29, 0x3FB99999A0000000 %31 = fptosi float %30 to i32 %32 = shl i32 %31, 4 %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %32) %34 = shl i32 %31, 4 %35 = or i32 %34, 4 %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %35) %37 = shl i32 %31, 4 %38 = or i32 %37, 8 %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %38) %40 = shl i32 %31, 4 %41 = or i32 %40, 12 %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %41) %43 = fmul float %25, %33 %44 = fmul float %26, %36 %45 = fadd float %43, %44 %46 = fmul float %27, %39 %47 = fadd float %45, %46 %48 = fmul float %28, %42 %49 = fadd float %47, %48 %50 = fadd float %29, 0x3FF19999A0000000 %51 = fptosi float %50 to i32 %52 = shl i32 %51, 4 %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %52) %54 = shl i32 %51, 4 %55 = or i32 %54, 4 %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %55) %57 = shl i32 %51, 4 %58 = or i32 %57, 8 %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %58) %60 = shl i32 %51, 4 %61 = or i32 %60, 12 %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %61) %63 = fmul float %25, %53 %64 = fmul float %26, %56 %65 = fadd float %63, %64 %66 = fmul float %27, %59 %67 = fadd float %65, %66 %68 = fmul float %28, %62 %69 = fadd float %67, %68 %70 = fadd float %29, 0x4000CCCCC0000000 %71 = fptosi float %70 to i32 %72 = shl i32 %71, 4 %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %72) %74 = shl i32 %71, 4 %75 = or i32 %74, 4 %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %75) %77 = shl i32 %71, 4 %78 = or i32 %77, 8 %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %78) %80 = shl i32 %71, 4 %81 = or i32 %80, 12 %82 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %81) %83 = fmul float %25, %73 %84 = fmul float %26, %76 %85 = fadd float %83, %84 %86 = fmul float %27, %79 %87 = fadd float %85, %86 %88 = fmul float %28, %82 %89 = fadd float %87, %88 %90 = fadd float %29, 0x4008CCCCC0000000 %91 = fptosi float %90 to i32 %92 = shl i32 %91, 4 %93 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %92) %94 = shl i32 %91, 4 %95 = or i32 %94, 4 %96 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %95) %97 = shl i32 %91, 4 %98 = or i32 %97, 8 %99 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %98) %100 = shl i32 %91, 4 %101 = or i32 %100, 12 %102 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %101) %103 = fmul float %25, %93 %104 = fmul float %26, %96 %105 = fadd float %103, %104 %106 = fmul float %27, %99 %107 = fadd float %105, %106 %108 = fmul float %28, %102 %109 = fadd float %107, %108 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %89, float %109, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %49, float %69, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_add_i32_e32 v0, s10, v0 ; 4A00000A v_mov_b32_e32 v1, 0x447f028f ; 7E0202FF 447F028F s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[8:11], 0 idxen ; E00C2000 80020600 s_waitcnt vmcnt(1) ; BF8C0771 v_madak_f32_e32 v0, v4, v1, 0x40066666 ; 42000304 40066666 v_madak_f32_e32 v10, v4, v1, 0x3dcccccd ; 42140304 3DCCCCCD v_madak_f32_e32 v11, v4, v1, 0x3f8ccccd ; 42160304 3F8CCCCD v_madak_f32_e32 v1, v4, v1, 0x40466666 ; 42020304 40466666 v_cvt_i32_f32_e32 v10, v10 ; 7E14110A v_cvt_i32_f32_e32 v11, v11 ; 7E16110B v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_lshlrev_b32_e32 v10, 4, v10 ; 34141484 v_lshlrev_b32_e32 v11, 4, v11 ; 34161684 v_lshlrev_b32_e32 v0, 4, v0 ; 34000084 v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 buffer_load_dword v12, v10, s[0:3], 0 offen ; E0301000 80000C0A v_or_b32_e32 v13, 4, v10 ; 381A1484 v_or_b32_e32 v14, 8, v10 ; 381C1488 v_or_b32_e32 v10, 12, v10 ; 3814148C buffer_load_dword v15, v11, s[0:3], 0 offen ; E0301000 80000F0B v_or_b32_e32 v16, 4, v11 ; 38201684 v_or_b32_e32 v17, 8, v11 ; 38221688 v_or_b32_e32 v11, 12, v11 ; 3816168C buffer_load_dword v18, v0, s[0:3], 0 offen ; E0301000 80001200 v_or_b32_e32 v19, 4, v0 ; 38260084 v_or_b32_e32 v20, 8, v0 ; 38280088 v_or_b32_e32 v0, 12, v0 ; 3800008C buffer_load_dword v13, v13, s[0:3], 0 offen ; E0301000 80000D0D buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 buffer_load_dword v19, v19, s[0:3], 0 offen ; E0301000 80001313 v_or_b32_e32 v21, 4, v1 ; 382A0284 buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 buffer_load_dword v22, v1, s[0:3], 0 offen ; E0301000 80001601 v_or_b32_e32 v23, 8, v1 ; 382E0288 v_or_b32_e32 v1, 12, v1 ; 3802028C buffer_load_dword v14, v14, s[0:3], 0 offen ; E0301000 80000E0E buffer_load_dword v17, v17, s[0:3], 0 offen ; E0301000 80001111 buffer_load_dword v20, v20, s[0:3], 0 offen ; E0301000 80001414 buffer_load_dword v23, v23, s[0:3], 0 offen ; E0301000 80001717 buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 s_waitcnt vmcnt(12) ; BF8C077C v_mul_f32_e32 v13, v13, v7 ; 101A0F0D v_mac_f32_e32 v13, v12, v6 ; 3E1A0D0C s_waitcnt vmcnt(11) ; BF8C077B v_mul_f32_e32 v12, v16, v7 ; 10180F10 v_mac_f32_e32 v12, v15, v6 ; 3E180D0F s_waitcnt vmcnt(10) ; BF8C077A v_mul_f32_e32 v15, v19, v7 ; 101E0F13 v_mac_f32_e32 v15, v18, v6 ; 3E1E0D12 s_waitcnt vmcnt(9) ; BF8C0779 v_mul_f32_e32 v7, v21, v7 ; 100E0F15 s_waitcnt vmcnt(8) ; BF8C0778 v_mac_f32_e32 v7, v22, v6 ; 3E0E0D16 s_waitcnt vmcnt(7) ; BF8C0777 v_mac_f32_e32 v13, v14, v8 ; 3E1A110E s_waitcnt vmcnt(6) ; BF8C0776 v_mac_f32_e32 v12, v17, v8 ; 3E181111 s_waitcnt vmcnt(5) ; BF8C0775 v_mac_f32_e32 v15, v20, v8 ; 3E1E1114 s_waitcnt vmcnt(4) ; BF8C0774 v_mac_f32_e32 v7, v23, v8 ; 3E0E1117 s_waitcnt vmcnt(3) ; BF8C0773 v_mac_f32_e32 v13, v10, v9 ; 3E1A130A s_waitcnt vmcnt(2) ; BF8C0772 v_mac_f32_e32 v12, v11, v9 ; 3E18130B s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v15, v0, v9 ; 3E1E1300 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v7, v1, v9 ; 3E0E1301 exp 15, 32, 0, 0, 0, v2, v3, v4, v5 ; F800020F 05040302 v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 33, 0, 0, 0, v15, v7, v0, v0 ; F800021F 0000070F v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp 15, 12, 0, 1, 0, v13, v12, v0, v1 ; F80008CF 01000C0D s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 24 Code Size: 440 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV TEMP[1].xyz, TEMP[0].xyzx 3: MUL TEMP[0].x, TEMP[0].wwww, IN[0].wwww 4: MOV TEMP[1].w, TEMP[0].xxxx 5: MOV OUT[0], TEMP[1] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %29 = bitcast float %27 to i32 %30 = bitcast float %28 to i32 %31 = insertelement <2 x i32> undef, i32 %29, i32 0 %32 = insertelement <2 x i32> %31, i32 %30, i32 1 %33 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %32, <32 x i8> %23, <16 x i8> %25, i32 2) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = extractelement <4 x float> %33, i32 2 %37 = extractelement <4 x float> %33, i32 3 %38 = fmul float %37, %26 %39 = call i32 @llvm.SI.packf16(float %34, float %35) %40 = bitcast i32 %39 to float %41 = call i32 @llvm.SI.packf16(float %36, float %38) %42 = bitcast i32 %41 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %40, float %42, float %40, float %42) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400 v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[3:6], 15, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[12:19], s[0:3] ; F0800F00 00030303 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v2, v6 ; 10000D02 v_cvt_pkrtz_f16_f32_e32 v0, v5, v0 ; 5E000105 v_cvt_pkrtz_f16_f32_e32 v1, v3, v4 ; 5E020903 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 80 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL SV[0], INSTANCEID DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..95] DCL TEMP[0..2], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 4.0000, 0.1000} IMM[1] FLT32 { 1.1000, 2.1000, 3.1000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: I2F TEMP[1].x, SV[0].xxxx 2: MAD TEMP[2].x, TEMP[1].xxxx, IMM[0].zzzz, IMM[0].wwww 3: F2I TEMP[2].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: UARL ADDR[0].x, TEMP[2].xxxx 6: DP4 TEMP[0].x, IN[1], CONST[ADDR[0].x] 7: MAD TEMP[2].x, TEMP[1].xxxx, IMM[0].zzzz, IMM[1].xxxx 8: F2I TEMP[2].x, TEMP[2].xxxx 9: UARL ADDR[0].x, TEMP[2].xxxx 10: DP4 TEMP[2].x, IN[1], CONST[ADDR[0].x] 11: MOV TEMP[0].y, TEMP[2].xxxx 12: MAD TEMP[2].x, TEMP[1].xxxx, IMM[0].zzzz, IMM[1].yyyy 13: F2I TEMP[2].x, TEMP[2].xxxx 14: UARL ADDR[0].x, TEMP[2].xxxx 15: UARL ADDR[0].x, TEMP[2].xxxx 16: DP4 TEMP[2].x, IN[1], CONST[ADDR[0].x] 17: MAD TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz, IMM[1].zzzz 18: F2I TEMP[1].x, TEMP[1].xxxx 19: UARL ADDR[0].x, TEMP[1].xxxx 20: DP4 TEMP[1].x, IN[1], CONST[ADDR[0].x] 21: MOV TEMP[2].y, TEMP[1].xxxx 22: MOV TEMP[1].xy, TEMP[2].xyxx 23: MOV OUT[1], IN[0] 24: MOV OUT[0], TEMP[0] 25: MOV OUT[2], TEMP[1] 26: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %14 = load <16 x i8>, <16 x i8> addrspace(2)* %13, align 16, !tbaa !0 %15 = add i32 %5, %7 %16 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %14, i32 0, i32 %15) %17 = extractelement <4 x float> %16, i32 0 %18 = extractelement <4 x float> %16, i32 1 %19 = extractelement <4 x float> %16, i32 2 %20 = extractelement <4 x float> %16, i32 3 %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = extractelement <4 x float> %24, i32 1 %27 = extractelement <4 x float> %24, i32 2 %28 = extractelement <4 x float> %24, i32 3 %29 = sitofp i32 %10 to float %30 = fmul float %29, 4.000000e+00 %31 = fadd float %30, 0x3FB99999A0000000 %32 = fptosi float %31 to i32 %33 = shl i32 %32, 4 %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %33) %35 = shl i32 %32, 4 %36 = or i32 %35, 4 %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %36) %38 = shl i32 %32, 4 %39 = or i32 %38, 8 %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %39) %41 = shl i32 %32, 4 %42 = or i32 %41, 12 %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %42) %44 = fmul float %25, %34 %45 = fmul float %26, %37 %46 = fadd float %44, %45 %47 = fmul float %27, %40 %48 = fadd float %46, %47 %49 = fmul float %28, %43 %50 = fadd float %48, %49 %51 = fmul float %29, 4.000000e+00 %52 = fadd float %51, 0x3FF19999A0000000 %53 = fptosi float %52 to i32 %54 = shl i32 %53, 4 %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %54) %56 = shl i32 %53, 4 %57 = or i32 %56, 4 %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %57) %59 = shl i32 %53, 4 %60 = or i32 %59, 8 %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %60) %62 = shl i32 %53, 4 %63 = or i32 %62, 12 %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %63) %65 = fmul float %25, %55 %66 = fmul float %26, %58 %67 = fadd float %65, %66 %68 = fmul float %27, %61 %69 = fadd float %67, %68 %70 = fmul float %28, %64 %71 = fadd float %69, %70 %72 = fmul float %29, 4.000000e+00 %73 = fadd float %72, 0x4000CCCCC0000000 %74 = fptosi float %73 to i32 %75 = shl i32 %74, 4 %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %75) %77 = shl i32 %74, 4 %78 = or i32 %77, 4 %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %78) %80 = shl i32 %74, 4 %81 = or i32 %80, 8 %82 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %81) %83 = shl i32 %74, 4 %84 = or i32 %83, 12 %85 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %84) %86 = fmul float %25, %76 %87 = fmul float %26, %79 %88 = fadd float %86, %87 %89 = fmul float %27, %82 %90 = fadd float %88, %89 %91 = fmul float %28, %85 %92 = fadd float %90, %91 %93 = fmul float %29, 4.000000e+00 %94 = fadd float %93, 0x4008CCCCC0000000 %95 = fptosi float %94 to i32 %96 = shl i32 %95, 4 %97 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %96) %98 = shl i32 %95, 4 %99 = or i32 %98, 4 %100 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %99) %101 = shl i32 %95, 4 %102 = or i32 %101, 8 %103 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %102) %104 = shl i32 %95, 4 %105 = or i32 %104, 12 %106 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %105) %107 = fmul float %25, %97 %108 = fmul float %26, %100 %109 = fadd float %107, %108 %110 = fmul float %27, %103 %111 = fadd float %109, %110 %112 = fmul float %28, %106 %113 = fadd float %111, %112 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %92, float %113, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %50, float %71, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A v_cvt_f32_i32_e32 v1, v3 ; 7E020B03 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 v_madak_f32_e32 v2, 4.0, v1, 0x40066666 ; 420402F6 40066666 v_cvt_i32_f32_e32 v2, v2 ; 7E041102 v_madak_f32_e32 v3, 4.0, v1, 0x40466666 ; 420602F6 40466666 v_cvt_i32_f32_e32 v3, v3 ; 7E061103 v_madak_f32_e32 v4, 4.0, v1, 0x3dcccccd ; 420802F6 3DCCCCCD v_cvt_i32_f32_e32 v4, v4 ; 7E081104 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[5:8], v0, s[4:7], 0 idxen ; E00C2000 80010500 v_lshlrev_b32_e32 v4, 4, v4 ; 34080884 buffer_load_format_xyzw v[9:12], v0, s[8:11], 0 idxen ; E00C2000 80020900 v_or_b32_e32 v0, 4, v4 ; 38000884 buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 v_or_b32_e32 v13, 8, v4 ; 381A0888 buffer_load_dword v13, v13, s[0:3], 0 offen ; E0301000 80000D0D v_madak_f32_e32 v1, 4.0, v1, 0x3f8ccccd ; 420202F6 3F8CCCCD v_cvt_i32_f32_e32 v1, v1 ; 7E021101 buffer_load_dword v14, v4, s[0:3], 0 offen ; E0301000 80000E04 v_or_b32_e32 v4, 12, v4 ; 3808088C buffer_load_dword v4, v4, s[0:3], 0 offen ; E0301000 80000404 v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 v_or_b32_e32 v15, 4, v1 ; 381E0284 buffer_load_dword v15, v15, s[0:3], 0 offen ; E0301000 80000F0F v_or_b32_e32 v16, 8, v1 ; 38200288 buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 buffer_load_dword v17, v1, s[0:3], 0 offen ; E0301000 80001101 v_or_b32_e32 v1, 12, v1 ; 3802028C buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 v_lshlrev_b32_e32 v2, 4, v2 ; 34040484 v_or_b32_e32 v18, 4, v2 ; 38240484 buffer_load_dword v18, v18, s[0:3], 0 offen ; E0301000 80001212 v_lshlrev_b32_e32 v3, 4, v3 ; 34060684 buffer_load_dword v19, v2, s[0:3], 0 offen ; E0301000 80001302 v_or_b32_e32 v20, 4, v3 ; 38280684 buffer_load_dword v20, v20, s[0:3], 0 offen ; E0301000 80001414 buffer_load_dword v21, v3, s[0:3], 0 offen ; E0301000 80001503 v_or_b32_e32 v22, 8, v2 ; 382C0488 buffer_load_dword v22, v22, s[0:3], 0 offen ; E0301000 80001616 v_or_b32_e32 v23, 8, v3 ; 382E0688 buffer_load_dword v23, v23, s[0:3], 0 offen ; E0301000 80001717 v_or_b32_e32 v2, 12, v2 ; 3804048C buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 v_or_b32_e32 v3, 12, v3 ; 3806068C buffer_load_dword v3, v3, s[0:3], 0 offen ; E0301000 80000303 s_waitcnt ; BF8C077F v_mul_f32_e32 v0, v0, v10 ; 10001500 s_waitcnt vmcnt(13) ; BF8C077D v_mac_f32_e32 v0, v14, v9 ; 3E00130E s_waitcnt vmcnt(11) ; BF8C077B v_mul_f32_e32 v14, v15, v10 ; 101C150F s_waitcnt vmcnt(9) ; BF8C0779 v_mac_f32_e32 v14, v17, v9 ; 3E1C1311 s_waitcnt vmcnt(7) ; BF8C0777 v_mul_f32_e32 v15, v18, v10 ; 101E1512 s_waitcnt vmcnt(6) ; BF8C0776 v_mac_f32_e32 v15, v19, v9 ; 3E1E1313 s_waitcnt vmcnt(5) ; BF8C0775 v_mul_f32_e32 v10, v20, v10 ; 10141514 s_waitcnt vmcnt(4) ; BF8C0774 v_mac_f32_e32 v10, v21, v9 ; 3E141315 v_mac_f32_e32 v0, v13, v11 ; 3E00170D v_mac_f32_e32 v14, v16, v11 ; 3E1C1710 s_waitcnt vmcnt(3) ; BF8C0773 v_mac_f32_e32 v15, v22, v11 ; 3E1E1716 s_waitcnt vmcnt(2) ; BF8C0772 v_mac_f32_e32 v10, v23, v11 ; 3E141717 v_mac_f32_e32 v0, v4, v12 ; 3E001904 v_mac_f32_e32 v14, v1, v12 ; 3E1C1901 s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v15, v2, v12 ; 3E1E1902 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v10, v3, v12 ; 3E141903 exp 15, 32, 0, 0, 0, v5, v6, v7, v8 ; F800020F 08070605 v_mov_b32_e32 v1, 0 ; 7E020280 exp 15, 33, 0, 0, 0, v15, v10, v1, v1 ; F800021F 01010A0F v_mov_b32_e32 v2, 1.0 ; 7E0402F2 exp 15, 12, 0, 1, 0, v0, v14, v1, v2 ; F80008CF 02010E00 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 24 Code Size: 428 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV TEMP[1].xyz, TEMP[0].xyzx 3: MUL TEMP[0].x, TEMP[0].wwww, IN[0].wwww 4: MOV TEMP[1].w, TEMP[0].xxxx 5: MOV OUT[0], TEMP[1] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %29 = bitcast float %27 to i32 %30 = bitcast float %28 to i32 %31 = insertelement <2 x i32> undef, i32 %29, i32 0 %32 = insertelement <2 x i32> %31, i32 %30, i32 1 %33 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %32, <32 x i8> %23, <16 x i8> %25, i32 2) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = extractelement <4 x float> %33, i32 2 %37 = extractelement <4 x float> %33, i32 3 %38 = fmul float %37, %26 %39 = call i32 @llvm.SI.packf16(float %34, float %35) %40 = bitcast i32 %39 to float %41 = call i32 @llvm.SI.packf16(float %36, float %38) %42 = bitcast i32 %41 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %40, float %42, float %40, float %42) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400 v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[3:6], 15, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[12:19], s[0:3] ; F0800F00 00030303 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v2, v6 ; 10000D02 v_cvt_pkrtz_f16_f32_e32 v0, v5, v0 ; 5E000105 v_cvt_pkrtz_f16_f32_e32 v1, v3, v4 ; 5E020903 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 80 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..95] DCL TEMP[0..2], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 4.0000, 2.1000} IMM[1] FLT32 { 3.1000, 1.1000, 0.1000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: MUL TEMP[1].x, IN[3].xxxx, IMM[0].zzzz 2: ADD TEMP[2].x, IMM[0].wwww, TEMP[1].xxxx 3: F2I TEMP[2].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: UARL ADDR[0].x, TEMP[2].xxxx 6: DP4 TEMP[0].x, IN[2], CONST[ADDR[0].x] 7: ADD TEMP[2].x, IMM[1].xxxx, TEMP[1].xxxx 8: F2I TEMP[2].x, TEMP[2].xxxx 9: UARL ADDR[0].x, TEMP[2].xxxx 10: DP4 TEMP[2].x, IN[2], CONST[ADDR[0].x] 11: MOV TEMP[0].y, TEMP[2].xxxx 12: ADD TEMP[2].x, IMM[1].yyyy, TEMP[1].xxxx 13: F2I TEMP[2].x, TEMP[2].xxxx 14: ADD TEMP[1].x, TEMP[1].xxxx, IMM[1].zzzz 15: F2I TEMP[1].x, TEMP[1].xxxx 16: UARL ADDR[0].x, TEMP[1].xxxx 17: UARL ADDR[0].x, TEMP[1].xxxx 18: MOV TEMP[1], CONST[ADDR[0].x] 19: UARL ADDR[0].x, TEMP[2].xxxx 20: UARL ADDR[0].x, TEMP[2].xxxx 21: MAD TEMP[1], IN[0], CONST[ADDR[0].x], TEMP[1] 22: MOV TEMP[2].xy, IN[1].xyxx 23: MOV OUT[1], TEMP[1] 24: MOV OUT[0], TEMP[0] 25: MOV OUT[2], TEMP[2] 26: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %14 = load <16 x i8>, <16 x i8> addrspace(2)* %13, align 16, !tbaa !0 %15 = add i32 %5, %7 %16 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %14, i32 0, i32 %15) %17 = extractelement <4 x float> %16, i32 0 %18 = extractelement <4 x float> %16, i32 1 %19 = extractelement <4 x float> %16, i32 2 %20 = extractelement <4 x float> %16, i32 3 %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = extractelement <4 x float> %24, i32 1 %27 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %28 = load <16 x i8>, <16 x i8> addrspace(2)* %27, align 16, !tbaa !0 %29 = add i32 %5, %7 %30 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %28, i32 0, i32 %29) %31 = extractelement <4 x float> %30, i32 0 %32 = extractelement <4 x float> %30, i32 1 %33 = extractelement <4 x float> %30, i32 2 %34 = extractelement <4 x float> %30, i32 3 %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 %37 = add i32 %5, %7 %38 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %37) %39 = extractelement <4 x float> %38, i32 0 %40 = fmul float %39, 4.000000e+00 %41 = fadd float %40, 0x4000CCCCC0000000 %42 = fptosi float %41 to i32 %43 = shl i32 %42, 4 %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %43) %45 = shl i32 %42, 4 %46 = or i32 %45, 4 %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %46) %48 = shl i32 %42, 4 %49 = or i32 %48, 8 %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %49) %51 = shl i32 %42, 4 %52 = or i32 %51, 12 %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %52) %54 = fmul float %31, %44 %55 = fmul float %32, %47 %56 = fadd float %54, %55 %57 = fmul float %33, %50 %58 = fadd float %56, %57 %59 = fmul float %34, %53 %60 = fadd float %58, %59 %61 = fadd float %40, 0x4008CCCCC0000000 %62 = fptosi float %61 to i32 %63 = shl i32 %62, 4 %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %63) %65 = shl i32 %62, 4 %66 = or i32 %65, 4 %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %66) %68 = shl i32 %62, 4 %69 = or i32 %68, 8 %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %69) %71 = shl i32 %62, 4 %72 = or i32 %71, 12 %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %72) %74 = fmul float %31, %64 %75 = fmul float %32, %67 %76 = fadd float %74, %75 %77 = fmul float %33, %70 %78 = fadd float %76, %77 %79 = fmul float %34, %73 %80 = fadd float %78, %79 %81 = fadd float %40, 0x3FF19999A0000000 %82 = fptosi float %81 to i32 %83 = fadd float %40, 0x3FB99999A0000000 %84 = fptosi float %83 to i32 %85 = shl i32 %84, 4 %86 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %85) %87 = shl i32 %84, 4 %88 = or i32 %87, 4 %89 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %88) %90 = shl i32 %84, 4 %91 = or i32 %90, 8 %92 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %91) %93 = shl i32 %84, 4 %94 = or i32 %93, 12 %95 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %94) %96 = shl i32 %82, 4 %97 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %96) %98 = fmul float %17, %97 %99 = fadd float %98, %86 %100 = shl i32 %82, 4 %101 = or i32 %100, 4 %102 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %101) %103 = fmul float %18, %102 %104 = fadd float %103, %89 %105 = shl i32 %82, 4 %106 = or i32 %105, 8 %107 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %106) %108 = fmul float %19, %107 %109 = fadd float %108, %92 %110 = shl i32 %82, 4 %111 = or i32 %110, 12 %112 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %111) %113 = fmul float %20, %112 %114 = fadd float %113, %95 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %99, float %104, float %109, float %114) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %25, float %26, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %60, float %80, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[7:10], v0, s[16:19], 0 idxen ; E00C2000 80040700 buffer_load_format_xyzw v[11:14], v0, s[8:11], 0 idxen ; E00C2000 80020B00 s_waitcnt vmcnt(0) ; BF8C0770 v_madak_f32_e32 v0, 4.0, v11, 0x40066666 ; 420016F6 40066666 v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_madak_f32_e32 v12, 4.0, v11, 0x40466666 ; 421816F6 40466666 v_cvt_i32_f32_e32 v12, v12 ; 7E18110C v_madak_f32_e32 v13, 4.0, v11, 0x3f8ccccd ; 421A16F6 3F8CCCCD v_madak_f32_e32 v11, 4.0, v11, 0x3dcccccd ; 421616F6 3DCCCCCD v_cvt_i32_f32_e32 v11, v11 ; 7E16110B v_cvt_i32_f32_e32 v13, v13 ; 7E1A110D v_lshlrev_b32_e32 v0, 4, v0 ; 34000084 v_lshlrev_b32_e32 v12, 4, v12 ; 34181884 v_lshlrev_b32_e32 v11, 4, v11 ; 34161684 v_lshlrev_b32_e32 v13, 4, v13 ; 341A1A84 buffer_load_dword v14, v0, s[0:3], 0 offen ; E0301000 80000E00 v_or_b32_e32 v15, 4, v0 ; 381E0084 v_or_b32_e32 v16, 8, v0 ; 38200088 v_or_b32_e32 v0, 12, v0 ; 3800008C buffer_load_dword v17, v11, s[0:3], 0 offen ; E0301000 8000110B v_or_b32_e32 v18, 4, v11 ; 38241684 v_or_b32_e32 v19, 8, v11 ; 38261688 v_or_b32_e32 v11, 12, v11 ; 3816168C buffer_load_dword v20, v13, s[0:3], 0 offen ; E0301000 8000140D v_or_b32_e32 v21, 4, v13 ; 382A1A84 v_or_b32_e32 v22, 8, v13 ; 382C1A88 v_or_b32_e32 v13, 12, v13 ; 381A1A8C buffer_load_dword v15, v15, s[0:3], 0 offen ; E0301000 80000F0F buffer_load_dword v18, v18, s[0:3], 0 offen ; E0301000 80001212 buffer_load_dword v19, v19, s[0:3], 0 offen ; E0301000 80001313 buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 buffer_load_dword v22, v22, s[0:3], 0 offen ; E0301000 80001616 buffer_load_dword v13, v13, s[0:3], 0 offen ; E0301000 80000D0D v_or_b32_e32 v23, 4, v12 ; 382E1884 buffer_load_dword v23, v23, s[0:3], 0 offen ; E0301000 80001717 buffer_load_dword v24, v12, s[0:3], 0 offen ; E0301000 8000180C v_or_b32_e32 v25, 8, v12 ; 38321888 v_or_b32_e32 v12, 12, v12 ; 3818188C buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 buffer_load_dword v25, v25, s[0:3], 0 offen ; E0301000 80001919 buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 buffer_load_dword v12, v12, s[0:3], 0 offen ; E0301000 80000C0C s_waitcnt vmcnt(13) ; BF8C077D v_mac_f32_e32 v17, v20, v1 ; 3E220314 s_waitcnt vmcnt(8) ; BF8C0778 v_mac_f32_e32 v18, v21, v2 ; 3E240515 s_waitcnt vmcnt(7) ; BF8C0777 v_mac_f32_e32 v19, v22, v3 ; 3E260716 s_waitcnt vmcnt(6) ; BF8C0776 v_mac_f32_e32 v11, v13, v4 ; 3E16090D v_mul_f32_e32 v1, v15, v8 ; 1002110F v_mac_f32_e32 v1, v14, v7 ; 3E020F0E s_waitcnt vmcnt(5) ; BF8C0775 v_mul_f32_e32 v2, v23, v8 ; 10041117 s_waitcnt vmcnt(4) ; BF8C0774 v_mac_f32_e32 v2, v24, v7 ; 3E040F18 s_waitcnt vmcnt(3) ; BF8C0773 v_mac_f32_e32 v1, v16, v9 ; 3E021310 s_waitcnt vmcnt(2) ; BF8C0772 v_mac_f32_e32 v2, v25, v9 ; 3E041319 s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v1, v0, v10 ; 3E021500 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v2, v12, v10 ; 3E04150C exp 15, 32, 0, 0, 0, v17, v18, v19, v11 ; F800020F 0B131211 v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 33, 0, 0, 0, v5, v6, v0, v0 ; F800021F 00000605 v_mov_b32_e32 v3, 1.0 ; 7E0602F2 exp 15, 12, 0, 1, 0, v1, v2, v0, v3 ; F80008CF 03000201 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 28 Code Size: 432 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xyz, IN[0].xyzx 1: MOV TEMP[1].xy, IN[1].xyyy 2: TEX TEMP[1].x, TEMP[1], SAMP[0], 2D 3: MUL TEMP[1].x, IN[0].wwww, TEMP[1].xxxx 4: MOV TEMP[0].w, TEMP[1].xxxx 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %32 = bitcast float %30 to i32 %33 = bitcast float %31 to i32 %34 = insertelement <2 x i32> undef, i32 %32, i32 0 %35 = insertelement <2 x i32> %34, i32 %33, i32 1 %36 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %35, <32 x i8> %23, <16 x i8> %25, i32 2) %37 = extractelement <4 x float> %36, i32 0 %38 = fmul float %29, %37 %39 = call i32 @llvm.SI.packf16(float %26, float %27) %40 = bitcast i32 %39 to float %41 = call i32 @llvm.SI.packf16(float %28, float %38) %42 = bitcast i32 %41 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %40, float %42, float %40, float %42) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v0, 1, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[8:15], s[0:3] ; F0800100 00020006 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v0, v5 ; 10000B00 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 v_cvt_pkrtz_f16_f32_e32 v0, v4, v0 ; 5E000104 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 104 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL CONST[0..47] DCL TEMP[0..2], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 2.0000, 0.1000} IMM[1] FLT32 { 1.1000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: MUL TEMP[1].x, IN[1].xxxx, IMM[0].zzzz 2: ADD TEMP[2].x, TEMP[1].xxxx, IMM[0].wwww 3: F2I TEMP[2].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: UARL ADDR[0].x, TEMP[2].xxxx 6: DP4 TEMP[0].x, IN[0], CONST[ADDR[0].x] 7: ADD TEMP[1].x, IMM[1].xxxx, TEMP[1].xxxx 8: F2I TEMP[1].x, TEMP[1].xxxx 9: UARL ADDR[0].x, TEMP[1].xxxx 10: DP4 TEMP[1].x, IN[0], CONST[ADDR[0].x] 11: MOV TEMP[0].y, TEMP[1].xxxx 12: MOV OUT[0], TEMP[0] 13: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %14 = load <16 x i8>, <16 x i8> addrspace(2)* %13, align 16, !tbaa !0 %15 = add i32 %5, %7 %16 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %14, i32 0, i32 %15) %17 = extractelement <4 x float> %16, i32 0 %18 = extractelement <4 x float> %16, i32 1 %19 = extractelement <4 x float> %16, i32 2 %20 = extractelement <4 x float> %16, i32 3 %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = fmul float %25, 2.000000e+00 %27 = fadd float %26, 0x3FB99999A0000000 %28 = fptosi float %27 to i32 %29 = shl i32 %28, 4 %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %29) %31 = shl i32 %28, 4 %32 = or i32 %31, 4 %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %32) %34 = shl i32 %28, 4 %35 = or i32 %34, 8 %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %35) %37 = shl i32 %28, 4 %38 = or i32 %37, 12 %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %38) %40 = fmul float %17, %30 %41 = fmul float %18, %33 %42 = fadd float %40, %41 %43 = fmul float %19, %36 %44 = fadd float %42, %43 %45 = fmul float %20, %39 %46 = fadd float %44, %45 %47 = fadd float %26, 0x3FF19999A0000000 %48 = fptosi float %47 to i32 %49 = shl i32 %48, 4 %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %49) %51 = shl i32 %48, 4 %52 = or i32 %51, 4 %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %52) %54 = shl i32 %48, 4 %55 = or i32 %54, 8 %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %55) %57 = shl i32 %48, 4 %58 = or i32 %57, 12 %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %58) %60 = fmul float %17, %50 %61 = fmul float %18, %53 %62 = fadd float %60, %61 %63 = fmul float %19, %56 %64 = fadd float %62, %63 %65 = fmul float %20, %59 %66 = fadd float %64, %65 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %46, float %66, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_waitcnt vmcnt(0) ; BF8C0770 v_madak_f32_e32 v0, 2.0, v5, 0x3dcccccd ; 42000AF4 3DCCCCCD v_madak_f32_e32 v5, 2.0, v5, 0x3f8ccccd ; 420A0AF4 3F8CCCCD v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_cvt_i32_f32_e32 v5, v5 ; 7E0A1105 v_lshlrev_b32_e32 v0, 4, v0 ; 34000084 v_lshlrev_b32_e32 v5, 4, v5 ; 340A0A84 buffer_load_dword v6, v0, s[0:3], 0 offen ; E0301000 80000600 v_or_b32_e32 v7, 4, v0 ; 380E0084 v_or_b32_e32 v8, 8, v0 ; 38100088 v_or_b32_e32 v0, 12, v0 ; 3800008C buffer_load_dword v9, v5, s[0:3], 0 offen ; E0301000 80000905 buffer_load_dword v7, v7, s[0:3], 0 offen ; E0301000 80000707 v_or_b32_e32 v10, 4, v5 ; 38140A84 v_or_b32_e32 v11, 8, v5 ; 38160A88 buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A v_or_b32_e32 v5, 12, v5 ; 380A0A8C buffer_load_dword v8, v8, s[0:3], 0 offen ; E0301000 80000808 buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 buffer_load_dword v5, v5, s[0:3], 0 offen ; E0301000 80000505 s_waitcnt vmcnt(5) ; BF8C0775 v_mul_f32_e32 v7, v7, v2 ; 100E0507 v_mac_f32_e32 v7, v6, v1 ; 3E0E0306 v_mov_b32_e32 v6, 1.0 ; 7E0C02F2 s_waitcnt vmcnt(4) ; BF8C0774 v_mul_f32_e32 v2, v10, v2 ; 1004050A v_mac_f32_e32 v2, v9, v1 ; 3E040309 s_waitcnt vmcnt(3) ; BF8C0773 v_mac_f32_e32 v7, v8, v3 ; 3E0E0708 s_waitcnt vmcnt(2) ; BF8C0772 v_mac_f32_e32 v2, v11, v3 ; 3E04070B s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v7, v0, v4 ; 3E0E0900 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v2, v5, v4 ; 3E040905 v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 12, 0, 1, 0, v7, v2, v0, v6 ; F80008CF 06000207 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 236 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL OUT[0], COLOR DCL CONST[0] 0: MOV OUT[0], CONST[0] 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %28 = call i32 @llvm.SI.packf16(float %24, float %25) %29 = bitcast i32 %28 to float %30 = call i32 @llvm.SI.packf16(float %26, float %27) %31 = bitcast i32 %30 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %29, float %31, float %29, float %31) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s0, s[0:3], 0x3 ; C2000103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s5 ; 7E000205 v_cvt_pkrtz_f16_f32_e32 v0, s4, v0 ; 5E000004 v_mov_b32_e32 v1, s0 ; 7E020200 v_cvt_pkrtz_f16_f32_e32 v1, s6, v1 ; 5E020206 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 8 VGPRS: 4 Code Size: 56 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL CONST[0..1] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[0], CONST[0] 2: DP4 TEMP[1].x, IN[0], CONST[1] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: MOV OUT[0], TEMP[0] 5: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = extractelement <4 x float> %24, i32 1 %27 = extractelement <4 x float> %24, i32 2 %28 = extractelement <4 x float> %24, i32 3 %29 = fmul float %25, %13 %30 = fmul float %26, %14 %31 = fadd float %29, %30 %32 = fmul float %27, %15 %33 = fadd float %31, %32 %34 = fmul float %28, %16 %35 = fadd float %33, %34 %36 = fmul float %25, %17 %37 = fmul float %26, %18 %38 = fadd float %36, %37 %39 = fmul float %27, %19 %40 = fadd float %38, %39 %41 = fmul float %28, %20 %42 = fadd float %40, %41 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %35, float %42, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_buffer_load_dword s6, s[0:3], 0x0 ; C2030100 s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104 s_buffer_load_dword s8, s[0:3], 0x2 ; C2040102 s_buffer_load_dword s9, s[0:3], 0x6 ; C2048106 s_buffer_load_dword s10, s[0:3], 0x3 ; C2050103 s_buffer_load_dword s0, s[0:3], 0x7 ; C2000107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v1 ; 10080204 v_mul_f32_e32 v1, s5, v1 ; 10020205 v_mac_f32_e32 v4, s6, v0 ; 3E080006 v_mac_f32_e32 v1, s7, v0 ; 3E020007 v_mac_f32_e32 v4, s8, v2 ; 3E080408 v_mac_f32_e32 v1, s9, v2 ; 3E020409 v_mac_f32_e32 v4, s10, v3 ; 3E08060A v_mac_f32_e32 v1, s0, v3 ; 3E020600 v_mov_b32_e32 v0, 1.0 ; 7E0002F2 v_mov_b32_e32 v2, 0 ; 7E040280 exp 15, 12, 0, 1, 0, v4, v1, v2, v0 ; F80008CF 00020104 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 116 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL OUT[0], COLOR DCL CONST[0] 0: MOV OUT[0], CONST[0] 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %28 = call i32 @llvm.SI.packf16(float %24, float %25) %29 = bitcast i32 %28 to float %30 = call i32 @llvm.SI.packf16(float %26, float %27) %31 = bitcast i32 %30 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %29, float %31, float %29, float %31) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s0, s[0:3], 0x3 ; C2000103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s5 ; 7E000205 v_cvt_pkrtz_f16_f32_e32 v0, s4, v0 ; 5E000004 v_mov_b32_e32 v1, s0 ; 7E020200 v_cvt_pkrtz_f16_f32_e32 v1, s6, v1 ; 5E020206 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 8 VGPRS: 4 Code Size: 56 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..3] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[2], CONST[2] 2: DP4 TEMP[1].x, IN[2], CONST[3] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: MAD TEMP[1], IN[0], CONST[1], CONST[0] 5: MOV TEMP[2].xy, IN[1].xyxx 6: MOV OUT[1], TEMP[1] 7: MOV OUT[0], TEMP[0] 8: MOV OUT[2], TEMP[2] 9: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = add i32 %5, %7 %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45) %47 = extractelement <4 x float> %46, i32 0 %48 = extractelement <4 x float> %46, i32 1 %49 = extractelement <4 x float> %46, i32 2 %50 = extractelement <4 x float> %46, i32 3 %51 = fmul float %47, %21 %52 = fmul float %48, %22 %53 = fadd float %51, %52 %54 = fmul float %49, %23 %55 = fadd float %53, %54 %56 = fmul float %50, %24 %57 = fadd float %55, %56 %58 = fmul float %47, %25 %59 = fmul float %48, %26 %60 = fadd float %58, %59 %61 = fmul float %49, %27 %62 = fadd float %60, %61 %63 = fmul float %50, %28 %64 = fadd float %62, %63 %65 = fmul float %33, %17 %66 = fadd float %65, %13 %67 = fmul float %34, %18 %68 = fadd float %67, %14 %69 = fmul float %35, %19 %70 = fadd float %69, %15 %71 = fmul float %36, %20 %72 = fadd float %71, %16 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %66, float %68, float %70, float %72) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %41, float %42, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %57, float %64, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s16, s[0:3], 0x0 ; C2080100 s_buffer_load_dword s17, s[0:3], 0x1 ; C2088101 buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[7:10], v0, s[8:11], 0 idxen ; E00C2000 80020700 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103 s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104 s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 v_mov_b32_e32 v0, s16 ; 7E000210 s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107 v_mov_b32_e32 v11, s17 ; 7E160211 s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108 s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109 s_buffer_load_dword s12, s[0:3], 0xa ; C206010A s_buffer_load_dword s13, s[0:3], 0xb ; C206810B s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v12, s4 ; 7E180204 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C v_mov_b32_e32 v13, s5 ; 7E1A0205 s_buffer_load_dword s5, s[0:3], 0xd ; C202810D s_buffer_load_dword s14, s[0:3], 0xe ; C207010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F v_mac_f32_e32 v0, s6, v1 ; 3E000206 v_mac_f32_e32 v11, s7, v2 ; 3E160407 v_mac_f32_e32 v12, s8, v3 ; 3E180608 v_mac_f32_e32 v13, s9, v4 ; 3E1A0809 v_mul_f32_e32 v1, s11, v8 ; 1002100B v_mac_f32_e32 v1, s10, v7 ; 3E020E0A s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s5, v8 ; 10041005 v_mac_f32_e32 v2, s4, v7 ; 3E040E04 v_mac_f32_e32 v1, s12, v9 ; 3E02120C v_mac_f32_e32 v2, s14, v9 ; 3E04120E v_mac_f32_e32 v1, s13, v10 ; 3E02140D v_mac_f32_e32 v2, s0, v10 ; 3E041400 exp 15, 32, 0, 0, 0, v0, v11, v12, v13 ; F800020F 0D0C0B00 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 33, 0, 0, 0, v5, v6, v0, v0 ; F800021F 00000605 v_mov_b32_e32 v3, 1.0 ; 7E0602F2 exp 15, 12, 0, 1, 0, v1, v2, v0, v3 ; F80008CF 03000201 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Code Size: 228 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xyz, IN[0].xyzx 1: MOV TEMP[1].xy, IN[1].xyyy 2: TEX TEMP[1].x, TEMP[1], SAMP[0], 2D 3: MUL TEMP[1].x, IN[0].wwww, TEMP[1].xxxx 4: MOV TEMP[0].w, TEMP[1].xxxx 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %32 = bitcast float %30 to i32 %33 = bitcast float %31 to i32 %34 = insertelement <2 x i32> undef, i32 %32, i32 0 %35 = insertelement <2 x i32> %34, i32 %33, i32 1 %36 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %35, <32 x i8> %23, <16 x i8> %25, i32 2) %37 = extractelement <4 x float> %36, i32 0 %38 = fmul float %29, %37 %39 = call i32 @llvm.SI.packf16(float %26, float %27) %40 = bitcast i32 %39 to float %41 = call i32 @llvm.SI.packf16(float %28, float %38) %42 = bitcast i32 %41 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %40, float %42, float %40, float %42) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v0, 1, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[8:15], s[0:3] ; F0800100 00020006 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v0, v5 ; 10000B00 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 v_cvt_pkrtz_f16_f32_e32 v0, v4, v0 ; 5E000104 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 104 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..1] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[1], CONST[0] 2: DP4 TEMP[1].x, IN[1], CONST[1] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: MOV TEMP[1].xy, IN[0].xyxx 5: MOV OUT[0], TEMP[0] 6: MOV OUT[1], TEMP[1] 7: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = extractelement <4 x float> %24, i32 1 %27 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %28 = load <16 x i8>, <16 x i8> addrspace(2)* %27, align 16, !tbaa !0 %29 = add i32 %5, %7 %30 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %28, i32 0, i32 %29) %31 = extractelement <4 x float> %30, i32 0 %32 = extractelement <4 x float> %30, i32 1 %33 = extractelement <4 x float> %30, i32 2 %34 = extractelement <4 x float> %30, i32 3 %35 = fmul float %31, %13 %36 = fmul float %32, %14 %37 = fadd float %35, %36 %38 = fmul float %33, %15 %39 = fadd float %37, %38 %40 = fmul float %34, %16 %41 = fadd float %39, %40 %42 = fmul float %31, %17 %43 = fmul float %32, %18 %44 = fadd float %42, %43 %45 = fmul float %33, %19 %46 = fadd float %44, %45 %47 = fmul float %34, %20 %48 = fadd float %46, %47 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %25, float %26, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %41, float %48, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, s10, v0 ; 4A00000A v_mov_b32_e32 v1, 0 ; 7E020280 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100 s_buffer_load_dword s9, s[0:3], 0x1 ; C2048101 s_buffer_load_dword s10, s[0:3], 0x2 ; C2050102 s_buffer_load_dword s11, s[0:3], 0x3 ; C2058103 buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[4:7], v0, s[12:15], 0 idxen ; E00C2000 80030400 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_buffer_load_dword s6, s[0:3], 0x6 ; C2030106 s_buffer_load_dword s0, s[0:3], 0x7 ; C2000107 exp 15, 32, 0, 0, 0, v2, v3, v1, v1 ; F800020F 01010302 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, s9, v5 ; 10000A09 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v2, s5, v5 ; 10040A05 v_mac_f32_e32 v0, s8, v4 ; 3E000808 v_mac_f32_e32 v2, s4, v4 ; 3E040804 v_mac_f32_e32 v0, s10, v6 ; 3E000C0A v_mac_f32_e32 v2, s6, v6 ; 3E040C06 v_mac_f32_e32 v0, s11, v7 ; 3E000E0B v_mac_f32_e32 v2, s0, v7 ; 3E040E00 v_mov_b32_e32 v3, 1.0 ; 7E0602F2 exp 15, 12, 0, 1, 0, v0, v2, v1, v3 ; F80008CF 03010200 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 140 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0], LOCAL 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV OUT[0], TEMP[0] 3: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = bitcast float %26 to i32 %29 = bitcast float %27 to i32 %30 = insertelement <2 x i32> undef, i32 %28, i32 0 %31 = insertelement <2 x i32> %30, i32 %29, i32 1 %32 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %23, <16 x i8> %25, i32 2) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = call i32 @llvm.SI.packf16(float %33, float %34) %38 = bitcast i32 %37 to float %39 = call i32 @llvm.SI.packf16(float %35, float %36) %40 = bitcast i32 %39 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %38, float %40, float %38, float %40) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00030002 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 4 Code Size: 68 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL CONST[0..5] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[1], CONST[2] 2: DP4 TEMP[1].x, IN[1], CONST[3] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: DP4 TEMP[1].x, IN[1], CONST[4] 5: DP4 TEMP[2].x, IN[1], CONST[5] 6: MOV TEMP[1].y, TEMP[2].xxxx 7: MOV TEMP[1].xy, TEMP[1].xyxx 8: MOV OUT[1], IN[0] 9: MOV OUT[2], CONST[0] 10: MOV OUT[3], CONST[1] 11: MOV OUT[0], TEMP[0] 12: MOV OUT[4], TEMP[1] 13: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = extractelement <4 x float> %48, i32 3 %53 = fmul float %49, %21 %54 = fmul float %50, %22 %55 = fadd float %53, %54 %56 = fmul float %51, %23 %57 = fadd float %55, %56 %58 = fmul float %52, %24 %59 = fadd float %57, %58 %60 = fmul float %49, %25 %61 = fmul float %50, %26 %62 = fadd float %60, %61 %63 = fmul float %51, %27 %64 = fadd float %62, %63 %65 = fmul float %52, %28 %66 = fadd float %64, %65 %67 = fmul float %49, %29 %68 = fmul float %50, %30 %69 = fadd float %67, %68 %70 = fmul float %51, %31 %71 = fadd float %69, %70 %72 = fmul float %52, %32 %73 = fadd float %71, %72 %74 = fmul float %49, %33 %75 = fmul float %50, %34 %76 = fadd float %74, %75 %77 = fmul float %51, %35 %78 = fadd float %76, %77 %79 = fmul float %52, %36 %80 = fadd float %78, %79 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %43, float %44) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %13, float %14, float %15, float %16) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %73, float %80, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %59, float %66, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0xf ; C204010F s_buffer_load_dword s9, s[0:3], 0x10 ; C2048110 s_buffer_load_dword s10, s[0:3], 0x11 ; C2050111 buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_buffer_load_dword s5, s[0:3], 0x13 ; C2028113 s_buffer_load_dword s6, s[0:3], 0x14 ; C2030114 s_buffer_load_dword s7, s[0:3], 0x15 ; C2038115 s_buffer_load_dword s11, s[0:3], 0x16 ; C2058116 s_buffer_load_dword s12, s[0:3], 0x17 ; C2060117 s_buffer_load_dword s13, s[0:3], 0x5 ; C2068105 s_buffer_load_dword s14, s[0:3], 0x6 ; C2070106 s_buffer_load_dword s15, s[0:3], 0x7 ; C2078107 s_buffer_load_dword s16, s[0:3], 0x8 ; C2080108 s_buffer_load_dword s17, s[0:3], 0x9 ; C2088109 s_buffer_load_dword s18, s[0:3], 0xa ; C209010A s_buffer_load_dword s19, s[0:3], 0xb ; C209810B s_buffer_load_dword s20, s[0:3], 0xc ; C20A010C s_buffer_load_dword s21, s[0:3], 0xd ; C20A810D s_buffer_load_dword s22, s[0:3], 0xe ; C20B010E s_buffer_load_dword s23, s[0:3], 0x0 ; C20B8100 s_buffer_load_dword s24, s[0:3], 0x1 ; C20C0101 s_buffer_load_dword s25, s[0:3], 0x2 ; C20C8102 s_buffer_load_dword s26, s[0:3], 0x3 ; C20D0103 s_buffer_load_dword s0, s[0:3], 0x4 ; C2000104 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s17, v6 ; 10000C11 v_mac_f32_e32 v0, s16, v5 ; 3E000A10 v_mul_f32_e32 v9, s21, v6 ; 10120C15 v_mac_f32_e32 v9, s20, v5 ; 3E120A14 v_mul_f32_e32 v10, s10, v6 ; 10140C0A v_mac_f32_e32 v10, s9, v5 ; 3E140A09 v_mul_f32_e32 v6, s7, v6 ; 100C0C07 v_mac_f32_e32 v6, s6, v5 ; 3E0C0A06 v_mac_f32_e32 v0, s18, v7 ; 3E000E12 v_mac_f32_e32 v9, s22, v7 ; 3E120E16 v_mac_f32_e32 v10, s4, v7 ; 3E140E04 v_mac_f32_e32 v6, s11, v7 ; 3E0C0E0B v_mac_f32_e32 v0, s19, v8 ; 3E001013 v_mac_f32_e32 v9, s8, v8 ; 3E121008 v_mac_f32_e32 v10, s5, v8 ; 3E141005 v_mac_f32_e32 v6, s12, v8 ; 3E0C100C exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, s23 ; 7E020217 v_mov_b32_e32 v2, s24 ; 7E040218 v_mov_b32_e32 v3, s25 ; 7E060219 v_mov_b32_e32 v4, s26 ; 7E08021A exp 15, 33, 0, 0, 0, v1, v2, v3, v4 ; F800021F 04030201 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, s0 ; 7E020200 v_mov_b32_e32 v2, s13 ; 7E04020D v_mov_b32_e32 v3, s14 ; 7E06020E v_mov_b32_e32 v4, s15 ; 7E08020F exp 15, 34, 0, 0, 0, v1, v2, v3, v4 ; F800022F 04030201 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 0 ; 7E020280 exp 15, 35, 0, 0, 0, v10, v6, v1, v1 ; F800023F 0101060A v_mov_b32_e32 v2, 1.0 ; 7E0402F2 exp 15, 12, 0, 1, 0, v0, v9, v1, v2 ; F80008CF 02010900 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 12 Code Size: 296 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xy, IN[3].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MAD TEMP[0], TEMP[0], IN[2], IN[1] 3: MUL TEMP[1].x, TEMP[0].wwww, IN[0].wwww 4: MOV TEMP[0].w, TEMP[1].xxxx 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %37 = bitcast float %35 to i32 %38 = bitcast float %36 to i32 %39 = insertelement <2 x i32> undef, i32 %37, i32 0 %40 = insertelement <2 x i32> %39, i32 %38, i32 1 %41 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %40, <32 x i8> %23, <16 x i8> %25, i32 2) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = extractelement <4 x float> %41, i32 2 %45 = extractelement <4 x float> %41, i32 3 %46 = fmul float %42, %31 %47 = fadd float %46, %27 %48 = fmul float %43, %32 %49 = fadd float %48, %28 %50 = fmul float %44, %33 %51 = fadd float %50, %29 %52 = fmul float %45, %34 %53 = fadd float %52, %30 %54 = fmul float %53, %26 %55 = call i32 @llvm.SI.packf16(float %47, float %49) %56 = bitcast i32 %55 to float %57 = call i32 @llvm.SI.packf16(float %51, float %54) %58 = bitcast i32 %57 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %56, float %58, float %56, float %58) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400 v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 v_interp_p1_f32 v5, v0, 2, 1, [m0] ; C8140600 v_interp_p2_f32 v5, [v5], v1, 2, 1, [m0] ; C8150601 v_interp_p1_f32 v6, v0, 3, 1, [m0] ; C8180700 v_interp_p2_f32 v6, [v6], v1, 3, 1, [m0] ; C8190701 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 v_interp_p1_f32 v10, v0, 3, 2, [m0] ; C8280B00 v_interp_p2_f32 v10, [v10], v1, 3, 2, [m0] ; C8290B01 v_interp_p1_f32 v11, v0, 0, 3, [m0] ; C82C0C00 v_interp_p2_f32 v11, [v11], v1, 0, 3, [m0] ; C82D0C01 v_interp_p1_f32 v12, v0, 1, 3, [m0] ; C8300D00 v_interp_p2_f32 v12, [v12], v1, 1, 3, [m0] ; C8310D01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[11:14], 15, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[4:11], s[0:3] ; F0800F00 00010B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v3, v7, v11 ; 3E061707 v_mac_f32_e32 v4, v8, v12 ; 3E081908 v_mac_f32_e32 v5, v9, v13 ; 3E0A1B09 v_mac_f32_e32 v6, v10, v14 ; 3E0C1D0A v_mul_f32_e32 v0, v2, v6 ; 10000D02 v_cvt_pkrtz_f16_f32_e32 v1, v3, v4 ; 5E020903 v_cvt_pkrtz_f16_f32_e32 v0, v5, v0 ; 5E000105 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Code Size: 160 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL SV[0], INSTANCEID DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL CONST[0..143] DCL TEMP[0..3], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 6.0000, 2.1000} IMM[1] FLT32 { 3.1000, 4.1000, 5.1000, 0.1000} IMM[2] FLT32 { 1.1000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: I2F TEMP[1].x, SV[0].xxxx 2: MAD TEMP[2].x, TEMP[1].xxxx, IMM[0].zzzz, IMM[0].wwww 3: F2I TEMP[2].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: UARL ADDR[0].x, TEMP[2].xxxx 6: DP4 TEMP[0].x, IN[1], CONST[ADDR[0].x] 7: MAD TEMP[2].x, TEMP[1].xxxx, IMM[0].zzzz, IMM[1].xxxx 8: F2I TEMP[2].x, TEMP[2].xxxx 9: UARL ADDR[0].x, TEMP[2].xxxx 10: DP4 TEMP[2].x, IN[1], CONST[ADDR[0].x] 11: MOV TEMP[0].y, TEMP[2].xxxx 12: MAD TEMP[2].x, TEMP[1].xxxx, IMM[0].zzzz, IMM[1].yyyy 13: F2I TEMP[2].x, TEMP[2].xxxx 14: UARL ADDR[0].x, TEMP[2].xxxx 15: UARL ADDR[0].x, TEMP[2].xxxx 16: DP4 TEMP[2].x, IN[1], CONST[ADDR[0].x] 17: MAD TEMP[3].x, TEMP[1].xxxx, IMM[0].zzzz, IMM[1].zzzz 18: F2I TEMP[3].x, TEMP[3].xxxx 19: UARL ADDR[0].x, TEMP[3].xxxx 20: DP4 TEMP[3].x, IN[1], CONST[ADDR[0].x] 21: MOV TEMP[2].y, TEMP[3].xxxx 22: MAD TEMP[3].x, TEMP[1].xxxx, IMM[0].zzzz, IMM[1].wwww 23: F2I TEMP[3].x, TEMP[3].xxxx 24: UARL ADDR[0].x, TEMP[3].xxxx 25: MOV TEMP[3], CONST[ADDR[0].x] 26: MAD TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz, IMM[2].xxxx 27: F2I TEMP[1].x, TEMP[1].xxxx 28: UARL ADDR[0].x, TEMP[1].xxxx 29: MOV TEMP[1], CONST[ADDR[0].x] 30: MOV TEMP[2].xy, TEMP[2].xyxx 31: MOV OUT[1], IN[0] 32: MOV OUT[2], TEMP[3] 33: MOV OUT[3], TEMP[1] 34: MOV OUT[0], TEMP[0] 35: MOV OUT[4], TEMP[2] 36: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %14 = load <16 x i8>, <16 x i8> addrspace(2)* %13, align 16, !tbaa !0 %15 = add i32 %5, %7 %16 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %14, i32 0, i32 %15) %17 = extractelement <4 x float> %16, i32 0 %18 = extractelement <4 x float> %16, i32 1 %19 = extractelement <4 x float> %16, i32 2 %20 = extractelement <4 x float> %16, i32 3 %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = extractelement <4 x float> %24, i32 1 %27 = extractelement <4 x float> %24, i32 2 %28 = extractelement <4 x float> %24, i32 3 %29 = sitofp i32 %10 to float %30 = fmul float %29, 6.000000e+00 %31 = fadd float %30, 0x4000CCCCC0000000 %32 = fptosi float %31 to i32 %33 = shl i32 %32, 4 %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %33) %35 = shl i32 %32, 4 %36 = or i32 %35, 4 %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %36) %38 = shl i32 %32, 4 %39 = or i32 %38, 8 %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %39) %41 = shl i32 %32, 4 %42 = or i32 %41, 12 %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %42) %44 = fmul float %25, %34 %45 = fmul float %26, %37 %46 = fadd float %44, %45 %47 = fmul float %27, %40 %48 = fadd float %46, %47 %49 = fmul float %28, %43 %50 = fadd float %48, %49 %51 = fmul float %29, 6.000000e+00 %52 = fadd float %51, 0x4008CCCCC0000000 %53 = fptosi float %52 to i32 %54 = shl i32 %53, 4 %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %54) %56 = shl i32 %53, 4 %57 = or i32 %56, 4 %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %57) %59 = shl i32 %53, 4 %60 = or i32 %59, 8 %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %60) %62 = shl i32 %53, 4 %63 = or i32 %62, 12 %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %63) %65 = fmul float %25, %55 %66 = fmul float %26, %58 %67 = fadd float %65, %66 %68 = fmul float %27, %61 %69 = fadd float %67, %68 %70 = fmul float %28, %64 %71 = fadd float %69, %70 %72 = fmul float %29, 6.000000e+00 %73 = fadd float %72, 0x4010666660000000 %74 = fptosi float %73 to i32 %75 = shl i32 %74, 4 %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %75) %77 = shl i32 %74, 4 %78 = or i32 %77, 4 %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %78) %80 = shl i32 %74, 4 %81 = or i32 %80, 8 %82 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %81) %83 = shl i32 %74, 4 %84 = or i32 %83, 12 %85 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %84) %86 = fmul float %25, %76 %87 = fmul float %26, %79 %88 = fadd float %86, %87 %89 = fmul float %27, %82 %90 = fadd float %88, %89 %91 = fmul float %28, %85 %92 = fadd float %90, %91 %93 = fmul float %29, 6.000000e+00 %94 = fadd float %93, 0x4014666660000000 %95 = fptosi float %94 to i32 %96 = shl i32 %95, 4 %97 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %96) %98 = shl i32 %95, 4 %99 = or i32 %98, 4 %100 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %99) %101 = shl i32 %95, 4 %102 = or i32 %101, 8 %103 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %102) %104 = shl i32 %95, 4 %105 = or i32 %104, 12 %106 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %105) %107 = fmul float %25, %97 %108 = fmul float %26, %100 %109 = fadd float %107, %108 %110 = fmul float %27, %103 %111 = fadd float %109, %110 %112 = fmul float %28, %106 %113 = fadd float %111, %112 %114 = fmul float %29, 6.000000e+00 %115 = fadd float %114, 0x3FB99999A0000000 %116 = fptosi float %115 to i32 %117 = shl i32 %116, 4 %118 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %117) %119 = shl i32 %116, 4 %120 = or i32 %119, 4 %121 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %120) %122 = shl i32 %116, 4 %123 = or i32 %122, 8 %124 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %123) %125 = shl i32 %116, 4 %126 = or i32 %125, 12 %127 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %126) %128 = fmul float %29, 6.000000e+00 %129 = fadd float %128, 0x3FF19999A0000000 %130 = fptosi float %129 to i32 %131 = shl i32 %130, 4 %132 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %131) %133 = shl i32 %130, 4 %134 = or i32 %133, 4 %135 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %134) %136 = shl i32 %130, 4 %137 = or i32 %136, 8 %138 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %137) %139 = shl i32 %130, 4 %140 = or i32 %139, 12 %141 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %140) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %118, float %121, float %124, float %127) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %132, float %135, float %138, float %141) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %92, float %113, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %50, float %71, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_cvt_f32_i32_e32 v1, v3 ; 7E020B03 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_mov_b32_e32 v2, 0x40c00000 ; 7E0402FF 40C00000 v_madak_f32_e32 v3, v1, v2, 0x40833333 ; 42060501 40833333 v_cvt_i32_f32_e32 v3, v3 ; 7E061103 v_madak_f32_e32 v4, v1, v2, 0x40a33333 ; 42080501 40A33333 v_cvt_i32_f32_e32 v4, v4 ; 7E081104 v_add_i32_e32 v0, s10, v0 ; 4A00000A v_madak_f32_e32 v5, v1, v2, 0x40066666 ; 420A0501 40066666 v_cvt_i32_f32_e32 v5, v5 ; 7E0A1105 v_lshlrev_b32_e32 v3, 4, v3 ; 34060684 v_lshlrev_b32_e32 v4, 4, v4 ; 34080884 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[6:9], v0, s[4:7], 0 idxen ; E00C2000 80010600 v_lshlrev_b32_e32 v5, 4, v5 ; 340A0A84 buffer_load_format_xyzw v[10:13], v0, s[12:15], 0 idxen ; E00C2000 80030A00 v_madak_f32_e32 v0, v1, v2, 0x40466666 ; 42000501 40466666 v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_madak_f32_e32 v14, v1, v2, 0x3dcccccd ; 421C0501 3DCCCCCD v_madak_f32_e32 v1, v1, v2, 0x3f8ccccd ; 42020501 3F8CCCCD v_cvt_i32_f32_e32 v2, v14 ; 7E04110E v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_lshlrev_b32_e32 v0, 4, v0 ; 34000084 v_or_b32_e32 v14, 4, v3 ; 381C0684 buffer_load_dword v14, v14, s[0:3], 0 offen ; E0301000 80000E0E v_lshlrev_b32_e32 v2, 4, v2 ; 34040484 v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 v_or_b32_e32 v15, 4, v4 ; 381E0884 buffer_load_dword v15, v15, s[0:3], 0 offen ; E0301000 80000F0F v_or_b32_e32 v16, 4, v5 ; 38200A84 buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 v_or_b32_e32 v17, 4, v0 ; 38220084 buffer_load_dword v17, v17, s[0:3], 0 offen ; E0301000 80001111 buffer_load_dword v18, v3, s[0:3], 0 offen ; E0301000 80001203 v_or_b32_e32 v19, 8, v3 ; 38260688 buffer_load_dword v19, v19, s[0:3], 0 offen ; E0301000 80001313 v_or_b32_e32 v3, 12, v3 ; 3806068C buffer_load_dword v3, v3, s[0:3], 0 offen ; E0301000 80000303 buffer_load_dword v20, v4, s[0:3], 0 offen ; E0301000 80001404 v_or_b32_e32 v21, 8, v4 ; 382A0888 buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 v_or_b32_e32 v4, 12, v4 ; 3808088C buffer_load_dword v4, v4, s[0:3], 0 offen ; E0301000 80000404 v_or_b32_e32 v22, 4, v2 ; 382C0484 buffer_load_dword v22, v22, s[0:3], 0 offen ; E0301000 80001616 v_or_b32_e32 v23, 8, v2 ; 382E0488 buffer_load_dword v23, v23, s[0:3], 0 offen ; E0301000 80001717 v_or_b32_e32 v24, 12, v2 ; 3830048C buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 buffer_load_dword v24, v24, s[0:3], 0 offen ; E0301000 80001818 buffer_load_dword v25, v5, s[0:3], 0 offen ; E0301000 80001905 v_or_b32_e32 v26, 8, v5 ; 38340A88 buffer_load_dword v26, v26, s[0:3], 0 offen ; E0301000 80001A1A v_or_b32_e32 v5, 12, v5 ; 380A0A8C buffer_load_dword v5, v5, s[0:3], 0 offen ; E0301000 80000505 buffer_load_dword v27, v0, s[0:3], 0 offen ; E0301000 80001B00 v_or_b32_e32 v28, 8, v0 ; 38380088 buffer_load_dword v28, v28, s[0:3], 0 offen ; E0301000 80001C1C v_or_b32_e32 v0, 12, v0 ; 3800008C buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 v_or_b32_e32 v29, 4, v1 ; 383A0284 buffer_load_dword v29, v29, s[0:3], 0 offen ; E0301000 80001D1D v_or_b32_e32 v30, 8, v1 ; 383C0288 buffer_load_dword v30, v30, s[0:3], 0 offen ; E0301000 80001E1E buffer_load_dword v31, v1, s[0:3], 0 offen ; E0301000 80001F01 v_or_b32_e32 v1, 12, v1 ; 3802028C buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 s_waitcnt ; BF8C077F v_mul_f32_e32 v16, v16, v11 ; 10201710 s_waitcnt vmcnt(9) ; BF8C0779 v_mac_f32_e32 v16, v25, v10 ; 3E201519 v_mul_f32_e32 v17, v17, v11 ; 10221711 s_waitcnt vmcnt(6) ; BF8C0776 v_mac_f32_e32 v17, v27, v10 ; 3E22151B v_mul_f32_e32 v14, v14, v11 ; 101C170E v_mac_f32_e32 v14, v18, v10 ; 3E1C1512 v_mul_f32_e32 v11, v15, v11 ; 1016170F v_mac_f32_e32 v11, v20, v10 ; 3E161514 v_mac_f32_e32 v16, v26, v12 ; 3E20191A s_waitcnt vmcnt(5) ; BF8C0775 v_mac_f32_e32 v17, v28, v12 ; 3E22191C v_mac_f32_e32 v14, v19, v12 ; 3E1C1913 v_mac_f32_e32 v11, v21, v12 ; 3E161915 v_mac_f32_e32 v16, v5, v13 ; 3E201B05 s_waitcnt vmcnt(4) ; BF8C0774 v_mac_f32_e32 v17, v0, v13 ; 3E221B00 v_mac_f32_e32 v14, v3, v13 ; 3E1C1B03 v_mac_f32_e32 v11, v4, v13 ; 3E161B04 exp 15, 32, 0, 0, 0, v6, v7, v8, v9 ; F800020F 09080706 exp 15, 33, 0, 0, 0, v2, v22, v23, v24 ; F800021F 18171602 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 34, 0, 0, 0, v31, v29, v30, v1 ; F800022F 011E1D1F v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 35, 0, 0, 0, v14, v11, v0, v0 ; F800023F 00000B0E s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp 15, 12, 0, 1, 0, v16, v17, v0, v1 ; F80008CF 01001110 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 32 Code Size: 552 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xy, IN[3].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MAD TEMP[0], TEMP[0], IN[2], IN[1] 3: MUL TEMP[1].x, TEMP[0].wwww, IN[0].wwww 4: MOV TEMP[0].w, TEMP[1].xxxx 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %37 = bitcast float %35 to i32 %38 = bitcast float %36 to i32 %39 = insertelement <2 x i32> undef, i32 %37, i32 0 %40 = insertelement <2 x i32> %39, i32 %38, i32 1 %41 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %40, <32 x i8> %23, <16 x i8> %25, i32 2) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = extractelement <4 x float> %41, i32 2 %45 = extractelement <4 x float> %41, i32 3 %46 = fmul float %42, %31 %47 = fadd float %46, %27 %48 = fmul float %43, %32 %49 = fadd float %48, %28 %50 = fmul float %44, %33 %51 = fadd float %50, %29 %52 = fmul float %45, %34 %53 = fadd float %52, %30 %54 = fmul float %53, %26 %55 = call i32 @llvm.SI.packf16(float %47, float %49) %56 = bitcast i32 %55 to float %57 = call i32 @llvm.SI.packf16(float %51, float %54) %58 = bitcast i32 %57 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %56, float %58, float %56, float %58) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400 v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 v_interp_p1_f32 v5, v0, 2, 1, [m0] ; C8140600 v_interp_p2_f32 v5, [v5], v1, 2, 1, [m0] ; C8150601 v_interp_p1_f32 v6, v0, 3, 1, [m0] ; C8180700 v_interp_p2_f32 v6, [v6], v1, 3, 1, [m0] ; C8190701 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 v_interp_p1_f32 v10, v0, 3, 2, [m0] ; C8280B00 v_interp_p2_f32 v10, [v10], v1, 3, 2, [m0] ; C8290B01 v_interp_p1_f32 v11, v0, 0, 3, [m0] ; C82C0C00 v_interp_p2_f32 v11, [v11], v1, 0, 3, [m0] ; C82D0C01 v_interp_p1_f32 v12, v0, 1, 3, [m0] ; C8300D00 v_interp_p2_f32 v12, [v12], v1, 1, 3, [m0] ; C8310D01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[11:14], 15, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[4:11], s[0:3] ; F0800F00 00010B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v3, v7, v11 ; 3E061707 v_mac_f32_e32 v4, v8, v12 ; 3E081908 v_mac_f32_e32 v5, v9, v13 ; 3E0A1B09 v_mac_f32_e32 v6, v10, v14 ; 3E0C1D0A v_mul_f32_e32 v0, v2, v6 ; 10000D02 v_cvt_pkrtz_f16_f32_e32 v1, v3, v4 ; 5E020903 v_cvt_pkrtz_f16_f32_e32 v0, v5, v0 ; 5E000105 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Code Size: 160 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..3] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[0], CONST[0] 2: DP4 TEMP[1].x, IN[0], CONST[1] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: DP4 TEMP[1].x, IN[0], CONST[2] 5: DP4 TEMP[2].x, IN[0], CONST[3] 6: MOV TEMP[1].y, TEMP[2].xxxx 7: MOV TEMP[1].xy, TEMP[1].xyxx 8: MOV OUT[0], TEMP[0] 9: MOV OUT[1], TEMP[1] 10: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = fmul float %33, %13 %38 = fmul float %34, %14 %39 = fadd float %37, %38 %40 = fmul float %35, %15 %41 = fadd float %39, %40 %42 = fmul float %36, %16 %43 = fadd float %41, %42 %44 = fmul float %33, %17 %45 = fmul float %34, %18 %46 = fadd float %44, %45 %47 = fmul float %35, %19 %48 = fadd float %46, %47 %49 = fmul float %36, %20 %50 = fadd float %48, %49 %51 = fmul float %33, %21 %52 = fmul float %34, %22 %53 = fadd float %51, %52 %54 = fmul float %35, %23 %55 = fadd float %53, %54 %56 = fmul float %36, %24 %57 = fadd float %55, %56 %58 = fmul float %33, %25 %59 = fmul float %34, %26 %60 = fadd float %58, %59 %61 = fmul float %35, %27 %62 = fadd float %60, %61 %63 = fmul float %36, %28 %64 = fadd float %62, %63 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %57, float %64, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %43, float %50, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106 s_buffer_load_dword s6, s[0:3], 0x7 ; C2030107 s_buffer_load_dword s7, s[0:3], 0x8 ; C2038108 s_buffer_load_dword s8, s[0:3], 0x9 ; C2040109 s_buffer_load_dword s9, s[0:3], 0xa ; C204810A s_buffer_load_dword s10, s[0:3], 0xb ; C205010B s_buffer_load_dword s11, s[0:3], 0xc ; C205810C s_buffer_load_dword s12, s[0:3], 0xd ; C206010D s_buffer_load_dword s13, s[0:3], 0xe ; C206810E s_buffer_load_dword s14, s[0:3], 0x0 ; C2070100 s_buffer_load_dword s15, s[0:3], 0x1 ; C2078101 s_buffer_load_dword s16, s[0:3], 0x2 ; C2080102 s_buffer_load_dword s17, s[0:3], 0x3 ; C2088103 s_buffer_load_dword s18, s[0:3], 0x4 ; C2090104 s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v4, s15, v1 ; 1008020F v_mac_f32_e32 v4, s14, v0 ; 3E08000E v_mul_f32_e32 v5, s4, v1 ; 100A0204 v_mac_f32_e32 v5, s18, v0 ; 3E0A0012 v_mul_f32_e32 v6, s8, v1 ; 100C0208 v_mul_f32_e32 v1, s12, v1 ; 1002020C v_mac_f32_e32 v6, s7, v0 ; 3E0C0007 v_mac_f32_e32 v1, s11, v0 ; 3E02000B v_mac_f32_e32 v4, s16, v2 ; 3E080410 v_mac_f32_e32 v5, s5, v2 ; 3E0A0405 v_mac_f32_e32 v6, s9, v2 ; 3E0C0409 v_mac_f32_e32 v1, s13, v2 ; 3E02040D v_mac_f32_e32 v4, s17, v3 ; 3E080611 v_mac_f32_e32 v5, s6, v3 ; 3E0A0606 v_mac_f32_e32 v6, s10, v3 ; 3E0C060A v_mac_f32_e32 v1, s0, v3 ; 3E020600 v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 32, 0, 0, 0, v6, v1, v0, v0 ; F800020F 00000106 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp 15, 12, 0, 1, 0, v4, v5, v0, v1 ; F80008CF 01000504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 188 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[0..4] DCL TEMP[0..2], LOCAL 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: DP4 TEMP[1].x, TEMP[0], CONST[1] 3: DP4 TEMP[2].x, TEMP[0], CONST[2] 4: MOV TEMP[1].y, TEMP[2].xxxx 5: DP4 TEMP[2].x, TEMP[0], CONST[3] 6: MOV TEMP[1].z, TEMP[2].xxxx 7: DP4 TEMP[2].x, TEMP[0], CONST[4] 8: MOV TEMP[1].w, TEMP[2].xxxx 9: ADD TEMP[0].x, TEMP[0].wwww, CONST[0].wwww 10: MAD TEMP[0], CONST[0], TEMP[0].xxxx, TEMP[1] 11: MOV OUT[0], TEMP[0] 12: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %44 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %45 = load <32 x i8>, <32 x i8> addrspace(2)* %44, align 32, !tbaa !0 %46 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0 %48 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %49 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %50 = bitcast float %48 to i32 %51 = bitcast float %49 to i32 %52 = insertelement <2 x i32> undef, i32 %50, i32 0 %53 = insertelement <2 x i32> %52, i32 %51, i32 1 %54 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %53, <32 x i8> %45, <16 x i8> %47, i32 2) %55 = extractelement <4 x float> %54, i32 0 %56 = extractelement <4 x float> %54, i32 1 %57 = extractelement <4 x float> %54, i32 2 %58 = extractelement <4 x float> %54, i32 3 %59 = fmul float %55, %28 %60 = fmul float %56, %29 %61 = fadd float %59, %60 %62 = fmul float %57, %30 %63 = fadd float %61, %62 %64 = fmul float %58, %31 %65 = fadd float %63, %64 %66 = fmul float %55, %32 %67 = fmul float %56, %33 %68 = fadd float %66, %67 %69 = fmul float %57, %34 %70 = fadd float %68, %69 %71 = fmul float %58, %35 %72 = fadd float %70, %71 %73 = fmul float %55, %36 %74 = fmul float %56, %37 %75 = fadd float %73, %74 %76 = fmul float %57, %38 %77 = fadd float %75, %76 %78 = fmul float %58, %39 %79 = fadd float %77, %78 %80 = fmul float %55, %40 %81 = fmul float %56, %41 %82 = fadd float %80, %81 %83 = fmul float %57, %42 %84 = fadd float %82, %83 %85 = fmul float %58, %43 %86 = fadd float %84, %85 %87 = fadd float %58, %27 %88 = fmul float %24, %87 %89 = fadd float %88, %65 %90 = fmul float %25, %87 %91 = fadd float %90, %72 %92 = fmul float %26, %87 %93 = fadd float %92, %79 %94 = fmul float %27, %87 %95 = fadd float %94, %86 %96 = call i32 @llvm.SI.packf16(float %89, float %91) %97 = bitcast i32 %96 to float %98 = call i32 @llvm.SI.packf16(float %93, float %95) %99 = bitcast i32 %98 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %97, float %99, float %97, float %99) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[4:7], s[4:5], 0x0 ; C0820500 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[4:7] ; F0800F00 00230002 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s19, s[0:3], 0xf ; C209810F s_buffer_load_dword s20, s[0:3], 0x10 ; C20A0110 s_buffer_load_dword s21, s[0:3], 0x11 ; C20A8111 s_buffer_load_dword s22, s[0:3], 0x12 ; C20B0112 s_buffer_load_dword s0, s[0:3], 0x13 ; C2000113 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v4, s9, v1 ; 10080209 v_mac_f32_e32 v4, s8, v0 ; 3E080008 v_mac_f32_e32 v4, s10, v2 ; 3E08040A v_mac_f32_e32 v4, s11, v3 ; 3E08060B v_mul_f32_e32 v5, s13, v1 ; 100A020D v_mac_f32_e32 v5, s12, v0 ; 3E0A000C v_mac_f32_e32 v5, s14, v2 ; 3E0A040E v_mac_f32_e32 v5, s15, v3 ; 3E0A060F v_mul_f32_e32 v6, s17, v1 ; 100C0211 v_mac_f32_e32 v6, s16, v0 ; 3E0C0010 v_mac_f32_e32 v6, s18, v2 ; 3E0C0412 v_mac_f32_e32 v6, s19, v3 ; 3E0C0613 v_mul_f32_e32 v1, s21, v1 ; 10020215 v_mac_f32_e32 v1, s20, v0 ; 3E020014 v_mac_f32_e32 v1, s22, v2 ; 3E020416 v_mac_f32_e32 v1, s0, v3 ; 3E020600 v_add_f32_e32 v0, s7, v3 ; 06000607 v_mac_f32_e32 v4, s4, v0 ; 3E080004 v_mac_f32_e32 v5, s5, v0 ; 3E0A0005 v_mac_f32_e32 v6, s6, v0 ; 3E0C0006 v_mac_f32_e32 v1, s7, v0 ; 3E020007 v_cvt_pkrtz_f16_f32_e32 v0, v4, v5 ; 5E000B04 v_cvt_pkrtz_f16_f32_e32 v1, v6, v1 ; 5E020306 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 236 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL SV[0], INSTANCEID DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..47] DCL TEMP[0..2], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 2.0000, 0.1000} IMM[1] FLT32 { 1.1000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: I2F TEMP[1].x, SV[0].xxxx 2: MAD TEMP[2].x, TEMP[1].xxxx, IMM[0].zzzz, IMM[0].wwww 3: F2I TEMP[2].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: UARL ADDR[0].x, TEMP[2].xxxx 6: DP4 TEMP[0].x, IN[2], CONST[ADDR[0].x] 7: MAD TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz, IMM[1].xxxx 8: F2I TEMP[1].x, TEMP[1].xxxx 9: UARL ADDR[0].x, TEMP[1].xxxx 10: DP4 TEMP[1].x, IN[2], CONST[ADDR[0].x] 11: MOV TEMP[0].y, TEMP[1].xxxx 12: MOV OUT[2], IN[1] 13: MOV OUT[1], IN[0] 14: MOV OUT[0], TEMP[0] 15: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %14 = load <16 x i8>, <16 x i8> addrspace(2)* %13, align 16, !tbaa !0 %15 = add i32 %5, %7 %16 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %14, i32 0, i32 %15) %17 = extractelement <4 x float> %16, i32 0 %18 = extractelement <4 x float> %16, i32 1 %19 = extractelement <4 x float> %16, i32 2 %20 = extractelement <4 x float> %16, i32 3 %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = extractelement <4 x float> %24, i32 1 %27 = extractelement <4 x float> %24, i32 2 %28 = extractelement <4 x float> %24, i32 3 %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = sitofp i32 %10 to float %38 = fmul float %37, 2.000000e+00 %39 = fadd float %38, 0x3FB99999A0000000 %40 = fptosi float %39 to i32 %41 = shl i32 %40, 4 %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %41) %43 = shl i32 %40, 4 %44 = or i32 %43, 4 %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %44) %46 = shl i32 %40, 4 %47 = or i32 %46, 8 %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %47) %49 = shl i32 %40, 4 %50 = or i32 %49, 12 %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %50) %52 = fmul float %33, %42 %53 = fmul float %34, %45 %54 = fadd float %52, %53 %55 = fmul float %35, %48 %56 = fadd float %54, %55 %57 = fmul float %36, %51 %58 = fadd float %56, %57 %59 = fmul float %37, 2.000000e+00 %60 = fadd float %59, 0x3FF19999A0000000 %61 = fptosi float %60 to i32 %62 = shl i32 %61, 4 %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %62) %64 = shl i32 %61, 4 %65 = or i32 %64, 4 %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %65) %67 = shl i32 %61, 4 %68 = or i32 %67, 8 %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %68) %70 = shl i32 %61, 4 %71 = or i32 %70, 12 %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %71) %73 = fmul float %33, %63 %74 = fmul float %34, %66 %75 = fadd float %73, %74 %76 = fmul float %35, %69 %77 = fadd float %75, %76 %78 = fmul float %36, %72 %79 = fadd float %77, %78 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %25, float %26, float %27, float %28) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %58, float %79, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_cvt_f32_i32_e32 v1, v3 ; 7E020B03 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 v_madak_f32_e32 v2, 2.0, v1, 0x3dcccccd ; 420402F4 3DCCCCCD v_cvt_i32_f32_e32 v2, v2 ; 7E041102 v_lshlrev_b32_e32 v2, 4, v2 ; 34040484 v_or_b32_e32 v3, 4, v2 ; 38060484 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[4:7], v0, s[4:7], 0 idxen ; E00C2000 80010400 buffer_load_format_xyzw v[8:11], v0, s[12:15], 0 idxen ; E00C2000 80030800 buffer_load_format_xyzw v[12:15], v0, s[8:11], 0 idxen ; E00C2000 80020C00 buffer_load_dword v0, v3, s[0:3], 0 offen ; E0301000 80000003 v_or_b32_e32 v3, 8, v2 ; 38060488 buffer_load_dword v3, v3, s[0:3], 0 offen ; E0301000 80000303 v_madak_f32_e32 v1, 2.0, v1, 0x3f8ccccd ; 420202F4 3F8CCCCD v_cvt_i32_f32_e32 v1, v1 ; 7E021101 buffer_load_dword v16, v2, s[0:3], 0 offen ; E0301000 80001002 v_or_b32_e32 v2, 12, v2 ; 3804048C buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 v_or_b32_e32 v17, 4, v1 ; 38220284 buffer_load_dword v17, v17, s[0:3], 0 offen ; E0301000 80001111 buffer_load_dword v18, v1, s[0:3], 0 offen ; E0301000 80001201 v_or_b32_e32 v19, 8, v1 ; 38260288 buffer_load_dword v19, v19, s[0:3], 0 offen ; E0301000 80001313 v_or_b32_e32 v1, 12, v1 ; 3802028C buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 s_waitcnt vmcnt(7) ; BF8C0777 v_mul_f32_e32 v0, v0, v13 ; 10001B00 s_waitcnt vmcnt(5) ; BF8C0775 v_mac_f32_e32 v0, v16, v12 ; 3E001910 s_waitcnt vmcnt(3) ; BF8C0773 v_mul_f32_e32 v13, v17, v13 ; 101A1B11 s_waitcnt vmcnt(2) ; BF8C0772 v_mac_f32_e32 v13, v18, v12 ; 3E1A1912 v_mac_f32_e32 v0, v3, v14 ; 3E001D03 s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v13, v19, v14 ; 3E1A1D13 v_mac_f32_e32 v0, v2, v15 ; 3E001F02 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v13, v1, v15 ; 3E1A1F01 exp 15, 32, 0, 0, 0, v4, v5, v6, v7 ; F800020F 07060504 v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp 15, 33, 0, 0, 0, v8, v9, v10, v11 ; F800021F 0B0A0908 v_mov_b32_e32 v2, 0 ; 7E040280 exp 15, 12, 0, 1, 0, v0, v13, v2, v1 ; F80008CF 01020D00 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 20 Code Size: 264 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xyz, IN[0].xyzx 1: MUL TEMP[1].x, IN[0].wwww, IN[1].wwww 2: MOV TEMP[0].w, TEMP[1].xxxx 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %23 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %24 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %25 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %26 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %27 = fmul float %25, %26 %28 = call i32 @llvm.SI.packf16(float %22, float %23) %29 = bitcast i32 %28 to float %30 = call i32 @llvm.SI.packf16(float %24, float %27) %31 = bitcast i32 %30 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %29, float %31, float %29, float %31) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v0, v0, 3, 1, [m0] ; C8000700 v_interp_p2_f32 v0, [v0], v1, 3, 1, [m0] ; C8010701 v_mul_f32_e32 v0, v0, v5 ; 10000B00 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 v_cvt_pkrtz_f16_f32_e32 v0, v4, v0 ; 5E000104 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 68 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL CONST[0..7] DCL TEMP[0..2], LOCAL 0: DP4 TEMP[0].x, IN[1], CONST[2] 1: DP4 TEMP[1].x, IN[1], CONST[3] 2: MOV TEMP[0].y, TEMP[1].xxxx 3: DP4 TEMP[1].x, IN[1], CONST[4] 4: MOV TEMP[0].z, TEMP[1].xxxx 5: DP4 TEMP[1].x, IN[1], CONST[5] 6: MOV TEMP[0].w, TEMP[1].xxxx 7: DP4 TEMP[1].x, IN[1], CONST[6] 8: DP4 TEMP[2].x, IN[1], CONST[7] 9: MOV TEMP[1].y, TEMP[2].xxxx 10: MOV TEMP[1].xy, TEMP[1].xyxx 11: MOV OUT[1], IN[0] 12: MOV OUT[2], CONST[0] 13: MOV OUT[3], CONST[1] 14: MOV OUT[0], TEMP[0] 15: MOV OUT[4], TEMP[1] 16: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = extractelement <4 x float> %48, i32 3 %53 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !tbaa !0 %55 = add i32 %5, %7 %56 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %55) %57 = extractelement <4 x float> %56, i32 0 %58 = extractelement <4 x float> %56, i32 1 %59 = extractelement <4 x float> %56, i32 2 %60 = extractelement <4 x float> %56, i32 3 %61 = fmul float %57, %21 %62 = fmul float %58, %22 %63 = fadd float %61, %62 %64 = fmul float %59, %23 %65 = fadd float %63, %64 %66 = fmul float %60, %24 %67 = fadd float %65, %66 %68 = fmul float %57, %25 %69 = fmul float %58, %26 %70 = fadd float %68, %69 %71 = fmul float %59, %27 %72 = fadd float %70, %71 %73 = fmul float %60, %28 %74 = fadd float %72, %73 %75 = fmul float %57, %29 %76 = fmul float %58, %30 %77 = fadd float %75, %76 %78 = fmul float %59, %31 %79 = fadd float %77, %78 %80 = fmul float %60, %32 %81 = fadd float %79, %80 %82 = fmul float %57, %33 %83 = fmul float %58, %34 %84 = fadd float %82, %83 %85 = fmul float %59, %35 %86 = fadd float %84, %85 %87 = fmul float %60, %36 %88 = fadd float %86, %87 %89 = fmul float %57, %37 %90 = fmul float %58, %38 %91 = fadd float %89, %90 %92 = fmul float %59, %39 %93 = fadd float %91, %92 %94 = fmul float %60, %40 %95 = fadd float %93, %94 %96 = fmul float %57, %41 %97 = fmul float %58, %42 %98 = fadd float %96, %97 %99 = fmul float %59, %43 %100 = fadd float %98, %99 %101 = fmul float %60, %44 %102 = fadd float %100, %101 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %49, float %50, float %51, float %52) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %13, float %14, float %15, float %16) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %95, float %102, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %67, float %74, float %81, float %88) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x19 ; C2040119 s_buffer_load_dword s9, s[0:3], 0x1a ; C204811A s_buffer_load_dword s10, s[0:3], 0x1b ; C205011B s_buffer_load_dword s11, s[0:3], 0x1c ; C205811C s_buffer_load_dword s16, s[0:3], 0x1d ; C208011D buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106 s_buffer_load_dword s6, s[0:3], 0x7 ; C2030107 s_buffer_load_dword s7, s[0:3], 0x8 ; C2038108 s_buffer_load_dword s12, s[0:3], 0x9 ; C2060109 s_buffer_load_dword s13, s[0:3], 0xa ; C206810A s_buffer_load_dword s14, s[0:3], 0xb ; C207010B s_buffer_load_dword s15, s[0:3], 0xc ; C207810C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s19, s[0:3], 0x0 ; C2098100 s_buffer_load_dword s20, s[0:3], 0x1 ; C20A0101 s_buffer_load_dword s21, s[0:3], 0x2 ; C20A8102 s_buffer_load_dword s22, s[0:3], 0x3 ; C20B0103 s_buffer_load_dword s23, s[0:3], 0x4 ; C20B8104 s_buffer_load_dword s24, s[0:3], 0xf ; C20C010F s_buffer_load_dword s25, s[0:3], 0x10 ; C20C8110 s_buffer_load_dword s26, s[0:3], 0x11 ; C20D0111 s_buffer_load_dword s27, s[0:3], 0x12 ; C20D8112 s_buffer_load_dword s28, s[0:3], 0x13 ; C20E0113 s_buffer_load_dword s29, s[0:3], 0x14 ; C20E8114 s_buffer_load_dword s30, s[0:3], 0x15 ; C20F0115 s_buffer_load_dword s31, s[0:3], 0x16 ; C20F8116 s_buffer_load_dword s32, s[0:3], 0x17 ; C2100117 s_buffer_load_dword s33, s[0:3], 0x18 ; C2108118 s_buffer_load_dword s34, s[0:3], 0x1e ; C211011E s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s12, v6 ; 10000C0C v_mac_f32_e32 v0, s7, v5 ; 3E000A07 v_mul_f32_e32 v9, s17, v6 ; 10120C11 v_mac_f32_e32 v9, s15, v5 ; 3E120A0F v_mul_f32_e32 v10, s26, v6 ; 10140C1A v_mac_f32_e32 v10, s25, v5 ; 3E140A19 v_mul_f32_e32 v11, s30, v6 ; 10160C1E v_mac_f32_e32 v11, s29, v5 ; 3E160A1D v_mul_f32_e32 v12, s8, v6 ; 10180C08 v_mac_f32_e32 v12, s33, v5 ; 3E180A21 v_mul_f32_e32 v6, s16, v6 ; 100C0C10 v_mac_f32_e32 v6, s11, v5 ; 3E0C0A0B v_mac_f32_e32 v0, s13, v7 ; 3E000E0D v_mac_f32_e32 v9, s18, v7 ; 3E120E12 v_mac_f32_e32 v10, s27, v7 ; 3E140E1B v_mac_f32_e32 v11, s31, v7 ; 3E160E1F v_mac_f32_e32 v12, s9, v7 ; 3E180E09 v_mac_f32_e32 v6, s34, v7 ; 3E0C0E22 v_mac_f32_e32 v0, s14, v8 ; 3E00100E v_mac_f32_e32 v9, s24, v8 ; 3E121018 v_mac_f32_e32 v10, s28, v8 ; 3E14101C v_mac_f32_e32 v11, s32, v8 ; 3E161020 v_mac_f32_e32 v12, s10, v8 ; 3E18100A v_mac_f32_e32 v6, s0, v8 ; 3E0C1000 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, s19 ; 7E020213 v_mov_b32_e32 v2, s20 ; 7E040214 v_mov_b32_e32 v3, s21 ; 7E060215 v_mov_b32_e32 v4, s22 ; 7E080216 exp 15, 33, 0, 0, 0, v1, v2, v3, v4 ; F800021F 04030201 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, s23 ; 7E020217 v_mov_b32_e32 v2, s4 ; 7E040204 v_mov_b32_e32 v3, s5 ; 7E060205 v_mov_b32_e32 v4, s6 ; 7E080206 exp 15, 34, 0, 0, 0, v1, v2, v3, v4 ; F800022F 04030201 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 0 ; 7E020280 exp 15, 35, 0, 0, 0, v12, v6, v1, v1 ; F800023F 0101060C exp 15, 12, 0, 1, 0, v0, v9, v10, v11 ; F80008CF 0B0A0900 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 16 Code Size: 356 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xy, IN[3].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MAD TEMP[0], TEMP[0], IN[2], IN[1] 3: MUL TEMP[1].x, TEMP[0].wwww, IN[0].wwww 4: MOV TEMP[0].w, TEMP[1].xxxx 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %37 = bitcast float %35 to i32 %38 = bitcast float %36 to i32 %39 = insertelement <2 x i32> undef, i32 %37, i32 0 %40 = insertelement <2 x i32> %39, i32 %38, i32 1 %41 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %40, <32 x i8> %23, <16 x i8> %25, i32 2) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = extractelement <4 x float> %41, i32 2 %45 = extractelement <4 x float> %41, i32 3 %46 = fmul float %42, %31 %47 = fadd float %46, %27 %48 = fmul float %43, %32 %49 = fadd float %48, %28 %50 = fmul float %44, %33 %51 = fadd float %50, %29 %52 = fmul float %45, %34 %53 = fadd float %52, %30 %54 = fmul float %53, %26 %55 = call i32 @llvm.SI.packf16(float %47, float %49) %56 = bitcast i32 %55 to float %57 = call i32 @llvm.SI.packf16(float %51, float %54) %58 = bitcast i32 %57 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %56, float %58, float %56, float %58) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400 v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 v_interp_p1_f32 v5, v0, 2, 1, [m0] ; C8140600 v_interp_p2_f32 v5, [v5], v1, 2, 1, [m0] ; C8150601 v_interp_p1_f32 v6, v0, 3, 1, [m0] ; C8180700 v_interp_p2_f32 v6, [v6], v1, 3, 1, [m0] ; C8190701 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 v_interp_p1_f32 v10, v0, 3, 2, [m0] ; C8280B00 v_interp_p2_f32 v10, [v10], v1, 3, 2, [m0] ; C8290B01 v_interp_p1_f32 v11, v0, 0, 3, [m0] ; C82C0C00 v_interp_p2_f32 v11, [v11], v1, 0, 3, [m0] ; C82D0C01 v_interp_p1_f32 v12, v0, 1, 3, [m0] ; C8300D00 v_interp_p2_f32 v12, [v12], v1, 1, 3, [m0] ; C8310D01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[11:14], 15, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[4:11], s[0:3] ; F0800F00 00010B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v3, v7, v11 ; 3E061707 v_mac_f32_e32 v4, v8, v12 ; 3E081908 v_mac_f32_e32 v5, v9, v13 ; 3E0A1B09 v_mac_f32_e32 v6, v10, v14 ; 3E0C1D0A v_mul_f32_e32 v0, v2, v6 ; 10000D02 v_cvt_pkrtz_f16_f32_e32 v1, v3, v4 ; 5E020903 v_cvt_pkrtz_f16_f32_e32 v0, v5, v0 ; 5E000105 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Code Size: 160 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL CONST[0..3] DCL TEMP[0..1], LOCAL 0: DP4 TEMP[0].x, IN[0], CONST[0] 1: DP4 TEMP[1].x, IN[0], CONST[1] 2: MOV TEMP[0].y, TEMP[1].xxxx 3: DP4 TEMP[1].x, IN[0], CONST[2] 4: MOV TEMP[0].z, TEMP[1].xxxx 5: DP4 TEMP[1].x, IN[0], CONST[3] 6: MOV TEMP[0].w, TEMP[1].xxxx 7: MOV OUT[0], TEMP[0] 8: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = fmul float %33, %13 %38 = fmul float %34, %14 %39 = fadd float %37, %38 %40 = fmul float %35, %15 %41 = fadd float %39, %40 %42 = fmul float %36, %16 %43 = fadd float %41, %42 %44 = fmul float %33, %17 %45 = fmul float %34, %18 %46 = fadd float %44, %45 %47 = fmul float %35, %19 %48 = fadd float %46, %47 %49 = fmul float %36, %20 %50 = fadd float %48, %49 %51 = fmul float %33, %21 %52 = fmul float %34, %22 %53 = fadd float %51, %52 %54 = fmul float %35, %23 %55 = fadd float %53, %54 %56 = fmul float %36, %24 %57 = fadd float %55, %56 %58 = fmul float %33, %25 %59 = fmul float %34, %26 %60 = fadd float %58, %59 %61 = fmul float %35, %27 %62 = fadd float %60, %61 %63 = fmul float %36, %28 %64 = fadd float %62, %63 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %43, float %50, float %57, float %64) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xd ; C207010D s_buffer_load_dword s15, s[0:3], 0xa ; C207810A s_buffer_load_dword s16, s[0:3], 0xb ; C208010B s_buffer_load_dword s17, s[0:3], 0xc ; C208810C s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s5, v1 ; 10080205 v_mac_f32_e32 v4, s4, v0 ; 3E080004 v_mul_f32_e32 v5, s9, v1 ; 100A0209 v_mac_f32_e32 v5, s8, v0 ; 3E0A0008 v_mul_f32_e32 v6, s13, v1 ; 100C020D v_mac_f32_e32 v6, s12, v0 ; 3E0C000C v_mul_f32_e32 v1, s14, v1 ; 1002020E v_mac_f32_e32 v1, s17, v0 ; 3E020011 v_mac_f32_e32 v4, s6, v2 ; 3E080406 v_mac_f32_e32 v5, s10, v2 ; 3E0A040A v_mac_f32_e32 v6, s15, v2 ; 3E0C040F v_mac_f32_e32 v1, s18, v2 ; 3E020412 v_mac_f32_e32 v4, s7, v3 ; 3E080607 v_mac_f32_e32 v5, s11, v3 ; 3E0A060B v_mac_f32_e32 v6, s16, v3 ; 3E0C0610 v_mac_f32_e32 v1, s0, v3 ; 3E020600 exp 15, 12, 0, 1, 0, v4, v5, v6, v1 ; F80008CF 01060504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 172 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL OUT[0], COLOR DCL CONST[0] 0: MOV OUT[0], CONST[0] 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %28 = call i32 @llvm.SI.packf16(float %24, float %25) %29 = bitcast i32 %28 to float %30 = call i32 @llvm.SI.packf16(float %26, float %27) %31 = bitcast i32 %30 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %29, float %31, float %29, float %31) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s0, s[0:3], 0x3 ; C2000103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s5 ; 7E000205 v_cvt_pkrtz_f16_f32_e32 v0, s4, v0 ; 5E000004 v_mov_b32_e32 v1, s0 ; 7E020200 v_cvt_pkrtz_f16_f32_e32 v1, s6, v1 ; 5E020206 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 8 VGPRS: 4 Code Size: 56 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL CONST[0..5] DCL TEMP[0..1], LOCAL 0: DP4 TEMP[0].x, IN[2], CONST[2] 1: DP4 TEMP[1].x, IN[2], CONST[3] 2: MOV TEMP[0].y, TEMP[1].xxxx 3: DP4 TEMP[1].x, IN[2], CONST[4] 4: MOV TEMP[0].z, TEMP[1].xxxx 5: DP4 TEMP[1].x, IN[2], CONST[5] 6: MOV TEMP[0].w, TEMP[1].xxxx 7: MOV OUT[2], IN[1] 8: MOV OUT[3], CONST[0] 9: MOV OUT[1], IN[0] 10: MOV OUT[4], CONST[1] 11: MOV OUT[0], TEMP[0] 12: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = extractelement <4 x float> %48, i32 3 %53 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !tbaa !0 %55 = add i32 %5, %7 %56 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %55) %57 = extractelement <4 x float> %56, i32 0 %58 = extractelement <4 x float> %56, i32 1 %59 = extractelement <4 x float> %56, i32 2 %60 = extractelement <4 x float> %56, i32 3 %61 = fmul float %57, %21 %62 = fmul float %58, %22 %63 = fadd float %61, %62 %64 = fmul float %59, %23 %65 = fadd float %63, %64 %66 = fmul float %60, %24 %67 = fadd float %65, %66 %68 = fmul float %57, %25 %69 = fmul float %58, %26 %70 = fadd float %68, %69 %71 = fmul float %59, %27 %72 = fadd float %70, %71 %73 = fmul float %60, %28 %74 = fadd float %72, %73 %75 = fmul float %57, %29 %76 = fmul float %58, %30 %77 = fadd float %75, %76 %78 = fmul float %59, %31 %79 = fadd float %77, %78 %80 = fmul float %60, %32 %81 = fadd float %79, %80 %82 = fmul float %57, %33 %83 = fmul float %58, %34 %84 = fadd float %82, %83 %85 = fmul float %59, %35 %86 = fadd float %84, %85 %87 = fmul float %60, %36 %88 = fadd float %86, %87 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %43, float %44) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %49, float %50, float %51, float %52) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %13, float %14, float %15, float %16) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %67, float %74, float %81, float %88) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 buffer_load_format_xyzw v[9:12], v0, s[16:19], 0 idxen ; E00C2000 80040900 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s19, s[0:3], 0xf ; C209810F s_buffer_load_dword s20, s[0:3], 0x10 ; C20A0110 s_buffer_load_dword s21, s[0:3], 0x11 ; C20A8111 s_buffer_load_dword s22, s[0:3], 0x14 ; C20B0114 s_buffer_load_dword s23, s[0:3], 0x15 ; C20B8115 s_buffer_load_dword s24, s[0:3], 0x12 ; C20C0112 s_buffer_load_dword s25, s[0:3], 0x13 ; C20C8113 s_buffer_load_dword s26, s[0:3], 0x16 ; C20D0116 s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s13, v10 ; 1000140D v_mac_f32_e32 v0, s12, v9 ; 3E00120C v_mul_f32_e32 v13, s17, v10 ; 101A1411 v_mac_f32_e32 v13, s16, v9 ; 3E1A1210 v_mul_f32_e32 v14, s21, v10 ; 101C1415 v_mac_f32_e32 v14, s20, v9 ; 3E1C1214 v_mul_f32_e32 v10, s23, v10 ; 10141417 v_mac_f32_e32 v10, s22, v9 ; 3E141216 v_mac_f32_e32 v0, s14, v11 ; 3E00160E v_mac_f32_e32 v13, s18, v11 ; 3E1A1612 v_mac_f32_e32 v14, s24, v11 ; 3E1C1618 v_mac_f32_e32 v10, s26, v11 ; 3E14161A v_mac_f32_e32 v0, s15, v12 ; 3E00180F v_mac_f32_e32 v13, s19, v12 ; 3E1A1813 v_mac_f32_e32 v14, s25, v12 ; 3E1C1819 v_mac_f32_e32 v10, s0, v12 ; 3E141800 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 exp 15, 33, 0, 0, 0, v5, v6, v7, v8 ; F800021F 08070605 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, s4 ; 7E020204 v_mov_b32_e32 v2, s5 ; 7E040205 v_mov_b32_e32 v3, s6 ; 7E060206 v_mov_b32_e32 v4, s7 ; 7E080207 exp 15, 34, 0, 0, 0, v1, v2, v3, v4 ; F800022F 04030201 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, s8 ; 7E020208 v_mov_b32_e32 v2, s9 ; 7E040209 v_mov_b32_e32 v3, s10 ; 7E06020A v_mov_b32_e32 v4, s11 ; 7E08020B exp 15, 35, 0, 0, 0, v1, v2, v3, v4 ; F800023F 04030201 exp 15, 12, 0, 1, 0, v0, v13, v14, v10 ; F80008CF 0A0E0D00 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 16 Code Size: 300 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL TEMP[0..1], LOCAL 0: MAD TEMP[0], IN[0], IN[3], IN[2] 1: MUL TEMP[1].x, TEMP[0].wwww, IN[1].wwww 2: MOV TEMP[0].w, TEMP[1].xxxx 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %23 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %24 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %25 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %26 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %35 = fmul float %22, %31 %36 = fadd float %35, %27 %37 = fmul float %23, %32 %38 = fadd float %37, %28 %39 = fmul float %24, %33 %40 = fadd float %39, %29 %41 = fmul float %25, %34 %42 = fadd float %41, %30 %43 = fmul float %42, %26 %44 = call i32 @llvm.SI.packf16(float %36, float %38) %45 = bitcast i32 %44 to float %46 = call i32 @llvm.SI.packf16(float %40, float %43) %47 = bitcast i32 %46 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %45, float %47, float %45, float %47) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 3, 1, [m0] ; C8180700 v_interp_p2_f32 v6, [v6], v1, 3, 1, [m0] ; C8190701 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 v_interp_p1_f32 v10, v0, 3, 2, [m0] ; C8280B00 v_interp_p2_f32 v10, [v10], v1, 3, 2, [m0] ; C8290B01 v_interp_p1_f32 v11, v0, 0, 3, [m0] ; C82C0C00 v_interp_p2_f32 v11, [v11], v1, 0, 3, [m0] ; C82D0C01 v_interp_p1_f32 v12, v0, 1, 3, [m0] ; C8300D00 v_interp_p2_f32 v12, [v12], v1, 1, 3, [m0] ; C8310D01 v_interp_p1_f32 v13, v0, 2, 3, [m0] ; C8340E00 v_interp_p2_f32 v13, [v13], v1, 2, 3, [m0] ; C8350E01 v_interp_p1_f32 v0, v0, 3, 3, [m0] ; C8000F00 v_interp_p2_f32 v0, [v0], v1, 3, 3, [m0] ; C8010F01 v_mac_f32_e32 v7, v11, v2 ; 3E0E050B v_mac_f32_e32 v8, v12, v3 ; 3E10070C v_mac_f32_e32 v9, v13, v4 ; 3E12090D v_mac_f32_e32 v10, v0, v5 ; 3E140B00 v_mul_f32_e32 v0, v6, v10 ; 10001506 v_cvt_pkrtz_f16_f32_e32 v1, v7, v8 ; 5E021107 v_cvt_pkrtz_f16_f32_e32 v0, v9, v0 ; 5E000109 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Code Size: 148 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..5] DCL TEMP[0..2], LOCAL 0: DP4 TEMP[0].x, IN[2], CONST[2] 1: DP4 TEMP[1].x, IN[2], CONST[3] 2: MOV TEMP[0].y, TEMP[1].xxxx 3: DP4 TEMP[1].x, IN[2], CONST[4] 4: MOV TEMP[0].z, TEMP[1].xxxx 5: DP4 TEMP[1].x, IN[2], CONST[5] 6: MOV TEMP[0].w, TEMP[1].xxxx 7: MAD TEMP[1], IN[0], CONST[1], CONST[0] 8: MOV TEMP[2].xy, IN[1].xyxx 9: MOV OUT[1], TEMP[1] 10: MOV OUT[0], TEMP[0] 11: MOV OUT[2], TEMP[2] 12: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %52 = load <16 x i8>, <16 x i8> addrspace(2)* %51, align 16, !tbaa !0 %53 = add i32 %5, %7 %54 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %52, i32 0, i32 %53) %55 = extractelement <4 x float> %54, i32 0 %56 = extractelement <4 x float> %54, i32 1 %57 = extractelement <4 x float> %54, i32 2 %58 = extractelement <4 x float> %54, i32 3 %59 = fmul float %55, %21 %60 = fmul float %56, %22 %61 = fadd float %59, %60 %62 = fmul float %57, %23 %63 = fadd float %61, %62 %64 = fmul float %58, %24 %65 = fadd float %63, %64 %66 = fmul float %55, %25 %67 = fmul float %56, %26 %68 = fadd float %66, %67 %69 = fmul float %57, %27 %70 = fadd float %68, %69 %71 = fmul float %58, %28 %72 = fadd float %70, %71 %73 = fmul float %55, %29 %74 = fmul float %56, %30 %75 = fadd float %73, %74 %76 = fmul float %57, %31 %77 = fadd float %75, %76 %78 = fmul float %58, %32 %79 = fadd float %77, %78 %80 = fmul float %55, %33 %81 = fmul float %56, %34 %82 = fadd float %80, %81 %83 = fmul float %57, %35 %84 = fadd float %82, %83 %85 = fmul float %58, %36 %86 = fadd float %84, %85 %87 = fmul float %41, %17 %88 = fadd float %87, %13 %89 = fmul float %42, %18 %90 = fadd float %89, %14 %91 = fmul float %43, %19 %92 = fadd float %91, %15 %93 = fmul float %44, %20 %94 = fadd float %93, %16 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %88, float %90, float %92, float %94) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %49, float %50, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %65, float %72, float %79, float %86) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_mov_b32_e32 v1, 0 ; 7E020280 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s16, s[0:3], 0x0 ; C2080100 s_buffer_load_dword s17, s[0:3], 0x1 ; C2088101 buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[12:15], 0 idxen ; E00C2000 80030600 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[8:11], v0, s[8:11], 0 idxen ; E00C2000 80020800 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103 s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104 s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 v_mov_b32_e32 v0, s16 ; 7E000210 s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107 v_mov_b32_e32 v12, s17 ; 7E180211 s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108 s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109 s_buffer_load_dword s12, s[0:3], 0xa ; C206010A s_buffer_load_dword s13, s[0:3], 0xb ; C206810B s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v13, s4 ; 7E1A0204 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C v_mov_b32_e32 v14, s5 ; 7E1C0205 s_buffer_load_dword s5, s[0:3], 0xd ; C202810D s_buffer_load_dword s14, s[0:3], 0xe ; C207010E s_buffer_load_dword s15, s[0:3], 0xf ; C207810F v_mac_f32_e32 v0, s6, v2 ; 3E000406 v_mac_f32_e32 v12, s7, v3 ; 3E180607 v_mac_f32_e32 v13, s8, v4 ; 3E1A0808 v_mac_f32_e32 v14, s9, v5 ; 3E1C0A09 exp 15, 32, 0, 0, 0, v0, v12, v13, v14 ; F800020F 0E0D0C00 s_buffer_load_dword s6, s[0:3], 0x10 ; C2030110 s_buffer_load_dword s7, s[0:3], 0x11 ; C2038111 s_buffer_load_dword s8, s[0:3], 0x14 ; C2040114 s_buffer_load_dword s9, s[0:3], 0x15 ; C2048115 s_buffer_load_dword s16, s[0:3], 0x12 ; C2080112 s_buffer_load_dword s17, s[0:3], 0x13 ; C2088113 s_buffer_load_dword s18, s[0:3], 0x16 ; C2090116 s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v0, s11, v9 ; 1000120B v_mac_f32_e32 v0, s10, v8 ; 3E00100A v_mul_f32_e32 v2, s5, v9 ; 10041205 v_mac_f32_e32 v2, s4, v8 ; 3E041004 v_mul_f32_e32 v3, s7, v9 ; 10061207 v_mac_f32_e32 v3, s6, v8 ; 3E061006 v_mul_f32_e32 v4, s9, v9 ; 10081209 v_mac_f32_e32 v4, s8, v8 ; 3E081008 v_mac_f32_e32 v0, s12, v10 ; 3E00140C v_mac_f32_e32 v2, s14, v10 ; 3E04140E v_mac_f32_e32 v3, s16, v10 ; 3E061410 v_mac_f32_e32 v4, s18, v10 ; 3E081412 v_mac_f32_e32 v0, s13, v11 ; 3E00160D v_mac_f32_e32 v2, s15, v11 ; 3E04160F v_mac_f32_e32 v3, s17, v11 ; 3E061611 v_mac_f32_e32 v4, s0, v11 ; 3E081600 exp 15, 33, 0, 0, 0, v6, v7, v1, v1 ; F800021F 01010706 exp 15, 12, 0, 1, 0, v0, v2, v3, v4 ; F80008CF 04030200 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Code Size: 284 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xyz, IN[0].xyzx 1: MOV TEMP[1].xy, IN[1].xyyy 2: TEX TEMP[1].x, TEMP[1], SAMP[0], 2D 3: MUL TEMP[1].x, IN[0].wwww, TEMP[1].xxxx 4: MOV TEMP[0].w, TEMP[1].xxxx 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %32 = bitcast float %30 to i32 %33 = bitcast float %31 to i32 %34 = insertelement <2 x i32> undef, i32 %32, i32 0 %35 = insertelement <2 x i32> %34, i32 %33, i32 1 %36 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %35, <32 x i8> %23, <16 x i8> %25, i32 2) %37 = extractelement <4 x float> %36, i32 0 %38 = fmul float %29, %37 %39 = call i32 @llvm.SI.packf16(float %26, float %27) %40 = bitcast i32 %39 to float %41 = call i32 @llvm.SI.packf16(float %28, float %38) %42 = bitcast i32 %41 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %40, float %42, float %40, float %42) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v0, 1, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[8:15], s[0:3] ; F0800100 00020006 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v0, v5 ; 10000B00 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 v_cvt_pkrtz_f16_f32_e32 v0, v4, v0 ; 5E000104 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 104 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..5] DCL TEMP[0..2], LOCAL 0: DP4 TEMP[0].x, IN[1], CONST[0] 1: DP4 TEMP[1].x, IN[1], CONST[1] 2: MOV TEMP[0].y, TEMP[1].xxxx 3: DP4 TEMP[1].x, IN[1], CONST[2] 4: MOV TEMP[0].z, TEMP[1].xxxx 5: DP4 TEMP[1].x, IN[1], CONST[3] 6: MOV TEMP[0].w, TEMP[1].xxxx 7: DP4 TEMP[1].x, IN[1], CONST[4] 8: DP4 TEMP[2].x, IN[1], CONST[5] 9: MOV TEMP[1].y, TEMP[2].xxxx 10: MOV TEMP[1].xy, TEMP[1].xyxx 11: MOV OUT[1], IN[0] 12: MOV OUT[0], TEMP[0] 13: MOV OUT[2], TEMP[1] 14: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = extractelement <4 x float> %48, i32 3 %53 = fmul float %49, %13 %54 = fmul float %50, %14 %55 = fadd float %53, %54 %56 = fmul float %51, %15 %57 = fadd float %55, %56 %58 = fmul float %52, %16 %59 = fadd float %57, %58 %60 = fmul float %49, %17 %61 = fmul float %50, %18 %62 = fadd float %60, %61 %63 = fmul float %51, %19 %64 = fadd float %62, %63 %65 = fmul float %52, %20 %66 = fadd float %64, %65 %67 = fmul float %49, %21 %68 = fmul float %50, %22 %69 = fadd float %67, %68 %70 = fmul float %51, %23 %71 = fadd float %69, %70 %72 = fmul float %52, %24 %73 = fadd float %71, %72 %74 = fmul float %49, %25 %75 = fmul float %50, %26 %76 = fadd float %74, %75 %77 = fmul float %51, %27 %78 = fadd float %76, %77 %79 = fmul float %52, %28 %80 = fadd float %78, %79 %81 = fmul float %49, %29 %82 = fmul float %50, %30 %83 = fadd float %81, %82 %84 = fmul float %51, %31 %85 = fadd float %83, %84 %86 = fmul float %52, %32 %87 = fadd float %85, %86 %88 = fmul float %49, %33 %89 = fmul float %50, %34 %90 = fadd float %88, %89 %91 = fmul float %51, %35 %92 = fadd float %90, %91 %93 = fmul float %52, %36 %94 = fadd float %92, %93 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %43, float %44) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %87, float %94, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %59, float %66, float %73, float %80) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0xf ; C206010F s_buffer_load_dword s13, s[0:3], 0x10 ; C2068110 buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_buffer_load_dword s5, s[0:3], 0x12 ; C2028112 s_buffer_load_dword s6, s[0:3], 0x13 ; C2030113 s_buffer_load_dword s7, s[0:3], 0x14 ; C2038114 s_buffer_load_dword s8, s[0:3], 0x15 ; C2040115 s_buffer_load_dword s9, s[0:3], 0x16 ; C2048116 s_buffer_load_dword s10, s[0:3], 0x17 ; C2050117 s_buffer_load_dword s11, s[0:3], 0x0 ; C2058100 s_buffer_load_dword s14, s[0:3], 0x1 ; C2070101 s_buffer_load_dword s15, s[0:3], 0x2 ; C2078102 s_buffer_load_dword s16, s[0:3], 0x3 ; C2080103 s_buffer_load_dword s17, s[0:3], 0x4 ; C2088104 s_buffer_load_dword s18, s[0:3], 0x5 ; C2090105 s_buffer_load_dword s19, s[0:3], 0x6 ; C2098106 s_buffer_load_dword s20, s[0:3], 0x7 ; C20A0107 s_buffer_load_dword s21, s[0:3], 0x8 ; C20A8108 s_buffer_load_dword s22, s[0:3], 0x9 ; C20B0109 s_buffer_load_dword s23, s[0:3], 0xa ; C20B810A s_buffer_load_dword s24, s[0:3], 0xb ; C20C010B s_buffer_load_dword s25, s[0:3], 0xc ; C20C810C s_buffer_load_dword s26, s[0:3], 0xd ; C20D010D s_buffer_load_dword s0, s[0:3], 0xe ; C200010E s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s14, v6 ; 10000C0E v_mac_f32_e32 v0, s11, v5 ; 3E000A0B v_mul_f32_e32 v9, s18, v6 ; 10120C12 v_mac_f32_e32 v9, s17, v5 ; 3E120A11 v_mul_f32_e32 v10, s22, v6 ; 10140C16 v_mac_f32_e32 v10, s21, v5 ; 3E140A15 v_mul_f32_e32 v11, s26, v6 ; 10160C1A v_mac_f32_e32 v11, s25, v5 ; 3E160A19 v_mul_f32_e32 v12, s4, v6 ; 10180C04 v_mac_f32_e32 v12, s13, v5 ; 3E180A0D v_mul_f32_e32 v6, s8, v6 ; 100C0C08 v_mac_f32_e32 v6, s7, v5 ; 3E0C0A07 v_mac_f32_e32 v0, s15, v7 ; 3E000E0F v_mac_f32_e32 v9, s19, v7 ; 3E120E13 v_mac_f32_e32 v10, s23, v7 ; 3E140E17 v_mac_f32_e32 v11, s0, v7 ; 3E160E00 v_mac_f32_e32 v12, s5, v7 ; 3E180E05 v_mac_f32_e32 v6, s9, v7 ; 3E0C0E09 v_mac_f32_e32 v0, s16, v8 ; 3E001010 v_mac_f32_e32 v9, s20, v8 ; 3E121014 v_mac_f32_e32 v10, s24, v8 ; 3E141018 v_mac_f32_e32 v11, s12, v8 ; 3E16100C v_mac_f32_e32 v12, s6, v8 ; 3E181006 v_mac_f32_e32 v6, s10, v8 ; 3E0C100A exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 0 ; 7E020280 exp 15, 33, 0, 0, 0, v12, v6, v1, v1 ; F800021F 0101060C exp 15, 12, 0, 1, 0, v0, v9, v10, v11 ; F80008CF 0B0A0900 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 16 Code Size: 268 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV TEMP[1].xyz, TEMP[0].xyzx 3: MUL TEMP[0].x, TEMP[0].wwww, IN[0].wwww 4: MOV TEMP[1].w, TEMP[0].xxxx 5: MOV OUT[0], TEMP[1] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %29 = bitcast float %27 to i32 %30 = bitcast float %28 to i32 %31 = insertelement <2 x i32> undef, i32 %29, i32 0 %32 = insertelement <2 x i32> %31, i32 %30, i32 1 %33 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %32, <32 x i8> %23, <16 x i8> %25, i32 2) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = extractelement <4 x float> %33, i32 2 %37 = extractelement <4 x float> %33, i32 3 %38 = fmul float %37, %26 %39 = call i32 @llvm.SI.packf16(float %34, float %35) %40 = bitcast i32 %39 to float %41 = call i32 @llvm.SI.packf16(float %36, float %38) %42 = bitcast i32 %41 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %40, float %42, float %40, float %42) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400 v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[3:6], 15, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[12:19], s[0:3] ; F0800F00 00030303 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v2, v6 ; 10000D02 v_cvt_pkrtz_f16_f32_e32 v0, v5, v0 ; 5E000105 v_cvt_pkrtz_f16_f32_e32 v1, v3, v4 ; 5E020903 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 80 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL CONST[0..7] DCL TEMP[0..2], LOCAL 0: DP4 TEMP[0].x, IN[1], CONST[2] 1: DP4 TEMP[1].x, IN[1], CONST[3] 2: MOV TEMP[0].y, TEMP[1].xxxx 3: DP4 TEMP[1].x, IN[1], CONST[4] 4: MOV TEMP[0].z, TEMP[1].xxxx 5: DP4 TEMP[1].x, IN[1], CONST[5] 6: MOV TEMP[0].w, TEMP[1].xxxx 7: DP4 TEMP[1].x, IN[1], CONST[6] 8: DP4 TEMP[2].x, IN[1], CONST[7] 9: MOV TEMP[1].y, TEMP[2].xxxx 10: MOV TEMP[1].xy, TEMP[1].xyxx 11: MOV OUT[1], IN[0] 12: MOV OUT[2], CONST[0] 13: MOV OUT[3], CONST[1] 14: MOV OUT[0], TEMP[0] 15: MOV OUT[4], TEMP[1] 16: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = extractelement <4 x float> %48, i32 3 %53 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !tbaa !0 %55 = add i32 %5, %7 %56 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %55) %57 = extractelement <4 x float> %56, i32 0 %58 = extractelement <4 x float> %56, i32 1 %59 = extractelement <4 x float> %56, i32 2 %60 = extractelement <4 x float> %56, i32 3 %61 = fmul float %57, %21 %62 = fmul float %58, %22 %63 = fadd float %61, %62 %64 = fmul float %59, %23 %65 = fadd float %63, %64 %66 = fmul float %60, %24 %67 = fadd float %65, %66 %68 = fmul float %57, %25 %69 = fmul float %58, %26 %70 = fadd float %68, %69 %71 = fmul float %59, %27 %72 = fadd float %70, %71 %73 = fmul float %60, %28 %74 = fadd float %72, %73 %75 = fmul float %57, %29 %76 = fmul float %58, %30 %77 = fadd float %75, %76 %78 = fmul float %59, %31 %79 = fadd float %77, %78 %80 = fmul float %60, %32 %81 = fadd float %79, %80 %82 = fmul float %57, %33 %83 = fmul float %58, %34 %84 = fadd float %82, %83 %85 = fmul float %59, %35 %86 = fadd float %84, %85 %87 = fmul float %60, %36 %88 = fadd float %86, %87 %89 = fmul float %57, %37 %90 = fmul float %58, %38 %91 = fadd float %89, %90 %92 = fmul float %59, %39 %93 = fadd float %91, %92 %94 = fmul float %60, %40 %95 = fadd float %93, %94 %96 = fmul float %57, %41 %97 = fmul float %58, %42 %98 = fadd float %96, %97 %99 = fmul float %59, %43 %100 = fadd float %98, %99 %101 = fmul float %60, %44 %102 = fadd float %100, %101 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %49, float %50, float %51, float %52) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %13, float %14, float %15, float %16) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %95, float %102, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %67, float %74, float %81, float %88) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x19 ; C2040119 s_buffer_load_dword s9, s[0:3], 0x1a ; C204811A s_buffer_load_dword s10, s[0:3], 0x1b ; C205011B s_buffer_load_dword s11, s[0:3], 0x1c ; C205811C s_buffer_load_dword s16, s[0:3], 0x1d ; C208011D buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106 s_buffer_load_dword s6, s[0:3], 0x7 ; C2030107 s_buffer_load_dword s7, s[0:3], 0x8 ; C2038108 s_buffer_load_dword s12, s[0:3], 0x9 ; C2060109 s_buffer_load_dword s13, s[0:3], 0xa ; C206810A s_buffer_load_dword s14, s[0:3], 0xb ; C207010B s_buffer_load_dword s15, s[0:3], 0xc ; C207810C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s19, s[0:3], 0x0 ; C2098100 s_buffer_load_dword s20, s[0:3], 0x1 ; C20A0101 s_buffer_load_dword s21, s[0:3], 0x2 ; C20A8102 s_buffer_load_dword s22, s[0:3], 0x3 ; C20B0103 s_buffer_load_dword s23, s[0:3], 0x4 ; C20B8104 s_buffer_load_dword s24, s[0:3], 0xf ; C20C010F s_buffer_load_dword s25, s[0:3], 0x10 ; C20C8110 s_buffer_load_dword s26, s[0:3], 0x11 ; C20D0111 s_buffer_load_dword s27, s[0:3], 0x12 ; C20D8112 s_buffer_load_dword s28, s[0:3], 0x13 ; C20E0113 s_buffer_load_dword s29, s[0:3], 0x14 ; C20E8114 s_buffer_load_dword s30, s[0:3], 0x15 ; C20F0115 s_buffer_load_dword s31, s[0:3], 0x16 ; C20F8116 s_buffer_load_dword s32, s[0:3], 0x17 ; C2100117 s_buffer_load_dword s33, s[0:3], 0x18 ; C2108118 s_buffer_load_dword s34, s[0:3], 0x1e ; C211011E s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s12, v6 ; 10000C0C v_mac_f32_e32 v0, s7, v5 ; 3E000A07 v_mul_f32_e32 v9, s17, v6 ; 10120C11 v_mac_f32_e32 v9, s15, v5 ; 3E120A0F v_mul_f32_e32 v10, s26, v6 ; 10140C1A v_mac_f32_e32 v10, s25, v5 ; 3E140A19 v_mul_f32_e32 v11, s30, v6 ; 10160C1E v_mac_f32_e32 v11, s29, v5 ; 3E160A1D v_mul_f32_e32 v12, s8, v6 ; 10180C08 v_mac_f32_e32 v12, s33, v5 ; 3E180A21 v_mul_f32_e32 v6, s16, v6 ; 100C0C10 v_mac_f32_e32 v6, s11, v5 ; 3E0C0A0B v_mac_f32_e32 v0, s13, v7 ; 3E000E0D v_mac_f32_e32 v9, s18, v7 ; 3E120E12 v_mac_f32_e32 v10, s27, v7 ; 3E140E1B v_mac_f32_e32 v11, s31, v7 ; 3E160E1F v_mac_f32_e32 v12, s9, v7 ; 3E180E09 v_mac_f32_e32 v6, s34, v7 ; 3E0C0E22 v_mac_f32_e32 v0, s14, v8 ; 3E00100E v_mac_f32_e32 v9, s24, v8 ; 3E121018 v_mac_f32_e32 v10, s28, v8 ; 3E14101C v_mac_f32_e32 v11, s32, v8 ; 3E161020 v_mac_f32_e32 v12, s10, v8 ; 3E18100A v_mac_f32_e32 v6, s0, v8 ; 3E0C1000 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, s19 ; 7E020213 v_mov_b32_e32 v2, s20 ; 7E040214 v_mov_b32_e32 v3, s21 ; 7E060215 v_mov_b32_e32 v4, s22 ; 7E080216 exp 15, 33, 0, 0, 0, v1, v2, v3, v4 ; F800021F 04030201 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, s23 ; 7E020217 v_mov_b32_e32 v2, s4 ; 7E040204 v_mov_b32_e32 v3, s5 ; 7E060205 v_mov_b32_e32 v4, s6 ; 7E080206 exp 15, 34, 0, 0, 0, v1, v2, v3, v4 ; F800022F 04030201 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 0 ; 7E020280 exp 15, 35, 0, 0, 0, v12, v6, v1, v1 ; F800023F 0101060C exp 15, 12, 0, 1, 0, v0, v9, v10, v11 ; F80008CF 0B0A0900 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 16 Code Size: 356 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL TEMP[0..1], LOCAL IMM[0] FLT32 { -0.5020, 1.5960, -0.8130, 0.0000} IMM[1] FLT32 { -0.0627, 1.1640, 1.0000, -0.3920} IMM[2] FLT32 { 0.0000, -0.3920, 2.0170, 0.0000} 0: MOV TEMP[0].xy, IN[3].xyyy 1: TEX TEMP[0].x, TEMP[0], SAMP[2], 2D 2: ADD TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx 3: MOV TEMP[1].xy, IN[3].xyyy 4: TEX TEMP[1].x, TEMP[1], SAMP[0], 2D 5: ADD TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx 6: MUL TEMP[1].x, TEMP[1].xxxx, IMM[1].yyyy 7: MAD TEMP[0], TEMP[0].xxxx, IMM[0].yzww, TEMP[1].xxxx 8: MOV TEMP[1].xy, IN[3].xyyy 9: TEX TEMP[1].x, TEMP[1], SAMP[1], 2D 10: ADD TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx 11: MAD TEMP[1].xyz, TEMP[1].xxxx, IMM[2].xyzx, TEMP[0] 12: MOV TEMP[0].xyz, TEMP[1].xyzx 13: MOV TEMP[0].w, IMM[1].zzzz 14: MAD TEMP[0], TEMP[0], IN[2], IN[1] 15: MUL TEMP[1].x, TEMP[0].wwww, IN[0].wwww 16: MOV TEMP[0].w, TEMP[1].xxxx 17: MOV OUT[0], TEMP[0] 18: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %27 = bitcast <8 x i32> addrspace(2)* %26 to <32 x i8> addrspace(2)* %28 = load <32 x i8>, <32 x i8> addrspace(2)* %27, align 32, !tbaa !0 %29 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %30 = bitcast <4 x i32> addrspace(2)* %29 to <16 x i8> addrspace(2)* %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 %32 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %33 = bitcast <8 x i32> addrspace(2)* %32 to <32 x i8> addrspace(2)* %34 = load <32 x i8>, <32 x i8> addrspace(2)* %33, align 32, !tbaa !0 %35 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %36 = bitcast <4 x i32> addrspace(2)* %35 to <16 x i8> addrspace(2)* %37 = load <16 x i8>, <16 x i8> addrspace(2)* %36, align 16, !tbaa !0 %38 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %44 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %45 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %46 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %47 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %48 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %49 = bitcast float %47 to i32 %50 = bitcast float %48 to i32 %51 = insertelement <2 x i32> undef, i32 %49, i32 0 %52 = insertelement <2 x i32> %51, i32 %50, i32 1 %53 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %52, <32 x i8> %34, <16 x i8> %37, i32 2) %54 = extractelement <4 x float> %53, i32 0 %55 = fadd float %54, 0xBFE0101020000000 %56 = bitcast float %47 to i32 %57 = bitcast float %48 to i32 %58 = insertelement <2 x i32> undef, i32 %56, i32 0 %59 = insertelement <2 x i32> %58, i32 %57, i32 1 %60 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %59, <32 x i8> %23, <16 x i8> %25, i32 2) %61 = extractelement <4 x float> %60, i32 0 %62 = fadd float %61, 0xBFB0101020000000 %63 = fmul float %62, 0x3FF29FBE80000000 %64 = fmul float %55, 0x3FF9893740000000 %65 = fadd float %64, %63 %66 = fmul float %55, 0xBFEA0418A0000000 %67 = fadd float %66, %63 %68 = fmul float %55, 0.000000e+00 %69 = fadd float %68, %63 %70 = bitcast float %47 to i32 %71 = bitcast float %48 to i32 %72 = insertelement <2 x i32> undef, i32 %70, i32 0 %73 = insertelement <2 x i32> %72, i32 %71, i32 1 %74 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %73, <32 x i8> %28, <16 x i8> %31, i32 2) %75 = extractelement <4 x float> %74, i32 0 %76 = fadd float %75, 0xBFE0101020000000 %77 = fmul float %76, 0.000000e+00 %78 = fadd float %77, %65 %79 = fmul float %76, 0xBFD9168720000000 %80 = fadd float %79, %67 %81 = fmul float %76, 0x400022D0E0000000 %82 = fadd float %81, %69 %83 = fmul float %78, %43 %84 = fadd float %83, %39 %85 = fmul float %80, %44 %86 = fadd float %85, %40 %87 = fmul float %82, %45 %88 = fadd float %87, %41 %89 = fadd float %46, %42 %90 = fmul float %89, %38 %91 = call i32 @llvm.SI.packf16(float %84, float %86) %92 = bitcast i32 %91 to float %93 = call i32 @llvm.SI.packf16(float %88, float %90) %94 = bitcast i32 %93 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %92, float %94, float %92, float %94) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 s_load_dwordx4 s[0:3], s[4:5], 0x4 ; C0800504 v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400 v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 v_interp_p1_f32 v5, v0, 2, 1, [m0] ; C8140600 v_interp_p2_f32 v5, [v5], v1, 2, 1, [m0] ; C8150601 v_interp_p1_f32 v6, v0, 3, 1, [m0] ; C8180700 v_interp_p2_f32 v6, [v6], v1, 3, 1, [m0] ; C8190701 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 s_load_dwordx4 s[12:15], s[4:5], 0x8 ; C0860508 s_load_dwordx8 s[16:23], s[6:7], 0x10 ; C0C80710 s_load_dwordx8 s[24:31], s[6:7], 0x0 ; C0CC0700 s_load_dwordx8 s[32:39], s[6:7], 0x8 ; C0D00708 v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 v_interp_p1_f32 v10, v0, 3, 2, [m0] ; C8280B00 v_interp_p2_f32 v10, [v10], v1, 3, 2, [m0] ; C8290B01 v_interp_p1_f32 v11, v0, 0, 3, [m0] ; C82C0C00 v_interp_p2_f32 v11, [v11], v1, 0, 3, [m0] ; C82D0C01 v_interp_p1_f32 v12, v0, 1, 3, [m0] ; C8300D00 v_interp_p2_f32 v12, [v12], v1, 1, 3, [m0] ; C8310D01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v0, 1, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[16:23], s[12:15] ; F0800100 0064000B image_sample v1, 1, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[24:31], s[8:11] ; F0800100 0046010B image_sample v11, 1, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[32:39], s[0:3] ; F0800100 00080B0B v_mov_b32_e32 v12, 0xbd808081 ; 7E1802FF BD808081 s_waitcnt vmcnt(1) ; BF8C0771 v_add_f32_e32 v1, v1, v12 ; 06021901 v_mov_b32_e32 v12, 0xbf008081 ; 7E1802FF BF008081 v_add_f32_e32 v0, v12, v0 ; 0600010C v_mul_f32_e32 v1, 0x3f94fdf4, v1 ; 100202FF 3F94FDF4 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v11, v12, v11 ; 0616170C v_madmk_f32_e32 v12, v0, v1, 0x3fcc49ba ; 40180300 3FCC49BA v_mac_f32_e32 v12, 0, v11 ; 3E181680 v_mac_f32_e32 v3, v7, v12 ; 3E061907 v_madmk_f32_e32 v7, v0, v1, 0xbf5020c5 ; 400E0300 BF5020C5 v_madmk_f32_e32 v7, v11, v7, 0xbec8b439 ; 400E0F0B BEC8B439 v_mac_f32_e32 v4, v8, v7 ; 3E080F08 v_mac_f32_e32 v1, 0, v0 ; 3E020080 v_madmk_f32_e32 v0, v11, v1, 0x40011687 ; 4000030B 40011687 v_mac_f32_e32 v5, v9, v0 ; 3E0A0109 v_add_f32_e32 v0, v6, v10 ; 06001506 v_mul_f32_e32 v0, v2, v0 ; 10000102 v_cvt_pkrtz_f16_f32_e32 v1, v3, v4 ; 5E020903 v_cvt_pkrtz_f16_f32_e32 v0, v5, v0 ; 5E000105 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 16 Code Size: 272 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..3] DCL TEMP[0..1], LOCAL 0: DP4 TEMP[0].x, IN[2], CONST[0] 1: DP4 TEMP[1].x, IN[2], CONST[1] 2: MOV TEMP[0].y, TEMP[1].xxxx 3: DP4 TEMP[1].x, IN[2], CONST[2] 4: MOV TEMP[0].z, TEMP[1].xxxx 5: DP4 TEMP[1].x, IN[2], CONST[3] 6: MOV TEMP[0].w, TEMP[1].xxxx 7: MOV OUT[2], IN[1] 8: MOV OUT[1], IN[0] 9: MOV OUT[0], TEMP[0] 10: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = extractelement <4 x float> %48, i32 3 %53 = fmul float %49, %13 %54 = fmul float %50, %14 %55 = fadd float %53, %54 %56 = fmul float %51, %15 %57 = fadd float %55, %56 %58 = fmul float %52, %16 %59 = fadd float %57, %58 %60 = fmul float %49, %17 %61 = fmul float %50, %18 %62 = fadd float %60, %61 %63 = fmul float %51, %19 %64 = fadd float %62, %63 %65 = fmul float %52, %20 %66 = fadd float %64, %65 %67 = fmul float %49, %21 %68 = fmul float %50, %22 %69 = fadd float %67, %68 %70 = fmul float %51, %23 %71 = fadd float %69, %70 %72 = fmul float %52, %24 %73 = fadd float %71, %72 %74 = fmul float %49, %25 %75 = fmul float %50, %26 %76 = fadd float %74, %75 %77 = fmul float %51, %27 %78 = fadd float %76, %77 %79 = fmul float %52, %28 %80 = fadd float %78, %79 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %33, float %34, float %35, float %36) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %41, float %42, float %43, float %44) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %59, float %66, float %73, float %80) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 buffer_load_format_xyzw v[9:12], v0, s[16:19], 0 idxen ; E00C2000 80040900 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xd ; C207010D s_buffer_load_dword s15, s[0:3], 0xa ; C207810A s_buffer_load_dword s16, s[0:3], 0xb ; C208010B s_buffer_load_dword s17, s[0:3], 0xc ; C208810C s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s5, v10 ; 10001405 v_mac_f32_e32 v0, s4, v9 ; 3E001204 v_mul_f32_e32 v13, s9, v10 ; 101A1409 v_mac_f32_e32 v13, s8, v9 ; 3E1A1208 v_mul_f32_e32 v14, s13, v10 ; 101C140D v_mac_f32_e32 v14, s12, v9 ; 3E1C120C v_mul_f32_e32 v10, s14, v10 ; 1014140E v_mac_f32_e32 v10, s17, v9 ; 3E141211 v_mac_f32_e32 v0, s6, v11 ; 3E001606 v_mac_f32_e32 v13, s10, v11 ; 3E1A160A v_mac_f32_e32 v14, s15, v11 ; 3E1C160F v_mac_f32_e32 v10, s18, v11 ; 3E141612 v_mac_f32_e32 v0, s7, v12 ; 3E001807 v_mac_f32_e32 v13, s11, v12 ; 3E1A180B v_mac_f32_e32 v14, s16, v12 ; 3E1C1810 v_mac_f32_e32 v10, s0, v12 ; 3E141800 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 exp 15, 33, 0, 0, 0, v5, v6, v7, v8 ; F800021F 08070605 exp 15, 12, 0, 1, 0, v0, v13, v14, v10 ; F80008CF 0A0E0D00 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Code Size: 212 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xyz, IN[0].xyzx 1: MUL TEMP[1].x, IN[0].wwww, IN[1].wwww 2: MOV TEMP[0].w, TEMP[1].xxxx 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %23 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %24 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %25 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %26 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %27 = fmul float %25, %26 %28 = call i32 @llvm.SI.packf16(float %22, float %23) %29 = bitcast i32 %28 to float %30 = call i32 @llvm.SI.packf16(float %24, float %27) %31 = bitcast i32 %30 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %29, float %31, float %29, float %31) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v0, v0, 3, 1, [m0] ; C8000700 v_interp_p2_f32 v0, [v0], v1, 3, 1, [m0] ; C8010701 v_mul_f32_e32 v0, v0, v5 ; 10000B00 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 v_cvt_pkrtz_f16_f32_e32 v0, v4, v0 ; 5E000104 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 68 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..5] DCL TEMP[0..2], LOCAL 0: DP4 TEMP[0].x, IN[1], CONST[0] 1: DP4 TEMP[1].x, IN[1], CONST[1] 2: MOV TEMP[0].y, TEMP[1].xxxx 3: DP4 TEMP[1].x, IN[1], CONST[2] 4: MOV TEMP[0].z, TEMP[1].xxxx 5: DP4 TEMP[1].x, IN[1], CONST[3] 6: MOV TEMP[0].w, TEMP[1].xxxx 7: DP4 TEMP[1].x, IN[1], CONST[4] 8: DP4 TEMP[2].x, IN[1], CONST[5] 9: MOV TEMP[1].y, TEMP[2].xxxx 10: MOV TEMP[1].xy, TEMP[1].xyxx 11: MOV OUT[1], IN[0] 12: MOV OUT[0], TEMP[0] 13: MOV OUT[2], TEMP[1] 14: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = extractelement <4 x float> %48, i32 3 %53 = fmul float %49, %13 %54 = fmul float %50, %14 %55 = fadd float %53, %54 %56 = fmul float %51, %15 %57 = fadd float %55, %56 %58 = fmul float %52, %16 %59 = fadd float %57, %58 %60 = fmul float %49, %17 %61 = fmul float %50, %18 %62 = fadd float %60, %61 %63 = fmul float %51, %19 %64 = fadd float %62, %63 %65 = fmul float %52, %20 %66 = fadd float %64, %65 %67 = fmul float %49, %21 %68 = fmul float %50, %22 %69 = fadd float %67, %68 %70 = fmul float %51, %23 %71 = fadd float %69, %70 %72 = fmul float %52, %24 %73 = fadd float %71, %72 %74 = fmul float %49, %25 %75 = fmul float %50, %26 %76 = fadd float %74, %75 %77 = fmul float %51, %27 %78 = fadd float %76, %77 %79 = fmul float %52, %28 %80 = fadd float %78, %79 %81 = fmul float %49, %29 %82 = fmul float %50, %30 %83 = fadd float %81, %82 %84 = fmul float %51, %31 %85 = fadd float %83, %84 %86 = fmul float %52, %32 %87 = fadd float %85, %86 %88 = fmul float %49, %33 %89 = fmul float %50, %34 %90 = fadd float %88, %89 %91 = fmul float %51, %35 %92 = fadd float %90, %91 %93 = fmul float %52, %36 %94 = fadd float %92, %93 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %43, float %44) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %87, float %94, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %59, float %66, float %73, float %80) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0xf ; C206010F s_buffer_load_dword s13, s[0:3], 0x10 ; C2068110 buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_buffer_load_dword s5, s[0:3], 0x12 ; C2028112 s_buffer_load_dword s6, s[0:3], 0x13 ; C2030113 s_buffer_load_dword s7, s[0:3], 0x14 ; C2038114 s_buffer_load_dword s8, s[0:3], 0x15 ; C2040115 s_buffer_load_dword s9, s[0:3], 0x16 ; C2048116 s_buffer_load_dword s10, s[0:3], 0x17 ; C2050117 s_buffer_load_dword s11, s[0:3], 0x0 ; C2058100 s_buffer_load_dword s14, s[0:3], 0x1 ; C2070101 s_buffer_load_dword s15, s[0:3], 0x2 ; C2078102 s_buffer_load_dword s16, s[0:3], 0x3 ; C2080103 s_buffer_load_dword s17, s[0:3], 0x4 ; C2088104 s_buffer_load_dword s18, s[0:3], 0x5 ; C2090105 s_buffer_load_dword s19, s[0:3], 0x6 ; C2098106 s_buffer_load_dword s20, s[0:3], 0x7 ; C20A0107 s_buffer_load_dword s21, s[0:3], 0x8 ; C20A8108 s_buffer_load_dword s22, s[0:3], 0x9 ; C20B0109 s_buffer_load_dword s23, s[0:3], 0xa ; C20B810A s_buffer_load_dword s24, s[0:3], 0xb ; C20C010B s_buffer_load_dword s25, s[0:3], 0xc ; C20C810C s_buffer_load_dword s26, s[0:3], 0xd ; C20D010D s_buffer_load_dword s0, s[0:3], 0xe ; C200010E s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s14, v6 ; 10000C0E v_mac_f32_e32 v0, s11, v5 ; 3E000A0B v_mul_f32_e32 v9, s18, v6 ; 10120C12 v_mac_f32_e32 v9, s17, v5 ; 3E120A11 v_mul_f32_e32 v10, s22, v6 ; 10140C16 v_mac_f32_e32 v10, s21, v5 ; 3E140A15 v_mul_f32_e32 v11, s26, v6 ; 10160C1A v_mac_f32_e32 v11, s25, v5 ; 3E160A19 v_mul_f32_e32 v12, s4, v6 ; 10180C04 v_mac_f32_e32 v12, s13, v5 ; 3E180A0D v_mul_f32_e32 v6, s8, v6 ; 100C0C08 v_mac_f32_e32 v6, s7, v5 ; 3E0C0A07 v_mac_f32_e32 v0, s15, v7 ; 3E000E0F v_mac_f32_e32 v9, s19, v7 ; 3E120E13 v_mac_f32_e32 v10, s23, v7 ; 3E140E17 v_mac_f32_e32 v11, s0, v7 ; 3E160E00 v_mac_f32_e32 v12, s5, v7 ; 3E180E05 v_mac_f32_e32 v6, s9, v7 ; 3E0C0E09 v_mac_f32_e32 v0, s16, v8 ; 3E001010 v_mac_f32_e32 v9, s20, v8 ; 3E121014 v_mac_f32_e32 v10, s24, v8 ; 3E141018 v_mac_f32_e32 v11, s12, v8 ; 3E16100C v_mac_f32_e32 v12, s6, v8 ; 3E181006 v_mac_f32_e32 v6, s10, v8 ; 3E0C100A exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 0 ; 7E020280 exp 15, 33, 0, 0, 0, v12, v6, v1, v1 ; F800021F 0101060C exp 15, 12, 0, 1, 0, v0, v9, v10, v11 ; F80008CF 0B0A0900 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 16 Code Size: 268 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL TEMP[0..1], LOCAL IMM[0] FLT32 { -0.5020, 1.5960, -0.8130, 0.0000} IMM[1] FLT32 { -0.0627, 1.1640, 1.0000, -0.3920} IMM[2] FLT32 { 0.0000, -0.3920, 2.0170, 0.0000} 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0].x, TEMP[0], SAMP[2], 2D 2: ADD TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx 3: MOV TEMP[1].xy, IN[1].xyyy 4: TEX TEMP[1].x, TEMP[1], SAMP[0], 2D 5: ADD TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx 6: MUL TEMP[1].x, TEMP[1].xxxx, IMM[1].yyyy 7: MAD TEMP[0], TEMP[0].xxxx, IMM[0].yzww, TEMP[1].xxxx 8: MOV TEMP[1].xy, IN[1].xyyy 9: TEX TEMP[1].x, TEMP[1], SAMP[1], 2D 10: ADD TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx 11: MAD TEMP[0].xyz, TEMP[1].xxxx, IMM[2].xyzx, TEMP[0] 12: MOV TEMP[0].xyz, TEMP[0].xyzx 13: MOV TEMP[0].w, IN[0].wwww 14: MOV OUT[0], TEMP[0] 15: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %27 = bitcast <8 x i32> addrspace(2)* %26 to <32 x i8> addrspace(2)* %28 = load <32 x i8>, <32 x i8> addrspace(2)* %27, align 32, !tbaa !0 %29 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %30 = bitcast <4 x i32> addrspace(2)* %29 to <16 x i8> addrspace(2)* %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 %32 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %33 = bitcast <8 x i32> addrspace(2)* %32 to <32 x i8> addrspace(2)* %34 = load <32 x i8>, <32 x i8> addrspace(2)* %33, align 32, !tbaa !0 %35 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %36 = bitcast <4 x i32> addrspace(2)* %35 to <16 x i8> addrspace(2)* %37 = load <16 x i8>, <16 x i8> addrspace(2)* %36, align 16, !tbaa !0 %38 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %41 = bitcast float %39 to i32 %42 = bitcast float %40 to i32 %43 = insertelement <2 x i32> undef, i32 %41, i32 0 %44 = insertelement <2 x i32> %43, i32 %42, i32 1 %45 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %44, <32 x i8> %34, <16 x i8> %37, i32 2) %46 = extractelement <4 x float> %45, i32 0 %47 = fadd float %46, 0xBFE0101020000000 %48 = bitcast float %39 to i32 %49 = bitcast float %40 to i32 %50 = insertelement <2 x i32> undef, i32 %48, i32 0 %51 = insertelement <2 x i32> %50, i32 %49, i32 1 %52 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %51, <32 x i8> %23, <16 x i8> %25, i32 2) %53 = extractelement <4 x float> %52, i32 0 %54 = fadd float %53, 0xBFB0101020000000 %55 = fmul float %54, 0x3FF29FBE80000000 %56 = fmul float %47, 0x3FF9893740000000 %57 = fadd float %56, %55 %58 = fmul float %47, 0xBFEA0418A0000000 %59 = fadd float %58, %55 %60 = fmul float %47, 0.000000e+00 %61 = fadd float %60, %55 %62 = bitcast float %39 to i32 %63 = bitcast float %40 to i32 %64 = insertelement <2 x i32> undef, i32 %62, i32 0 %65 = insertelement <2 x i32> %64, i32 %63, i32 1 %66 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %65, <32 x i8> %28, <16 x i8> %31, i32 2) %67 = extractelement <4 x float> %66, i32 0 %68 = fadd float %67, 0xBFE0101020000000 %69 = fmul float %68, 0.000000e+00 %70 = fadd float %69, %57 %71 = fmul float %68, 0xBFD9168720000000 %72 = fadd float %71, %59 %73 = fmul float %68, 0x400022D0E0000000 %74 = fadd float %73, %61 %75 = call i32 @llvm.SI.packf16(float %70, float %72) %76 = bitcast i32 %75 to float %77 = call i32 @llvm.SI.packf16(float %74, float %38) %78 = bitcast i32 %77 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %76, float %78, float %76, float %78) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx4 s[8:11], s[4:5], 0x4 ; C0840504 s_load_dwordx4 s[12:15], s[4:5], 0x8 ; C0860508 s_load_dwordx8 s[16:23], s[6:7], 0x10 ; C0C80710 s_load_dwordx8 s[24:31], s[6:7], 0x0 ; C0CC0700 s_load_dwordx8 s[32:39], s[6:7], 0x8 ; C0D00708 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400 v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v0, 1, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[16:23], s[12:15] ; F0800100 00640003 image_sample v1, 1, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[24:31], s[0:3] ; F0800100 00060103 image_sample v3, 1, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[32:39], s[8:11] ; F0800100 00480303 v_mov_b32_e32 v4, 0xbf008081 ; 7E0802FF BF008081 s_waitcnt vmcnt(2) ; BF8C0772 v_add_f32_e32 v0, v4, v0 ; 06000104 v_mov_b32_e32 v5, 0xbd808081 ; 7E0A02FF BD808081 s_waitcnt vmcnt(1) ; BF8C0771 v_add_f32_e32 v1, v1, v5 ; 06020B01 v_mul_f32_e32 v1, 0x3f94fdf4, v1 ; 100202FF 3F94FDF4 v_madmk_f32_e32 v5, v0, v1, 0x3fcc49ba ; 400A0300 3FCC49BA v_madmk_f32_e32 v6, v0, v1, 0xbf5020c5 ; 400C0300 BF5020C5 v_mac_f32_e32 v1, 0, v0 ; 3E020080 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v0, v4, v3 ; 06000704 v_mac_f32_e32 v5, 0, v0 ; 3E0A0080 v_madmk_f32_e32 v3, v0, v6, 0xbec8b439 ; 40060D00 BEC8B439 v_madmk_f32_e32 v0, v0, v1, 0x40011687 ; 40000300 40011687 v_cvt_pkrtz_f16_f32_e32 v1, v5, v3 ; 5E020705 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 8 Code Size: 192 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL CONST[0..3] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[2], CONST[2] 2: DP4 TEMP[1].x, IN[2], CONST[3] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: MOV OUT[2], IN[1] 5: MOV OUT[3], CONST[0] 6: MOV OUT[1], IN[0] 7: MOV OUT[4], CONST[1] 8: MOV OUT[0], TEMP[0] 9: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = extractelement <4 x float> %48, i32 3 %53 = fmul float %49, %21 %54 = fmul float %50, %22 %55 = fadd float %53, %54 %56 = fmul float %51, %23 %57 = fadd float %55, %56 %58 = fmul float %52, %24 %59 = fadd float %57, %58 %60 = fmul float %49, %25 %61 = fmul float %50, %26 %62 = fadd float %60, %61 %63 = fmul float %51, %27 %64 = fadd float %62, %63 %65 = fmul float %52, %28 %66 = fadd float %64, %65 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %33, float %34, float %35, float %36) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %41, float %42, float %43, float %44) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %13, float %14, float %15, float %16) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %59, float %66, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x5 ; C2040105 buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 buffer_load_format_xyzw v[9:12], v0, s[16:19], 0 idxen ; E00C2000 80040900 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_buffer_load_dword s5, s[0:3], 0x7 ; C2028107 s_buffer_load_dword s6, s[0:3], 0x8 ; C2030108 s_buffer_load_dword s7, s[0:3], 0x9 ; C2038109 s_buffer_load_dword s9, s[0:3], 0xa ; C204810A s_buffer_load_dword s10, s[0:3], 0xb ; C205010B s_buffer_load_dword s11, s[0:3], 0xc ; C205810C s_buffer_load_dword s12, s[0:3], 0xd ; C206010D s_buffer_load_dword s13, s[0:3], 0xe ; C206810E s_buffer_load_dword s14, s[0:3], 0x0 ; C2070100 s_buffer_load_dword s15, s[0:3], 0x1 ; C2078101 s_buffer_load_dword s16, s[0:3], 0x2 ; C2080102 s_buffer_load_dword s17, s[0:3], 0x3 ; C2088103 s_buffer_load_dword s18, s[0:3], 0x4 ; C2090104 s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s7, v10 ; 10001407 v_mac_f32_e32 v0, s6, v9 ; 3E001206 v_mul_f32_e32 v10, s12, v10 ; 1014140C v_mac_f32_e32 v10, s11, v9 ; 3E14120B v_mac_f32_e32 v0, s9, v11 ; 3E001609 v_mac_f32_e32 v10, s13, v11 ; 3E14160D v_mac_f32_e32 v0, s10, v12 ; 3E00180A v_mac_f32_e32 v10, s0, v12 ; 3E141800 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 exp 15, 33, 0, 0, 0, v5, v6, v7, v8 ; F800021F 08070605 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, s14 ; 7E02020E v_mov_b32_e32 v2, s15 ; 7E04020F v_mov_b32_e32 v3, s16 ; 7E060210 v_mov_b32_e32 v4, s17 ; 7E080211 exp 15, 34, 0, 0, 0, v1, v2, v3, v4 ; F800022F 04030201 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, s18 ; 7E020212 v_mov_b32_e32 v2, s8 ; 7E040208 v_mov_b32_e32 v3, s4 ; 7E060204 v_mov_b32_e32 v4, s5 ; 7E080205 exp 15, 35, 0, 0, 0, v1, v2, v3, v4 ; F800023F 04030201 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 1.0 ; 7E0202F2 v_mov_b32_e32 v2, 0 ; 7E040280 exp 15, 12, 0, 1, 0, v0, v10, v2, v1 ; F80008CF 01020A00 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Code Size: 244 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL TEMP[0..1], LOCAL 0: MAD TEMP[0], IN[0], IN[3], IN[2] 1: MUL TEMP[1].x, TEMP[0].wwww, IN[1].wwww 2: MOV TEMP[0].w, TEMP[1].xxxx 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %23 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %24 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %25 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %26 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %35 = fmul float %22, %31 %36 = fadd float %35, %27 %37 = fmul float %23, %32 %38 = fadd float %37, %28 %39 = fmul float %24, %33 %40 = fadd float %39, %29 %41 = fmul float %25, %34 %42 = fadd float %41, %30 %43 = fmul float %42, %26 %44 = call i32 @llvm.SI.packf16(float %36, float %38) %45 = bitcast i32 %44 to float %46 = call i32 @llvm.SI.packf16(float %40, float %43) %47 = bitcast i32 %46 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %45, float %47, float %45, float %47) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 3, 1, [m0] ; C8180700 v_interp_p2_f32 v6, [v6], v1, 3, 1, [m0] ; C8190701 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 v_interp_p1_f32 v10, v0, 3, 2, [m0] ; C8280B00 v_interp_p2_f32 v10, [v10], v1, 3, 2, [m0] ; C8290B01 v_interp_p1_f32 v11, v0, 0, 3, [m0] ; C82C0C00 v_interp_p2_f32 v11, [v11], v1, 0, 3, [m0] ; C82D0C01 v_interp_p1_f32 v12, v0, 1, 3, [m0] ; C8300D00 v_interp_p2_f32 v12, [v12], v1, 1, 3, [m0] ; C8310D01 v_interp_p1_f32 v13, v0, 2, 3, [m0] ; C8340E00 v_interp_p2_f32 v13, [v13], v1, 2, 3, [m0] ; C8350E01 v_interp_p1_f32 v0, v0, 3, 3, [m0] ; C8000F00 v_interp_p2_f32 v0, [v0], v1, 3, 3, [m0] ; C8010F01 v_mac_f32_e32 v7, v11, v2 ; 3E0E050B v_mac_f32_e32 v8, v12, v3 ; 3E10070C v_mac_f32_e32 v9, v13, v4 ; 3E12090D v_mac_f32_e32 v10, v0, v5 ; 3E140B00 v_mul_f32_e32 v0, v6, v10 ; 10001506 v_cvt_pkrtz_f16_f32_e32 v1, v7, v8 ; 5E021107 v_cvt_pkrtz_f16_f32_e32 v0, v9, v0 ; 5E000109 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Code Size: 148 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL CONST[0..143] DCL TEMP[0..3], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 1530.0599, 2.1000} IMM[1] FLT32 { 3.1000, 4.1000, 5.1000, 255.0100} IMM[2] FLT32 { 6.0000, 0.1000, 1.1000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: MUL TEMP[1].x, IMM[0].zzzz, IN[0].zzzz 2: ADD TEMP[2].x, IMM[0].wwww, TEMP[1].xxxx 3: F2I TEMP[2].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: UARL ADDR[0].x, TEMP[2].xxxx 6: DP4 TEMP[0].x, IN[1], CONST[ADDR[0].x] 7: ADD TEMP[2].x, IMM[1].xxxx, TEMP[1].xxxx 8: F2I TEMP[2].x, TEMP[2].xxxx 9: UARL ADDR[0].x, TEMP[2].xxxx 10: DP4 TEMP[2].x, IN[1], CONST[ADDR[0].x] 11: MOV TEMP[0].y, TEMP[2].xxxx 12: ADD TEMP[2].x, IMM[1].yyyy, TEMP[1].xxxx 13: F2I TEMP[2].x, TEMP[2].xxxx 14: UARL ADDR[0].x, TEMP[2].xxxx 15: UARL ADDR[0].x, TEMP[2].xxxx 16: DP4 TEMP[2].x, IN[1], CONST[ADDR[0].x] 17: ADD TEMP[1].x, IMM[1].zzzz, TEMP[1].xxxx 18: F2I TEMP[1].x, TEMP[1].xxxx 19: UARL ADDR[0].x, TEMP[1].xxxx 20: DP4 TEMP[1].x, IN[1], CONST[ADDR[0].x] 21: MOV TEMP[2].y, TEMP[1].xxxx 22: MUL TEMP[1].x, IN[0].zzzz, IMM[1].wwww 23: MAD TEMP[3].x, TEMP[1].xxxx, IMM[2].xxxx, IMM[2].yyyy 24: F2I TEMP[3].x, TEMP[3].xxxx 25: UARL ADDR[0].x, TEMP[3].xxxx 26: MOV TEMP[3], CONST[ADDR[0].x] 27: MAD TEMP[1].x, TEMP[1].xxxx, IMM[2].xxxx, IMM[2].zzzz 28: F2I TEMP[1].x, TEMP[1].xxxx 29: UARL ADDR[0].x, TEMP[1].xxxx 30: MOV TEMP[1], CONST[ADDR[0].x] 31: MOV TEMP[2].xy, TEMP[2].xyxx 32: MOV OUT[1], IN[0] 33: MOV OUT[2], TEMP[3] 34: MOV OUT[3], TEMP[1] 35: MOV OUT[0], TEMP[0] 36: MOV OUT[4], TEMP[2] 37: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %14 = load <16 x i8>, <16 x i8> addrspace(2)* %13, align 16, !tbaa !0 %15 = add i32 %5, %7 %16 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %14, i32 0, i32 %15) %17 = extractelement <4 x float> %16, i32 0 %18 = extractelement <4 x float> %16, i32 1 %19 = extractelement <4 x float> %16, i32 2 %20 = extractelement <4 x float> %16, i32 3 %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = extractelement <4 x float> %24, i32 1 %27 = extractelement <4 x float> %24, i32 2 %28 = extractelement <4 x float> %24, i32 3 %29 = fmul float %19, 0x4097E83D60000000 %30 = fadd float %29, 0x4000CCCCC0000000 %31 = fptosi float %30 to i32 %32 = shl i32 %31, 4 %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %32) %34 = shl i32 %31, 4 %35 = or i32 %34, 4 %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %35) %37 = shl i32 %31, 4 %38 = or i32 %37, 8 %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %38) %40 = shl i32 %31, 4 %41 = or i32 %40, 12 %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %41) %43 = fmul float %25, %33 %44 = fmul float %26, %36 %45 = fadd float %43, %44 %46 = fmul float %27, %39 %47 = fadd float %45, %46 %48 = fmul float %28, %42 %49 = fadd float %47, %48 %50 = fadd float %29, 0x4008CCCCC0000000 %51 = fptosi float %50 to i32 %52 = shl i32 %51, 4 %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %52) %54 = shl i32 %51, 4 %55 = or i32 %54, 4 %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %55) %57 = shl i32 %51, 4 %58 = or i32 %57, 8 %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %58) %60 = shl i32 %51, 4 %61 = or i32 %60, 12 %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %61) %63 = fmul float %25, %53 %64 = fmul float %26, %56 %65 = fadd float %63, %64 %66 = fmul float %27, %59 %67 = fadd float %65, %66 %68 = fmul float %28, %62 %69 = fadd float %67, %68 %70 = fadd float %29, 0x4010666660000000 %71 = fptosi float %70 to i32 %72 = shl i32 %71, 4 %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %72) %74 = shl i32 %71, 4 %75 = or i32 %74, 4 %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %75) %77 = shl i32 %71, 4 %78 = or i32 %77, 8 %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %78) %80 = shl i32 %71, 4 %81 = or i32 %80, 12 %82 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %81) %83 = fmul float %25, %73 %84 = fmul float %26, %76 %85 = fadd float %83, %84 %86 = fmul float %27, %79 %87 = fadd float %85, %86 %88 = fmul float %28, %82 %89 = fadd float %87, %88 %90 = fadd float %29, 0x4014666660000000 %91 = fptosi float %90 to i32 %92 = shl i32 %91, 4 %93 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %92) %94 = shl i32 %91, 4 %95 = or i32 %94, 4 %96 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %95) %97 = shl i32 %91, 4 %98 = or i32 %97, 8 %99 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %98) %100 = shl i32 %91, 4 %101 = or i32 %100, 12 %102 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %101) %103 = fmul float %25, %93 %104 = fmul float %26, %96 %105 = fadd float %103, %104 %106 = fmul float %27, %99 %107 = fadd float %105, %106 %108 = fmul float %28, %102 %109 = fadd float %107, %108 %110 = fmul float %19, 0x406FE051E0000000 %111 = fmul float %110, 6.000000e+00 %112 = fadd float %111, 0x3FB99999A0000000 %113 = fptosi float %112 to i32 %114 = shl i32 %113, 4 %115 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %114) %116 = shl i32 %113, 4 %117 = or i32 %116, 4 %118 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %117) %119 = shl i32 %113, 4 %120 = or i32 %119, 8 %121 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %120) %122 = shl i32 %113, 4 %123 = or i32 %122, 12 %124 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %123) %125 = fmul float %110, 6.000000e+00 %126 = fadd float %125, 0x3FF19999A0000000 %127 = fptosi float %126 to i32 %128 = shl i32 %127, 4 %129 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %128) %130 = shl i32 %127, 4 %131 = or i32 %130, 4 %132 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %131) %133 = shl i32 %127, 4 %134 = or i32 %133, 8 %135 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %134) %136 = shl i32 %127, 4 %137 = or i32 %136, 12 %138 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %137) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %115, float %118, float %121, float %124) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %129, float %132, float %135, float %138) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %89, float %109, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %49, float %69, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_mov_b32_e32 v1, 0x44bf41eb ; 7E0202FF 44BF41EB v_mov_b32_e32 v2, 0x40c00000 ; 7E0402FF 40C00000 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[3:6], v0, s[4:7], 0 idxen ; E00C2000 80010300 buffer_load_format_xyzw v[7:10], v0, s[12:15], 0 idxen ; E00C2000 80030700 s_waitcnt vmcnt(1) ; BF8C0771 v_madak_f32_e32 v0, v5, v1, 0x40833333 ; 42000305 40833333 v_madak_f32_e32 v11, v5, v1, 0x40a33333 ; 42160305 40A33333 v_mul_f32_e32 v12, 0x437f028f, v5 ; 10180AFF 437F028F v_madak_f32_e32 v13, v12, v2, 0x3dcccccd ; 421A050C 3DCCCCCD v_madak_f32_e32 v2, v12, v2, 0x3f8ccccd ; 4204050C 3F8CCCCD v_madak_f32_e32 v12, v5, v1, 0x40066666 ; 42180305 40066666 v_madak_f32_e32 v1, v5, v1, 0x40466666 ; 42020305 40466666 v_cvt_i32_f32_e32 v12, v12 ; 7E18110C v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_cvt_i32_f32_e32 v11, v11 ; 7E16110B v_cvt_i32_f32_e32 v13, v13 ; 7E1A110D v_cvt_i32_f32_e32 v2, v2 ; 7E041102 v_lshlrev_b32_e32 v12, 4, v12 ; 34181884 v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 v_lshlrev_b32_e32 v0, 4, v0 ; 34000084 v_lshlrev_b32_e32 v11, 4, v11 ; 34161684 buffer_load_dword v14, v12, s[0:3], 0 offen ; E0301000 80000E0C v_or_b32_e32 v15, 4, v12 ; 381E1884 v_or_b32_e32 v16, 8, v12 ; 38201888 v_or_b32_e32 v12, 12, v12 ; 3818188C buffer_load_dword v17, v1, s[0:3], 0 offen ; E0301000 80001101 v_or_b32_e32 v18, 4, v1 ; 38240284 v_or_b32_e32 v19, 8, v1 ; 38260288 v_or_b32_e32 v1, 12, v1 ; 3802028C buffer_load_dword v20, v0, s[0:3], 0 offen ; E0301000 80001400 v_or_b32_e32 v21, 4, v0 ; 382A0084 v_or_b32_e32 v22, 8, v0 ; 382C0088 v_or_b32_e32 v0, 12, v0 ; 3800008C buffer_load_dword v23, v11, s[0:3], 0 offen ; E0301000 8000170B v_or_b32_e32 v24, 4, v11 ; 38301684 v_or_b32_e32 v25, 8, v11 ; 38321688 v_or_b32_e32 v11, 12, v11 ; 3816168C v_lshlrev_b32_e32 v13, 4, v13 ; 341A1A84 v_lshlrev_b32_e32 v2, 4, v2 ; 34040484 buffer_load_dword v15, v15, s[0:3], 0 offen ; E0301000 80000F0F buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 buffer_load_dword v12, v12, s[0:3], 0 offen ; E0301000 80000C0C buffer_load_dword v18, v18, s[0:3], 0 offen ; E0301000 80001212 buffer_load_dword v19, v19, s[0:3], 0 offen ; E0301000 80001313 buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 buffer_load_dword v22, v22, s[0:3], 0 offen ; E0301000 80001616 buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 buffer_load_dword v24, v24, s[0:3], 0 offen ; E0301000 80001818 buffer_load_dword v25, v25, s[0:3], 0 offen ; E0301000 80001919 buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B buffer_load_dword v26, v13, s[0:3], 0 offen ; E0301000 80001A0D v_or_b32_e32 v27, 4, v13 ; 38361A84 v_or_b32_e32 v28, 8, v13 ; 38381A88 v_or_b32_e32 v13, 12, v13 ; 381A1A8C buffer_load_dword v29, v2, s[0:3], 0 offen ; E0301000 80001D02 v_or_b32_e32 v30, 4, v2 ; 383C0484 v_or_b32_e32 v31, 8, v2 ; 383E0488 v_or_b32_e32 v2, 12, v2 ; 3804048C buffer_load_dword v27, v27, s[0:3], 0 offen ; E0301000 80001B1B buffer_load_dword v28, v28, s[0:3], 0 offen ; E0301000 80001C1C buffer_load_dword v13, v13, s[0:3], 0 offen ; E0301000 80000D0D buffer_load_dword v30, v30, s[0:3], 0 offen ; E0301000 80001E1E buffer_load_dword v31, v31, s[0:3], 0 offen ; E0301000 80001F1F buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt ; BF8C077F v_mul_f32_e32 v15, v15, v8 ; 101E110F v_mac_f32_e32 v15, v14, v7 ; 3E1E0F0E v_mul_f32_e32 v14, v18, v8 ; 101C1112 v_mac_f32_e32 v14, v17, v7 ; 3E1C0F11 s_waitcnt vmcnt(13) ; BF8C077D v_mul_f32_e32 v17, v21, v8 ; 10221115 v_mac_f32_e32 v17, v20, v7 ; 3E220F14 s_waitcnt vmcnt(10) ; BF8C077A v_mul_f32_e32 v8, v24, v8 ; 10101118 v_mac_f32_e32 v8, v23, v7 ; 3E100F17 v_mac_f32_e32 v15, v16, v9 ; 3E1E1310 v_mac_f32_e32 v14, v19, v9 ; 3E1C1313 v_mac_f32_e32 v17, v22, v9 ; 3E221316 s_waitcnt vmcnt(9) ; BF8C0779 v_mac_f32_e32 v8, v25, v9 ; 3E101319 v_mac_f32_e32 v15, v12, v10 ; 3E1E150C v_mac_f32_e32 v14, v1, v10 ; 3E1C1501 v_mac_f32_e32 v17, v0, v10 ; 3E221500 s_waitcnt vmcnt(8) ; BF8C0778 v_mac_f32_e32 v8, v11, v10 ; 3E10150B exp 15, 32, 0, 0, 0, v3, v4, v5, v6 ; F800020F 06050403 s_waitcnt vmcnt(3) ; BF8C0773 exp 15, 33, 0, 0, 0, v26, v27, v28, v13 ; F800021F 0D1C1B1A s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 34, 0, 0, 0, v29, v30, v31, v2 ; F800022F 021F1E1D v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 35, 0, 0, 0, v17, v8, v0, v0 ; F800023F 00000811 v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp 15, 12, 0, 1, 0, v15, v14, v0, v1 ; F80008CF 01000E0F s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 32 Code Size: 568 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xy, IN[3].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MAD TEMP[0], TEMP[0], IN[2], IN[1] 3: MUL TEMP[1].x, TEMP[0].wwww, IN[0].wwww 4: MOV TEMP[0].w, TEMP[1].xxxx 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %37 = bitcast float %35 to i32 %38 = bitcast float %36 to i32 %39 = insertelement <2 x i32> undef, i32 %37, i32 0 %40 = insertelement <2 x i32> %39, i32 %38, i32 1 %41 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %40, <32 x i8> %23, <16 x i8> %25, i32 2) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = extractelement <4 x float> %41, i32 2 %45 = extractelement <4 x float> %41, i32 3 %46 = fmul float %42, %31 %47 = fadd float %46, %27 %48 = fmul float %43, %32 %49 = fadd float %48, %28 %50 = fmul float %44, %33 %51 = fadd float %50, %29 %52 = fmul float %45, %34 %53 = fadd float %52, %30 %54 = fmul float %53, %26 %55 = call i32 @llvm.SI.packf16(float %47, float %49) %56 = bitcast i32 %55 to float %57 = call i32 @llvm.SI.packf16(float %51, float %54) %58 = bitcast i32 %57 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %56, float %58, float %56, float %58) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400 v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 v_interp_p1_f32 v5, v0, 2, 1, [m0] ; C8140600 v_interp_p2_f32 v5, [v5], v1, 2, 1, [m0] ; C8150601 v_interp_p1_f32 v6, v0, 3, 1, [m0] ; C8180700 v_interp_p2_f32 v6, [v6], v1, 3, 1, [m0] ; C8190701 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 v_interp_p1_f32 v10, v0, 3, 2, [m0] ; C8280B00 v_interp_p2_f32 v10, [v10], v1, 3, 2, [m0] ; C8290B01 v_interp_p1_f32 v11, v0, 0, 3, [m0] ; C82C0C00 v_interp_p2_f32 v11, [v11], v1, 0, 3, [m0] ; C82D0C01 v_interp_p1_f32 v12, v0, 1, 3, [m0] ; C8300D00 v_interp_p2_f32 v12, [v12], v1, 1, 3, [m0] ; C8310D01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[11:14], 15, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[4:11], s[0:3] ; F0800F00 00010B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v3, v7, v11 ; 3E061707 v_mac_f32_e32 v4, v8, v12 ; 3E081908 v_mac_f32_e32 v5, v9, v13 ; 3E0A1B09 v_mac_f32_e32 v6, v10, v14 ; 3E0C1D0A v_mul_f32_e32 v0, v2, v6 ; 10000D02 v_cvt_pkrtz_f16_f32_e32 v1, v3, v4 ; 5E020903 v_cvt_pkrtz_f16_f32_e32 v0, v5, v0 ; 5E000105 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Code Size: 160 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..47] DCL TEMP[0..2], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 2.0000, 0.1000} IMM[1] FLT32 { 1.1000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: MUL TEMP[1].x, IN[2].xxxx, IMM[0].zzzz 2: ADD TEMP[2].x, TEMP[1].xxxx, IMM[0].wwww 3: F2I TEMP[2].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: UARL ADDR[0].x, TEMP[2].xxxx 6: DP4 TEMP[0].x, IN[1], CONST[ADDR[0].x] 7: ADD TEMP[1].x, IMM[1].xxxx, TEMP[1].xxxx 8: F2I TEMP[1].x, TEMP[1].xxxx 9: UARL ADDR[0].x, TEMP[1].xxxx 10: DP4 TEMP[1].x, IN[1], CONST[ADDR[0].x] 11: MOV TEMP[0].y, TEMP[1].xxxx 12: MOV TEMP[1].xy, IN[0].xyxx 13: MOV OUT[0], TEMP[0] 14: MOV OUT[1], TEMP[1] 15: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %14 = load <16 x i8>, <16 x i8> addrspace(2)* %13, align 16, !tbaa !0 %15 = add i32 %5, %7 %16 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %14, i32 0, i32 %15) %17 = extractelement <4 x float> %16, i32 0 %18 = extractelement <4 x float> %16, i32 1 %19 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %20 = load <16 x i8>, <16 x i8> addrspace(2)* %19, align 16, !tbaa !0 %21 = add i32 %5, %7 %22 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %20, i32 0, i32 %21) %23 = extractelement <4 x float> %22, i32 0 %24 = extractelement <4 x float> %22, i32 1 %25 = extractelement <4 x float> %22, i32 2 %26 = extractelement <4 x float> %22, i32 3 %27 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %28 = load <16 x i8>, <16 x i8> addrspace(2)* %27, align 16, !tbaa !0 %29 = add i32 %5, %7 %30 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %28, i32 0, i32 %29) %31 = extractelement <4 x float> %30, i32 0 %32 = fmul float %31, 2.000000e+00 %33 = fadd float %32, 0x3FB99999A0000000 %34 = fptosi float %33 to i32 %35 = shl i32 %34, 4 %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %35) %37 = shl i32 %34, 4 %38 = or i32 %37, 4 %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %38) %40 = shl i32 %34, 4 %41 = or i32 %40, 8 %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %41) %43 = shl i32 %34, 4 %44 = or i32 %43, 12 %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %44) %46 = fmul float %23, %36 %47 = fmul float %24, %39 %48 = fadd float %46, %47 %49 = fmul float %25, %42 %50 = fadd float %48, %49 %51 = fmul float %26, %45 %52 = fadd float %50, %51 %53 = fadd float %32, 0x3FF19999A0000000 %54 = fptosi float %53 to i32 %55 = shl i32 %54, 4 %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %55) %57 = shl i32 %54, 4 %58 = or i32 %57, 4 %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %58) %60 = shl i32 %54, 4 %61 = or i32 %60, 8 %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %61) %63 = shl i32 %54, 4 %64 = or i32 %63, 12 %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %64) %66 = fmul float %23, %56 %67 = fmul float %24, %59 %68 = fadd float %66, %67 %69 = fmul float %25, %62 %70 = fadd float %68, %69 %71 = fmul float %26, %65 %72 = fadd float %70, %71 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %17, float %18, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %52, float %72, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[3:6], v0, s[12:15], 0 idxen ; E00C2000 80030300 buffer_load_format_xyzw v[7:10], v0, s[8:11], 0 idxen ; E00C2000 80020700 s_waitcnt vmcnt(0) ; BF8C0770 v_madak_f32_e32 v0, 2.0, v7, 0x3dcccccd ; 42000EF4 3DCCCCCD v_madak_f32_e32 v7, 2.0, v7, 0x3f8ccccd ; 420E0EF4 3F8CCCCD v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_cvt_i32_f32_e32 v7, v7 ; 7E0E1107 v_lshlrev_b32_e32 v0, 4, v0 ; 34000084 v_lshlrev_b32_e32 v7, 4, v7 ; 340E0E84 buffer_load_dword v8, v0, s[0:3], 0 offen ; E0301000 80000800 v_or_b32_e32 v9, 4, v0 ; 38120084 v_or_b32_e32 v10, 8, v0 ; 38140088 v_or_b32_e32 v0, 12, v0 ; 3800008C v_or_b32_e32 v11, 4, v7 ; 38160E84 buffer_load_dword v9, v9, s[0:3], 0 offen ; E0301000 80000909 buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B buffer_load_dword v12, v7, s[0:3], 0 offen ; E0301000 80000C07 v_or_b32_e32 v13, 8, v7 ; 381A0E88 v_or_b32_e32 v7, 12, v7 ; 380E0E8C buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A buffer_load_dword v13, v13, s[0:3], 0 offen ; E0301000 80000D0D buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 buffer_load_dword v7, v7, s[0:3], 0 offen ; E0301000 80000707 s_waitcnt vmcnt(6) ; BF8C0776 v_mul_f32_e32 v9, v9, v4 ; 10120909 s_waitcnt vmcnt(5) ; BF8C0775 v_mul_f32_e32 v4, v11, v4 ; 1008090B v_mac_f32_e32 v9, v8, v3 ; 3E120708 s_waitcnt vmcnt(4) ; BF8C0774 v_mac_f32_e32 v4, v12, v3 ; 3E08070C s_waitcnt vmcnt(3) ; BF8C0773 v_mac_f32_e32 v9, v10, v5 ; 3E120B0A s_waitcnt vmcnt(2) ; BF8C0772 v_mac_f32_e32 v4, v13, v5 ; 3E080B0D s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v9, v0, v6 ; 3E120D00 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v4, v7, v6 ; 3E080D07 v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 32, 0, 0, 0, v1, v2, v0, v0 ; F800020F 00000201 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp 15, 12, 0, 1, 0, v9, v4, v0, v1 ; F80008CF 01000409 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Code Size: 268 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0], LOCAL 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV OUT[0], TEMP[0] 3: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = bitcast float %26 to i32 %29 = bitcast float %27 to i32 %30 = insertelement <2 x i32> undef, i32 %28, i32 0 %31 = insertelement <2 x i32> %30, i32 %29, i32 1 %32 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %23, <16 x i8> %25, i32 2) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = call i32 @llvm.SI.packf16(float %33, float %34) %38 = bitcast i32 %37 to float %39 = call i32 @llvm.SI.packf16(float %35, float %36) %40 = bitcast i32 %39 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %38, float %40, float %38, float %40) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00030002 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 4 Code Size: 68 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL CONST[0..5] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[0], CONST[2] 2: DP4 TEMP[1].x, IN[0], CONST[3] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: DP4 TEMP[1].x, IN[0], CONST[4] 5: DP4 TEMP[2].x, IN[0], CONST[5] 6: MOV TEMP[1].y, TEMP[2].xxxx 7: MOV TEMP[1].xy, TEMP[1].xyxx 8: MOV OUT[1], CONST[0] 9: MOV OUT[2], CONST[1] 10: MOV OUT[0], TEMP[0] 11: MOV OUT[3], TEMP[1] 12: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = fmul float %41, %21 %46 = fmul float %42, %22 %47 = fadd float %45, %46 %48 = fmul float %43, %23 %49 = fadd float %47, %48 %50 = fmul float %44, %24 %51 = fadd float %49, %50 %52 = fmul float %41, %25 %53 = fmul float %42, %26 %54 = fadd float %52, %53 %55 = fmul float %43, %27 %56 = fadd float %54, %55 %57 = fmul float %44, %28 %58 = fadd float %56, %57 %59 = fmul float %41, %29 %60 = fmul float %42, %30 %61 = fadd float %59, %60 %62 = fmul float %43, %31 %63 = fadd float %61, %62 %64 = fmul float %44, %32 %65 = fadd float %63, %64 %66 = fmul float %41, %33 %67 = fmul float %42, %34 %68 = fadd float %66, %67 %69 = fmul float %43, %35 %70 = fadd float %68, %69 %71 = fmul float %44, %36 %72 = fadd float %70, %71 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %13, float %14, float %15, float %16) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %65, float %72, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %51, float %58, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s19, s[0:3], 0xf ; C209810F s_buffer_load_dword s20, s[0:3], 0x10 ; C20A0110 s_buffer_load_dword s21, s[0:3], 0x11 ; C20A8111 s_buffer_load_dword s22, s[0:3], 0x14 ; C20B0114 s_buffer_load_dword s23, s[0:3], 0x15 ; C20B8115 s_buffer_load_dword s24, s[0:3], 0x12 ; C20C0112 s_buffer_load_dword s25, s[0:3], 0x13 ; C20C8113 s_buffer_load_dword s26, s[0:3], 0x16 ; C20D0116 s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s13, v1 ; 1008020D v_mac_f32_e32 v4, s12, v0 ; 3E08000C v_mul_f32_e32 v5, s17, v1 ; 100A0211 v_mac_f32_e32 v5, s16, v0 ; 3E0A0010 v_mul_f32_e32 v6, s21, v1 ; 100C0215 v_mac_f32_e32 v6, s20, v0 ; 3E0C0014 v_mul_f32_e32 v1, s23, v1 ; 10020217 v_mac_f32_e32 v1, s22, v0 ; 3E020016 v_mac_f32_e32 v4, s14, v2 ; 3E08040E v_mac_f32_e32 v5, s18, v2 ; 3E0A0412 v_mac_f32_e32 v6, s24, v2 ; 3E0C0418 v_mac_f32_e32 v1, s26, v2 ; 3E02041A v_mac_f32_e32 v4, s15, v3 ; 3E08060F v_mac_f32_e32 v5, s19, v3 ; 3E0A0613 v_mac_f32_e32 v6, s25, v3 ; 3E0C0619 v_mac_f32_e32 v1, s0, v3 ; 3E020600 v_mov_b32_e32 v0, s4 ; 7E000204 v_mov_b32_e32 v2, s5 ; 7E040205 v_mov_b32_e32 v3, s6 ; 7E060206 v_mov_b32_e32 v7, s7 ; 7E0E0207 exp 15, 32, 0, 0, 0, v0, v2, v3, v7 ; F800020F 07030200 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, s8 ; 7E000208 v_mov_b32_e32 v2, s9 ; 7E040209 v_mov_b32_e32 v3, s10 ; 7E06020A v_mov_b32_e32 v7, s11 ; 7E0E020B exp 15, 33, 0, 0, 0, v0, v2, v3, v7 ; F800021F 07030200 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 34, 0, 0, 0, v6, v1, v0, v0 ; F800022F 00000106 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp 15, 12, 0, 1, 0, v4, v5, v0, v1 ; F80008CF 01000504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 8 Code Size: 280 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].w, IMM[0].xxxx 1: MOV TEMP[0].xyz, IN[1].xyzx 2: MOV TEMP[1].xy, IN[2].xyyy 3: TEX TEMP[1], TEMP[1], SAMP[0], 2D 4: MUL TEMP[0], TEMP[1], TEMP[0] 5: MUL TEMP[0], TEMP[0], IN[1].wwww 6: MAD TEMP[0], IN[0], TEMP[0].wwww, TEMP[0] 7: MOV OUT[0], TEMP[0] 8: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %36 = bitcast float %34 to i32 %37 = bitcast float %35 to i32 %38 = insertelement <2 x i32> undef, i32 %36, i32 0 %39 = insertelement <2 x i32> %38, i32 %37, i32 1 %40 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %39, <32 x i8> %23, <16 x i8> %25, i32 2) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = fmul float %41, %30 %46 = fmul float %42, %31 %47 = fmul float %43, %32 %48 = fmul float %45, %33 %49 = fmul float %46, %33 %50 = fmul float %47, %33 %51 = fmul float %44, %33 %52 = fmul float %26, %51 %53 = fadd float %52, %48 %54 = fmul float %27, %51 %55 = fadd float %54, %49 %56 = fmul float %28, %51 %57 = fadd float %56, %50 %58 = fmul float %29, %51 %59 = fadd float %58, %51 %60 = call i32 @llvm.SI.packf16(float %53, float %55) %61 = bitcast i32 %60 to float %62 = call i32 @llvm.SI.packf16(float %57, float %59) %63 = bitcast i32 %62 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %61, float %63, float %61, float %63) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700 v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701 v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800 v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801 v_interp_p1_f32 v11, v0, 1, 2, [m0] ; C82C0900 v_interp_p2_f32 v11, [v11], v1, 1, 2, [m0] ; C82D0901 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[4:11], s[0:3] ; F0800F00 00010A0A s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v6, v10 ; 10001506 v_mul_f32_e32 v1, v7, v11 ; 10021707 v_mul_f32_e32 v6, v8, v12 ; 100C1908 v_mul_f32_e32 v7, v9, v13 ; 100E1B09 v_mul_f32_e32 v0, v9, v0 ; 10000109 v_mul_f32_e32 v1, v9, v1 ; 10020309 v_mul_f32_e32 v6, v9, v6 ; 100C0D09 v_mac_f32_e32 v0, v7, v2 ; 3E000507 v_mac_f32_e32 v1, v7, v3 ; 3E020707 v_mac_f32_e32 v6, v7, v4 ; 3E0C0907 v_mac_f32_e32 v7, v7, v5 ; 3E0E0B07 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v6, v7 ; 5E020F06 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Code Size: 176 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL OUT[4], GENERIC[2] DCL OUT[5], GENERIC[3] DCL OUT[6], GENERIC[4] DCL OUT[7], GENERIC[5] DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[1][0] DCL CONST[2][0..15] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..7] DCL CONST[6][0] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL DCL TEMP[2..15], LOCAL IMM[0] FLT32 { 0.0000, 0.5000, -0.5000, 3.0000} IMM[1] UINT32 {0, 4, 32, 96} IMM[2] FLT32 { 2.0000, -2.0000, 1.0000, 0.0774} IMM[3] FLT32 { 0.9479, 0.0521, 2.4000, 0.0404} IMM[4] UINT32 {112, 3, 320, 48} IMM[5] FLT32 { 0.0000, 1.0000, 0.0039, 0.0000} IMM[6] UINT32 {304, 64, 512, 528} IMM[7] UINT32 {544, 560, 516, 532} IMM[8] UINT32 {548, 564, 524, 540} IMM[9] UINT32 {556, 572, 364, 372} IMM[10] UINT32 {520, 536, 552, 568} IMM[11] FLT32 { 0.0010, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MAD TEMP[0].x, IN[0].xxxx, IMM[0].yyyy, IMM[0].yyyy 4: MAD TEMP[2].x, IN[0].yyyy, IMM[0].zzzz, IMM[0].yyyy 5: MOV TEMP[3].x, TEMP[0].xxxx 6: MOV TEMP[3].y, TEMP[2].xxxx 7: MOV TEMP[3].z, TEMP[0].xxxx 8: MOV TEMP[3].w, TEMP[2].xxxx 9: RCP TEMP[0].x, CONST[1][0].yyyy 10: MUL TEMP[2].x, IN[1].xxxx, IMM[0].wwww 11: FSLT TEMP[4].x, IN[1].xxxx, CONST[1][0].wwww 12: UIF TEMP[4].xxxx :0 13: MOV TEMP[2].x, TEMP[2].xxxx 14: ELSE :0 15: MOV TEMP[2].x, IMM[0].xxxx 16: ENDIF 17: MAD TEMP[2].x, CONST[1][0].zzzz, TEMP[2].xxxx, CONST[1][0].xxxx 18: MOV TEMP[4].x, IMM[0].xxxx 19: MOV TEMP[4].y, TEMP[2].xxxx 20: MUL TEMP[5].x, IMM[0].yyyy, CONST[1][0].yyyy 21: MUL TEMP[6].x, IN[1].zzzz, CONST[5][2].xxxx 22: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx 23: FRC TEMP[6].x, TEMP[5].xxxx 24: FLR TEMP[5].x, TEMP[5].xxxx 25: MUL TEMP[5].x, TEMP[5].xxxx, IMM[2].xxxx 26: ADD TEMP[7].x, CONST[1][0].yyyy, IMM[2].yyyy 27: MIN TEMP[7].x, TEMP[7].xxxx, TEMP[5].xxxx 28: RCP TEMP[8].x, CONST[1][0].yyyy 29: MUL TEMP[8].x, TEMP[5].xxxx, TEMP[8].xxxx 30: FLR TEMP[8].x, TEMP[8].xxxx 31: MUL TEMP[8].x, CONST[1][0].yyyy, TEMP[8].xxxx 32: ADD TEMP[5].x, TEMP[5].xxxx, -TEMP[8].xxxx 33: MOV TEMP[4].xy, TEMP[4].xyyy 34: MOV TEMP[4].w, IMM[0].xxxx 35: TXL TEMP[4].z, TEMP[4], SAMP[0], 2D 36: FSLT TEMP[4].x, IMM[0].xxxx, TEMP[4].zzzz 37: UIF TEMP[4].xxxx :0 38: MOV TEMP[4].x, TEMP[7].xxxx 39: ELSE :0 40: MOV TEMP[4].x, TEMP[5].xxxx 41: ENDIF 42: MUL TEMP[4].x, TEMP[0].xxxx, TEMP[4].xxxx 43: MOV TEMP[5].x, TEMP[4].xxxx 44: MOV TEMP[5].y, TEMP[2].xxxx 45: MOV TEMP[5].xy, TEMP[5].xyyy 46: MOV TEMP[5].w, IMM[0].xxxx 47: TXL TEMP[5].xw, TEMP[5], SAMP[0], 2D 48: LRP TEMP[5].x, TEMP[6].xxxx, TEMP[5].wwww, TEMP[5].xxxx 49: ADD TEMP[6].x, TEMP[2].xxxx, CONST[1][0].zzzz 50: MOV TEMP[7].x, TEMP[4].xxxx 51: MOV TEMP[7].y, TEMP[6].xxxx 52: MOV TEMP[7].xy, TEMP[7].xyyy 53: MOV TEMP[7].w, IMM[0].xxxx 54: TXL TEMP[7], TEMP[7], SAMP[0], 2D 55: MOV TEMP[8].x, TEMP[4].xxxx 56: MAD TEMP[2].x, IMM[2].xxxx, CONST[1][0].zzzz, TEMP[2].xxxx 57: MOV TEMP[8].y, TEMP[2].xxxx 58: MOV TEMP[2].xy, TEMP[8].xyyy 59: MOV TEMP[2].w, IMM[0].xxxx 60: TXL TEMP[2], TEMP[2], SAMP[0], 2D 61: ADD TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx 62: MOV TEMP[0].y, TEMP[6].xxxx 63: MOV TEMP[0].xy, TEMP[0].xyyy 64: MOV TEMP[0].w, IMM[0].xxxx 65: TXL TEMP[0], TEMP[0], SAMP[0], 2D 66: ADD TEMP[4].xy, TEMP[0].zwww, -TEMP[0].xyyy 67: ADD TEMP[6].xy, TEMP[7].zwww, -TEMP[7].xyyy 68: RCP TEMP[8].x, TEMP[4].xxxx 69: RCP TEMP[8].y, TEMP[4].yyyy 70: MUL TEMP[6].xy, TEMP[6].xyyy, TEMP[8].xyyy 71: MUL TEMP[8].xy, TEMP[0].xyyy, TEMP[6].xyyy 72: ADD TEMP[7].xy, TEMP[7].xyyy, -TEMP[8].xyyy 73: ADD TEMP[6].xy, TEMP[7].xyyy, TEMP[6].xyyy 74: ADD TEMP[8].xy, TEMP[2].zwww, -TEMP[2].xyyy 75: RCP TEMP[9].x, TEMP[4].xxxx 76: RCP TEMP[9].y, TEMP[4].yyyy 77: MUL TEMP[4].xy, TEMP[8].xyyy, TEMP[9].xyyy 78: MUL TEMP[8].xy, TEMP[0].xyyy, TEMP[4].xyyy 79: ADD TEMP[2].xy, TEMP[2].xyyy, -TEMP[8].xyyy 80: ADD TEMP[4].xy, TEMP[2].xyyy, TEMP[4].xyyy 81: MOV TEMP[8].y, IMM[0].xxxx 82: MOV TEMP[8].x, TEMP[5].xxxx 83: MUL TEMP[5].x, IMM[0].yyyy, TEMP[5].xxxx 84: ADD TEMP[9].x, IMM[0].yyyy, TEMP[5].xxxx 85: MOV TEMP[10].x, TEMP[9].xxxx 86: MOV TEMP[10].y, TEMP[9].xxxx 87: MOV TEMP[10].z, TEMP[5].xxxx 88: MOV TEMP[10].w, TEMP[5].xxxx 89: ADD TEMP[5], TEMP[3], IMM[0].zzzz 90: RCP TEMP[9].x, CONST[5][2].zzzz 91: MOV_SAT TEMP[9].x, TEMP[9].xxxx 92: MAD TEMP[5], TEMP[5], TEMP[9].xxxx, IMM[0].yyyy 93: LRP TEMP[3], TEMP[10], TEMP[5], TEMP[3] 94: MAD TEMP[5].x, CONST[5][6].zzzz, TEMP[3].xxxx, CONST[5][6].xxxx 95: MAD TEMP[9].x, CONST[5][6].wwww, TEMP[3].yyyy, CONST[5][6].yyyy 96: MOV TEMP[10].x, TEMP[5].xxxx 97: MOV TEMP[10].y, TEMP[9].xxxx 98: MAD TEMP[11].x, CONST[5][6].zzzz, TEMP[3].zzzz, CONST[5][6].xxxx 99: MOV TEMP[10].z, TEMP[11].xxxx 100: MAD TEMP[3].x, CONST[5][6].wwww, TEMP[3].wwww, CONST[5][6].yyyy 101: MOV TEMP[10].w, TEMP[3].xxxx 102: ADD TEMP[3], CONST[5][6].xyxy, -TEMP[10] 103: MAD TEMP[3], TEMP[3], IMM[2].xxxx, CONST[5][6].zwzw 104: FSLT TEMP[11].x, TEMP[3].xxxx, IMM[0].xxxx 105: UIF TEMP[11].xxxx :0 106: MOV TEMP[11].x, TEMP[0].zzzz 107: ELSE :0 108: MOV TEMP[11].x, TEMP[0].xxxx 109: ENDIF 110: FSLT TEMP[12].x, TEMP[3].yyyy, IMM[0].xxxx 111: UIF TEMP[12].xxxx :0 112: MOV TEMP[12].x, TEMP[0].wwww 113: ELSE :0 114: MOV TEMP[12].x, TEMP[0].yyyy 115: ENDIF 116: MOV TEMP[0].x, TEMP[11].xxxx 117: MOV TEMP[0].y, TEMP[12].xxxx 118: MOV TEMP[5].x, TEMP[5].xxxx 119: MOV TEMP[5].y, TEMP[9].xxxx 120: ADD TEMP[0].xy, TEMP[0].xyyy, -TEMP[5].xyyy 121: RCP TEMP[5].x, TEMP[3].xxxx 122: RCP TEMP[5].y, TEMP[3].yyyy 123: MUL TEMP[0].xy, TEMP[0].xyyy, TEMP[5].xyyy 124: MOV_SAT TEMP[0].xy, TEMP[0].xyyy 125: MAD TEMP[3], TEMP[0].xyxy, TEMP[3], TEMP[10] 126: MUL TEMP[0].xy, TEMP[0].xyyy, IMM[2].xxxx 127: ADD TEMP[0].xy, IMM[2].zzzz, -TEMP[0].xyyy 128: MUL TEMP[0].xy, TEMP[0].xyyy, IN[0].xyyy 129: LRP TEMP[5].xy, TEMP[3].xyyy, TEMP[6].xyyy, TEMP[7].xyyy 130: LRP TEMP[2].xy, TEMP[3].zwww, TEMP[4].xyyy, TEMP[2].xyyy 131: MOV TEMP[3].x, TEMP[5].xxxx 132: MOV TEMP[3].y, TEMP[5].yyyy 133: MOV TEMP[3].z, TEMP[2].xxxx 134: MOV TEMP[3].w, TEMP[2].yyyy 135: MUL TEMP[2].xyz, IN[2].xyzz, IMM[2].wwww 136: MAD TEMP[4].xyz, IN[2].xyzz, IMM[3].xxxx, IMM[3].yyyy 137: POW TEMP[5].x, TEMP[4].xxxx, IMM[3].zzzz 138: POW TEMP[5].y, TEMP[4].yyyy, IMM[3].zzzz 139: POW TEMP[5].z, TEMP[4].zzzz, IMM[3].zzzz 140: FSLT TEMP[4].x, IMM[3].wwww, IN[2].xxxx 141: UIF TEMP[4].xxxx :0 142: MOV TEMP[4].x, TEMP[5].xxxx 143: ELSE :0 144: MOV TEMP[4].x, TEMP[2].xxxx 145: ENDIF 146: FSLT TEMP[6].x, IMM[3].wwww, IN[2].yyyy 147: UIF TEMP[6].xxxx :0 148: MOV TEMP[6].x, TEMP[5].yyyy 149: ELSE :0 150: MOV TEMP[6].x, TEMP[2].yyyy 151: ENDIF 152: FSLT TEMP[7].x, IMM[3].wwww, IN[2].zzzz 153: UIF TEMP[7].xxxx :0 154: MOV TEMP[5].x, TEMP[5].zzzz 155: ELSE :0 156: MOV TEMP[5].x, TEMP[2].zzzz 157: ENDIF 158: MOV TEMP[2].x, TEMP[4].xxxx 159: MOV TEMP[2].y, TEMP[6].xxxx 160: MOV TEMP[2].z, TEMP[5].xxxx 161: MOV TEMP[2].w, IN[2].wwww 162: LRP TEMP[2], CONST[5][7].xxxx, TEMP[2], IN[2] 163: MUL TEMP[4].xyz, IMM[5].yxxx, CONST[4][20].yzxx 164: MAD TEMP[4].xyz, IMM[5].xyxx, CONST[4][20].zxyy, -TEMP[4].xyzz 165: COS TEMP[5].x, IN[4].xxxx 166: SIN TEMP[6].x, IN[4].xxxx 167: MUL TEMP[7].xyz, TEMP[5].xxxx, TEMP[4].xyzz 168: MAD TEMP[7].xyz, IMM[5].xxyy, TEMP[6].xxxx, TEMP[7].xyzz 169: MUL TEMP[4].xyz, TEMP[6].xxxx, TEMP[4].xyzz 170: MAD TEMP[4].xyz, IMM[5].xxyy, TEMP[5].xxxx, -TEMP[4].xyzz 171: SIN TEMP[5].x, IN[4].yyyy 172: COS TEMP[6].x, IN[4].yyyy 173: ADD TEMP[9].x, IMM[2].zzzz, -TEMP[6].xxxx 174: MUL TEMP[10].x, TEMP[4].xxxx, TEMP[4].yyyy 175: MUL TEMP[10].x, TEMP[9].xxxx, TEMP[10].xxxx 176: MUL TEMP[11].x, TEMP[5].xxxx, TEMP[4].zzzz 177: MUL TEMP[12].x, TEMP[4].xxxx, TEMP[4].zzzz 178: MUL TEMP[12].x, TEMP[9].xxxx, TEMP[12].xxxx 179: MUL TEMP[13].x, TEMP[5].xxxx, TEMP[4].yyyy 180: MUL TEMP[14].x, TEMP[4].yyyy, TEMP[4].zzzz 181: MUL TEMP[9].x, TEMP[14].xxxx, TEMP[9].xxxx 182: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[4].xxxx 183: MUL TEMP[14].x, TEMP[4].xxxx, TEMP[4].xxxx 184: LRP TEMP[14].x, TEMP[6].xxxx, IMM[2].zzzz, TEMP[14].xxxx 185: ADD TEMP[15].x, TEMP[10].xxxx, -TEMP[11].xxxx 186: MOV TEMP[14].y, TEMP[15].xxxx 187: ADD TEMP[15].x, TEMP[12].xxxx, TEMP[13].xxxx 188: MOV TEMP[14].z, TEMP[15].xxxx 189: ADD TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx 190: MUL TEMP[11].x, TEMP[4].yyyy, TEMP[4].yyyy 191: LRP TEMP[11].x, TEMP[6].xxxx, IMM[2].zzzz, TEMP[11].xxxx 192: MOV TEMP[10].y, TEMP[11].xxxx 193: ADD TEMP[11].x, TEMP[9].xxxx, -TEMP[5].xxxx 194: MOV TEMP[10].z, TEMP[11].xxxx 195: ADD TEMP[11].x, TEMP[12].xxxx, -TEMP[13].xxxx 196: ADD TEMP[5].x, TEMP[9].xxxx, TEMP[5].xxxx 197: MOV TEMP[11].y, TEMP[5].xxxx 198: MUL TEMP[5].x, TEMP[4].zzzz, TEMP[4].zzzz 199: LRP TEMP[5].x, TEMP[6].xxxx, IMM[2].zzzz, TEMP[5].xxxx 200: MOV TEMP[11].z, TEMP[5].xxxx 201: DP3 TEMP[5].x, TEMP[7].xyzz, TEMP[14].xyzz 202: DP3 TEMP[6].x, TEMP[7].xyzz, TEMP[10].xyzz 203: MOV TEMP[5].y, TEMP[6].xxxx 204: DP3 TEMP[6].x, TEMP[7].xyzz, TEMP[11].xyzz 205: MOV TEMP[5].z, TEMP[6].xxxx 206: ADD TEMP[6].x, CONST[5][3].yyyy, TEMP[0].xxxx 207: ADD TEMP[0].x, CONST[5][3].zzzz, TEMP[0].yyyy 208: MUL TEMP[0].xyz, TEMP[0].xxxx, TEMP[4].xyzz 209: MAD TEMP[0].xyz, TEMP[5].xyzz, TEMP[6].xxxx, TEMP[0].xyzz 210: ADD TEMP[4].xyz, CONST[4][19].xyzz, -IN[3].xyzz 211: DP3 TEMP[4].x, TEMP[4].xyzz, TEMP[4].xyzz 212: SQRT TEMP[4].x, TEMP[4].xxxx 213: MOV TEMP[5], TEMP[2] 214: MOV TEMP[6].x, IN[4].wwww 215: FSLT TEMP[7].x, IMM[0].xxxx, CONST[5][4].zzzz 216: UIF TEMP[7].xxxx :0 217: MUL TEMP[7].x, TEMP[4].xxxx, CONST[5][0].yyyy 218: FSLT TEMP[9].x, TEMP[7].xxxx, IN[4].wwww 219: ADD TEMP[7].x, IN[4].wwww, -TEMP[7].xxxx 220: ADD TEMP[10].x, CONST[5][0].zzzz, -CONST[5][0].yyyy 221: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[4].xxxx 222: RCP TEMP[10].x, TEMP[10].xxxx 223: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[10].xxxx 224: ADD TEMP[7].x, IMM[2].zzzz, -TEMP[7].xxxx 225: MUL TEMP[7], TEMP[7].xxxx, TEMP[2] 226: MUL TEMP[10].x, TEMP[4].xxxx, CONST[5][0].zzzz 227: FSLT TEMP[10].x, TEMP[10].xxxx, IN[4].wwww 228: UIF TEMP[10].xxxx :0 229: MOV TEMP[10], IMM[0].xxxx 230: ELSE :0 231: MOV TEMP[10], TEMP[7] 232: ENDIF 233: UIF TEMP[9].xxxx :0 234: MOV TEMP[7], TEMP[10] 235: ELSE :0 236: MOV TEMP[7], TEMP[2] 237: ENDIF 238: MOV TEMP[5], TEMP[7] 239: MUL TEMP[2].x, TEMP[4].xxxx, CONST[5][0].xxxx 240: MAX TEMP[2].x, IN[4].wwww, TEMP[2].xxxx 241: MUL TEMP[4].x, TEMP[4].xxxx, CONST[5][0].wwww 242: MIN TEMP[6].x, TEMP[2].xxxx, TEMP[4].xxxx 243: ENDIF 244: FSLT TEMP[2].x, TEMP[5].wwww, IMM[5].zzzz 245: UIF TEMP[2].xxxx :0 246: MOV TEMP[2].x, IMM[0].xxxx 247: ELSE :0 248: MOV TEMP[2].x, TEMP[6].xxxx 249: ENDIF 250: MOV TEMP[4].xy, IMM[0].xxxx 251: MOV TEMP[4].w, IMM[0].xxxx 252: TXL TEMP[4], TEMP[4], SAMP[1], 2D 253: MUL TEMP[4].xyz, TEMP[4], IMM[5].wwww 254: MAD TEMP[0].xyz, TEMP[2].xxxx, TEMP[0].xyzz, IN[3].xyzz 255: ADD TEMP[0].xyz, TEMP[4].xyzz, TEMP[0].xyzz 256: MOV TEMP[2].w, IMM[2].zzzz 257: MOV TEMP[2].x, TEMP[0].xxxx 258: MOV TEMP[2].y, TEMP[0].yyyy 259: MOV TEMP[2].z, TEMP[0].zzzz 260: MOV TEMP[4].x, CONST[4][32].xxxx 261: MOV TEMP[4].y, CONST[4][33].xxxx 262: MOV TEMP[4].z, CONST[4][34].xxxx 263: MOV TEMP[4].w, CONST[4][35].xxxx 264: DP4 TEMP[4].x, TEMP[2], TEMP[4] 265: MOV TEMP[6].x, CONST[4][32].yyyy 266: MOV TEMP[6].y, CONST[4][33].yyyy 267: MOV TEMP[6].z, CONST[4][34].yyyy 268: MOV TEMP[6].w, CONST[4][35].yyyy 269: DP4 TEMP[6].x, TEMP[2], TEMP[6] 270: MOV TEMP[7].x, CONST[4][32].wwww 271: MOV TEMP[7].y, CONST[4][33].wwww 272: MOV TEMP[7].z, CONST[4][34].wwww 273: MOV TEMP[7].w, CONST[4][35].wwww 274: DP4 TEMP[7].x, TEMP[2], TEMP[7] 275: MAD TEMP[9].xyz, CONST[4][20].xyzz, CONST[5][3].xxxx, TEMP[0].xyzz 276: MOV TEMP[10].w, IMM[2].zzzz 277: MOV TEMP[10].x, TEMP[9].xxxx 278: MOV TEMP[10].y, TEMP[9].yyyy 279: MOV TEMP[10].z, TEMP[9].zzzz 280: MOV TEMP[9].xyz, -CONST[4][19].xyzx 281: ADD TEMP[11].xyz, TEMP[0].xyzz, TEMP[9].xyzz 282: MOV TEMP[12].x, TEMP[0].xxxx 283: MOV TEMP[12].y, TEMP[0].yyyy 284: MOV TEMP[12].z, TEMP[0].zzzz 285: DP3 TEMP[13].x, CONST[4][20].xyzz, TEMP[11].xyzz 286: MOV TEMP[12].w, TEMP[13].xxxx 287: MOV TEMP[13].x, TEMP[4].xxxx 288: MOV TEMP[13].y, TEMP[6].xxxx 289: MOV TEMP[14].x, -CONST[4][22].wwww 290: DP3 TEMP[11].x, TEMP[11].xyzz, CONST[4][20].xyzz 291: ADD TEMP[11].x, TEMP[11].xxxx, TEMP[14].xxxx 292: ADD TEMP[14].x, CONST[4][23].yyyy, TEMP[14].xxxx 293: RCP TEMP[14].x, TEMP[14].xxxx 294: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[14].xxxx 295: MOV TEMP[13].z, TEMP[11].xxxx 296: MOV TEMP[13].w, TEMP[7].xxxx 297: ADD TEMP[0].xyz, TEMP[0].xyzz, TEMP[9].xyzz 298: MOV TEMP[0].xyz, -TEMP[0].xyzx 299: DP3 TEMP[9].x, TEMP[0].xyzz, TEMP[0].xyzz 300: RSQ TEMP[9].x, TEMP[9].xxxx 301: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[9].xxxx 302: MOV TEMP[4].x, TEMP[4].xxxx 303: MOV TEMP[4].y, -TEMP[6].xxxx 304: MOV TEMP[6].x, CONST[4][32].zzzz 305: MOV TEMP[6].y, CONST[4][33].zzzz 306: MOV TEMP[6].z, CONST[4][34].zzzz 307: MOV TEMP[6].w, CONST[4][35].zzzz 308: MOV TEMP[9].x, CONST[4][32].wwww 309: MOV TEMP[9].y, CONST[4][33].wwww 310: MOV TEMP[9].z, CONST[4][34].wwww 311: MOV TEMP[9].w, CONST[4][35].wwww 312: MOV TEMP[11].x, CONST[4][32].zzzz 313: MOV TEMP[11].y, CONST[4][33].zzzz 314: MOV TEMP[11].z, CONST[4][34].zzzz 315: MOV TEMP[11].w, CONST[4][35].zzzz 316: DP4 TEMP[6].x, TEMP[10], TEMP[6] 317: DP4 TEMP[9].x, TEMP[10], TEMP[9] 318: RCP TEMP[9].x, TEMP[9].xxxx 319: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[9].xxxx 320: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].xxxx 321: DP4 TEMP[2].x, TEMP[2], TEMP[11] 322: MIN TEMP[2].x, IMM[11].xxxx, TEMP[2].xxxx 323: MAX TEMP[2].x, TEMP[6].xxxx, TEMP[2].xxxx 324: MAD TEMP[2].x, IMM[2].xxxx, TEMP[2].xxxx, -TEMP[7].xxxx 325: MOV TEMP[4].z, TEMP[2].xxxx 326: MOV TEMP[4].w, TEMP[7].xxxx 327: MOV OUT[1], TEMP[1] 328: MOV OUT[6].xyz, TEMP[0].xyzx 329: MOV OUT[2], TEMP[3] 330: MOV OUT[3], TEMP[5] 331: MOV OUT[4], TEMP[13] 332: MOV OUT[0], TEMP[4] 333: MOV OUT[5], TEMP[12] 334: MOV OUT[7].xy, TEMP[8].xyxx 335: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4 %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 %19 = call float @llvm.SI.load.const(<16 x i8> %18, i32 304) %20 = call float @llvm.SI.load.const(<16 x i8> %18, i32 308) %21 = call float @llvm.SI.load.const(<16 x i8> %18, i32 312) %22 = call float @llvm.SI.load.const(<16 x i8> %18, i32 320) %23 = call float @llvm.SI.load.const(<16 x i8> %18, i32 324) %24 = call float @llvm.SI.load.const(<16 x i8> %18, i32 328) %25 = call float @llvm.SI.load.const(<16 x i8> %18, i32 364) %26 = call float @llvm.SI.load.const(<16 x i8> %18, i32 372) %27 = call float @llvm.SI.load.const(<16 x i8> %18, i32 512) %28 = call float @llvm.SI.load.const(<16 x i8> %18, i32 516) %29 = call float @llvm.SI.load.const(<16 x i8> %18, i32 520) %30 = call float @llvm.SI.load.const(<16 x i8> %18, i32 524) %31 = call float @llvm.SI.load.const(<16 x i8> %18, i32 528) %32 = call float @llvm.SI.load.const(<16 x i8> %18, i32 532) %33 = call float @llvm.SI.load.const(<16 x i8> %18, i32 536) %34 = call float @llvm.SI.load.const(<16 x i8> %18, i32 540) %35 = call float @llvm.SI.load.const(<16 x i8> %18, i32 544) %36 = call float @llvm.SI.load.const(<16 x i8> %18, i32 548) %37 = call float @llvm.SI.load.const(<16 x i8> %18, i32 552) %38 = call float @llvm.SI.load.const(<16 x i8> %18, i32 556) %39 = call float @llvm.SI.load.const(<16 x i8> %18, i32 560) %40 = call float @llvm.SI.load.const(<16 x i8> %18, i32 564) %41 = call float @llvm.SI.load.const(<16 x i8> %18, i32 568) %42 = call float @llvm.SI.load.const(<16 x i8> %18, i32 572) %43 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 5 %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = call float @llvm.SI.load.const(<16 x i8> %44, i32 4) %46 = call float @llvm.SI.load.const(<16 x i8> %44, i32 8) %47 = call float @llvm.SI.load.const(<16 x i8> %44, i32 32) %48 = call float @llvm.SI.load.const(<16 x i8> %44, i32 40) %49 = call float @llvm.SI.load.const(<16 x i8> %44, i32 48) %50 = call float @llvm.SI.load.const(<16 x i8> %44, i32 52) %51 = call float @llvm.SI.load.const(<16 x i8> %44, i32 56) %52 = call float @llvm.SI.load.const(<16 x i8> %44, i32 72) %53 = call float @llvm.SI.load.const(<16 x i8> %44, i32 96) %54 = call float @llvm.SI.load.const(<16 x i8> %44, i32 100) %55 = call float @llvm.SI.load.const(<16 x i8> %44, i32 104) %56 = call float @llvm.SI.load.const(<16 x i8> %44, i32 108) %57 = call float @llvm.SI.load.const(<16 x i8> %44, i32 112) %58 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %59 = load <8 x i32>, <8 x i32> addrspace(2)* %58, align 32, !tbaa !0 %60 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %61 = load <4 x i32>, <4 x i32> addrspace(2)* %60, align 16, !tbaa !0 %62 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %63 = bitcast <8 x i32> addrspace(2)* %62 to <32 x i8> addrspace(2)* %64 = load <32 x i8>, <32 x i8> addrspace(2)* %63, align 32, !tbaa !0 %65 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %66 = bitcast <4 x i32> addrspace(2)* %65 to <16 x i8> addrspace(2)* %67 = load <16 x i8>, <16 x i8> addrspace(2)* %66, align 16, !tbaa !0 %68 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %69 = load <16 x i8>, <16 x i8> addrspace(2)* %68, align 16, !tbaa !0 %70 = add i32 %5, %7 %71 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %69, i32 0, i32 %70) %72 = extractelement <4 x float> %71, i32 0 %73 = extractelement <4 x float> %71, i32 1 %74 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %75 = load <16 x i8>, <16 x i8> addrspace(2)* %74, align 16, !tbaa !0 %76 = add i32 %10, %6 %77 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %75, i32 0, i32 %76) %78 = extractelement <4 x float> %77, i32 0 %79 = extractelement <4 x float> %77, i32 2 %80 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0 %82 = add i32 %10, %6 %83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %82) %84 = extractelement <4 x float> %83, i32 0 %85 = extractelement <4 x float> %83, i32 1 %86 = extractelement <4 x float> %83, i32 2 %87 = extractelement <4 x float> %83, i32 3 %88 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %89 = load <16 x i8>, <16 x i8> addrspace(2)* %88, align 16, !tbaa !0 %90 = add i32 %10, %6 %91 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %89, i32 0, i32 %90) %92 = extractelement <4 x float> %91, i32 0 %93 = extractelement <4 x float> %91, i32 1 %94 = extractelement <4 x float> %91, i32 2 %95 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %96 = load <16 x i8>, <16 x i8> addrspace(2)* %95, align 16, !tbaa !0 %97 = add i32 %10, %6 %98 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %96, i32 0, i32 %97) %99 = extractelement <4 x float> %98, i32 0 %100 = extractelement <4 x float> %98, i32 1 %101 = extractelement <4 x float> %98, i32 3 %102 = fmul float %72, 5.000000e-01 %103 = fadd float %102, 5.000000e-01 %104 = fmul float %73, -5.000000e-01 %105 = fadd float %104, 5.000000e-01 %106 = fdiv float 1.000000e+00, %14 %107 = fmul float %78, 3.000000e+00 %108 = fcmp olt float %78, %16 %. = select i1 %108, float %107, float 0.000000e+00 %109 = fmul float %15, %. %110 = fadd float %109, %13 %111 = fmul float %14, 5.000000e-01 %112 = fmul float %79, %47 %113 = fmul float %111, %112 %114 = call float @llvm.AMDIL.fraction.(float %113) %115 = call float @floor(float %113) %116 = fmul float %115, 2.000000e+00 %117 = fadd float %14, -2.000000e+00 %118 = call float @llvm.minnum.f32(float %117, float %116) %119 = fdiv float 1.000000e+00, %14 %120 = fmul float %116, %119 %121 = call float @floor(float %120) %122 = fmul float %14, %121 %123 = fsub float %116, %122 %124 = bitcast float %110 to i32 %125 = insertelement <4 x i32> , i32 %124, i32 1 %126 = insertelement <4 x i32> %125, i32 0, i32 2 %127 = bitcast <8 x i32> %59 to <32 x i8> %128 = bitcast <4 x i32> %61 to <16 x i8> %129 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %126, <32 x i8> %127, <16 x i8> %128, i32 2) %130 = extractelement <4 x float> %129, i32 2 %131 = fcmp ogt float %130, 0.000000e+00 %temp16.0 = select i1 %131, float %118, float %123 %132 = fmul float %106, %temp16.0 %133 = bitcast float %132 to i32 %134 = bitcast float %110 to i32 %135 = insertelement <4 x i32> undef, i32 %133, i32 0 %136 = insertelement <4 x i32> %135, i32 %134, i32 1 %137 = insertelement <4 x i32> %136, i32 0, i32 2 %138 = bitcast <8 x i32> %59 to <32 x i8> %139 = bitcast <4 x i32> %61 to <16 x i8> %140 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %137, <32 x i8> %138, <16 x i8> %139, i32 2) %141 = extractelement <4 x float> %140, i32 0 %142 = extractelement <4 x float> %140, i32 3 %143 = call float @llvm.AMDGPU.lrp(float %114, float %142, float %141) %144 = fadd float %110, %15 %145 = bitcast float %132 to i32 %146 = bitcast float %144 to i32 %147 = insertelement <4 x i32> undef, i32 %145, i32 0 %148 = insertelement <4 x i32> %147, i32 %146, i32 1 %149 = insertelement <4 x i32> %148, i32 0, i32 2 %150 = bitcast <8 x i32> %59 to <32 x i8> %151 = bitcast <4 x i32> %61 to <16 x i8> %152 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %149, <32 x i8> %150, <16 x i8> %151, i32 2) %153 = extractelement <4 x float> %152, i32 0 %154 = extractelement <4 x float> %152, i32 1 %155 = extractelement <4 x float> %152, i32 2 %156 = extractelement <4 x float> %152, i32 3 %157 = fmul float %15, 2.000000e+00 %158 = fadd float %157, %110 %159 = bitcast float %132 to i32 %160 = bitcast float %158 to i32 %161 = insertelement <4 x i32> undef, i32 %159, i32 0 %162 = insertelement <4 x i32> %161, i32 %160, i32 1 %163 = insertelement <4 x i32> %162, i32 0, i32 2 %164 = bitcast <8 x i32> %59 to <32 x i8> %165 = bitcast <4 x i32> %61 to <16 x i8> %166 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %163, <32 x i8> %164, <16 x i8> %165, i32 2) %167 = extractelement <4 x float> %166, i32 0 %168 = extractelement <4 x float> %166, i32 1 %169 = extractelement <4 x float> %166, i32 2 %170 = extractelement <4 x float> %166, i32 3 %171 = fadd float %106, %132 %172 = bitcast float %171 to i32 %173 = bitcast float %144 to i32 %174 = insertelement <4 x i32> undef, i32 %172, i32 0 %175 = insertelement <4 x i32> %174, i32 %173, i32 1 %176 = insertelement <4 x i32> %175, i32 0, i32 2 %177 = bitcast <8 x i32> %59 to <32 x i8> %178 = bitcast <4 x i32> %61 to <16 x i8> %179 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %176, <32 x i8> %177, <16 x i8> %178, i32 2) %180 = extractelement <4 x float> %179, i32 0 %181 = extractelement <4 x float> %179, i32 1 %182 = extractelement <4 x float> %179, i32 2 %183 = extractelement <4 x float> %179, i32 3 %184 = fsub float %182, %180 %185 = fsub float %183, %181 %186 = fsub float %155, %153 %187 = fsub float %156, %154 %188 = fdiv float 1.000000e+00, %184 %189 = fdiv float 1.000000e+00, %185 %190 = fmul float %186, %188 %191 = fmul float %187, %189 %192 = fmul float %180, %190 %193 = fmul float %181, %191 %194 = fsub float %153, %192 %195 = fsub float %154, %193 %196 = fadd float %194, %190 %197 = fadd float %195, %191 %198 = fsub float %169, %167 %199 = fsub float %170, %168 %200 = fdiv float 1.000000e+00, %184 %201 = fdiv float 1.000000e+00, %185 %202 = fmul float %198, %200 %203 = fmul float %199, %201 %204 = fmul float %180, %202 %205 = fmul float %181, %203 %206 = fsub float %167, %204 %207 = fsub float %168, %205 %208 = fadd float %206, %202 %209 = fadd float %207, %203 %210 = fmul float %143, 5.000000e-01 %211 = fadd float %210, 5.000000e-01 %212 = fadd float %103, -5.000000e-01 %213 = fadd float %105, -5.000000e-01 %214 = fadd float %103, -5.000000e-01 %215 = fadd float %105, -5.000000e-01 %216 = fdiv float 1.000000e+00, %48 %217 = call float @llvm.AMDIL.clamp.(float %216, float 0.000000e+00, float 1.000000e+00) %218 = fmul float %212, %217 %219 = fadd float %218, 5.000000e-01 %220 = fmul float %213, %217 %221 = fadd float %220, 5.000000e-01 %222 = fmul float %214, %217 %223 = fadd float %222, 5.000000e-01 %224 = fmul float %215, %217 %225 = fadd float %224, 5.000000e-01 %226 = call float @llvm.AMDGPU.lrp(float %211, float %219, float %103) %227 = call float @llvm.AMDGPU.lrp(float %211, float %221, float %105) %228 = call float @llvm.AMDGPU.lrp(float %210, float %223, float %103) %229 = call float @llvm.AMDGPU.lrp(float %210, float %225, float %105) %230 = fmul float %55, %226 %231 = fadd float %230, %53 %232 = fmul float %56, %227 %233 = fadd float %232, %54 %234 = fmul float %55, %228 %235 = fadd float %234, %53 %236 = fmul float %56, %229 %237 = fadd float %236, %54 %238 = fsub float %53, %231 %239 = fsub float %54, %233 %240 = fsub float %53, %235 %241 = fsub float %54, %237 %242 = fmul float %238, 2.000000e+00 %243 = fadd float %242, %55 %244 = fmul float %239, 2.000000e+00 %245 = fadd float %244, %56 %246 = fmul float %240, 2.000000e+00 %247 = fadd float %246, %55 %248 = fmul float %241, 2.000000e+00 %249 = fadd float %248, %56 %250 = fcmp olt float %243, 0.000000e+00 %.94 = select i1 %250, float %182, float %180 %251 = fcmp olt float %245, 0.000000e+00 %temp48.0 = select i1 %251, float %183, float %181 %252 = fsub float %.94, %231 %253 = fsub float %temp48.0, %233 %254 = fdiv float 1.000000e+00, %243 %255 = fdiv float 1.000000e+00, %245 %256 = fmul float %252, %254 %257 = fmul float %253, %255 %258 = call float @llvm.AMDIL.clamp.(float %256, float 0.000000e+00, float 1.000000e+00) %259 = call float @llvm.AMDIL.clamp.(float %257, float 0.000000e+00, float 1.000000e+00) %260 = fmul float %258, %243 %261 = fadd float %260, %231 %262 = fmul float %259, %245 %263 = fadd float %262, %233 %264 = fmul float %258, %247 %265 = fadd float %264, %235 %266 = fmul float %259, %249 %267 = fadd float %266, %237 %268 = fmul float %258, 2.000000e+00 %269 = fmul float %259, 2.000000e+00 %270 = fsub float 1.000000e+00, %268 %271 = fsub float 1.000000e+00, %269 %272 = fmul float %270, %72 %273 = fmul float %271, %73 %274 = call float @llvm.AMDGPU.lrp(float %261, float %196, float %194) %275 = call float @llvm.AMDGPU.lrp(float %263, float %197, float %195) %276 = call float @llvm.AMDGPU.lrp(float %265, float %208, float %206) %277 = call float @llvm.AMDGPU.lrp(float %267, float %209, float %207) %278 = fmul float %84, 0x3FB3D07220000000 %279 = fmul float %85, 0x3FB3D07220000000 %280 = fmul float %86, 0x3FB3D07220000000 %281 = fmul float %84, 0x3FEE54EDE0000000 %282 = fadd float %281, 0x3FAAB12320000000 %283 = fmul float %85, 0x3FEE54EDE0000000 %284 = fadd float %283, 0x3FAAB12320000000 %285 = fmul float %86, 0x3FEE54EDE0000000 %286 = fadd float %285, 0x3FAAB12320000000 %287 = call float @llvm.pow.f32(float %282, float 0x4003333340000000) %288 = call float @llvm.pow.f32(float %284, float 0x4003333340000000) %289 = call float @llvm.pow.f32(float %286, float 0x4003333340000000) %290 = fcmp ogt float %84, 0x3FA4B5DCC0000000 %.95 = select i1 %290, float %287, float %278 %291 = fcmp ogt float %85, 0x3FA4B5DCC0000000 %temp24.0 = select i1 %291, float %288, float %279 %292 = fcmp ogt float %86, 0x3FA4B5DCC0000000 %.96 = select i1 %292, float %289, float %280 %293 = call float @llvm.AMDGPU.lrp(float %57, float %.95, float %84) %294 = call float @llvm.AMDGPU.lrp(float %57, float %temp24.0, float %85) %295 = call float @llvm.AMDGPU.lrp(float %57, float %.96, float %86) %296 = call float @llvm.AMDGPU.lrp(float %57, float %87, float %87) %297 = fmul float %24, 0.000000e+00 %298 = fmul float %22, 0.000000e+00 %299 = fmul float %24, 0.000000e+00 %300 = fsub float %299, %23 %301 = fsub float %22, %297 %302 = fmul float %23, 0.000000e+00 %303 = fsub float %302, %298 %304 = call float @llvm.cos.f32(float %99) %305 = call float @llvm.sin.f32(float %99) %306 = fmul float %304, %300 %307 = fmul float %304, %301 %308 = fmul float %304, %303 %309 = fmul float %305, 0.000000e+00 %310 = fadd float %309, %306 %311 = fmul float %305, 0.000000e+00 %312 = fadd float %311, %307 %313 = fadd float %305, %308 %314 = fmul float %305, %300 %315 = fmul float %305, %301 %316 = fmul float %305, %303 %317 = fmul float %304, 0.000000e+00 %318 = fsub float %317, %314 %319 = fmul float %304, 0.000000e+00 %320 = fsub float %319, %315 %321 = fsub float %304, %316 %322 = call float @llvm.sin.f32(float %100) %323 = call float @llvm.cos.f32(float %100) %324 = fsub float 1.000000e+00, %323 %325 = fmul float %318, %320 %326 = fmul float %324, %325 %327 = fmul float %322, %321 %328 = fmul float %318, %321 %329 = fmul float %324, %328 %330 = fmul float %322, %320 %331 = fmul float %320, %321 %332 = fmul float %331, %324 %333 = fmul float %322, %318 %334 = fmul float %318, %318 %335 = call float @llvm.AMDGPU.lrp(float %323, float 1.000000e+00, float %334) %336 = fsub float %326, %327 %337 = fadd float %329, %330 %338 = fadd float %326, %327 %339 = fmul float %320, %320 %340 = call float @llvm.AMDGPU.lrp(float %323, float 1.000000e+00, float %339) %341 = fsub float %332, %333 %342 = fsub float %329, %330 %343 = fadd float %332, %333 %344 = fmul float %321, %321 %345 = call float @llvm.AMDGPU.lrp(float %323, float 1.000000e+00, float %344) %346 = fmul float %310, %335 %347 = fmul float %312, %336 %348 = fadd float %347, %346 %349 = fmul float %313, %337 %350 = fadd float %348, %349 %351 = fmul float %310, %338 %352 = fmul float %312, %340 %353 = fadd float %352, %351 %354 = fmul float %313, %341 %355 = fadd float %353, %354 %356 = fmul float %310, %342 %357 = fmul float %312, %343 %358 = fadd float %357, %356 %359 = fmul float %313, %345 %360 = fadd float %358, %359 %361 = fadd float %50, %272 %362 = fadd float %51, %273 %363 = fmul float %362, %318 %364 = fmul float %362, %320 %365 = fmul float %362, %321 %366 = fmul float %350, %361 %367 = fadd float %366, %363 %368 = fmul float %355, %361 %369 = fadd float %368, %364 %370 = fmul float %360, %361 %371 = fadd float %370, %365 %372 = fsub float %19, %92 %373 = fsub float %20, %93 %374 = fsub float %21, %94 %375 = fmul float %372, %372 %376 = fmul float %373, %373 %377 = fadd float %376, %375 %378 = fmul float %374, %374 %379 = fadd float %377, %378 %380 = call float @llvm.sqrt.f32(float %379) %381 = fcmp ogt float %52, 0.000000e+00 br i1 %381, label %IF83, label %ENDIF82 IF83: ; preds = %main_body %382 = call float @llvm.SI.load.const(<16 x i8> %44, i32 12) %383 = call float @llvm.SI.load.const(<16 x i8> %44, i32 0) %384 = fmul float %380, %45 %385 = fcmp olt float %384, %101 %386 = fsub float %101, %384 %387 = fsub float %46, %45 %388 = fmul float %387, %380 %389 = fdiv float 1.000000e+00, %388 %390 = fmul float %386, %389 %391 = fsub float 1.000000e+00, %390 %392 = fmul float %391, %293 %393 = fmul float %391, %294 %394 = fmul float %391, %295 %395 = fmul float %391, %296 %396 = fmul float %380, %46 %397 = fcmp olt float %396, %101 %.97 = select i1 %397, float 0.000000e+00, float %392 %.98 = select i1 %397, float 0.000000e+00, float %393 %.99 = select i1 %397, float 0.000000e+00, float %394 %.100 = select i1 %397, float 0.000000e+00, float %395 %.97. = select i1 %385, float %.97, float %293 %.98. = select i1 %385, float %.98, float %294 %.99. = select i1 %385, float %.99, float %295 %.100. = select i1 %385, float %.100, float %296 %398 = fmul float %380, %383 %399 = call float @llvm.maxnum.f32(float %101, float %398) %400 = fmul float %380, %382 %401 = call float @llvm.minnum.f32(float %399, float %400) br label %ENDIF82 ENDIF82: ; preds = %main_body, %IF83 %temp20.1 = phi float [ %.97., %IF83 ], [ %293, %main_body ] %temp21.0 = phi float [ %.98., %IF83 ], [ %294, %main_body ] %temp22.0 = phi float [ %.99., %IF83 ], [ %295, %main_body ] %temp23.0 = phi float [ %.100., %IF83 ], [ %296, %main_body ] %temp24.1 = phi float [ %401, %IF83 ], [ %101, %main_body ] %402 = fcmp olt float %temp23.0, 0x3F70101060000000 %.temp24.1 = select i1 %402, float 0.000000e+00, float %temp24.1 %403 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> , <32 x i8> %64, <16 x i8> %67, i32 2) %404 = extractelement <4 x float> %403, i32 0 %405 = extractelement <4 x float> %403, i32 1 %406 = extractelement <4 x float> %403, i32 2 %407 = fmul float %404, 0x3E7AD7F2A0000000 %408 = fmul float %405, 0x3E7AD7F2A0000000 %409 = fmul float %406, 0x3E7AD7F2A0000000 %410 = fmul float %.temp24.1, %367 %411 = fadd float %410, %92 %412 = fmul float %.temp24.1, %369 %413 = fadd float %412, %93 %414 = fmul float %.temp24.1, %371 %415 = fadd float %414, %94 %416 = fadd float %407, %411 %417 = fadd float %408, %413 %418 = fadd float %409, %415 %419 = fmul float %416, %27 %420 = fmul float %417, %31 %421 = fadd float %419, %420 %422 = fmul float %418, %35 %423 = fadd float %421, %422 %424 = fadd float %423, %39 %425 = fmul float %416, %28 %426 = fmul float %417, %32 %427 = fadd float %425, %426 %428 = fmul float %418, %36 %429 = fadd float %427, %428 %430 = fadd float %429, %40 %431 = fmul float %416, %30 %432 = fmul float %417, %34 %433 = fadd float %431, %432 %434 = fmul float %418, %38 %435 = fadd float %433, %434 %436 = fadd float %435, %42 %437 = fmul float %22, %49 %438 = fadd float %437, %416 %439 = fmul float %23, %49 %440 = fadd float %439, %417 %441 = fmul float %24, %49 %442 = fadd float %441, %418 %443 = fsub float %416, %19 %444 = fsub float %417, %20 %445 = fsub float %418, %21 %446 = fmul float %22, %443 %447 = fmul float %23, %444 %448 = fadd float %447, %446 %449 = fmul float %24, %445 %450 = fadd float %448, %449 %451 = fmul float %443, %22 %452 = fmul float %444, %23 %453 = fadd float %452, %451 %454 = fmul float %445, %24 %455 = fadd float %453, %454 %456 = fsub float %455, %25 %457 = fsub float %26, %25 %458 = fdiv float 1.000000e+00, %457 %459 = fmul float %456, %458 %460 = fsub float %416, %19 %461 = fsub float %417, %20 %462 = fsub float %418, %21 %463 = fmul float %460, %460 %464 = fmul float %461, %461 %465 = fadd float %464, %463 %466 = fmul float %462, %462 %467 = fadd float %465, %466 %468 = call float @llvm.AMDGPU.rsq.clamped.f32(float %467) %469 = fmul float %460, %468 %470 = fsub float -0.000000e+00, %469 %471 = fmul float %461, %468 %472 = fsub float -0.000000e+00, %471 %473 = fmul float %462, %468 %474 = fsub float -0.000000e+00, %473 %475 = fsub float -0.000000e+00, %430 %476 = fmul float %438, %29 %477 = fmul float %440, %33 %478 = fadd float %476, %477 %479 = fmul float %442, %37 %480 = fadd float %478, %479 %481 = fadd float %480, %41 %482 = fmul float %438, %30 %483 = fmul float %440, %34 %484 = fadd float %482, %483 %485 = fmul float %442, %38 %486 = fadd float %484, %485 %487 = fadd float %486, %42 %488 = fdiv float 1.000000e+00, %487 %489 = fmul float %481, %488 %490 = fmul float %489, %436 %491 = fmul float %416, %29 %492 = fmul float %417, %33 %493 = fadd float %491, %492 %494 = fmul float %418, %37 %495 = fadd float %493, %494 %496 = fadd float %495, %41 %497 = call float @llvm.minnum.f32(float %496, float 0x3F50624DE0000000) %498 = call float @llvm.maxnum.f32(float %490, float %497) %499 = fmul float %498, 2.000000e+00 %500 = fsub float %499, %436 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %274, float %275, float %276, float %277) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %temp20.1, float %temp21.0, float %temp22.0, float %temp23.0) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %424, float %430, float %459, float %436) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %416, float %417, float %418, float %450) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %470, float %472, float %474, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %143, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %424, float %475, float %500, float %436) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: readnone declare float @floor(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.cos.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sin.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[12:15], s[2:3], 0x4 ; C0860304 v_add_i32_e32 v0, s10, v0 ; 4A00000A v_add_i32_e32 v4, s11, v3 ; 4A08060B s_load_dwordx4 s[24:27], s[2:3], 0x10 ; C08C0310 v_mov_b32_e32 v18, 0x3d558919 ; 7E2402FF 3D558919 v_mov_b32_e32 v11, 0x3f72a76f ; 7E1602FF 3F72A76F v_mov_b32_e32 v19, 0x80000000 ; 7E2602FF 80000000 v_mov_b32_e32 v21, 0x4019999a ; 7E2A02FF 4019999A v_mov_b32_e32 v12, 0x3d9e8391 ; 7E1802FF 3D9E8391 v_mov_b32_e32 v13, 0x3d25aee6 ; 7E1A02FF 3D25AEE6 s_load_dwordx4 s[20:23], s[2:3], 0x14 ; C08A0314 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s1, s[12:15], 0x0 ; C2008D00 s_buffer_load_dword s0, s[12:15], 0x1 ; C2000D01 s_buffer_load_dword s10, s[12:15], 0x2 ; C2050D02 s_buffer_load_dword s2, s[12:15], 0x3 ; C2010D03 s_buffer_load_dword s14, s[24:27], 0x4c ; C207194C s_buffer_load_dword s15, s[24:27], 0x4d ; C207994D s_buffer_load_dword s16, s[24:27], 0x4e ; C208194E s_buffer_load_dword s12, s[24:27], 0x50 ; C2061950 s_buffer_load_dword s13, s[24:27], 0x51 ; C2069951 s_buffer_load_dword s3, s[20:23], 0x8 ; C2019508 s_buffer_load_dword s11, s[20:23], 0xa ; C205950A s_buffer_load_dword s19, s[20:23], 0xc ; C209950C s_buffer_load_dword s17, s[20:23], 0xd ; C208950D s_buffer_load_dword s18, s[20:23], 0xe ; C209150E s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e64 v14, 0.5, s0 ; D210000E 000000F0 v_add_f32_e64 v15, -2.0, s0 ; D206000F 000000F5 s_buffer_load_dword s28, s[20:23], 0x1c ; C20E151C s_buffer_load_dword s29, s[20:23], 0x18 ; C20E9518 s_load_dwordx4 s[44:47], s[8:9], 0x0 ; C0960900 s_load_dwordx4 s[48:51], s[8:9], 0x4 ; C0980904 v_rcp_f32_e32 v22, s0 ; 7E2C5400 s_load_dwordx4 s[52:55], s[8:9], 0x8 ; C09A0908 v_mov_b32_e32 v30, s1 ; 7E3C0201 s_load_dwordx4 s[56:59], s[8:9], 0xc ; C09C090C s_load_dwordx4 s[60:63], s[8:9], 0x10 ; C09E0910 s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 s_load_dwordx8 s[36:43], s[6:7], 0x0 ; C0D20700 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e64 v16, 1.0, s28 ; D2080010 000038F2 buffer_load_format_xyzw v[7:10], v0, s[44:47], 0 idxen ; E00C2000 800B0700 buffer_load_format_xyzw v[23:26], v4, s[48:51], 0 idxen ; E00C2000 800C1704 buffer_load_format_xyzw v[32:35], v4, s[52:55], 0 idxen ; E00C2000 800D2004 buffer_load_format_xyzw v[0:3], v4, s[56:59], 0 idxen ; E00C2000 800E0004 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[3:6], v4, s[60:63], 0 idxen ; E00C2000 800F0304 v_cmp_gt_f32_e32 vcc, s2, v23 ; 7C082E02 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v5, 0x40400000, v23 ; 100A2EFF 40400000 v_mul_f32_e32 v17, s3, v25 ; 10223203 v_cndmask_b32_e32 v5, 0, v5 ; 000A0A80 v_mac_f32_e32 v30, s10, v5 ; 3E3C0A0A v_mov_b32_e32 v29, 0 ; 7E3A0280 v_mad_f32 v25, v11, v32, v18 ; D2820019 044A410B v_mad_f32 v26, v11, v33, v18 ; D282001A 044A430B v_mac_f32_e32 v18, v11, v34 ; 3E24450B v_mov_b32_e32 v31, v29 ; 7E3E031D image_sample_l v5, 4, 0, 0, 0, 0, 0, 0, 0, v[29:32], s[36:43], s[32:35] ; F0900400 0109051D s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_lt_f32_e32 vcc, 0, v5 ; 7C020A80 v_mul_f32_e32 v5, v17, v14 ; 100A1D11 v_floor_f32_e32 v5, v5 ; 7E0A4905 v_add_f32_e32 v9, v5, v5 ; 06120B05 v_min_f32_e32 v10, v9, v15 ; 1E141F09 v_mul_f32_e32 v11, v22, v9 ; 10161316 v_floor_f32_e32 v11, v11 ; 7E16490B v_mad_f32 v9, -s0, v11, v9 ; D2820009 24261600 v_cndmask_b32_e32 v9, v9, v10 ; 00121509 v_mul_f32_e32 v27, v9, v22 ; 10362D09 v_cmp_gt_f32_e32 vcc, v32, v13 ; 7C081B20 v_cmp_gt_f32_e64 s[0:1], v33, v13 ; D0080000 00021B21 v_cmp_gt_f32_e64 s[2:3], v34, v13 ; D0080002 00021B22 v_mov_b32_e32 v28, v30 ; 7E38031E v_add_f32_e32 v23, s10, v30 ; 062E3C0A image_sample_l v[36:37], 9, 0, 0, 0, 0, 0, 0, 0, v[27:30], s[36:43], s[32:35] ; F0900900 0109241B v_mov_b32_e32 v28, v23 ; 7E380317 v_mac_f32_e32 v22, v9, v22 ; 3E2C2D09 image_sample_l v[38:41], 15, 0, 0, 0, 0, 0, 0, 0, v[27:30], s[36:43], s[32:35] ; F0900F00 0109261B v_mov_b32_e32 v28, v30 ; 7E38031E v_mac_f32_e64 v28, 2.0, s10 ; D23E001C 000014F4 v_mul_f32_e32 v30, v12, v32 ; 103C410C v_mul_f32_e32 v31, v12, v33 ; 103E430C v_mul_f32_e32 v42, v12, v34 ; 1054450C v_mul_f32_e32 v9, v32, v16 ; 10122120 v_mul_f32_e32 v10, v33, v16 ; 10142121 v_mul_f32_e32 v11, v34, v16 ; 10162122 v_mul_f32_e32 v12, v35, v16 ; 10182123 v_mac_f32_e32 v12, s28, v35 ; 3E18461C v_mov_b32_e32 v24, v29 ; 7E30031D image_sample_l v[32:35], 15, 0, 0, 0, 0, 0, 0, 0, v[27:30], s[36:43], s[32:35] ; F0900F00 0109201B image_sample_l v[43:46], 15, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[36:43], s[32:35] ; F0900F00 01092B16 s_waitcnt vmcnt(0) ; BF8C0770 v_subrev_f32_e32 v13, v43, v45 ; 0A1A5B2B v_rcp_f32_e32 v15, v13 ; 7E1E550D v_subrev_f32_e32 v13, v44, v46 ; 0A1A5D2C v_rcp_f32_e32 v16, v13 ; 7E20550D v_subrev_f32_e32 v13, v38, v40 ; 0A1A5126 v_mul_f32_e32 v20, v15, v13 ; 10281B0F v_mad_f32 v20, -v43, v20, v38 ; D2820014 249A292B v_subrev_f32_e32 v22, v39, v41 ; 0A2C5327 v_mul_f32_e32 v23, v16, v22 ; 102E2D10 v_mad_f32 v23, -v44, v23, v39 ; D2820017 249E2F2C v_subrev_f32_e32 v24, v32, v34 ; 0A304520 v_mul_f32_e32 v27, v15, v24 ; 1036310F v_mad_f32 v27, -v43, v27, v32 ; D282001B 2482372B v_subrev_f32_e32 v28, v33, v35 ; 0A384721 v_mul_f32_e32 v29, v16, v28 ; 103A3910 v_mad_f32 v29, -v44, v29, v33 ; D282001D 24863B2C v_mad_f32 v14, v14, v17, -v5 ; D282000E 8416230E v_rcp_f32_e32 v17, s11 ; 7E22540B v_sub_f32_e32 v5, 1.0, v14 ; 080A1CF2 v_mul_f32_e32 v5, v36, v5 ; 100A0B24 v_mac_f32_e32 v5, v37, v14 ; 3E0A1D25 v_add_f32_e64 v14, 0, v17 clamp ; D206080E 00022280 v_mad_f32 v17, 0.5, v7, 0.5 ; D2820011 03C20EF0 v_add_f32_e32 v32, -0.5, v17 ; 064022F1 v_mad_f32 v32, v32, v14, 0.5 ; D2820020 03C21D20 s_buffer_load_dword s30, s[20:23], 0x19 ; C20F1519 s_buffer_load_dword s31, s[20:23], 0x1a ; C20F951A s_buffer_load_dword s32, s[20:23], 0x1b ; C210151B v_mad_f32 v33, -0.5, v8, 0.5 ; D2820021 03C210F1 v_add_f32_e32 v34, -0.5, v33 ; 064442F1 v_mad_f32 v14, v34, v14, 0.5 ; D282000E 03C21D22 v_mad_f32 v34, 0.5, v5, 0.5 ; D2820022 03C20AF0 v_sub_f32_e32 v35, 1.0, v34 ; 084644F2 v_mul_f32_e32 v36, v17, v35 ; 10484711 v_mul_f32_e32 v35, v33, v35 ; 10464721 v_mac_f32_e32 v36, v32, v34 ; 3E484520 v_mac_f32_e32 v35, v14, v34 ; 3E46450E v_mov_b32_e32 v34, s29 ; 7E44021D s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v34, s31, v36, v34 ; D2820022 048A481F v_mov_b32_e32 v36, s30 ; 7E48021E v_mad_f32 v35, s32, v35, v36 ; D2820023 04924620 v_sub_f32_e32 v36, s29, v34 ; 0848441D v_mad_f32 v36, 2.0, v36, s31 ; D2820024 007E48F4 v_sub_f32_e32 v37, s30, v35 ; 084A461E v_mad_f32 v37, 2.0, v37, s32 ; D2820025 00824AF4 v_cmp_gt_f32_e64 s[8:9], 0, v36 ; D0080008 00024880 v_cmp_gt_f32_e64 s[10:11], 0, v37 ; D008000A 00024A80 v_cndmask_b32_e64 v38, v43, v45, s[8:9] ; D2000026 00225B2B v_cndmask_b32_e64 v39, v44, v46, s[10:11] ; D2000027 002A5D2C v_mad_f32 v40, 0.5, -v5, 1.0 ; D2820028 43CA0AF0 v_mul_f32_e32 v41, v17, v40 ; 10525111 v_mul_f32_e32 v33, v33, v40 ; 10425121 v_mul_f32_e32 v17, 0.5, v5 ; 10220AF0 v_rcp_f32_e32 v40, v36 ; 7E505524 v_mac_f32_e32 v41, v32, v17 ; 3E522320 v_mac_f32_e32 v33, v14, v17 ; 3E42230E v_subrev_f32_e32 v14, v34, v38 ; 0A1C4D22 v_mul_f32_e32 v14, v40, v14 ; 101C1D28 v_add_f32_e64 v17, 0, v14 clamp ; D2060811 00021C80 v_mac_f32_e32 v34, v36, v17 ; 3E442324 v_mad_f32 v14, v15, v13, v20 ; D282000E 04521B0F v_rcp_f32_e32 v32, v37 ; 7E405525 v_sub_f32_e32 v13, 1.0, v34 ; 081A44F2 v_mul_f32_e32 v13, v20, v13 ; 101A1B14 v_mac_f32_e32 v13, v14, v34 ; 3E1A450E v_subrev_f32_e32 v14, v35, v39 ; 0A1C4F23 v_mul_f32_e32 v14, v32, v14 ; 101C1D20 v_add_f32_e64 v20, 0, v14 clamp ; D2060814 00021C80 v_mac_f32_e32 v35, v37, v20 ; 3E462925 v_mad_f32 v22, v16, v22, v23 ; D2820016 045E2D10 v_sub_f32_e32 v14, 1.0, v35 ; 081C46F2 v_mul_f32_e32 v14, v23, v14 ; 101C1D17 v_mac_f32_e32 v14, v22, v35 ; 3E1C4716 v_mov_b32_e32 v22, s29 ; 7E2C021D v_mad_f32 v22, s31, v41, v22 ; D2820016 045A521F v_mad_f32 v23, v15, v24, v27 ; D2820017 046E310F v_sub_f32_e32 v15, s29, v22 ; 081E2C1D v_mad_f32 v15, 2.0, v15, s31 ; D282000F 007E1EF4 v_mac_f32_e32 v22, v15, v17 ; 3E2C230F v_sub_f32_e32 v15, 1.0, v22 ; 081E2CF2 v_mul_f32_e32 v15, v27, v15 ; 101E1F1B v_mac_f32_e32 v15, v23, v22 ; 3E1E2D17 v_mov_b32_e32 v22, s30 ; 7E2C021E v_mad_f32 v22, s32, v33, v22 ; D2820016 045A4220 v_mad_f32 v23, v16, v28, v29 ; D2820017 04763910 v_sub_f32_e32 v16, s30, v22 ; 08202C1E v_mad_f32 v16, 2.0, v16, s32 ; D2820010 008220F4 v_mac_f32_e32 v22, v16, v20 ; 3E2C2910 v_sub_f32_e32 v16, 1.0, v22 ; 08202CF2 v_mul_f32_e32 v16, v29, v16 ; 1020211D v_log_f32_e32 v24, v25 ; 7E304F19 v_log_f32_e32 v25, v26 ; 7E324F1A v_log_f32_e32 v18, v18 ; 7E244F12 v_mac_f32_e32 v16, v23, v22 ; 3E202D17 v_mul_legacy_f32_e32 v22, v21, v24 ; 0E2C3115 v_mul_legacy_f32_e32 v23, v21, v25 ; 0E2E3315 v_mul_legacy_f32_e32 v18, v21, v18 ; 0E242515 v_exp_f32_e32 v21, v22 ; 7E2A4B16 v_cndmask_b32_e32 v21, v30, v21 ; 002A2B1E v_exp_f32_e32 v22, v23 ; 7E2C4B17 v_cndmask_b32_e64 v22, v31, v22, s[0:1] ; D2000016 00022D1F v_exp_f32_e32 v18, v18 ; 7E244B12 v_cndmask_b32_e64 v18, v42, v18, s[2:3] ; D2000012 000A252A v_mac_f32_e32 v9, s28, v21 ; 3E122A1C v_mac_f32_e32 v10, s28, v22 ; 3E142C1C v_mac_f32_e32 v11, s28, v18 ; 3E16241C s_buffer_load_dword s29, s[24:27], 0x52 ; C20E9952 s_buffer_load_dword s30, s[24:27], 0x5b ; C20F195B s_buffer_load_dword s0, s[24:27], 0x5d ; C200195D s_buffer_load_dword s32, s[24:27], 0x80 ; C2101980 s_buffer_load_dword s31, s[24:27], 0x81 ; C20F9981 s_buffer_load_dword s2, s[24:27], 0x82 ; C2011982 s_buffer_load_dword s3, s[24:27], 0x83 ; C2019983 s_buffer_load_dword s35, s[24:27], 0x84 ; C2119984 s_buffer_load_dword s33, s[24:27], 0x85 ; C2109985 s_buffer_load_dword s10, s[24:27], 0x86 ; C2051986 s_buffer_load_dword s11, s[24:27], 0x87 ; C2059987 s_buffer_load_dword s36, s[24:27], 0x88 ; C2121988 s_buffer_load_dword s34, s[24:27], 0x89 ; C2111989 s_buffer_load_dword s8, s[24:27], 0x8a ; C204198A s_buffer_load_dword s28, s[24:27], 0x8b ; C20E198B s_buffer_load_dword s38, s[24:27], 0x8c ; C213198C s_buffer_load_dword s37, s[24:27], 0x8d ; C212998D s_buffer_load_dword s9, s[24:27], 0x8e ; C204998E s_buffer_load_dword s24, s[24:27], 0x8f ; C20C198F v_mov_b32_e32 v18, s13 ; 7E24020D s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v25, 0, s29, -v18 ; D2820019 84483A80 v_mov_b32_e32 v18, s12 ; 7E24020C v_mad_f32 v26, 0, -s29, v18 ; D282001A 44483A80 v_mul_f32_e32 v27, s12, v19 ; 1036260C v_mac_f32_e64 v27, 0, s13 ; D23E001B 00001A80 v_mov_b32_e32 v18, 0x3e22f983 ; 7E2402FF 3E22F983 v_mul_f32_e32 v3, v18, v3 ; 10060712 v_mul_f32_e32 v4, v18, v4 ; 10080912 v_fract_f32_e32 v3, v3 ; 7E064103 v_fract_f32_e32 v24, v4 ; 7E304104 v_cos_f32_e32 v28, v3 ; 7E386D03 v_sin_f32_e32 v4, v3 ; 7E086B03 v_mul_f32_e32 v3, v25, v4 ; 10060919 v_mad_f32 v18, 0, v28, -v3 ; D2820012 840E3880 v_mul_f32_e32 v3, v26, v4 ; 1006091A v_mad_f32 v19, 0, v28, -v3 ; D2820013 840E3880 s_buffer_load_dword s1, s[20:23], 0x12 ; C2009512 v_cos_f32_e32 v29, v24 ; 7E3A6D18 v_mad_f32 v21, -v4, v27, v28 ; D2820015 24723704 v_sub_f32_e32 v3, 1.0, v29 ; 08063AF2 v_mul_f32_e32 v22, v21, v21 ; 102C2B15 v_mul_f32_e32 v22, v22, v3 ; 102C0716 v_mul_f32_e32 v23, v18, v18 ; 102E2512 v_mul_f32_e32 v30, v23, v3 ; 103C0717 v_mul_f32_e32 v23, v19, v19 ; 102E2713 v_mul_f32_e32 v23, v23, v3 ; 102E0717 v_sin_f32_e32 v24, v24 ; 7E306B18 v_mac_f32_e32 v22, 1.0, v29 ; 3E2C3AF2 v_mac_f32_e32 v30, 1.0, v29 ; 3E3C3AF2 v_mac_f32_e32 v23, 1.0, v29 ; 3E2E3AF2 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_lt_f32_e64 s[26:27], 0, s1 ; D002001A 00000280 v_mov_b32_e32 v31, s0 ; 7E3E0200 v_mov_b32_e32 v3, s19 ; 7E060213 s_and_saveexec_b64 s[26:27], s[26:27] ; BE9A241A s_xor_b64 s[26:27], exec, s[26:27] ; 899A1A7E s_cbranch_execz BB0_2 ; BF880000 v_sub_f32_e32 v32, s14, v0 ; 0840000E v_sub_f32_e32 v33, s15, v1 ; 0842020F v_sub_f32_e32 v34, s16, v2 ; 08440410 v_mul_f32_e32 v32, v32, v32 ; 10404120 s_buffer_load_dword s0, s[20:23], 0x1 ; C2001501 s_buffer_load_dword s1, s[20:23], 0x2 ; C2009502 s_buffer_load_dword s19, s[20:23], 0x0 ; C2099500 s_buffer_load_dword s25, s[20:23], 0x3 ; C20C9503 v_mac_f32_e32 v32, v33, v33 ; 3E404321 v_mac_f32_e32 v32, v34, v34 ; 3E404522 v_sqrt_f32_e32 v32, v32 ; 7E406720 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v33, s0 ; 7E420200 v_sub_f32_e32 v33, s1, v33 ; 08424201 v_mul_f32_e32 v33, v32, v33 ; 10424320 v_rcp_f32_e32 v33, v33 ; 7E425521 v_mul_f32_e32 v34, s0, v32 ; 10444000 v_mad_f32 v35, -v32, s0, v6 ; D2820023 24180120 v_mad_f32 v33, -v35, v33, 1.0 ; D2820021 23CA4323 v_cmp_lt_f32_e32 vcc, v34, v6 ; 7C020D22 v_mul_f32_e32 v34, s1, v32 ; 10444001 v_mul_f32_e32 v35, s19, v32 ; 10464013 v_max_f32_e32 v35, v35, v6 ; 20460D23 v_cmp_lt_f32_e64 s[0:1], v34, v6 ; D0020000 00020D22 v_mul_f32_e32 v6, v9, v33 ; 100C4309 v_mul_f32_e32 v34, v10, v33 ; 1044430A v_mul_f32_e32 v36, v11, v33 ; 1048430B v_mul_f32_e32 v33, v12, v33 ; 1042430C v_cndmask_b32_e64 v6, v6, 0, s[0:1] ; D2000006 00010106 v_cndmask_b32_e64 v34, v34, 0, s[0:1] ; D2000022 00010122 v_cndmask_b32_e64 v36, v36, 0, s[0:1] ; D2000024 00010124 v_cndmask_b32_e64 v33, v33, 0, s[0:1] ; D2000021 00010121 v_cndmask_b32_e32 v9, v9, v6 ; 00120D09 v_cndmask_b32_e32 v10, v10, v34 ; 0014450A v_cndmask_b32_e32 v11, v11, v36 ; 0016490B v_cndmask_b32_e32 v12, v12, v33 ; 0018430C v_mul_f32_e32 v6, s25, v32 ; 100C4019 v_min_f32_e32 v6, v6, v35 ; 1E0C4706 s_or_b64 exec, exec, s[26:27] ; 88FE1A7E v_mad_f32 v17, -2.0, v17, 1.0 ; D2820011 03CA22F5 v_mad_f32 v7, v17, v7, s17 ; D2820007 00460F11 v_mad_f32 v17, -2.0, v20, 1.0 ; D2820011 03CA28F5 v_mad_f32 v8, v17, v8, s18 ; D2820008 004A1111 v_mul_f32_e32 v17, v25, v28 ; 10223919 v_mul_f32_e32 v20, v26, v28 ; 1028391A v_mac_f32_e32 v17, 0, v4 ; 3E220880 v_mac_f32_e32 v20, 0, v4 ; 3E280880 v_mac_f32_e32 v4, v27, v28 ; 3E08391B v_sub_f32_e32 v25, 1.0, v29 ; 08323AF2 v_mul_f32_e32 v26, v30, v17 ; 1034231E v_subrev_f32_e32 v27, s30, v31 ; 0A363E1E v_rcp_f32_e32 v27, v27 ; 7E36551B s_load_dwordx4 s[20:23], s[4:5], 0x4 ; C08A0504 s_load_dwordx8 s[40:47], s[6:7], 0x8 ; C0D40708 v_mov_b32_e32 v28, 0x3b808083 ; 7E3802FF 3B808083 v_cmp_gt_f32_e32 vcc, v28, v12 ; 7C08191C v_cndmask_b32_e64 v6, v6, 0, vcc ; D2000006 01A90106 v_mul_f32_e32 v28, v19, v18 ; 10382513 v_mul_f32_e32 v29, v21, v24 ; 103A3115 v_mad_f32 v30, v25, v28, -v29 ; D282001E 84763919 v_mac_f32_e32 v29, v28, v25 ; 3E3A331C v_mul_f32_e32 v28, v21, v18 ; 10382515 v_mul_f32_e32 v31, v19, v24 ; 103E3113 v_mad_f32 v32, v28, v25, v31 ; D2820020 047E331C v_mad_f32 v28, v25, v28, -v31 ; D282001C 847E3919 v_mul_f32_e32 v24, v18, v24 ; 10303112 v_mul_f32_e32 v31, v21, v19 ; 103E2715 v_mad_f32 v33, v31, v25, -v24 ; D2820021 8462331F v_mac_f32_e32 v24, v25, v31 ; 3E303F19 v_mac_f32_e32 v26, v30, v20 ; 3E34291E v_mul_f32_e32 v25, v29, v17 ; 1032231D v_mac_f32_e32 v25, v23, v20 ; 3E322917 v_mul_f32_e32 v17, v28, v17 ; 1022231C v_mac_f32_e32 v17, v24, v20 ; 3E222918 v_mac_f32_e32 v26, v32, v4 ; 3E340920 v_mac_f32_e32 v25, v33, v4 ; 3E320921 v_mac_f32_e32 v17, v22, v4 ; 3E220916 v_mul_f32_e32 v4, v18, v8 ; 10081112 v_mul_f32_e32 v18, v19, v8 ; 10241113 v_mul_f32_e32 v8, v21, v8 ; 10101115 v_mac_f32_e32 v4, v7, v26 ; 3E083507 v_mac_f32_e32 v18, v7, v25 ; 3E243307 v_mov_b32_e32 v19, 0 ; 7E260280 v_mac_f32_e32 v8, v7, v17 ; 3E102307 v_mov_b32_e32 v20, v19 ; 7E280313 v_mad_f32 v0, v4, v6, v0 ; D2820000 04020D04 v_mad_f32 v1, v18, v6, v1 ; D2820001 04060D12 v_mac_f32_e32 v2, v8, v6 ; 3E040D08 v_mov_b32_e32 v21, v19 ; 7E2A0313 v_mov_b32_e32 v4, 0x33d6bf95 ; 7E0802FF 33D6BF95 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[6:8], 7, 0, 0, 0, 0, 0, 0, 0, v[19:22], s[40:47], s[20:23] ; F0900700 00AA0613 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v0, v4, v6 ; 3E000D04 v_mac_f32_e32 v1, v4, v7 ; 3E020F04 v_mac_f32_e32 v2, v4, v8 ; 3E041104 exp 15, 32, 0, 0, 0, v19, v19, v19, v19 ; F800020F 13131313 exp 15, 33, 0, 0, 0, v13, v14, v15, v16 ; F800021F 100F0E0D exp 15, 34, 0, 0, 0, v9, v10, v11, v12 ; F800022F 0C0B0A09 v_mul_f32_e32 v4, s35, v1 ; 10080223 v_mac_f32_e32 v4, s32, v0 ; 3E080020 v_mac_f32_e32 v4, s36, v2 ; 3E080424 v_add_f32_e32 v4, s38, v4 ; 06080826 v_mul_f32_e32 v6, s33, v1 ; 100C0221 v_mac_f32_e32 v6, s31, v0 ; 3E0C001F v_mac_f32_e32 v6, s34, v2 ; 3E0C0422 v_add_f32_e32 v6, s37, v6 ; 060C0C25 v_subrev_f32_e32 v7, s14, v0 ; 0A0E000E v_subrev_f32_e32 v8, s15, v1 ; 0A10020F s_waitcnt expcnt(0) ; BF8C070F v_subrev_f32_e32 v9, s16, v2 ; 0A120410 v_mul_f32_e32 v10, s12, v7 ; 10140E0C v_mac_f32_e32 v10, s13, v8 ; 3E14100D v_mac_f32_e32 v10, s29, v9 ; 3E14121D v_subrev_f32_e32 v11, s30, v10 ; 0A16141E v_mul_f32_e32 v11, v27, v11 ; 1016171B v_mul_f32_e32 v12, s11, v1 ; 1018020B v_mac_f32_e32 v12, s3, v0 ; 3E180003 v_mac_f32_e32 v12, s28, v2 ; 3E18041C v_add_f32_e32 v12, s24, v12 ; 06181818 exp 15, 35, 0, 0, 0, v4, v6, v11, v12 ; F800023F 0C0B0604 exp 15, 36, 0, 0, 0, v0, v1, v2, v10 ; F800024F 0A020100 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v10, v7, v7 ; 10140F07 v_mac_f32_e32 v10, v8, v8 ; 3E141108 v_mac_f32_e32 v10, v9, v9 ; 3E141309 v_rsq_clamp_f32_e32 v10, v10 ; 7E14590A v_mul_f32_e32 v11, s10, v1 ; 1016020A v_mac_f32_e32 v11, s2, v0 ; 3E160002 v_mac_f32_e32 v11, s8, v2 ; 3E160408 v_mad_f32 v2, s29, v3, v2 ; D2820002 040A061D v_mul_f32_e32 v7, v10, v7 ; 100E0F0A v_mul_f32_e32 v8, v10, v8 ; 1010110A v_mul_f32_e32 v9, v10, v9 ; 1012130A v_mov_b32_e32 v10, 0x80000000 ; 7E1402FF 80000000 v_xor_b32_e32 v7, v7, v10 ; 3A0E1507 v_xor_b32_e32 v8, v8, v10 ; 3A101508 v_xor_b32_e32 v9, v9, v10 ; 3A121509 exp 15, 37, 0, 0, 0, v7, v8, v9, v19 ; F800025F 13090807 v_mad_f32 v0, s12, v3, v0 ; D2820000 0402060C v_mad_f32 v1, s13, v3, v1 ; D2820001 0406060D v_xor_b32_e32 v3, v6, v10 ; 3A061506 v_mul_f32_e32 v6, s10, v1 ; 100C020A v_mul_f32_e32 v1, s11, v1 ; 1002020B v_mac_f32_e32 v1, s3, v0 ; 3E020003 v_mac_f32_e32 v1, s28, v2 ; 3E02041C v_add_f32_e32 v1, s24, v1 ; 06020218 v_rcp_f32_e32 v1, v1 ; 7E025501 v_mac_f32_e32 v6, s2, v0 ; 3E0C0002 v_mac_f32_e32 v6, s8, v2 ; 3E0C0408 v_add_f32_e32 v0, s9, v6 ; 06000C09 v_mul_f32_e32 v0, v1, v0 ; 10000101 v_mul_f32_e32 v0, v12, v0 ; 1000010C v_add_f32_e32 v1, s9, v11 ; 06021609 v_min_f32_e32 v1, 0x3a83126f, v1 ; 1E0202FF 3A83126F v_max_f32_e32 v0, v1, v0 ; 20000101 exp 15, 38, 0, 0, 0, v5, v19, v19, v19 ; F800026F 13131305 v_mad_f32 v0, 2.0, v0, -v12 ; D2820000 843200F4 exp 15, 12, 0, 0, 0, v4, v3, v0, v12 ; F80000CF 0C000304 exp 15, 13, 0, 1, 0, v19, v19, v19, v19 ; F80008DF 13131313 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 72 VGPRS: 48 Code Size: 2088 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL CONST[1][0..41] DCL CONST[2][0..13] DCL CONST[3][0] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL DCL TEMP[2..4], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 2.0000, 0.0000} IMM[1] UINT32 {0, 12, 28, 44} IMM[2] UINT32 {60, 16, 32, 48} IMM[3] UINT32 {4, 20, 36, 52} IMM[4] UINT32 {8, 24, 40, 56} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MOV TEMP[0].w, IMM[0].yyyy 4: MOV TEMP[0].x, IN[0].xxxx 5: MOV TEMP[0].y, IN[0].yyyy 6: MOV TEMP[0].z, IN[0].zzzz 7: MOV TEMP[2].x, CONST[1][0].wwww 8: MOV TEMP[2].y, CONST[1][1].wwww 9: MOV TEMP[2].z, CONST[1][2].wwww 10: MOV TEMP[2].w, CONST[1][3].wwww 11: DP4 TEMP[2].x, TEMP[0], TEMP[2] 12: MOV TEMP[3].x, CONST[1][0].xxxx 13: MOV TEMP[3].y, CONST[1][1].xxxx 14: MOV TEMP[3].z, CONST[1][2].xxxx 15: MOV TEMP[3].w, CONST[1][3].xxxx 16: DP4 TEMP[3].x, TEMP[0], TEMP[3] 17: MOV TEMP[4].x, CONST[1][0].yyyy 18: MOV TEMP[4].y, CONST[1][1].yyyy 19: MOV TEMP[4].z, CONST[1][2].yyyy 20: MOV TEMP[4].w, CONST[1][3].yyyy 21: DP4 TEMP[4].x, TEMP[0], TEMP[4] 22: MOV TEMP[3].y, -TEMP[4].xxxx 23: MOV TEMP[4].x, CONST[1][0].zzzz 24: MOV TEMP[4].y, CONST[1][1].zzzz 25: MOV TEMP[4].z, CONST[1][2].zzzz 26: MOV TEMP[4].w, CONST[1][3].zzzz 27: DP4 TEMP[0].x, TEMP[0], TEMP[4] 28: MAD TEMP[0].x, TEMP[0].xxxx, IMM[0].zzzz, -TEMP[2].xxxx 29: MOV TEMP[3].z, TEMP[0].xxxx 30: MOV TEMP[3].w, TEMP[2].xxxx 31: MOV OUT[1], TEMP[1] 32: MOV OUT[0], TEMP[3] 33: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = fmul float %33, %16 %37 = fmul float %34, %20 %38 = fadd float %36, %37 %39 = fmul float %35, %24 %40 = fadd float %38, %39 %41 = fadd float %40, %28 %42 = fmul float %33, %13 %43 = fmul float %34, %17 %44 = fadd float %42, %43 %45 = fmul float %35, %21 %46 = fadd float %44, %45 %47 = fadd float %46, %25 %48 = fmul float %33, %14 %49 = fmul float %34, %18 %50 = fadd float %48, %49 %51 = fmul float %35, %22 %52 = fadd float %50, %51 %53 = fadd float %52, %26 %54 = fsub float -0.000000e+00, %53 %55 = fmul float %33, %15 %56 = fmul float %34, %19 %57 = fadd float %55, %56 %58 = fmul float %35, %23 %59 = fadd float %57, %58 %60 = fadd float %59, %27 %61 = fmul float %60, 2.000000e+00 %62 = fsub float %61, %41 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %47, float %54, float %62, float %41) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_mov_b32_e32 v1, 0 ; 7E020280 s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 exp 15, 32, 0, 0, 0, v1, v1, v1, v1 ; F800020F 01010101 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106 s_buffer_load_dword s6, s[0:3], 0x7 ; C2030107 s_buffer_load_dword s7, s[0:3], 0x8 ; C2038108 s_buffer_load_dword s8, s[0:3], 0x9 ; C2040109 s_buffer_load_dword s9, s[0:3], 0x0 ; C2048100 s_buffer_load_dword s10, s[0:3], 0x1 ; C2050101 s_buffer_load_dword s11, s[0:3], 0x2 ; C2058102 s_buffer_load_dword s12, s[0:3], 0x3 ; C2060103 s_buffer_load_dword s13, s[0:3], 0x4 ; C2068104 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; BF8C0000 v_mul_f32_e32 v0, s6, v3 ; 10000606 v_mac_f32_e32 v0, s12, v2 ; 3E00040C v_mul_f32_e32 v5, s13, v3 ; 100A060D v_mac_f32_e32 v5, s9, v2 ; 3E0A0409 v_mul_f32_e32 v6, s4, v3 ; 100C0604 v_mac_f32_e32 v6, s10, v2 ; 3E0C040A v_mul_f32_e32 v3, s5, v3 ; 10060605 v_mac_f32_e32 v3, s11, v2 ; 3E06040B v_mac_f32_e32 v0, s15, v4 ; 3E00080F v_mac_f32_e32 v5, s7, v4 ; 3E0A0807 v_mac_f32_e32 v6, s8, v4 ; 3E0C0808 v_mac_f32_e32 v3, s14, v4 ; 3E06080E v_add_f32_e32 v0, s0, v0 ; 06000000 v_add_f32_e32 v2, s16, v5 ; 06040A10 v_add_f32_e32 v4, s17, v6 ; 06080C11 v_add_f32_e32 v3, s18, v3 ; 06060612 v_xor_b32_e32 v4, 0x80000000, v4 ; 3A0808FF 80000000 v_mad_f32 v3, 2.0, v3, -v0 ; D2820003 840206F4 exp 15, 12, 0, 0, 0, v2, v4, v3, v0 ; F80000CF 00030402 exp 15, 13, 0, 1, 0, v1, v1, v1, v1 ; F80008DF 01010101 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 204 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL OUT[0], COLOR DCL CONST[1][0..41] DCL CONST[2][0..13] DCL CONST[3][0] IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV OUT[0], IMM[0].xyyx 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call i32 @llvm.SI.packf16(float 1.000000e+00, float 0.000000e+00) %23 = bitcast i32 %22 to float %24 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 1.000000e+00) %25 = bitcast i32 %24 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %23, float %25, float %23, float %25) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: v_cvt_pkrtz_f16_f32_e64 v0, 1.0, 0 ; D25E0000 000100F2 v_cvt_pkrtz_f16_f32_e64 v1, 0, 1.0 ; D25E0001 0001E480 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 8 VGPRS: 4 Code Size: 28 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL OUT[0], COLOR DCL CONST[1][0..41] DCL CONST[2][0..13] DCL CONST[3][0] IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV OUT[0], IMM[0].xyyx 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call i32 @llvm.SI.packf16(float 1.000000e+00, float 0.000000e+00) %23 = bitcast i32 %22 to float %24 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 1.000000e+00) %25 = bitcast i32 %24 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %23, float %25, float %23, float %25) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: v_cvt_pkrtz_f16_f32_e64 v0, 1.0, 0 ; D25E0000 000100F2 v_cvt_pkrtz_f16_f32_e64 v1, 0, 1.0 ; D25E0001 0001E480 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 8 VGPRS: 4 Code Size: 28 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], POSITION, LINEAR DCL IN[1], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL CONST[4] DCL CONST[1][0..8] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0] DCL TEMP[1..6], LOCAL IMM[0] FLT32 { 0.5000, 0.0000, -0.5000, 2.0000} IMM[1] UINT32 {3, 400, 304, 0} IMM[2] UINT32 {320, 12, 64, 60} IMM[3] FLT32 { 1.0000, 3.0000, 0.0000, 0.0000} IMM[4] UINT32 {36, 28, 16, 0} 0: MOV TEMP[0], IN[0] 1: MAD TEMP[0].y, IN[0], CONST[4].xxxx, CONST[4].yyyy 2: ADD TEMP[1].xy, TEMP[0].xyyy, IMM[0].xxxx 3: MUL TEMP[1].xy, TEMP[1].xyyy, CONST[4][25].xyyy 4: ADD TEMP[2].xyz, IN[1].xyzz, -CONST[4][19].xyzz 5: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 6: RSQ TEMP[3].x, TEMP[3].xxxx 7: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx 8: MOV TEMP[3].xy, TEMP[1].xyyy 9: MOV TEMP[3].w, IMM[0].yyyy 10: TXL TEMP[3].xyz, TEMP[3], SAMP[1], 2D 11: MOV TEMP[4].xy, TEMP[1].xyyy 12: MOV TEMP[4].w, IMM[0].yyyy 13: TXL TEMP[4].xyz, TEMP[4], SAMP[2], 2D 14: ADD TEMP[4].xyz, TEMP[4].xyzz, IMM[0].zzzz 15: MUL TEMP[4].xyz, TEMP[4].xyzz, IMM[0].wwww 16: MOV TEMP[1].xy, TEMP[1].xyyy 17: MOV TEMP[1].w, IMM[0].yyyy 18: TXL TEMP[1].x, TEMP[1], SAMP[0], 2D 19: DP3 TEMP[5].x, CONST[4][20].xyzz, TEMP[2].xyzz 20: RCP TEMP[5].xyz, TEMP[5].xxxx 21: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xyzz 22: MAD TEMP[1].xyz, TEMP[1].xxxx, TEMP[2].xyzz, CONST[4][19].xyzz 23: ADD TEMP[1].xyz, CONST[1][0].xyzz, -TEMP[1].xyzz 24: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz 25: RSQ TEMP[2].x, TEMP[2].xxxx 26: MUL TEMP[2].xyz, TEMP[1].xyzz, TEMP[2].xxxx 27: DP3 TEMP[1].x, TEMP[1].xyzz, TEMP[1].xyzz 28: SQRT TEMP[1].x, TEMP[1].xxxx 29: ADD TEMP[1].x, CONST[1][0].wwww, -TEMP[1].xxxx 30: FSLT TEMP[5].x, TEMP[1].xxxx, IMM[0].yyyy 31: AND TEMP[5].x, TEMP[5].xxxx, IMM[3].xxxx 32: KILL_IF -TEMP[5].xxxx 33: DP3 TEMP[4].x, TEMP[4].xyzz, TEMP[2].xyzz 34: FSLT TEMP[5].x, TEMP[4].xxxx, IMM[0].yyyy 35: AND TEMP[5].x, TEMP[5].xxxx, IMM[3].xxxx 36: KILL_IF -TEMP[5].xxxx 37: MUL TEMP[5].x, CONST[1][4].xxxx, TEMP[2].yyyy 38: MAD TEMP[5].x, CONST[1][3].wwww, TEMP[2].xxxx, -TEMP[5].xxxx 39: MUL TEMP[6].x, CONST[1][4].xxxx, TEMP[2].xxxx 40: MAD TEMP[2].x, CONST[1][3].wwww, TEMP[2].yyyy, TEMP[6].xxxx 41: MOV TEMP[5].y, TEMP[2].xxxx 42: RCP TEMP[2].x, CONST[1][0].wwww 43: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx 44: RCP TEMP[2].x, CONST[1][2].yyyy 45: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx 46: MOV_SAT TEMP[1].x, TEMP[1].xxxx 47: MUL TEMP[2].x, IMM[0].wwww, TEMP[1].xxxx 48: ADD TEMP[2].x, IMM[3].yyyy, -TEMP[2].xxxx 49: MUL TEMP[2].x, TEMP[1].xxxx, TEMP[2].xxxx 50: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx 51: MUL TEMP[1].xyz, CONST[1][1].wwww, TEMP[1].xxxx 52: MAD TEMP[2].xy, IMM[0].xxxx, TEMP[5].xyyy, IMM[0].xxxx 53: MOV TEMP[2].xy, TEMP[2].xyyy 54: TEX TEMP[2].xyz, TEMP[2], SAMP[3], 2D 55: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xyzz 56: MUL TEMP[2].xyz, CONST[1][1].xyzz, TEMP[2].xyzz 57: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[4].xxxx 58: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xyzz 59: MOV TEMP[2].w, IMM[3].xxxx 60: MOV TEMP[2].x, TEMP[1].xxxx 61: MOV TEMP[2].y, TEMP[1].yyyy 62: MOV TEMP[2].z, TEMP[1].zzzz 63: MOV OUT[0], TEMP[2] 64: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %26 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %27 = load <16 x i8>, <16 x i8> addrspace(2)* %26, align 16, !tbaa !0 %28 = call float @llvm.SI.load.const(<16 x i8> %27, i32 0) %29 = call float @llvm.SI.load.const(<16 x i8> %27, i32 4) %30 = call float @llvm.SI.load.const(<16 x i8> %27, i32 8) %31 = call float @llvm.SI.load.const(<16 x i8> %27, i32 12) %32 = call float @llvm.SI.load.const(<16 x i8> %27, i32 16) %33 = call float @llvm.SI.load.const(<16 x i8> %27, i32 20) %34 = call float @llvm.SI.load.const(<16 x i8> %27, i32 24) %35 = call float @llvm.SI.load.const(<16 x i8> %27, i32 28) %36 = call float @llvm.SI.load.const(<16 x i8> %27, i32 36) %37 = call float @llvm.SI.load.const(<16 x i8> %27, i32 60) %38 = call float @llvm.SI.load.const(<16 x i8> %27, i32 64) %39 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4 %40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, !tbaa !0 %41 = call float @llvm.SI.load.const(<16 x i8> %40, i32 304) %42 = call float @llvm.SI.load.const(<16 x i8> %40, i32 308) %43 = call float @llvm.SI.load.const(<16 x i8> %40, i32 312) %44 = call float @llvm.SI.load.const(<16 x i8> %40, i32 320) %45 = call float @llvm.SI.load.const(<16 x i8> %40, i32 324) %46 = call float @llvm.SI.load.const(<16 x i8> %40, i32 328) %47 = call float @llvm.SI.load.const(<16 x i8> %40, i32 400) %48 = call float @llvm.SI.load.const(<16 x i8> %40, i32 404) %49 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %50 = load <32 x i8>, <32 x i8> addrspace(2)* %49, align 32, !tbaa !0 %51 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %52 = load <16 x i8>, <16 x i8> addrspace(2)* %51, align 16, !tbaa !0 %53 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %54 = bitcast <8 x i32> addrspace(2)* %53 to <32 x i8> addrspace(2)* %55 = load <32 x i8>, <32 x i8> addrspace(2)* %54, align 32, !tbaa !0 %56 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %57 = bitcast <4 x i32> addrspace(2)* %56 to <16 x i8> addrspace(2)* %58 = load <16 x i8>, <16 x i8> addrspace(2)* %57, align 16, !tbaa !0 %59 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %60 = bitcast <8 x i32> addrspace(2)* %59 to <32 x i8> addrspace(2)* %61 = load <32 x i8>, <32 x i8> addrspace(2)* %60, align 32, !tbaa !0 %62 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %63 = bitcast <4 x i32> addrspace(2)* %62 to <16 x i8> addrspace(2)* %64 = load <16 x i8>, <16 x i8> addrspace(2)* %63, align 16, !tbaa !0 %65 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %66 = bitcast <8 x i32> addrspace(2)* %65 to <32 x i8> addrspace(2)* %67 = load <32 x i8>, <32 x i8> addrspace(2)* %66, align 32, !tbaa !0 %68 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %69 = bitcast <4 x i32> addrspace(2)* %68 to <16 x i8> addrspace(2)* %70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !tbaa !0 %71 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %72 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %73 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %74 = fmul float %24, %15 %75 = fadd float %74, %25 %76 = fadd float %14, 5.000000e-01 %77 = fadd float %75, 5.000000e-01 %78 = fmul float %76, %47 %79 = fmul float %77, %48 %80 = fsub float %71, %41 %81 = fsub float %72, %42 %82 = fsub float %73, %43 %83 = fmul float %80, %80 %84 = fmul float %81, %81 %85 = fadd float %84, %83 %86 = fmul float %82, %82 %87 = fadd float %85, %86 %88 = call float @llvm.AMDGPU.rsq.clamped.f32(float %87) %89 = fmul float %80, %88 %90 = fmul float %81, %88 %91 = fmul float %82, %88 %92 = bitcast float %78 to i32 %93 = bitcast float %79 to i32 %94 = insertelement <4 x i32> undef, i32 %92, i32 0 %95 = insertelement <4 x i32> %94, i32 %93, i32 1 %96 = insertelement <4 x i32> %95, i32 0, i32 2 %97 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %96, <32 x i8> %55, <16 x i8> %58, i32 2) %98 = extractelement <4 x float> %97, i32 0 %99 = extractelement <4 x float> %97, i32 1 %100 = extractelement <4 x float> %97, i32 2 %101 = bitcast float %78 to i32 %102 = bitcast float %79 to i32 %103 = insertelement <4 x i32> undef, i32 %101, i32 0 %104 = insertelement <4 x i32> %103, i32 %102, i32 1 %105 = insertelement <4 x i32> %104, i32 0, i32 2 %106 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %105, <32 x i8> %61, <16 x i8> %64, i32 2) %107 = extractelement <4 x float> %106, i32 0 %108 = extractelement <4 x float> %106, i32 1 %109 = extractelement <4 x float> %106, i32 2 %110 = fadd float %107, -5.000000e-01 %111 = fadd float %108, -5.000000e-01 %112 = fadd float %109, -5.000000e-01 %113 = fmul float %110, 2.000000e+00 %114 = fmul float %111, 2.000000e+00 %115 = fmul float %112, 2.000000e+00 %116 = bitcast float %78 to i32 %117 = bitcast float %79 to i32 %118 = insertelement <4 x i32> undef, i32 %116, i32 0 %119 = insertelement <4 x i32> %118, i32 %117, i32 1 %120 = insertelement <4 x i32> %119, i32 0, i32 2 %121 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %120, <32 x i8> %50, <16 x i8> %52, i32 2) %122 = extractelement <4 x float> %121, i32 0 %123 = fmul float %44, %89 %124 = fmul float %45, %90 %125 = fadd float %124, %123 %126 = fmul float %46, %91 %127 = fadd float %125, %126 %128 = fdiv float 1.000000e+00, %127 %129 = fmul float %89, %128 %130 = fmul float %90, %128 %131 = fmul float %91, %128 %132 = fmul float %122, %129 %133 = fadd float %132, %41 %134 = fmul float %122, %130 %135 = fadd float %134, %42 %136 = fmul float %122, %131 %137 = fadd float %136, %43 %138 = fsub float %28, %133 %139 = fsub float %29, %135 %140 = fsub float %30, %137 %141 = fmul float %138, %138 %142 = fmul float %139, %139 %143 = fadd float %142, %141 %144 = fmul float %140, %140 %145 = fadd float %143, %144 %146 = call float @llvm.AMDGPU.rsq.clamped.f32(float %145) %147 = fmul float %138, %146 %148 = fmul float %139, %146 %149 = fmul float %140, %146 %150 = fmul float %138, %138 %151 = fmul float %139, %139 %152 = fadd float %151, %150 %153 = fmul float %140, %140 %154 = fadd float %152, %153 %155 = call float @llvm.sqrt.f32(float %154) %156 = fsub float %31, %155 %157 = fcmp olt float %156, 0.000000e+00 %158 = select i1 %157, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %158) %159 = fmul float %113, %147 %160 = fmul float %114, %148 %161 = fadd float %160, %159 %162 = fmul float %115, %149 %163 = fadd float %161, %162 %164 = fcmp olt float %163, 0.000000e+00 %165 = select i1 %164, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %165) %166 = fmul float %38, %148 %167 = fmul float %37, %147 %168 = fsub float %167, %166 %169 = fmul float %38, %147 %170 = fmul float %37, %148 %171 = fadd float %170, %169 %172 = fdiv float 1.000000e+00, %31 %173 = fmul float %156, %172 %174 = fdiv float 1.000000e+00, %36 %175 = fmul float %173, %174 %176 = call float @llvm.AMDIL.clamp.(float %175, float 0.000000e+00, float 1.000000e+00) %177 = fmul float %176, 2.000000e+00 %178 = fsub float 3.000000e+00, %177 %179 = fmul float %176, %178 %180 = fmul float %176, %179 %181 = fmul float %35, %180 %182 = fmul float %35, %180 %183 = fmul float %35, %180 %184 = fmul float %168, 5.000000e-01 %185 = fadd float %184, 5.000000e-01 %186 = fmul float %171, 5.000000e-01 %187 = fadd float %186, 5.000000e-01 %188 = bitcast float %185 to i32 %189 = bitcast float %187 to i32 %190 = insertelement <2 x i32> undef, i32 %188, i32 0 %191 = insertelement <2 x i32> %190, i32 %189, i32 1 %192 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %191, <32 x i8> %67, <16 x i8> %70, i32 2) %193 = extractelement <4 x float> %192, i32 0 %194 = extractelement <4 x float> %192, i32 1 %195 = extractelement <4 x float> %192, i32 2 %196 = fmul float %193, %98 %197 = fmul float %194, %99 %198 = fmul float %195, %100 %199 = fmul float %32, %196 %200 = fmul float %33, %197 %201 = fmul float %34, %198 %202 = fmul float %199, %163 %203 = fmul float %200, %163 %204 = fmul float %201, %163 %205 = fmul float %181, %202 %206 = fmul float %182, %203 %207 = fmul float %183, %204 %208 = call i32 @llvm.SI.packf16(float %205, float %206) %209 = bitcast i32 %208 to float %210 = call i32 @llvm.SI.packf16(float %207, float 1.000000e+00) %211 = bitcast i32 %210 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %209, float %211, float %209, float %211) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300 s_load_dwordx4 s[16:19], s[2:3], 0x10 ; C0880310 s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[12:15], 0x10 ; C2040D10 s_buffer_load_dword s10, s[12:15], 0x11 ; C2050D11 s_mov_b32 m0, s9 ; BEFC0309 s_buffer_load_dword s9, s[16:19], 0x4c ; C204914C s_buffer_load_dword s11, s[16:19], 0x4d ; C205914D s_buffer_load_dword s12, s[16:19], 0x4e ; C206114E s_buffer_load_dword s13, s[16:19], 0x50 ; C2069150 s_buffer_load_dword s14, s[16:19], 0x51 ; C2071151 s_buffer_load_dword s15, s[16:19], 0x52 ; C2079152 s_buffer_load_dword s20, s[16:19], 0x64 ; C20A1164 s_buffer_load_dword s16, s[16:19], 0x65 ; C2081165 v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 v_interp_p1_f32 v5, v0, 1, 0, [m0] ; C8140100 v_interp_p2_f32 v5, [v5], v1, 1, 0, [m0] ; C8150101 v_interp_p1_f32 v0, v0, 2, 0, [m0] ; C8000200 v_interp_p2_f32 v0, [v0], v1, 2, 0, [m0] ; C8010201 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s10 ; 7E02020A v_mac_f32_e32 v1, s8, v3 ; 3E020608 v_add_f32_e32 v2, 0.5, v2 ; 060404F0 v_subrev_f32_e32 v3, s9, v4 ; 0A060809 v_subrev_f32_e32 v4, s11, v5 ; 0A080A0B v_subrev_f32_e32 v0, s12, v0 ; 0A00000C v_mul_f32_e32 v5, v3, v3 ; 100A0703 v_mac_f32_e32 v5, v4, v4 ; 3E0A0904 v_mac_f32_e32 v5, v0, v0 ; 3E0A0100 v_rsq_clamp_f32_e32 v5, v5 ; 7E0A5905 v_add_f32_e32 v1, 0.5, v1 ; 060202F0 v_mul_f32_e32 v6, s20, v2 ; 100C0414 v_mul_f32_e32 v7, s16, v1 ; 100E0210 v_mul_f32_e32 v1, v5, v3 ; 10020705 v_mul_f32_e32 v2, v5, v4 ; 10040905 v_mul_f32_e32 v3, s13, v1 ; 1006020D s_load_dwordx4 s[16:19], s[4:5], 0x0 ; C0880500 s_load_dwordx4 s[20:23], s[4:5], 0x4 ; C08A0504 s_load_dwordx4 s[24:27], s[4:5], 0x8 ; C08C0508 s_load_dwordx8 s[28:35], s[6:7], 0x8 ; C0CE0708 s_load_dwordx8 s[36:43], s[6:7], 0x10 ; C0D20710 s_load_dwordx8 s[44:51], s[6:7], 0x0 ; C0D60700 v_mac_f32_e32 v3, s14, v2 ; 3E06040E v_mul_f32_e32 v0, v5, v0 ; 10000105 v_mac_f32_e32 v3, s15, v0 ; 3E06000F v_mov_b32_e32 v8, 0 ; 7E100280 v_rcp_f32_e32 v3, v3 ; 7E065503 s_load_dwordx4 s[52:55], s[4:5], 0xc ; C09A050C s_load_dwordx8 s[56:63], s[6:7], 0x18 ; C0DC0718 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[9:11], 7, 0, 0, 0, 0, 0, 0, 0, v[6:9], s[28:35], s[20:23] ; F0900700 00A70906 s_waitcnt vmcnt(0) ; BF8C0770 image_sample_l v[12:14], 7, 0, 0, 0, 0, 0, 0, 0, v[6:9], s[36:43], s[24:27] ; F0900700 00C90C06 image_sample_l v4, 1, 0, 0, 0, 0, 0, 0, 0, v[6:9], s[44:51], s[16:19] ; F0900100 008B0406 v_mul_f32_e32 v1, v3, v1 ; 10020303 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v4, v1, s9 ; D2820001 00260304 v_mul_f32_e32 v2, v3, v2 ; 10040503 v_mul_f32_e32 v0, v3, v0 ; 10000103 v_mad_f32 v2, v4, v2, s11 ; D2820002 002E0504 v_mad_f32 v0, v4, v0, s12 ; D2820000 00320104 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v1, s4, v1 ; 08020204 v_sub_f32_e32 v2, s5, v2 ; 08040405 v_sub_f32_e32 v0, s6, v0 ; 08000006 v_mul_f32_e32 v3, v1, v1 ; 10060301 v_mac_f32_e32 v3, v2, v2 ; 3E060502 v_mac_f32_e32 v3, v0, v0 ; 3E060100 v_rsq_clamp_f32_e32 v4, v3 ; 7E085903 v_add_f32_e32 v5, -0.5, v12 ; 060A18F1 v_add_f32_e32 v6, -0.5, v13 ; 060C1AF1 v_add_f32_e32 v7, -0.5, v14 ; 060E1CF1 v_mul_f32_e32 v1, v4, v1 ; 10020304 v_mul_f32_e32 v2, v4, v2 ; 10040504 v_mul_f32_e32 v0, v4, v0 ; 10000104 v_sqrt_f32_e32 v3, v3 ; 7E066703 v_sub_f32_e32 v3, s7, v3 ; 08060607 v_cmp_gt_f32_e32 vcc, 0, v3 ; 7C080680 v_cndmask_b32_e64 v4, 0, -1.0, vcc ; D2000004 01A9E680 v_add_f32_e32 v5, v5, v5 ; 060A0B05 v_add_f32_e32 v6, v6, v6 ; 060C0D06 v_add_f32_e32 v7, v7, v7 ; 060E0F07 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 v_cmpx_le_f32_e32 vcc, 0, v4 ; 7C260880 s_buffer_load_dword s5, s[0:3], 0xf ; C202810F v_mul_f32_e32 v4, v1, v5 ; 10080B01 v_mac_f32_e32 v4, v2, v6 ; 3E080D02 v_mac_f32_e32 v4, v0, v7 ; 3E080F00 v_cmp_gt_f32_e32 vcc, 0, v4 ; 7C080880 v_cndmask_b32_e64 v0, 0, -1.0, vcc ; D2000000 01A9E680 v_cmpx_le_f32_e32 vcc, 0, v0 ; 7C260080 s_buffer_load_dword s6, s[0:3], 0x9 ; C2030109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v2 ; 10000404 v_mul_f32_e32 v5, s4, v1 ; 100A0204 v_mad_f32 v0, s5, v1, -v0 ; D2820000 84020205 v_mac_f32_e32 v5, s5, v2 ; 3E0A0405 v_mad_f32 v0, 0.5, v0, 0.5 ; D2820000 03C200F0 v_mad_f32 v1, 0.5, v5, 0.5 ; D2820001 03C20AF0 image_sample v[0:2], 7, 0, 0, 0, 0, 0, 0, 0, v[0:1], s[56:63], s[52:55] ; F0800700 01AE0000 v_rcp_f32_e32 v5, s7 ; 7E0A5407 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 v_rcp_f32_e32 v6, s6 ; 7E0C5406 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_buffer_load_dword s0, s[0:3], 0x6 ; C2000106 v_mul_f32_e32 v3, v5, v3 ; 10060705 v_mul_f32_e32 v3, v6, v3 ; 10060706 v_add_f32_e64 v3, 0, v3 clamp ; D2060803 00020680 v_madak_f32_e32 v5, -2.0, v3, 0x40400000 ; 420A06F5 40400000 v_mul_f32_e32 v5, v5, v3 ; 100A0705 v_mul_f32_e32 v3, v5, v3 ; 10060705 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v3, s4, v3 ; 10060604 v_mul_f32_e32 v0, v9, v0 ; 10000109 v_mul_f32_e32 v0, s8, v0 ; 10000008 v_mul_f32_e32 v1, v10, v1 ; 1002030A v_mul_f32_e32 v1, s5, v1 ; 10020205 v_mul_f32_e32 v2, v11, v2 ; 1004050B v_mul_f32_e32 v2, s0, v2 ; 10040400 v_mul_f32_e32 v0, v4, v0 ; 10000104 v_mul_f32_e32 v1, v4, v1 ; 10020304 v_mul_f32_e32 v2, v4, v2 ; 10040504 v_mul_f32_e32 v0, v0, v3 ; 10000700 v_mul_f32_e32 v1, v1, v3 ; 10020701 v_mul_f32_e32 v2, v2, v3 ; 10040702 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e64 v1, v2, 1.0 ; D25E0001 0001E502 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 72 VGPRS: 16 Code Size: 612 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL OUT[4], GENERIC[2] DCL OUT[5], GENERIC[3] DCL OUT[6], GENERIC[4] DCL OUT[7], GENERIC[5] DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[1][0] DCL CONST[2][0..15] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..7] DCL CONST[6][0] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL DCL TEMP[2..15], LOCAL IMM[0] FLT32 { 0.0000, 0.5000, -0.5000, 3.0000} IMM[1] UINT32 {0, 4, 32, 96} IMM[2] FLT32 { 4095.9399, 2.0000, -2.0000, 1.0000} IMM[3] FLT32 { 0.0774, 0.9479, 0.0521, 2.4000} IMM[4] FLT32 { 0.0404, 0.0039, 0.0000, 0.0010} IMM[5] UINT32 {112, 3, 320, 336} IMM[6] UINT32 {48, 304, 64, 512} IMM[7] UINT32 {528, 544, 560, 516} IMM[8] UINT32 {532, 548, 564, 524} IMM[9] UINT32 {540, 556, 572, 364} IMM[10] UINT32 {372, 520, 536, 552} IMM[11] UINT32 {568, 0, 0, 0} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MAD TEMP[0].x, IN[0].xxxx, IMM[0].yyyy, IMM[0].yyyy 4: MAD TEMP[2].x, IN[0].yyyy, IMM[0].zzzz, IMM[0].yyyy 5: MOV TEMP[3].x, TEMP[0].xxxx 6: MOV TEMP[3].y, TEMP[2].xxxx 7: MOV TEMP[3].z, TEMP[0].xxxx 8: MOV TEMP[3].w, TEMP[2].xxxx 9: RCP TEMP[0].x, CONST[1][0].yyyy 10: MUL TEMP[2].x, IN[1].xxxx, IMM[0].wwww 11: FSLT TEMP[4].x, IN[1].xxxx, CONST[1][0].wwww 12: UIF TEMP[4].xxxx :0 13: MOV TEMP[2].x, TEMP[2].xxxx 14: ELSE :0 15: MOV TEMP[2].x, IMM[0].xxxx 16: ENDIF 17: MAD TEMP[2].x, CONST[1][0].zzzz, TEMP[2].xxxx, CONST[1][0].xxxx 18: MOV TEMP[4].x, IMM[0].xxxx 19: MOV TEMP[4].y, TEMP[2].xxxx 20: MOV TEMP[4].xy, TEMP[4].xyyy 21: MOV TEMP[4].w, IMM[0].xxxx 22: TXL TEMP[4].yz, TEMP[4], SAMP[0], 2D 23: MUL TEMP[5].x, TEMP[4].yyyy, IMM[2].xxxx 24: RCP TEMP[5].x, TEMP[5].xxxx 25: MUL TEMP[5].x, TEMP[5].xxxx, CONST[1][0].yyyy 26: MUL TEMP[5].x, IMM[0].yyyy, TEMP[5].xxxx 27: MUL TEMP[5].x, TEMP[5].xxxx, IN[1].zzzz 28: MUL TEMP[5].x, TEMP[5].xxxx, CONST[5][2].xxxx 29: FRC TEMP[6].x, TEMP[5].xxxx 30: FLR TEMP[5].x, TEMP[5].xxxx 31: MUL TEMP[5].x, TEMP[5].xxxx, IMM[2].yyyy 32: ADD TEMP[7].x, CONST[1][0].yyyy, IMM[2].zzzz 33: MIN TEMP[7].x, TEMP[7].xxxx, TEMP[5].xxxx 34: RCP TEMP[8].x, CONST[1][0].yyyy 35: MUL TEMP[8].x, TEMP[5].xxxx, TEMP[8].xxxx 36: FLR TEMP[8].x, TEMP[8].xxxx 37: MUL TEMP[8].x, CONST[1][0].yyyy, TEMP[8].xxxx 38: ADD TEMP[5].x, TEMP[5].xxxx, -TEMP[8].xxxx 39: FSLT TEMP[4].x, IMM[0].xxxx, TEMP[4].zzzz 40: UIF TEMP[4].xxxx :0 41: MOV TEMP[4].x, TEMP[7].xxxx 42: ELSE :0 43: MOV TEMP[4].x, TEMP[5].xxxx 44: ENDIF 45: MUL TEMP[4].x, TEMP[0].xxxx, TEMP[4].xxxx 46: MOV TEMP[5].x, TEMP[4].xxxx 47: MOV TEMP[5].y, TEMP[2].xxxx 48: MOV TEMP[5].xy, TEMP[5].xyyy 49: MOV TEMP[5].w, IMM[0].xxxx 50: TXL TEMP[5].xw, TEMP[5], SAMP[0], 2D 51: LRP TEMP[5].x, TEMP[6].xxxx, TEMP[5].wwww, TEMP[5].xxxx 52: ADD TEMP[6].x, TEMP[2].xxxx, CONST[1][0].zzzz 53: MOV TEMP[7].x, TEMP[4].xxxx 54: MOV TEMP[7].y, TEMP[6].xxxx 55: MOV TEMP[7].xy, TEMP[7].xyyy 56: MOV TEMP[7].w, IMM[0].xxxx 57: TXL TEMP[7], TEMP[7], SAMP[0], 2D 58: MOV TEMP[8].x, TEMP[4].xxxx 59: MAD TEMP[2].x, IMM[2].yyyy, CONST[1][0].zzzz, TEMP[2].xxxx 60: MOV TEMP[8].y, TEMP[2].xxxx 61: MOV TEMP[2].xy, TEMP[8].xyyy 62: MOV TEMP[2].w, IMM[0].xxxx 63: TXL TEMP[2], TEMP[2], SAMP[0], 2D 64: ADD TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx 65: MOV TEMP[0].y, TEMP[6].xxxx 66: MOV TEMP[0].xy, TEMP[0].xyyy 67: MOV TEMP[0].w, IMM[0].xxxx 68: TXL TEMP[0], TEMP[0], SAMP[0], 2D 69: ADD TEMP[4].xy, TEMP[0].zwww, -TEMP[0].xyyy 70: ADD TEMP[6].xy, TEMP[7].zwww, -TEMP[7].xyyy 71: RCP TEMP[8].x, TEMP[4].xxxx 72: RCP TEMP[8].y, TEMP[4].yyyy 73: MUL TEMP[6].xy, TEMP[6].xyyy, TEMP[8].xyyy 74: MUL TEMP[8].xy, TEMP[0].xyyy, TEMP[6].xyyy 75: ADD TEMP[7].xy, TEMP[7].xyyy, -TEMP[8].xyyy 76: ADD TEMP[6].xy, TEMP[7].xyyy, TEMP[6].xyyy 77: ADD TEMP[8].xy, TEMP[2].zwww, -TEMP[2].xyyy 78: RCP TEMP[9].x, TEMP[4].xxxx 79: RCP TEMP[9].y, TEMP[4].yyyy 80: MUL TEMP[4].xy, TEMP[8].xyyy, TEMP[9].xyyy 81: MUL TEMP[8].xy, TEMP[0].xyyy, TEMP[4].xyyy 82: ADD TEMP[2].xy, TEMP[2].xyyy, -TEMP[8].xyyy 83: ADD TEMP[4].xy, TEMP[2].xyyy, TEMP[4].xyyy 84: MOV TEMP[8].y, IMM[0].xxxx 85: MOV TEMP[8].x, TEMP[5].xxxx 86: MUL TEMP[5].x, IMM[0].yyyy, TEMP[5].xxxx 87: ADD TEMP[9].x, IMM[0].yyyy, TEMP[5].xxxx 88: MOV TEMP[10].x, TEMP[9].xxxx 89: MOV TEMP[10].y, TEMP[9].xxxx 90: MOV TEMP[10].z, TEMP[5].xxxx 91: MOV TEMP[10].w, TEMP[5].xxxx 92: ADD TEMP[5], TEMP[3], IMM[0].zzzz 93: RCP TEMP[9].x, CONST[5][2].zzzz 94: MOV_SAT TEMP[9].x, TEMP[9].xxxx 95: MAD TEMP[5], TEMP[5], TEMP[9].xxxx, IMM[0].yyyy 96: LRP TEMP[3], TEMP[10], TEMP[5], TEMP[3] 97: MAD TEMP[5].x, CONST[5][6].zzzz, TEMP[3].xxxx, CONST[5][6].xxxx 98: MAD TEMP[9].x, CONST[5][6].wwww, TEMP[3].yyyy, CONST[5][6].yyyy 99: MOV TEMP[10].x, TEMP[5].xxxx 100: MOV TEMP[10].y, TEMP[9].xxxx 101: MAD TEMP[11].x, CONST[5][6].zzzz, TEMP[3].zzzz, CONST[5][6].xxxx 102: MOV TEMP[10].z, TEMP[11].xxxx 103: MAD TEMP[3].x, CONST[5][6].wwww, TEMP[3].wwww, CONST[5][6].yyyy 104: MOV TEMP[10].w, TEMP[3].xxxx 105: ADD TEMP[3], CONST[5][6].xyxy, -TEMP[10] 106: MAD TEMP[3], TEMP[3], IMM[2].yyyy, CONST[5][6].zwzw 107: FSLT TEMP[11].x, TEMP[3].xxxx, IMM[0].xxxx 108: UIF TEMP[11].xxxx :0 109: MOV TEMP[11].x, TEMP[0].zzzz 110: ELSE :0 111: MOV TEMP[11].x, TEMP[0].xxxx 112: ENDIF 113: FSLT TEMP[12].x, TEMP[3].yyyy, IMM[0].xxxx 114: UIF TEMP[12].xxxx :0 115: MOV TEMP[12].x, TEMP[0].wwww 116: ELSE :0 117: MOV TEMP[12].x, TEMP[0].yyyy 118: ENDIF 119: MOV TEMP[0].x, TEMP[11].xxxx 120: MOV TEMP[0].y, TEMP[12].xxxx 121: MOV TEMP[5].x, TEMP[5].xxxx 122: MOV TEMP[5].y, TEMP[9].xxxx 123: ADD TEMP[0].xy, TEMP[0].xyyy, -TEMP[5].xyyy 124: RCP TEMP[5].x, TEMP[3].xxxx 125: RCP TEMP[5].y, TEMP[3].yyyy 126: MUL TEMP[0].xy, TEMP[0].xyyy, TEMP[5].xyyy 127: MOV_SAT TEMP[0].xy, TEMP[0].xyyy 128: MAD TEMP[3], TEMP[0].xyxy, TEMP[3], TEMP[10] 129: MUL TEMP[0].xy, TEMP[0].xyyy, IMM[2].yyyy 130: ADD TEMP[0].xy, IMM[2].wwww, -TEMP[0].xyyy 131: MUL TEMP[0].xy, TEMP[0].xyyy, IN[0].xyyy 132: LRP TEMP[5].xy, TEMP[3].xyyy, TEMP[6].xyyy, TEMP[7].xyyy 133: LRP TEMP[2].xy, TEMP[3].zwww, TEMP[4].xyyy, TEMP[2].xyyy 134: MOV TEMP[3].x, TEMP[5].xxxx 135: MOV TEMP[3].y, TEMP[5].yyyy 136: MOV TEMP[3].z, TEMP[2].xxxx 137: MOV TEMP[3].w, TEMP[2].yyyy 138: MUL TEMP[2].xyz, IN[2].xyzz, IMM[3].xxxx 139: MAD TEMP[4].xyz, IN[2].xyzz, IMM[3].yyyy, IMM[3].zzzz 140: POW TEMP[5].x, TEMP[4].xxxx, IMM[3].wwww 141: POW TEMP[5].y, TEMP[4].yyyy, IMM[3].wwww 142: POW TEMP[5].z, TEMP[4].zzzz, IMM[3].wwww 143: FSLT TEMP[4].x, IMM[4].xxxx, IN[2].xxxx 144: UIF TEMP[4].xxxx :0 145: MOV TEMP[4].x, TEMP[5].xxxx 146: ELSE :0 147: MOV TEMP[4].x, TEMP[2].xxxx 148: ENDIF 149: FSLT TEMP[6].x, IMM[4].xxxx, IN[2].yyyy 150: UIF TEMP[6].xxxx :0 151: MOV TEMP[6].x, TEMP[5].yyyy 152: ELSE :0 153: MOV TEMP[6].x, TEMP[2].yyyy 154: ENDIF 155: FSLT TEMP[7].x, IMM[4].xxxx, IN[2].zzzz 156: UIF TEMP[7].xxxx :0 157: MOV TEMP[5].x, TEMP[5].zzzz 158: ELSE :0 159: MOV TEMP[5].x, TEMP[2].zzzz 160: ENDIF 161: MOV TEMP[2].x, TEMP[4].xxxx 162: MOV TEMP[2].y, TEMP[6].xxxx 163: MOV TEMP[2].z, TEMP[5].xxxx 164: MOV TEMP[2].w, IN[2].wwww 165: LRP TEMP[2], CONST[5][7].xxxx, TEMP[2], IN[2] 166: MUL TEMP[4].xyz, CONST[4][20].zxyy, CONST[4][21].yzxx 167: MAD TEMP[4].xyz, CONST[4][20].yzxx, CONST[4][21].zxyy, -TEMP[4].xyzz 168: COS TEMP[5].x, IN[4].xxxx 169: SIN TEMP[6].x, IN[4].xxxx 170: MUL TEMP[7].xyz, TEMP[5].xxxx, TEMP[4].xyzz 171: MAD TEMP[7].xyz, TEMP[6].xxxx, CONST[4][21].xyzz, TEMP[7].xyzz 172: MUL TEMP[4].xyz, TEMP[6].xxxx, TEMP[4].xyzz 173: MAD TEMP[4].xyz, TEMP[5].xxxx, CONST[4][21].xyzz, -TEMP[4].xyzz 174: SIN TEMP[5].x, IN[4].yyyy 175: COS TEMP[6].x, IN[4].yyyy 176: ADD TEMP[9].x, IMM[2].wwww, -TEMP[6].xxxx 177: MUL TEMP[10].x, TEMP[4].xxxx, TEMP[4].yyyy 178: MUL TEMP[10].x, TEMP[9].xxxx, TEMP[10].xxxx 179: MUL TEMP[11].x, TEMP[5].xxxx, TEMP[4].zzzz 180: MUL TEMP[12].x, TEMP[4].xxxx, TEMP[4].zzzz 181: MUL TEMP[12].x, TEMP[9].xxxx, TEMP[12].xxxx 182: MUL TEMP[13].x, TEMP[5].xxxx, TEMP[4].yyyy 183: MUL TEMP[14].x, TEMP[4].yyyy, TEMP[4].zzzz 184: MUL TEMP[9].x, TEMP[14].xxxx, TEMP[9].xxxx 185: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[4].xxxx 186: MUL TEMP[14].x, TEMP[4].xxxx, TEMP[4].xxxx 187: LRP TEMP[14].x, TEMP[6].xxxx, IMM[2].wwww, TEMP[14].xxxx 188: ADD TEMP[15].x, TEMP[10].xxxx, -TEMP[11].xxxx 189: MOV TEMP[14].y, TEMP[15].xxxx 190: ADD TEMP[15].x, TEMP[12].xxxx, TEMP[13].xxxx 191: MOV TEMP[14].z, TEMP[15].xxxx 192: ADD TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx 193: MUL TEMP[11].x, TEMP[4].yyyy, TEMP[4].yyyy 194: LRP TEMP[11].x, TEMP[6].xxxx, IMM[2].wwww, TEMP[11].xxxx 195: MOV TEMP[10].y, TEMP[11].xxxx 196: ADD TEMP[11].x, TEMP[9].xxxx, -TEMP[5].xxxx 197: MOV TEMP[10].z, TEMP[11].xxxx 198: ADD TEMP[11].x, TEMP[12].xxxx, -TEMP[13].xxxx 199: ADD TEMP[5].x, TEMP[9].xxxx, TEMP[5].xxxx 200: MOV TEMP[11].y, TEMP[5].xxxx 201: MUL TEMP[5].x, TEMP[4].zzzz, TEMP[4].zzzz 202: LRP TEMP[5].x, TEMP[6].xxxx, IMM[2].wwww, TEMP[5].xxxx 203: MOV TEMP[11].z, TEMP[5].xxxx 204: DP3 TEMP[5].x, TEMP[7].xyzz, TEMP[14].xyzz 205: DP3 TEMP[6].x, TEMP[7].xyzz, TEMP[10].xyzz 206: MOV TEMP[5].y, TEMP[6].xxxx 207: DP3 TEMP[6].x, TEMP[7].xyzz, TEMP[11].xyzz 208: MOV TEMP[5].z, TEMP[6].xxxx 209: ADD TEMP[6].x, CONST[5][3].yyyy, TEMP[0].xxxx 210: ADD TEMP[0].x, CONST[5][3].zzzz, TEMP[0].yyyy 211: MUL TEMP[0].xyz, TEMP[0].xxxx, TEMP[4].xyzz 212: MAD TEMP[0].xyz, TEMP[5].xyzz, TEMP[6].xxxx, TEMP[0].xyzz 213: ADD TEMP[4].xyz, CONST[4][19].xyzz, -IN[3].xyzz 214: DP3 TEMP[4].x, TEMP[4].xyzz, TEMP[4].xyzz 215: SQRT TEMP[4].x, TEMP[4].xxxx 216: MOV TEMP[5], TEMP[2] 217: MOV TEMP[6].x, IN[4].wwww 218: FSLT TEMP[7].x, IMM[0].xxxx, CONST[5][4].zzzz 219: UIF TEMP[7].xxxx :0 220: MUL TEMP[7].x, TEMP[4].xxxx, CONST[5][0].yyyy 221: FSLT TEMP[9].x, TEMP[7].xxxx, IN[4].wwww 222: ADD TEMP[7].x, IN[4].wwww, -TEMP[7].xxxx 223: ADD TEMP[10].x, CONST[5][0].zzzz, -CONST[5][0].yyyy 224: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[4].xxxx 225: RCP TEMP[10].x, TEMP[10].xxxx 226: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[10].xxxx 227: ADD TEMP[7].x, IMM[2].wwww, -TEMP[7].xxxx 228: MUL TEMP[7], TEMP[7].xxxx, TEMP[2] 229: MUL TEMP[10].x, TEMP[4].xxxx, CONST[5][0].zzzz 230: FSLT TEMP[10].x, TEMP[10].xxxx, IN[4].wwww 231: UIF TEMP[10].xxxx :0 232: MOV TEMP[10], IMM[0].xxxx 233: ELSE :0 234: MOV TEMP[10], TEMP[7] 235: ENDIF 236: UIF TEMP[9].xxxx :0 237: MOV TEMP[7], TEMP[10] 238: ELSE :0 239: MOV TEMP[7], TEMP[2] 240: ENDIF 241: MOV TEMP[5], TEMP[7] 242: MUL TEMP[2].x, TEMP[4].xxxx, CONST[5][0].xxxx 243: MAX TEMP[2].x, IN[4].wwww, TEMP[2].xxxx 244: MUL TEMP[4].x, TEMP[4].xxxx, CONST[5][0].wwww 245: MIN TEMP[6].x, TEMP[2].xxxx, TEMP[4].xxxx 246: ENDIF 247: FSLT TEMP[2].x, TEMP[5].wwww, IMM[4].yyyy 248: UIF TEMP[2].xxxx :0 249: MOV TEMP[2].x, IMM[0].xxxx 250: ELSE :0 251: MOV TEMP[2].x, TEMP[6].xxxx 252: ENDIF 253: MOV TEMP[4].xy, IMM[0].xxxx 254: MOV TEMP[4].w, IMM[0].xxxx 255: TXL TEMP[4], TEMP[4], SAMP[1], 2D 256: MUL TEMP[4].xyz, TEMP[4], IMM[4].zzzz 257: MAD TEMP[0].xyz, TEMP[2].xxxx, TEMP[0].xyzz, IN[3].xyzz 258: ADD TEMP[0].xyz, TEMP[4].xyzz, TEMP[0].xyzz 259: MOV TEMP[2].w, IMM[2].wwww 260: MOV TEMP[2].x, TEMP[0].xxxx 261: MOV TEMP[2].y, TEMP[0].yyyy 262: MOV TEMP[2].z, TEMP[0].zzzz 263: MOV TEMP[4].x, CONST[4][32].xxxx 264: MOV TEMP[4].y, CONST[4][33].xxxx 265: MOV TEMP[4].z, CONST[4][34].xxxx 266: MOV TEMP[4].w, CONST[4][35].xxxx 267: DP4 TEMP[4].x, TEMP[2], TEMP[4] 268: MOV TEMP[6].x, CONST[4][32].yyyy 269: MOV TEMP[6].y, CONST[4][33].yyyy 270: MOV TEMP[6].z, CONST[4][34].yyyy 271: MOV TEMP[6].w, CONST[4][35].yyyy 272: DP4 TEMP[6].x, TEMP[2], TEMP[6] 273: MOV TEMP[7].x, CONST[4][32].wwww 274: MOV TEMP[7].y, CONST[4][33].wwww 275: MOV TEMP[7].z, CONST[4][34].wwww 276: MOV TEMP[7].w, CONST[4][35].wwww 277: DP4 TEMP[7].x, TEMP[2], TEMP[7] 278: MAD TEMP[9].xyz, CONST[4][20].xyzz, CONST[5][3].xxxx, TEMP[0].xyzz 279: MOV TEMP[10].w, IMM[2].wwww 280: MOV TEMP[10].x, TEMP[9].xxxx 281: MOV TEMP[10].y, TEMP[9].yyyy 282: MOV TEMP[10].z, TEMP[9].zzzz 283: MOV TEMP[9].xyz, -CONST[4][19].xyzx 284: ADD TEMP[11].xyz, TEMP[0].xyzz, TEMP[9].xyzz 285: MOV TEMP[12].x, TEMP[0].xxxx 286: MOV TEMP[12].y, TEMP[0].yyyy 287: MOV TEMP[12].z, TEMP[0].zzzz 288: DP3 TEMP[13].x, CONST[4][20].xyzz, TEMP[11].xyzz 289: MOV TEMP[12].w, TEMP[13].xxxx 290: MOV TEMP[13].x, TEMP[4].xxxx 291: MOV TEMP[13].y, TEMP[6].xxxx 292: MOV TEMP[14].x, -CONST[4][22].wwww 293: DP3 TEMP[11].x, TEMP[11].xyzz, CONST[4][20].xyzz 294: ADD TEMP[11].x, TEMP[11].xxxx, TEMP[14].xxxx 295: ADD TEMP[14].x, CONST[4][23].yyyy, TEMP[14].xxxx 296: RCP TEMP[14].x, TEMP[14].xxxx 297: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[14].xxxx 298: MOV TEMP[13].z, TEMP[11].xxxx 299: MOV TEMP[13].w, TEMP[7].xxxx 300: ADD TEMP[0].xyz, TEMP[0].xyzz, TEMP[9].xyzz 301: MOV TEMP[0].xyz, -TEMP[0].xyzx 302: DP3 TEMP[9].x, TEMP[0].xyzz, TEMP[0].xyzz 303: RSQ TEMP[9].x, TEMP[9].xxxx 304: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[9].xxxx 305: MOV TEMP[4].x, TEMP[4].xxxx 306: MOV TEMP[4].y, -TEMP[6].xxxx 307: MOV TEMP[6].x, CONST[4][32].zzzz 308: MOV TEMP[6].y, CONST[4][33].zzzz 309: MOV TEMP[6].z, CONST[4][34].zzzz 310: MOV TEMP[6].w, CONST[4][35].zzzz 311: MOV TEMP[9].x, CONST[4][32].wwww 312: MOV TEMP[9].y, CONST[4][33].wwww 313: MOV TEMP[9].z, CONST[4][34].wwww 314: MOV TEMP[9].w, CONST[4][35].wwww 315: MOV TEMP[11].x, CONST[4][32].zzzz 316: MOV TEMP[11].y, CONST[4][33].zzzz 317: MOV TEMP[11].z, CONST[4][34].zzzz 318: MOV TEMP[11].w, CONST[4][35].zzzz 319: DP4 TEMP[6].x, TEMP[10], TEMP[6] 320: DP4 TEMP[9].x, TEMP[10], TEMP[9] 321: RCP TEMP[9].x, TEMP[9].xxxx 322: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[9].xxxx 323: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].xxxx 324: DP4 TEMP[2].x, TEMP[2], TEMP[11] 325: MIN TEMP[2].x, IMM[4].wwww, TEMP[2].xxxx 326: MAX TEMP[2].x, TEMP[6].xxxx, TEMP[2].xxxx 327: MAD TEMP[2].x, IMM[2].yyyy, TEMP[2].xxxx, -TEMP[7].xxxx 328: MOV TEMP[4].z, TEMP[2].xxxx 329: MOV TEMP[4].w, TEMP[7].xxxx 330: MOV OUT[1], TEMP[1] 331: MOV OUT[6].xyz, TEMP[0].xyzx 332: MOV OUT[2], TEMP[3] 333: MOV OUT[3], TEMP[5] 334: MOV OUT[4], TEMP[13] 335: MOV OUT[0], TEMP[4] 336: MOV OUT[5], TEMP[12] 337: MOV OUT[7].xy, TEMP[8].xyxx 338: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4 %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 %19 = call float @llvm.SI.load.const(<16 x i8> %18, i32 304) %20 = call float @llvm.SI.load.const(<16 x i8> %18, i32 308) %21 = call float @llvm.SI.load.const(<16 x i8> %18, i32 312) %22 = call float @llvm.SI.load.const(<16 x i8> %18, i32 320) %23 = call float @llvm.SI.load.const(<16 x i8> %18, i32 324) %24 = call float @llvm.SI.load.const(<16 x i8> %18, i32 328) %25 = call float @llvm.SI.load.const(<16 x i8> %18, i32 336) %26 = call float @llvm.SI.load.const(<16 x i8> %18, i32 340) %27 = call float @llvm.SI.load.const(<16 x i8> %18, i32 344) %28 = call float @llvm.SI.load.const(<16 x i8> %18, i32 364) %29 = call float @llvm.SI.load.const(<16 x i8> %18, i32 372) %30 = call float @llvm.SI.load.const(<16 x i8> %18, i32 512) %31 = call float @llvm.SI.load.const(<16 x i8> %18, i32 516) %32 = call float @llvm.SI.load.const(<16 x i8> %18, i32 520) %33 = call float @llvm.SI.load.const(<16 x i8> %18, i32 524) %34 = call float @llvm.SI.load.const(<16 x i8> %18, i32 528) %35 = call float @llvm.SI.load.const(<16 x i8> %18, i32 532) %36 = call float @llvm.SI.load.const(<16 x i8> %18, i32 536) %37 = call float @llvm.SI.load.const(<16 x i8> %18, i32 540) %38 = call float @llvm.SI.load.const(<16 x i8> %18, i32 544) %39 = call float @llvm.SI.load.const(<16 x i8> %18, i32 548) %40 = call float @llvm.SI.load.const(<16 x i8> %18, i32 552) %41 = call float @llvm.SI.load.const(<16 x i8> %18, i32 556) %42 = call float @llvm.SI.load.const(<16 x i8> %18, i32 560) %43 = call float @llvm.SI.load.const(<16 x i8> %18, i32 564) %44 = call float @llvm.SI.load.const(<16 x i8> %18, i32 568) %45 = call float @llvm.SI.load.const(<16 x i8> %18, i32 572) %46 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 5 %47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0 %48 = call float @llvm.SI.load.const(<16 x i8> %47, i32 4) %49 = call float @llvm.SI.load.const(<16 x i8> %47, i32 8) %50 = call float @llvm.SI.load.const(<16 x i8> %47, i32 32) %51 = call float @llvm.SI.load.const(<16 x i8> %47, i32 40) %52 = call float @llvm.SI.load.const(<16 x i8> %47, i32 48) %53 = call float @llvm.SI.load.const(<16 x i8> %47, i32 52) %54 = call float @llvm.SI.load.const(<16 x i8> %47, i32 56) %55 = call float @llvm.SI.load.const(<16 x i8> %47, i32 72) %56 = call float @llvm.SI.load.const(<16 x i8> %47, i32 96) %57 = call float @llvm.SI.load.const(<16 x i8> %47, i32 100) %58 = call float @llvm.SI.load.const(<16 x i8> %47, i32 104) %59 = call float @llvm.SI.load.const(<16 x i8> %47, i32 108) %60 = call float @llvm.SI.load.const(<16 x i8> %47, i32 112) %61 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %62 = load <8 x i32>, <8 x i32> addrspace(2)* %61, align 32, !tbaa !0 %63 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %64 = load <4 x i32>, <4 x i32> addrspace(2)* %63, align 16, !tbaa !0 %65 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %66 = bitcast <8 x i32> addrspace(2)* %65 to <32 x i8> addrspace(2)* %67 = load <32 x i8>, <32 x i8> addrspace(2)* %66, align 32, !tbaa !0 %68 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %69 = bitcast <4 x i32> addrspace(2)* %68 to <16 x i8> addrspace(2)* %70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !tbaa !0 %71 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0 %73 = add i32 %5, %7 %74 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %72, i32 0, i32 %73) %75 = extractelement <4 x float> %74, i32 0 %76 = extractelement <4 x float> %74, i32 1 %77 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %78 = load <16 x i8>, <16 x i8> addrspace(2)* %77, align 16, !tbaa !0 %79 = add i32 %10, %6 %80 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %78, i32 0, i32 %79) %81 = extractelement <4 x float> %80, i32 0 %82 = extractelement <4 x float> %80, i32 2 %83 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %84 = load <16 x i8>, <16 x i8> addrspace(2)* %83, align 16, !tbaa !0 %85 = add i32 %10, %6 %86 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %84, i32 0, i32 %85) %87 = extractelement <4 x float> %86, i32 0 %88 = extractelement <4 x float> %86, i32 1 %89 = extractelement <4 x float> %86, i32 2 %90 = extractelement <4 x float> %86, i32 3 %91 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %92 = load <16 x i8>, <16 x i8> addrspace(2)* %91, align 16, !tbaa !0 %93 = add i32 %10, %6 %94 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %92, i32 0, i32 %93) %95 = extractelement <4 x float> %94, i32 0 %96 = extractelement <4 x float> %94, i32 1 %97 = extractelement <4 x float> %94, i32 2 %98 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %99 = load <16 x i8>, <16 x i8> addrspace(2)* %98, align 16, !tbaa !0 %100 = add i32 %10, %6 %101 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %99, i32 0, i32 %100) %102 = extractelement <4 x float> %101, i32 0 %103 = extractelement <4 x float> %101, i32 1 %104 = extractelement <4 x float> %101, i32 3 %105 = fmul float %75, 5.000000e-01 %106 = fadd float %105, 5.000000e-01 %107 = fmul float %76, -5.000000e-01 %108 = fadd float %107, 5.000000e-01 %109 = fdiv float 1.000000e+00, %14 %110 = fmul float %81, 3.000000e+00 %111 = fcmp olt float %81, %16 %. = select i1 %111, float %110, float 0.000000e+00 %112 = fmul float %15, %. %113 = fadd float %112, %13 %114 = bitcast float %113 to i32 %115 = insertelement <4 x i32> , i32 %114, i32 1 %116 = insertelement <4 x i32> %115, i32 0, i32 2 %117 = bitcast <8 x i32> %62 to <32 x i8> %118 = bitcast <4 x i32> %64 to <16 x i8> %119 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %116, <32 x i8> %117, <16 x i8> %118, i32 2) %120 = extractelement <4 x float> %119, i32 1 %121 = extractelement <4 x float> %119, i32 2 %122 = fmul float %120, 0x40AFFFE140000000 %123 = fdiv float 1.000000e+00, %122 %124 = fmul float %123, %14 %125 = fmul float %124, 5.000000e-01 %126 = fmul float %125, %82 %127 = fmul float %126, %50 %128 = call float @llvm.AMDIL.fraction.(float %127) %129 = call float @floor(float %127) %130 = fmul float %129, 2.000000e+00 %131 = fadd float %14, -2.000000e+00 %132 = call float @llvm.minnum.f32(float %131, float %130) %133 = fdiv float 1.000000e+00, %14 %134 = fmul float %130, %133 %135 = call float @floor(float %134) %136 = fmul float %14, %135 %137 = fsub float %130, %136 %138 = fcmp ogt float %121, 0.000000e+00 %temp16.0 = select i1 %138, float %132, float %137 %139 = fmul float %109, %temp16.0 %140 = bitcast float %139 to i32 %141 = bitcast float %113 to i32 %142 = insertelement <4 x i32> undef, i32 %140, i32 0 %143 = insertelement <4 x i32> %142, i32 %141, i32 1 %144 = insertelement <4 x i32> %143, i32 0, i32 2 %145 = bitcast <8 x i32> %62 to <32 x i8> %146 = bitcast <4 x i32> %64 to <16 x i8> %147 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %144, <32 x i8> %145, <16 x i8> %146, i32 2) %148 = extractelement <4 x float> %147, i32 0 %149 = extractelement <4 x float> %147, i32 3 %150 = call float @llvm.AMDGPU.lrp(float %128, float %149, float %148) %151 = fadd float %113, %15 %152 = bitcast float %139 to i32 %153 = bitcast float %151 to i32 %154 = insertelement <4 x i32> undef, i32 %152, i32 0 %155 = insertelement <4 x i32> %154, i32 %153, i32 1 %156 = insertelement <4 x i32> %155, i32 0, i32 2 %157 = bitcast <8 x i32> %62 to <32 x i8> %158 = bitcast <4 x i32> %64 to <16 x i8> %159 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %156, <32 x i8> %157, <16 x i8> %158, i32 2) %160 = extractelement <4 x float> %159, i32 0 %161 = extractelement <4 x float> %159, i32 1 %162 = extractelement <4 x float> %159, i32 2 %163 = extractelement <4 x float> %159, i32 3 %164 = fmul float %15, 2.000000e+00 %165 = fadd float %164, %113 %166 = bitcast float %139 to i32 %167 = bitcast float %165 to i32 %168 = insertelement <4 x i32> undef, i32 %166, i32 0 %169 = insertelement <4 x i32> %168, i32 %167, i32 1 %170 = insertelement <4 x i32> %169, i32 0, i32 2 %171 = bitcast <8 x i32> %62 to <32 x i8> %172 = bitcast <4 x i32> %64 to <16 x i8> %173 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %170, <32 x i8> %171, <16 x i8> %172, i32 2) %174 = extractelement <4 x float> %173, i32 0 %175 = extractelement <4 x float> %173, i32 1 %176 = extractelement <4 x float> %173, i32 2 %177 = extractelement <4 x float> %173, i32 3 %178 = fadd float %109, %139 %179 = bitcast float %178 to i32 %180 = bitcast float %151 to i32 %181 = insertelement <4 x i32> undef, i32 %179, i32 0 %182 = insertelement <4 x i32> %181, i32 %180, i32 1 %183 = insertelement <4 x i32> %182, i32 0, i32 2 %184 = bitcast <8 x i32> %62 to <32 x i8> %185 = bitcast <4 x i32> %64 to <16 x i8> %186 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %183, <32 x i8> %184, <16 x i8> %185, i32 2) %187 = extractelement <4 x float> %186, i32 0 %188 = extractelement <4 x float> %186, i32 1 %189 = extractelement <4 x float> %186, i32 2 %190 = extractelement <4 x float> %186, i32 3 %191 = fsub float %189, %187 %192 = fsub float %190, %188 %193 = fsub float %162, %160 %194 = fsub float %163, %161 %195 = fdiv float 1.000000e+00, %191 %196 = fdiv float 1.000000e+00, %192 %197 = fmul float %193, %195 %198 = fmul float %194, %196 %199 = fmul float %187, %197 %200 = fmul float %188, %198 %201 = fsub float %160, %199 %202 = fsub float %161, %200 %203 = fadd float %201, %197 %204 = fadd float %202, %198 %205 = fsub float %176, %174 %206 = fsub float %177, %175 %207 = fdiv float 1.000000e+00, %191 %208 = fdiv float 1.000000e+00, %192 %209 = fmul float %205, %207 %210 = fmul float %206, %208 %211 = fmul float %187, %209 %212 = fmul float %188, %210 %213 = fsub float %174, %211 %214 = fsub float %175, %212 %215 = fadd float %213, %209 %216 = fadd float %214, %210 %217 = fmul float %150, 5.000000e-01 %218 = fadd float %217, 5.000000e-01 %219 = fadd float %106, -5.000000e-01 %220 = fadd float %108, -5.000000e-01 %221 = fadd float %106, -5.000000e-01 %222 = fadd float %108, -5.000000e-01 %223 = fdiv float 1.000000e+00, %51 %224 = call float @llvm.AMDIL.clamp.(float %223, float 0.000000e+00, float 1.000000e+00) %225 = fmul float %219, %224 %226 = fadd float %225, 5.000000e-01 %227 = fmul float %220, %224 %228 = fadd float %227, 5.000000e-01 %229 = fmul float %221, %224 %230 = fadd float %229, 5.000000e-01 %231 = fmul float %222, %224 %232 = fadd float %231, 5.000000e-01 %233 = call float @llvm.AMDGPU.lrp(float %218, float %226, float %106) %234 = call float @llvm.AMDGPU.lrp(float %218, float %228, float %108) %235 = call float @llvm.AMDGPU.lrp(float %217, float %230, float %106) %236 = call float @llvm.AMDGPU.lrp(float %217, float %232, float %108) %237 = fmul float %58, %233 %238 = fadd float %237, %56 %239 = fmul float %59, %234 %240 = fadd float %239, %57 %241 = fmul float %58, %235 %242 = fadd float %241, %56 %243 = fmul float %59, %236 %244 = fadd float %243, %57 %245 = fsub float %56, %238 %246 = fsub float %57, %240 %247 = fsub float %56, %242 %248 = fsub float %57, %244 %249 = fmul float %245, 2.000000e+00 %250 = fadd float %249, %58 %251 = fmul float %246, 2.000000e+00 %252 = fadd float %251, %59 %253 = fmul float %247, 2.000000e+00 %254 = fadd float %253, %58 %255 = fmul float %248, 2.000000e+00 %256 = fadd float %255, %59 %257 = fcmp olt float %250, 0.000000e+00 %.94 = select i1 %257, float %189, float %187 %258 = fcmp olt float %252, 0.000000e+00 %temp48.0 = select i1 %258, float %190, float %188 %259 = fsub float %.94, %238 %260 = fsub float %temp48.0, %240 %261 = fdiv float 1.000000e+00, %250 %262 = fdiv float 1.000000e+00, %252 %263 = fmul float %259, %261 %264 = fmul float %260, %262 %265 = call float @llvm.AMDIL.clamp.(float %263, float 0.000000e+00, float 1.000000e+00) %266 = call float @llvm.AMDIL.clamp.(float %264, float 0.000000e+00, float 1.000000e+00) %267 = fmul float %265, %250 %268 = fadd float %267, %238 %269 = fmul float %266, %252 %270 = fadd float %269, %240 %271 = fmul float %265, %254 %272 = fadd float %271, %242 %273 = fmul float %266, %256 %274 = fadd float %273, %244 %275 = fmul float %265, 2.000000e+00 %276 = fmul float %266, 2.000000e+00 %277 = fsub float 1.000000e+00, %275 %278 = fsub float 1.000000e+00, %276 %279 = fmul float %277, %75 %280 = fmul float %278, %76 %281 = call float @llvm.AMDGPU.lrp(float %268, float %203, float %201) %282 = call float @llvm.AMDGPU.lrp(float %270, float %204, float %202) %283 = call float @llvm.AMDGPU.lrp(float %272, float %215, float %213) %284 = call float @llvm.AMDGPU.lrp(float %274, float %216, float %214) %285 = fmul float %87, 0x3FB3D07220000000 %286 = fmul float %88, 0x3FB3D07220000000 %287 = fmul float %89, 0x3FB3D07220000000 %288 = fmul float %87, 0x3FEE54EDE0000000 %289 = fadd float %288, 0x3FAAB12320000000 %290 = fmul float %88, 0x3FEE54EDE0000000 %291 = fadd float %290, 0x3FAAB12320000000 %292 = fmul float %89, 0x3FEE54EDE0000000 %293 = fadd float %292, 0x3FAAB12320000000 %294 = call float @llvm.pow.f32(float %289, float 0x4003333340000000) %295 = call float @llvm.pow.f32(float %291, float 0x4003333340000000) %296 = call float @llvm.pow.f32(float %293, float 0x4003333340000000) %297 = fcmp ogt float %87, 0x3FA4B5DCC0000000 %.95 = select i1 %297, float %294, float %285 %298 = fcmp ogt float %88, 0x3FA4B5DCC0000000 %temp24.0 = select i1 %298, float %295, float %286 %299 = fcmp ogt float %89, 0x3FA4B5DCC0000000 %.96 = select i1 %299, float %296, float %287 %300 = call float @llvm.AMDGPU.lrp(float %60, float %.95, float %87) %301 = call float @llvm.AMDGPU.lrp(float %60, float %temp24.0, float %88) %302 = call float @llvm.AMDGPU.lrp(float %60, float %.96, float %89) %303 = call float @llvm.AMDGPU.lrp(float %60, float %90, float %90) %304 = fmul float %24, %26 %305 = fmul float %22, %27 %306 = fmul float %23, %25 %307 = fmul float %23, %27 %308 = fsub float %307, %304 %309 = fmul float %24, %25 %310 = fsub float %309, %305 %311 = fmul float %22, %26 %312 = fsub float %311, %306 %313 = call float @llvm.cos.f32(float %102) %314 = call float @llvm.sin.f32(float %102) %315 = fmul float %313, %308 %316 = fmul float %313, %310 %317 = fmul float %313, %312 %318 = fmul float %314, %25 %319 = fadd float %318, %315 %320 = fmul float %314, %26 %321 = fadd float %320, %316 %322 = fmul float %314, %27 %323 = fadd float %322, %317 %324 = fmul float %314, %308 %325 = fmul float %314, %310 %326 = fmul float %314, %312 %327 = fmul float %313, %25 %328 = fsub float %327, %324 %329 = fmul float %313, %26 %330 = fsub float %329, %325 %331 = fmul float %313, %27 %332 = fsub float %331, %326 %333 = call float @llvm.sin.f32(float %103) %334 = call float @llvm.cos.f32(float %103) %335 = fsub float 1.000000e+00, %334 %336 = fmul float %328, %330 %337 = fmul float %335, %336 %338 = fmul float %333, %332 %339 = fmul float %328, %332 %340 = fmul float %335, %339 %341 = fmul float %333, %330 %342 = fmul float %330, %332 %343 = fmul float %342, %335 %344 = fmul float %333, %328 %345 = fmul float %328, %328 %346 = call float @llvm.AMDGPU.lrp(float %334, float 1.000000e+00, float %345) %347 = fsub float %337, %338 %348 = fadd float %340, %341 %349 = fadd float %337, %338 %350 = fmul float %330, %330 %351 = call float @llvm.AMDGPU.lrp(float %334, float 1.000000e+00, float %350) %352 = fsub float %343, %344 %353 = fsub float %340, %341 %354 = fadd float %343, %344 %355 = fmul float %332, %332 %356 = call float @llvm.AMDGPU.lrp(float %334, float 1.000000e+00, float %355) %357 = fmul float %319, %346 %358 = fmul float %321, %347 %359 = fadd float %358, %357 %360 = fmul float %323, %348 %361 = fadd float %359, %360 %362 = fmul float %319, %349 %363 = fmul float %321, %351 %364 = fadd float %363, %362 %365 = fmul float %323, %352 %366 = fadd float %364, %365 %367 = fmul float %319, %353 %368 = fmul float %321, %354 %369 = fadd float %368, %367 %370 = fmul float %323, %356 %371 = fadd float %369, %370 %372 = fadd float %53, %279 %373 = fadd float %54, %280 %374 = fmul float %373, %328 %375 = fmul float %373, %330 %376 = fmul float %373, %332 %377 = fmul float %361, %372 %378 = fadd float %377, %374 %379 = fmul float %366, %372 %380 = fadd float %379, %375 %381 = fmul float %371, %372 %382 = fadd float %381, %376 %383 = fsub float %19, %95 %384 = fsub float %20, %96 %385 = fsub float %21, %97 %386 = fmul float %383, %383 %387 = fmul float %384, %384 %388 = fadd float %387, %386 %389 = fmul float %385, %385 %390 = fadd float %388, %389 %391 = call float @llvm.sqrt.f32(float %390) %392 = fcmp ogt float %55, 0.000000e+00 br i1 %392, label %IF83, label %ENDIF82 IF83: ; preds = %main_body %393 = call float @llvm.SI.load.const(<16 x i8> %47, i32 12) %394 = call float @llvm.SI.load.const(<16 x i8> %47, i32 0) %395 = fmul float %391, %48 %396 = fcmp olt float %395, %104 %397 = fsub float %104, %395 %398 = fsub float %49, %48 %399 = fmul float %398, %391 %400 = fdiv float 1.000000e+00, %399 %401 = fmul float %397, %400 %402 = fsub float 1.000000e+00, %401 %403 = fmul float %402, %300 %404 = fmul float %402, %301 %405 = fmul float %402, %302 %406 = fmul float %402, %303 %407 = fmul float %391, %49 %408 = fcmp olt float %407, %104 %.97 = select i1 %408, float 0.000000e+00, float %403 %.98 = select i1 %408, float 0.000000e+00, float %404 %.99 = select i1 %408, float 0.000000e+00, float %405 %.100 = select i1 %408, float 0.000000e+00, float %406 %.97. = select i1 %396, float %.97, float %300 %.98. = select i1 %396, float %.98, float %301 %.99. = select i1 %396, float %.99, float %302 %.100. = select i1 %396, float %.100, float %303 %409 = fmul float %391, %394 %410 = call float @llvm.maxnum.f32(float %104, float %409) %411 = fmul float %391, %393 %412 = call float @llvm.minnum.f32(float %410, float %411) br label %ENDIF82 ENDIF82: ; preds = %main_body, %IF83 %temp20.1 = phi float [ %.97., %IF83 ], [ %300, %main_body ] %temp21.0 = phi float [ %.98., %IF83 ], [ %301, %main_body ] %temp22.0 = phi float [ %.99., %IF83 ], [ %302, %main_body ] %temp23.0 = phi float [ %.100., %IF83 ], [ %303, %main_body ] %temp24.1 = phi float [ %412, %IF83 ], [ %104, %main_body ] %413 = fcmp olt float %temp23.0, 0x3F70101060000000 %.temp24.1 = select i1 %413, float 0.000000e+00, float %temp24.1 %414 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> , <32 x i8> %67, <16 x i8> %70, i32 2) %415 = extractelement <4 x float> %414, i32 0 %416 = extractelement <4 x float> %414, i32 1 %417 = extractelement <4 x float> %414, i32 2 %418 = fmul float %415, 0x3E7AD7F2A0000000 %419 = fmul float %416, 0x3E7AD7F2A0000000 %420 = fmul float %417, 0x3E7AD7F2A0000000 %421 = fmul float %.temp24.1, %378 %422 = fadd float %421, %95 %423 = fmul float %.temp24.1, %380 %424 = fadd float %423, %96 %425 = fmul float %.temp24.1, %382 %426 = fadd float %425, %97 %427 = fadd float %418, %422 %428 = fadd float %419, %424 %429 = fadd float %420, %426 %430 = fmul float %427, %30 %431 = fmul float %428, %34 %432 = fadd float %430, %431 %433 = fmul float %429, %38 %434 = fadd float %432, %433 %435 = fadd float %434, %42 %436 = fmul float %427, %31 %437 = fmul float %428, %35 %438 = fadd float %436, %437 %439 = fmul float %429, %39 %440 = fadd float %438, %439 %441 = fadd float %440, %43 %442 = fmul float %427, %33 %443 = fmul float %428, %37 %444 = fadd float %442, %443 %445 = fmul float %429, %41 %446 = fadd float %444, %445 %447 = fadd float %446, %45 %448 = fmul float %22, %52 %449 = fadd float %448, %427 %450 = fmul float %23, %52 %451 = fadd float %450, %428 %452 = fmul float %24, %52 %453 = fadd float %452, %429 %454 = fsub float %427, %19 %455 = fsub float %428, %20 %456 = fsub float %429, %21 %457 = fmul float %22, %454 %458 = fmul float %23, %455 %459 = fadd float %458, %457 %460 = fmul float %24, %456 %461 = fadd float %459, %460 %462 = fmul float %454, %22 %463 = fmul float %455, %23 %464 = fadd float %463, %462 %465 = fmul float %456, %24 %466 = fadd float %464, %465 %467 = fsub float %466, %28 %468 = fsub float %29, %28 %469 = fdiv float 1.000000e+00, %468 %470 = fmul float %467, %469 %471 = fsub float %427, %19 %472 = fsub float %428, %20 %473 = fsub float %429, %21 %474 = fmul float %471, %471 %475 = fmul float %472, %472 %476 = fadd float %475, %474 %477 = fmul float %473, %473 %478 = fadd float %476, %477 %479 = call float @llvm.AMDGPU.rsq.clamped.f32(float %478) %480 = fmul float %471, %479 %481 = fsub float -0.000000e+00, %480 %482 = fmul float %472, %479 %483 = fsub float -0.000000e+00, %482 %484 = fmul float %473, %479 %485 = fsub float -0.000000e+00, %484 %486 = fsub float -0.000000e+00, %441 %487 = fmul float %449, %32 %488 = fmul float %451, %36 %489 = fadd float %487, %488 %490 = fmul float %453, %40 %491 = fadd float %489, %490 %492 = fadd float %491, %44 %493 = fmul float %449, %33 %494 = fmul float %451, %37 %495 = fadd float %493, %494 %496 = fmul float %453, %41 %497 = fadd float %495, %496 %498 = fadd float %497, %45 %499 = fdiv float 1.000000e+00, %498 %500 = fmul float %492, %499 %501 = fmul float %500, %447 %502 = fmul float %427, %32 %503 = fmul float %428, %36 %504 = fadd float %502, %503 %505 = fmul float %429, %40 %506 = fadd float %504, %505 %507 = fadd float %506, %44 %508 = call float @llvm.minnum.f32(float %507, float 0x3F50624DE0000000) %509 = call float @llvm.maxnum.f32(float %501, float %508) %510 = fmul float %509, 2.000000e+00 %511 = fsub float %510, %447 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %281, float %282, float %283, float %284) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %temp20.1, float %temp21.0, float %temp22.0, float %temp23.0) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %435, float %441, float %470, float %447) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %427, float %428, float %429, float %461) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %481, float %483, float %485, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %150, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %435, float %486, float %511, float %447) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: readnone declare float @floor(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.cos.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sin.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[12:15], s[2:3], 0x4 ; C0860304 v_add_i32_e32 v0, s10, v0 ; 4A00000A v_add_i32_e32 v4, s11, v3 ; 4A08060B s_load_dwordx4 s[28:31], s[2:3], 0x10 ; C08E0310 v_mov_b32_e32 v19, 0x3d558919 ; 7E2602FF 3D558919 v_mov_b32_e32 v12, 0x3f72a76f ; 7E1802FF 3F72A76F v_mov_b32_e32 v20, 0x4019999a ; 7E2802FF 4019999A v_mov_b32_e32 v13, 0x3d9e8391 ; 7E1A02FF 3D9E8391 v_mov_b32_e32 v14, 0x3d25aee6 ; 7E1C02FF 3D25AEE6 s_load_dwordx4 s[16:19], s[2:3], 0x14 ; C0880314 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s0, s[12:15], 0x0 ; C2000D00 s_buffer_load_dword s48, s[12:15], 0x1 ; C2180D01 s_buffer_load_dword s27, s[12:15], 0x2 ; C20D8D02 s_buffer_load_dword s49, s[12:15], 0x3 ; C2188D03 s_buffer_load_dword s14, s[28:31], 0x4c ; C2071D4C s_buffer_load_dword s15, s[28:31], 0x4d ; C2079D4D s_buffer_load_dword s12, s[28:31], 0x52 ; C2061D52 s_buffer_load_dword s22, s[28:31], 0x54 ; C20B1D54 s_buffer_load_dword s21, s[28:31], 0x55 ; C20A9D55 s_buffer_load_dword s20, s[28:31], 0x56 ; C20A1D56 s_buffer_load_dword s13, s[28:31], 0x5b ; C2069D5B s_buffer_load_dword s10, s[16:19], 0x8 ; C2051108 s_buffer_load_dword s11, s[16:19], 0xa ; C205910A s_buffer_load_dword s25, s[16:19], 0xc ; C20C910C s_buffer_load_dword s24, s[16:19], 0xd ; C20C110D s_buffer_load_dword s23, s[16:19], 0xe ; C20B910E s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e64 v15, -2.0, s48 ; D206000F 000060F5 s_buffer_load_dword s26, s[16:19], 0x1c ; C20D111C s_load_dwordx4 s[52:55], s[8:9], 0x0 ; C09A0900 s_load_dwordx4 s[56:59], s[8:9], 0x4 ; C09C0904 v_rcp_f32_e32 v21, s48 ; 7E2A5430 s_load_dwordx4 s[60:63], s[8:9], 0x8 ; C09E0908 v_mov_b32_e32 v29, s0 ; 7E3A0200 s_load_dwordx4 s[44:47], s[8:9], 0xc ; C096090C s_load_dwordx4 s[0:3], s[8:9], 0x10 ; C0800910 s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 s_load_dwordx8 s[36:43], s[6:7], 0x0 ; C0D20700 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e64 v16, 1.0, s26 ; D2080010 000034F2 buffer_load_format_xyzw v[8:11], v0, s[52:55], 0 idxen ; E00C2000 800D0800 buffer_load_format_xyzw v[22:25], v4, s[56:59], 0 idxen ; E00C2000 800E1604 buffer_load_format_xyzw v[31:34], v4, s[60:63], 0 idxen ; E00C2000 800F1F04 buffer_load_format_xyzw v[0:3], v4, s[44:47], 0 idxen ; E00C2000 800B0004 buffer_load_format_xyzw v[4:7], v4, s[0:3], 0 idxen ; E00C2000 80000404 s_waitcnt vmcnt(3) ; BF8C0773 v_cmp_gt_f32_e32 vcc, s49, v22 ; 7C082C31 s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v3, 0x40400000, v22 ; 10062CFF 40400000 v_cndmask_b32_e32 v3, 0, v3 ; 00060680 v_mac_f32_e32 v29, s27, v3 ; 3E3A061B v_mov_b32_e32 v28, 0 ; 7E380280 v_mov_b32_e32 v30, v28 ; 7E3C031C v_cmp_gt_f32_e64 s[0:1], v31, v14 ; D0080000 00021D1F v_cmp_gt_f32_e32 vcc, v32, v14 ; 7C081D20 v_mul_f32_e32 v35, v13, v31 ; 10463F0D v_mul_f32_e32 v36, v13, v32 ; 1048410D v_mul_f32_e32 v37, v13, v33 ; 104A430D v_mad_f32 v38, v12, v31, v19 ; D2820026 044E3F0C v_mad_f32 v39, v12, v32, v19 ; D2820027 044E410C v_mac_f32_e32 v19, v12, v33 ; 3E26430C v_mul_f32_e32 v3, v31, v16 ; 1006211F image_sample_l v[10:11], 6, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[36:43], s[32:35] ; F0900600 01090A1C s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v6, 0x457fff0a, v10 ; 100C14FF 457FFF0A v_rcp_f32_e32 v6, v6 ; 7E0C5506 v_cmp_lt_f32_e64 s[2:3], 0, v11 ; D0020002 00021680 v_mul_f32_e32 v6, s48, v6 ; 100C0C30 v_mul_f32_e32 v6, 0.5, v6 ; 100C0CF0 v_mul_f32_e32 v6, v24, v6 ; 100C0D18 v_mul_f32_e32 v10, s10, v6 ; 10140C0A v_floor_f32_e32 v13, v10 ; 7E1A490A v_add_f32_e32 v10, v13, v13 ; 06141B0D v_mul_f32_e32 v11, v21, v10 ; 10161515 v_floor_f32_e32 v11, v11 ; 7E16490B v_mad_f32 v11, -s48, v11, v10 ; D282000B 242A1630 v_min_f32_e32 v10, v10, v15 ; 1E141F0A v_cndmask_b32_e64 v10, v11, v10, s[2:3] ; D200000A 000A150B v_mul_f32_e32 v26, v10, v21 ; 10342B0A v_cmp_gt_f32_e64 s[2:3], v33, v14 ; D0080002 00021D21 v_mov_b32_e32 v27, v29 ; 7E36031D v_add_f32_e32 v22, s27, v29 ; 062C3A1B image_sample_l v[14:15], 9, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[36:43], s[32:35] ; F0900900 01090E1A v_mov_b32_e32 v27, v22 ; 7E360316 v_mac_f32_e32 v21, v10, v21 ; 3E2A2B0A image_sample_l v[40:43], 15, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[36:43], s[32:35] ; F0900F00 0109281A v_mov_b32_e32 v27, v29 ; 7E36031D v_mac_f32_e64 v27, 2.0, s27 ; D23E001B 000036F4 v_mul_f32_e32 v10, v32, v16 ; 10142120 v_mul_f32_e32 v11, v33, v16 ; 10162121 v_mul_f32_e32 v12, v34, v16 ; 10182122 v_mac_f32_e32 v12, s26, v34 ; 3E18441A v_mov_b32_e32 v23, v28 ; 7E2E031C image_sample_l v[24:27], 15, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[36:43], s[32:35] ; F0900F00 0109181A s_waitcnt vmcnt(0) ; BF8C0770 image_sample_l v[28:31], 15, 0, 0, 0, 0, 0, 0, 0, v[21:24], s[36:43], s[32:35] ; F0900F00 01091C15 s_waitcnt vmcnt(0) ; BF8C0770 v_subrev_f32_e32 v16, v28, v30 ; 0A203D1C v_rcp_f32_e32 v16, v16 ; 7E205510 v_subrev_f32_e32 v17, v29, v31 ; 0A223F1D v_rcp_f32_e32 v21, v17 ; 7E2A5511 v_subrev_f32_e32 v18, v40, v42 ; 0A245528 v_mul_f32_e32 v17, v16, v18 ; 10222510 v_mad_f32 v22, -v28, v17, v40 ; D2820016 24A2231C v_subrev_f32_e32 v23, v41, v43 ; 0A2E5729 v_mul_f32_e32 v17, v21, v23 ; 10222F15 v_mad_f32 v32, -v29, v17, v41 ; D2820020 24A6231D v_subrev_f32_e32 v26, v24, v26 ; 0A343518 v_mul_f32_e32 v17, v16, v26 ; 10223510 v_mad_f32 v24, -v28, v17, v24 ; D2820018 2462231C v_subrev_f32_e32 v27, v25, v27 ; 0A363719 v_mul_f32_e32 v17, v21, v27 ; 10223715 v_mad_f32 v25, -v29, v17, v25 ; D2820019 2466231D v_mad_f32 v13, v6, s10, -v13 ; D282000D 84341506 v_rcp_f32_e32 v17, s11 ; 7E22540B v_sub_f32_e32 v6, 1.0, v13 ; 080C1AF2 v_mul_f32_e32 v6, v14, v6 ; 100C0D0E v_mac_f32_e32 v6, v15, v13 ; 3E0C1B0F v_add_f32_e64 v13, 0, v17 clamp ; D206080D 00022280 v_mad_f32 v14, 0.5, v8, 0.5 ; D282000E 03C210F0 v_add_f32_e32 v15, -0.5, v14 ; 061E1CF1 v_mad_f32 v15, v15, v13, 0.5 ; D282000F 03C21B0F s_buffer_load_dword s27, s[16:19], 0x18 ; C20D9118 s_buffer_load_dword s32, s[16:19], 0x19 ; C2101119 s_buffer_load_dword s33, s[16:19], 0x1a ; C210911A s_buffer_load_dword s34, s[16:19], 0x1b ; C211111B v_mad_f32 v17, -0.5, v9, 0.5 ; D2820011 03C212F1 v_add_f32_e32 v33, -0.5, v17 ; 064222F1 v_mad_f32 v13, v33, v13, 0.5 ; D282000D 03C21B21 v_mad_f32 v33, 0.5, v6, 0.5 ; D2820021 03C20CF0 v_sub_f32_e32 v34, 1.0, v33 ; 084442F2 v_mul_f32_e32 v40, v14, v34 ; 1050450E v_mul_f32_e32 v34, v17, v34 ; 10444511 v_mac_f32_e32 v40, v15, v33 ; 3E50430F v_mac_f32_e32 v34, v13, v33 ; 3E44430D s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v33, s27 ; 7E42021B v_mad_f32 v33, s33, v40, v33 ; D2820021 04865021 v_mov_b32_e32 v40, s32 ; 7E500220 v_mad_f32 v34, s34, v34, v40 ; D2820022 04A24422 v_sub_f32_e32 v40, s27, v33 ; 0850421B v_mad_f32 v40, 2.0, v40, s33 ; D2820028 008650F4 v_sub_f32_e32 v41, s32, v34 ; 08524420 v_mad_f32 v41, 2.0, v41, s34 ; D2820029 008A52F4 v_cmp_gt_f32_e64 s[8:9], 0, v40 ; D0080008 00025080 v_cmp_gt_f32_e64 s[10:11], 0, v41 ; D008000A 00025280 v_cndmask_b32_e64 v28, v28, v30, s[8:9] ; D200001C 00223D1C v_cndmask_b32_e64 v29, v29, v31, s[10:11] ; D200001D 002A3F1D v_mad_f32 v30, 0.5, -v6, 1.0 ; D282001E 43CA0CF0 v_mul_f32_e32 v31, v14, v30 ; 103E3D0E v_mul_f32_e32 v30, v17, v30 ; 103C3D11 v_mul_f32_e32 v14, 0.5, v6 ; 101C0CF0 v_rcp_f32_e32 v17, v40 ; 7E225528 v_mac_f32_e32 v31, v15, v14 ; 3E3E1D0F v_mac_f32_e32 v30, v13, v14 ; 3E3C1D0D v_subrev_f32_e32 v13, v33, v28 ; 0A1A3921 v_mul_f32_e32 v13, v17, v13 ; 101A1B11 v_add_f32_e64 v17, 0, v13 clamp ; D2060811 00021A80 v_mac_f32_e32 v33, v40, v17 ; 3E422328 v_mad_f32 v14, v16, v18, v22 ; D282000E 045A2510 v_rcp_f32_e32 v15, v41 ; 7E1E5529 v_sub_f32_e32 v13, 1.0, v33 ; 081A42F2 v_mul_f32_e32 v13, v22, v13 ; 101A1B16 v_mac_f32_e32 v13, v14, v33 ; 3E1A430E v_subrev_f32_e32 v14, v34, v29 ; 0A1C3B22 v_mul_f32_e32 v14, v15, v14 ; 101C1D0F v_add_f32_e64 v18, 0, v14 clamp ; D2060812 00021C80 v_mac_f32_e32 v34, v41, v18 ; 3E442529 v_mad_f32 v15, v21, v23, v32 ; D282000F 04822F15 v_sub_f32_e32 v14, 1.0, v34 ; 081C44F2 v_mul_f32_e32 v14, v32, v14 ; 101C1D20 v_mac_f32_e32 v14, v15, v34 ; 3E1C450F v_mov_b32_e32 v15, s27 ; 7E1E021B v_mad_f32 v22, s33, v31, v15 ; D2820016 043E3E21 v_mad_f32 v16, v16, v26, v24 ; D2820010 04623510 v_sub_f32_e32 v15, s27, v22 ; 081E2C1B v_mad_f32 v15, 2.0, v15, s33 ; D282000F 00861EF4 v_mac_f32_e32 v22, v15, v17 ; 3E2C230F v_sub_f32_e32 v15, 1.0, v22 ; 081E2CF2 v_mul_f32_e32 v15, v24, v15 ; 101E1F18 v_mac_f32_e32 v15, v16, v22 ; 3E1E2D10 v_mov_b32_e32 v16, s32 ; 7E200220 v_mad_f32 v22, s34, v30, v16 ; D2820016 04423C22 s_buffer_load_dword s8, s[28:31], 0x51 ; C2041D51 s_buffer_load_dword s9, s[28:31], 0x50 ; C2049D50 v_mad_f32 v21, v21, v27, v25 ; D2820015 04663715 v_sub_f32_e32 v16, s32, v22 ; 08202C20 v_mad_f32 v16, 2.0, v16, s34 ; D2820010 008A20F4 v_mac_f32_e32 v22, v16, v18 ; 3E2C2510 v_sub_f32_e32 v16, 1.0, v22 ; 08202CF2 v_mul_f32_e32 v16, v25, v16 ; 10202119 v_mac_f32_e32 v16, v21, v22 ; 3E202D15 v_mov_b32_e32 v21, s21 ; 7E2A0215 v_mul_f32_e32 v21, s12, v21 ; 102A2A0C v_mov_b32_e32 v22, s20 ; 7E2C0214 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v24, v22, s8, -v21 ; D2820018 84541116 v_mov_b32_e32 v21, s20 ; 7E2A0214 v_mul_f32_e32 v21, s9, v21 ; 102A2A09 v_mov_b32_e32 v22, s22 ; 7E2C0216 v_mad_f32 v25, v22, s12, -v21 ; D2820019 84541916 v_mov_b32_e32 v21, s22 ; 7E2A0216 v_mul_f32_e32 v21, s8, v21 ; 102A2A08 v_log_f32_e32 v22, v38 ; 7E2C4F26 v_log_f32_e32 v23, v39 ; 7E2E4F27 v_log_f32_e32 v19, v19 ; 7E264F13 v_mov_b32_e32 v26, s21 ; 7E340215 v_mad_f32 v26, v26, s9, -v21 ; D282001A 8454131A v_mul_legacy_f32_e32 v21, v20, v22 ; 0E2A2D14 v_mul_legacy_f32_e32 v22, v20, v23 ; 0E2C2F14 v_mul_legacy_f32_e32 v19, v20, v19 ; 0E262714 v_exp_f32_e32 v20, v21 ; 7E284B15 v_cndmask_b32_e64 v20, v35, v20, s[0:1] ; D2000014 00022923 v_exp_f32_e32 v21, v22 ; 7E2A4B16 v_cndmask_b32_e32 v21, v36, v21 ; 002A2B24 v_exp_f32_e32 v19, v19 ; 7E264B13 v_cndmask_b32_e64 v19, v37, v19, s[2:3] ; D2000013 000A2725 v_mac_f32_e32 v3, s26, v20 ; 3E06281A v_mac_f32_e32 v10, s26, v21 ; 3E142A1A v_mac_f32_e32 v11, s26, v19 ; 3E16261A s_buffer_load_dword s33, s[28:31], 0x4e ; C2109D4E s_buffer_load_dword s0, s[28:31], 0x5d ; C2001D5D s_buffer_load_dword s35, s[28:31], 0x80 ; C2119D80 s_buffer_load_dword s34, s[28:31], 0x81 ; C2111D81 s_buffer_load_dword s2, s[28:31], 0x82 ; C2011D82 s_buffer_load_dword s3, s[28:31], 0x83 ; C2019D83 s_buffer_load_dword s38, s[28:31], 0x84 ; C2131D84 s_buffer_load_dword s36, s[28:31], 0x85 ; C2121D85 s_buffer_load_dword s26, s[28:31], 0x86 ; C20D1D86 s_buffer_load_dword s27, s[28:31], 0x87 ; C20D9D87 s_buffer_load_dword s39, s[28:31], 0x88 ; C2139D88 s_buffer_load_dword s37, s[28:31], 0x89 ; C2129D89 s_buffer_load_dword s10, s[28:31], 0x8a ; C2051D8A s_buffer_load_dword s32, s[28:31], 0x8b ; C2101D8B s_buffer_load_dword s41, s[28:31], 0x8c ; C2149D8C s_buffer_load_dword s40, s[28:31], 0x8d ; C2141D8D s_buffer_load_dword s11, s[28:31], 0x8e ; C2059D8E s_buffer_load_dword s28, s[28:31], 0x8f ; C20E1D8F v_mov_b32_e32 v19, 0x3e22f983 ; 7E2602FF 3E22F983 v_mul_f32_e32 v4, v19, v4 ; 10080913 v_mul_f32_e32 v5, v19, v5 ; 100A0B13 v_fract_f32_e32 v4, v4 ; 7E084104 v_fract_f32_e32 v5, v5 ; 7E0A4105 v_cos_f32_e32 v29, v4 ; 7E3A6D04 v_sin_f32_e32 v28, v4 ; 7E386B04 v_mul_f32_e32 v4, v24, v28 ; 10083918 v_mad_f32 v19, v29, s22, -v4 ; D2820013 84102D1D v_mul_f32_e32 v4, v25, v28 ; 10083919 v_mad_f32 v20, v29, s21, -v4 ; D2820014 84102B1D v_mul_f32_e32 v4, v26, v28 ; 1008391A v_mad_f32 v21, v29, s20, -v4 ; D2820015 8410291D s_buffer_load_dword s1, s[16:19], 0x12 ; C2009112 v_cos_f32_e32 v30, v5 ; 7E3C6D05 v_sub_f32_e32 v4, 1.0, v30 ; 08083CF2 v_mul_f32_e32 v22, v19, v19 ; 102C2713 v_mul_f32_e32 v31, v22, v4 ; 103E0916 v_mul_f32_e32 v22, v20, v20 ; 102C2914 v_mul_f32_e32 v23, v22, v4 ; 102E0916 v_mul_f32_e32 v22, v21, v21 ; 102C2B15 v_mul_f32_e32 v22, v22, v4 ; 102C0916 v_sin_f32_e32 v27, v5 ; 7E366B05 v_mac_f32_e32 v31, 1.0, v30 ; 3E3E3CF2 v_mac_f32_e32 v23, 1.0, v30 ; 3E2E3CF2 v_mac_f32_e32 v22, 1.0, v30 ; 3E2C3CF2 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_lt_f32_e64 s[30:31], 0, s1 ; D002001E 00000280 v_mov_b32_e32 v5, s0 ; 7E0A0200 v_mov_b32_e32 v4, s25 ; 7E080219 s_and_saveexec_b64 s[30:31], s[30:31] ; BE9E241E s_xor_b64 s[30:31], exec, s[30:31] ; 899E1E7E s_cbranch_execz BB0_2 ; BF880000 v_sub_f32_e32 v32, s14, v0 ; 0840000E v_sub_f32_e32 v33, s15, v1 ; 0842020F v_sub_f32_e32 v34, s33, v2 ; 08440421 v_mul_f32_e32 v32, v32, v32 ; 10404120 s_buffer_load_dword s0, s[16:19], 0x1 ; C2001101 s_buffer_load_dword s1, s[16:19], 0x2 ; C2009102 s_buffer_load_dword s25, s[16:19], 0x0 ; C20C9100 s_buffer_load_dword s29, s[16:19], 0x3 ; C20E9103 v_mac_f32_e32 v32, v33, v33 ; 3E404321 v_mac_f32_e32 v32, v34, v34 ; 3E404522 v_sqrt_f32_e32 v32, v32 ; 7E406720 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v33, s0 ; 7E420200 v_sub_f32_e32 v33, s1, v33 ; 08424201 v_mul_f32_e32 v33, v32, v33 ; 10424320 v_rcp_f32_e32 v33, v33 ; 7E425521 v_mul_f32_e32 v34, s0, v32 ; 10444000 v_mad_f32 v35, -v32, s0, v7 ; D2820023 241C0120 v_mad_f32 v33, -v35, v33, 1.0 ; D2820021 23CA4323 v_cmp_lt_f32_e32 vcc, v34, v7 ; 7C020F22 v_mul_f32_e32 v34, s1, v32 ; 10444001 v_mul_f32_e32 v35, s25, v32 ; 10464019 v_max_f32_e32 v35, v35, v7 ; 20460F23 v_cmp_lt_f32_e64 s[0:1], v34, v7 ; D0020000 00020F22 v_mul_f32_e32 v7, v3, v33 ; 100E4303 v_mul_f32_e32 v34, v10, v33 ; 1044430A v_mul_f32_e32 v36, v11, v33 ; 1048430B v_mul_f32_e32 v33, v12, v33 ; 1042430C v_cndmask_b32_e64 v7, v7, 0, s[0:1] ; D2000007 00010107 v_cndmask_b32_e64 v34, v34, 0, s[0:1] ; D2000022 00010122 v_cndmask_b32_e64 v36, v36, 0, s[0:1] ; D2000024 00010124 v_cndmask_b32_e64 v33, v33, 0, s[0:1] ; D2000021 00010121 v_cndmask_b32_e32 v3, v3, v7 ; 00060F03 v_cndmask_b32_e32 v10, v10, v34 ; 0014450A v_cndmask_b32_e32 v11, v11, v36 ; 0016490B v_cndmask_b32_e32 v12, v12, v33 ; 0018430C v_mul_f32_e32 v7, s29, v32 ; 100E401D v_min_f32_e32 v7, v7, v35 ; 1E0E4707 s_or_b64 exec, exec, s[30:31] ; 88FE1E7E v_mad_f32 v17, -2.0, v17, 1.0 ; D2820011 03CA22F5 v_mad_f32 v8, v17, v8, s24 ; D2820008 00621111 v_mad_f32 v17, -2.0, v18, 1.0 ; D2820011 03CA24F5 v_mad_f32 v9, v17, v9, s23 ; D2820009 005E1311 v_mul_f32_e32 v17, v24, v29 ; 10223B18 v_mac_f32_e32 v17, s22, v28 ; 3E223816 v_mul_f32_e32 v18, v25, v29 ; 10243B19 v_mul_f32_e32 v24, v26, v29 ; 10303B1A v_mac_f32_e32 v18, s21, v28 ; 3E243815 v_mac_f32_e32 v24, s20, v28 ; 3E303814 v_sub_f32_e32 v25, 1.0, v30 ; 08323CF2 v_mul_f32_e32 v26, v31, v17 ; 1034231F s_load_dwordx4 s[16:19], s[4:5], 0x4 ; C0880504 s_load_dwordx8 s[44:51], s[6:7], 0x8 ; C0D60708 v_mul_f32_e32 v28, v20, v19 ; 10382714 v_mul_f32_e32 v29, v21, v27 ; 103A3715 v_mul_f32_e32 v30, v21, v19 ; 103C2715 v_mov_b32_e32 v31, 0x3b808083 ; 7E3E02FF 3B808083 v_cmp_gt_f32_e32 vcc, v31, v12 ; 7C08191F v_cndmask_b32_e64 v7, v7, 0, vcc ; D2000007 01A90107 v_mad_f32 v31, v25, v28, -v29 ; D282001F 84763919 v_mac_f32_e32 v29, v28, v25 ; 3E3A331C v_mul_f32_e32 v28, v20, v27 ; 10383714 v_mad_f32 v32, v30, v25, v28 ; D2820020 0472331E v_mad_f32 v28, v25, v30, -v28 ; D282001C 84723D19 v_mul_f32_e32 v27, v19, v27 ; 10363713 v_mul_f32_e32 v30, v21, v20 ; 103C2915 v_mad_f32 v33, v30, v25, -v27 ; D2820021 846E331E v_mac_f32_e32 v27, v25, v30 ; 3E363D19 v_mac_f32_e32 v26, v31, v18 ; 3E34251F v_mul_f32_e32 v25, v29, v17 ; 1032231D v_mac_f32_e32 v25, v23, v18 ; 3E322517 v_mul_f32_e32 v17, v28, v17 ; 1022231C v_mac_f32_e32 v17, v27, v18 ; 3E22251B v_mac_f32_e32 v26, v32, v24 ; 3E343120 v_mac_f32_e32 v25, v33, v24 ; 3E323121 v_mac_f32_e32 v17, v22, v24 ; 3E223116 v_mul_f32_e32 v18, v19, v9 ; 10241313 v_mul_f32_e32 v19, v20, v9 ; 10261314 v_mul_f32_e32 v9, v21, v9 ; 10121315 v_mac_f32_e32 v18, v8, v26 ; 3E243508 v_mac_f32_e32 v19, v8, v25 ; 3E263308 v_mov_b32_e32 v20, 0 ; 7E280280 v_mac_f32_e32 v9, v8, v17 ; 3E122308 v_mov_b32_e32 v21, v20 ; 7E2A0314 v_mad_f32 v0, v18, v7, v0 ; D2820000 04020F12 v_mad_f32 v1, v19, v7, v1 ; D2820001 04060F13 v_mac_f32_e32 v2, v9, v7 ; 3E040F09 v_mov_b32_e32 v22, v20 ; 7E2C0314 v_mov_b32_e32 v7, 0x33d6bf95 ; 7E0E02FF 33D6BF95 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[17:19], 7, 0, 0, 0, 0, 0, 0, 0, v[20:23], s[44:51], s[16:19] ; F0900700 008B1114 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v0, v7, v17 ; 3E002307 v_mac_f32_e32 v1, v7, v18 ; 3E022507 v_mac_f32_e32 v2, v7, v19 ; 3E042707 exp 15, 32, 0, 0, 0, v20, v20, v20, v20 ; F800020F 14141414 exp 15, 33, 0, 0, 0, v13, v14, v15, v16 ; F800021F 100F0E0D exp 15, 34, 0, 0, 0, v3, v10, v11, v12 ; F800022F 0C0B0A03 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v3, s38, v1 ; 10060226 v_mac_f32_e32 v3, s35, v0 ; 3E060023 v_mac_f32_e32 v3, s39, v2 ; 3E060427 v_add_f32_e32 v3, s41, v3 ; 06060629 v_mul_f32_e32 v7, s36, v1 ; 100E0224 v_mac_f32_e32 v7, s34, v0 ; 3E0E0022 v_mac_f32_e32 v7, s37, v2 ; 3E0E0425 v_add_f32_e32 v7, s40, v7 ; 060E0E28 v_subrev_f32_e32 v8, s14, v0 ; 0A10000E v_subrev_f32_e32 v9, s15, v1 ; 0A12020F v_subrev_f32_e32 v10, s33, v2 ; 0A140421 v_subrev_f32_e32 v5, s13, v5 ; 0A0A0A0D v_rcp_f32_e32 v5, v5 ; 7E0A5505 v_mul_f32_e32 v11, s9, v8 ; 10161009 v_mac_f32_e32 v11, s8, v9 ; 3E161208 v_mac_f32_e32 v11, s12, v10 ; 3E16140C v_subrev_f32_e32 v12, s13, v11 ; 0A18160D v_mul_f32_e32 v5, v5, v12 ; 100A1905 v_mul_f32_e32 v12, s27, v1 ; 1018021B v_mac_f32_e32 v12, s3, v0 ; 3E180003 v_mac_f32_e32 v12, s32, v2 ; 3E180420 v_add_f32_e32 v12, s28, v12 ; 0618181C exp 15, 35, 0, 0, 0, v3, v7, v5, v12 ; F800023F 0C050703 exp 15, 36, 0, 0, 0, v0, v1, v2, v11 ; F800024F 0B020100 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v5, v8, v8 ; 100A1108 v_mac_f32_e32 v5, v9, v9 ; 3E0A1309 v_mac_f32_e32 v5, v10, v10 ; 3E0A150A v_rsq_clamp_f32_e32 v5, v5 ; 7E0A5905 v_mul_f32_e32 v11, s26, v1 ; 1016021A v_mac_f32_e32 v11, s2, v0 ; 3E160002 v_mac_f32_e32 v11, s10, v2 ; 3E16040A v_mad_f32 v2, s12, v4, v2 ; D2820002 040A080C v_mul_f32_e32 v8, v5, v8 ; 10101105 v_mul_f32_e32 v9, v5, v9 ; 10121305 v_mul_f32_e32 v5, v5, v10 ; 100A1505 v_mov_b32_e32 v10, 0x80000000 ; 7E1402FF 80000000 v_xor_b32_e32 v8, v8, v10 ; 3A101508 v_xor_b32_e32 v9, v9, v10 ; 3A121509 v_xor_b32_e32 v5, v5, v10 ; 3A0A1505 exp 15, 37, 0, 0, 0, v8, v9, v5, v20 ; F800025F 14050908 v_mad_f32 v0, s9, v4, v0 ; D2820000 04020809 v_mad_f32 v1, s8, v4, v1 ; D2820001 04060808 v_xor_b32_e32 v4, v7, v10 ; 3A081507 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v5, s26, v1 ; 100A021A v_mul_f32_e32 v1, s27, v1 ; 1002021B v_mac_f32_e32 v1, s3, v0 ; 3E020003 v_mac_f32_e32 v1, s32, v2 ; 3E020420 v_add_f32_e32 v1, s28, v1 ; 0602021C v_rcp_f32_e32 v1, v1 ; 7E025501 v_mac_f32_e32 v5, s2, v0 ; 3E0A0002 v_mac_f32_e32 v5, s10, v2 ; 3E0A040A v_add_f32_e32 v0, s11, v5 ; 06000A0B v_mul_f32_e32 v0, v1, v0 ; 10000101 v_mul_f32_e32 v0, v12, v0 ; 1000010C v_add_f32_e32 v1, s11, v11 ; 0602160B v_min_f32_e32 v1, 0x3a83126f, v1 ; 1E0202FF 3A83126F v_max_f32_e32 v0, v1, v0 ; 20000101 exp 15, 38, 0, 0, 0, v6, v20, v20, v20 ; F800026F 14141406 v_mad_f32 v0, 2.0, v0, -v12 ; D2820000 843200F4 exp 15, 12, 0, 0, 0, v3, v4, v0, v12 ; F80000CF 0C000403 exp 15, 13, 0, 1, 0, v20, v20, v20, v20 ; F80008DF 14141414 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 72 VGPRS: 44 Code Size: 2152 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL IN[5] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL OUT[4], GENERIC[2] DCL OUT[5], GENERIC[3] DCL OUT[6], GENERIC[4] DCL OUT[7], GENERIC[5] DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[1][0] DCL CONST[2][0..15] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..7] DCL CONST[6][0] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL DCL TEMP[2..14], LOCAL IMM[0] FLT32 { 0.0000, 0.5000, -0.5000, 3.0000} IMM[1] UINT32 {0, 4, 32, 96} IMM[2] FLT32 { 2.0000, -2.0000, 1.0000, 0.0774} IMM[3] FLT32 { 0.9479, 0.0521, 2.4000, 0.0404} IMM[4] UINT32 {112, 64, 3, 320} IMM[5] FLT32 { 0.9000, 1.0000, 0.0000, 0.0039} IMM[6] UINT32 {48, 304, 512, 528} IMM[7] FLT32 { 0.0000, 0.0010, 0.0000, 0.0000} IMM[8] UINT32 {544, 560, 516, 532} IMM[9] UINT32 {548, 564, 524, 540} IMM[10] UINT32 {556, 572, 520, 536} IMM[11] UINT32 {552, 568, 364, 372} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MAD TEMP[0].x, IN[0].xxxx, IMM[0].yyyy, IMM[0].yyyy 4: MAD TEMP[2].x, IN[0].yyyy, IMM[0].zzzz, IMM[0].yyyy 5: MOV TEMP[3].x, TEMP[0].xxxx 6: MOV TEMP[3].y, TEMP[2].xxxx 7: MOV TEMP[3].z, TEMP[0].xxxx 8: MOV TEMP[3].w, TEMP[2].xxxx 9: RCP TEMP[0].x, CONST[1][0].yyyy 10: MUL TEMP[2].x, IN[1].xxxx, IMM[0].wwww 11: FSLT TEMP[4].x, IN[1].xxxx, CONST[1][0].wwww 12: UIF TEMP[4].xxxx :0 13: MOV TEMP[2].x, TEMP[2].xxxx 14: ELSE :0 15: MOV TEMP[2].x, IMM[0].xxxx 16: ENDIF 17: MAD TEMP[2].x, CONST[1][0].zzzz, TEMP[2].xxxx, CONST[1][0].xxxx 18: MOV TEMP[4].x, IMM[0].xxxx 19: MOV TEMP[4].y, TEMP[2].xxxx 20: MUL TEMP[5].x, IMM[0].yyyy, CONST[1][0].yyyy 21: MUL TEMP[6].x, IN[1].zzzz, CONST[5][2].xxxx 22: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx 23: FRC TEMP[6].x, TEMP[5].xxxx 24: FLR TEMP[5].x, TEMP[5].xxxx 25: MUL TEMP[5].x, TEMP[5].xxxx, IMM[2].xxxx 26: ADD TEMP[7].x, CONST[1][0].yyyy, IMM[2].yyyy 27: MIN TEMP[7].x, TEMP[7].xxxx, TEMP[5].xxxx 28: RCP TEMP[8].x, CONST[1][0].yyyy 29: MUL TEMP[8].x, TEMP[5].xxxx, TEMP[8].xxxx 30: FLR TEMP[8].x, TEMP[8].xxxx 31: MUL TEMP[8].x, CONST[1][0].yyyy, TEMP[8].xxxx 32: ADD TEMP[5].x, TEMP[5].xxxx, -TEMP[8].xxxx 33: MOV TEMP[4].xy, TEMP[4].xyyy 34: MOV TEMP[4].w, IMM[0].xxxx 35: TXL TEMP[4].z, TEMP[4], SAMP[0], 2D 36: FSLT TEMP[4].x, IMM[0].xxxx, TEMP[4].zzzz 37: UIF TEMP[4].xxxx :0 38: MOV TEMP[4].x, TEMP[7].xxxx 39: ELSE :0 40: MOV TEMP[4].x, TEMP[5].xxxx 41: ENDIF 42: MUL TEMP[4].x, TEMP[0].xxxx, TEMP[4].xxxx 43: MOV TEMP[5].x, TEMP[4].xxxx 44: MOV TEMP[5].y, TEMP[2].xxxx 45: MOV TEMP[5].xy, TEMP[5].xyyy 46: MOV TEMP[5].w, IMM[0].xxxx 47: TXL TEMP[5].xw, TEMP[5], SAMP[0], 2D 48: LRP TEMP[5].x, TEMP[6].xxxx, TEMP[5].wwww, TEMP[5].xxxx 49: ADD TEMP[6].x, TEMP[2].xxxx, CONST[1][0].zzzz 50: MOV TEMP[7].x, TEMP[4].xxxx 51: MOV TEMP[7].y, TEMP[6].xxxx 52: MOV TEMP[7].xy, TEMP[7].xyyy 53: MOV TEMP[7].w, IMM[0].xxxx 54: TXL TEMP[7], TEMP[7], SAMP[0], 2D 55: MOV TEMP[8].x, TEMP[4].xxxx 56: MAD TEMP[2].x, IMM[2].xxxx, CONST[1][0].zzzz, TEMP[2].xxxx 57: MOV TEMP[8].y, TEMP[2].xxxx 58: MOV TEMP[2].xy, TEMP[8].xyyy 59: MOV TEMP[2].w, IMM[0].xxxx 60: TXL TEMP[2], TEMP[2], SAMP[0], 2D 61: ADD TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx 62: MOV TEMP[0].y, TEMP[6].xxxx 63: MOV TEMP[0].xy, TEMP[0].xyyy 64: MOV TEMP[0].w, IMM[0].xxxx 65: TXL TEMP[0], TEMP[0], SAMP[0], 2D 66: ADD TEMP[4].xy, TEMP[0].zwww, -TEMP[0].xyyy 67: ADD TEMP[6].xy, TEMP[7].zwww, -TEMP[7].xyyy 68: RCP TEMP[8].x, TEMP[4].xxxx 69: RCP TEMP[8].y, TEMP[4].yyyy 70: MUL TEMP[6].xy, TEMP[6].xyyy, TEMP[8].xyyy 71: MUL TEMP[8].xy, TEMP[0].xyyy, TEMP[6].xyyy 72: ADD TEMP[7].xy, TEMP[7].xyyy, -TEMP[8].xyyy 73: ADD TEMP[6].xy, TEMP[7].xyyy, TEMP[6].xyyy 74: ADD TEMP[8].xy, TEMP[2].zwww, -TEMP[2].xyyy 75: RCP TEMP[9].x, TEMP[4].xxxx 76: RCP TEMP[9].y, TEMP[4].yyyy 77: MUL TEMP[4].xy, TEMP[8].xyyy, TEMP[9].xyyy 78: MUL TEMP[8].xy, TEMP[0].xyyy, TEMP[4].xyyy 79: ADD TEMP[2].xy, TEMP[2].xyyy, -TEMP[8].xyyy 80: ADD TEMP[4].xy, TEMP[2].xyyy, TEMP[4].xyyy 81: MOV TEMP[8].y, IMM[0].xxxx 82: MOV TEMP[8].x, TEMP[5].xxxx 83: MUL TEMP[5].x, IMM[0].yyyy, TEMP[5].xxxx 84: ADD TEMP[9].x, IMM[0].yyyy, TEMP[5].xxxx 85: MOV TEMP[10].x, TEMP[9].xxxx 86: MOV TEMP[10].y, TEMP[9].xxxx 87: MOV TEMP[10].z, TEMP[5].xxxx 88: MOV TEMP[10].w, TEMP[5].xxxx 89: ADD TEMP[5], TEMP[3], IMM[0].zzzz 90: RCP TEMP[9].x, CONST[5][2].zzzz 91: MOV_SAT TEMP[9].x, TEMP[9].xxxx 92: MAD TEMP[5], TEMP[5], TEMP[9].xxxx, IMM[0].yyyy 93: LRP TEMP[3], TEMP[10], TEMP[5], TEMP[3] 94: MAD TEMP[5].x, CONST[5][6].zzzz, TEMP[3].xxxx, CONST[5][6].xxxx 95: MAD TEMP[9].x, CONST[5][6].wwww, TEMP[3].yyyy, CONST[5][6].yyyy 96: MOV TEMP[10].x, TEMP[5].xxxx 97: MOV TEMP[10].y, TEMP[9].xxxx 98: MAD TEMP[11].x, CONST[5][6].zzzz, TEMP[3].zzzz, CONST[5][6].xxxx 99: MOV TEMP[10].z, TEMP[11].xxxx 100: MAD TEMP[3].x, CONST[5][6].wwww, TEMP[3].wwww, CONST[5][6].yyyy 101: MOV TEMP[10].w, TEMP[3].xxxx 102: ADD TEMP[3], CONST[5][6].xyxy, -TEMP[10] 103: MAD TEMP[3], TEMP[3], IMM[2].xxxx, CONST[5][6].zwzw 104: FSLT TEMP[11].x, TEMP[3].xxxx, IMM[0].xxxx 105: UIF TEMP[11].xxxx :0 106: MOV TEMP[11].x, TEMP[0].zzzz 107: ELSE :0 108: MOV TEMP[11].x, TEMP[0].xxxx 109: ENDIF 110: FSLT TEMP[12].x, TEMP[3].yyyy, IMM[0].xxxx 111: UIF TEMP[12].xxxx :0 112: MOV TEMP[12].x, TEMP[0].wwww 113: ELSE :0 114: MOV TEMP[12].x, TEMP[0].yyyy 115: ENDIF 116: MOV TEMP[0].x, TEMP[11].xxxx 117: MOV TEMP[0].y, TEMP[12].xxxx 118: MOV TEMP[5].x, TEMP[5].xxxx 119: MOV TEMP[5].y, TEMP[9].xxxx 120: ADD TEMP[0].xy, TEMP[0].xyyy, -TEMP[5].xyyy 121: RCP TEMP[5].x, TEMP[3].xxxx 122: RCP TEMP[5].y, TEMP[3].yyyy 123: MUL TEMP[0].xy, TEMP[0].xyyy, TEMP[5].xyyy 124: MOV_SAT TEMP[0].xy, TEMP[0].xyyy 125: MAD TEMP[3], TEMP[0].xyxy, TEMP[3], TEMP[10] 126: MUL TEMP[0].xy, TEMP[0].xyyy, IMM[2].xxxx 127: ADD TEMP[0].xy, IMM[2].zzzz, -TEMP[0].xyyy 128: MUL TEMP[0].xy, TEMP[0].xyyy, IN[0].xyyy 129: LRP TEMP[5].xy, TEMP[3].xyyy, TEMP[6].xyyy, TEMP[7].xyyy 130: LRP TEMP[2].xy, TEMP[3].zwww, TEMP[4].xyyy, TEMP[2].xyyy 131: MOV TEMP[3].x, TEMP[5].xxxx 132: MOV TEMP[3].y, TEMP[5].yyyy 133: MOV TEMP[3].z, TEMP[2].xxxx 134: MOV TEMP[3].w, TEMP[2].yyyy 135: MUL TEMP[2].xyz, IN[2].xyzz, IMM[2].wwww 136: MAD TEMP[4].xyz, IN[2].xyzz, IMM[3].xxxx, IMM[3].yyyy 137: POW TEMP[5].x, TEMP[4].xxxx, IMM[3].zzzz 138: POW TEMP[5].y, TEMP[4].yyyy, IMM[3].zzzz 139: POW TEMP[5].z, TEMP[4].zzzz, IMM[3].zzzz 140: FSLT TEMP[4].x, IMM[3].wwww, IN[2].xxxx 141: UIF TEMP[4].xxxx :0 142: MOV TEMP[4].x, TEMP[5].xxxx 143: ELSE :0 144: MOV TEMP[4].x, TEMP[2].xxxx 145: ENDIF 146: FSLT TEMP[6].x, IMM[3].wwww, IN[2].yyyy 147: UIF TEMP[6].xxxx :0 148: MOV TEMP[6].x, TEMP[5].yyyy 149: ELSE :0 150: MOV TEMP[6].x, TEMP[2].yyyy 151: ENDIF 152: FSLT TEMP[7].x, IMM[3].wwww, IN[2].zzzz 153: UIF TEMP[7].xxxx :0 154: MOV TEMP[5].x, TEMP[5].zzzz 155: ELSE :0 156: MOV TEMP[5].x, TEMP[2].zzzz 157: ENDIF 158: MOV TEMP[2].x, TEMP[4].xxxx 159: MOV TEMP[2].y, TEMP[6].xxxx 160: MOV TEMP[2].z, TEMP[5].xxxx 161: MOV TEMP[2].w, IN[2].wwww 162: LRP TEMP[2], CONST[5][7].xxxx, TEMP[2], IN[2] 163: ABS TEMP[4].x, IN[5].zzzz 164: FSLT TEMP[4].x, IMM[5].xxxx, TEMP[4].xxxx 165: UIF TEMP[4].xxxx :0 166: MOV TEMP[4].xyz, IMM[5].yzzy 167: ELSE :0 168: MOV TEMP[4].xyz, IMM[5].zzyz 169: ENDIF 170: MUL TEMP[5].xyz, IN[5].zxyy, TEMP[4].yzxx 171: MAD TEMP[4].xyz, IN[5].yzxx, TEMP[4].zxyy, -TEMP[5].xyzz 172: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 173: RSQ TEMP[5].x, TEMP[5].xxxx 174: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx 175: MUL TEMP[5].xyz, IN[5].zxyy, TEMP[4].yzxx 176: MAD TEMP[5].xyz, IN[5].yzxx, TEMP[4].zxyy, -TEMP[5].xyzz 177: MOV TEMP[6].x, -CONST[5][4].xxxx 178: DP3 TEMP[7].x, IN[5].xyzz, IN[5].xyzz 179: RSQ TEMP[7].x, TEMP[7].xxxx 180: MUL TEMP[7].xyz, IN[5].xyzz, TEMP[7].xxxx 181: DP3 TEMP[7].x, TEMP[7].xyzz, CONST[4][20].xyzz 182: ABS TEMP[7].x, TEMP[7].xxxx 183: ADD TEMP[7].x, TEMP[7].xxxx, TEMP[6].xxxx 184: ADD TEMP[6].x, CONST[5][4].yyyy, TEMP[6].xxxx 185: RCP TEMP[6].x, TEMP[6].xxxx 186: MUL TEMP[6].x, TEMP[7].xxxx, TEMP[6].xxxx 187: MOV_SAT TEMP[6].x, TEMP[6].xxxx 188: MOV TEMP[7].xyz, TEMP[2].xyzx 189: MUL TEMP[9].x, IMM[2].xxxx, TEMP[6].xxxx 190: ADD TEMP[9].x, IMM[0].wwww, -TEMP[9].xxxx 191: MUL TEMP[9].x, TEMP[6].xxxx, TEMP[9].xxxx 192: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[9].xxxx 193: ADD TEMP[6].x, IMM[2].zzzz, -TEMP[6].xxxx 194: MUL TEMP[2].x, TEMP[6].xxxx, TEMP[2].wwww 195: MOV TEMP[7].w, TEMP[2].xxxx 196: COS TEMP[2].x, IN[4].xxxx 197: SIN TEMP[6].x, IN[4].xxxx 198: MUL TEMP[9].xyz, TEMP[2].xxxx, TEMP[5].xyzz 199: MAD TEMP[9].xyz, TEMP[6].xxxx, TEMP[4].xyzz, TEMP[9].xyzz 200: MUL TEMP[5].xyz, TEMP[6].xxxx, TEMP[5].xyzz 201: MAD TEMP[2].xyz, TEMP[2].xxxx, TEMP[4].xyzz, -TEMP[5].xyzz 202: MUL TEMP[4].x, TEMP[2].xxxx, TEMP[2].xxxx 203: MUL TEMP[5].x, TEMP[2].yyyy, TEMP[2].yyyy 204: MUL TEMP[6].x, TEMP[2].zzzz, TEMP[2].zzzz 205: MOV TEMP[10].yz, IMM[0].xxxx 206: ADD TEMP[11].x, IMM[2].zzzz, -TEMP[4].xxxx 207: ADD TEMP[10].x, TEMP[11].xxxx, TEMP[4].xxxx 208: MOV TEMP[4].x, IMM[0].xxxx 209: MOV TEMP[11].y, IMM[0].xxxx 210: ADD TEMP[12].x, IMM[2].zzzz, -TEMP[5].xxxx 211: ADD TEMP[11].x, TEMP[12].xxxx, TEMP[5].xxxx 212: MOV TEMP[4].yz, TEMP[11].yxyy 213: MOV TEMP[5].xy, IMM[0].xxxx 214: ADD TEMP[11].x, IMM[2].zzzz, -TEMP[6].xxxx 215: ADD TEMP[6].x, TEMP[11].xxxx, TEMP[6].xxxx 216: MOV TEMP[5].z, TEMP[6].xxxx 217: DP3 TEMP[6].x, TEMP[9].xyzz, TEMP[10].xyzz 218: DP3 TEMP[11].x, TEMP[9].xyzz, TEMP[4].xyzz 219: MOV TEMP[6].y, TEMP[11].xxxx 220: DP3 TEMP[9].x, TEMP[9].xyzz, TEMP[5].xyzz 221: MOV TEMP[6].z, TEMP[9].xxxx 222: DP3 TEMP[9].x, IN[5].xyzz, TEMP[10].xyzz 223: DP3 TEMP[4].x, IN[5].xyzz, TEMP[4].xyzz 224: MOV TEMP[9].y, TEMP[4].xxxx 225: DP3 TEMP[4].x, IN[5].xyzz, TEMP[5].xyzz 226: MOV TEMP[9].z, TEMP[4].xxxx 227: ADD TEMP[4].x, CONST[5][3].zzzz, TEMP[0].yyyy 228: ADD TEMP[0].x, CONST[5][3].yyyy, TEMP[0].xxxx 229: MUL TEMP[0].xyz, TEMP[6].xyzz, TEMP[0].xxxx 230: MAD TEMP[0].xyz, TEMP[4].xxxx, TEMP[2].xyzz, TEMP[0].xyzz 231: ADD TEMP[2].xyz, CONST[4][19].xyzz, -IN[3].xyzz 232: DP3 TEMP[2].x, TEMP[2].xyzz, TEMP[2].xyzz 233: SQRT TEMP[2].x, TEMP[2].xxxx 234: MOV TEMP[4], TEMP[7] 235: MOV TEMP[5].x, IN[4].wwww 236: FSLT TEMP[6].x, IMM[0].xxxx, CONST[5][4].zzzz 237: UIF TEMP[6].xxxx :0 238: MUL TEMP[6].x, TEMP[2].xxxx, CONST[5][0].yyyy 239: FSLT TEMP[10].x, TEMP[6].xxxx, IN[4].wwww 240: ADD TEMP[6].x, IN[4].wwww, -TEMP[6].xxxx 241: ADD TEMP[11].x, CONST[5][0].zzzz, -CONST[5][0].yyyy 242: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[2].xxxx 243: RCP TEMP[11].x, TEMP[11].xxxx 244: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[11].xxxx 245: ADD TEMP[6].x, IMM[2].zzzz, -TEMP[6].xxxx 246: MUL TEMP[6], TEMP[6].xxxx, TEMP[7] 247: MUL TEMP[11].x, TEMP[2].xxxx, CONST[5][0].zzzz 248: FSLT TEMP[11].x, TEMP[11].xxxx, IN[4].wwww 249: UIF TEMP[11].xxxx :0 250: MOV TEMP[11], IMM[0].xxxx 251: ELSE :0 252: MOV TEMP[11], TEMP[6] 253: ENDIF 254: UIF TEMP[10].xxxx :0 255: MOV TEMP[6], TEMP[11] 256: ELSE :0 257: MOV TEMP[6], TEMP[7] 258: ENDIF 259: MOV TEMP[4], TEMP[6] 260: MUL TEMP[6].x, TEMP[2].xxxx, CONST[5][0].xxxx 261: MAX TEMP[6].x, IN[4].wwww, TEMP[6].xxxx 262: MUL TEMP[2].x, TEMP[2].xxxx, CONST[5][0].wwww 263: MIN TEMP[5].x, TEMP[6].xxxx, TEMP[2].xxxx 264: ENDIF 265: FSLT TEMP[2].x, TEMP[4].wwww, IMM[5].wwww 266: UIF TEMP[2].xxxx :0 267: MOV TEMP[2].x, IMM[0].xxxx 268: ELSE :0 269: MOV TEMP[2].x, TEMP[5].xxxx 270: ENDIF 271: MOV TEMP[5].xy, IMM[0].xxxx 272: MOV TEMP[5].w, IMM[0].xxxx 273: TXL TEMP[5], TEMP[5], SAMP[1], 2D 274: MUL TEMP[5].xyz, TEMP[5], IMM[7].xxxx 275: MAD TEMP[0].xyz, TEMP[2].xxxx, TEMP[0].xyzz, IN[3].xyzz 276: ADD TEMP[0].xyz, TEMP[5].xyzz, TEMP[0].xyzz 277: MOV TEMP[2].w, IMM[2].zzzz 278: MOV TEMP[2].x, TEMP[0].xxxx 279: MOV TEMP[2].y, TEMP[0].yyyy 280: MOV TEMP[2].z, TEMP[0].zzzz 281: MOV TEMP[5].x, CONST[4][32].xxxx 282: MOV TEMP[5].y, CONST[4][33].xxxx 283: MOV TEMP[5].z, CONST[4][34].xxxx 284: MOV TEMP[5].w, CONST[4][35].xxxx 285: DP4 TEMP[5].x, TEMP[2], TEMP[5] 286: MOV TEMP[6].x, CONST[4][32].yyyy 287: MOV TEMP[6].y, CONST[4][33].yyyy 288: MOV TEMP[6].z, CONST[4][34].yyyy 289: MOV TEMP[6].w, CONST[4][35].yyyy 290: DP4 TEMP[6].x, TEMP[2], TEMP[6] 291: MOV TEMP[7].x, CONST[4][32].wwww 292: MOV TEMP[7].y, CONST[4][33].wwww 293: MOV TEMP[7].z, CONST[4][34].wwww 294: MOV TEMP[7].w, CONST[4][35].wwww 295: DP4 TEMP[7].x, TEMP[2], TEMP[7] 296: MAD TEMP[10].xyz, CONST[4][20].xyzz, CONST[5][3].xxxx, TEMP[0].xyzz 297: MOV TEMP[11].w, IMM[2].zzzz 298: MOV TEMP[11].x, TEMP[10].xxxx 299: MOV TEMP[11].y, TEMP[10].yyyy 300: MOV TEMP[11].z, TEMP[10].zzzz 301: MOV TEMP[10].x, CONST[4][32].zzzz 302: MOV TEMP[10].y, CONST[4][33].zzzz 303: MOV TEMP[10].z, CONST[4][34].zzzz 304: MOV TEMP[10].w, CONST[4][35].zzzz 305: MOV TEMP[12].x, CONST[4][32].wwww 306: MOV TEMP[12].y, CONST[4][33].wwww 307: MOV TEMP[12].z, CONST[4][34].wwww 308: MOV TEMP[12].w, CONST[4][35].wwww 309: MOV TEMP[13].x, CONST[4][32].zzzz 310: MOV TEMP[13].y, CONST[4][33].zzzz 311: MOV TEMP[13].z, CONST[4][34].zzzz 312: MOV TEMP[13].w, CONST[4][35].zzzz 313: DP4 TEMP[10].x, TEMP[11], TEMP[10] 314: DP4 TEMP[11].x, TEMP[11], TEMP[12] 315: RCP TEMP[11].x, TEMP[11].xxxx 316: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx 317: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[7].xxxx 318: DP4 TEMP[2].x, TEMP[2], TEMP[13] 319: MIN TEMP[2].x, IMM[7].yyyy, TEMP[2].xxxx 320: MAX TEMP[2].x, TEMP[10].xxxx, TEMP[2].xxxx 321: MOV TEMP[10].xyz, -CONST[4][19].xyzx 322: ADD TEMP[11].xyz, TEMP[0].xyzz, TEMP[10].xyzz 323: MOV TEMP[12].x, TEMP[0].xxxx 324: MOV TEMP[12].y, TEMP[0].yyyy 325: MOV TEMP[12].z, TEMP[0].zzzz 326: DP3 TEMP[13].x, CONST[4][20].xyzz, TEMP[11].xyzz 327: MOV TEMP[12].w, TEMP[13].xxxx 328: MOV TEMP[13].x, TEMP[5].xxxx 329: MOV TEMP[13].y, TEMP[6].xxxx 330: MOV TEMP[14].x, -CONST[4][22].wwww 331: DP3 TEMP[11].x, TEMP[11].xyzz, CONST[4][20].xyzz 332: ADD TEMP[11].x, TEMP[11].xxxx, TEMP[14].xxxx 333: ADD TEMP[14].x, CONST[4][23].yyyy, TEMP[14].xxxx 334: RCP TEMP[14].x, TEMP[14].xxxx 335: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[14].xxxx 336: MOV TEMP[13].z, TEMP[11].xxxx 337: MOV TEMP[13].w, TEMP[7].xxxx 338: ADD TEMP[0].xyz, TEMP[0].xyzz, TEMP[10].xyzz 339: DP3 TEMP[0].x, TEMP[9].xyzz, TEMP[0].xyzz 340: FSLT TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx 341: UIF TEMP[0].xxxx :0 342: MOV TEMP[0].xyz, TEMP[9].xyzx 343: ELSE :0 344: MOV TEMP[0].xyz, -TEMP[9].xyzx 345: ENDIF 346: MOV TEMP[5].x, TEMP[5].xxxx 347: MOV TEMP[5].y, -TEMP[6].xxxx 348: MAD TEMP[2].x, IMM[2].xxxx, TEMP[2].xxxx, -TEMP[7].xxxx 349: MOV TEMP[5].z, TEMP[2].xxxx 350: MOV TEMP[5].w, TEMP[7].xxxx 351: MOV OUT[1], TEMP[1] 352: MOV OUT[6].xyz, TEMP[0].xyzx 353: MOV OUT[2], TEMP[3] 354: MOV OUT[3], TEMP[4] 355: MOV OUT[4], TEMP[13] 356: MOV OUT[0], TEMP[5] 357: MOV OUT[5], TEMP[12] 358: MOV OUT[7].xy, TEMP[8].xyxx 359: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4 %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 %19 = call float @llvm.SI.load.const(<16 x i8> %18, i32 304) %20 = call float @llvm.SI.load.const(<16 x i8> %18, i32 308) %21 = call float @llvm.SI.load.const(<16 x i8> %18, i32 312) %22 = call float @llvm.SI.load.const(<16 x i8> %18, i32 320) %23 = call float @llvm.SI.load.const(<16 x i8> %18, i32 324) %24 = call float @llvm.SI.load.const(<16 x i8> %18, i32 328) %25 = call float @llvm.SI.load.const(<16 x i8> %18, i32 364) %26 = call float @llvm.SI.load.const(<16 x i8> %18, i32 372) %27 = call float @llvm.SI.load.const(<16 x i8> %18, i32 512) %28 = call float @llvm.SI.load.const(<16 x i8> %18, i32 516) %29 = call float @llvm.SI.load.const(<16 x i8> %18, i32 520) %30 = call float @llvm.SI.load.const(<16 x i8> %18, i32 524) %31 = call float @llvm.SI.load.const(<16 x i8> %18, i32 528) %32 = call float @llvm.SI.load.const(<16 x i8> %18, i32 532) %33 = call float @llvm.SI.load.const(<16 x i8> %18, i32 536) %34 = call float @llvm.SI.load.const(<16 x i8> %18, i32 540) %35 = call float @llvm.SI.load.const(<16 x i8> %18, i32 544) %36 = call float @llvm.SI.load.const(<16 x i8> %18, i32 548) %37 = call float @llvm.SI.load.const(<16 x i8> %18, i32 552) %38 = call float @llvm.SI.load.const(<16 x i8> %18, i32 556) %39 = call float @llvm.SI.load.const(<16 x i8> %18, i32 560) %40 = call float @llvm.SI.load.const(<16 x i8> %18, i32 564) %41 = call float @llvm.SI.load.const(<16 x i8> %18, i32 568) %42 = call float @llvm.SI.load.const(<16 x i8> %18, i32 572) %43 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 5 %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = call float @llvm.SI.load.const(<16 x i8> %44, i32 4) %46 = call float @llvm.SI.load.const(<16 x i8> %44, i32 8) %47 = call float @llvm.SI.load.const(<16 x i8> %44, i32 32) %48 = call float @llvm.SI.load.const(<16 x i8> %44, i32 40) %49 = call float @llvm.SI.load.const(<16 x i8> %44, i32 48) %50 = call float @llvm.SI.load.const(<16 x i8> %44, i32 52) %51 = call float @llvm.SI.load.const(<16 x i8> %44, i32 56) %52 = call float @llvm.SI.load.const(<16 x i8> %44, i32 64) %53 = call float @llvm.SI.load.const(<16 x i8> %44, i32 68) %54 = call float @llvm.SI.load.const(<16 x i8> %44, i32 72) %55 = call float @llvm.SI.load.const(<16 x i8> %44, i32 96) %56 = call float @llvm.SI.load.const(<16 x i8> %44, i32 100) %57 = call float @llvm.SI.load.const(<16 x i8> %44, i32 104) %58 = call float @llvm.SI.load.const(<16 x i8> %44, i32 108) %59 = call float @llvm.SI.load.const(<16 x i8> %44, i32 112) %60 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %61 = load <8 x i32>, <8 x i32> addrspace(2)* %60, align 32, !tbaa !0 %62 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %63 = load <4 x i32>, <4 x i32> addrspace(2)* %62, align 16, !tbaa !0 %64 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %65 = bitcast <8 x i32> addrspace(2)* %64 to <32 x i8> addrspace(2)* %66 = load <32 x i8>, <32 x i8> addrspace(2)* %65, align 32, !tbaa !0 %67 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %68 = bitcast <4 x i32> addrspace(2)* %67 to <16 x i8> addrspace(2)* %69 = load <16 x i8>, <16 x i8> addrspace(2)* %68, align 16, !tbaa !0 %70 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %71 = load <16 x i8>, <16 x i8> addrspace(2)* %70, align 16, !tbaa !0 %72 = add i32 %5, %7 %73 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %71, i32 0, i32 %72) %74 = extractelement <4 x float> %73, i32 0 %75 = extractelement <4 x float> %73, i32 1 %76 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %77 = load <16 x i8>, <16 x i8> addrspace(2)* %76, align 16, !tbaa !0 %78 = add i32 %10, %6 %79 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %77, i32 0, i32 %78) %80 = extractelement <4 x float> %79, i32 0 %81 = extractelement <4 x float> %79, i32 2 %82 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %83 = load <16 x i8>, <16 x i8> addrspace(2)* %82, align 16, !tbaa !0 %84 = add i32 %10, %6 %85 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %83, i32 0, i32 %84) %86 = extractelement <4 x float> %85, i32 0 %87 = extractelement <4 x float> %85, i32 1 %88 = extractelement <4 x float> %85, i32 2 %89 = extractelement <4 x float> %85, i32 3 %90 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %91 = load <16 x i8>, <16 x i8> addrspace(2)* %90, align 16, !tbaa !0 %92 = add i32 %10, %6 %93 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %91, i32 0, i32 %92) %94 = extractelement <4 x float> %93, i32 0 %95 = extractelement <4 x float> %93, i32 1 %96 = extractelement <4 x float> %93, i32 2 %97 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %98 = load <16 x i8>, <16 x i8> addrspace(2)* %97, align 16, !tbaa !0 %99 = add i32 %10, %6 %100 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %98, i32 0, i32 %99) %101 = extractelement <4 x float> %100, i32 0 %102 = extractelement <4 x float> %100, i32 3 %103 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 5 %104 = load <16 x i8>, <16 x i8> addrspace(2)* %103, align 16, !tbaa !0 %105 = add i32 %10, %6 %106 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %104, i32 0, i32 %105) %107 = extractelement <4 x float> %106, i32 0 %108 = extractelement <4 x float> %106, i32 1 %109 = extractelement <4 x float> %106, i32 2 %110 = fmul float %74, 5.000000e-01 %111 = fadd float %110, 5.000000e-01 %112 = fmul float %75, -5.000000e-01 %113 = fadd float %112, 5.000000e-01 %114 = fdiv float 1.000000e+00, %14 %115 = fmul float %80, 3.000000e+00 %116 = fcmp olt float %80, %16 %. = select i1 %116, float %115, float 0.000000e+00 %117 = fmul float %15, %. %118 = fadd float %117, %13 %119 = fmul float %14, 5.000000e-01 %120 = fmul float %81, %47 %121 = fmul float %119, %120 %122 = call float @llvm.AMDIL.fraction.(float %121) %123 = call float @floor(float %121) %124 = fmul float %123, 2.000000e+00 %125 = fadd float %14, -2.000000e+00 %126 = call float @llvm.minnum.f32(float %125, float %124) %127 = fdiv float 1.000000e+00, %14 %128 = fmul float %124, %127 %129 = call float @floor(float %128) %130 = fmul float %14, %129 %131 = fsub float %124, %130 %132 = bitcast float %118 to i32 %133 = insertelement <4 x i32> , i32 %132, i32 1 %134 = insertelement <4 x i32> %133, i32 0, i32 2 %135 = bitcast <8 x i32> %61 to <32 x i8> %136 = bitcast <4 x i32> %63 to <16 x i8> %137 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %134, <32 x i8> %135, <16 x i8> %136, i32 2) %138 = extractelement <4 x float> %137, i32 2 %139 = fcmp ogt float %138, 0.000000e+00 %temp16.0 = select i1 %139, float %126, float %131 %140 = fmul float %114, %temp16.0 %141 = bitcast float %140 to i32 %142 = bitcast float %118 to i32 %143 = insertelement <4 x i32> undef, i32 %141, i32 0 %144 = insertelement <4 x i32> %143, i32 %142, i32 1 %145 = insertelement <4 x i32> %144, i32 0, i32 2 %146 = bitcast <8 x i32> %61 to <32 x i8> %147 = bitcast <4 x i32> %63 to <16 x i8> %148 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %145, <32 x i8> %146, <16 x i8> %147, i32 2) %149 = extractelement <4 x float> %148, i32 0 %150 = extractelement <4 x float> %148, i32 3 %151 = call float @llvm.AMDGPU.lrp(float %122, float %150, float %149) %152 = fadd float %118, %15 %153 = bitcast float %140 to i32 %154 = bitcast float %152 to i32 %155 = insertelement <4 x i32> undef, i32 %153, i32 0 %156 = insertelement <4 x i32> %155, i32 %154, i32 1 %157 = insertelement <4 x i32> %156, i32 0, i32 2 %158 = bitcast <8 x i32> %61 to <32 x i8> %159 = bitcast <4 x i32> %63 to <16 x i8> %160 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %157, <32 x i8> %158, <16 x i8> %159, i32 2) %161 = extractelement <4 x float> %160, i32 0 %162 = extractelement <4 x float> %160, i32 1 %163 = extractelement <4 x float> %160, i32 2 %164 = extractelement <4 x float> %160, i32 3 %165 = fmul float %15, 2.000000e+00 %166 = fadd float %165, %118 %167 = bitcast float %140 to i32 %168 = bitcast float %166 to i32 %169 = insertelement <4 x i32> undef, i32 %167, i32 0 %170 = insertelement <4 x i32> %169, i32 %168, i32 1 %171 = insertelement <4 x i32> %170, i32 0, i32 2 %172 = bitcast <8 x i32> %61 to <32 x i8> %173 = bitcast <4 x i32> %63 to <16 x i8> %174 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %171, <32 x i8> %172, <16 x i8> %173, i32 2) %175 = extractelement <4 x float> %174, i32 0 %176 = extractelement <4 x float> %174, i32 1 %177 = extractelement <4 x float> %174, i32 2 %178 = extractelement <4 x float> %174, i32 3 %179 = fadd float %114, %140 %180 = bitcast float %179 to i32 %181 = bitcast float %152 to i32 %182 = insertelement <4 x i32> undef, i32 %180, i32 0 %183 = insertelement <4 x i32> %182, i32 %181, i32 1 %184 = insertelement <4 x i32> %183, i32 0, i32 2 %185 = bitcast <8 x i32> %61 to <32 x i8> %186 = bitcast <4 x i32> %63 to <16 x i8> %187 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %184, <32 x i8> %185, <16 x i8> %186, i32 2) %188 = extractelement <4 x float> %187, i32 0 %189 = extractelement <4 x float> %187, i32 1 %190 = extractelement <4 x float> %187, i32 2 %191 = extractelement <4 x float> %187, i32 3 %192 = fsub float %190, %188 %193 = fsub float %191, %189 %194 = fsub float %163, %161 %195 = fsub float %164, %162 %196 = fdiv float 1.000000e+00, %192 %197 = fdiv float 1.000000e+00, %193 %198 = fmul float %194, %196 %199 = fmul float %195, %197 %200 = fmul float %188, %198 %201 = fmul float %189, %199 %202 = fsub float %161, %200 %203 = fsub float %162, %201 %204 = fadd float %202, %198 %205 = fadd float %203, %199 %206 = fsub float %177, %175 %207 = fsub float %178, %176 %208 = fdiv float 1.000000e+00, %192 %209 = fdiv float 1.000000e+00, %193 %210 = fmul float %206, %208 %211 = fmul float %207, %209 %212 = fmul float %188, %210 %213 = fmul float %189, %211 %214 = fsub float %175, %212 %215 = fsub float %176, %213 %216 = fadd float %214, %210 %217 = fadd float %215, %211 %218 = fmul float %151, 5.000000e-01 %219 = fadd float %218, 5.000000e-01 %220 = fadd float %111, -5.000000e-01 %221 = fadd float %113, -5.000000e-01 %222 = fadd float %111, -5.000000e-01 %223 = fadd float %113, -5.000000e-01 %224 = fdiv float 1.000000e+00, %48 %225 = call float @llvm.AMDIL.clamp.(float %224, float 0.000000e+00, float 1.000000e+00) %226 = fmul float %220, %225 %227 = fadd float %226, 5.000000e-01 %228 = fmul float %221, %225 %229 = fadd float %228, 5.000000e-01 %230 = fmul float %222, %225 %231 = fadd float %230, 5.000000e-01 %232 = fmul float %223, %225 %233 = fadd float %232, 5.000000e-01 %234 = call float @llvm.AMDGPU.lrp(float %219, float %227, float %111) %235 = call float @llvm.AMDGPU.lrp(float %219, float %229, float %113) %236 = call float @llvm.AMDGPU.lrp(float %218, float %231, float %111) %237 = call float @llvm.AMDGPU.lrp(float %218, float %233, float %113) %238 = fmul float %57, %234 %239 = fadd float %238, %55 %240 = fmul float %58, %235 %241 = fadd float %240, %56 %242 = fmul float %57, %236 %243 = fadd float %242, %55 %244 = fmul float %58, %237 %245 = fadd float %244, %56 %246 = fsub float %55, %239 %247 = fsub float %56, %241 %248 = fsub float %55, %243 %249 = fsub float %56, %245 %250 = fmul float %246, 2.000000e+00 %251 = fadd float %250, %57 %252 = fmul float %247, 2.000000e+00 %253 = fadd float %252, %58 %254 = fmul float %248, 2.000000e+00 %255 = fadd float %254, %57 %256 = fmul float %249, 2.000000e+00 %257 = fadd float %256, %58 %258 = fcmp olt float %251, 0.000000e+00 %.96 = select i1 %258, float %190, float %188 %259 = fcmp olt float %253, 0.000000e+00 %temp48.0 = select i1 %259, float %191, float %189 %260 = fsub float %.96, %239 %261 = fsub float %temp48.0, %241 %262 = fdiv float 1.000000e+00, %251 %263 = fdiv float 1.000000e+00, %253 %264 = fmul float %260, %262 %265 = fmul float %261, %263 %266 = call float @llvm.AMDIL.clamp.(float %264, float 0.000000e+00, float 1.000000e+00) %267 = call float @llvm.AMDIL.clamp.(float %265, float 0.000000e+00, float 1.000000e+00) %268 = fmul float %266, %251 %269 = fadd float %268, %239 %270 = fmul float %267, %253 %271 = fadd float %270, %241 %272 = fmul float %266, %255 %273 = fadd float %272, %243 %274 = fmul float %267, %257 %275 = fadd float %274, %245 %276 = fmul float %266, 2.000000e+00 %277 = fmul float %267, 2.000000e+00 %278 = fsub float 1.000000e+00, %276 %279 = fsub float 1.000000e+00, %277 %280 = fmul float %278, %74 %281 = fmul float %279, %75 %282 = call float @llvm.AMDGPU.lrp(float %269, float %204, float %202) %283 = call float @llvm.AMDGPU.lrp(float %271, float %205, float %203) %284 = call float @llvm.AMDGPU.lrp(float %273, float %216, float %214) %285 = call float @llvm.AMDGPU.lrp(float %275, float %217, float %215) %286 = fmul float %86, 0x3FB3D07220000000 %287 = fmul float %87, 0x3FB3D07220000000 %288 = fmul float %88, 0x3FB3D07220000000 %289 = fmul float %86, 0x3FEE54EDE0000000 %290 = fadd float %289, 0x3FAAB12320000000 %291 = fmul float %87, 0x3FEE54EDE0000000 %292 = fadd float %291, 0x3FAAB12320000000 %293 = fmul float %88, 0x3FEE54EDE0000000 %294 = fadd float %293, 0x3FAAB12320000000 %295 = call float @llvm.pow.f32(float %290, float 0x4003333340000000) %296 = call float @llvm.pow.f32(float %292, float 0x4003333340000000) %297 = call float @llvm.pow.f32(float %294, float 0x4003333340000000) %298 = fcmp ogt float %86, 0x3FA4B5DCC0000000 %.97 = select i1 %298, float %295, float %286 %299 = fcmp ogt float %87, 0x3FA4B5DCC0000000 %temp24.0 = select i1 %299, float %296, float %287 %300 = fcmp ogt float %88, 0x3FA4B5DCC0000000 %.98 = select i1 %300, float %297, float %288 %301 = call float @llvm.AMDGPU.lrp(float %59, float %.97, float %86) %302 = call float @llvm.AMDGPU.lrp(float %59, float %temp24.0, float %87) %303 = call float @llvm.AMDGPU.lrp(float %59, float %.98, float %88) %304 = call float @llvm.AMDGPU.lrp(float %59, float %89, float %89) %305 = call float @fabs(float %109) %306 = fcmp ogt float %305, 0x3FECCCCCC0000000 %temp16.2 = select i1 %306, float 1.000000e+00, float 0.000000e+00 %temp18.0 = select i1 %306, float 0.000000e+00, float 1.000000e+00 %307 = fmul float %109, 0.000000e+00 %308 = fmul float %107, %temp18.0 %309 = fmul float %108, %temp16.2 %310 = fmul float %108, %temp18.0 %311 = fsub float %310, %307 %312 = fmul float %109, %temp16.2 %313 = fsub float %312, %308 %314 = fmul float %107, 0.000000e+00 %315 = fsub float %314, %309 %316 = fmul float %311, %311 %317 = fmul float %313, %313 %318 = fadd float %317, %316 %319 = fmul float %315, %315 %320 = fadd float %318, %319 %321 = call float @llvm.AMDGPU.rsq.clamped.f32(float %320) %322 = fmul float %311, %321 %323 = fmul float %313, %321 %324 = fmul float %315, %321 %325 = fmul float %109, %323 %326 = fmul float %107, %324 %327 = fmul float %108, %322 %328 = fmul float %108, %324 %329 = fsub float %328, %325 %330 = fmul float %109, %322 %331 = fsub float %330, %326 %332 = fmul float %107, %323 %333 = fsub float %332, %327 %334 = fmul float %107, %107 %335 = fmul float %108, %108 %336 = fadd float %335, %334 %337 = fmul float %109, %109 %338 = fadd float %336, %337 %339 = call float @llvm.AMDGPU.rsq.clamped.f32(float %338) %340 = fmul float %107, %339 %341 = fmul float %108, %339 %342 = fmul float %109, %339 %343 = fmul float %340, %22 %344 = fmul float %341, %23 %345 = fadd float %344, %343 %346 = fmul float %342, %24 %347 = fadd float %345, %346 %348 = call float @fabs(float %347) %349 = fsub float %348, %52 %350 = fsub float %53, %52 %351 = fdiv float 1.000000e+00, %350 %352 = fmul float %349, %351 %353 = call float @llvm.AMDIL.clamp.(float %352, float 0.000000e+00, float 1.000000e+00) %354 = fmul float %353, 2.000000e+00 %355 = fsub float 3.000000e+00, %354 %356 = fmul float %353, %355 %357 = fmul float %353, %356 %358 = fsub float 1.000000e+00, %357 %359 = fmul float %358, %304 %360 = call float @llvm.cos.f32(float %101) %361 = call float @llvm.sin.f32(float %101) %362 = fmul float %360, %329 %363 = fmul float %360, %331 %364 = fmul float %360, %333 %365 = fmul float %361, %322 %366 = fadd float %365, %362 %367 = fmul float %361, %323 %368 = fadd float %367, %363 %369 = fmul float %361, %324 %370 = fadd float %369, %364 %371 = fmul float %361, %329 %372 = fmul float %361, %331 %373 = fmul float %361, %333 %374 = fmul float %360, %322 %375 = fsub float %374, %371 %376 = fmul float %360, %323 %377 = fsub float %376, %372 %378 = fmul float %360, %324 %379 = fsub float %378, %373 %380 = fmul float %375, %375 %381 = fmul float %377, %377 %382 = fmul float %379, %379 %383 = fsub float 1.000000e+00, %380 %384 = fadd float %383, %380 %385 = fsub float 1.000000e+00, %381 %386 = fadd float %385, %381 %387 = fsub float 1.000000e+00, %382 %388 = fadd float %387, %382 %389 = fmul float %366, %384 %390 = fmul float %368, 0.000000e+00 %391 = fadd float %390, %389 %392 = fmul float %370, 0.000000e+00 %393 = fadd float %391, %392 %394 = fmul float %366, 0.000000e+00 %395 = fmul float %368, %386 %396 = fadd float %395, %394 %397 = fmul float %370, 0.000000e+00 %398 = fadd float %396, %397 %399 = fmul float %366, 0.000000e+00 %400 = fmul float %368, 0.000000e+00 %401 = fadd float %400, %399 %402 = fmul float %370, %388 %403 = fadd float %401, %402 %404 = fmul float %107, %384 %405 = fmul float %108, 0.000000e+00 %406 = fadd float %405, %404 %407 = fmul float %109, 0.000000e+00 %408 = fadd float %406, %407 %409 = fmul float %107, 0.000000e+00 %410 = fmul float %108, %386 %411 = fadd float %410, %409 %412 = fmul float %109, 0.000000e+00 %413 = fadd float %411, %412 %414 = fmul float %107, 0.000000e+00 %415 = fmul float %108, 0.000000e+00 %416 = fadd float %415, %414 %417 = fmul float %109, %388 %418 = fadd float %416, %417 %419 = fadd float %51, %281 %420 = fadd float %50, %280 %421 = fmul float %393, %420 %422 = fmul float %398, %420 %423 = fmul float %403, %420 %424 = fmul float %419, %375 %425 = fadd float %424, %421 %426 = fmul float %419, %377 %427 = fadd float %426, %422 %428 = fmul float %419, %379 %429 = fadd float %428, %423 %430 = fsub float %19, %94 %431 = fsub float %20, %95 %432 = fsub float %21, %96 %433 = fmul float %430, %430 %434 = fmul float %431, %431 %435 = fadd float %434, %433 %436 = fmul float %432, %432 %437 = fadd float %435, %436 %438 = call float @llvm.sqrt.f32(float %437) %439 = fcmp ogt float %54, 0.000000e+00 br i1 %439, label %IF82, label %ENDIF81 IF82: ; preds = %main_body %440 = call float @llvm.SI.load.const(<16 x i8> %44, i32 12) %441 = call float @llvm.SI.load.const(<16 x i8> %44, i32 0) %442 = fmul float %438, %45 %443 = fcmp olt float %442, %102 %444 = fsub float %102, %442 %445 = fsub float %46, %45 %446 = fmul float %445, %438 %447 = fdiv float 1.000000e+00, %446 %448 = fmul float %444, %447 %449 = fsub float 1.000000e+00, %448 %450 = fmul float %449, %301 %451 = fmul float %449, %302 %452 = fmul float %449, %303 %453 = fmul float %449, %359 %454 = fmul float %438, %46 %455 = fcmp olt float %454, %102 %.99 = select i1 %455, float 0.000000e+00, float %450 %.100 = select i1 %455, float 0.000000e+00, float %451 %.101 = select i1 %455, float 0.000000e+00, float %452 %.102 = select i1 %455, float 0.000000e+00, float %453 %.99. = select i1 %443, float %.99, float %301 %.100. = select i1 %443, float %.100, float %302 %.101. = select i1 %443, float %.101, float %303 %.102. = select i1 %443, float %.102, float %359 %456 = fmul float %438, %441 %457 = call float @llvm.maxnum.f32(float %102, float %456) %458 = fmul float %438, %440 %459 = call float @llvm.minnum.f32(float %457, float %458) br label %ENDIF81 ENDIF81: ; preds = %main_body, %IF82 %temp16.3 = phi float [ %.99., %IF82 ], [ %301, %main_body ] %temp17.1 = phi float [ %.100., %IF82 ], [ %302, %main_body ] %temp18.1 = phi float [ %.101., %IF82 ], [ %303, %main_body ] %temp19.0 = phi float [ %.102., %IF82 ], [ %359, %main_body ] %temp20.1 = phi float [ %459, %IF82 ], [ %102, %main_body ] %460 = fcmp olt float %temp19.0, 0x3F70101060000000 %.temp20.1 = select i1 %460, float 0.000000e+00, float %temp20.1 %461 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> , <32 x i8> %66, <16 x i8> %69, i32 2) %462 = extractelement <4 x float> %461, i32 0 %463 = extractelement <4 x float> %461, i32 1 %464 = extractelement <4 x float> %461, i32 2 %465 = fmul float %462, 0x3E7AD7F2A0000000 %466 = fmul float %463, 0x3E7AD7F2A0000000 %467 = fmul float %464, 0x3E7AD7F2A0000000 %468 = fmul float %.temp20.1, %425 %469 = fadd float %468, %94 %470 = fmul float %.temp20.1, %427 %471 = fadd float %470, %95 %472 = fmul float %.temp20.1, %429 %473 = fadd float %472, %96 %474 = fadd float %465, %469 %475 = fadd float %466, %471 %476 = fadd float %467, %473 %477 = fmul float %474, %27 %478 = fmul float %475, %31 %479 = fadd float %477, %478 %480 = fmul float %476, %35 %481 = fadd float %479, %480 %482 = fadd float %481, %39 %483 = fmul float %474, %28 %484 = fmul float %475, %32 %485 = fadd float %483, %484 %486 = fmul float %476, %36 %487 = fadd float %485, %486 %488 = fadd float %487, %40 %489 = fmul float %474, %30 %490 = fmul float %475, %34 %491 = fadd float %489, %490 %492 = fmul float %476, %38 %493 = fadd float %491, %492 %494 = fadd float %493, %42 %495 = fmul float %22, %49 %496 = fadd float %495, %474 %497 = fmul float %23, %49 %498 = fadd float %497, %475 %499 = fmul float %24, %49 %500 = fadd float %499, %476 %501 = fmul float %496, %29 %502 = fmul float %498, %33 %503 = fadd float %501, %502 %504 = fmul float %500, %37 %505 = fadd float %503, %504 %506 = fadd float %505, %41 %507 = fmul float %496, %30 %508 = fmul float %498, %34 %509 = fadd float %507, %508 %510 = fmul float %500, %38 %511 = fadd float %509, %510 %512 = fadd float %511, %42 %513 = fdiv float 1.000000e+00, %512 %514 = fmul float %506, %513 %515 = fmul float %514, %494 %516 = fmul float %474, %29 %517 = fmul float %475, %33 %518 = fadd float %516, %517 %519 = fmul float %476, %37 %520 = fadd float %518, %519 %521 = fadd float %520, %41 %522 = call float @llvm.minnum.f32(float %521, float 0x3F50624DE0000000) %523 = call float @llvm.maxnum.f32(float %515, float %522) %524 = fsub float %474, %19 %525 = fsub float %475, %20 %526 = fsub float %476, %21 %527 = fmul float %22, %524 %528 = fmul float %23, %525 %529 = fadd float %528, %527 %530 = fmul float %24, %526 %531 = fadd float %529, %530 %532 = fmul float %524, %22 %533 = fmul float %525, %23 %534 = fadd float %533, %532 %535 = fmul float %526, %24 %536 = fadd float %534, %535 %537 = fsub float %536, %25 %538 = fsub float %26, %25 %539 = fdiv float 1.000000e+00, %538 %540 = fmul float %537, %539 %541 = fsub float %474, %19 %542 = fsub float %475, %20 %543 = fsub float %476, %21 %544 = fmul float %408, %541 %545 = fmul float %413, %542 %546 = fadd float %545, %544 %547 = fmul float %418, %543 %548 = fadd float %546, %547 %549 = fcmp olt float %548, 0.000000e+00 br i1 %549, label %ENDIF93, label %ELSE95 ELSE95: ; preds = %ENDIF81 %550 = fsub float -0.000000e+00, %408 %551 = fsub float -0.000000e+00, %413 %552 = fsub float -0.000000e+00, %418 br label %ENDIF93 ENDIF93: ; preds = %ENDIF81, %ELSE95 %temp.0 = phi float [ %550, %ELSE95 ], [ %408, %ENDIF81 ] %temp1.0 = phi float [ %551, %ELSE95 ], [ %413, %ENDIF81 ] %temp2.0 = phi float [ %552, %ELSE95 ], [ %418, %ENDIF81 ] %553 = fsub float -0.000000e+00, %488 %554 = fmul float %523, 2.000000e+00 %555 = fsub float %554, %494 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %282, float %283, float %284, float %285) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %temp16.3, float %temp17.1, float %temp18.1, float %temp19.0) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %482, float %488, float %540, float %494) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %474, float %475, float %476, float %531) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %temp.0, float %temp1.0, float %temp2.0, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %151, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %482, float %553, float %555, float %494) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: readnone declare float @floor(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.cos.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sin.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[12:15], s[2:3], 0x4 ; C0860304 v_add_i32_e32 v0, s10, v0 ; 4A00000A v_add_i32_e32 v7, s11, v3 ; 4A0E060B s_load_dwordx4 s[16:19], s[2:3], 0x10 ; C0880310 v_mov_b32_e32 v18, 0x40400000 ; 7E2402FF 40400000 v_mov_b32_e32 v19, 0x3f666666 ; 7E2602FF 3F666666 v_mov_b32_e32 v20, 0x3d558919 ; 7E2802FF 3D558919 v_mov_b32_e32 v15, 0x3f72a76f ; 7E1E02FF 3F72A76F v_mov_b32_e32 v16, 0x3d9e8391 ; 7E2002FF 3D9E8391 v_mov_b32_e32 v17, 0x3d25aee6 ; 7E2202FF 3D25AEE6 s_load_dwordx4 s[20:23], s[2:3], 0x14 ; C08A0314 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s0, s[12:15], 0x0 ; C2000D00 s_buffer_load_dword s36, s[12:15], 0x1 ; C2120D01 s_buffer_load_dword s11, s[12:15], 0x2 ; C2058D02 s_buffer_load_dword s1, s[12:15], 0x3 ; C2008D03 s_load_dwordx4 s[24:27], s[8:9], 0x0 ; C08C0900 s_load_dwordx4 s[28:31], s[8:9], 0x4 ; C08E0904 s_load_dwordx4 s[32:35], s[8:9], 0x8 ; C0900908 s_load_dwordx4 s[40:43], s[8:9], 0xc ; C094090C s_load_dwordx4 s[44:47], s[8:9], 0x10 ; C0960910 s_buffer_load_dword s37, s[20:23], 0x8 ; C2129508 s_buffer_load_dword s10, s[20:23], 0xa ; C205150A s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e64 v21, 0.5, s36 ; D2100015 000048F0 v_add_f32_e64 v22, -2.0, s36 ; D2060016 000048F5 s_buffer_load_dword s12, s[20:23], 0xc ; C206150C buffer_load_format_xyzw v[11:14], v0, s[24:27], 0 idxen ; E00C2000 80060B00 buffer_load_format_xyzw v[23:26], v7, s[28:31], 0 idxen ; E00C2000 80071707 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[26:29], v7, s[32:35], 0 idxen ; E00C2000 80081A07 s_buffer_load_dword s14, s[20:23], 0xd ; C207150D s_buffer_load_dword s13, s[20:23], 0xe ; C206950E s_buffer_load_dword s15, s[20:23], 0x1c ; C207951C s_load_dwordx4 s[48:51], s[8:9], 0x14 ; C0980914 s_load_dwordx4 s[24:27], s[4:5], 0x0 ; C08C0500 buffer_load_format_xyzw v[0:3], v7, s[40:43], 0 idxen ; E00C2000 800A0007 s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[3:6], v7, s[44:47], 0 idxen ; E00C2000 800B0307 v_rcp_f32_e32 v30, s36 ; 7E3C5424 v_mov_b32_e32 v38, s0 ; 7E4C0200 v_sub_f32_e64 v14, 1.0, s15 ; D208000E 00001EF2 buffer_load_format_xyzw v[7:10], v7, s[48:51], 0 idxen ; E00C2000 800C0707 v_cmp_gt_f32_e64 s[8:9], s1, v23 ; D0080008 00022E01 v_cmp_gt_f32_e64 s[2:3], v26, v17 ; D0080002 0002231A v_cmp_gt_f32_e64 s[0:1], v27, v17 ; D0080000 0002231B v_cmp_gt_f32_e32 vcc, v28, v17 ; 7C08231C s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v10, s37, v25 ; 10143225 v_mul_f32_e32 v4, v18, v23 ; 10082F12 v_cndmask_b32_e64 v4, 0, v4, s[8:9] ; D2000004 00220880 v_mac_f32_e32 v38, s11, v4 ; 3E4C080B v_mov_b32_e32 v37, 0 ; 7E4A0280 v_mov_b32_e32 v39, v37 ; 7E4E0325 image_sample_l v4, 4, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[28:35], s[24:27] ; F0900400 00C70425 s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_lt_f32_e64 s[8:9], 0, v4 ; D0020008 00020880 v_mul_f32_e32 v4, v10, v21 ; 10082B0A v_floor_f32_e32 v17, v4 ; 7E224904 v_add_f32_e32 v4, v17, v17 ; 06082311 v_min_f32_e32 v5, v4, v22 ; 1E0A2D04 v_mul_f32_e32 v13, v30, v4 ; 101A091E v_floor_f32_e32 v13, v13 ; 7E1A490D v_mad_f32 v4, -s36, v13, v4 ; D2820004 24121A24 v_cndmask_b32_e64 v4, v4, v5, s[8:9] ; D2000004 00220B04 v_mul_f32_e32 v35, v4, v30 ; 10463D04 v_mad_f32 v23, v15, v26, v20 ; D2820017 0452350F v_mad_f32 v24, v15, v27, v20 ; D2820018 0452370F v_mac_f32_e32 v20, v15, v28 ; 3E28390F v_mov_b32_e32 v36, v38 ; 7E480326 v_add_f32_e32 v31, s11, v38 ; 063E4C0B image_sample_l v[33:34], 9, 0, 0, 0, 0, 0, 0, 0, v[35:38], s[28:35], s[24:27] ; F0900900 00C72123 v_mov_b32_e32 v36, v31 ; 7E48031F v_mac_f32_e32 v30, v4, v30 ; 3E3C3D04 image_sample_l v[39:42], 15, 0, 0, 0, 0, 0, 0, 0, v[35:38], s[28:35], s[24:27] ; F0900F00 00C72723 v_mov_b32_e32 v36, v38 ; 7E480326 v_mac_f32_e64 v36, 2.0, s11 ; D23E0024 000016F4 v_mul_f32_e32 v25, v16, v26 ; 10323510 v_mul_f32_e32 v38, v16, v27 ; 104C3710 v_mul_f32_e32 v43, v16, v28 ; 10563910 v_mul_f32_e32 v4, v26, v14 ; 10081D1A v_mul_f32_e32 v5, v27, v14 ; 100A1D1B v_mul_f32_e32 v13, v28, v14 ; 101A1D1C v_mul_f32_e32 v26, v29, v14 ; 10341D1D v_mac_f32_e32 v26, s15, v29 ; 3E343A0F v_mov_b32_e32 v32, v37 ; 7E400325 image_sample_l v[44:47], 15, 0, 0, 0, 0, 0, 0, 0, v[35:38], s[28:35], s[24:27] ; F0900F00 00C72C23 s_waitcnt vmcnt(2) ; BF8C0772 image_sample_l v[27:30], 15, 0, 0, 0, 0, 0, 0, 0, v[30:33], s[28:35], s[24:27] ; F0900F00 00C71B1E s_waitcnt vmcnt(0) ; BF8C0770 v_subrev_f32_e32 v14, v27, v29 ; 0A1C3B1B v_rcp_f32_e32 v16, v14 ; 7E20550E v_subrev_f32_e32 v14, v28, v30 ; 0A1C3D1C v_rcp_f32_e32 v31, v14 ; 7E3E550E v_subrev_f32_e32 v14, v39, v41 ; 0A1C5327 v_mul_f32_e32 v15, v16, v14 ; 101E1D10 v_mad_f32 v15, -v27, v15, v39 ; D282000F 249E1F1B v_subrev_f32_e32 v32, v40, v42 ; 0A405528 v_mul_f32_e32 v22, v31, v32 ; 102C411F v_mad_f32 v35, -v28, v22, v40 ; D2820023 24A22D1C v_subrev_f32_e32 v36, v44, v46 ; 0A485D2C v_mul_f32_e32 v22, v16, v36 ; 102C4910 v_mad_f32 v37, -v27, v22, v44 ; D2820025 24B22D1B v_subrev_f32_e32 v39, v45, v47 ; 0A4E5F2D v_mul_f32_e32 v22, v31, v39 ; 102C4F1F v_mad_f32 v40, -v28, v22, v45 ; D2820028 24B62D1C v_mad_f32 v17, v21, v10, -v17 ; D2820011 84461515 v_rcp_f32_e32 v21, s10 ; 7E2A540A v_sub_f32_e32 v10, 1.0, v17 ; 081422F2 v_mul_f32_e32 v10, v33, v10 ; 10141521 v_mac_f32_e32 v10, v34, v17 ; 3E142322 v_add_f32_e64 v17, 0, v21 clamp ; D2060811 00022A80 v_mad_f32 v21, 0.5, v11, 0.5 ; D2820015 03C216F0 v_add_f32_e32 v22, -0.5, v21 ; 062C2AF1 v_mad_f32 v22, v22, v17, 0.5 ; D2820016 03C22316 s_buffer_load_dword s24, s[20:23], 0x1a ; C20C151A s_buffer_load_dword s25, s[20:23], 0x1b ; C20C951B s_buffer_load_dword s26, s[20:23], 0x18 ; C20D1518 s_buffer_load_dword s27, s[20:23], 0x19 ; C20D9519 v_mad_f32 v33, -0.5, v12, 0.5 ; D2820021 03C218F1 v_add_f32_e32 v34, -0.5, v33 ; 064442F1 v_mad_f32 v17, v34, v17, 0.5 ; D2820011 03C22322 v_mad_f32 v34, 0.5, v10, 0.5 ; D2820022 03C214F0 v_sub_f32_e32 v41, 1.0, v34 ; 085244F2 v_mul_f32_e32 v42, v21, v41 ; 10545315 v_mul_f32_e32 v41, v33, v41 ; 10525321 v_mac_f32_e32 v42, v22, v34 ; 3E544516 v_mac_f32_e32 v41, v17, v34 ; 3E524511 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v34, s26 ; 7E44021A v_mad_f32 v34, s24, v42, v34 ; D2820022 048A5418 v_mov_b32_e32 v42, s27 ; 7E54021B v_mad_f32 v41, s25, v41, v42 ; D2820029 04AA5219 v_sub_f32_e32 v42, s26, v34 ; 0854441A v_mad_f32 v42, 2.0, v42, s24 ; D282002A 006254F4 v_sub_f32_e32 v44, s27, v41 ; 0858521B v_mad_f32 v44, 2.0, v44, s25 ; D282002C 006658F4 v_cmp_gt_f32_e64 s[8:9], 0, v42 ; D0080008 00025480 v_cmp_gt_f32_e64 s[10:11], 0, v44 ; D008000A 00025880 v_cndmask_b32_e64 v27, v27, v29, s[8:9] ; D200001B 00223B1B v_cndmask_b32_e64 v28, v28, v30, s[10:11] ; D200001C 002A3D1C v_mad_f32 v29, 0.5, -v10, 1.0 ; D282001D 43CA14F0 v_mul_f32_e32 v30, v21, v29 ; 103C3B15 v_mul_f32_e32 v29, v33, v29 ; 103A3B21 v_mul_f32_e32 v21, 0.5, v10 ; 102A14F0 v_rcp_f32_e32 v33, v42 ; 7E42552A v_mac_f32_e32 v30, v22, v21 ; 3E3C2B16 v_mac_f32_e32 v29, v17, v21 ; 3E3A2B11 v_subrev_f32_e32 v17, v34, v27 ; 0A223722 v_mul_f32_e32 v17, v33, v17 ; 10222321 v_add_f32_e64 v21, 0, v17 clamp ; D2060815 00022280 v_mac_f32_e32 v34, v42, v21 ; 3E442B2A v_mad_f32 v17, v16, v14, v15 ; D2820011 043E1D10 v_rcp_f32_e32 v22, v44 ; 7E2C552C v_sub_f32_e32 v14, 1.0, v34 ; 081C44F2 v_mul_f32_e32 v14, v15, v14 ; 101C1D0F v_mac_f32_e32 v14, v17, v34 ; 3E1C4511 v_subrev_f32_e32 v15, v41, v28 ; 0A1E3929 v_mul_f32_e32 v15, v22, v15 ; 101E1F16 v_add_f32_e64 v22, 0, v15 clamp ; D2060816 00021E80 v_mac_f32_e32 v41, v44, v22 ; 3E522D2C v_mad_f32 v17, v31, v32, v35 ; D2820011 048E411F v_sub_f32_e32 v15, 1.0, v41 ; 081E52F2 v_mul_f32_e32 v15, v35, v15 ; 101E1F23 v_mac_f32_e32 v15, v17, v41 ; 3E1E5311 v_mov_b32_e32 v17, s26 ; 7E22021A v_mad_f32 v17, s24, v30, v17 ; D2820011 04463C18 v_mad_f32 v27, v16, v36, v37 ; D282001B 04964910 v_sub_f32_e32 v16, s26, v17 ; 0820221A v_mad_f32 v16, 2.0, v16, s24 ; D2820010 006220F4 v_mac_f32_e32 v17, v16, v21 ; 3E222B10 v_sub_f32_e32 v16, 1.0, v17 ; 082022F2 v_mul_f32_e32 v16, v37, v16 ; 10202125 v_mac_f32_e32 v16, v27, v17 ; 3E20231B v_mov_b32_e32 v17, s27 ; 7E22021B v_mad_f32 v27, s25, v29, v17 ; D282001B 04463A19 v_mad_f32 v28, v31, v39, v40 ; D282001C 04A24F1F v_sub_f32_e32 v17, s27, v27 ; 0822361B v_mad_f32 v17, 2.0, v17, s25 ; D2820011 006622F4 v_mac_f32_e32 v27, v17, v22 ; 3E362D11 v_sub_f32_e32 v17, 1.0, v27 ; 082236F2 v_mul_f32_e32 v17, v40, v17 ; 10222328 v_mac_f32_e32 v17, v28, v27 ; 3E22371C v_cmp_gt_f32_e64 s[10:11], |v9|, v19 ; D008010A 00022709 v_log_f32_e32 v19, v23 ; 7E264F17 v_log_f32_e32 v23, v24 ; 7E2E4F18 v_log_f32_e32 v20, v20 ; 7E284F14 v_mov_b32_e32 v24, 0x4019999a ; 7E3002FF 4019999A v_mul_legacy_f32_e32 v19, v24, v19 ; 0E262718 v_mul_legacy_f32_e32 v23, v24, v23 ; 0E2E2F18 v_mul_legacy_f32_e32 v20, v24, v20 ; 0E282918 v_exp_f32_e32 v19, v19 ; 7E264B13 v_cndmask_b32_e64 v19, v25, v19, s[2:3] ; D2000013 000A2719 v_exp_f32_e32 v23, v23 ; 7E2E4B17 v_cndmask_b32_e64 v23, v38, v23, s[0:1] ; D2000017 00022F26 v_exp_f32_e32 v20, v20 ; 7E284B14 v_cndmask_b32_e32 v20, v43, v20 ; 0028292B v_mac_f32_e32 v4, s15, v19 ; 3E08260F v_mul_f32_e32 v19, v7, v7 ; 10260F07 v_mac_f32_e32 v19, v8, v8 ; 3E261108 s_buffer_load_dword s2, s[16:19], 0x50 ; C2011150 v_mac_f32_e32 v19, v9, v9 ; 3E261309 s_buffer_load_dword s3, s[16:19], 0x51 ; C2019151 s_buffer_load_dword s0, s[20:23], 0x10 ; C2001510 s_buffer_load_dword s1, s[20:23], 0x11 ; C2009511 v_rsq_clamp_f32_e32 v19, v19 ; 7E265913 v_mac_f32_e32 v5, s15, v23 ; 3E0A2E0F v_mac_f32_e32 v13, s15, v20 ; 3E1A280F s_buffer_load_dword s8, s[16:19], 0x52 ; C2041152 v_mul_f32_e32 v20, v19, v7 ; 10280F13 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v20, s2, v20 ; 10282802 v_mul_f32_e32 v23, v19, v8 ; 102E1113 v_mac_f32_e32 v20, s3, v23 ; 3E282E03 s_buffer_load_dword s24, s[20:23], 0x12 ; C20C1512 v_mov_b32_e32 v23, s0 ; 7E2E0200 v_sub_f32_e32 v23, s1, v23 ; 082E2E01 v_mul_f32_e32 v19, v19, v9 ; 10261313 v_rcp_f32_e32 v23, v23 ; 7E2E5517 s_buffer_load_dword s15, s[16:19], 0x5d ; C207915D v_mac_f32_e32 v20, s8, v19 ; 3E282608 v_sub_f32_e64 v19, |v20|, s0 ; D2080113 00000114 v_mul_f32_e32 v19, v23, v19 ; 10262717 v_add_f32_e64 v19, 0, v19 clamp ; D2060813 00022680 v_mac_f32_e32 v18, -2.0, v19 ; 3E2426F5 v_mul_f32_e32 v18, v18, v19 ; 10242712 v_mad_f32 v18, -v19, v18, 1.0 ; D2820012 23CA2513 v_mul_f32_e32 v18, v26, v18 ; 1024251A v_mov_b32_e32 v19, 0x80000000 ; 7E2602FF 80000000 v_mul_f32_e32 v23, v9, v19 ; 102E2709 v_mul_f32_e32 v3, 0x3e22f983, v3 ; 100606FF 3E22F983 v_fract_f32_e32 v19, v3 ; 7E264103 v_cndmask_b32_e64 v3, 1.0, 0, s[10:11] ; D2000003 002900F2 v_mac_f32_e32 v23, v3, v8 ; 3E2E1103 v_mul_f32_e32 v3, v3, v7 ; 10060F03 v_cndmask_b32_e64 v20, 0, 1.0, s[10:11] ; D2000014 0029E480 v_mad_f32 v3, v9, v20, -v3 ; D2820003 840E2909 s_buffer_load_dword s10, s[16:19], 0x4c ; C205114C s_buffer_load_dword s11, s[16:19], 0x4d ; C205914D s_buffer_load_dword s9, s[16:19], 0x4e ; C204914E v_mul_f32_e32 v20, v20, v8 ; 10281114 v_mad_f32 v24, 0, v7, -v20 ; D2820018 84520E80 v_mul_f32_e32 v20, v23, v23 ; 10282F17 v_mac_f32_e32 v20, v3, v3 ; 3E280703 v_mac_f32_e32 v20, v24, v24 ; 3E283118 v_rsq_clamp_f32_e32 v25, v20 ; 7E325914 v_cos_f32_e32 v20, v19 ; 7E286D13 v_sin_f32_e32 v19, v19 ; 7E266B13 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_lt_f32_e64 s[0:1], 0, s24 ; D0020000 00003080 s_and_saveexec_b64 s[24:25], s[0:1] ; BE982400 s_xor_b64 s[24:25], exec, s[24:25] ; 8998187E s_cbranch_execz BB0_2 ; BF880000 v_sub_f32_e32 v26, s10, v0 ; 0834000A v_sub_f32_e32 v27, s11, v1 ; 0836020B v_sub_f32_e32 v28, s9, v2 ; 08380409 v_mul_f32_e32 v26, v26, v26 ; 1034351A s_buffer_load_dword s0, s[20:23], 0x1 ; C2001501 s_buffer_load_dword s1, s[20:23], 0x2 ; C2009502 s_buffer_load_dword s26, s[20:23], 0x0 ; C20D1500 s_buffer_load_dword s27, s[20:23], 0x3 ; C20D9503 v_mac_f32_e32 v26, v27, v27 ; 3E34371B v_mac_f32_e32 v26, v28, v28 ; 3E34391C v_sqrt_f32_e32 v26, v26 ; 7E34671A s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v27, s0 ; 7E360200 v_sub_f32_e32 v27, s1, v27 ; 08363601 v_mul_f32_e32 v27, v26, v27 ; 1036371A v_rcp_f32_e32 v27, v27 ; 7E36551B v_mul_f32_e32 v28, s0, v26 ; 10383400 v_mad_f32 v29, -v26, s0, v6 ; D282001D 2418011A v_mad_f32 v27, -v29, v27, 1.0 ; D282001B 23CA371D v_cmp_lt_f32_e32 vcc, v28, v6 ; 7C020D1C v_mul_f32_e32 v28, s1, v26 ; 10383401 v_mul_f32_e32 v29, s26, v26 ; 103A341A v_max_f32_e32 v29, v29, v6 ; 203A0D1D v_cmp_lt_f32_e64 s[0:1], v28, v6 ; D0020000 00020D1C v_mul_f32_e32 v6, v4, v27 ; 100C3704 v_mul_f32_e32 v28, v5, v27 ; 10383705 v_mul_f32_e32 v30, v13, v27 ; 103C370D v_mul_f32_e32 v27, v18, v27 ; 10363712 v_cndmask_b32_e64 v6, v6, 0, s[0:1] ; D2000006 00010106 v_cndmask_b32_e64 v28, v28, 0, s[0:1] ; D200001C 0001011C v_cndmask_b32_e64 v30, v30, 0, s[0:1] ; D200001E 0001011E v_cndmask_b32_e64 v27, v27, 0, s[0:1] ; D200001B 0001011B v_cndmask_b32_e32 v4, v4, v6 ; 00080D04 v_cndmask_b32_e32 v5, v5, v28 ; 000A3905 v_cndmask_b32_e32 v13, v13, v30 ; 001A3D0D v_cndmask_b32_e32 v18, v18, v27 ; 00243712 v_mul_f32_e32 v6, s27, v26 ; 100C341B v_min_f32_e32 v6, v6, v29 ; 1E0C3B06 s_or_b64 exec, exec, s[24:25] ; 88FE187E v_mad_f32 v21, -2.0, v21, 1.0 ; D2820015 03CA2AF5 v_mad_f32 v21, v21, v11, s14 ; D2820015 003A1715 v_mad_f32 v11, -2.0, v22, 1.0 ; D282000B 03CA2CF5 v_mad_f32 v12, v11, v12, s13 ; D282000C 0036190B v_mul_f32_e32 v22, v25, v23 ; 102C2F19 v_mul_f32_e32 v23, v25, v3 ; 102E0719 v_mul_f32_e32 v24, v25, v24 ; 10303119 v_mul_f32_e32 v3, v23, v9 ; 10061317 v_mad_f32 v25, v8, v24, -v3 ; D2820019 840E3108 v_mul_f32_e32 v3, v24, v7 ; 10060F18 v_mad_f32 v26, v9, v22, -v3 ; D282001A 840E2D09 v_mul_f32_e32 v3, v22, v8 ; 10061116 v_mad_f32 v27, v7, v23, -v3 ; D282001B 840E2F07 v_mul_f32_e32 v3, v25, v19 ; 10062719 v_mad_f32 v28, v20, v22, -v3 ; D282001C 840E2D14 v_mul_f32_e32 v3, v26, v19 ; 1006271A v_mad_f32 v29, v20, v23, -v3 ; D282001D 840E2F14 v_mul_f32_e32 v3, v27, v19 ; 1006271B v_mad_f32 v30, v20, v24, -v3 ; D282001E 840E3114 v_mad_f32 v31, -v28, v28, 1.0 ; D282001F 23CA391C v_mac_f32_e32 v31, v28, v28 ; 3E3E391C v_mad_f32 v32, -v29, v29, 1.0 ; D2820020 23CA3B1D v_mac_f32_e32 v32, v29, v29 ; 3E403B1D v_mad_f32 v33, -v30, v30, 1.0 ; D2820021 23CA3D1E v_mac_f32_e32 v33, v30, v30 ; 3E423D1E v_mul_f32_e32 v3, v31, v7 ; 10060F1F v_mac_f32_e32 v3, 0, v8 ; 3E061080 v_mac_f32_e32 v3, 0, v9 ; 3E061280 v_mul_f32_e32 v7, 0, v7 ; 100E0E80 v_mad_f32 v11, v32, v8, v7 ; D282000B 041E1120 v_mac_f32_e32 v11, 0, v9 ; 3E161280 s_load_dwordx4 s[20:23], s[4:5], 0x4 ; C08A0504 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 v_mac_f32_e32 v7, 0, v8 ; 3E0E1080 v_mac_f32_e32 v7, v33, v9 ; 3E0E1321 v_mov_b32_e32 v8, 0x3b808083 ; 7E1002FF 3B808083 v_cmp_gt_f32_e32 vcc, v8, v18 ; 7C082508 v_cndmask_b32_e64 v6, v6, 0, vcc ; D2000006 01A90106 v_mul_f32_e32 v8, v25, v20 ; 10102919 v_mov_b32_e32 v34, 0 ; 7E440280 v_mul_f32_e32 v9, v26, v20 ; 1012291A v_mov_b32_e32 v35, v34 ; 7E460322 v_mul_f32_e32 v20, v27, v20 ; 1028291B v_mov_b32_e32 v36, v34 ; 7E480322 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[25:27], 7, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[24:31], s[20:23] ; F0900700 00A61922 s_buffer_load_dword s0, s[16:19], 0x5b ; C200115B s_buffer_load_dword s24, s[16:19], 0x80 ; C20C1180 s_buffer_load_dword s20, s[16:19], 0x81 ; C20A1181 s_buffer_load_dword s1, s[16:19], 0x82 ; C2009182 s_buffer_load_dword s7, s[16:19], 0x83 ; C2039183 s_buffer_load_dword s27, s[16:19], 0x84 ; C20D9184 s_buffer_load_dword s23, s[16:19], 0x85 ; C20B9185 s_buffer_load_dword s4, s[16:19], 0x86 ; C2021186 s_buffer_load_dword s13, s[16:19], 0x87 ; C2069187 s_buffer_load_dword s25, s[16:19], 0x88 ; C20C9188 s_buffer_load_dword s21, s[16:19], 0x89 ; C20A9189 s_buffer_load_dword s5, s[16:19], 0x8a ; C202918A s_buffer_load_dword s14, s[16:19], 0x8b ; C207118B s_buffer_load_dword s26, s[16:19], 0x8c ; C20D118C s_buffer_load_dword s22, s[16:19], 0x8d ; C20B118D s_buffer_load_dword s6, s[16:19], 0x8e ; C203118E s_buffer_load_dword s16, s[16:19], 0x8f ; C208118F v_mac_f32_e32 v8, v22, v19 ; 3E102716 v_mac_f32_e32 v9, v23, v19 ; 3E122717 v_mac_f32_e32 v20, v24, v19 ; 3E282718 v_mul_f32_e32 v19, v31, v8 ; 1026111F v_mac_f32_e32 v19, 0, v9 ; 3E261280 v_mul_f32_e32 v8, 0, v8 ; 10101080 v_mad_f32 v22, v32, v9, v8 ; D2820016 04221320 v_mac_f32_e32 v8, 0, v9 ; 3E101280 v_mac_f32_e32 v8, v33, v20 ; 3E102921 v_mac_f32_e32 v19, 0, v20 ; 3E262880 v_mac_f32_e32 v22, 0, v20 ; 3E2C2880 v_mul_f32_e32 v9, v21, v19 ; 10122715 v_mul_f32_e32 v19, v21, v22 ; 10262D15 v_mul_f32_e32 v8, v21, v8 ; 10101115 v_mac_f32_e32 v9, v28, v12 ; 3E12191C v_mac_f32_e32 v19, v29, v12 ; 3E26191D v_mac_f32_e32 v8, v30, v12 ; 3E10191E v_mad_f32 v0, v9, v6, v0 ; D2820000 04020D09 v_mad_f32 v1, v19, v6, v1 ; D2820001 04060D13 v_mac_f32_e32 v2, v8, v6 ; 3E040D08 v_mov_b32_e32 v6, 0x33d6bf95 ; 7E0C02FF 33D6BF95 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v0, v6, v25 ; 3E003306 v_mac_f32_e32 v1, v6, v26 ; 3E023506 v_mac_f32_e32 v2, v6, v27 ; 3E043706 v_subrev_f32_e32 v6, s10, v0 ; 0A0C000A v_mul_f32_e32 v9, v6, v3 ; 10120706 v_subrev_f32_e32 v6, s11, v1 ; 0A0C020B v_mac_f32_e32 v9, v6, v11 ; 3E121706 v_subrev_f32_e32 v6, s9, v2 ; 0A0C0409 v_mac_f32_e32 v9, v6, v7 ; 3E120F06 v_mov_b32_e32 v8, s15 ; 7E10020F v_mov_b32_e32 v6, s12 ; 7E0C020C v_cmp_ngt_f32_e32 vcc, 0, v9 ; 7C161280 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_mov_b32_e32 v9, 0x80000000 ; 7E1202FF 80000000 v_xor_b32_e32 v3, v3, v9 ; 3A061303 v_xor_b32_e32 v11, v11, v9 ; 3A16130B v_xor_b32_e32 v7, v7, v9 ; 3A0E1307 s_or_b64 exec, exec, s[18:19] ; 88FE127E v_subrev_f32_e32 v8, s0, v8 ; 0A101000 v_rcp_f32_e32 v8, v8 ; 7E105508 v_mul_f32_e32 v9, s27, v1 ; 1012021B v_mac_f32_e32 v9, s24, v0 ; 3E120018 v_mac_f32_e32 v9, s25, v2 ; 3E120419 v_add_f32_e32 v9, s26, v9 ; 0612121A v_mul_f32_e32 v12, s23, v1 ; 10180217 v_mac_f32_e32 v12, s20, v0 ; 3E180014 v_mac_f32_e32 v12, s21, v2 ; 3E180415 v_add_f32_e32 v12, s22, v12 ; 06181816 v_mov_b32_e32 v19, 0 ; 7E260280 exp 15, 32, 0, 0, 0, v19, v19, v19, v19 ; F800020F 13131313 exp 15, 33, 0, 0, 0, v14, v15, v16, v17 ; F800021F 11100F0E exp 15, 34, 0, 0, 0, v4, v5, v13, v18 ; F800022F 120D0504 s_waitcnt expcnt(0) ; BF8C070F v_subrev_f32_e32 v4, s10, v0 ; 0A08000A v_subrev_f32_e32 v5, s11, v1 ; 0A0A020B v_mul_f32_e32 v4, s2, v4 ; 10080802 v_mac_f32_e32 v4, s3, v5 ; 3E080A03 v_subrev_f32_e32 v5, s9, v2 ; 0A0A0409 v_mac_f32_e32 v4, s8, v5 ; 3E080A08 v_mad_f32 v5, s2, v6, v0 ; D2820005 04020C02 v_mad_f32 v13, s3, v6, v1 ; D282000D 04060C03 v_mad_f32 v6, s8, v6, v2 ; D2820006 040A0C08 v_mul_f32_e32 v14, s13, v1 ; 101C020D v_mac_f32_e32 v14, s7, v0 ; 3E1C0007 v_mul_f32_e32 v15, s13, v13 ; 101E1A0D v_mac_f32_e32 v15, s7, v5 ; 3E1E0A07 v_mac_f32_e32 v14, s14, v2 ; 3E1C040E v_mac_f32_e32 v15, s14, v6 ; 3E1E0C0E v_add_f32_e32 v14, s16, v14 ; 061C1C10 v_add_f32_e32 v15, s16, v15 ; 061E1E10 v_mul_f32_e32 v13, s4, v13 ; 101A1A04 v_rcp_f32_e32 v15, v15 ; 7E1E550F v_mac_f32_e32 v13, s1, v5 ; 3E1A0A01 v_mac_f32_e32 v13, s5, v6 ; 3E1A0C05 v_add_f32_e32 v5, s6, v13 ; 060A1A06 v_mul_f32_e32 v5, v15, v5 ; 100A0B0F v_mul_f32_e32 v6, s4, v1 ; 100C0204 v_mac_f32_e32 v6, s1, v0 ; 3E0C0001 v_mac_f32_e32 v6, s5, v2 ; 3E0C0405 v_add_f32_e32 v6, s6, v6 ; 060C0C06 v_mul_f32_e32 v5, v14, v5 ; 100A0B0E v_min_f32_e32 v6, 0x3a83126f, v6 ; 1E0C0CFF 3A83126F v_max_f32_e32 v5, v6, v5 ; 200A0B06 v_subrev_f32_e32 v6, s0, v4 ; 0A0C0800 v_mul_f32_e32 v6, v8, v6 ; 100C0D08 v_xor_b32_e32 v8, 0x80000000, v12 ; 3A1018FF 80000000 v_mad_f32 v5, 2.0, v5, -v14 ; D2820005 843A0AF4 exp 15, 35, 0, 0, 0, v9, v12, v6, v14 ; F800023F 0E060C09 exp 15, 36, 0, 0, 0, v0, v1, v2, v4 ; F800024F 04020100 exp 15, 37, 0, 0, 0, v3, v11, v7, v19 ; F800025F 13070B03 exp 15, 38, 0, 0, 0, v10, v19, v19, v19 ; F800026F 1313130A exp 15, 12, 0, 0, 0, v9, v8, v5, v14 ; F80000CF 0E050809 exp 15, 13, 0, 1, 0, v19, v19, v19, v19 ; F80008DF 13131313 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 48 Code Size: 2288 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL CONST[0..5] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[0], CONST[2] 2: DP4 TEMP[1].x, IN[0], CONST[3] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: DP4 TEMP[1].x, IN[0], CONST[4] 5: DP4 TEMP[2].x, IN[0], CONST[5] 6: MOV TEMP[1].y, TEMP[2].xxxx 7: MOV TEMP[1].xy, TEMP[1].xyxx 8: MOV OUT[1], CONST[0] 9: MOV OUT[2], CONST[1] 10: MOV OUT[0], TEMP[0] 11: MOV OUT[3], TEMP[1] 12: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = fmul float %41, %21 %46 = fmul float %42, %22 %47 = fadd float %45, %46 %48 = fmul float %43, %23 %49 = fadd float %47, %48 %50 = fmul float %44, %24 %51 = fadd float %49, %50 %52 = fmul float %41, %25 %53 = fmul float %42, %26 %54 = fadd float %52, %53 %55 = fmul float %43, %27 %56 = fadd float %54, %55 %57 = fmul float %44, %28 %58 = fadd float %56, %57 %59 = fmul float %41, %29 %60 = fmul float %42, %30 %61 = fadd float %59, %60 %62 = fmul float %43, %31 %63 = fadd float %61, %62 %64 = fmul float %44, %32 %65 = fadd float %63, %64 %66 = fmul float %41, %33 %67 = fmul float %42, %34 %68 = fadd float %66, %67 %69 = fmul float %43, %35 %70 = fadd float %68, %69 %71 = fmul float %44, %36 %72 = fadd float %70, %71 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %13, float %14, float %15, float %16) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %65, float %72, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %51, float %58, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s19, s[0:3], 0xf ; C209810F s_buffer_load_dword s20, s[0:3], 0x10 ; C20A0110 s_buffer_load_dword s21, s[0:3], 0x11 ; C20A8111 s_buffer_load_dword s22, s[0:3], 0x14 ; C20B0114 s_buffer_load_dword s23, s[0:3], 0x15 ; C20B8115 s_buffer_load_dword s24, s[0:3], 0x12 ; C20C0112 s_buffer_load_dword s25, s[0:3], 0x13 ; C20C8113 s_buffer_load_dword s26, s[0:3], 0x16 ; C20D0116 s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s13, v1 ; 1008020D v_mac_f32_e32 v4, s12, v0 ; 3E08000C v_mul_f32_e32 v5, s17, v1 ; 100A0211 v_mac_f32_e32 v5, s16, v0 ; 3E0A0010 v_mul_f32_e32 v6, s21, v1 ; 100C0215 v_mac_f32_e32 v6, s20, v0 ; 3E0C0014 v_mul_f32_e32 v1, s23, v1 ; 10020217 v_mac_f32_e32 v1, s22, v0 ; 3E020016 v_mac_f32_e32 v4, s14, v2 ; 3E08040E v_mac_f32_e32 v5, s18, v2 ; 3E0A0412 v_mac_f32_e32 v6, s24, v2 ; 3E0C0418 v_mac_f32_e32 v1, s26, v2 ; 3E02041A v_mac_f32_e32 v4, s15, v3 ; 3E08060F v_mac_f32_e32 v5, s19, v3 ; 3E0A0613 v_mac_f32_e32 v6, s25, v3 ; 3E0C0619 v_mac_f32_e32 v1, s0, v3 ; 3E020600 v_mov_b32_e32 v0, s4 ; 7E000204 v_mov_b32_e32 v2, s5 ; 7E040205 v_mov_b32_e32 v3, s6 ; 7E060206 v_mov_b32_e32 v7, s7 ; 7E0E0207 exp 15, 32, 0, 0, 0, v0, v2, v3, v7 ; F800020F 07030200 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, s8 ; 7E000208 v_mov_b32_e32 v2, s9 ; 7E040209 v_mov_b32_e32 v3, s10 ; 7E06020A v_mov_b32_e32 v7, s11 ; 7E0E020B exp 15, 33, 0, 0, 0, v0, v2, v3, v7 ; F800021F 07030200 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 34, 0, 0, 0, v6, v1, v0, v0 ; F800022F 00000106 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp 15, 12, 0, 1, 0, v4, v5, v0, v1 ; F80008CF 01000504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 8 Code Size: 280 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[0] DCL CONST[2] DCL TEMP[0..9], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[2].xyxx 1: MOV TEMP[1], IMM[0].xxxx 2: MOV TEMP[2], IMM[0].xxxx 3: MOV TEMP[3].y, IMM[0].xxxx 4: MOV TEMP[3].x, -CONST[0].xxxx 5: BGNLOOP :0 6: FSLT TEMP[4].x, CONST[0].xxxx, TEMP[3].xxxx 7: UIF TEMP[4].xxxx :0 8: BRK 9: ENDIF 10: MOV TEMP[3].y, -CONST[0].yyyy 11: BGNLOOP :0 12: FSLT TEMP[5].x, CONST[0].yyyy, TEMP[3].yyyy 13: UIF TEMP[5].xxxx :0 14: BRK 15: ENDIF 16: MAD TEMP[6].xy, TEMP[3].xyyy, CONST[2].xyyy, TEMP[0].xyyy 17: MOV TEMP[7].xy, TEMP[6].xyyy 18: MOV TEMP[7].w, IMM[0].xxxx 19: TXL TEMP[8], TEMP[7], SAMP[0], 2D 20: ADD TEMP[2], TEMP[2], TEMP[8] 21: ADD TEMP[9].x, TEMP[3].yyyy, IMM[0].yyyy 22: MOV TEMP[3].y, TEMP[9].xxxx 23: ENDLOOP :0 24: ADD TEMP[3].x, TEMP[3].xxxx, IMM[0].yyyy 25: ENDLOOP :0 26: MUL TEMP[1], TEMP[2], CONST[0].wwww 27: MOV TEMP[0].w, IMM[0].yyyy 28: MOV TEMP[0].xyz, IN[1].xyzx 29: MUL TEMP[0], TEMP[1], TEMP[0] 30: MUL TEMP[1], TEMP[0], IN[1].wwww 31: MAD TEMP[1], IN[0], TEMP[1].wwww, TEMP[1] 32: MOV OUT[0], TEMP[1] 33: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %29 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %30 = load <32 x i8>, <32 x i8> addrspace(2)* %29, align 32, !tbaa !0 %31 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %32 = load <16 x i8>, <16 x i8> addrspace(2)* %31, align 16, !tbaa !0 %33 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %37 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %38 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %43 = fsub float -0.000000e+00, %24 %44 = fsub float -0.000000e+00, %25 br label %LOOP LOOP: ; preds = %IF43, %main_body %temp10.0 = phi float [ 0.000000e+00, %main_body ], [ %temp10.1, %IF43 ] %temp11.0 = phi float [ 0.000000e+00, %main_body ], [ %temp11.1, %IF43 ] %temp12.0 = phi float [ %43, %main_body ], [ %74, %IF43 ] %temp9.0 = phi float [ 0.000000e+00, %main_body ], [ %temp9.1, %IF43 ] %temp8.0 = phi float [ 0.000000e+00, %main_body ], [ %temp8.1, %IF43 ] %45 = fcmp olt float %24, %temp12.0 br i1 %45, label %IF, label %ENDIF IF: ; preds = %LOOP %46 = fmul float %temp8.0, %26 %47 = fmul float %temp9.0, %26 %48 = fmul float %temp10.0, %26 %49 = fmul float %temp11.0, %26 %50 = fmul float %46, %37 %51 = fmul float %47, %38 %52 = fmul float %48, %39 %53 = fmul float %50, %40 %54 = fmul float %51, %40 %55 = fmul float %52, %40 %56 = fmul float %49, %40 %57 = fmul float %33, %56 %58 = fadd float %57, %53 %59 = fmul float %34, %56 %60 = fadd float %59, %54 %61 = fmul float %35, %56 %62 = fadd float %61, %55 %63 = fmul float %36, %56 %64 = fadd float %63, %56 %65 = call i32 @llvm.SI.packf16(float %58, float %60) %66 = bitcast i32 %65 to float %67 = call i32 @llvm.SI.packf16(float %62, float %64) %68 = bitcast i32 %67 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %66, float %68, float %66, float %68) ret void ENDIF: ; preds = %LOOP %69 = fmul float %temp12.0, %27 %70 = fadd float %69, %41 %71 = bitcast float %70 to i32 %72 = insertelement <4 x i32> undef, i32 %71, i32 0 br label %LOOP41 LOOP41: ; preds = %ENDIF42, %ENDIF %temp10.1 = phi float [ %temp10.0, %ENDIF ], [ %87, %ENDIF42 ] %temp11.1 = phi float [ %temp11.0, %ENDIF ], [ %88, %ENDIF42 ] %temp13.0 = phi float [ %44, %ENDIF ], [ %89, %ENDIF42 ] %temp9.1 = phi float [ %temp9.0, %ENDIF ], [ %86, %ENDIF42 ] %temp8.1 = phi float [ %temp8.0, %ENDIF ], [ %85, %ENDIF42 ] %73 = fcmp olt float %25, %temp13.0 br i1 %73, label %IF43, label %ENDIF42 IF43: ; preds = %LOOP41 %74 = fadd float %temp12.0, 1.000000e+00 br label %LOOP ENDIF42: ; preds = %LOOP41 %75 = fmul float %temp13.0, %28 %76 = fadd float %75, %42 %77 = bitcast float %76 to i32 %78 = insertelement <4 x i32> %72, i32 %77, i32 1 %79 = insertelement <4 x i32> %78, i32 0, i32 2 %80 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %79, <32 x i8> %30, <16 x i8> %32, i32 2) %81 = extractelement <4 x float> %80, i32 0 %82 = extractelement <4 x float> %80, i32 1 %83 = extractelement <4 x float> %80, i32 2 %84 = extractelement <4 x float> %80, i32 3 %85 = fadd float %temp8.1, %81 %86 = fadd float %temp9.1, %82 %87 = fadd float %temp10.1, %83 %88 = fadd float %temp11.1, %84 %89 = fadd float %temp13.0, 1.000000e+00 br label %LOOP41 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s1, s[12:15], 0x0 ; C2008D00 s_buffer_load_dword s2, s[12:15], 0x1 ; C2010D01 s_buffer_load_dword s0, s[12:15], 0x3 ; C2000D03 s_buffer_load_dword s3, s[12:15], 0x8 ; C2018D08 s_buffer_load_dword s8, s[12:15], 0x9 ; C2040D09 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700 v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701 v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800 v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801 v_interp_p1_f32 v0, v0, 1, 2, [m0] ; C8000900 s_load_dwordx4 s[20:23], s[4:5], 0x0 ; C08A0500 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 v_interp_p2_f32 v0, [v0], v1, 1, 2, [m0] ; C8010901 v_mov_b32_e32 v1, 0x80000000 ; 7E0202FF 80000000 s_waitcnt lgkmcnt(0) ; BF8C007F v_xor_b32_e32 v13, s1, v1 ; 3A1A0201 v_xor_b32_e32 v15, s2, v1 ; 3A1E0202 v_mov_b32_e32 v16, 0 ; 7E200280 v_mov_b32_e32 v17, 0 ; 7E220280 v_mov_b32_e32 v18, 0 ; 7E240280 v_mov_b32_e32 v19, 0 ; 7E260280 s_mov_b64 s[4:5], 0 ; BE840480 v_mov_b32_e32 v12, v19 ; 7E180313 v_mov_b32_e32 v11, v18 ; 7E160312 v_mov_b32_e32 v1, v17 ; 7E020311 v_mov_b32_e32 v14, v16 ; 7E1C0310 v_cmp_nlt_f32_e32 vcc, s1, v13 ; 7C1C1A01 s_and_saveexec_b64 s[6:7], vcc ; BE86246A s_xor_b64 s[6:7], exec, s[6:7] ; 8986067E s_cbranch_execz BB0_4 ; BF880000 v_mov_b32_e32 v21, v14 ; 7E2A030E v_mad_f32 v20, s3, v13, v10 ; D2820014 042A1A03 s_mov_b64 s[10:11], 0 ; BE8A0480 v_mov_b32_e32 v23, v1 ; 7E2E0301 v_mov_b32_e32 v22, v15 ; 7E2C030F v_mov_b32_e32 v24, v11 ; 7E30030B v_mov_b32_e32 v25, v12 ; 7E32030C v_mov_b32_e32 v19, v25 ; 7E260319 v_mov_b32_e32 v18, v24 ; 7E240318 v_mov_b32_e32 v17, v23 ; 7E220317 v_mov_b32_e32 v16, v21 ; 7E200315 v_cmp_nlt_f32_e32 vcc, s2, v22 ; 7C1C2C02 s_and_saveexec_b64 s[24:25], vcc ; BE98246A s_xor_b64 s[24:25], exec, s[24:25] ; 8998187E s_cbranch_execz BB0_6 ; BF880000 v_mad_f32 v21, s8, v22, v0 ; D2820015 04022C08 v_mov_b32_e32 v23, 0 ; 7E2E0280 v_mov_b32_e32 v24, v20 ; 7E300314 v_mov_b32_e32 v25, v21 ; 7E320315 v_mov_b32_e32 v26, v22 ; 7E340316 v_mov_b32_e32 v27, v23 ; 7E360317 v_mov_b32_e32 v26, v23 ; 7E340317 image_sample_l v[26:29], 15, 0, 0, 0, 0, 0, 0, 0, v[24:27], s[12:19], s[20:23] ; F0900F00 00A31A18 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v25, v26, v19 ; 0632271A v_add_f32_e32 v24, v27, v18 ; 0630251B v_add_f32_e32 v21, v28, v16 ; 062A211C v_add_f32_e32 v23, v29, v17 ; 062E231D v_add_f32_e32 v22, 1.0, v22 ; 062C2CF2 s_or_b64 exec, exec, s[24:25] ; 88FE187E s_or_b64 s[10:11], s[24:25], s[10:11] ; 888A0A18 s_andn2_b64 exec, exec, s[10:11] ; 8AFE0A7E s_cbranch_execnz BB0_5 ; BF890000 s_or_b64 exec, exec, s[10:11] ; 88FE0A7E v_add_f32_e32 v13, 1.0, v13 ; 061A1AF2 s_or_b64 exec, exec, s[6:7] ; 88FE067E s_or_b64 s[4:5], s[6:7], s[4:5] ; 88840406 s_andn2_b64 exec, exec, s[4:5] ; 8AFE047E s_cbranch_execnz BB0_1 ; BF890000 s_or_b64 exec, exec, s[4:5] ; 88FE047E v_mul_f32_e32 v0, s0, v12 ; 10001800 v_mul_f32_e32 v10, s0, v11 ; 10141600 v_mul_f32_e32 v11, s0, v14 ; 10161C00 v_mul_f32_e32 v1, s0, v1 ; 10020200 v_mul_f32_e32 v0, v6, v0 ; 10000106 v_mul_f32_e32 v6, v7, v10 ; 100C1507 v_mul_f32_e32 v7, v8, v11 ; 100E1708 v_mul_f32_e32 v0, v9, v0 ; 10000109 v_mul_f32_e32 v6, v9, v6 ; 100C0D09 v_mul_f32_e32 v7, v9, v7 ; 100E0F09 v_mul_f32_e32 v1, v9, v1 ; 10020309 v_mac_f32_e32 v0, v1, v2 ; 3E000501 v_mac_f32_e32 v6, v1, v3 ; 3E0C0701 v_mac_f32_e32 v7, v1, v4 ; 3E0E0901 v_mac_f32_e32 v1, v1, v5 ; 3E020B01 v_cvt_pkrtz_f16_f32_e32 v0, v0, v6 ; 5E000D00 v_cvt_pkrtz_f16_f32_e32 v1, v7, v1 ; 5E020307 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 32 Code Size: 444 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0xB last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL OUT[2], COLOR[2] DCL OUT[3], COLOR[3] DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[1][0..3] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0..6], LOCAL IMM[0] UINT32 {0, 16, 44, 48} IMM[1] FLT32 { 0.5000, 0.0000, 1.0000, 0.0000} IMM[2] UINT32 {4, 0, 0, 0} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MUL TEMP[1].xyz, CONST[1][1].xyzz, TEMP[0].xyzz 3: MUL TEMP[1].xyz, TEMP[1].xyzz, IN[3].xyzz 4: MOV TEMP[2].xy, IN[0].xyyy 5: TEX TEMP[2].xyz, TEMP[2], SAMP[1], 2D 6: MUL TEMP[3].x, CONST[1][2].wwww, TEMP[2].zzzz 7: MUL TEMP[4].x, CONST[1][3].xxxx, TEMP[2].xxxx 8: MUL TEMP[4].x, TEMP[4].xxxx, IN[3].wwww 9: DP3 TEMP[5].x, IN[2].xyzz, IN[2].xyzz 10: RSQ TEMP[5].x, TEMP[5].xxxx 11: MUL TEMP[5].xyz, IN[2].xyzz, TEMP[5].xxxx 12: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].xxxx 13: MOV TEMP[6].w, IMM[1].yyyy 14: MOV TEMP[6].x, TEMP[5].xxxx 15: MOV TEMP[6].y, TEMP[5].yyyy 16: MOV TEMP[6].z, TEMP[5].zzzz 17: FSLT TEMP[0].x, TEMP[0].wwww, CONST[1][0].yyyy 18: AND TEMP[0].x, TEMP[0].xxxx, IMM[1].zzzz 19: KILL_IF -TEMP[0].xxxx 20: MOV TEMP[0].w, IMM[1].yyyy 21: MOV TEMP[0].x, TEMP[1].xxxx 22: MOV TEMP[0].y, TEMP[1].yyyy 23: MOV TEMP[0].z, TEMP[1].zzzz 24: MOV TEMP[1].w, IMM[1].yyyy 25: MOV TEMP[1].x, TEMP[4].xxxx 26: MOV TEMP[1].y, TEMP[2].yyyy 27: MOV TEMP[1].z, TEMP[3].xxxx 28: MOV OUT[2], IN[1].wwww 29: MOV OUT[0], TEMP[0] 30: MOV OUT[3], TEMP[6] 31: MOV OUT[1], TEMP[1] 32: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %30 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %31 = load <32 x i8>, <32 x i8> addrspace(2)* %30, align 32, !tbaa !0 %32 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0 %34 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %35 = bitcast <8 x i32> addrspace(2)* %34 to <32 x i8> addrspace(2)* %36 = load <32 x i8>, <32 x i8> addrspace(2)* %35, align 32, !tbaa !0 %37 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %38 = bitcast <4 x i32> addrspace(2)* %37 to <16 x i8> addrspace(2)* %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 %40 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %44 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %45 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %46 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %47 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %48 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %49 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %50 = bitcast float %40 to i32 %51 = bitcast float %41 to i32 %52 = insertelement <2 x i32> undef, i32 %50, i32 0 %53 = insertelement <2 x i32> %52, i32 %51, i32 1 %54 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %53, <32 x i8> %31, <16 x i8> %33, i32 2) %55 = extractelement <4 x float> %54, i32 0 %56 = extractelement <4 x float> %54, i32 1 %57 = extractelement <4 x float> %54, i32 2 %58 = extractelement <4 x float> %54, i32 3 %59 = fmul float %25, %55 %60 = fmul float %26, %56 %61 = fmul float %27, %57 %62 = fmul float %59, %46 %63 = fmul float %60, %47 %64 = fmul float %61, %48 %65 = bitcast float %40 to i32 %66 = bitcast float %41 to i32 %67 = insertelement <2 x i32> undef, i32 %65, i32 0 %68 = insertelement <2 x i32> %67, i32 %66, i32 1 %69 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %68, <32 x i8> %36, <16 x i8> %39, i32 2) %70 = extractelement <4 x float> %69, i32 0 %71 = extractelement <4 x float> %69, i32 1 %72 = extractelement <4 x float> %69, i32 2 %73 = fmul float %28, %72 %74 = fmul float %29, %70 %75 = fmul float %74, %49 %76 = fmul float %43, %43 %77 = fmul float %44, %44 %78 = fadd float %77, %76 %79 = fmul float %45, %45 %80 = fadd float %78, %79 %81 = call float @llvm.AMDGPU.rsq.clamped.f32(float %80) %82 = fmul float %43, %81 %83 = fmul float %44, %81 %84 = fmul float %45, %81 %85 = fmul float %82, 5.000000e-01 %86 = fadd float %85, 5.000000e-01 %87 = fmul float %83, 5.000000e-01 %88 = fadd float %87, 5.000000e-01 %89 = fmul float %84, 5.000000e-01 %90 = fadd float %89, 5.000000e-01 %91 = fcmp olt float %58, %24 %92 = select i1 %91, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %92) %93 = call i32 @llvm.SI.packf16(float %62, float %63) %94 = bitcast i32 %93 to float %95 = call i32 @llvm.SI.packf16(float %64, float 0.000000e+00) %96 = bitcast i32 %95 to float %97 = call i32 @llvm.SI.packf16(float %75, float %71) %98 = bitcast i32 %97 to float %99 = call i32 @llvm.SI.packf16(float %73, float 0.000000e+00) %100 = bitcast i32 %99 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %94, float %96, float %94, float %96) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 1, i32 1, float %98, float %100, float %98, float %100) %101 = call i32 @llvm.SI.packf16(float %86, float %88) %102 = bitcast i32 %101 to float %103 = call i32 @llvm.SI.packf16(float %90, float 0.000000e+00) %104 = bitcast i32 %103 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 2, i32 0, float %42, float %42, float %42, float %42) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 3, i32 1, float %102, float %104, float %102, float %104) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 3, 1, [m0] ; C8100700 v_interp_p2_f32 v4, [v4], v1, 3, 1, [m0] ; C8110701 v_interp_p1_f32 v5, v0, 0, 2, [m0] ; C8140800 v_interp_p2_f32 v5, [v5], v1, 0, 2, [m0] ; C8150801 v_interp_p1_f32 v6, v0, 1, 2, [m0] ; C8180900 v_interp_p2_f32 v6, [v6], v1, 1, 2, [m0] ; C8190901 v_interp_p1_f32 v7, v0, 2, 2, [m0] ; C81C0A00 v_interp_p2_f32 v7, [v7], v1, 2, 2, [m0] ; C81D0A01 v_interp_p1_f32 v8, v0, 0, 3, [m0] ; C8200C00 s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 v_interp_p2_f32 v8, [v8], v1, 0, 3, [m0] ; C8210C01 v_interp_p1_f32 v9, v0, 1, 3, [m0] ; C8240D00 v_interp_p2_f32 v9, [v9], v1, 1, 3, [m0] ; C8250D01 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 v_interp_p1_f32 v10, v0, 2, 3, [m0] ; C8280E00 v_interp_p2_f32 v10, [v10], v1, 2, 3, [m0] ; C8290E01 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_buffer_load_dword s6, s[0:3], 0x6 ; C2030106 s_buffer_load_dword s7, s[0:3], 0x1 ; C2038101 v_interp_p1_f32 v0, v0, 3, 3, [m0] ; C8000F00 v_interp_p2_f32 v0, [v0], v1, 3, 3, [m0] ; C8010F01 image_sample v[11:14], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[8:11] ; F0800F00 00440B02 image_sample v[1:3], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[12:15] ; F0800700 00660102 s_buffer_load_dword s8, s[0:3], 0xb ; C204010B s_buffer_load_dword s0, s[0:3], 0xc ; C200010C s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_mul_f32_e32 v11, s4, v11 ; 10161604 v_mul_f32_e32 v12, s5, v12 ; 10181805 v_mul_f32_e32 v13, s6, v13 ; 101A1A06 v_cmp_gt_f32_e32 vcc, s7, v14 ; 7C081C07 v_mul_f32_e32 v8, v8, v11 ; 10101708 v_mul_f32_e32 v9, v9, v12 ; 10121909 v_mul_f32_e32 v11, v5, v5 ; 10160B05 v_mac_f32_e32 v11, v6, v6 ; 3E160D06 v_mac_f32_e32 v11, v7, v7 ; 3E160F07 v_rsq_clamp_f32_e32 v11, v11 ; 7E16590B v_mul_f32_e32 v10, v10, v13 ; 10141B0A s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, s0, v1 ; 10020200 v_mul_f32_e32 v0, v0, v1 ; 10000300 v_mul_f32_e32 v1, v11, v5 ; 10020B0B v_mul_f32_e32 v5, v11, v6 ; 100A0D0B v_mul_f32_e32 v6, v11, v7 ; 100C0F0B v_mul_f32_e32 v3, s8, v3 ; 10060608 v_mad_f32 v1, 0.5, v1, 0.5 ; D2820001 03C202F0 v_mad_f32 v5, 0.5, v5, 0.5 ; D2820005 03C20AF0 v_mad_f32 v6, 0.5, v6, 0.5 ; D2820006 03C20CF0 v_cndmask_b32_e64 v7, 0, -1.0, vcc ; D2000007 01A9E680 v_cmpx_le_f32_e32 vcc, 0, v7 ; 7C260E80 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 v_cvt_pkrtz_f16_f32_e32 v2, v8, v9 ; 5E041308 v_cvt_pkrtz_f16_f32_e64 v7, v10, 0 ; D25E0007 0001010A exp 15, 0, 1, 0, 0, v2, v7, v2, v7 ; F800040F 07020702 s_waitcnt expcnt(0) ; BF8C070F v_cvt_pkrtz_f16_f32_e64 v2, v3, 0 ; D25E0002 00010103 exp 15, 1, 1, 0, 0, v0, v2, v0, v2 ; F800041F 02000200 s_waitcnt expcnt(0) ; BF8C070F v_cvt_pkrtz_f16_f32_e32 v0, v1, v5 ; 5E000B01 exp 15, 2, 0, 0, 0, v4, v4, v4, v4 ; F800002F 04040404 v_cvt_pkrtz_f16_f32_e64 v1, v6, 0 ; D25E0001 00010106 exp 15, 3, 1, 1, 1, v0, v1, v0, v1 ; F8001C3F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 16 Code Size: 344 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL OUT[4], GENERIC[2] DCL OUT[5], GENERIC[3] DCL OUT[6], GENERIC[4] DCL OUT[7], GENERIC[5] DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0..2] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL DCL TEMP[2..10], LOCAL IMM[0] FLT32 { 0.0000, 255.0000, -128.0000, 1.0000} IMM[1] INT32 {1, 0, 2, 3} IMM[2] FLT32 { -64.0000, 0.0159, 2.0000, 0.0000} IMM[3] UINT32 {3, 320, 304, 12} IMM[4] UINT32 {28, 44, 60, 0} IMM[5] UINT32 {24, 32, 16, 48} IMM[6] UINT32 {4, 20, 36, 52} IMM[7] UINT32 {8, 40, 56, 0} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MOV TEMP[0].xy, IN[3].xyyy 4: MOV TEMP[0].w, IMM[0].xxxx 5: TXL TEMP[0], TEMP[0], SAMP[0], 2D 6: MOV TEMP[2].xy, IN[3].xyyy 7: MOV TEMP[2].w, IMM[0].xxxx 8: TXL TEMP[2], TEMP[2], SAMP[0], 2D, IMM[1].xyx 9: MOV TEMP[3].xy, IN[3].xyyy 10: MOV TEMP[3].w, IMM[0].xxxx 11: TXL TEMP[3], TEMP[3], SAMP[0], 2D, IMM[1].zyz 12: MAD TEMP[4], IN[1], IMM[0].yyyy, IMM[0].zzzz 13: FSLT TEMP[5], TEMP[4], IMM[0].xxxx 14: AND TEMP[5], TEMP[5], IMM[0].wwww 15: ABS TEMP[4], TEMP[4] 16: ADD TEMP[4], TEMP[4], -TEMP[5] 17: ADD TEMP[4], TEMP[4], IMM[2].xxxx 18: FSLT TEMP[6], TEMP[4], IMM[0].xxxx 19: AND TEMP[6], TEMP[6], IMM[0].wwww 20: ABS TEMP[4], TEMP[4] 21: ADD TEMP[4], TEMP[4], -TEMP[6] 22: MUL TEMP[4], TEMP[4], IMM[2].yyyy 23: MUL TEMP[6], TEMP[6], IMM[2].zzzz 24: ADD TEMP[6], IMM[0].wwww, -TEMP[6] 25: MUL TEMP[5], IMM[2].zzzz, TEMP[5] 26: ADD TEMP[5].xzw, IMM[0].wwww, -TEMP[5] 27: MOV TEMP[7].x, TEMP[4].xxxx 28: MOV TEMP[7].y, TEMP[4].yyyy 29: ADD TEMP[8].x, IMM[0].wwww, -TEMP[4].xxxx 30: ADD TEMP[8].x, TEMP[8].xxxx, -TEMP[4].yyyy 31: MOV TEMP[7].z, TEMP[8].xxxx 32: DP3 TEMP[8].x, TEMP[7].xyzz, TEMP[7].xyzz 33: RSQ TEMP[8].x, TEMP[8].xxxx 34: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[8].xxxx 35: MUL TEMP[8].xy, TEMP[7].xyyy, TEMP[6].xyyy 36: MOV TEMP[9].x, TEMP[4].zzzz 37: MOV TEMP[9].y, TEMP[4].wwww 38: ADD TEMP[10].x, IMM[0].wwww, -TEMP[4].zzzz 39: ADD TEMP[4].x, TEMP[10].xxxx, -TEMP[4].wwww 40: MOV TEMP[9].z, TEMP[4].xxxx 41: DP3 TEMP[4].x, TEMP[9].xyzz, TEMP[9].xyzz 42: RSQ TEMP[4].x, TEMP[4].xxxx 43: MUL TEMP[4].xyz, TEMP[9].xyzz, TEMP[4].xxxx 44: MUL TEMP[6].xy, TEMP[4].xyyy, TEMP[6].zwww 45: MOV TEMP[9].w, IMM[0].xxxx 46: MOV TEMP[9].x, TEMP[8].xxxx 47: MOV TEMP[9].y, TEMP[8].yyyy 48: MUL TEMP[7].x, TEMP[7].zzzz, TEMP[5].xxxx 49: MOV TEMP[9].z, TEMP[7].xxxx 50: DP4 TEMP[7].x, TEMP[9], TEMP[0] 51: DP4 TEMP[8].x, TEMP[9], TEMP[2] 52: MOV TEMP[7].y, TEMP[8].xxxx 53: DP4 TEMP[8].x, TEMP[9], TEMP[3] 54: MOV TEMP[7].z, TEMP[8].xxxx 55: DP3 TEMP[8].x, TEMP[7].xyzz, TEMP[7].xyzz 56: RSQ TEMP[8].x, TEMP[8].xxxx 57: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[8].xxxx 58: MOV TEMP[8].w, IMM[0].xxxx 59: MOV TEMP[8].x, TEMP[6].xxxx 60: MOV TEMP[8].y, TEMP[6].yyyy 61: MUL TEMP[4].x, TEMP[4].zzzz, TEMP[5].zzzz 62: MOV TEMP[8].z, TEMP[4].xxxx 63: DP4 TEMP[4].x, TEMP[8], TEMP[0] 64: DP4 TEMP[6].x, TEMP[8], TEMP[2] 65: MOV TEMP[4].y, TEMP[6].xxxx 66: DP4 TEMP[6].x, TEMP[8], TEMP[3] 67: MOV TEMP[4].z, TEMP[6].xxxx 68: DP3 TEMP[6].x, TEMP[4].xyzz, TEMP[7].xyzz 69: MUL TEMP[6].xyz, TEMP[6].xxxx, TEMP[7].xyzz 70: ADD TEMP[4].xyz, TEMP[4].xyzz, -TEMP[6].xyzz 71: DP3 TEMP[6].x, TEMP[4].xyzz, TEMP[4].xyzz 72: RSQ TEMP[6].x, TEMP[6].xxxx 73: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[6].xxxx 74: MOV TEMP[6].x, TEMP[4].xxxx 75: MOV TEMP[6].y, TEMP[4].yyyy 76: MOV TEMP[6].z, TEMP[4].zzzz 77: MOV TEMP[6].w, TEMP[5].wwww 78: MOV TEMP[4].w, IMM[0].wwww 79: MOV TEMP[4].x, IN[0].xxxx 80: MOV TEMP[4].y, IN[0].yyyy 81: MOV TEMP[4].z, IN[0].zzzz 82: DP4 TEMP[0].x, TEMP[4], TEMP[0] 83: DP4 TEMP[2].x, TEMP[4], TEMP[2] 84: DP4 TEMP[3].x, TEMP[4], TEMP[3] 85: MOV TEMP[4].x, TEMP[0].xxxx 86: MOV TEMP[4].y, TEMP[2].xxxx 87: MOV TEMP[4].z, TEMP[3].xxxx 88: MOV TEMP[5].x, TEMP[0].xxxx 89: MOV TEMP[5].y, TEMP[2].xxxx 90: MOV TEMP[5].z, TEMP[3].xxxx 91: ADD TEMP[4].xyz, TEMP[4].xyzz, -CONST[4][19].xyzz 92: DP3 TEMP[4].x, CONST[4][20].xyzz, TEMP[4].xyzz 93: MOV TEMP[5].w, TEMP[4].xxxx 94: MOV TEMP[4].w, IMM[0].xxxx 95: MOV TEMP[4].x, TEMP[7].xxxx 96: MOV TEMP[4].y, TEMP[7].yyyy 97: MOV TEMP[4].z, TEMP[7].zzzz 98: MOV TEMP[7].w, IMM[0].wwww 99: MOV TEMP[7].x, TEMP[0].xxxx 100: MOV TEMP[7].y, TEMP[2].xxxx 101: MOV TEMP[7].z, TEMP[3].xxxx 102: MOV TEMP[0].x, CONST[4][0].wwww 103: MOV TEMP[0].y, CONST[4][1].wwww 104: MOV TEMP[0].z, CONST[4][2].wwww 105: MOV TEMP[0].w, CONST[4][3].wwww 106: DP4 TEMP[0].x, TEMP[7], TEMP[0] 107: MAD TEMP[2].xy, IN[2].xyyy, CONST[1][1].zwww, CONST[1][2].xyyy 108: MOV TEMP[3].xy, IN[3].xyyy 109: MOV TEMP[3].w, IMM[0].xxxx 110: TXL TEMP[3], TEMP[3], SAMP[0], 2D, IMM[1].wyw 111: MOV TEMP[8].x, CONST[4][0].xxxx 112: MOV TEMP[8].y, CONST[4][1].xxxx 113: MOV TEMP[8].z, CONST[4][2].xxxx 114: MOV TEMP[8].w, CONST[4][3].xxxx 115: DP4 TEMP[8].x, TEMP[7], TEMP[8] 116: MOV TEMP[9].x, CONST[4][0].yyyy 117: MOV TEMP[9].y, CONST[4][1].yyyy 118: MOV TEMP[9].z, CONST[4][2].yyyy 119: MOV TEMP[9].w, CONST[4][3].yyyy 120: DP4 TEMP[9].x, TEMP[7], TEMP[9] 121: MOV TEMP[8].y, -TEMP[9].xxxx 122: MOV TEMP[9].x, CONST[4][0].zzzz 123: MOV TEMP[9].y, CONST[4][1].zzzz 124: MOV TEMP[9].z, CONST[4][2].zzzz 125: MOV TEMP[9].w, CONST[4][3].zzzz 126: DP4 TEMP[7].x, TEMP[7], TEMP[9] 127: MAD TEMP[7].x, TEMP[7].xxxx, IMM[2].zzzz, -TEMP[0].xxxx 128: MOV TEMP[8].z, TEMP[7].xxxx 129: MOV TEMP[8].w, TEMP[0].xxxx 130: MOV OUT[1], TEMP[1] 131: MOV OUT[2].xy, TEMP[2].xyxx 132: MOV OUT[4], TEMP[4] 133: MOV OUT[6], IMM[0].xxxx 134: MOV OUT[7], TEMP[6] 135: MOV OUT[5], TEMP[3] 136: MOV OUT[0], TEMP[8] 137: MOV OUT[3], TEMP[5] 138: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %17 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4 %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 %19 = call float @llvm.SI.load.const(<16 x i8> %18, i32 0) %20 = call float @llvm.SI.load.const(<16 x i8> %18, i32 4) %21 = call float @llvm.SI.load.const(<16 x i8> %18, i32 8) %22 = call float @llvm.SI.load.const(<16 x i8> %18, i32 12) %23 = call float @llvm.SI.load.const(<16 x i8> %18, i32 16) %24 = call float @llvm.SI.load.const(<16 x i8> %18, i32 20) %25 = call float @llvm.SI.load.const(<16 x i8> %18, i32 24) %26 = call float @llvm.SI.load.const(<16 x i8> %18, i32 28) %27 = call float @llvm.SI.load.const(<16 x i8> %18, i32 32) %28 = call float @llvm.SI.load.const(<16 x i8> %18, i32 36) %29 = call float @llvm.SI.load.const(<16 x i8> %18, i32 40) %30 = call float @llvm.SI.load.const(<16 x i8> %18, i32 44) %31 = call float @llvm.SI.load.const(<16 x i8> %18, i32 48) %32 = call float @llvm.SI.load.const(<16 x i8> %18, i32 52) %33 = call float @llvm.SI.load.const(<16 x i8> %18, i32 56) %34 = call float @llvm.SI.load.const(<16 x i8> %18, i32 60) %35 = call float @llvm.SI.load.const(<16 x i8> %18, i32 304) %36 = call float @llvm.SI.load.const(<16 x i8> %18, i32 308) %37 = call float @llvm.SI.load.const(<16 x i8> %18, i32 312) %38 = call float @llvm.SI.load.const(<16 x i8> %18, i32 320) %39 = call float @llvm.SI.load.const(<16 x i8> %18, i32 324) %40 = call float @llvm.SI.load.const(<16 x i8> %18, i32 328) %41 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %42 = load <8 x i32>, <8 x i32> addrspace(2)* %41, align 32, !tbaa !0 %43 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %44 = load <4 x i32>, <4 x i32> addrspace(2)* %43, align 16, !tbaa !0 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !tbaa !0 %54 = add i32 %5, %7 %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %54) %56 = extractelement <4 x float> %55, i32 0 %57 = extractelement <4 x float> %55, i32 1 %58 = extractelement <4 x float> %55, i32 2 %59 = extractelement <4 x float> %55, i32 3 %60 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %61 = load <16 x i8>, <16 x i8> addrspace(2)* %60, align 16, !tbaa !0 %62 = add i32 %5, %7 %63 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %61, i32 0, i32 %62) %64 = extractelement <4 x float> %63, i32 0 %65 = extractelement <4 x float> %63, i32 1 %66 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %67 = load <16 x i8>, <16 x i8> addrspace(2)* %66, align 16, !tbaa !0 %68 = add i32 %10, %6 %69 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %67, i32 0, i32 %68) %70 = extractelement <4 x float> %69, i32 0 %71 = extractelement <4 x float> %69, i32 1 %72 = bitcast float %70 to i32 %73 = bitcast float %71 to i32 %74 = insertelement <4 x i32> undef, i32 %72, i32 0 %75 = insertelement <4 x i32> %74, i32 %73, i32 1 %76 = insertelement <4 x i32> %75, i32 0, i32 2 %77 = bitcast <8 x i32> %42 to <32 x i8> %78 = bitcast <4 x i32> %44 to <16 x i8> %79 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %76, <32 x i8> %77, <16 x i8> %78, i32 2) %80 = extractelement <4 x float> %79, i32 0 %81 = extractelement <4 x float> %79, i32 1 %82 = extractelement <4 x float> %79, i32 2 %83 = extractelement <4 x float> %79, i32 3 %84 = bitcast float %70 to i32 %85 = bitcast float %71 to i32 %86 = insertelement <4 x i32> , i32 %84, i32 1 %87 = insertelement <4 x i32> %86, i32 %85, i32 2 %88 = insertelement <4 x i32> %87, i32 0, i32 3 %89 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %88, <8 x i32> %42, <4 x i32> %44, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %90 = extractelement <4 x float> %89, i32 0 %91 = extractelement <4 x float> %89, i32 1 %92 = extractelement <4 x float> %89, i32 2 %93 = extractelement <4 x float> %89, i32 3 %94 = bitcast float %70 to i32 %95 = bitcast float %71 to i32 %96 = insertelement <4 x i32> , i32 %94, i32 1 %97 = insertelement <4 x i32> %96, i32 %95, i32 2 %98 = insertelement <4 x i32> %97, i32 0, i32 3 %99 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %98, <8 x i32> %42, <4 x i32> %44, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %100 = extractelement <4 x float> %99, i32 0 %101 = extractelement <4 x float> %99, i32 1 %102 = extractelement <4 x float> %99, i32 2 %103 = extractelement <4 x float> %99, i32 3 %104 = fmul float %56, 2.550000e+02 %105 = fadd float %104, -1.280000e+02 %106 = fmul float %57, 2.550000e+02 %107 = fadd float %106, -1.280000e+02 %108 = fmul float %58, 2.550000e+02 %109 = fadd float %108, -1.280000e+02 %110 = fmul float %59, 2.550000e+02 %111 = fadd float %110, -1.280000e+02 %112 = fcmp olt float %105, 0.000000e+00 %113 = fcmp olt float %107, 0.000000e+00 %114 = fcmp olt float %109, 0.000000e+00 %115 = fcmp olt float %111, 0.000000e+00 %116 = select i1 %112, float 1.000000e+00, float 0.000000e+00 %117 = select i1 %114, float 1.000000e+00, float 0.000000e+00 %118 = select i1 %115, float 1.000000e+00, float 0.000000e+00 %119 = call float @fabs(float %105) %120 = call float @fabs(float %107) %121 = call float @fabs(float %109) %122 = call float @fabs(float %111) %123 = fsub float %119, %116 %124 = select i1 %113, float -1.000000e+00, float -0.000000e+00 %125 = fadd float %120, %124 %126 = fsub float %121, %117 %127 = fsub float %122, %118 %128 = fadd float %123, -6.400000e+01 %129 = fadd float %125, -6.400000e+01 %130 = fadd float %126, -6.400000e+01 %131 = fadd float %127, -6.400000e+01 %132 = fcmp olt float %128, 0.000000e+00 %133 = fcmp olt float %129, 0.000000e+00 %134 = fcmp olt float %130, 0.000000e+00 %135 = fcmp olt float %131, 0.000000e+00 %136 = select i1 %132, float 1.000000e+00, float 0.000000e+00 %137 = select i1 %133, float 1.000000e+00, float 0.000000e+00 %138 = select i1 %134, float 1.000000e+00, float 0.000000e+00 %139 = select i1 %135, float 1.000000e+00, float 0.000000e+00 %140 = call float @fabs(float %128) %141 = call float @fabs(float %129) %142 = call float @fabs(float %130) %143 = call float @fabs(float %131) %144 = fsub float %140, %136 %145 = fsub float %141, %137 %146 = fsub float %142, %138 %147 = fsub float %143, %139 %148 = fmul float %144, 0x3F90410420000000 %149 = fmul float %145, 0x3F90410420000000 %150 = fmul float %146, 0x3F90410420000000 %151 = fmul float %147, 0x3F90410420000000 %152 = fmul float %136, 2.000000e+00 %153 = fmul float %137, 2.000000e+00 %154 = fmul float %138, 2.000000e+00 %155 = fmul float %139, 2.000000e+00 %156 = fsub float 1.000000e+00, %152 %157 = fsub float 1.000000e+00, %153 %158 = fsub float 1.000000e+00, %154 %159 = fsub float 1.000000e+00, %155 %160 = fmul float %116, 2.000000e+00 %161 = fmul float %117, 2.000000e+00 %162 = fmul float %118, 2.000000e+00 %163 = fsub float 1.000000e+00, %160 %164 = fsub float 1.000000e+00, %161 %165 = fsub float 1.000000e+00, %162 %166 = fsub float 1.000000e+00, %148 %167 = fsub float %166, %149 %168 = fmul float %148, %148 %169 = fmul float %149, %149 %170 = fadd float %169, %168 %171 = fmul float %167, %167 %172 = fadd float %170, %171 %173 = call float @llvm.AMDGPU.rsq.clamped.f32(float %172) %174 = fmul float %148, %173 %175 = fmul float %149, %173 %176 = fmul float %167, %173 %177 = fmul float %174, %156 %178 = fmul float %175, %157 %179 = fsub float 1.000000e+00, %150 %180 = fsub float %179, %151 %181 = fmul float %150, %150 %182 = fmul float %151, %151 %183 = fadd float %182, %181 %184 = fmul float %180, %180 %185 = fadd float %183, %184 %186 = call float @llvm.AMDGPU.rsq.clamped.f32(float %185) %187 = fmul float %150, %186 %188 = fmul float %151, %186 %189 = fmul float %180, %186 %190 = fmul float %187, %158 %191 = fmul float %188, %159 %192 = fmul float %176, %163 %193 = fmul float %177, %80 %194 = fmul float %178, %81 %195 = fadd float %193, %194 %196 = fmul float %192, %82 %197 = fadd float %195, %196 %198 = fmul float %83, 0.000000e+00 %199 = fadd float %197, %198 %200 = fmul float %177, %90 %201 = fmul float %178, %91 %202 = fadd float %200, %201 %203 = fmul float %192, %92 %204 = fadd float %202, %203 %205 = fmul float %93, 0.000000e+00 %206 = fadd float %204, %205 %207 = fmul float %177, %100 %208 = fmul float %178, %101 %209 = fadd float %207, %208 %210 = fmul float %192, %102 %211 = fadd float %209, %210 %212 = fmul float %103, 0.000000e+00 %213 = fadd float %211, %212 %214 = fmul float %199, %199 %215 = fmul float %206, %206 %216 = fadd float %215, %214 %217 = fmul float %213, %213 %218 = fadd float %216, %217 %219 = call float @llvm.AMDGPU.rsq.clamped.f32(float %218) %220 = fmul float %199, %219 %221 = fmul float %206, %219 %222 = fmul float %213, %219 %223 = fmul float %189, %164 %224 = fmul float %190, %80 %225 = fmul float %191, %81 %226 = fadd float %224, %225 %227 = fmul float %223, %82 %228 = fadd float %226, %227 %229 = fmul float %83, 0.000000e+00 %230 = fadd float %228, %229 %231 = fmul float %190, %90 %232 = fmul float %191, %91 %233 = fadd float %231, %232 %234 = fmul float %223, %92 %235 = fadd float %233, %234 %236 = fmul float %93, 0.000000e+00 %237 = fadd float %235, %236 %238 = fmul float %190, %100 %239 = fmul float %191, %101 %240 = fadd float %238, %239 %241 = fmul float %223, %102 %242 = fadd float %240, %241 %243 = fmul float %103, 0.000000e+00 %244 = fadd float %242, %243 %245 = fmul float %230, %220 %246 = fmul float %237, %221 %247 = fadd float %246, %245 %248 = fmul float %244, %222 %249 = fadd float %247, %248 %250 = fmul float %249, %220 %251 = fmul float %249, %221 %252 = fmul float %249, %222 %253 = fsub float %230, %250 %254 = fsub float %237, %251 %255 = fsub float %244, %252 %256 = fmul float %253, %253 %257 = fmul float %254, %254 %258 = fadd float %257, %256 %259 = fmul float %255, %255 %260 = fadd float %258, %259 %261 = call float @llvm.AMDGPU.rsq.clamped.f32(float %260) %262 = fmul float %253, %261 %263 = fmul float %254, %261 %264 = fmul float %255, %261 %265 = fmul float %49, %80 %266 = fmul float %50, %81 %267 = fadd float %265, %266 %268 = fmul float %51, %82 %269 = fadd float %267, %268 %270 = fadd float %269, %83 %271 = fmul float %49, %90 %272 = fmul float %50, %91 %273 = fadd float %271, %272 %274 = fmul float %51, %92 %275 = fadd float %273, %274 %276 = fadd float %275, %93 %277 = fmul float %49, %100 %278 = fmul float %50, %101 %279 = fadd float %277, %278 %280 = fmul float %51, %102 %281 = fadd float %279, %280 %282 = fadd float %281, %103 %283 = fsub float %270, %35 %284 = fsub float %276, %36 %285 = fsub float %282, %37 %286 = fmul float %38, %283 %287 = fmul float %39, %284 %288 = fadd float %287, %286 %289 = fmul float %40, %285 %290 = fadd float %288, %289 %291 = fmul float %270, %22 %292 = fmul float %276, %26 %293 = fadd float %291, %292 %294 = fmul float %282, %30 %295 = fadd float %293, %294 %296 = fadd float %295, %34 %297 = fmul float %64, %13 %298 = fadd float %297, %15 %299 = fmul float %65, %14 %300 = fadd float %299, %16 %301 = bitcast float %70 to i32 %302 = bitcast float %71 to i32 %303 = insertelement <4 x i32> , i32 %301, i32 1 %304 = insertelement <4 x i32> %303, i32 %302, i32 2 %305 = insertelement <4 x i32> %304, i32 0, i32 3 %306 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %305, <8 x i32> %42, <4 x i32> %44, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %307 = extractelement <4 x float> %306, i32 0 %308 = extractelement <4 x float> %306, i32 1 %309 = extractelement <4 x float> %306, i32 2 %310 = extractelement <4 x float> %306, i32 3 %311 = fmul float %270, %19 %312 = fmul float %276, %23 %313 = fadd float %311, %312 %314 = fmul float %282, %27 %315 = fadd float %313, %314 %316 = fadd float %315, %31 %317 = fmul float %270, %20 %318 = fmul float %276, %24 %319 = fadd float %317, %318 %320 = fmul float %282, %28 %321 = fadd float %319, %320 %322 = fadd float %321, %32 %323 = fsub float -0.000000e+00, %322 %324 = fmul float %270, %21 %325 = fmul float %276, %25 %326 = fadd float %324, %325 %327 = fmul float %282, %29 %328 = fadd float %326, %327 %329 = fadd float %328, %33 %330 = fmul float %329, 2.000000e+00 %331 = fsub float %330, %296 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %298, float %300, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %270, float %276, float %282, float %290) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %220, float %221, float %222, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %307, float %308, float %309, float %310) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %262, float %263, float %264, float %165) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %316, float %323, float %331, float %296) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0xc3000000 ; 7E0202FF C3000000 v_mov_b32_e32 v2, 0x437f0000 ; 7E0402FF 437F0000 v_mov_b32_e32 v4, 0x80000000 ; 7E0802FF 80000000 v_mov_b32_e32 v5, 0xc2800000 ; 7E0A02FF C2800000 v_mov_b32_e32 v6, 0x3c820821 ; 7E0C02FF 3C820821 v_add_i32_e32 v0, s10, v0 ; 4A00000A v_add_i32_e32 v3, s11, v3 ; 4A06060B s_load_dwordx4 s[24:27], s[4:5], 0x0 ; C08C0500 s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908 s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C s_load_dwordx4 s[36:39], s[2:3], 0x4 ; C0920304 s_load_dwordx4 s[40:43], s[2:3], 0x10 ; C0940310 s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[7:10], v0, s[12:15], 0 idxen ; E00C2000 80030700 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[10:13], v0, s[16:19], 0 idxen ; E00C2000 80040A00 buffer_load_format_xyzw v[14:17], v0, s[20:23], 0 idxen ; E00C2000 80050E00 buffer_load_format_xyzw v[20:23], v3, s[8:11], 0 idxen ; E00C2000 80021403 s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v22, 0 ; 7E2C0280 s_buffer_load_dword s0, s[40:43], 0xf ; C200290F s_buffer_load_dword s19, s[40:43], 0x4c ; C209A94C s_buffer_load_dword s18, s[40:43], 0x4d ; C209294D s_buffer_load_dword s12, s[40:43], 0x4e ; C206294E s_buffer_load_dword s20, s[40:43], 0x50 ; C20A2950 s_buffer_load_dword s23, s[36:39], 0x6 ; C20BA506 s_buffer_load_dword s22, s[36:39], 0x7 ; C20B2507 s_buffer_load_dword s44, s[36:39], 0x8 ; C2162508 s_buffer_load_dword s36, s[36:39], 0x9 ; C2122509 s_buffer_load_dword s21, s[40:43], 0x51 ; C20AA951 s_buffer_load_dword s17, s[40:43], 0x52 ; C208A952 s_buffer_load_dword s6, s[40:43], 0x5 ; C2032905 s_buffer_load_dword s7, s[40:43], 0x6 ; C203A906 s_buffer_load_dword s11, s[40:43], 0x7 ; C205A907 s_buffer_load_dword s3, s[40:43], 0x8 ; C201A908 s_buffer_load_dword s2, s[40:43], 0x9 ; C2012909 s_buffer_load_dword s8, s[40:43], 0x0 ; C2042900 s_buffer_load_dword s9, s[40:43], 0x1 ; C204A901 s_buffer_load_dword s10, s[40:43], 0x2 ; C2052902 s_buffer_load_dword s13, s[40:43], 0x3 ; C206A903 s_buffer_load_dword s15, s[40:43], 0x4 ; C207A904 s_buffer_load_dword s14, s[40:43], 0xa ; C207290A s_buffer_load_dword s16, s[40:43], 0xb ; C208290B s_buffer_load_dword s5, s[40:43], 0xc ; C202A90C s_buffer_load_dword s4, s[40:43], 0xd ; C202290D s_buffer_load_dword s1, s[40:43], 0xe ; C200A90E s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s44 ; 7E00022C v_mov_b32_e32 v3, s36 ; 7E060224 image_sample_l v[23:26], 15, 0, 0, 0, 0, 0, 0, 0, v[20:23], s[28:35], s[24:27] ; F0900F00 00C71714 v_mov_b32_e32 v19, 0x10001 ; 7E2602FF 00010001 image_sample_l_o v[27:30], 15, 0, 0, 0, 0, 0, 0, 0, v[19:22], s[28:35], s[24:27] ; F0D00F00 00C71B13 v_mov_b32_e32 v19, 0x20002 ; 7E2602FF 00020002 image_sample_l_o v[31:34], 15, 0, 0, 0, 0, 0, 0, 0, v[19:22], s[28:35], s[24:27] ; F0D00F00 00C71F13 v_mov_b32_e32 v19, 0x30003 ; 7E2602FF 00030003 image_sample_l_o v[16:19], 15, 0, 0, 0, 0, 0, 0, 0, v[19:22], s[28:35], s[24:27] ; F0D00F00 00C71013 exp 15, 32, 0, 0, 0, v22, v22, v22, v22 ; F800020F 16161616 s_waitcnt vmcnt(3) ; BF8C0773 v_mul_f32_e32 v20, v24, v8 ; 10281118 v_mad_f32 v10, v2, v10, v1 ; D282000A 04061502 v_mad_f32 v11, v2, v11, v1 ; D282000B 04061702 v_mad_f32 v12, v2, v12, v1 ; D282000C 04061902 v_mac_f32_e32 v1, v2, v13 ; 3E021B02 v_mac_f32_e32 v0, s23, v14 ; 3E001C17 v_mac_f32_e32 v3, s22, v15 ; 3E061E16 v_mac_f32_e32 v20, v23, v7 ; 3E280F17 s_waitcnt vmcnt(2) ; BF8C0772 v_mul_f32_e32 v2, v28, v8 ; 1004111C v_mac_f32_e32 v2, v27, v7 ; 3E040F1B s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v8, v32, v8 ; 10101120 v_mac_f32_e32 v8, v31, v7 ; 3E100F1F v_mac_f32_e32 v20, v25, v9 ; 3E281319 v_mac_f32_e32 v2, v29, v9 ; 3E04131D v_mac_f32_e32 v8, v33, v9 ; 3E101321 v_cmp_gt_f32_e32 vcc, 0, v11 ; 7C081680 v_cndmask_b32_e64 v4, v4, -1.0, vcc ; D2000004 01A9E704 v_add_f32_e64 v4, |v11|, v4 ; D2060104 0002090B v_cmp_gt_f32_e32 vcc, 0, v10 ; 7C081480 v_cndmask_b32_e64 v7, 0, 1.0, vcc ; D2000007 01A9E480 v_sub_f32_e64 v9, |v10|, v7 ; D2080109 00020F0A v_cmp_gt_f32_e32 vcc, 0, v12 ; 7C081880 v_cndmask_b32_e64 v10, 0, 1.0, vcc ; D200000A 01A9E480 v_sub_f32_e64 v11, |v12|, v10 ; D208010B 0002150C v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v12, 0, 1.0, vcc ; D200000C 01A9E480 v_sub_f32_e64 v1, |v1|, v12 ; D2080101 00021901 v_add_f32_e32 v9, v5, v9 ; 06121305 v_add_f32_e32 v4, v5, v4 ; 06080905 v_add_f32_e32 v11, v5, v11 ; 06161705 v_add_f32_e32 v1, v5, v1 ; 06020305 v_cmp_gt_f32_e32 vcc, 0, v9 ; 7C081280 v_cndmask_b32_e64 v5, 0, 1.0, vcc ; D2000005 01A9E480 v_sub_f32_e64 v9, |v9|, v5 ; D2080109 00020B09 v_cmp_gt_f32_e32 vcc, 0, v4 ; 7C080880 v_cndmask_b32_e64 v13, 0, 1.0, vcc ; D200000D 01A9E480 v_sub_f32_e64 v4, |v4|, v13 ; D2080104 00021B04 v_cmp_gt_f32_e32 vcc, 0, v11 ; 7C081680 v_cndmask_b32_e64 v14, 0, 1.0, vcc ; D200000E 01A9E480 v_sub_f32_e64 v11, |v11|, v14 ; D208010B 00021D0B v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v15, 0, 1.0, vcc ; D200000F 01A9E480 v_sub_f32_e64 v1, |v1|, v15 ; D2080101 00021F01 v_mul_f32_e32 v21, v6, v9 ; 102A1306 v_mad_f32 v9, -v9, v6, 1.0 ; D2820009 23CA0D09 v_mad_f32 v9, -v4, v6, v9 ; D2820009 24260D04 v_mul_f32_e32 v4, v6, v4 ; 10080906 v_mul_f32_e32 v35, v6, v11 ; 10461706 v_mad_f32 v11, -v11, v6, 1.0 ; D282000B 23CA0D0B v_mad_f32 v11, -v1, v6, v11 ; D282000B 242E0D01 v_mul_f32_e32 v1, v6, v1 ; 10020306 v_mul_f32_e32 v6, v21, v21 ; 100C2B15 v_mac_f32_e32 v6, v4, v4 ; 3E0C0904 v_mac_f32_e32 v6, v9, v9 ; 3E0C1309 v_rsq_clamp_f32_e32 v6, v6 ; 7E0C5906 v_mul_f32_e32 v36, v35, v35 ; 10484723 v_mac_f32_e32 v36, v1, v1 ; 3E480301 v_mac_f32_e32 v36, v11, v11 ; 3E48170B v_rsq_clamp_f32_e32 v36, v36 ; 7E485924 v_mul_f32_e32 v21, v6, v21 ; 102A2B06 v_mul_f32_e32 v4, v6, v4 ; 10080906 v_mul_f32_e32 v6, v6, v9 ; 100C1306 v_mul_f32_e32 v9, v36, v35 ; 10124724 v_mul_f32_e32 v1, v36, v1 ; 10020324 v_mul_f32_e32 v11, v36, v11 ; 10161724 v_mad_f32 v5, -2.0, v5, 1.0 ; D2820005 03CA0AF5 v_mul_f32_e32 v5, v5, v21 ; 100A2B05 v_mad_f32 v13, -2.0, v13, 1.0 ; D282000D 03CA1AF5 v_mul_f32_e32 v4, v13, v4 ; 1008090D v_mad_f32 v13, -2.0, v14, 1.0 ; D282000D 03CA1CF5 v_mul_f32_e32 v9, v13, v9 ; 1012130D v_mad_f32 v13, -2.0, v15, 1.0 ; D282000D 03CA1EF5 v_mul_f32_e32 v1, v13, v1 ; 1002030D v_mad_f32 v7, -2.0, v7, 1.0 ; D2820007 03CA0EF5 v_mul_f32_e32 v6, v7, v6 ; 100C0D07 v_mad_f32 v7, -2.0, v10, 1.0 ; D2820007 03CA14F5 v_mul_f32_e32 v7, v7, v11 ; 100E1707 v_add_f32_e32 v10, v26, v20 ; 0614291A v_mul_f32_e32 v11, v24, v4 ; 10160918 v_mac_f32_e32 v11, v23, v5 ; 3E160B17 v_mul_f32_e32 v13, v24, v1 ; 101A0318 v_mac_f32_e32 v13, v23, v9 ; 3E1A1317 v_mac_f32_e32 v11, v25, v6 ; 3E160D19 v_mac_f32_e32 v13, v25, v7 ; 3E1A0F19 v_mac_f32_e32 v11, 0, v26 ; 3E163480 v_mac_f32_e32 v13, 0, v26 ; 3E1A3480 v_add_f32_e32 v2, v30, v2 ; 0604051E v_mul_f32_e32 v14, v28, v4 ; 101C091C v_mac_f32_e32 v14, v27, v5 ; 3E1C0B1B v_mul_f32_e32 v15, v28, v1 ; 101E031C v_mac_f32_e32 v15, v27, v9 ; 3E1E131B v_mac_f32_e32 v14, v29, v6 ; 3E1C0D1D v_mac_f32_e32 v15, v29, v7 ; 3E1E0F1D v_mac_f32_e32 v14, 0, v30 ; 3E1C3C80 v_mac_f32_e32 v15, 0, v30 ; 3E1E3C80 v_mul_f32_e32 v4, v32, v4 ; 10080920 v_mac_f32_e32 v4, v31, v5 ; 3E080B1F v_mul_f32_e32 v1, v32, v1 ; 10020320 v_mac_f32_e32 v1, v31, v9 ; 3E02131F v_mac_f32_e32 v4, v33, v6 ; 3E080D21 v_mac_f32_e32 v1, v33, v7 ; 3E020F21 v_add_f32_e32 v5, v34, v8 ; 060A1122 v_mac_f32_e32 v4, 0, v34 ; 3E084480 v_mac_f32_e32 v1, 0, v34 ; 3E024480 exp 15, 33, 0, 0, 0, v0, v3, v22, v22 ; F800021F 16160300 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_subrev_f32_e32 v0, s19, v10 ; 0A001413 v_mul_f32_e32 v0, s20, v0 ; 10000014 v_subrev_f32_e32 v3, s18, v2 ; 0A060412 v_mac_f32_e32 v0, s21, v3 ; 3E000615 v_mul_f32_e32 v3, v11, v11 ; 1006170B v_mac_f32_e32 v3, v14, v14 ; 3E061D0E v_mac_f32_e32 v3, v4, v4 ; 3E060904 v_rsq_clamp_f32_e32 v3, v3 ; 7E065903 v_subrev_f32_e32 v6, s12, v5 ; 0A0C0A0C v_mac_f32_e32 v0, s17, v6 ; 3E000C11 exp 15, 34, 0, 0, 0, v10, v2, v5, v0 ; F800022F 0005020A s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, v3, v11 ; 10001703 v_mul_f32_e32 v6, v3, v14 ; 100C1D03 v_mul_f32_e32 v3, v3, v4 ; 10060903 exp 15, 35, 0, 0, 0, v0, v6, v3, v22 ; F800023F 16030600 exp 15, 36, 0, 0, 0, v16, v17, v18, v19 ; F800024F 13121110 v_mul_f32_e32 v4, s11, v2 ; 1008040B v_mul_f32_e32 v7, s15, v2 ; 100E040F v_mul_f32_e32 v8, s6, v2 ; 10100406 v_mul_f32_e32 v2, s7, v2 ; 10040407 v_mac_f32_e32 v4, s13, v10 ; 3E08140D v_mac_f32_e32 v7, s8, v10 ; 3E0E1408 v_mac_f32_e32 v8, s9, v10 ; 3E101409 v_mac_f32_e32 v2, s10, v10 ; 3E04140A v_mac_f32_e32 v4, s16, v5 ; 3E080A10 v_mac_f32_e32 v7, s3, v5 ; 3E0E0A03 v_mac_f32_e32 v8, s2, v5 ; 3E100A02 v_mac_f32_e32 v2, s14, v5 ; 3E040A0E v_mul_f32_e32 v5, v0, v13 ; 100A1B00 v_mac_f32_e32 v5, v6, v15 ; 3E0A1F06 v_mac_f32_e32 v5, v3, v1 ; 3E0A0303 s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v0, -v5, v0, v13 ; D2820000 24360105 v_mad_f32 v6, -v5, v6, v15 ; D2820006 243E0D05 v_mad_f32 v1, -v5, v3, v1 ; D2820001 24060705 v_add_f32_e32 v3, s0, v4 ; 06060800 v_add_f32_e32 v4, s5, v7 ; 06080E05 v_add_f32_e32 v5, s4, v8 ; 060A1004 v_mul_f32_e32 v7, v0, v0 ; 100E0100 v_mac_f32_e32 v7, v6, v6 ; 3E0E0D06 v_mac_f32_e32 v7, v1, v1 ; 3E0E0301 v_rsq_clamp_f32_e32 v7, v7 ; 7E0E5907 v_mad_f32 v8, -2.0, v12, 1.0 ; D2820008 03CA18F5 v_add_f32_e32 v2, s1, v2 ; 06040401 exp 15, 37, 0, 0, 0, v22, v22, v22, v22 ; F800025F 16161616 v_mul_f32_e32 v0, v7, v0 ; 10000107 v_mul_f32_e32 v6, v7, v6 ; 100C0D07 v_mul_f32_e32 v1, v7, v1 ; 10020307 exp 15, 38, 0, 0, 0, v0, v6, v1, v8 ; F800026F 08010600 s_waitcnt expcnt(0) ; BF8C070F v_xor_b32_e32 v0, 0x80000000, v5 ; 3A000AFF 80000000 v_mad_f32 v1, 2.0, v2, -v3 ; D2820001 840E04F4 exp 15, 12, 0, 0, 0, v4, v0, v1, v3 ; F80000CF 03010004 exp 15, 13, 0, 1, 0, v22, v22, v22, v22 ; F80008DF 16161616 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 40 Code Size: 1140 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0xB last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[5], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL OUT[2], COLOR[2] DCL OUT[3], COLOR[3] DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[1][0..3] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0..6], LOCAL IMM[0] UINT32 {0, 16, 4, 0} IMM[1] FLT32 { 2.0000, -1.0000, 1.0000, 0.5000} IMM[2] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: DP3 TEMP[0].x, IN[2].xyzz, IN[2].xyzz 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MUL TEMP[0].xyz, IN[2].xyzz, TEMP[0].xxxx 3: MOV TEMP[1].xy, IN[0].xyyy 4: TEX TEMP[1], TEMP[1], SAMP[0], 2D 5: MUL TEMP[2].xyz, CONST[1][1].xyzz, TEMP[1].xyzz 6: MUL TEMP[2].xyz, TEMP[2].xyzz, IN[3].xyzz 7: MUL TEMP[3].xyz, TEMP[0].zxyy, IN[4].yzxx 8: MAD TEMP[3].xyz, TEMP[0].yzxx, IN[4].zxyy, -TEMP[3].xyzz 9: MOV TEMP[4].xy, IN[0].xyyy 10: TEX TEMP[4].yw, TEMP[4], SAMP[1], 2D 11: MAD TEMP[4].xy, TEMP[4].wyyy, IMM[1].xxxx, IMM[1].yyyy 12: MOV TEMP[5].x, TEMP[4].xxxx 13: MOV TEMP[5].y, -TEMP[4].yyyy 14: MUL TEMP[5].xy, TEMP[5].xyyy, CONST[1][0].xxxx 15: MOV TEMP[6].x, TEMP[5].xxxx 16: MOV TEMP[6].y, TEMP[5].yyyy 17: DP2 TEMP[4].x, TEMP[4].xyyy, TEMP[4].xyyy 18: ADD TEMP[4].x, IMM[1].zzzz, -TEMP[4].xxxx 19: MOV_SAT TEMP[4].x, TEMP[4].xxxx 20: SQRT TEMP[4].x, TEMP[4].xxxx 21: MOV TEMP[6].z, TEMP[4].xxxx 22: DP3 TEMP[4].x, TEMP[6].xyzz, TEMP[6].xyzz 23: RSQ TEMP[4].x, TEMP[4].xxxx 24: MUL TEMP[4].xyz, TEMP[6].xyzz, TEMP[4].xxxx 25: DP3 TEMP[5].x, IN[4].xyzz, IN[4].xyzz 26: RSQ TEMP[5].x, TEMP[5].xxxx 27: MUL TEMP[5].xyz, IN[4].xyzz, TEMP[5].xxxx 28: DP3 TEMP[6].x, TEMP[3].xyzz, TEMP[3].xyzz 29: RSQ TEMP[6].x, TEMP[6].xxxx 30: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[6].xxxx 31: MUL TEMP[3].xyz, IN[4].wwww, TEMP[3].xyzz 32: MUL TEMP[3].xyz, TEMP[4].yyyy, TEMP[3].xyzz 33: MAD TEMP[3].xyz, TEMP[4].xxxx, TEMP[5].xyzz, TEMP[3].xyzz 34: MAD TEMP[0].xyz, TEMP[0].xyzz, TEMP[4].zzzz, TEMP[3].xyzz 35: DP3 TEMP[3].x, TEMP[0].xyzz, TEMP[0].xyzz 36: RSQ TEMP[3].x, TEMP[3].xxxx 37: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[3].xxxx 38: MAD TEMP[0].xyz, TEMP[0].xyzz, IMM[1].wwww, IMM[1].wwww 39: MOV TEMP[3].w, IMM[2].xxxx 40: MOV TEMP[3].x, TEMP[0].xxxx 41: MOV TEMP[3].y, TEMP[0].yyyy 42: MOV TEMP[3].z, TEMP[0].zzzz 43: FSLT TEMP[0].x, TEMP[1].wwww, CONST[1][0].yyyy 44: AND TEMP[0].x, TEMP[0].xxxx, IMM[1].zzzz 45: KILL_IF -TEMP[0].xxxx 46: MOV TEMP[0].w, IMM[2].xxxx 47: MOV TEMP[0].x, TEMP[2].xxxx 48: MOV TEMP[0].y, TEMP[2].yyyy 49: MOV TEMP[0].z, TEMP[2].zzzz 50: MOV OUT[2], IN[1].wwww 51: MOV OUT[0], TEMP[0] 52: MOV OUT[3], TEMP[3] 53: MOV OUT[1], IMM[2].xxxx 54: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %29 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %30 = load <32 x i8>, <32 x i8> addrspace(2)* %29, align 32, !tbaa !0 %31 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %32 = load <16 x i8>, <16 x i8> addrspace(2)* %31, align 16, !tbaa !0 %33 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %34 = bitcast <8 x i32> addrspace(2)* %33 to <32 x i8> addrspace(2)* %35 = load <32 x i8>, <32 x i8> addrspace(2)* %34, align 32, !tbaa !0 %36 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %37 = bitcast <4 x i32> addrspace(2)* %36 to <16 x i8> addrspace(2)* %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %44 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %45 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %46 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %47 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %48 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %49 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %50 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %51 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %52 = fmul float %42, %42 %53 = fmul float %43, %43 %54 = fadd float %53, %52 %55 = fmul float %44, %44 %56 = fadd float %54, %55 %57 = call float @llvm.AMDGPU.rsq.clamped.f32(float %56) %58 = fmul float %42, %57 %59 = fmul float %43, %57 %60 = fmul float %44, %57 %61 = bitcast float %39 to i32 %62 = bitcast float %40 to i32 %63 = insertelement <2 x i32> undef, i32 %61, i32 0 %64 = insertelement <2 x i32> %63, i32 %62, i32 1 %65 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %64, <32 x i8> %30, <16 x i8> %32, i32 2) %66 = extractelement <4 x float> %65, i32 0 %67 = extractelement <4 x float> %65, i32 1 %68 = extractelement <4 x float> %65, i32 2 %69 = extractelement <4 x float> %65, i32 3 %70 = fmul float %26, %66 %71 = fmul float %27, %67 %72 = fmul float %28, %68 %73 = fmul float %70, %45 %74 = fmul float %71, %46 %75 = fmul float %72, %47 %76 = fmul float %60, %49 %77 = fmul float %58, %50 %78 = fmul float %59, %48 %79 = fmul float %59, %50 %80 = fsub float %79, %76 %81 = fmul float %60, %48 %82 = fsub float %81, %77 %83 = fmul float %58, %49 %84 = fsub float %83, %78 %85 = bitcast float %39 to i32 %86 = bitcast float %40 to i32 %87 = insertelement <2 x i32> undef, i32 %85, i32 0 %88 = insertelement <2 x i32> %87, i32 %86, i32 1 %89 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %88, <32 x i8> %35, <16 x i8> %38, i32 2) %90 = extractelement <4 x float> %89, i32 1 %91 = extractelement <4 x float> %89, i32 3 %92 = fmul float %91, 2.000000e+00 %93 = fadd float %92, -1.000000e+00 %94 = fmul float %90, 2.000000e+00 %95 = fadd float %94, -1.000000e+00 %96 = fmul float %93, %24 %97 = fmul float %95, %24 %98 = fmul float %93, %93 %99 = fmul float %95, %95 %100 = fadd float %98, %99 %101 = fsub float 1.000000e+00, %100 %102 = call float @llvm.AMDIL.clamp.(float %101, float 0.000000e+00, float 1.000000e+00) %103 = call float @llvm.sqrt.f32(float %102) %104 = fmul float %96, %96 %105 = fmul float %97, %97 %106 = fadd float %105, %104 %107 = fmul float %103, %103 %108 = fadd float %106, %107 %109 = call float @llvm.AMDGPU.rsq.clamped.f32(float %108) %110 = fmul float %96, %109 %111 = fmul float %97, %109 %112 = fsub float -0.000000e+00, %111 %113 = fmul float %103, %109 %114 = fmul float %48, %48 %115 = fmul float %49, %49 %116 = fadd float %115, %114 %117 = fmul float %50, %50 %118 = fadd float %116, %117 %119 = call float @llvm.AMDGPU.rsq.clamped.f32(float %118) %120 = fmul float %48, %119 %121 = fmul float %49, %119 %122 = fmul float %50, %119 %123 = fmul float %80, %80 %124 = fmul float %82, %82 %125 = fadd float %124, %123 %126 = fmul float %84, %84 %127 = fadd float %125, %126 %128 = call float @llvm.AMDGPU.rsq.clamped.f32(float %127) %129 = fmul float %80, %128 %130 = fmul float %82, %128 %131 = fmul float %84, %128 %132 = fmul float %51, %129 %133 = fmul float %51, %130 %134 = fmul float %51, %131 %135 = fmul float %132, %112 %136 = fmul float %133, %112 %137 = fmul float %134, %112 %138 = fmul float %110, %120 %139 = fadd float %138, %135 %140 = fmul float %110, %121 %141 = fadd float %140, %136 %142 = fmul float %110, %122 %143 = fadd float %142, %137 %144 = fmul float %58, %113 %145 = fadd float %144, %139 %146 = fmul float %59, %113 %147 = fadd float %146, %141 %148 = fmul float %60, %113 %149 = fadd float %148, %143 %150 = fmul float %145, %145 %151 = fmul float %147, %147 %152 = fadd float %151, %150 %153 = fmul float %149, %149 %154 = fadd float %152, %153 %155 = call float @llvm.AMDGPU.rsq.clamped.f32(float %154) %156 = fmul float %145, %155 %157 = fmul float %147, %155 %158 = fmul float %149, %155 %159 = fmul float %156, 5.000000e-01 %160 = fadd float %159, 5.000000e-01 %161 = fmul float %157, 5.000000e-01 %162 = fadd float %161, 5.000000e-01 %163 = fmul float %158, 5.000000e-01 %164 = fadd float %163, 5.000000e-01 %165 = fcmp olt float %69, %25 %166 = select i1 %165, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %166) %167 = call i32 @llvm.SI.packf16(float %73, float %74) %168 = bitcast i32 %167 to float %169 = call i32 @llvm.SI.packf16(float %75, float 0.000000e+00) %170 = bitcast i32 %169 to float %171 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %172 = bitcast i32 %171 to float %173 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %174 = bitcast i32 %173 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %168, float %170, float %168, float %170) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 1, i32 1, float %172, float %174, float %172, float %174) %175 = call i32 @llvm.SI.packf16(float %160, float %162) %176 = bitcast i32 %175 to float %177 = call i32 @llvm.SI.packf16(float %164, float 0.000000e+00) %178 = bitcast i32 %177 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 2, i32 0, float %41, float %41, float %41, float %41) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 3, i32 1, float %176, float %178, float %176, float %178) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 3, 1, [m0] ; C8100700 v_interp_p2_f32 v4, [v4], v1, 3, 1, [m0] ; C8110701 v_interp_p1_f32 v5, v0, 0, 2, [m0] ; C8140800 v_interp_p2_f32 v5, [v5], v1, 0, 2, [m0] ; C8150801 v_interp_p1_f32 v6, v0, 1, 2, [m0] ; C8180900 v_interp_p2_f32 v6, [v6], v1, 1, 2, [m0] ; C8190901 v_interp_p1_f32 v7, v0, 2, 2, [m0] ; C81C0A00 v_interp_p2_f32 v7, [v7], v1, 2, 2, [m0] ; C81D0A01 v_interp_p1_f32 v8, v0, 0, 3, [m0] ; C8200C00 v_interp_p2_f32 v8, [v8], v1, 0, 3, [m0] ; C8210C01 v_interp_p1_f32 v9, v0, 1, 3, [m0] ; C8240D00 v_interp_p2_f32 v9, [v9], v1, 1, 3, [m0] ; C8250D01 v_interp_p1_f32 v10, v0, 2, 3, [m0] ; C8280E00 v_interp_p2_f32 v10, [v10], v1, 2, 3, [m0] ; C8290E01 v_interp_p1_f32 v11, v0, 0, 4, [m0] ; C82C1000 v_interp_p2_f32 v11, [v11], v1, 0, 4, [m0] ; C82D1001 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 v_interp_p1_f32 v12, v0, 1, 4, [m0] ; C8301100 v_interp_p2_f32 v12, [v12], v1, 1, 4, [m0] ; C8311101 s_load_dwordx4 s[20:23], s[4:5], 0x4 ; C08A0504 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 v_interp_p1_f32 v13, v0, 2, 4, [m0] ; C8341200 v_interp_p2_f32 v13, [v13], v1, 2, 4, [m0] ; C8351201 v_interp_p1_f32 v0, v0, 3, 4, [m0] ; C8001300 v_interp_p2_f32 v0, [v0], v1, 3, 4, [m0] ; C8011301 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800F00 00430E02 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_buffer_load_dword s6, s[0:3], 0x6 ; C2030106 s_buffer_load_dword s7, s[0:3], 0x1 ; C2038101 image_sample v[1:2], 10, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[20:23] ; F0800A00 00A60102 s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100 v_mul_f32_e32 v3, v5, v5 ; 10060B05 v_mac_f32_e32 v3, v6, v6 ; 3E060D06 v_mac_f32_e32 v3, v7, v7 ; 3E060F07 v_rsq_clamp_f32_e32 v3, v3 ; 7E065903 s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_mul_f32_e32 v14, s4, v14 ; 101C1C04 v_mul_f32_e32 v15, s5, v15 ; 101E1E05 v_mul_f32_e32 v16, s6, v16 ; 10202006 v_cmp_gt_f32_e32 vcc, s7, v17 ; 7C082207 v_mul_f32_e32 v5, v3, v5 ; 100A0B03 v_mul_f32_e32 v6, v3, v6 ; 100C0D03 v_mul_f32_e32 v3, v3, v7 ; 10060F03 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, 2.0, v2, -1.0 ; D2820002 03CE04F4 v_mad_f32 v1, 2.0, v1, -1.0 ; D2820001 03CE02F4 v_mul_f32_e32 v7, s0, v2 ; 100E0400 v_mul_f32_e32 v17, s0, v1 ; 10220200 v_mad_f32 v1, -v1, v1, 1.0 ; D2820001 23CA0301 v_mad_f32 v1, -v2, v2, v1 ; D2820001 24060502 v_mul_f32_e32 v2, v12, v3 ; 1004070C v_mad_f32 v2, v6, v13, -v2 ; D2820002 840A1B06 v_mul_f32_e32 v18, v13, v5 ; 10240B0D v_mad_f32 v18, v3, v11, -v18 ; D2820012 844A1703 v_mul_f32_e32 v19, v11, v6 ; 10260D0B v_mad_f32 v19, v5, v12, -v19 ; D2820013 844E1905 v_mul_f32_e32 v20, v11, v11 ; 1028170B v_mac_f32_e32 v20, v12, v12 ; 3E28190C v_mac_f32_e32 v20, v13, v13 ; 3E281B0D v_rsq_clamp_f32_e32 v20, v20 ; 7E285914 v_mul_f32_e32 v21, v2, v2 ; 102A0502 v_mac_f32_e32 v21, v18, v18 ; 3E2A2512 v_mac_f32_e32 v21, v19, v19 ; 3E2A2713 v_rsq_clamp_f32_e32 v21, v21 ; 7E2A5915 v_mul_f32_e32 v11, v20, v11 ; 10161714 v_mul_f32_e32 v12, v20, v12 ; 10181914 v_mul_f32_e32 v13, v20, v13 ; 101A1B14 v_mul_f32_e32 v2, v21, v2 ; 10040515 v_mul_f32_e32 v18, v21, v18 ; 10242515 v_mul_f32_e32 v19, v21, v19 ; 10262715 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_sqrt_f32_e32 v1, v1 ; 7E026701 v_mul_f32_e32 v20, v7, v7 ; 10280F07 v_mac_f32_e32 v20, v17, v17 ; 3E282311 v_mac_f32_e32 v20, v1, v1 ; 3E280301 v_rsq_clamp_f32_e32 v20, v20 ; 7E285914 v_mul_f32_e32 v2, v2, v0 ; 10040102 v_mul_f32_e32 v18, v18, v0 ; 10240112 v_mul_f32_e32 v0, v19, v0 ; 10000113 v_mul_f32_e32 v17, v20, v17 ; 10222314 v_mul_f32_e32 v2, v17, v2 ; 10040511 v_mul_f32_e32 v18, v17, v18 ; 10242511 v_mul_f32_e32 v0, v17, v0 ; 10000111 v_mul_f32_e32 v7, v20, v7 ; 100E0F14 v_mad_f32 v2, v7, v11, -v2 ; D2820002 840A1707 v_mad_f32 v11, v7, v12, -v18 ; D282000B 844A1907 v_mad_f32 v0, v7, v13, -v0 ; D2820000 84021B07 v_mul_f32_e32 v1, v20, v1 ; 10020314 v_mac_f32_e32 v2, v1, v5 ; 3E040B01 v_mac_f32_e32 v11, v1, v6 ; 3E160D01 v_mac_f32_e32 v0, v1, v3 ; 3E000701 v_mul_f32_e32 v1, v2, v2 ; 10020502 v_mac_f32_e32 v1, v11, v11 ; 3E02170B v_mac_f32_e32 v1, v0, v0 ; 3E020100 v_rsq_clamp_f32_e32 v1, v1 ; 7E025901 v_mul_f32_e32 v3, v8, v14 ; 10061D08 v_mul_f32_e32 v5, v9, v15 ; 100A1F09 v_mul_f32_e32 v6, v10, v16 ; 100C210A v_mul_f32_e32 v2, v1, v2 ; 10040501 v_mul_f32_e32 v7, v1, v11 ; 100E1701 v_mul_f32_e32 v0, v1, v0 ; 10000101 v_mad_f32 v1, 0.5, v2, 0.5 ; D2820001 03C204F0 v_mad_f32 v2, 0.5, v7, 0.5 ; D2820002 03C20EF0 v_mad_f32 v0, 0.5, v0, 0.5 ; D2820000 03C200F0 v_cndmask_b32_e64 v7, 0, -1.0, vcc ; D2000007 01A9E680 v_cmpx_le_f32_e32 vcc, 0, v7 ; 7C260E80 v_cvt_pkrtz_f16_f32_e32 v3, v3, v5 ; 5E060B03 v_cvt_pkrtz_f16_f32_e64 v5, v6, 0 ; D25E0005 00010106 exp 15, 0, 1, 0, 0, v3, v5, v3, v5 ; F800040F 05030503 s_waitcnt expcnt(0) ; BF8C070F v_cvt_pkrtz_f16_f32_e64 v3, 0, 0 ; D25E0003 00010080 exp 15, 1, 1, 0, 0, v3, v3, v3, v3 ; F800041F 03030303 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 exp 15, 2, 0, 0, 0, v4, v4, v4, v4 ; F800002F 04040404 v_cvt_pkrtz_f16_f32_e64 v0, v0, 0 ; D25E0000 00010100 exp 15, 3, 1, 1, 1, v1, v0, v1, v0 ; F8001C3F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 24 Code Size: 604 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0xB last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[5], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL OUT[2], COLOR[2] DCL OUT[3], COLOR[3] DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[1][0..3] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0..6], LOCAL IMM[0] UINT32 {0, 16, 4, 0} IMM[1] FLT32 { 2.0000, -1.0000, 1.0000, 0.5000} IMM[2] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: DP3 TEMP[0].x, IN[2].xyzz, IN[2].xyzz 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MUL TEMP[0].xyz, IN[2].xyzz, TEMP[0].xxxx 3: MOV TEMP[1].xy, IN[0].xyyy 4: TEX TEMP[1], TEMP[1], SAMP[0], 2D 5: MUL TEMP[2].xyz, CONST[1][1].xyzz, TEMP[1].xyzz 6: MUL TEMP[2].xyz, TEMP[2].xyzz, IN[3].xyzz 7: MUL TEMP[3].xyz, TEMP[0].zxyy, IN[4].yzxx 8: MAD TEMP[3].xyz, TEMP[0].yzxx, IN[4].zxyy, -TEMP[3].xyzz 9: MOV TEMP[4].xy, IN[0].xyyy 10: TEX TEMP[4].yw, TEMP[4], SAMP[1], 2D 11: MAD TEMP[4].xy, TEMP[4].wyyy, IMM[1].xxxx, IMM[1].yyyy 12: MOV TEMP[5].x, TEMP[4].xxxx 13: MOV TEMP[5].y, -TEMP[4].yyyy 14: MUL TEMP[5].xy, TEMP[5].xyyy, CONST[1][0].xxxx 15: MOV TEMP[6].x, TEMP[5].xxxx 16: MOV TEMP[6].y, TEMP[5].yyyy 17: DP2 TEMP[4].x, TEMP[4].xyyy, TEMP[4].xyyy 18: ADD TEMP[4].x, IMM[1].zzzz, -TEMP[4].xxxx 19: MOV_SAT TEMP[4].x, TEMP[4].xxxx 20: SQRT TEMP[4].x, TEMP[4].xxxx 21: MOV TEMP[6].z, TEMP[4].xxxx 22: DP3 TEMP[4].x, TEMP[6].xyzz, TEMP[6].xyzz 23: RSQ TEMP[4].x, TEMP[4].xxxx 24: MUL TEMP[4].xyz, TEMP[6].xyzz, TEMP[4].xxxx 25: DP3 TEMP[5].x, IN[4].xyzz, IN[4].xyzz 26: RSQ TEMP[5].x, TEMP[5].xxxx 27: MUL TEMP[5].xyz, IN[4].xyzz, TEMP[5].xxxx 28: DP3 TEMP[6].x, TEMP[3].xyzz, TEMP[3].xyzz 29: RSQ TEMP[6].x, TEMP[6].xxxx 30: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[6].xxxx 31: MUL TEMP[3].xyz, IN[4].wwww, TEMP[3].xyzz 32: MUL TEMP[3].xyz, TEMP[4].yyyy, TEMP[3].xyzz 33: MAD TEMP[3].xyz, TEMP[4].xxxx, TEMP[5].xyzz, TEMP[3].xyzz 34: MAD TEMP[0].xyz, TEMP[0].xyzz, TEMP[4].zzzz, TEMP[3].xyzz 35: DP3 TEMP[3].x, TEMP[0].xyzz, TEMP[0].xyzz 36: RSQ TEMP[3].x, TEMP[3].xxxx 37: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[3].xxxx 38: MAD TEMP[0].xyz, TEMP[0].xyzz, IMM[1].wwww, IMM[1].wwww 39: MOV TEMP[3].w, IMM[2].xxxx 40: MOV TEMP[3].x, TEMP[0].xxxx 41: MOV TEMP[3].y, TEMP[0].yyyy 42: MOV TEMP[3].z, TEMP[0].zzzz 43: FSLT TEMP[0].x, TEMP[1].wwww, CONST[1][0].yyyy 44: AND TEMP[0].x, TEMP[0].xxxx, IMM[1].zzzz 45: KILL_IF -TEMP[0].xxxx 46: MOV TEMP[0].w, IMM[2].xxxx 47: MOV TEMP[0].x, TEMP[2].xxxx 48: MOV TEMP[0].y, TEMP[2].yyyy 49: MOV TEMP[0].z, TEMP[2].zzzz 50: MOV OUT[2], IN[1].wwww 51: MOV OUT[0], TEMP[0] 52: MOV OUT[3], TEMP[3] 53: MOV OUT[1], IMM[2].xxxx 54: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %29 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %30 = load <32 x i8>, <32 x i8> addrspace(2)* %29, align 32, !tbaa !0 %31 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %32 = load <16 x i8>, <16 x i8> addrspace(2)* %31, align 16, !tbaa !0 %33 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %34 = bitcast <8 x i32> addrspace(2)* %33 to <32 x i8> addrspace(2)* %35 = load <32 x i8>, <32 x i8> addrspace(2)* %34, align 32, !tbaa !0 %36 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %37 = bitcast <4 x i32> addrspace(2)* %36 to <16 x i8> addrspace(2)* %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %44 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %45 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %46 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %47 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %48 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %49 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %50 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %51 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %52 = fmul float %42, %42 %53 = fmul float %43, %43 %54 = fadd float %53, %52 %55 = fmul float %44, %44 %56 = fadd float %54, %55 %57 = call float @llvm.AMDGPU.rsq.clamped.f32(float %56) %58 = fmul float %42, %57 %59 = fmul float %43, %57 %60 = fmul float %44, %57 %61 = bitcast float %39 to i32 %62 = bitcast float %40 to i32 %63 = insertelement <2 x i32> undef, i32 %61, i32 0 %64 = insertelement <2 x i32> %63, i32 %62, i32 1 %65 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %64, <32 x i8> %30, <16 x i8> %32, i32 2) %66 = extractelement <4 x float> %65, i32 0 %67 = extractelement <4 x float> %65, i32 1 %68 = extractelement <4 x float> %65, i32 2 %69 = extractelement <4 x float> %65, i32 3 %70 = fmul float %26, %66 %71 = fmul float %27, %67 %72 = fmul float %28, %68 %73 = fmul float %70, %45 %74 = fmul float %71, %46 %75 = fmul float %72, %47 %76 = fmul float %60, %49 %77 = fmul float %58, %50 %78 = fmul float %59, %48 %79 = fmul float %59, %50 %80 = fsub float %79, %76 %81 = fmul float %60, %48 %82 = fsub float %81, %77 %83 = fmul float %58, %49 %84 = fsub float %83, %78 %85 = bitcast float %39 to i32 %86 = bitcast float %40 to i32 %87 = insertelement <2 x i32> undef, i32 %85, i32 0 %88 = insertelement <2 x i32> %87, i32 %86, i32 1 %89 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %88, <32 x i8> %35, <16 x i8> %38, i32 2) %90 = extractelement <4 x float> %89, i32 1 %91 = extractelement <4 x float> %89, i32 3 %92 = fmul float %91, 2.000000e+00 %93 = fadd float %92, -1.000000e+00 %94 = fmul float %90, 2.000000e+00 %95 = fadd float %94, -1.000000e+00 %96 = fmul float %93, %24 %97 = fmul float %95, %24 %98 = fmul float %93, %93 %99 = fmul float %95, %95 %100 = fadd float %98, %99 %101 = fsub float 1.000000e+00, %100 %102 = call float @llvm.AMDIL.clamp.(float %101, float 0.000000e+00, float 1.000000e+00) %103 = call float @llvm.sqrt.f32(float %102) %104 = fmul float %96, %96 %105 = fmul float %97, %97 %106 = fadd float %105, %104 %107 = fmul float %103, %103 %108 = fadd float %106, %107 %109 = call float @llvm.AMDGPU.rsq.clamped.f32(float %108) %110 = fmul float %96, %109 %111 = fmul float %97, %109 %112 = fsub float -0.000000e+00, %111 %113 = fmul float %103, %109 %114 = fmul float %48, %48 %115 = fmul float %49, %49 %116 = fadd float %115, %114 %117 = fmul float %50, %50 %118 = fadd float %116, %117 %119 = call float @llvm.AMDGPU.rsq.clamped.f32(float %118) %120 = fmul float %48, %119 %121 = fmul float %49, %119 %122 = fmul float %50, %119 %123 = fmul float %80, %80 %124 = fmul float %82, %82 %125 = fadd float %124, %123 %126 = fmul float %84, %84 %127 = fadd float %125, %126 %128 = call float @llvm.AMDGPU.rsq.clamped.f32(float %127) %129 = fmul float %80, %128 %130 = fmul float %82, %128 %131 = fmul float %84, %128 %132 = fmul float %51, %129 %133 = fmul float %51, %130 %134 = fmul float %51, %131 %135 = fmul float %132, %112 %136 = fmul float %133, %112 %137 = fmul float %134, %112 %138 = fmul float %110, %120 %139 = fadd float %138, %135 %140 = fmul float %110, %121 %141 = fadd float %140, %136 %142 = fmul float %110, %122 %143 = fadd float %142, %137 %144 = fmul float %58, %113 %145 = fadd float %144, %139 %146 = fmul float %59, %113 %147 = fadd float %146, %141 %148 = fmul float %60, %113 %149 = fadd float %148, %143 %150 = fmul float %145, %145 %151 = fmul float %147, %147 %152 = fadd float %151, %150 %153 = fmul float %149, %149 %154 = fadd float %152, %153 %155 = call float @llvm.AMDGPU.rsq.clamped.f32(float %154) %156 = fmul float %145, %155 %157 = fmul float %147, %155 %158 = fmul float %149, %155 %159 = fmul float %156, 5.000000e-01 %160 = fadd float %159, 5.000000e-01 %161 = fmul float %157, 5.000000e-01 %162 = fadd float %161, 5.000000e-01 %163 = fmul float %158, 5.000000e-01 %164 = fadd float %163, 5.000000e-01 %165 = fcmp olt float %69, %25 %166 = select i1 %165, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %166) %167 = call i32 @llvm.SI.packf16(float %73, float %74) %168 = bitcast i32 %167 to float %169 = call i32 @llvm.SI.packf16(float %75, float 0.000000e+00) %170 = bitcast i32 %169 to float %171 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %172 = bitcast i32 %171 to float %173 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %174 = bitcast i32 %173 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %168, float %170, float %168, float %170) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 1, i32 1, float %172, float %174, float %172, float %174) %175 = call i32 @llvm.SI.packf16(float %160, float %162) %176 = bitcast i32 %175 to float %177 = call i32 @llvm.SI.packf16(float %164, float 0.000000e+00) %178 = bitcast i32 %177 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 2, i32 0, float %41, float %41, float %41, float %41) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 3, i32 1, float %176, float %178, float %176, float %178) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 3, 1, [m0] ; C8100700 v_interp_p2_f32 v4, [v4], v1, 3, 1, [m0] ; C8110701 v_interp_p1_f32 v5, v0, 0, 2, [m0] ; C8140800 v_interp_p2_f32 v5, [v5], v1, 0, 2, [m0] ; C8150801 v_interp_p1_f32 v6, v0, 1, 2, [m0] ; C8180900 v_interp_p2_f32 v6, [v6], v1, 1, 2, [m0] ; C8190901 v_interp_p1_f32 v7, v0, 2, 2, [m0] ; C81C0A00 v_interp_p2_f32 v7, [v7], v1, 2, 2, [m0] ; C81D0A01 v_interp_p1_f32 v8, v0, 0, 3, [m0] ; C8200C00 v_interp_p2_f32 v8, [v8], v1, 0, 3, [m0] ; C8210C01 v_interp_p1_f32 v9, v0, 1, 3, [m0] ; C8240D00 v_interp_p2_f32 v9, [v9], v1, 1, 3, [m0] ; C8250D01 v_interp_p1_f32 v10, v0, 2, 3, [m0] ; C8280E00 v_interp_p2_f32 v10, [v10], v1, 2, 3, [m0] ; C8290E01 v_interp_p1_f32 v11, v0, 0, 4, [m0] ; C82C1000 v_interp_p2_f32 v11, [v11], v1, 0, 4, [m0] ; C82D1001 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 v_interp_p1_f32 v12, v0, 1, 4, [m0] ; C8301100 v_interp_p2_f32 v12, [v12], v1, 1, 4, [m0] ; C8311101 s_load_dwordx4 s[20:23], s[4:5], 0x4 ; C08A0504 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 v_interp_p1_f32 v13, v0, 2, 4, [m0] ; C8341200 v_interp_p2_f32 v13, [v13], v1, 2, 4, [m0] ; C8351201 v_interp_p1_f32 v0, v0, 3, 4, [m0] ; C8001300 v_interp_p2_f32 v0, [v0], v1, 3, 4, [m0] ; C8011301 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800F00 00430E02 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_buffer_load_dword s6, s[0:3], 0x6 ; C2030106 s_buffer_load_dword s7, s[0:3], 0x1 ; C2038101 image_sample v[1:2], 10, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[20:23] ; F0800A00 00A60102 s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100 v_mul_f32_e32 v3, v5, v5 ; 10060B05 v_mac_f32_e32 v3, v6, v6 ; 3E060D06 v_mac_f32_e32 v3, v7, v7 ; 3E060F07 v_rsq_clamp_f32_e32 v3, v3 ; 7E065903 s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_mul_f32_e32 v14, s4, v14 ; 101C1C04 v_mul_f32_e32 v15, s5, v15 ; 101E1E05 v_mul_f32_e32 v16, s6, v16 ; 10202006 v_cmp_gt_f32_e32 vcc, s7, v17 ; 7C082207 v_mul_f32_e32 v5, v3, v5 ; 100A0B03 v_mul_f32_e32 v6, v3, v6 ; 100C0D03 v_mul_f32_e32 v3, v3, v7 ; 10060F03 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, 2.0, v2, -1.0 ; D2820002 03CE04F4 v_mad_f32 v1, 2.0, v1, -1.0 ; D2820001 03CE02F4 v_mul_f32_e32 v7, s0, v2 ; 100E0400 v_mul_f32_e32 v17, s0, v1 ; 10220200 v_mad_f32 v1, -v1, v1, 1.0 ; D2820001 23CA0301 v_mad_f32 v1, -v2, v2, v1 ; D2820001 24060502 v_mul_f32_e32 v2, v12, v3 ; 1004070C v_mad_f32 v2, v6, v13, -v2 ; D2820002 840A1B06 v_mul_f32_e32 v18, v13, v5 ; 10240B0D v_mad_f32 v18, v3, v11, -v18 ; D2820012 844A1703 v_mul_f32_e32 v19, v11, v6 ; 10260D0B v_mad_f32 v19, v5, v12, -v19 ; D2820013 844E1905 v_mul_f32_e32 v20, v11, v11 ; 1028170B v_mac_f32_e32 v20, v12, v12 ; 3E28190C v_mac_f32_e32 v20, v13, v13 ; 3E281B0D v_rsq_clamp_f32_e32 v20, v20 ; 7E285914 v_mul_f32_e32 v21, v2, v2 ; 102A0502 v_mac_f32_e32 v21, v18, v18 ; 3E2A2512 v_mac_f32_e32 v21, v19, v19 ; 3E2A2713 v_rsq_clamp_f32_e32 v21, v21 ; 7E2A5915 v_mul_f32_e32 v11, v20, v11 ; 10161714 v_mul_f32_e32 v12, v20, v12 ; 10181914 v_mul_f32_e32 v13, v20, v13 ; 101A1B14 v_mul_f32_e32 v2, v21, v2 ; 10040515 v_mul_f32_e32 v18, v21, v18 ; 10242515 v_mul_f32_e32 v19, v21, v19 ; 10262715 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_sqrt_f32_e32 v1, v1 ; 7E026701 v_mul_f32_e32 v20, v7, v7 ; 10280F07 v_mac_f32_e32 v20, v17, v17 ; 3E282311 v_mac_f32_e32 v20, v1, v1 ; 3E280301 v_rsq_clamp_f32_e32 v20, v20 ; 7E285914 v_mul_f32_e32 v2, v2, v0 ; 10040102 v_mul_f32_e32 v18, v18, v0 ; 10240112 v_mul_f32_e32 v0, v19, v0 ; 10000113 v_mul_f32_e32 v17, v20, v17 ; 10222314 v_mul_f32_e32 v2, v17, v2 ; 10040511 v_mul_f32_e32 v18, v17, v18 ; 10242511 v_mul_f32_e32 v0, v17, v0 ; 10000111 v_mul_f32_e32 v7, v20, v7 ; 100E0F14 v_mad_f32 v2, v7, v11, -v2 ; D2820002 840A1707 v_mad_f32 v11, v7, v12, -v18 ; D282000B 844A1907 v_mad_f32 v0, v7, v13, -v0 ; D2820000 84021B07 v_mul_f32_e32 v1, v20, v1 ; 10020314 v_mac_f32_e32 v2, v1, v5 ; 3E040B01 v_mac_f32_e32 v11, v1, v6 ; 3E160D01 v_mac_f32_e32 v0, v1, v3 ; 3E000701 v_mul_f32_e32 v1, v2, v2 ; 10020502 v_mac_f32_e32 v1, v11, v11 ; 3E02170B v_mac_f32_e32 v1, v0, v0 ; 3E020100 v_rsq_clamp_f32_e32 v1, v1 ; 7E025901 v_mul_f32_e32 v3, v8, v14 ; 10061D08 v_mul_f32_e32 v5, v9, v15 ; 100A1F09 v_mul_f32_e32 v6, v10, v16 ; 100C210A v_mul_f32_e32 v2, v1, v2 ; 10040501 v_mul_f32_e32 v7, v1, v11 ; 100E1701 v_mul_f32_e32 v0, v1, v0 ; 10000101 v_mad_f32 v1, 0.5, v2, 0.5 ; D2820001 03C204F0 v_mad_f32 v2, 0.5, v7, 0.5 ; D2820002 03C20EF0 v_mad_f32 v0, 0.5, v0, 0.5 ; D2820000 03C200F0 v_cndmask_b32_e64 v7, 0, -1.0, vcc ; D2000007 01A9E680 v_cmpx_le_f32_e32 vcc, 0, v7 ; 7C260E80 v_cvt_pkrtz_f16_f32_e32 v3, v3, v5 ; 5E060B03 v_cvt_pkrtz_f16_f32_e64 v5, v6, 0 ; D25E0005 00010106 exp 15, 0, 1, 0, 0, v3, v5, v3, v5 ; F800040F 05030503 s_waitcnt expcnt(0) ; BF8C070F v_cvt_pkrtz_f16_f32_e64 v3, 0, 0 ; D25E0003 00010080 exp 15, 1, 1, 0, 0, v3, v3, v3, v3 ; F800041F 03030303 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 exp 15, 2, 0, 0, 0, v4, v4, v4, v4 ; F800002F 04040404 v_cvt_pkrtz_f16_f32_e64 v0, v0, 0 ; D25E0000 00010100 exp 15, 3, 1, 1, 1, v1, v0, v1, v0 ; F8001C3F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 24 Code Size: 604 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL CONST[1][0] DCL CONST[2][0..41] DCL CONST[3][0..13] DCL CONST[4][0] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL IMM[0] FLT32 { 0.0000, 1.0000, -1.0000, 0.0000} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MOV TEMP[0].zw, IMM[0].yyzy 4: MOV TEMP[0].x, IN[0].xxxx 5: MOV TEMP[0].y, -IN[0].yyyy 6: MOV OUT[1], TEMP[1] 7: MOV OUT[2].xy, IN[1].xyxx 8: MOV OUT[0], TEMP[0] 9: MOV OUT[3], IMM[0].yyyy 10: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = add i32 %5, %7 %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13) %15 = extractelement <4 x float> %14, i32 0 %16 = extractelement <4 x float> %14, i32 1 %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 %19 = add i32 %5, %7 %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19) %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = fsub float -0.000000e+00, %16 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %21, float %22, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %15, float %23, float -1.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 v_mov_b32_e32 v1, 0 ; 7E020280 v_mov_b32_e32 v2, 1.0 ; 7E0402F2 v_mov_b32_e32 v3, -1.0 ; 7E0602F3 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[4:7], v0, s[0:3], 0 idxen ; E00C2000 80000400 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[6:9], v0, s[4:7], 0 idxen ; E00C2000 80010600 exp 15, 32, 0, 0, 0, v1, v1, v1, v1 ; F800020F 01010101 v_xor_b32_e32 v0, 0x80000000, v5 ; 3A000AFF 80000000 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 33, 0, 0, 0, v6, v7, v1, v1 ; F800021F 01010706 exp 15, 34, 0, 0, 0, v2, v2, v2, v2 ; F800022F 02020202 exp 15, 12, 0, 0, 0, v4, v0, v3, v2 ; F80000CF 02030004 exp 15, 13, 0, 1, 0, v1, v1, v1, v1 ; F80008DF 01010101 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 12 Code Size: 104 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 3D, FLOAT DCL CONST[1][0..1] DCL CONST[2][0..41] DCL CONST[3][0..13] DCL CONST[4][0] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.4545, 0.3000, 0.5900, 0.1100} IMM[1] UINT32 {0, 16, 20, 0} IMM[2] FLT32 { 2.2000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: POW TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx 3: POW TEMP[1].y, TEMP[0].yyyy, IMM[0].xxxx 4: POW TEMP[1].z, TEMP[0].zzzz, IMM[0].xxxx 5: MOV TEMP[1].xyz, TEMP[1].xyzz 6: TEX TEMP[1].xyz, TEMP[1], SAMP[1], 3D 7: LRP TEMP[1].xyz, CONST[1][1].xxxx, TEMP[1].xyzz, TEMP[0].xyzz 8: MOV TEMP[0].w, TEMP[0].wwww 9: DP3 TEMP[2].x, TEMP[1].xyzz, IMM[0].yzww 10: LRP TEMP[1].xyz, CONST[1][1].yyyy, TEMP[2].xxxx, TEMP[1].xyzz 11: MUL TEMP[1].xyz, TEMP[1].xyzz, IN[1].xyzz 12: POW TEMP[2].x, TEMP[1].xxxx, IMM[2].xxxx 13: POW TEMP[2].y, TEMP[1].yyyy, IMM[2].xxxx 14: POW TEMP[2].z, TEMP[1].zzzz, IMM[2].xxxx 15: MOV TEMP[0].xyz, TEMP[2].xyzx 16: MOV OUT[0], TEMP[0] 17: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %26 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %27 = load <32 x i8>, <32 x i8> addrspace(2)* %26, align 32, !tbaa !0 %28 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %29 = load <16 x i8>, <16 x i8> addrspace(2)* %28, align 16, !tbaa !0 %30 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %31 = bitcast <8 x i32> addrspace(2)* %30 to <32 x i8> addrspace(2)* %32 = load <32 x i8>, <32 x i8> addrspace(2)* %31, align 32, !tbaa !0 %33 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %34 = bitcast <4 x i32> addrspace(2)* %33 to <16 x i8> addrspace(2)* %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 %36 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %37 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %38 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %41 = bitcast float %36 to i32 %42 = bitcast float %37 to i32 %43 = insertelement <2 x i32> undef, i32 %41, i32 0 %44 = insertelement <2 x i32> %43, i32 %42, i32 1 %45 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %44, <32 x i8> %27, <16 x i8> %29, i32 2) %46 = extractelement <4 x float> %45, i32 0 %47 = extractelement <4 x float> %45, i32 1 %48 = extractelement <4 x float> %45, i32 2 %49 = extractelement <4 x float> %45, i32 3 %50 = call float @llvm.pow.f32(float %46, float 0x3FDD168720000000) %51 = call float @llvm.pow.f32(float %47, float 0x3FDD168720000000) %52 = call float @llvm.pow.f32(float %48, float 0x3FDD168720000000) %53 = bitcast float %50 to i32 %54 = bitcast float %51 to i32 %55 = bitcast float %52 to i32 %56 = insertelement <4 x i32> undef, i32 %53, i32 0 %57 = insertelement <4 x i32> %56, i32 %54, i32 1 %58 = insertelement <4 x i32> %57, i32 %55, i32 2 %59 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %58, <32 x i8> %32, <16 x i8> %35, i32 3) %60 = extractelement <4 x float> %59, i32 0 %61 = extractelement <4 x float> %59, i32 1 %62 = extractelement <4 x float> %59, i32 2 %63 = call float @llvm.AMDGPU.lrp(float %24, float %60, float %46) %64 = call float @llvm.AMDGPU.lrp(float %24, float %61, float %47) %65 = call float @llvm.AMDGPU.lrp(float %24, float %62, float %48) %66 = fmul float %63, 0x3FD3333340000000 %67 = fmul float %64, 0x3FE2E147A0000000 %68 = fadd float %67, %66 %69 = fmul float %65, 0x3FBC28F5C0000000 %70 = fadd float %68, %69 %71 = call float @llvm.AMDGPU.lrp(float %25, float %70, float %63) %72 = call float @llvm.AMDGPU.lrp(float %25, float %70, float %64) %73 = call float @llvm.AMDGPU.lrp(float %25, float %70, float %65) %74 = fmul float %71, %38 %75 = fmul float %72, %39 %76 = fmul float %73, %40 %77 = call float @llvm.pow.f32(float %74, float 0x40019999A0000000) %78 = call float @llvm.pow.f32(float %75, float 0x40019999A0000000) %79 = call float @llvm.pow.f32(float %76, float 0x40019999A0000000) %80 = call i32 @llvm.SI.packf16(float %77, float %78) %81 = bitcast i32 %80 to float %82 = call i32 @llvm.SI.packf16(float %79, float %49) %83 = bitcast i32 %82 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %81, float %83, float %81, float %83) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_mov_b32_e32 v2, 0x3ee8b439 ; 7E0402FF 3EE8B439 v_mov_b32_e32 v3, 0x400ccccd ; 7E0602FF 400CCCCD s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 v_interp_p1_f32 v5, v0, 1, 0, [m0] ; C8140100 v_interp_p2_f32 v5, [v5], v1, 1, 0, [m0] ; C8150101 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v0, v0, 2, 1, [m0] ; C8000600 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 v_interp_p2_f32 v0, [v0], v1, 2, 1, [m0] ; C8010601 s_load_dwordx4 s[20:23], s[4:5], 0x4 ; C08A0504 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[12:19], s[8:11] ; F0800F00 00430804 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v1, v8 ; 7E024F08 v_log_f32_e32 v4, v9 ; 7E084F09 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 v_mul_legacy_f32_e32 v1, v2, v1 ; 0E020302 v_log_f32_e32 v5, v10 ; 7E0A4F0A v_mul_legacy_f32_e32 v4, v2, v4 ; 0E080902 v_exp_f32_e32 v12, v1 ; 7E184B01 v_exp_f32_e32 v13, v4 ; 7E1A4B04 v_mul_legacy_f32_e32 v1, v2, v5 ; 0E020B02 v_exp_f32_e32 v14, v1 ; 7E1C4B01 image_sample v[12:14], 7, 0, 0, 0, 0, 0, 0, 0, v[12:15], s[24:31], s[20:23] ; F0800700 00A60C0C s_buffer_load_dword s0, s[0:3], 0x5 ; C2000105 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e64 v1, 1.0, s4 ; D2080001 000008F2 v_mul_f32_e32 v2, v8, v1 ; 10040308 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v2, s4, v12 ; 3E041804 v_mul_f32_e32 v4, v9, v1 ; 10080309 v_mac_f32_e32 v4, s4, v13 ; 3E081A04 v_mul_f32_e32 v1, v10, v1 ; 1002030A v_mac_f32_e32 v1, s4, v14 ; 3E021C04 v_mul_f32_e32 v5, 0x3e99999a, v2 ; 100A04FF 3E99999A v_madmk_f32_e32 v5, v4, v5, 0x3f170a3d ; 400A0B04 3F170A3D v_madmk_f32_e32 v5, v1, v5, 0x3de147ae ; 400A0B01 3DE147AE v_sub_f32_e64 v8, 1.0, s0 ; D2080008 000000F2 v_mul_f32_e32 v2, v2, v8 ; 10041102 v_mac_f32_e32 v2, s0, v5 ; 3E040A00 v_mul_f32_e32 v4, v4, v8 ; 10081104 v_mac_f32_e32 v4, s0, v5 ; 3E080A00 v_mul_f32_e32 v1, v1, v8 ; 10021101 v_mac_f32_e32 v1, s0, v5 ; 3E020A00 v_mul_f32_e32 v2, v6, v2 ; 10040506 v_log_f32_e32 v2, v2 ; 7E044F02 v_mul_f32_e32 v4, v7, v4 ; 10080907 v_mul_f32_e32 v0, v0, v1 ; 10000300 v_log_f32_e32 v1, v4 ; 7E024F04 v_mul_legacy_f32_e32 v2, v3, v2 ; 0E040503 v_log_f32_e32 v0, v0 ; 7E004F00 v_exp_f32_e32 v2, v2 ; 7E044B02 v_mul_legacy_f32_e32 v1, v3, v1 ; 0E020303 v_exp_f32_e32 v1, v1 ; 7E024B01 v_mul_legacy_f32_e32 v0, v3, v0 ; 0E000103 v_exp_f32_e32 v0, v0 ; 7E004B00 v_cvt_pkrtz_f16_f32_e32 v0, v0, v11 ; 5E001700 v_cvt_pkrtz_f16_f32_e32 v1, v2, v1 ; 5E020302 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 16 Code Size: 316 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL IN[5] DCL IN[6] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL OUT[4], GENERIC[2] DCL OUT[5], GENERIC[3] DCL OUT[6], GENERIC[4] DCL OUT[7], GENERIC[5] DCL OUT[8], GENERIC[6] DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0..7] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL DCL TEMP[2..15], LOCAL IMM[0] FLT32 { 0.0000, 255.0000, -128.0000, 1.0000} IMM[1] INT32 {1, 0, 2, 3} IMM[2] FLT32 { -64.0000, 0.0159, 2.0000, 16.0000} IMM[3] UINT32 {3, 304, 320, 4} IMM[4] UINT32 {0, 16, 32, 48} IMM[5] UINT32 {20, 36, 52, 8} IMM[6] UINT32 {24, 40, 56, 12} IMM[7] UINT32 {28, 44, 60, 348} IMM[8] FLT32 { 0.0175, -0.5000, 0.5000, 0.0001} IMM[9] UINT32 {72, 80, 64, 88} IMM[10] UINT32 {100, 104, 96, 112} IMM[11] FLT32 { 0.0774, 0.9479, 0.0521, 2.4000} IMM[12] FLT32 { 0.0404, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MOV TEMP[0].xy, IN[6].xyyy 4: MOV TEMP[0].w, IMM[0].xxxx 5: TXL TEMP[0], TEMP[0], SAMP[0], 2D 6: MOV TEMP[2].xy, IN[6].xyyy 7: MOV TEMP[2].w, IMM[0].xxxx 8: TXL TEMP[2], TEMP[2], SAMP[0], 2D, IMM[1].xyx 9: MOV TEMP[3].xy, IN[6].xyyy 10: MOV TEMP[3].w, IMM[0].xxxx 11: TXL TEMP[3], TEMP[3], SAMP[0], 2D, IMM[1].zyz 12: MAD TEMP[4], IN[1], IMM[0].yyyy, IMM[0].zzzz 13: FSLT TEMP[5], TEMP[4], IMM[0].xxxx 14: AND TEMP[5], TEMP[5], IMM[0].wwww 15: ABS TEMP[4], TEMP[4] 16: ADD TEMP[4], TEMP[4], -TEMP[5] 17: ADD TEMP[4], TEMP[4], IMM[2].xxxx 18: FSLT TEMP[6], TEMP[4], IMM[0].xxxx 19: AND TEMP[6], TEMP[6], IMM[0].wwww 20: ABS TEMP[4], TEMP[4] 21: ADD TEMP[4], TEMP[4], -TEMP[6] 22: MUL TEMP[4].xy, TEMP[4], IMM[2].yyyy 23: MOV TEMP[7].x, TEMP[4].xxxx 24: MOV TEMP[7].y, TEMP[4].yyyy 25: ADD TEMP[8].x, IMM[0].wwww, -TEMP[4].xxxx 26: ADD TEMP[4].x, TEMP[8].xxxx, -TEMP[4].yyyy 27: MOV TEMP[7].z, TEMP[4].xxxx 28: DP3 TEMP[4].x, TEMP[7].xyzz, TEMP[7].xyzz 29: RSQ TEMP[4].x, TEMP[4].xxxx 30: MUL TEMP[4].xyz, TEMP[7].xyzz, TEMP[4].xxxx 31: MUL TEMP[6], TEMP[6], IMM[2].zzzz 32: ADD TEMP[6].xy, IMM[0].wwww, -TEMP[6] 33: MUL TEMP[6].xy, TEMP[4].xyyy, TEMP[6].xyyy 34: MOV TEMP[7].w, IMM[0].xxxx 35: MOV TEMP[7].x, TEMP[6].xxxx 36: MOV TEMP[7].y, TEMP[6].yyyy 37: MUL TEMP[5].x, TEMP[5].xxxx, IMM[2].zzzz 38: ADD TEMP[5].x, IMM[0].wwww, -TEMP[5].xxxx 39: MUL TEMP[4].x, TEMP[5].xxxx, TEMP[4].zzzz 40: MOV TEMP[7].z, TEMP[4].xxxx 41: DP4 TEMP[4].x, TEMP[7], TEMP[0] 42: DP4 TEMP[5].x, TEMP[7], TEMP[2] 43: MOV TEMP[4].y, TEMP[5].xxxx 44: DP4 TEMP[5].x, TEMP[7], TEMP[3] 45: MOV TEMP[4].z, TEMP[5].xxxx 46: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 47: RSQ TEMP[5].x, TEMP[5].xxxx 48: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx 49: MOV TEMP[5].w, IMM[0].wwww 50: MOV TEMP[5].x, IN[0].xxxx 51: MOV TEMP[5].y, IN[0].yyyy 52: MOV TEMP[5].z, IN[0].zzzz 53: DP4 TEMP[0].x, TEMP[5], TEMP[0] 54: DP4 TEMP[2].x, TEMP[5], TEMP[2] 55: DP4 TEMP[3].x, TEMP[5], TEMP[3] 56: MOV TEMP[5].x, TEMP[0].xxxx 57: MOV TEMP[5].y, TEMP[2].xxxx 58: MOV TEMP[5].z, TEMP[3].xxxx 59: ADD TEMP[5].xyz, TEMP[5].xyzz, -CONST[4][19].xyzz 60: MOV TEMP[6].x, TEMP[0].xxxx 61: MOV TEMP[6].y, TEMP[2].xxxx 62: MOV TEMP[6].z, TEMP[3].xxxx 63: DP3 TEMP[7].x, CONST[4][20].xyzz, TEMP[5].xyzz 64: MOV TEMP[6].w, TEMP[7].xxxx 65: MOV TEMP[7].x, TEMP[4].xxxx 66: MOV TEMP[7].y, TEMP[4].yyyy 67: MOV TEMP[7].z, TEMP[4].zzzz 68: DP3 TEMP[8].x, TEMP[5].xyzz, TEMP[5].xyzz 69: RSQ TEMP[8].x, TEMP[8].xxxx 70: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[8].xxxx 71: DP3 TEMP[8].x, TEMP[4].xyzz, CONST[5][0].xyzz 72: MUL TEMP[4].xyz, TEMP[8].xxxx, TEMP[4].xyzz 73: MUL TEMP[4].xyz, IMM[2].zzzz, TEMP[4].xyzz 74: ADD TEMP[4].xyz, CONST[5][0].xyzz, -TEMP[4].xyzz 75: DP3 TEMP[4].x, -TEMP[5].xyzz, TEMP[4].xyzz 76: MOV_SAT TEMP[4].x, TEMP[4].xxxx 77: POW TEMP[4].x, TEMP[4].xxxx, IMM[2].wwww 78: MOV_SAT TEMP[4].x, TEMP[4].xxxx 79: MOV TEMP[7].w, TEMP[4].xxxx 80: MOV TEMP[4].w, IMM[0].wwww 81: MOV TEMP[4].x, TEMP[0].xxxx 82: MOV TEMP[4].y, TEMP[2].xxxx 83: MOV TEMP[4].z, TEMP[3].xxxx 84: MOV TEMP[0].x, CONST[4][0].xxxx 85: MOV TEMP[0].y, CONST[4][1].xxxx 86: MOV TEMP[0].z, CONST[4][2].xxxx 87: MOV TEMP[0].w, CONST[4][3].xxxx 88: DP4 TEMP[0].x, TEMP[4], TEMP[0] 89: MOV TEMP[2].x, CONST[4][0].yyyy 90: MOV TEMP[2].y, CONST[4][1].yyyy 91: MOV TEMP[2].z, CONST[4][2].yyyy 92: MOV TEMP[2].w, CONST[4][3].yyyy 93: DP4 TEMP[2].x, TEMP[4], TEMP[2] 94: MOV TEMP[3].x, CONST[4][0].zzzz 95: MOV TEMP[3].y, CONST[4][1].zzzz 96: MOV TEMP[3].z, CONST[4][2].zzzz 97: MOV TEMP[3].w, CONST[4][3].zzzz 98: DP4 TEMP[3].x, TEMP[4], TEMP[3] 99: MOV TEMP[5].x, CONST[4][0].wwww 100: MOV TEMP[5].y, CONST[4][1].wwww 101: MOV TEMP[5].z, CONST[4][2].wwww 102: MOV TEMP[5].w, CONST[4][3].wwww 103: DP4 TEMP[4].x, TEMP[4], TEMP[5] 104: MUL TEMP[5].x, CONST[1][2].xxxx, IMM[8].xxxx 105: ADD TEMP[8].xy, IN[2].xyyy, IMM[8].yyyy 106: COS TEMP[9].x, TEMP[5].xxxx 107: SIN TEMP[5].x, TEMP[5].xxxx 108: MUL TEMP[10].x, TEMP[5].xxxx, TEMP[8].yyyy 109: MAD TEMP[10].x, TEMP[9].xxxx, TEMP[8].xxxx, -TEMP[10].xxxx 110: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[8].yyyy 111: MAD TEMP[5].x, TEMP[5].xxxx, TEMP[8].xxxx, TEMP[9].xxxx 112: MOV TEMP[10].y, TEMP[5].xxxx 113: MAD TEMP[5].xy, CONST[1][1].yyyy, TEMP[10].xyyy, IMM[8].zzzz 114: MAD TEMP[5].xy, CONST[4][21].wwww, CONST[1][3].xyyy, TEMP[5].xyyy 115: ADD TEMP[5].xy, TEMP[5].xyyy, CONST[1][4].zwww 116: MUL TEMP[9].x, IMM[8].xxxx, CONST[1][2].yyyy 117: COS TEMP[10].x, TEMP[9].xxxx 118: SIN TEMP[9].x, TEMP[9].xxxx 119: MUL TEMP[11].x, TEMP[9].xxxx, TEMP[8].yyyy 120: MAD TEMP[11].x, TEMP[10].xxxx, TEMP[8].xxxx, -TEMP[11].xxxx 121: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[8].xxxx 122: MAD TEMP[9].x, TEMP[10].xxxx, TEMP[8].yyyy, TEMP[9].xxxx 123: MOV TEMP[11].y, TEMP[9].xxxx 124: MAD TEMP[9].xy, TEMP[11].xyyy, CONST[1][1].zzzz, IMM[8].zzzz 125: MAD TEMP[9].xy, CONST[4][21].wwww, CONST[1][3].zwww, TEMP[9].xyyy 126: ADD TEMP[9].xy, TEMP[9].xyyy, CONST[1][5].xyyy 127: MOV TEMP[10].x, TEMP[5].xxxx 128: MOV TEMP[10].y, TEMP[5].yyyy 129: MOV TEMP[10].z, TEMP[9].xxxx 130: MOV TEMP[10].w, TEMP[9].yyyy 131: MUL TEMP[5].x, IMM[8].xxxx, CONST[1][2].zzzz 132: COS TEMP[9].x, TEMP[5].xxxx 133: SIN TEMP[5].x, TEMP[5].xxxx 134: MUL TEMP[11].x, TEMP[5].xxxx, TEMP[8].yyyy 135: MAD TEMP[11].x, TEMP[9].xxxx, TEMP[8].xxxx, -TEMP[11].xxxx 136: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[8].xxxx 137: MAD TEMP[5].x, TEMP[9].xxxx, TEMP[8].yyyy, TEMP[5].xxxx 138: MOV TEMP[11].y, TEMP[5].xxxx 139: MAD TEMP[5].xy, TEMP[11].xyyy, CONST[1][1].wwww, IMM[8].zzzz 140: MAD TEMP[5].xy, CONST[4][21].wwww, CONST[1][4].xyyy, TEMP[5].xyyy 141: ADD TEMP[5].xy, TEMP[5].xyyy, CONST[1][5].zwww 142: MUL TEMP[9].x, IMM[8].xxxx, CONST[1][6].yyyy 143: COS TEMP[11].x, TEMP[9].xxxx 144: SIN TEMP[9].x, TEMP[9].xxxx 145: MUL TEMP[12].x, TEMP[9].xxxx, TEMP[8].yyyy 146: MAD TEMP[12].x, TEMP[11].xxxx, TEMP[8].xxxx, -TEMP[12].xxxx 147: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[8].xxxx 148: MAD TEMP[8].x, TEMP[11].xxxx, TEMP[8].yyyy, TEMP[9].xxxx 149: MOV TEMP[12].y, TEMP[8].xxxx 150: MAD TEMP[8].xy, CONST[1][6].xxxx, TEMP[12].xyyy, IMM[8].zzzz 151: MAD TEMP[8].xy, CONST[4][21].wwww, CONST[1][6].zwww, TEMP[8].xyyy 152: ADD TEMP[8].xy, TEMP[8].xyyy, CONST[1][7].xyyy 153: MOV TEMP[9].x, TEMP[5].xxxx 154: MOV TEMP[9].y, TEMP[5].yyyy 155: MOV TEMP[9].z, TEMP[8].xxxx 156: MOV TEMP[9].w, TEMP[8].yyyy 157: MOV TEMP[5].xy, IN[6].xyyy 158: MOV TEMP[5].w, IMM[0].xxxx 159: TXL TEMP[5], TEMP[5], SAMP[0], 2D, IMM[1].wyw 160: MUL TEMP[8].xyz, IN[4].xyzz, IMM[8].zzzz 161: MAX TEMP[8].xyz, TEMP[8].xyzz, IMM[8].wwww 162: MUL TEMP[11].xyz, IN[5].xyzz, IMM[11].xxxx 163: MAD TEMP[12].xyz, IN[5].xyzz, IMM[11].yyyy, IMM[11].zzzz 164: POW TEMP[13].x, TEMP[12].xxxx, IMM[11].wwww 165: POW TEMP[13].y, TEMP[12].yyyy, IMM[11].wwww 166: POW TEMP[13].z, TEMP[12].zzzz, IMM[11].wwww 167: FSLT TEMP[12].x, IMM[12].xxxx, IN[5].xxxx 168: UIF TEMP[12].xxxx :0 169: MOV TEMP[12].x, TEMP[13].xxxx 170: ELSE :0 171: MOV TEMP[12].x, TEMP[11].xxxx 172: ENDIF 173: FSLT TEMP[14].x, IMM[12].xxxx, IN[5].yyyy 174: UIF TEMP[14].xxxx :0 175: MOV TEMP[14].x, TEMP[13].yyyy 176: ELSE :0 177: MOV TEMP[14].x, TEMP[11].yyyy 178: ENDIF 179: FSLT TEMP[15].x, IMM[12].xxxx, IN[5].zzzz 180: UIF TEMP[15].xxxx :0 181: MOV TEMP[13].x, TEMP[13].zzzz 182: ELSE :0 183: MOV TEMP[13].x, TEMP[11].zzzz 184: ENDIF 185: MOV TEMP[11].x, TEMP[12].xxxx 186: MOV TEMP[11].y, TEMP[14].xxxx 187: MOV TEMP[11].z, TEMP[13].xxxx 188: MOV TEMP[12].w, TEMP[5].wwww 189: MUL TEMP[12].xyz, TEMP[11].xyzz, TEMP[5].xyzz 190: MOV TEMP[5].w, IMM[0].xxxx 191: MOV TEMP[5].x, IN[3].xxxx 192: MOV TEMP[5].y, IN[3].yyyy 193: MOV TEMP[5].z, IN[3].zzzz 194: MOV TEMP[11].w, IMM[0].xxxx 195: MOV TEMP[11].x, TEMP[8].xxxx 196: MOV TEMP[11].y, TEMP[8].yyyy 197: MOV TEMP[11].z, TEMP[8].zzzz 198: MOV TEMP[0].x, TEMP[0].xxxx 199: MOV TEMP[0].y, -TEMP[2].xxxx 200: MAD TEMP[2].x, IMM[2].zzzz, TEMP[3].xxxx, -TEMP[4].xxxx 201: MOV TEMP[0].z, TEMP[2].xxxx 202: MOV TEMP[0].w, TEMP[4].xxxx 203: MOV OUT[1], TEMP[1] 204: MOV OUT[3], TEMP[7] 205: MOV OUT[6], TEMP[11] 206: MOV OUT[7], TEMP[10] 207: MOV OUT[4], TEMP[12] 208: MOV OUT[0], TEMP[0] 209: MOV OUT[8], TEMP[9] 210: MOV OUT[2], TEMP[6] 211: MOV OUT[5], TEMP[5] 212: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %37 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = call float @llvm.SI.load.const(<16 x i8> %38, i32 0) %40 = call float @llvm.SI.load.const(<16 x i8> %38, i32 4) %41 = call float @llvm.SI.load.const(<16 x i8> %38, i32 8) %42 = call float @llvm.SI.load.const(<16 x i8> %38, i32 12) %43 = call float @llvm.SI.load.const(<16 x i8> %38, i32 16) %44 = call float @llvm.SI.load.const(<16 x i8> %38, i32 20) %45 = call float @llvm.SI.load.const(<16 x i8> %38, i32 24) %46 = call float @llvm.SI.load.const(<16 x i8> %38, i32 28) %47 = call float @llvm.SI.load.const(<16 x i8> %38, i32 32) %48 = call float @llvm.SI.load.const(<16 x i8> %38, i32 36) %49 = call float @llvm.SI.load.const(<16 x i8> %38, i32 40) %50 = call float @llvm.SI.load.const(<16 x i8> %38, i32 44) %51 = call float @llvm.SI.load.const(<16 x i8> %38, i32 48) %52 = call float @llvm.SI.load.const(<16 x i8> %38, i32 52) %53 = call float @llvm.SI.load.const(<16 x i8> %38, i32 56) %54 = call float @llvm.SI.load.const(<16 x i8> %38, i32 60) %55 = call float @llvm.SI.load.const(<16 x i8> %38, i32 304) %56 = call float @llvm.SI.load.const(<16 x i8> %38, i32 308) %57 = call float @llvm.SI.load.const(<16 x i8> %38, i32 312) %58 = call float @llvm.SI.load.const(<16 x i8> %38, i32 320) %59 = call float @llvm.SI.load.const(<16 x i8> %38, i32 324) %60 = call float @llvm.SI.load.const(<16 x i8> %38, i32 328) %61 = call float @llvm.SI.load.const(<16 x i8> %38, i32 348) %62 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 5 %63 = load <16 x i8>, <16 x i8> addrspace(2)* %62, align 16, !tbaa !0 %64 = call float @llvm.SI.load.const(<16 x i8> %63, i32 0) %65 = call float @llvm.SI.load.const(<16 x i8> %63, i32 4) %66 = call float @llvm.SI.load.const(<16 x i8> %63, i32 8) %67 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %68 = load <8 x i32>, <8 x i32> addrspace(2)* %67, align 32, !tbaa !0 %69 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %70 = load <4 x i32>, <4 x i32> addrspace(2)* %69, align 16, !tbaa !0 %71 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0 %73 = add i32 %5, %7 %74 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %72, i32 0, i32 %73) %75 = extractelement <4 x float> %74, i32 0 %76 = extractelement <4 x float> %74, i32 1 %77 = extractelement <4 x float> %74, i32 2 %78 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %79 = load <16 x i8>, <16 x i8> addrspace(2)* %78, align 16, !tbaa !0 %80 = add i32 %5, %7 %81 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %79, i32 0, i32 %80) %82 = extractelement <4 x float> %81, i32 0 %83 = extractelement <4 x float> %81, i32 1 %84 = extractelement <4 x float> %81, i32 2 %85 = extractelement <4 x float> %81, i32 3 %86 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %87 = load <16 x i8>, <16 x i8> addrspace(2)* %86, align 16, !tbaa !0 %88 = add i32 %5, %7 %89 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %87, i32 0, i32 %88) %90 = extractelement <4 x float> %89, i32 0 %91 = extractelement <4 x float> %89, i32 1 %92 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %93 = load <16 x i8>, <16 x i8> addrspace(2)* %92, align 16, !tbaa !0 %94 = add i32 %5, %7 %95 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %93, i32 0, i32 %94) %96 = extractelement <4 x float> %95, i32 0 %97 = extractelement <4 x float> %95, i32 1 %98 = extractelement <4 x float> %95, i32 2 %99 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %100 = load <16 x i8>, <16 x i8> addrspace(2)* %99, align 16, !tbaa !0 %101 = add i32 %5, %7 %102 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %100, i32 0, i32 %101) %103 = extractelement <4 x float> %102, i32 0 %104 = extractelement <4 x float> %102, i32 1 %105 = extractelement <4 x float> %102, i32 2 %106 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 5 %107 = load <16 x i8>, <16 x i8> addrspace(2)* %106, align 16, !tbaa !0 %108 = add i32 %5, %7 %109 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %107, i32 0, i32 %108) %110 = extractelement <4 x float> %109, i32 0 %111 = extractelement <4 x float> %109, i32 1 %112 = extractelement <4 x float> %109, i32 2 %113 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 6 %114 = load <16 x i8>, <16 x i8> addrspace(2)* %113, align 16, !tbaa !0 %115 = add i32 %10, %6 %116 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %114, i32 0, i32 %115) %117 = extractelement <4 x float> %116, i32 0 %118 = extractelement <4 x float> %116, i32 1 %119 = bitcast float %117 to i32 %120 = bitcast float %118 to i32 %121 = insertelement <4 x i32> undef, i32 %119, i32 0 %122 = insertelement <4 x i32> %121, i32 %120, i32 1 %123 = insertelement <4 x i32> %122, i32 0, i32 2 %124 = bitcast <8 x i32> %68 to <32 x i8> %125 = bitcast <4 x i32> %70 to <16 x i8> %126 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %123, <32 x i8> %124, <16 x i8> %125, i32 2) %127 = extractelement <4 x float> %126, i32 0 %128 = extractelement <4 x float> %126, i32 1 %129 = extractelement <4 x float> %126, i32 2 %130 = extractelement <4 x float> %126, i32 3 %131 = bitcast float %117 to i32 %132 = bitcast float %118 to i32 %133 = insertelement <4 x i32> , i32 %131, i32 1 %134 = insertelement <4 x i32> %133, i32 %132, i32 2 %135 = insertelement <4 x i32> %134, i32 0, i32 3 %136 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %135, <8 x i32> %68, <4 x i32> %70, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %137 = extractelement <4 x float> %136, i32 0 %138 = extractelement <4 x float> %136, i32 1 %139 = extractelement <4 x float> %136, i32 2 %140 = extractelement <4 x float> %136, i32 3 %141 = bitcast float %117 to i32 %142 = bitcast float %118 to i32 %143 = insertelement <4 x i32> , i32 %141, i32 1 %144 = insertelement <4 x i32> %143, i32 %142, i32 2 %145 = insertelement <4 x i32> %144, i32 0, i32 3 %146 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %145, <8 x i32> %68, <4 x i32> %70, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %147 = extractelement <4 x float> %146, i32 0 %148 = extractelement <4 x float> %146, i32 1 %149 = extractelement <4 x float> %146, i32 2 %150 = extractelement <4 x float> %146, i32 3 %151 = fmul float %82, 2.550000e+02 %152 = fadd float %151, -1.280000e+02 %153 = fmul float %83, 2.550000e+02 %154 = fadd float %153, -1.280000e+02 %155 = fmul float %84, 2.550000e+02 %156 = fadd float %155, -1.280000e+02 %157 = fmul float %85, 2.550000e+02 %158 = fadd float %157, -1.280000e+02 %159 = fcmp olt float %152, 0.000000e+00 %160 = fcmp olt float %154, 0.000000e+00 %161 = fcmp olt float %156, 0.000000e+00 %162 = fcmp olt float %158, 0.000000e+00 %163 = select i1 %159, float 1.000000e+00, float 0.000000e+00 %164 = call float @fabs(float %152) %165 = call float @fabs(float %154) %166 = call float @fabs(float %156) %167 = call float @fabs(float %158) %168 = fsub float %164, %163 %169 = select i1 %160, float -1.000000e+00, float -0.000000e+00 %170 = fadd float %165, %169 %171 = select i1 %161, float -1.000000e+00, float -0.000000e+00 %172 = fadd float %166, %171 %173 = select i1 %162, float -1.000000e+00, float -0.000000e+00 %174 = fadd float %167, %173 %175 = fadd float %168, -6.400000e+01 %176 = fadd float %170, -6.400000e+01 %177 = fadd float %172, -6.400000e+01 %178 = fadd float %174, -6.400000e+01 %179 = fcmp olt float %175, 0.000000e+00 %180 = fcmp olt float %176, 0.000000e+00 %181 = select i1 %179, float 1.000000e+00, float 0.000000e+00 %182 = select i1 %180, float 1.000000e+00, float 0.000000e+00 %183 = call float @fabs(float %175) %184 = call float @fabs(float %176) %185 = call float @fabs(float %177) %186 = call float @fabs(float %178) %187 = fsub float %183, %181 %188 = fsub float %184, %182 %189 = fmul float %187, 0x3F90410420000000 %190 = fmul float %188, 0x3F90410420000000 %191 = fsub float 1.000000e+00, %189 %192 = fsub float %191, %190 %193 = fmul float %189, %189 %194 = fmul float %190, %190 %195 = fadd float %194, %193 %196 = fmul float %192, %192 %197 = fadd float %195, %196 %198 = call float @llvm.AMDGPU.rsq.clamped.f32(float %197) %199 = fmul float %189, %198 %200 = fmul float %190, %198 %201 = fmul float %192, %198 %202 = fmul float %181, 2.000000e+00 %203 = fmul float %182, 2.000000e+00 %204 = fsub float 1.000000e+00, %202 %205 = fsub float 1.000000e+00, %203 %206 = fmul float %199, %204 %207 = fmul float %200, %205 %208 = fmul float %163, 2.000000e+00 %209 = fsub float 1.000000e+00, %208 %210 = fmul float %209, %201 %211 = fmul float %206, %127 %212 = fmul float %207, %128 %213 = fadd float %211, %212 %214 = fmul float %210, %129 %215 = fadd float %213, %214 %216 = fmul float %130, 0.000000e+00 %217 = fadd float %215, %216 %218 = fmul float %206, %137 %219 = fmul float %207, %138 %220 = fadd float %218, %219 %221 = fmul float %210, %139 %222 = fadd float %220, %221 %223 = fmul float %140, 0.000000e+00 %224 = fadd float %222, %223 %225 = fmul float %206, %147 %226 = fmul float %207, %148 %227 = fadd float %225, %226 %228 = fmul float %210, %149 %229 = fadd float %227, %228 %230 = fmul float %150, 0.000000e+00 %231 = fadd float %229, %230 %232 = fmul float %217, %217 %233 = fmul float %224, %224 %234 = fadd float %233, %232 %235 = fmul float %231, %231 %236 = fadd float %234, %235 %237 = call float @llvm.AMDGPU.rsq.clamped.f32(float %236) %238 = fmul float %217, %237 %239 = fmul float %224, %237 %240 = fmul float %231, %237 %241 = fmul float %75, %127 %242 = fmul float %76, %128 %243 = fadd float %241, %242 %244 = fmul float %77, %129 %245 = fadd float %243, %244 %246 = fadd float %245, %130 %247 = fmul float %75, %137 %248 = fmul float %76, %138 %249 = fadd float %247, %248 %250 = fmul float %77, %139 %251 = fadd float %249, %250 %252 = fadd float %251, %140 %253 = fmul float %75, %147 %254 = fmul float %76, %148 %255 = fadd float %253, %254 %256 = fmul float %77, %149 %257 = fadd float %255, %256 %258 = fadd float %257, %150 %259 = fsub float %246, %55 %260 = fsub float %252, %56 %261 = fsub float %258, %57 %262 = fmul float %58, %259 %263 = fmul float %59, %260 %264 = fadd float %263, %262 %265 = fmul float %60, %261 %266 = fadd float %264, %265 %267 = fmul float %259, %259 %268 = fmul float %260, %260 %269 = fadd float %268, %267 %270 = fmul float %261, %261 %271 = fadd float %269, %270 %272 = call float @llvm.AMDGPU.rsq.clamped.f32(float %271) %273 = fmul float %259, %272 %274 = fmul float %260, %272 %275 = fmul float %261, %272 %276 = fmul float %238, %64 %277 = fmul float %239, %65 %278 = fadd float %277, %276 %279 = fmul float %240, %66 %280 = fadd float %278, %279 %281 = fmul float %280, %238 %282 = fmul float %280, %239 %283 = fmul float %280, %240 %284 = fmul float %281, 2.000000e+00 %285 = fmul float %282, 2.000000e+00 %286 = fmul float %283, 2.000000e+00 %287 = fsub float %64, %284 %288 = fsub float %65, %285 %289 = fsub float %66, %286 %290 = fmul float %273, %287 %291 = fsub float -0.000000e+00, %290 %292 = fmul float %274, %288 %293 = fsub float %291, %292 %294 = fmul float %275, %289 %295 = fsub float %293, %294 %296 = call float @llvm.AMDIL.clamp.(float %295, float 0.000000e+00, float 1.000000e+00) %297 = call float @llvm.pow.f32(float %296, float 1.600000e+01) %298 = call float @llvm.AMDIL.clamp.(float %297, float 0.000000e+00, float 1.000000e+00) %299 = fmul float %246, %39 %300 = fmul float %252, %43 %301 = fadd float %299, %300 %302 = fmul float %258, %47 %303 = fadd float %301, %302 %304 = fadd float %303, %51 %305 = fmul float %246, %40 %306 = fmul float %252, %44 %307 = fadd float %305, %306 %308 = fmul float %258, %48 %309 = fadd float %307, %308 %310 = fadd float %309, %52 %311 = fmul float %246, %41 %312 = fmul float %252, %45 %313 = fadd float %311, %312 %314 = fmul float %258, %49 %315 = fadd float %313, %314 %316 = fadd float %315, %53 %317 = fmul float %246, %42 %318 = fmul float %252, %46 %319 = fadd float %317, %318 %320 = fmul float %258, %50 %321 = fadd float %319, %320 %322 = fadd float %321, %54 %323 = fmul float %16, 0x3F91DF4720000000 %324 = fadd float %90, -5.000000e-01 %325 = fadd float %91, -5.000000e-01 %326 = call float @llvm.cos.f32(float %323) %327 = call float @llvm.sin.f32(float %323) %328 = fmul float %327, %325 %329 = fmul float %326, %324 %330 = fsub float %329, %328 %331 = fmul float %326, %325 %332 = fmul float %327, %324 %333 = fadd float %332, %331 %334 = fmul float %13, %330 %335 = fadd float %334, 5.000000e-01 %336 = fmul float %13, %333 %337 = fadd float %336, 5.000000e-01 %338 = fmul float %61, %19 %339 = fadd float %338, %335 %340 = fmul float %61, %20 %341 = fadd float %340, %337 %342 = fadd float %339, %25 %343 = fadd float %341, %26 %344 = fmul float %17, 0x3F91DF4720000000 %345 = call float @llvm.cos.f32(float %344) %346 = call float @llvm.sin.f32(float %344) %347 = fmul float %346, %325 %348 = fmul float %345, %324 %349 = fsub float %348, %347 %350 = fmul float %346, %324 %351 = fmul float %345, %325 %352 = fadd float %351, %350 %353 = fmul float %349, %14 %354 = fadd float %353, 5.000000e-01 %355 = fmul float %352, %14 %356 = fadd float %355, 5.000000e-01 %357 = fmul float %61, %21 %358 = fadd float %357, %354 %359 = fmul float %61, %22 %360 = fadd float %359, %356 %361 = fadd float %358, %27 %362 = fadd float %360, %28 %363 = fmul float %18, 0x3F91DF4720000000 %364 = call float @llvm.cos.f32(float %363) %365 = call float @llvm.sin.f32(float %363) %366 = fmul float %365, %325 %367 = fmul float %364, %324 %368 = fsub float %367, %366 %369 = fmul float %365, %324 %370 = fmul float %364, %325 %371 = fadd float %370, %369 %372 = fmul float %368, %15 %373 = fadd float %372, 5.000000e-01 %374 = fmul float %371, %15 %375 = fadd float %374, 5.000000e-01 %376 = fmul float %61, %23 %377 = fadd float %376, %373 %378 = fmul float %61, %24 %379 = fadd float %378, %375 %380 = fadd float %377, %29 %381 = fadd float %379, %30 %382 = fmul float %32, 0x3F91DF4720000000 %383 = call float @llvm.cos.f32(float %382) %384 = call float @llvm.sin.f32(float %382) %385 = fmul float %384, %325 %386 = fmul float %383, %324 %387 = fsub float %386, %385 %388 = fmul float %384, %324 %389 = fmul float %383, %325 %390 = fadd float %389, %388 %391 = fmul float %31, %387 %392 = fadd float %391, 5.000000e-01 %393 = fmul float %31, %390 %394 = fadd float %393, 5.000000e-01 %395 = fmul float %61, %33 %396 = fadd float %395, %392 %397 = fmul float %61, %34 %398 = fadd float %397, %394 %399 = fadd float %396, %35 %400 = fadd float %398, %36 %401 = bitcast float %117 to i32 %402 = bitcast float %118 to i32 %403 = insertelement <4 x i32> , i32 %401, i32 1 %404 = insertelement <4 x i32> %403, i32 %402, i32 2 %405 = insertelement <4 x i32> %404, i32 0, i32 3 %406 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %405, <8 x i32> %68, <4 x i32> %70, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %407 = extractelement <4 x float> %406, i32 0 %408 = extractelement <4 x float> %406, i32 1 %409 = extractelement <4 x float> %406, i32 2 %410 = extractelement <4 x float> %406, i32 3 %411 = fmul float %103, 5.000000e-01 %412 = fmul float %104, 5.000000e-01 %413 = fmul float %105, 5.000000e-01 %414 = call float @llvm.maxnum.f32(float %411, float 0x3F1A36E2E0000000) %415 = call float @llvm.maxnum.f32(float %412, float 0x3F1A36E2E0000000) %416 = call float @llvm.maxnum.f32(float %413, float 0x3F1A36E2E0000000) %417 = fmul float %110, 0x3FB3D07220000000 %418 = fmul float %111, 0x3FB3D07220000000 %419 = fmul float %112, 0x3FB3D07220000000 %420 = fmul float %110, 0x3FEE54EDE0000000 %421 = fadd float %420, 0x3FAAB12320000000 %422 = fmul float %111, 0x3FEE54EDE0000000 %423 = fadd float %422, 0x3FAAB12320000000 %424 = fmul float %112, 0x3FEE54EDE0000000 %425 = fadd float %424, 0x3FAAB12320000000 %426 = call float @llvm.pow.f32(float %421, float 0x4003333340000000) %427 = call float @llvm.pow.f32(float %423, float 0x4003333340000000) %428 = call float @llvm.pow.f32(float %425, float 0x4003333340000000) %429 = fcmp ogt float %110, 0x3FA4B5DCC0000000 %. = select i1 %429, float %426, float %417 %430 = fcmp ogt float %111, 0x3FA4B5DCC0000000 %temp56.0 = select i1 %430, float %427, float %418 %431 = fcmp ogt float %112, 0x3FA4B5DCC0000000 %.70 = select i1 %431, float %428, float %419 %432 = fmul float %., %407 %433 = fmul float %temp56.0, %408 %434 = fmul float %.70, %409 %435 = fsub float -0.000000e+00, %310 %436 = fmul float %316, 2.000000e+00 %437 = fsub float %436, %322 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %246, float %252, float %258, float %266) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %238, float %239, float %240, float %298) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %432, float %433, float %434, float %410) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %96, float %97, float %98, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %414, float %415, float %416, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %342, float %343, float %361, float %362) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 39, i32 0, float %380, float %381, float %399, float %400) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %304, float %435, float %437, float %322) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.cos.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sin.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0xc3000000 ; 7E0202FF C3000000 v_mov_b32_e32 v2, 0x437f0000 ; 7E0402FF 437F0000 v_mov_b32_e32 v8, 0x80000000 ; 7E1002FF 80000000 v_mov_b32_e32 v9, 0xc2800000 ; 7E1202FF C2800000 v_mov_b32_e32 v10, 0x3c820821 ; 7E1402FF 3C820821 v_mov_b32_e32 v11, 0x3d558919 ; 7E1602FF 3D558919 v_add_i32_e32 v0, s10, v0 ; 4A00000A v_add_i32_e32 v3, s11, v3 ; 4A06060B s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500 v_mov_b32_e32 v12, 0x3f72a76f ; 7E1802FF 3F72A76F v_mov_b32_e32 v13, 0x3c8efa39 ; 7E1A02FF 3C8EFA39 v_mov_b32_e32 v14, 0x3e22f983 ; 7E1C02FF 3E22F983 v_mov_b32_e32 v15, 0x4019999a ; 7E1E02FF 4019999A v_mov_b32_e32 v16, 0x3d9e8391 ; 7E2002FF 3D9E8391 v_mov_b32_e32 v17, 0x3d25aee6 ; 7E2202FF 3D25AEE6 s_load_dwordx4 s[24:27], s[8:9], 0x0 ; C08C0900 s_load_dwordx4 s[28:31], s[8:9], 0x4 ; C08E0904 s_load_dwordx4 s[32:35], s[8:9], 0x8 ; C0900908 s_load_dwordx4 s[36:39], s[8:9], 0xc ; C092090C s_load_dwordx4 s[40:43], s[8:9], 0x10 ; C0940910 s_load_dwordx4 s[44:47], s[8:9], 0x14 ; C0960914 s_load_dwordx4 s[48:51], s[8:9], 0x18 ; C0980918 s_load_dwordx4 s[8:11], s[2:3], 0x10 ; C0840310 s_load_dwordx4 s[52:55], s[2:3], 0x14 ; C09A0314 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[18:21], v0, s[24:27], 0 idxen ; E00C2000 80061200 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[21:24], v0, s[28:31], 0 idxen ; E00C2000 80071500 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[23:26], v0, s[32:35], 0 idxen ; E00C2000 80081700 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[25:28], v0, s[36:39], 0 idxen ; E00C2000 80091900 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[28:31], v0, s[40:43], 0 idxen ; E00C2000 800A1C00 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[31:34], v0, s[44:47], 0 idxen ; E00C2000 800B1F00 buffer_load_format_xyzw v[4:7], v3, s[48:51], 0 idxen ; E00C2000 800C0403 s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v6, 0 ; 7E0C0280 s_buffer_load_dword s26, s[52:55], 0x0 ; C20D3500 s_buffer_load_dword s25, s[52:55], 0x1 ; C20CB501 s_buffer_load_dword s24, s[52:55], 0x2 ; C20C3502 s_buffer_load_dword s28, s[8:11], 0x4c ; C20E094C s_buffer_load_dword s30, s[8:11], 0x4d ; C20F094D s_buffer_load_dword s29, s[8:11], 0x4e ; C20E894E s_buffer_load_dword s27, s[8:11], 0x50 ; C20D8950 image_sample_l v[34:37], 15, 0, 0, 0, 0, 0, 0, 0, v[4:7], s[16:23], s[12:15] ; F0900F00 00642204 v_mov_b32_e32 v3, 0x10001 ; 7E0602FF 00010001 image_sample_l_o v[38:41], 15, 0, 0, 0, 0, 0, 0, 0, v[3:6], s[16:23], s[12:15] ; F0D00F00 00642603 v_mov_b32_e32 v3, 0x20002 ; 7E0602FF 00020002 image_sample_l_o v[42:45], 15, 0, 0, 0, 0, 0, 0, 0, v[3:6], s[16:23], s[12:15] ; F0D00F00 00642A03 s_waitcnt vmcnt(2) lgkmcnt(0) ; BF8C0072 v_mul_f32_e32 v0, v35, v19 ; 10002723 v_mad_f32 v3, v2, v21, v1 ; D2820003 04062B02 v_mac_f32_e32 v1, v2, v22 ; 3E022D02 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v2, v8, -1.0, vcc ; D2000002 01A9E708 v_cmp_gt_f32_e64 s[4:5], v31, v17 ; D0080004 0002231F v_cmp_gt_f32_e64 s[0:1], v32, v17 ; D0080000 00022320 v_cmp_gt_f32_e32 vcc, v33, v17 ; 7C082321 v_mad_f32 v7, v12, v31, v11 ; D2820007 042E3F0C v_mad_f32 v8, v12, v32, v11 ; D2820008 042E410C v_mac_f32_e32 v11, v12, v33 ; 3E16430C v_mul_f32_e32 v12, v16, v31 ; 10183F10 v_mul_f32_e32 v17, v16, v32 ; 10224110 v_mul_f32_e32 v16, v16, v33 ; 10204310 v_add_f32_e64 v1, |v1|, v2 ; D2060101 00020501 v_cmp_gt_f32_e64 s[6:7], 0, v3 ; D0080006 00020680 v_cndmask_b32_e64 v2, 0, 1.0, s[6:7] ; D2000002 0019E480 v_sub_f32_e64 v3, |v3|, v2 ; D2080103 00020503 v_add_f32_e32 v3, v9, v3 ; 06060709 v_add_f32_e32 v1, v9, v1 ; 06020309 v_cmp_gt_f32_e64 s[6:7], 0, v3 ; D0080006 00020680 v_cndmask_b32_e64 v9, 0, 1.0, s[6:7] ; D2000009 0019E480 v_sub_f32_e64 v3, |v3|, v9 ; D2080103 00021303 v_cmp_gt_f32_e64 s[6:7], 0, v1 ; D0080006 00020280 v_cndmask_b32_e64 v21, 0, 1.0, s[6:7] ; D2000015 0019E480 v_sub_f32_e64 v1, |v1|, v21 ; D2080101 00022B01 v_mul_f32_e32 v22, v10, v3 ; 102C070A v_mad_f32 v3, -v3, v10, 1.0 ; D2820003 23CA1503 v_mad_f32 v3, -v1, v10, v3 ; D2820003 240E1501 v_mul_f32_e32 v1, v10, v1 ; 1002030A v_mac_f32_e32 v0, v34, v18 ; 3E002522 s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v10, v39, v19 ; 10142727 v_mac_f32_e32 v10, v38, v18 ; 3E142526 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v19, v43, v19 ; 1026272B v_mac_f32_e32 v19, v42, v18 ; 3E26252A v_mul_f32_e32 v18, v22, v22 ; 10242D16 v_mac_f32_e32 v18, v1, v1 ; 3E240301 v_mac_f32_e32 v18, v3, v3 ; 3E240703 v_rsq_clamp_f32_e32 v18, v18 ; 7E245912 v_mac_f32_e32 v0, v36, v20 ; 3E002924 v_mac_f32_e32 v10, v40, v20 ; 3E142928 v_mac_f32_e32 v19, v44, v20 ; 3E26292C v_mul_f32_e32 v20, v18, v22 ; 10282D12 v_mul_f32_e32 v1, v18, v1 ; 10020312 v_mul_f32_e32 v3, v18, v3 ; 10060712 v_mad_f32 v9, -2.0, v9, 1.0 ; D2820009 03CA12F5 v_mul_f32_e32 v9, v9, v20 ; 10122909 v_mad_f32 v18, -2.0, v21, 1.0 ; D2820012 03CA2AF5 v_mul_f32_e32 v1, v18, v1 ; 10020312 v_mad_f32 v2, -2.0, v2, 1.0 ; D2820002 03CA04F5 v_mul_f32_e32 v2, v3, v2 ; 10040503 v_mul_f32_e32 v3, v35, v1 ; 10060323 v_mul_f32_e32 v18, v39, v1 ; 10240327 v_mul_f32_e32 v1, v43, v1 ; 1002032B v_mac_f32_e32 v3, v34, v9 ; 3E061322 v_mac_f32_e32 v18, v38, v9 ; 3E241326 v_mac_f32_e32 v1, v42, v9 ; 3E02132A v_mac_f32_e32 v3, v36, v2 ; 3E060524 v_mac_f32_e32 v18, v40, v2 ; 3E240528 v_mac_f32_e32 v1, v44, v2 ; 3E02052C v_mac_f32_e32 v3, 0, v37 ; 3E064A80 v_mac_f32_e32 v18, 0, v41 ; 3E245280 v_mac_f32_e32 v1, 0, v45 ; 3E025A80 v_mul_f32_e32 v2, v3, v3 ; 10040703 v_mac_f32_e32 v2, v18, v18 ; 3E042512 v_mac_f32_e32 v2, v1, v1 ; 3E040301 v_rsq_clamp_f32_e32 v2, v2 ; 7E045902 v_add_f32_e32 v0, v37, v0 ; 06000125 v_add_f32_e32 v9, v41, v10 ; 06121529 v_add_f32_e32 v10, v45, v19 ; 0614272D v_mul_f32_e32 v19, v2, v3 ; 10260702 v_mul_f32_e32 v18, v2, v18 ; 10242502 v_mul_f32_e32 v1, v2, v1 ; 10020302 v_subrev_f32_e32 v2, s28, v0 ; 0A04001C v_subrev_f32_e32 v3, s30, v9 ; 0A06121E v_mul_f32_e32 v20, v2, v2 ; 10280502 v_mac_f32_e32 v20, v3, v3 ; 3E280703 v_subrev_f32_e32 v21, s29, v10 ; 0A2A141D v_mac_f32_e32 v20, v21, v21 ; 3E282B15 v_rsq_clamp_f32_e32 v20, v20 ; 7E285914 v_mul_f32_e32 v22, s26, v19 ; 102C261A v_mac_f32_e32 v22, s25, v18 ; 3E2C2419 v_mac_f32_e32 v22, s24, v1 ; 3E2C0218 v_mul_f32_e32 v31, v19, v22 ; 103E2D13 v_mad_f32 v31, -2.0, v31, s26 ; D282001F 006A3EF5 v_mul_f32_e32 v32, v20, v2 ; 10400514 v_mul_f32_e32 v31, v31, v32 ; 103E411F v_mul_f32_e32 v32, v18, v22 ; 10402D12 v_mad_f32 v32, -2.0, v32, s25 ; D2820020 006640F5 s_buffer_load_dword s6, s[8:11], 0x51 ; C2030951 s_buffer_load_dword s7, s[8:11], 0x52 ; C2038952 v_mul_f32_e32 v33, v20, v3 ; 10420714 v_mad_f32 v31, -v33, v32, -v31 ; D282001F A47E4121 v_mul_f32_e32 v22, v1, v22 ; 102C2D01 v_mad_f32 v22, -2.0, v22, s24 ; D2820016 00622CF5 v_mul_f32_e32 v20, v20, v21 ; 10282B14 v_mad_f32 v20, -v20, v22, v31 ; D2820014 247E2D14 v_mul_f32_e32 v2, s27, v2 ; 1004041B s_buffer_load_dword s24, s[8:11], 0x57 ; C20C0957 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v2, s6, v3 ; 3E040606 v_mac_f32_e32 v2, s7, v21 ; 3E042A07 v_mov_b32_e32 v3, 0x30003 ; 7E0602FF 00030003 image_sample_l_o v[31:34], 15, 0, 0, 0, 0, 0, 0, 0, v[3:6], s[16:23], s[12:15] ; F0D00F00 00641F03 exp 15, 32, 0, 0, 0, v6, v6, v6, v6 ; F800020F 06060606 exp 15, 33, 0, 0, 0, v0, v9, v10, v2 ; F800021F 020A0900 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_add_f32_e64 v2, 0, v20 clamp ; D2060802 00022880 v_log_f32_e32 v2, v2 ; 7E044F02 v_log_f32_e32 v3, v7 ; 7E064F07 v_log_f32_e32 v4, v8 ; 7E084F08 v_log_f32_e32 v5, v11 ; 7E0A4F0B v_mul_legacy_f32_e32 v2, 0x41800000, v2 ; 0E0404FF 41800000 v_exp_f32_e32 v2, v2 ; 7E044B02 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 exp 15, 34, 0, 0, 0, v19, v18, v1, v2 ; F800022F 02011213 s_waitcnt expcnt(0) ; BF8C070F v_mul_legacy_f32_e32 v1, v15, v3 ; 0E02070F v_mul_legacy_f32_e32 v2, v15, v4 ; 0E04090F v_mul_legacy_f32_e32 v3, v15, v5 ; 0E060B0F v_exp_f32_e32 v1, v1 ; 7E024B01 v_cndmask_b32_e64 v1, v12, v1, s[4:5] ; D2000001 0012030C v_exp_f32_e32 v2, v2 ; 7E044B02 v_cndmask_b32_e64 v2, v17, v2, s[0:1] ; D2000002 00020511 s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 v_exp_f32_e32 v3, v3 ; 7E064B03 v_cndmask_b32_e32 v3, v16, v3 ; 00060710 v_mul_f32_e32 v1, v31, v1 ; 1002031F v_mul_f32_e32 v2, v32, v2 ; 10040520 v_mul_f32_e32 v3, v33, v3 ; 10060721 exp 15, 35, 0, 0, 0, v1, v2, v3, v34 ; F800023F 22030201 exp 15, 36, 0, 0, 0, v25, v26, v27, v6 ; F800024F 061B1A19 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_add_f32_e32 v1, -0.5, v23 ; 06022EF1 v_add_f32_e32 v2, -0.5, v24 ; 060430F1 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 v_mul_f32_e32 v3, 0.5, v28 ; 100638F0 v_mul_f32_e32 v4, 0.5, v29 ; 10083AF0 v_mul_f32_e32 v5, 0.5, v30 ; 100A3CF0 v_mov_b32_e32 v7, 0x38d1b717 ; 7E0E02FF 38D1B717 v_max_f32_e32 v3, v7, v3 ; 20060707 v_max_f32_e32 v4, v7, v4 ; 20080907 v_max_f32_e32 v5, v7, v5 ; 200A0B07 exp 15, 37, 0, 0, 0, v3, v4, v5, v6 ; F800025F 06050403 s_buffer_load_dword s5, s[0:3], 0x9 ; C2028109 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v3, s4, v13 ; 10061A04 v_mul_f32_e32 v3, v14, v3 ; 1006070E v_fract_f32_e32 v3, v3 ; 7E064103 v_cos_f32_e32 v4, v3 ; 7E086D03 v_sin_f32_e32 v3, v3 ; 7E066B03 v_mul_f32_e32 v5, v2, v3 ; 100A0702 v_mad_f32 v5, v4, v1, -v5 ; D2820005 84160304 v_mul_f32_e32 v4, v2, v4 ; 10080902 v_mac_f32_e32 v4, v1, v3 ; 3E080701 v_mul_f32_e32 v3, s5, v13 ; 10061A05 v_mul_f32_e32 v3, v14, v3 ; 1006070E v_fract_f32_e32 v3, v3 ; 7E064103 v_cos_f32_e32 v7, v3 ; 7E0E6D03 v_sin_f32_e32 v3, v3 ; 7E066B03 v_mul_f32_e32 v8, v2, v3 ; 10100702 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 v_mul_f32_e32 v3, v1, v3 ; 10060701 v_mad_f32 v8, v7, v1, -v8 ; D2820008 84220307 v_mac_f32_e32 v3, v2, v7 ; 3E060F02 s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106 s_buffer_load_dword s6, s[0:3], 0x7 ; C2030107 s_buffer_load_dword s7, s[0:3], 0xc ; C203810C s_buffer_load_dword s12, s[0:3], 0xd ; C206010D s_buffer_load_dword s13, s[0:3], 0xe ; C206810E s_buffer_load_dword s14, s[0:3], 0xf ; C207010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s4, v5, 0.5 ; D2820005 03C20A04 v_mad_f32 v4, s4, v4, 0.5 ; D2820004 03C20804 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_buffer_load_dword s15, s[0:3], 0x13 ; C2078113 s_buffer_load_dword s16, s[0:3], 0x14 ; C2080114 v_mad_f32 v7, v8, s5, 0.5 ; D2820007 03C00B08 v_mad_f32 v3, v3, s5, 0.5 ; D2820003 03C00B03 v_mov_b32_e32 v8, s7 ; 7E100207 v_mac_f32_e32 v5, s24, v8 ; 3E0A1018 v_mov_b32_e32 v8, s12 ; 7E10020C v_mac_f32_e32 v4, s24, v8 ; 3E081018 v_mov_b32_e32 v8, s13 ; 7E10020D v_mac_f32_e32 v7, s24, v8 ; 3E0E1018 v_mov_b32_e32 v8, s14 ; 7E10020E s_buffer_load_dword s5, s[0:3], 0x15 ; C2028115 v_mac_f32_e32 v3, s24, v8 ; 3E061018 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v5, s4, v5 ; 060A0A04 v_add_f32_e32 v4, s15, v4 ; 0608080F v_add_f32_e32 v7, s16, v7 ; 060E0E10 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_buffer_load_dword s7, s[0:3], 0x16 ; C2038116 s_buffer_load_dword s12, s[0:3], 0x17 ; C2060117 s_buffer_load_dword s13, s[0:3], 0x18 ; C2068118 s_buffer_load_dword s14, s[0:3], 0x19 ; C2070119 v_add_f32_e32 v3, s5, v3 ; 06060605 exp 15, 38, 0, 0, 0, v5, v4, v7, v3 ; F800026F 03070405 s_buffer_load_dword s5, s[0:3], 0x10 ; C2028110 s_buffer_load_dword s15, s[0:3], 0x11 ; C2078111 s_buffer_load_dword s16, s[0:3], 0x1a ; C208011A s_buffer_load_dword s17, s[0:3], 0x1b ; C208811B s_buffer_load_dword s18, s[0:3], 0x1c ; C209011C s_buffer_load_dword s0, s[0:3], 0x1d ; C200011D s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v3, s4, v13 ; 10061A04 v_mul_f32_e32 v4, s14, v13 ; 10081A0E v_mul_f32_e32 v3, v14, v3 ; 1006070E v_mul_f32_e32 v4, v14, v4 ; 1008090E v_fract_f32_e32 v3, v3 ; 7E064103 v_cos_f32_e32 v5, v3 ; 7E0A6D03 v_sin_f32_e32 v3, v3 ; 7E066B03 v_mul_f32_e32 v7, v2, v3 ; 100E0702 v_mul_f32_e32 v3, v1, v3 ; 10060701 v_mad_f32 v7, v5, v1, -v7 ; D2820007 841E0305 v_mac_f32_e32 v3, v2, v5 ; 3E060B02 v_fract_f32_e32 v4, v4 ; 7E084104 v_cos_f32_e32 v5, v4 ; 7E0A6D04 v_sin_f32_e32 v4, v4 ; 7E086B04 v_mul_f32_e32 v8, v2, v4 ; 10100902 v_mul_f32_e32 v4, v1, v4 ; 10080901 v_mad_f32 v1, v5, v1, -v8 ; D2820001 84220305 v_mac_f32_e32 v4, v2, v5 ; 3E080B02 v_mad_f32 v2, v7, s6, 0.5 ; D2820002 03C00D07 v_mad_f32 v3, v3, s6, 0.5 ; D2820003 03C00D03 v_mad_f32 v1, s13, v1, 0.5 ; D2820001 03C2020D v_mad_f32 v4, s13, v4, 0.5 ; D2820004 03C2080D v_mov_b32_e32 v5, s5 ; 7E0A0205 v_mac_f32_e32 v2, s24, v5 ; 3E040A18 v_mov_b32_e32 v5, s15 ; 7E0A020F v_mac_f32_e32 v3, s24, v5 ; 3E060A18 v_mov_b32_e32 v5, s16 ; 7E0A0210 v_mac_f32_e32 v1, s24, v5 ; 3E020A18 v_mov_b32_e32 v5, s17 ; 7E0A0211 v_mac_f32_e32 v4, s24, v5 ; 3E080A18 v_add_f32_e32 v2, s7, v2 ; 06040407 v_add_f32_e32 v3, s12, v3 ; 0606060C v_add_f32_e32 v1, s18, v1 ; 06020212 v_add_f32_e32 v4, s0, v4 ; 06080800 exp 15, 39, 0, 0, 0, v2, v3, v1, v4 ; F800027F 04010302 s_buffer_load_dword s0, s[8:11], 0xf ; C200090F s_buffer_load_dword s1, s[8:11], 0x0 ; C2008900 s_buffer_load_dword s2, s[8:11], 0x1 ; C2010901 s_buffer_load_dword s3, s[8:11], 0x2 ; C2018902 s_buffer_load_dword s4, s[8:11], 0x3 ; C2020903 s_buffer_load_dword s5, s[8:11], 0x4 ; C2028904 s_buffer_load_dword s6, s[8:11], 0x5 ; C2030905 s_buffer_load_dword s7, s[8:11], 0x6 ; C2038906 s_buffer_load_dword s12, s[8:11], 0x7 ; C2060907 s_buffer_load_dword s13, s[8:11], 0x8 ; C2068908 s_buffer_load_dword s14, s[8:11], 0x9 ; C2070909 s_buffer_load_dword s15, s[8:11], 0xa ; C207890A s_buffer_load_dword s16, s[8:11], 0xb ; C208090B s_buffer_load_dword s17, s[8:11], 0xc ; C208890C s_buffer_load_dword s18, s[8:11], 0xd ; C209090D s_buffer_load_dword s8, s[8:11], 0xe ; C204090E s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v1, s5, v9 ; 10021205 v_mul_f32_e32 v2, s6, v9 ; 10041206 v_mul_f32_e32 v3, s7, v9 ; 10061207 v_mul_f32_e32 v4, s12, v9 ; 1008120C v_mac_f32_e32 v1, s1, v0 ; 3E020001 v_mac_f32_e32 v2, s2, v0 ; 3E040002 v_mac_f32_e32 v3, s3, v0 ; 3E060003 v_mac_f32_e32 v4, s4, v0 ; 3E080004 v_mac_f32_e32 v1, s13, v10 ; 3E02140D v_mac_f32_e32 v2, s14, v10 ; 3E04140E v_mac_f32_e32 v3, s15, v10 ; 3E06140F v_mac_f32_e32 v4, s16, v10 ; 3E081410 v_add_f32_e32 v0, s17, v1 ; 06000211 v_add_f32_e32 v1, s18, v2 ; 06020412 v_add_f32_e32 v2, s8, v3 ; 06040608 v_add_f32_e32 v3, s0, v4 ; 06060800 v_xor_b32_e32 v1, 0x80000000, v1 ; 3A0202FF 80000000 v_mad_f32 v2, 2.0, v2, -v3 ; D2820002 840E04F4 exp 15, 12, 0, 0, 0, v0, v1, v2, v3 ; F80000CF 03020100 exp 15, 13, 0, 1, 0, v6, v6, v6, v6 ; F80008DF 06060606 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 48 Code Size: 1664 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[4], PERSPECTIVE DCL IN[5], GENERIC[5], PERSPECTIVE DCL IN[6], GENERIC[6], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL SAMP[8] DCL SVIEW[0], SHADOW2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL SVIEW[5], 2D, FLOAT DCL SVIEW[6], 2D, FLOAT DCL SVIEW[7], 2D, FLOAT DCL SVIEW[8], 2D, FLOAT DCL CONST[1][0..24] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0..18], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 3.0000, 2.0000} IMM[1] UINT32 {0, 228, 3, 304} IMM[2] UINT32 {4, 200, 196, 192} IMM[3] UINT32 {204, 216, 212, 208} IMM[4] UINT32 {220, 232, 224, 236} IMM[5] UINT32 {248, 244, 240, 252} IMM[6] UINT32 {256, 76, 80, 92} IMM[7] UINT32 {96, 32, 44, 48} IMM[8] FLT32 { 0.2060, 0.0749, 0.1236, 0.2125} IMM[9] UINT32 {64, 16, 448, 468} IMM[10] FLT32 { 0.2125, 0.7154, 0.0721, 0.0000} IMM[11] UINT32 {464, 460, 472, 0} 0: DP3 TEMP[0].x, IN[1].xyzz, IN[1].xyzz 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MUL TEMP[0].xyz, IN[1].xyzz, TEMP[0].xxxx 3: MOV TEMP[1].xy, IN[5].zwww 4: TEX TEMP[1], TEMP[1], SAMP[2], 2D 5: MOV TEMP[2].xy, IN[6].xyyy 6: TEX TEMP[2], TEMP[2], SAMP[3], 2D 7: MOV TEMP[3].xy, IN[6].zwww 8: TEX TEMP[3], TEMP[3], SAMP[4], 2D 9: ADD TEMP[4].x, TEMP[1].wwww, -IN[4].xxxx 10: MAX TEMP[4].x, IMM[0].xxxx, TEMP[4].xxxx 11: ADD TEMP[5].x, IN[3].xxxx, -TEMP[4].xxxx 12: ADD TEMP[6].x, IN[4].xxxx, TEMP[1].wwww 13: MIN TEMP[6].x, IMM[0].yyyy, TEMP[6].xxxx 14: ADD TEMP[4].x, TEMP[6].xxxx, -TEMP[4].xxxx 15: RCP TEMP[4].x, TEMP[4].xxxx 16: MUL TEMP[4].x, TEMP[5].xxxx, TEMP[4].xxxx 17: MOV_SAT TEMP[4].x, TEMP[4].xxxx 18: MUL TEMP[5].x, IMM[0].wwww, TEMP[4].xxxx 19: ADD TEMP[5].x, IMM[0].zzzz, -TEMP[5].xxxx 20: MUL TEMP[5].x, TEMP[4].xxxx, TEMP[5].xxxx 21: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx 22: ADD TEMP[5].x, IMM[0].yyyy, -TEMP[4].xxxx 23: ADD TEMP[6].x, TEMP[2].wwww, -IN[4].yyyy 24: MAX TEMP[6].x, IMM[0].xxxx, TEMP[6].xxxx 25: ADD TEMP[7].x, IN[3].yyyy, -TEMP[6].xxxx 26: ADD TEMP[8].x, IN[4].yyyy, TEMP[2].wwww 27: MIN TEMP[8].x, IMM[0].yyyy, TEMP[8].xxxx 28: ADD TEMP[6].x, TEMP[8].xxxx, -TEMP[6].xxxx 29: RCP TEMP[6].x, TEMP[6].xxxx 30: MUL TEMP[6].x, TEMP[7].xxxx, TEMP[6].xxxx 31: MOV_SAT TEMP[6].x, TEMP[6].xxxx 32: MUL TEMP[7].x, IMM[0].wwww, TEMP[6].xxxx 33: ADD TEMP[7].x, IMM[0].zzzz, -TEMP[7].xxxx 34: MUL TEMP[7].x, TEMP[6].xxxx, TEMP[7].xxxx 35: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].xxxx 36: MAX TEMP[6].x, TEMP[6].xxxx, IMM[0].xxxx 37: MIN TEMP[6].x, TEMP[6].xxxx, TEMP[5].xxxx 38: ADD TEMP[5].x, TEMP[5].xxxx, -TEMP[6].xxxx 39: ADD TEMP[7].x, TEMP[3].wwww, -IN[4].zzzz 40: MAX TEMP[7].x, IMM[0].xxxx, TEMP[7].xxxx 41: ADD TEMP[8].x, IN[3].zzzz, -TEMP[7].xxxx 42: ADD TEMP[9].x, IN[4].zzzz, TEMP[3].wwww 43: MIN TEMP[9].x, IMM[0].yyyy, TEMP[9].xxxx 44: ADD TEMP[7].x, TEMP[9].xxxx, -TEMP[7].xxxx 45: RCP TEMP[7].x, TEMP[7].xxxx 46: MUL TEMP[7].x, TEMP[8].xxxx, TEMP[7].xxxx 47: MOV_SAT TEMP[7].x, TEMP[7].xxxx 48: MUL TEMP[8].x, IMM[0].wwww, TEMP[7].xxxx 49: ADD TEMP[8].x, IMM[0].zzzz, -TEMP[8].xxxx 50: MUL TEMP[8].x, TEMP[7].xxxx, TEMP[8].xxxx 51: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[8].xxxx 52: MAX TEMP[7].x, TEMP[7].xxxx, IMM[0].xxxx 53: MIN TEMP[7].x, TEMP[7].xxxx, TEMP[5].xxxx 54: ADD TEMP[5].x, TEMP[5].xxxx, -TEMP[7].xxxx 55: MOV TEMP[8].xy, IN[6].zwww 56: TEX TEMP[8].xyz, TEMP[8], SAMP[8], 2D 57: MOV TEMP[9].xy, IN[6].xyyy 58: TEX TEMP[9].xyz, TEMP[9], SAMP[7], 2D 59: MOV TEMP[10].xy, IN[5].zwww 60: TEX TEMP[10].xyz, TEMP[10], SAMP[6], 2D 61: MOV TEMP[11].xy, IN[5].xyyy 62: TEX TEMP[11].xyz, TEMP[11], SAMP[5], 2D 63: MUL TEMP[11].xyz, TEMP[5].xxxx, TEMP[11].xyzz 64: MAD TEMP[10].xyz, TEMP[4].xxxx, TEMP[10].xyzz, TEMP[11].xyzz 65: MAD TEMP[9].xyz, TEMP[6].xxxx, TEMP[9].xyzz, TEMP[10].xyzz 66: MAD TEMP[8].xz, TEMP[7].xxxx, TEMP[8].xyzz, TEMP[9].xyzz 67: MUL TEMP[9].x, TEMP[8].zzzz, CONST[1][14].yyyy 68: MOV TEMP[10].xyz, -CONST[4][19].xyzx 69: ADD TEMP[11].xyz, IN[0].xyzz, TEMP[10].xyzz 70: DP3 TEMP[12].x, TEMP[11].xyzz, TEMP[11].xyzz 71: RSQ TEMP[12].x, TEMP[12].xxxx 72: MUL TEMP[11].xyz, TEMP[11].xyzz, TEMP[12].xxxx 73: MOV TEMP[11].xyz, -TEMP[11].xyzx 74: MUL TEMP[12].x, IN[0].xxxx, CONST[5][12].xxxx 75: MAD TEMP[12].x, IN[0].yyyy, CONST[5][12].yyyy, TEMP[12].xxxx 76: MAD TEMP[12].x, IN[0].zzzz, CONST[5][12].zzzz, TEMP[12].xxxx 77: ADD TEMP[12].x, TEMP[12].xxxx, CONST[5][12].wwww 78: MUL TEMP[13].x, IN[0].xxxx, CONST[5][13].xxxx 79: MAD TEMP[13].x, IN[0].yyyy, CONST[5][13].yyyy, TEMP[13].xxxx 80: MAD TEMP[13].x, IN[0].zzzz, CONST[5][13].zzzz, TEMP[13].xxxx 81: ADD TEMP[13].x, TEMP[13].xxxx, CONST[5][13].wwww 82: MOV TEMP[12].y, TEMP[13].xxxx 83: MUL TEMP[13].x, IN[0].xxxx, CONST[5][14].xxxx 84: MAD TEMP[13].x, IN[0].yyyy, CONST[5][14].yyyy, TEMP[13].xxxx 85: MAD TEMP[13].x, IN[0].zzzz, CONST[5][14].zzzz, TEMP[13].xxxx 86: ADD TEMP[13].x, TEMP[13].xxxx, CONST[5][14].wwww 87: MOV TEMP[12].z, TEMP[13].xxxx 88: MUL TEMP[13].x, IN[0].xxxx, CONST[5][15].xxxx 89: MAD TEMP[13].x, IN[0].yyyy, CONST[5][15].yyyy, TEMP[13].xxxx 90: MAD TEMP[13].x, IN[0].zzzz, CONST[5][15].zzzz, TEMP[13].xxxx 91: ADD TEMP[13].x, TEMP[13].xxxx, CONST[5][15].wwww 92: RCP TEMP[13].xyz, TEMP[13].xxxx 93: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[13].xyzz 94: MOV_SAT TEMP[13].x, -TEMP[12].zzzz 95: MOV TEMP[14].x, -CONST[5][16].xxxx 96: MOV TEMP[15].x, TEMP[14].xxxx 97: MOV TEMP[15].y, CONST[5][16].xxxx 98: MOV TEMP[16].x, CONST[5][16].xxxx 99: MOV TEMP[16].y, TEMP[14].xxxx 100: ADD TEMP[17].xy, TEMP[12].xyyy, CONST[5][16].xxxx 101: MOV TEMP[17].xy, TEMP[17].xyyy 102: MOV TEMP[17].z, TEMP[13].xxxx 103: MOV TEMP[17].w, IMM[0].xxxx 104: TXL TEMP[17].x, TEMP[17], SAMP[0], SHADOW2D 105: MOV TEMP[17].x, TEMP[17].xxxx 106: ADD TEMP[15].xy, TEMP[15].xyyy, TEMP[12].xyyy 107: MOV TEMP[15].xy, TEMP[15].xyyy 108: MOV TEMP[15].z, TEMP[13].xxxx 109: MOV TEMP[15].w, IMM[0].xxxx 110: TXL TEMP[15].x, TEMP[15], SAMP[0], SHADOW2D 111: MOV TEMP[17].y, TEMP[15].xxxx 112: ADD TEMP[15].xy, TEMP[12].xyyy, TEMP[16].xyyy 113: MOV TEMP[15].xy, TEMP[15].xyyy 114: MOV TEMP[15].z, TEMP[13].xxxx 115: MOV TEMP[15].w, IMM[0].xxxx 116: TXL TEMP[15].x, TEMP[15], SAMP[0], SHADOW2D 117: MOV TEMP[17].z, TEMP[15].xxxx 118: ADD TEMP[15].xy, TEMP[12].xyyy, TEMP[14].xxxx 119: MOV TEMP[15].xy, TEMP[15].xyyy 120: MOV TEMP[15].z, TEMP[13].xxxx 121: MOV TEMP[15].w, IMM[0].xxxx 122: TXL TEMP[15].x, TEMP[15], SAMP[0], SHADOW2D 123: MOV TEMP[17].w, TEMP[15].xxxx 124: MOV TEMP[15].y, IMM[0].xxxx 125: MOV TEMP[15].x, CONST[5][16].xxxx 126: MOV TEMP[16].y, IMM[0].xxxx 127: MOV TEMP[16].x, TEMP[14].xxxx 128: MOV TEMP[18].x, IMM[0].xxxx 129: MOV TEMP[18].y, TEMP[14].xxxx 130: MOV TEMP[14].x, IMM[0].xxxx 131: MOV TEMP[14].y, CONST[5][16].xxxx 132: ADD TEMP[15].xy, TEMP[15].xyyy, TEMP[12].xyyy 133: MOV TEMP[15].xy, TEMP[15].xyyy 134: MOV TEMP[15].z, TEMP[13].xxxx 135: MOV TEMP[15].w, IMM[0].xxxx 136: TXL TEMP[15].x, TEMP[15], SAMP[0], SHADOW2D 137: MOV TEMP[15].x, TEMP[15].xxxx 138: ADD TEMP[16].xy, TEMP[16].xyyy, TEMP[12].xyyy 139: MOV TEMP[16].xy, TEMP[16].xyyy 140: MOV TEMP[16].z, TEMP[13].xxxx 141: MOV TEMP[16].w, IMM[0].xxxx 142: TXL TEMP[16].x, TEMP[16], SAMP[0], SHADOW2D 143: MOV TEMP[15].y, TEMP[16].xxxx 144: ADD TEMP[16].xy, TEMP[18].xyyy, TEMP[12].xyyy 145: MOV TEMP[16].xy, TEMP[16].xyyy 146: MOV TEMP[16].z, TEMP[13].xxxx 147: MOV TEMP[16].w, IMM[0].xxxx 148: TXL TEMP[16].x, TEMP[16], SAMP[0], SHADOW2D 149: MOV TEMP[15].z, TEMP[16].xxxx 150: ADD TEMP[14].xy, TEMP[14].xyyy, TEMP[12].xyyy 151: MOV TEMP[14].xy, TEMP[14].xyyy 152: MOV TEMP[14].z, TEMP[13].xxxx 153: MOV TEMP[14].w, IMM[0].xxxx 154: TXL TEMP[14].x, TEMP[14], SAMP[0], SHADOW2D 155: MOV TEMP[15].w, TEMP[14].xxxx 156: MUL TEMP[14].xyz, CONST[5][4].wwww, CONST[5][5].xyzz 157: MUL TEMP[16].xyz, CONST[5][5].wwww, CONST[5][6].xyzz 158: MOV TEMP[12].xy, TEMP[12].xyyy 159: MOV TEMP[12].z, TEMP[13].xxxx 160: MOV TEMP[12].w, IMM[0].xxxx 161: TXL TEMP[12].x, TEMP[12], SAMP[0], SHADOW2D 162: DP4 TEMP[13].x, TEMP[17], IMM[8].yyyy 163: DP4 TEMP[15].x, TEMP[15], IMM[8].zzzz 164: ADD TEMP[13].x, TEMP[13].xxxx, TEMP[15].xxxx 165: MAD TEMP[12].x, TEMP[12].xxxx, IMM[8].xxxx, TEMP[13].xxxx 166: DP3 TEMP[13].x, -CONST[5][0].xyzz, TEMP[0].xyzz 167: MOV_SAT TEMP[13].x, TEMP[13].xxxx 168: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[13].xxxx 169: ADD TEMP[10].xyz, IN[0].xyzz, TEMP[10].xyzz 170: MOV TEMP[13].w, IMM[0].yyyy 171: MUL TEMP[8].x, TEMP[8].xxxx, CONST[1][14].xxxx 172: DP3 TEMP[15].x, TEMP[0].xyzz, TEMP[11].xyzz 173: MUL TEMP[15].xyz, TEMP[15].xxxx, TEMP[0].xyzz 174: MUL TEMP[15].xyz, IMM[0].wwww, TEMP[15].xyzz 175: ADD TEMP[11].xyz, TEMP[11].xyzz, -TEMP[15].xyzz 176: DP3 TEMP[11].x, CONST[5][2].xyzz, TEMP[11].xyzz 177: MOV_SAT TEMP[11].x, TEMP[11].xxxx 178: POW TEMP[11].x, TEMP[11].xxxx, CONST[5][2].wwww 179: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[11].xxxx 180: MUL TEMP[3].xyz, CONST[1][4].xyzz, TEMP[3].xyzz 181: MUL TEMP[2].xyz, CONST[1][3].xyzz, TEMP[2].xyzz 182: MUL TEMP[1].xyz, CONST[1][2].xyzz, TEMP[1].xyzz 183: MOV TEMP[11].xy, IN[5].xyyy 184: TEX TEMP[11].xyz, TEMP[11], SAMP[1], 2D 185: MUL TEMP[11].xyz, CONST[1][1].xyzz, TEMP[11].xyzz 186: MUL TEMP[5].xyz, TEMP[5].xxxx, TEMP[11].xyzz 187: MAD TEMP[1].xyz, TEMP[4].xxxx, TEMP[1].xyzz, TEMP[5].xyzz 188: MAD TEMP[1].xyz, TEMP[6].xxxx, TEMP[2].xyzz, TEMP[1].xyzz 189: MAD TEMP[1].xyz, TEMP[3].xyzz, TEMP[7].xxxx, TEMP[1].xyzz 190: MUL TEMP[1].xyz, TEMP[1].xyzz, IN[2].xyzz 191: MAD TEMP[1].xyz, TEMP[8].xxxx, CONST[5][3].xyzz, TEMP[1].xyzz 192: ADD TEMP[2].x, IMM[0].yyyy, -TEMP[12].xxxx 193: DP3 TEMP[3].x, TEMP[16].xyzz, IMM[10].xyzz 194: LRP TEMP[3].xyz, TEMP[9].xxxx, TEMP[3].xxxx, TEMP[16].xyzz 195: DP3 TEMP[0].x, CONST[5][4].xyzz, TEMP[0].xyzz 196: MOV_SAT TEMP[0].x, TEMP[0].xxxx 197: DP3 TEMP[4].x, TEMP[14].xyzz, IMM[10].xyzz 198: LRP TEMP[4].xyz, TEMP[9].xxxx, TEMP[4].xxxx, TEMP[14].xyzz 199: MUL TEMP[5].xyz, TEMP[12].xxxx, CONST[5][1].xyzz 200: MAD TEMP[0].xyz, TEMP[0].xxxx, TEMP[4].xyzz, TEMP[5].xyzz 201: MAD TEMP[0].xyz, TEMP[2].xxxx, TEMP[3].xyzz, TEMP[0].xyzz 202: MUL TEMP[0].xyz, TEMP[1].xyzz, TEMP[0].xyzz 203: DP3 TEMP[1].x, TEMP[10].xyzz, TEMP[10].xyzz 204: SQRT TEMP[1].x, TEMP[1].xxxx 205: MAD TEMP[1].x, TEMP[1].xxxx, CONST[4][29].xxxx, CONST[4][28].wwww 206: MOV_SAT TEMP[1].x, TEMP[1].xxxx 207: POW TEMP[1].x, TEMP[1].xxxx, CONST[4][29].zzzz 208: MIN TEMP[1].x, CONST[4][29].yyyy, TEMP[1].xxxx 209: MOV_SAT TEMP[1].x, TEMP[1].xxxx 210: LRP TEMP[13].xyz, TEMP[1].xxxx, CONST[4][28].xyzz, TEMP[0].xyzz 211: MOV OUT[1], IN[0].wwww 212: MOV OUT[0], TEMP[13] 213: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228) %38 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4 %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 %40 = call float @llvm.SI.load.const(<16 x i8> %39, i32 304) %41 = call float @llvm.SI.load.const(<16 x i8> %39, i32 308) %42 = call float @llvm.SI.load.const(<16 x i8> %39, i32 312) %43 = call float @llvm.SI.load.const(<16 x i8> %39, i32 448) %44 = call float @llvm.SI.load.const(<16 x i8> %39, i32 452) %45 = call float @llvm.SI.load.const(<16 x i8> %39, i32 456) %46 = call float @llvm.SI.load.const(<16 x i8> %39, i32 460) %47 = call float @llvm.SI.load.const(<16 x i8> %39, i32 464) %48 = call float @llvm.SI.load.const(<16 x i8> %39, i32 468) %49 = call float @llvm.SI.load.const(<16 x i8> %39, i32 472) %50 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 5 %51 = load <16 x i8>, <16 x i8> addrspace(2)* %50, align 16, !tbaa !0 %52 = call float @llvm.SI.load.const(<16 x i8> %51, i32 0) %53 = call float @llvm.SI.load.const(<16 x i8> %51, i32 4) %54 = call float @llvm.SI.load.const(<16 x i8> %51, i32 8) %55 = call float @llvm.SI.load.const(<16 x i8> %51, i32 16) %56 = call float @llvm.SI.load.const(<16 x i8> %51, i32 20) %57 = call float @llvm.SI.load.const(<16 x i8> %51, i32 24) %58 = call float @llvm.SI.load.const(<16 x i8> %51, i32 32) %59 = call float @llvm.SI.load.const(<16 x i8> %51, i32 36) %60 = call float @llvm.SI.load.const(<16 x i8> %51, i32 40) %61 = call float @llvm.SI.load.const(<16 x i8> %51, i32 44) %62 = call float @llvm.SI.load.const(<16 x i8> %51, i32 48) %63 = call float @llvm.SI.load.const(<16 x i8> %51, i32 52) %64 = call float @llvm.SI.load.const(<16 x i8> %51, i32 56) %65 = call float @llvm.SI.load.const(<16 x i8> %51, i32 64) %66 = call float @llvm.SI.load.const(<16 x i8> %51, i32 68) %67 = call float @llvm.SI.load.const(<16 x i8> %51, i32 72) %68 = call float @llvm.SI.load.const(<16 x i8> %51, i32 76) %69 = call float @llvm.SI.load.const(<16 x i8> %51, i32 80) %70 = call float @llvm.SI.load.const(<16 x i8> %51, i32 84) %71 = call float @llvm.SI.load.const(<16 x i8> %51, i32 88) %72 = call float @llvm.SI.load.const(<16 x i8> %51, i32 92) %73 = call float @llvm.SI.load.const(<16 x i8> %51, i32 96) %74 = call float @llvm.SI.load.const(<16 x i8> %51, i32 100) %75 = call float @llvm.SI.load.const(<16 x i8> %51, i32 104) %76 = call float @llvm.SI.load.const(<16 x i8> %51, i32 192) %77 = call float @llvm.SI.load.const(<16 x i8> %51, i32 196) %78 = call float @llvm.SI.load.const(<16 x i8> %51, i32 200) %79 = call float @llvm.SI.load.const(<16 x i8> %51, i32 204) %80 = call float @llvm.SI.load.const(<16 x i8> %51, i32 208) %81 = call float @llvm.SI.load.const(<16 x i8> %51, i32 212) %82 = call float @llvm.SI.load.const(<16 x i8> %51, i32 216) %83 = call float @llvm.SI.load.const(<16 x i8> %51, i32 220) %84 = call float @llvm.SI.load.const(<16 x i8> %51, i32 224) %85 = call float @llvm.SI.load.const(<16 x i8> %51, i32 228) %86 = call float @llvm.SI.load.const(<16 x i8> %51, i32 232) %87 = call float @llvm.SI.load.const(<16 x i8> %51, i32 236) %88 = call float @llvm.SI.load.const(<16 x i8> %51, i32 240) %89 = call float @llvm.SI.load.const(<16 x i8> %51, i32 244) %90 = call float @llvm.SI.load.const(<16 x i8> %51, i32 248) %91 = call float @llvm.SI.load.const(<16 x i8> %51, i32 252) %92 = call float @llvm.SI.load.const(<16 x i8> %51, i32 256) %93 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %94 = load <8 x i32>, <8 x i32> addrspace(2)* %93, align 32, !tbaa !0 %95 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %96 = load <4 x i32>, <4 x i32> addrspace(2)* %95, align 16, !tbaa !0 %97 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %98 = bitcast <8 x i32> addrspace(2)* %97 to <32 x i8> addrspace(2)* %99 = load <32 x i8>, <32 x i8> addrspace(2)* %98, align 32, !tbaa !0 %100 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %101 = bitcast <4 x i32> addrspace(2)* %100 to <16 x i8> addrspace(2)* %102 = load <16 x i8>, <16 x i8> addrspace(2)* %101, align 16, !tbaa !0 %103 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %104 = bitcast <8 x i32> addrspace(2)* %103 to <32 x i8> addrspace(2)* %105 = load <32 x i8>, <32 x i8> addrspace(2)* %104, align 32, !tbaa !0 %106 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %107 = bitcast <4 x i32> addrspace(2)* %106 to <16 x i8> addrspace(2)* %108 = load <16 x i8>, <16 x i8> addrspace(2)* %107, align 16, !tbaa !0 %109 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %110 = bitcast <8 x i32> addrspace(2)* %109 to <32 x i8> addrspace(2)* %111 = load <32 x i8>, <32 x i8> addrspace(2)* %110, align 32, !tbaa !0 %112 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %113 = bitcast <4 x i32> addrspace(2)* %112 to <16 x i8> addrspace(2)* %114 = load <16 x i8>, <16 x i8> addrspace(2)* %113, align 16, !tbaa !0 %115 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %116 = bitcast <8 x i32> addrspace(2)* %115 to <32 x i8> addrspace(2)* %117 = load <32 x i8>, <32 x i8> addrspace(2)* %116, align 32, !tbaa !0 %118 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %119 = bitcast <4 x i32> addrspace(2)* %118 to <16 x i8> addrspace(2)* %120 = load <16 x i8>, <16 x i8> addrspace(2)* %119, align 16, !tbaa !0 %121 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %122 = bitcast <8 x i32> addrspace(2)* %121 to <32 x i8> addrspace(2)* %123 = load <32 x i8>, <32 x i8> addrspace(2)* %122, align 32, !tbaa !0 %124 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %125 = bitcast <4 x i32> addrspace(2)* %124 to <16 x i8> addrspace(2)* %126 = load <16 x i8>, <16 x i8> addrspace(2)* %125, align 16, !tbaa !0 %127 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 6 %128 = bitcast <8 x i32> addrspace(2)* %127 to <32 x i8> addrspace(2)* %129 = load <32 x i8>, <32 x i8> addrspace(2)* %128, align 32, !tbaa !0 %130 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 6 %131 = bitcast <4 x i32> addrspace(2)* %130 to <16 x i8> addrspace(2)* %132 = load <16 x i8>, <16 x i8> addrspace(2)* %131, align 16, !tbaa !0 %133 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 7 %134 = bitcast <8 x i32> addrspace(2)* %133 to <32 x i8> addrspace(2)* %135 = load <32 x i8>, <32 x i8> addrspace(2)* %134, align 32, !tbaa !0 %136 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 7 %137 = bitcast <4 x i32> addrspace(2)* %136 to <16 x i8> addrspace(2)* %138 = load <16 x i8>, <16 x i8> addrspace(2)* %137, align 16, !tbaa !0 %139 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 8 %140 = bitcast <8 x i32> addrspace(2)* %139 to <32 x i8> addrspace(2)* %141 = load <32 x i8>, <32 x i8> addrspace(2)* %140, align 32, !tbaa !0 %142 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 8 %143 = bitcast <4 x i32> addrspace(2)* %142 to <16 x i8> addrspace(2)* %144 = load <16 x i8>, <16 x i8> addrspace(2)* %143, align 16, !tbaa !0 %145 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %146 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %147 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %148 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %149 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %150 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %151 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %152 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %153 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %154 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %155 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %156 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %157 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %158 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %159 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %160 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %161 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %162 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %163 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %164 = call float @llvm.SI.fs.interp(i32 3, i32 5, i32 %5, <2 x i32> %7) %165 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %5, <2 x i32> %7) %166 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %5, <2 x i32> %7) %167 = call float @llvm.SI.fs.interp(i32 2, i32 6, i32 %5, <2 x i32> %7) %168 = call float @llvm.SI.fs.interp(i32 3, i32 6, i32 %5, <2 x i32> %7) %169 = fmul float %149, %149 %170 = fmul float %150, %150 %171 = fadd float %170, %169 %172 = fmul float %151, %151 %173 = fadd float %171, %172 %174 = call float @llvm.AMDGPU.rsq.clamped.f32(float %173) %175 = fmul float %149, %174 %176 = fmul float %150, %174 %177 = fmul float %151, %174 %178 = bitcast float %163 to i32 %179 = bitcast float %164 to i32 %180 = insertelement <2 x i32> undef, i32 %178, i32 0 %181 = insertelement <2 x i32> %180, i32 %179, i32 1 %182 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %181, <32 x i8> %105, <16 x i8> %108, i32 2) %183 = extractelement <4 x float> %182, i32 0 %184 = extractelement <4 x float> %182, i32 1 %185 = extractelement <4 x float> %182, i32 2 %186 = extractelement <4 x float> %182, i32 3 %187 = bitcast float %165 to i32 %188 = bitcast float %166 to i32 %189 = insertelement <2 x i32> undef, i32 %187, i32 0 %190 = insertelement <2 x i32> %189, i32 %188, i32 1 %191 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %190, <32 x i8> %111, <16 x i8> %114, i32 2) %192 = extractelement <4 x float> %191, i32 0 %193 = extractelement <4 x float> %191, i32 1 %194 = extractelement <4 x float> %191, i32 2 %195 = extractelement <4 x float> %191, i32 3 %196 = bitcast float %167 to i32 %197 = bitcast float %168 to i32 %198 = insertelement <2 x i32> undef, i32 %196, i32 0 %199 = insertelement <2 x i32> %198, i32 %197, i32 1 %200 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %199, <32 x i8> %117, <16 x i8> %120, i32 2) %201 = extractelement <4 x float> %200, i32 0 %202 = extractelement <4 x float> %200, i32 1 %203 = extractelement <4 x float> %200, i32 2 %204 = extractelement <4 x float> %200, i32 3 %205 = fsub float %186, %158 %206 = call float @llvm.maxnum.f32(float %205, float 0.000000e+00) %207 = fsub float %155, %206 %208 = fadd float %158, %186 %209 = call float @llvm.minnum.f32(float %208, float 1.000000e+00) %210 = fsub float %209, %206 %211 = fdiv float 1.000000e+00, %210 %212 = fmul float %207, %211 %213 = call float @llvm.AMDIL.clamp.(float %212, float 0.000000e+00, float 1.000000e+00) %214 = fmul float %213, 2.000000e+00 %215 = fsub float 3.000000e+00, %214 %216 = fmul float %213, %215 %217 = fmul float %213, %216 %218 = fsub float 1.000000e+00, %217 %219 = fsub float %195, %159 %220 = call float @llvm.maxnum.f32(float %219, float 0.000000e+00) %221 = fsub float %156, %220 %222 = fadd float %159, %195 %223 = call float @llvm.minnum.f32(float %222, float 1.000000e+00) %224 = fsub float %223, %220 %225 = fdiv float 1.000000e+00, %224 %226 = fmul float %221, %225 %227 = call float @llvm.AMDIL.clamp.(float %226, float 0.000000e+00, float 1.000000e+00) %228 = fmul float %227, 2.000000e+00 %229 = fsub float 3.000000e+00, %228 %230 = fmul float %227, %229 %231 = fmul float %227, %230 %232 = call float @llvm.maxnum.f32(float %231, float 0.000000e+00) %233 = call float @llvm.minnum.f32(float %232, float %218) %234 = fsub float %218, %233 %235 = fsub float %204, %160 %236 = call float @llvm.maxnum.f32(float %235, float 0.000000e+00) %237 = fsub float %157, %236 %238 = fadd float %160, %204 %239 = call float @llvm.minnum.f32(float %238, float 1.000000e+00) %240 = fsub float %239, %236 %241 = fdiv float 1.000000e+00, %240 %242 = fmul float %237, %241 %243 = call float @llvm.AMDIL.clamp.(float %242, float 0.000000e+00, float 1.000000e+00) %244 = fmul float %243, 2.000000e+00 %245 = fsub float 3.000000e+00, %244 %246 = fmul float %243, %245 %247 = fmul float %243, %246 %248 = call float @llvm.maxnum.f32(float %247, float 0.000000e+00) %249 = call float @llvm.minnum.f32(float %248, float %234) %250 = fsub float %234, %249 %251 = bitcast float %167 to i32 %252 = bitcast float %168 to i32 %253 = insertelement <2 x i32> undef, i32 %251, i32 0 %254 = insertelement <2 x i32> %253, i32 %252, i32 1 %255 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %254, <32 x i8> %141, <16 x i8> %144, i32 2) %256 = extractelement <4 x float> %255, i32 0 %257 = extractelement <4 x float> %255, i32 2 %258 = bitcast float %165 to i32 %259 = bitcast float %166 to i32 %260 = insertelement <2 x i32> undef, i32 %258, i32 0 %261 = insertelement <2 x i32> %260, i32 %259, i32 1 %262 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %261, <32 x i8> %135, <16 x i8> %138, i32 2) %263 = extractelement <4 x float> %262, i32 0 %264 = extractelement <4 x float> %262, i32 2 %265 = bitcast float %163 to i32 %266 = bitcast float %164 to i32 %267 = insertelement <2 x i32> undef, i32 %265, i32 0 %268 = insertelement <2 x i32> %267, i32 %266, i32 1 %269 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %268, <32 x i8> %129, <16 x i8> %132, i32 2) %270 = extractelement <4 x float> %269, i32 0 %271 = extractelement <4 x float> %269, i32 2 %272 = bitcast float %161 to i32 %273 = bitcast float %162 to i32 %274 = insertelement <2 x i32> undef, i32 %272, i32 0 %275 = insertelement <2 x i32> %274, i32 %273, i32 1 %276 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %275, <32 x i8> %123, <16 x i8> %126, i32 2) %277 = extractelement <4 x float> %276, i32 0 %278 = extractelement <4 x float> %276, i32 2 %279 = fmul float %250, %277 %280 = fmul float %250, %278 %281 = fmul float %217, %270 %282 = fadd float %281, %279 %283 = fmul float %217, %271 %284 = fadd float %283, %280 %285 = fmul float %233, %263 %286 = fadd float %285, %282 %287 = fmul float %233, %264 %288 = fadd float %287, %284 %289 = fmul float %249, %256 %290 = fadd float %289, %286 %291 = fmul float %249, %257 %292 = fadd float %291, %288 %293 = fmul float %292, %37 %294 = fsub float %145, %40 %295 = fsub float %146, %41 %296 = fsub float %147, %42 %297 = fmul float %294, %294 %298 = fmul float %295, %295 %299 = fadd float %298, %297 %300 = fmul float %296, %296 %301 = fadd float %299, %300 %302 = call float @llvm.AMDGPU.rsq.clamped.f32(float %301) %303 = fmul float %294, %302 %304 = fmul float %295, %302 %305 = fmul float %296, %302 %306 = fmul float %145, %76 %307 = fmul float %146, %77 %308 = fadd float %307, %306 %309 = fmul float %147, %78 %310 = fadd float %309, %308 %311 = fadd float %310, %79 %312 = fmul float %145, %80 %313 = fmul float %146, %81 %314 = fadd float %313, %312 %315 = fmul float %147, %82 %316 = fadd float %315, %314 %317 = fadd float %316, %83 %318 = fmul float %145, %84 %319 = fmul float %146, %85 %320 = fadd float %319, %318 %321 = fmul float %147, %86 %322 = fadd float %321, %320 %323 = fadd float %322, %87 %324 = fmul float %145, %88 %325 = fmul float %146, %89 %326 = fadd float %325, %324 %327 = fmul float %147, %90 %328 = fadd float %327, %326 %329 = fadd float %328, %91 %330 = fdiv float 1.000000e+00, %329 %331 = fmul float %311, %330 %332 = fmul float %317, %330 %333 = fmul float %323, %330 %334 = fsub float -0.000000e+00, %333 %335 = call float @llvm.AMDIL.clamp.(float %334, float 0.000000e+00, float 1.000000e+00) %336 = fadd float %331, %92 %337 = fadd float %332, %92 %338 = bitcast float %335 to i32 %339 = bitcast float %336 to i32 %340 = bitcast float %337 to i32 %341 = insertelement <4 x i32> undef, i32 %338, i32 0 %342 = insertelement <4 x i32> %341, i32 %339, i32 1 %343 = insertelement <4 x i32> %342, i32 %340, i32 2 %344 = insertelement <4 x i32> %343, i32 0, i32 3 %345 = bitcast <8 x i32> %94 to <32 x i8> %346 = bitcast <4 x i32> %96 to <16 x i8> %347 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %344, <32 x i8> %345, <16 x i8> %346, i32 7) %348 = extractelement <4 x float> %347, i32 0 %349 = fsub float %331, %92 %350 = fadd float %92, %332 %351 = bitcast float %335 to i32 %352 = bitcast float %349 to i32 %353 = bitcast float %350 to i32 %354 = insertelement <4 x i32> undef, i32 %351, i32 0 %355 = insertelement <4 x i32> %354, i32 %352, i32 1 %356 = insertelement <4 x i32> %355, i32 %353, i32 2 %357 = insertelement <4 x i32> %356, i32 0, i32 3 %358 = bitcast <8 x i32> %94 to <32 x i8> %359 = bitcast <4 x i32> %96 to <16 x i8> %360 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %357, <32 x i8> %358, <16 x i8> %359, i32 7) %361 = extractelement <4 x float> %360, i32 0 %362 = fadd float %331, %92 %363 = fsub float %332, %92 %364 = bitcast float %335 to i32 %365 = bitcast float %362 to i32 %366 = bitcast float %363 to i32 %367 = insertelement <4 x i32> undef, i32 %364, i32 0 %368 = insertelement <4 x i32> %367, i32 %365, i32 1 %369 = insertelement <4 x i32> %368, i32 %366, i32 2 %370 = insertelement <4 x i32> %369, i32 0, i32 3 %371 = bitcast <8 x i32> %94 to <32 x i8> %372 = bitcast <4 x i32> %96 to <16 x i8> %373 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %370, <32 x i8> %371, <16 x i8> %372, i32 7) %374 = extractelement <4 x float> %373, i32 0 %375 = fsub float %331, %92 %376 = fsub float %332, %92 %377 = bitcast float %335 to i32 %378 = bitcast float %375 to i32 %379 = bitcast float %376 to i32 %380 = insertelement <4 x i32> undef, i32 %377, i32 0 %381 = insertelement <4 x i32> %380, i32 %378, i32 1 %382 = insertelement <4 x i32> %381, i32 %379, i32 2 %383 = insertelement <4 x i32> %382, i32 0, i32 3 %384 = bitcast <8 x i32> %94 to <32 x i8> %385 = bitcast <4 x i32> %96 to <16 x i8> %386 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %383, <32 x i8> %384, <16 x i8> %385, i32 7) %387 = extractelement <4 x float> %386, i32 0 %388 = fadd float %92, %331 %389 = fadd float %332, 0.000000e+00 %390 = bitcast float %335 to i32 %391 = bitcast float %388 to i32 %392 = bitcast float %389 to i32 %393 = insertelement <4 x i32> undef, i32 %390, i32 0 %394 = insertelement <4 x i32> %393, i32 %391, i32 1 %395 = insertelement <4 x i32> %394, i32 %392, i32 2 %396 = insertelement <4 x i32> %395, i32 0, i32 3 %397 = bitcast <8 x i32> %94 to <32 x i8> %398 = bitcast <4 x i32> %96 to <16 x i8> %399 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %396, <32 x i8> %397, <16 x i8> %398, i32 7) %400 = extractelement <4 x float> %399, i32 0 %401 = fsub float %331, %92 %402 = fadd float %332, 0.000000e+00 %403 = bitcast float %335 to i32 %404 = bitcast float %401 to i32 %405 = bitcast float %402 to i32 %406 = insertelement <4 x i32> undef, i32 %403, i32 0 %407 = insertelement <4 x i32> %406, i32 %404, i32 1 %408 = insertelement <4 x i32> %407, i32 %405, i32 2 %409 = insertelement <4 x i32> %408, i32 0, i32 3 %410 = bitcast <8 x i32> %94 to <32 x i8> %411 = bitcast <4 x i32> %96 to <16 x i8> %412 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %409, <32 x i8> %410, <16 x i8> %411, i32 7) %413 = extractelement <4 x float> %412, i32 0 %414 = fadd float %331, 0.000000e+00 %415 = fsub float %332, %92 %416 = bitcast float %335 to i32 %417 = bitcast float %414 to i32 %418 = bitcast float %415 to i32 %419 = insertelement <4 x i32> undef, i32 %416, i32 0 %420 = insertelement <4 x i32> %419, i32 %417, i32 1 %421 = insertelement <4 x i32> %420, i32 %418, i32 2 %422 = insertelement <4 x i32> %421, i32 0, i32 3 %423 = bitcast <8 x i32> %94 to <32 x i8> %424 = bitcast <4 x i32> %96 to <16 x i8> %425 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %422, <32 x i8> %423, <16 x i8> %424, i32 7) %426 = extractelement <4 x float> %425, i32 0 %427 = fadd float %331, 0.000000e+00 %428 = fadd float %92, %332 %429 = bitcast float %335 to i32 %430 = bitcast float %427 to i32 %431 = bitcast float %428 to i32 %432 = insertelement <4 x i32> undef, i32 %429, i32 0 %433 = insertelement <4 x i32> %432, i32 %430, i32 1 %434 = insertelement <4 x i32> %433, i32 %431, i32 2 %435 = insertelement <4 x i32> %434, i32 0, i32 3 %436 = bitcast <8 x i32> %94 to <32 x i8> %437 = bitcast <4 x i32> %96 to <16 x i8> %438 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %435, <32 x i8> %436, <16 x i8> %437, i32 7) %439 = extractelement <4 x float> %438, i32 0 %440 = fmul float %68, %69 %441 = fmul float %68, %70 %442 = fmul float %68, %71 %443 = fmul float %72, %73 %444 = fmul float %72, %74 %445 = fmul float %72, %75 %446 = bitcast float %335 to i32 %447 = bitcast float %331 to i32 %448 = bitcast float %332 to i32 %449 = insertelement <4 x i32> undef, i32 %446, i32 0 %450 = insertelement <4 x i32> %449, i32 %447, i32 1 %451 = insertelement <4 x i32> %450, i32 %448, i32 2 %452 = insertelement <4 x i32> %451, i32 0, i32 3 %453 = bitcast <8 x i32> %94 to <32 x i8> %454 = bitcast <4 x i32> %96 to <16 x i8> %455 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %452, <32 x i8> %453, <16 x i8> %454, i32 7) %456 = extractelement <4 x float> %455, i32 0 %457 = fmul float %348, 0x3FB32D10E0000000 %458 = fmul float %361, 0x3FB32D10E0000000 %459 = fadd float %457, %458 %460 = fmul float %374, 0x3FB32D10E0000000 %461 = fadd float %459, %460 %462 = fmul float %387, 0x3FB32D10E0000000 %463 = fadd float %461, %462 %464 = fmul float %400, 0x3FBFA3FCC0000000 %465 = fmul float %413, 0x3FBFA3FCC0000000 %466 = fadd float %464, %465 %467 = fmul float %426, 0x3FBFA3FCC0000000 %468 = fadd float %466, %467 %469 = fmul float %439, 0x3FBFA3FCC0000000 %470 = fadd float %468, %469 %471 = fadd float %463, %470 %472 = fmul float %456, 0x3FCA5DFA80000000 %473 = fadd float %472, %471 %474 = fmul float %52, %175 %475 = fsub float -0.000000e+00, %474 %476 = fmul float %53, %176 %477 = fsub float %475, %476 %478 = fmul float %54, %177 %479 = fsub float %477, %478 %480 = call float @llvm.AMDIL.clamp.(float %479, float 0.000000e+00, float 1.000000e+00) %481 = fmul float %473, %480 %482 = fsub float %145, %40 %483 = fsub float %146, %41 %484 = fsub float %147, %42 %485 = fmul float %290, %36 %486 = fmul float %303, %175 %487 = fsub float -0.000000e+00, %486 %488 = fmul float %304, %176 %489 = fsub float %487, %488 %490 = fmul float %305, %177 %491 = fsub float %489, %490 %492 = fmul float %491, %175 %493 = fmul float %491, %176 %494 = fmul float %491, %177 %495 = fmul float %492, 2.000000e+00 %496 = fmul float %493, 2.000000e+00 %497 = fmul float %494, 2.000000e+00 %498 = fsub float -0.000000e+00, %495 %499 = fsub float %498, %303 %500 = fsub float -0.000000e+00, %496 %501 = fsub float %500, %304 %502 = fsub float -0.000000e+00, %497 %503 = fsub float %502, %305 %504 = fmul float %58, %499 %505 = fmul float %59, %501 %506 = fadd float %505, %504 %507 = fmul float %60, %503 %508 = fadd float %506, %507 %509 = call float @llvm.AMDIL.clamp.(float %508, float 0.000000e+00, float 1.000000e+00) %510 = call float @llvm.pow.f32(float %509, float %61) %511 = fmul float %485, %510 %512 = fmul float %33, %201 %513 = fmul float %34, %202 %514 = fmul float %35, %203 %515 = fmul float %30, %192 %516 = fmul float %31, %193 %517 = fmul float %32, %194 %518 = fmul float %27, %183 %519 = fmul float %28, %184 %520 = fmul float %29, %185 %521 = bitcast float %161 to i32 %522 = bitcast float %162 to i32 %523 = insertelement <2 x i32> undef, i32 %521, i32 0 %524 = insertelement <2 x i32> %523, i32 %522, i32 1 %525 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %524, <32 x i8> %99, <16 x i8> %102, i32 2) %526 = extractelement <4 x float> %525, i32 0 %527 = extractelement <4 x float> %525, i32 1 %528 = extractelement <4 x float> %525, i32 2 %529 = fmul float %24, %526 %530 = fmul float %25, %527 %531 = fmul float %26, %528 %532 = fmul float %250, %529 %533 = fmul float %250, %530 %534 = fmul float %250, %531 %535 = fmul float %217, %518 %536 = fadd float %535, %532 %537 = fmul float %217, %519 %538 = fadd float %537, %533 %539 = fmul float %217, %520 %540 = fadd float %539, %534 %541 = fmul float %233, %515 %542 = fadd float %541, %536 %543 = fmul float %233, %516 %544 = fadd float %543, %538 %545 = fmul float %233, %517 %546 = fadd float %545, %540 %547 = fmul float %512, %249 %548 = fadd float %547, %542 %549 = fmul float %513, %249 %550 = fadd float %549, %544 %551 = fmul float %514, %249 %552 = fadd float %551, %546 %553 = fmul float %548, %152 %554 = fmul float %550, %153 %555 = fmul float %552, %154 %556 = fmul float %511, %62 %557 = fadd float %556, %553 %558 = fmul float %511, %63 %559 = fadd float %558, %554 %560 = fmul float %511, %64 %561 = fadd float %560, %555 %562 = fsub float 1.000000e+00, %481 %563 = fmul float %443, 0x3FCB333340000000 %564 = fmul float %444, 0x3FE6E48E80000000 %565 = fadd float %564, %563 %566 = fmul float %445, 0x3FB2752540000000 %567 = fadd float %565, %566 %568 = call float @llvm.AMDGPU.lrp(float %293, float %567, float %443) %569 = call float @llvm.AMDGPU.lrp(float %293, float %567, float %444) %570 = call float @llvm.AMDGPU.lrp(float %293, float %567, float %445) %571 = fmul float %65, %175 %572 = fmul float %66, %176 %573 = fadd float %572, %571 %574 = fmul float %67, %177 %575 = fadd float %573, %574 %576 = call float @llvm.AMDIL.clamp.(float %575, float 0.000000e+00, float 1.000000e+00) %577 = fmul float %440, 0x3FCB333340000000 %578 = fmul float %441, 0x3FE6E48E80000000 %579 = fadd float %578, %577 %580 = fmul float %442, 0x3FB2752540000000 %581 = fadd float %579, %580 %582 = call float @llvm.AMDGPU.lrp(float %293, float %581, float %440) %583 = call float @llvm.AMDGPU.lrp(float %293, float %581, float %441) %584 = call float @llvm.AMDGPU.lrp(float %293, float %581, float %442) %585 = fmul float %481, %55 %586 = fmul float %481, %56 %587 = fmul float %481, %57 %588 = fmul float %576, %582 %589 = fadd float %588, %585 %590 = fmul float %576, %583 %591 = fadd float %590, %586 %592 = fmul float %576, %584 %593 = fadd float %592, %587 %594 = fmul float %562, %568 %595 = fadd float %594, %589 %596 = fmul float %562, %569 %597 = fadd float %596, %591 %598 = fmul float %562, %570 %599 = fadd float %598, %593 %600 = fmul float %557, %595 %601 = fmul float %559, %597 %602 = fmul float %561, %599 %603 = fmul float %482, %482 %604 = fmul float %483, %483 %605 = fadd float %604, %603 %606 = fmul float %484, %484 %607 = fadd float %605, %606 %608 = call float @llvm.sqrt.f32(float %607) %609 = fmul float %608, %47 %610 = fadd float %609, %46 %611 = call float @llvm.AMDIL.clamp.(float %610, float 0.000000e+00, float 1.000000e+00) %612 = call float @llvm.pow.f32(float %611, float %49) %613 = call float @llvm.minnum.f32(float %48, float %612) %614 = call float @llvm.AMDIL.clamp.(float %613, float 0.000000e+00, float 1.000000e+00) %615 = call float @llvm.AMDGPU.lrp(float %614, float %43, float %600) %616 = call float @llvm.AMDGPU.lrp(float %614, float %44, float %601) %617 = call float @llvm.AMDGPU.lrp(float %614, float %45, float %602) %618 = call i32 @llvm.SI.packf16(float %615, float %616) %619 = bitcast i32 %618 to float %620 = call i32 @llvm.SI.packf16(float %617, float 1.000000e+00) %621 = bitcast i32 %620 to float %622 = call i32 @llvm.SI.packf16(float %148, float %148) %623 = bitcast i32 %622 to float %624 = call i32 @llvm.SI.packf16(float %148, float %148) %625 = bitcast i32 %624 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %619, float %621, float %619, float %621) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 1, i32 1, float %623, float %625, float %623, float %625) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v6, v0, 0, 0, [m0] ; C8180000 s_load_dwordx4 s[16:19], s[2:3], 0x4 ; C0880304 v_interp_p2_f32 v6, [v6], v1, 0, 0, [m0] ; C8190001 v_interp_p1_f32 v7, v0, 1, 0, [m0] ; C81C0100 v_interp_p2_f32 v7, [v7], v1, 1, 0, [m0] ; C81D0101 v_interp_p1_f32 v8, v0, 2, 0, [m0] ; C8200200 v_interp_p2_f32 v8, [v8], v1, 2, 0, [m0] ; C8210201 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v9, v0, 0, 1, [m0] ; C8240400 v_interp_p2_f32 v9, [v9], v1, 0, 1, [m0] ; C8250401 v_interp_p1_f32 v10, v0, 1, 1, [m0] ; C8280500 v_interp_p2_f32 v10, [v10], v1, 1, 1, [m0] ; C8290501 v_interp_p1_f32 v11, v0, 2, 1, [m0] ; C82C0600 v_interp_p2_f32 v11, [v11], v1, 2, 1, [m0] ; C82D0601 v_interp_p1_f32 v5, v0, 0, 2, [m0] ; C8140800 v_interp_p2_f32 v5, [v5], v1, 0, 2, [m0] ; C8150801 v_interp_p1_f32 v4, v0, 1, 2, [m0] ; C8100900 v_interp_p2_f32 v4, [v4], v1, 1, 2, [m0] ; C8110901 v_interp_p1_f32 v3, v0, 2, 2, [m0] ; C80C0A00 v_interp_p2_f32 v3, [v3], v1, 2, 2, [m0] ; C80D0A01 v_interp_p1_f32 v12, v0, 0, 3, [m0] ; C8300C00 v_interp_p2_f32 v12, [v12], v1, 0, 3, [m0] ; C8310C01 v_interp_p1_f32 v13, v0, 1, 3, [m0] ; C8340D00 v_interp_p2_f32 v13, [v13], v1, 1, 3, [m0] ; C8350D01 v_interp_p1_f32 v14, v0, 2, 3, [m0] ; C8380E00 v_interp_p2_f32 v14, [v14], v1, 2, 3, [m0] ; C8390E01 v_interp_p1_f32 v15, v0, 0, 4, [m0] ; C83C1000 v_interp_p2_f32 v15, [v15], v1, 0, 4, [m0] ; C83D1001 v_interp_p1_f32 v16, v0, 1, 4, [m0] ; C8401100 v_interp_p2_f32 v16, [v16], v1, 1, 4, [m0] ; C8411101 v_interp_p1_f32 v17, v0, 2, 4, [m0] ; C8441200 v_interp_p2_f32 v17, [v17], v1, 2, 4, [m0] ; C8451201 v_interp_p1_f32 v18, v0, 0, 5, [m0] ; C8481400 v_interp_p2_f32 v18, [v18], v1, 0, 5, [m0] ; C8491401 v_interp_p1_f32 v19, v0, 1, 5, [m0] ; C84C1500 v_interp_p2_f32 v19, [v19], v1, 1, 5, [m0] ; C84D1501 v_interp_p1_f32 v20, v0, 2, 5, [m0] ; C8501600 v_interp_p2_f32 v20, [v20], v1, 2, 5, [m0] ; C8511601 v_interp_p1_f32 v21, v0, 3, 5, [m0] ; C8541700 v_interp_p2_f32 v21, [v21], v1, 3, 5, [m0] ; C8551701 s_load_dwordx4 s[12:15], s[2:3], 0x10 ; C0860310 s_load_dwordx4 s[8:11], s[2:3], 0x14 ; C0840314 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s2, s[16:19], 0xa ; C201110A s_buffer_load_dword s21, s[16:19], 0xc ; C20A910C s_buffer_load_dword s3, s[16:19], 0xd ; C201910D s_buffer_load_dword s20, s[16:19], 0xe ; C20A110E s_buffer_load_dword s24, s[16:19], 0x10 ; C20C1110 s_buffer_load_dword s23, s[16:19], 0x11 ; C20B9111 s_buffer_load_dword s22, s[16:19], 0x12 ; C20B1112 s_buffer_load_dword s1, s[16:19], 0x38 ; C2009138 s_buffer_load_dword s0, s[16:19], 0x39 ; C2001139 s_buffer_load_dword s34, s[8:11], 0x31 ; C2110931 s_buffer_load_dword s35, s[8:11], 0x32 ; C2118932 s_buffer_load_dword s36, s[8:11], 0x33 ; C2120933 s_buffer_load_dword s28, s[8:11], 0x34 ; C20E0934 s_buffer_load_dword s29, s[8:11], 0x35 ; C20E8935 s_buffer_load_dword s30, s[8:11], 0x36 ; C20F0936 s_buffer_load_dword s31, s[8:11], 0x37 ; C20F8937 s_buffer_load_dword s37, s[8:11], 0x38 ; C2128938 s_buffer_load_dword s38, s[8:11], 0x39 ; C2130939 s_buffer_load_dword s39, s[8:11], 0x3a ; C213893A s_buffer_load_dword s40, s[8:11], 0x3b ; C214093B s_buffer_load_dword s33, s[8:11], 0x3c ; C210893C s_buffer_load_dword s32, s[8:11], 0x3d ; C210093D s_buffer_load_dword s27, s[8:11], 0x3e ; C20D893E s_buffer_load_dword s26, s[8:11], 0x3f ; C20D093F s_buffer_load_dword s25, s[8:11], 0x40 ; C20C8940 s_buffer_load_dword s41, s[8:11], 0x30 ; C2148930 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v22, s28, v6 ; 102C0C1C v_mac_f32_e32 v22, s29, v7 ; 3E2C0E1D v_mac_f32_e32 v22, s30, v8 ; 3E2C101E v_add_f32_e32 v22, s31, v22 ; 062C2C1F s_load_dwordx4 s[28:31], s[4:5], 0x0 ; C08E0500 v_mul_f32_e32 v23, s37, v6 ; 102E0C25 v_mac_f32_e32 v23, s38, v7 ; 3E2E0E26 v_mac_f32_e32 v23, s39, v8 ; 3E2E1027 v_add_f32_e32 v23, s40, v23 ; 062E2E28 s_load_dwordx4 s[44:47], s[4:5], 0x8 ; C0960508 v_mul_f32_e32 v24, s41, v6 ; 10300C29 v_mac_f32_e32 v24, s34, v7 ; 3E300E22 v_mac_f32_e32 v24, s35, v8 ; 3E301023 v_add_f32_e32 v24, s36, v24 ; 06303024 s_load_dwordx4 s[52:55], s[4:5], 0xc ; C09A050C v_interp_p1_f32 v25, v0, 0, 6, [m0] ; C8641800 v_interp_p2_f32 v25, [v25], v1, 0, 6, [m0] ; C8651801 v_interp_p1_f32 v26, v0, 1, 6, [m0] ; C8681900 s_load_dwordx4 s[48:51], s[4:5], 0x10 ; C0980510 s_load_dwordx4 s[36:39], s[4:5], 0x1c ; C092051C s_load_dwordx4 s[40:43], s[4:5], 0x20 ; C0940520 s_load_dwordx8 s[72:79], s[6:7], 0x10 ; C0E40710 s_load_dwordx8 s[80:87], s[6:7], 0x18 ; C0E80718 s_load_dwordx8 s[88:95], s[6:7], 0x20 ; C0EC0720 s_load_dwordx8 s[64:71], s[6:7], 0x40 ; C0E00740 s_load_dwordx8 s[56:63], s[6:7], 0x38 ; C0DC0738 v_interp_p2_f32 v26, [v26], v1, 1, 6, [m0] ; C8691901 v_interp_p1_f32 v27, v0, 2, 6, [m0] ; C86C1A00 v_interp_p2_f32 v27, [v27], v1, 2, 6, [m0] ; C86D1A01 v_interp_p1_f32 v28, v0, 3, 6, [m0] ; C8701B00 v_interp_p2_f32 v28, [v28], v1, 3, 6, [m0] ; C8711B01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[29:32], 15, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[72:79], s[44:47] ; F0800F00 01721D14 image_sample v[33:36], 15, 0, 0, 0, 0, 0, 0, 0, v[25:26], s[80:87], s[52:55] ; F0800F00 01B42119 image_sample v[37:40], 15, 0, 0, 0, 0, 0, 0, 0, v[27:28], s[88:95], s[48:51] ; F0800F00 0196251B image_sample v[0:1], 5, 0, 0, 0, 0, 0, 0, 0, v[27:28], s[64:71], s[40:43] ; F0800500 0150001B image_sample v[25:26], 5, 0, 0, 0, 0, 0, 0, 0, v[25:26], s[56:63], s[36:39] ; F0800500 012E1919 v_mul_f32_e32 v27, s33, v6 ; 10360C21 v_mac_f32_e32 v27, s32, v7 ; 3E360E20 v_mac_f32_e32 v27, s27, v8 ; 3E36101B v_add_f32_e32 v27, s26, v27 ; 0636361A v_rcp_f32_e32 v27, v27 ; 7E36551B s_load_dwordx4 s[32:35], s[4:5], 0x18 ; C0900518 s_load_dwordx8 s[48:55], s[6:7], 0x28 ; C0D80728 s_load_dwordx8 s[56:63], s[6:7], 0x30 ; C0DC0730 s_load_dwordx4 s[36:39], s[4:5], 0x14 ; C0920514 v_mul_f32_e32 v23, v27, v23 ; 102E2F1B v_add_f32_e64 v41, 0, -v23 clamp ; D2060829 40022E80 s_load_dwordx8 s[40:47], s[6:7], 0x0 ; C0D40700 v_mad_f32 v42, v24, v27, s25 ; D282002A 00663718 v_mad_f32 v43, v22, v27, s25 ; D282002B 00663716 v_mov_b32_e32 v44, 0 ; 7E580280 v_mad_f32 v23, v24, v27, -s25 ; D2820017 80663718 v_mov_b32_e32 v45, v41 ; 7E5A0329 v_mov_b32_e32 v46, v42 ; 7E5C032A v_mov_b32_e32 v47, v43 ; 7E5E032B v_mov_b32_e32 v48, v44 ; 7E60032C v_mad_f32 v28, v22, v27, -s25 ; D282001C 80663716 v_mov_b32_e32 v46, v23 ; 7E5C0317 v_mov_b32_e32 v49, v41 ; 7E620329 v_mov_b32_e32 v50, v42 ; 7E64032A v_mov_b32_e32 v51, v43 ; 7E66032B v_mov_b32_e32 v52, v44 ; 7E68032C s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 image_sample v[20:21], 5, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[56:63], s[32:35] ; F0800500 010E1414 v_mov_b32_e32 v47, v43 ; 7E5E032B v_mov_b32_e32 v51, v28 ; 7E66031C image_sample v[53:54], 5, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[48:55], s[36:39] ; F0800500 012C3512 v_mov_b32_e32 v48, v44 ; 7E60032C v_mov_b32_e32 v52, v44 ; 7E68032C s_load_dwordx8 s[32:39], s[6:7], 0x8 ; C0D00708 s_load_dwordx4 s[4:7], s[4:5], 0x4 ; C0820504 image_sample_c_l v23, 1, 0, 0, 0, 0, 0, 0, 0, v[41:44], s[40:47], s[28:31] ; F0B00100 00EA1729 image_sample_c_l v55, 1, 0, 0, 0, 0, 0, 0, 0, v[45:48], s[40:47], s[28:31] ; F0B00100 00EA372D v_mov_b32_e32 v47, v28 ; 7E5E031C image_sample_c_l v49, 1, 0, 0, 0, 0, 0, 0, 0, v[49:52], s[40:47], s[28:31] ; F0B00100 00EA3131 v_mov_b32_e32 v48, v44 ; 7E60032C image_sample_c_l v50, 1, 0, 0, 0, 0, 0, 0, 0, v[45:48], s[40:47], s[28:31] ; F0B00100 00EA322D v_mad_f32 v47, v22, v27, 0 ; D282002F 02023716 v_mov_b32_e32 v56, v41 ; 7E700329 v_mov_b32_e32 v57, v42 ; 7E72032A v_mov_b32_e32 v58, v43 ; 7E74032B v_mov_b32_e32 v59, v44 ; 7E76032C v_mov_b32_e32 v58, v47 ; 7E74032F v_mov_b32_e32 v59, v44 ; 7E76032C v_mov_b32_e32 v48, v44 ; 7E60032C v_mad_f32 v42, v24, v27, 0 ; D282002A 02023718 v_mov_b32_e32 v60, v41 ; 7E780329 v_mov_b32_e32 v61, v42 ; 7E7A032A v_mov_b32_e32 v62, v43 ; 7E7C032B v_mov_b32_e32 v63, v44 ; 7E7E032C image_sample_c_l v51, 1, 0, 0, 0, 0, 0, 0, 0, v[56:59], s[40:47], s[28:31] ; F0B00100 00EA3338 v_mov_b32_e32 v62, v28 ; 7E7C031C image_sample_c_l v28, 1, 0, 0, 0, 0, 0, 0, 0, v[45:48], s[40:47], s[28:31] ; F0B00100 00EA1C2D v_mov_b32_e32 v63, v44 ; 7E7E032C image_sample_c_l v45, 1, 0, 0, 0, 0, 0, 0, 0, v[60:63], s[40:47], s[28:31] ; F0B00100 00EA2D3C v_mul_f32_e32 v24, v27, v24 ; 1030311B v_mul_f32_e32 v22, v27, v22 ; 102C2D1B image_sample_c_l v27, 1, 0, 0, 0, 0, 0, 0, 0, v[41:44], s[40:47], s[28:31] ; F0B00100 00EA1B29 v_mov_b32_e32 v42, v24 ; 7E540318 v_mov_b32_e32 v43, v22 ; 7E560316 image_sample_c_l v22, 1, 0, 0, 0, 0, 0, 0, 0, v[41:44], s[40:47], s[28:31] ; F0B00100 00EA1629 v_subrev_f32_e32 v24, v17, v40 ; 0A305111 v_add_f32_e32 v17, v40, v17 ; 06222328 v_mul_f32_e32 v37, s24, v37 ; 104A4A18 v_mul_f32_e32 v38, s23, v38 ; 104C4C17 v_mul_f32_e32 v39, s22, v39 ; 104E4E16 v_subrev_f32_e32 v40, v16, v36 ; 0A504910 v_add_f32_e32 v16, v36, v16 ; 06202124 s_buffer_load_dword s22, s[16:19], 0x8 ; C20B1108 s_buffer_load_dword s23, s[16:19], 0x9 ; C20B9109 v_mul_f32_e32 v33, s21, v33 ; 10424215 v_mul_f32_e32 v34, s3, v34 ; 10444403 v_mul_f32_e32 v35, s20, v35 ; 10464614 v_subrev_f32_e32 v36, v15, v32 ; 0A48410F v_max_f32_e32 v36, 0, v36 ; 20484880 v_add_f32_e32 v15, v32, v15 ; 061E1F20 v_min_f32_e32 v15, 1.0, v15 ; 1E1E1EF2 v_subrev_f32_e32 v15, v36, v15 ; 0A1E1F24 v_rcp_f32_e32 v15, v15 ; 7E1E550F s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v29, s22, v29 ; 103A3A16 v_mul_f32_e32 v30, s23, v30 ; 103C3C17 v_mul_f32_e32 v31, s2, v31 ; 103E3E02 v_subrev_f32_e32 v12, v36, v12 ; 0A181924 v_mul_f32_e32 v12, v15, v12 ; 1018190F v_add_f32_e64 v12, 0, v12 clamp ; D206080C 00021880 v_mov_b32_e32 v15, 0x40400000 ; 7E1E02FF 40400000 v_mad_f32 v32, -2.0, v12, v15 ; D2820020 043E18F5 v_mul_f32_e32 v32, v32, v12 ; 10401920 v_max_f32_e32 v36, 0, v40 ; 20485080 v_min_f32_e32 v16, 1.0, v16 ; 1E2020F2 v_subrev_f32_e32 v16, v36, v16 ; 0A202124 v_rcp_f32_e32 v16, v16 ; 7E205510 v_mul_f32_e32 v40, v32, v12 ; 10501920 v_mad_f32 v12, -v12, v32, 1.0 ; D282000C 23CA410C v_subrev_f32_e32 v13, v36, v13 ; 0A1A1B24 v_mul_f32_e32 v13, v16, v13 ; 101A1B10 v_add_f32_e64 v13, 0, v13 clamp ; D206080D 00021A80 v_max_f32_e32 v16, 0, v24 ; 20203080 v_min_f32_e32 v17, 1.0, v17 ; 1E2222F2 v_subrev_f32_e32 v17, v16, v17 ; 0A222310 v_rcp_f32_e32 v17, v17 ; 7E225511 v_mad_f32 v24, -2.0, v13, v15 ; D2820018 043E1AF5 v_mul_f32_e32 v24, v24, v13 ; 10301B18 v_mul_f32_e32 v13, v24, v13 ; 101A1B18 v_subrev_f32_e32 v14, v16, v14 ; 0A1C1D10 v_mul_f32_e32 v14, v17, v14 ; 101C1D11 v_add_f32_e64 v14, 0, v14 clamp ; D206080E 00021C80 v_mac_f32_e32 v15, -2.0, v14 ; 3E1E1CF5 v_mul_f32_e32 v15, v15, v14 ; 101E1D0F v_mul_f32_e32 v14, v15, v14 ; 101C1D0F v_max_f32_e32 v13, 0, v13 ; 201A1A80 v_min_f32_e32 v13, v12, v13 ; 1E1A1B0C v_subrev_f32_e32 v12, v13, v12 ; 0A18190D v_max_f32_e32 v14, 0, v14 ; 201C1C80 v_min_f32_e32 v14, v12, v14 ; 1E1C1D0C v_subrev_f32_e32 v12, v14, v12 ; 0A18190E v_mul_f32_e32 v15, v53, v12 ; 101E1935 v_mul_f32_e32 v16, v54, v12 ; 10201936 v_mac_f32_e32 v15, v20, v40 ; 3E1E5114 s_buffer_load_dword s2, s[12:15], 0x4c ; C2010D4C s_buffer_load_dword s3, s[12:15], 0x4d ; C2018D4D s_buffer_load_dword s20, s[12:15], 0x4e ; C20A0D4E v_mac_f32_e32 v16, v21, v40 ; 3E205115 v_mac_f32_e32 v15, v25, v13 ; 3E1E1B19 v_mac_f32_e32 v16, v26, v13 ; 3E201B1A v_mac_f32_e32 v15, v0, v14 ; 3E1E1D00 v_mac_f32_e32 v16, v1, v14 ; 3E201D01 s_buffer_load_dword s21, s[12:15], 0x70 ; C20A8D70 s_buffer_load_dword s22, s[12:15], 0x71 ; C20B0D71 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v0, s2, v6 ; 0A000C02 v_subrev_f32_e32 v1, s3, v7 ; 0A020E03 v_subrev_f32_e32 v6, s20, v8 ; 0A0C1014 v_mul_f32_e32 v7, v9, v9 ; 100E1309 v_mac_f32_e32 v7, v10, v10 ; 3E0E150A v_mac_f32_e32 v7, v11, v11 ; 3E0E170B v_rsq_clamp_f32_e32 v7, v7 ; 7E0E5907 v_mul_f32_e32 v8, v0, v0 ; 10100100 v_mac_f32_e32 v8, v1, v1 ; 3E100301 v_mac_f32_e32 v8, v6, v6 ; 3E100D06 v_rsq_clamp_f32_e32 v17, v8 ; 7E225908 v_mul_f32_e32 v9, v7, v9 ; 10121307 v_mul_f32_e32 v10, v7, v10 ; 10141507 v_mul_f32_e32 v7, v7, v11 ; 100E1707 v_mul_f32_e32 v11, v17, v0 ; 10160111 v_mul_f32_e32 v11, v9, v11 ; 10161709 v_mul_f32_e32 v20, v17, v1 ; 10280311 v_mad_f32 v11, -v20, v10, -v11 ; D282000B A42E1514 v_mul_f32_e32 v20, v17, v6 ; 10280D11 v_mad_f32 v11, -v20, v7, v11 ; D282000B 242E0F14 s_buffer_load_dword s2, s[16:19], 0x4 ; C2011104 s_buffer_load_dword s3, s[16:19], 0x5 ; C2019105 s_buffer_load_dword s16, s[16:19], 0x6 ; C2081106 v_mul_f32_e32 v20, v9, v11 ; 10281709 v_mac_f32_e32 v20, v9, v11 ; 3E281709 v_mul_f32_e32 v21, v10, v11 ; 102A170A v_mac_f32_e32 v21, v10, v11 ; 3E2A170A v_mul_f32_e32 v24, v7, v11 ; 10301707 v_mac_f32_e32 v24, v7, v11 ; 3E301707 v_mad_f32 v0, -v0, v17, -v20 ; D2820000 A4522300 v_mad_f32 v1, -v1, v17, -v21 ; D2820001 A4562301 v_mad_f32 v6, -v6, v17, -v24 ; D2820006 A4622306 image_sample v[17:19], 7, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[32:39], s[4:7] ; F0800700 00281112 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v11, s2, v17 ; 10162202 v_mul_f32_e32 v17, s3, v18 ; 10222403 v_mul_f32_e32 v18, s16, v19 ; 10242610 v_mul_f32_e32 v11, v11, v12 ; 1016190B v_mul_f32_e32 v17, v17, v12 ; 10221911 v_mul_f32_e32 v12, v18, v12 ; 10181912 s_buffer_load_dword s2, s[8:11], 0x8 ; C2010908 s_buffer_load_dword s3, s[8:11], 0x9 ; C2018909 s_buffer_load_dword s4, s[8:11], 0xa ; C202090A v_mac_f32_e32 v11, v29, v40 ; 3E16511D v_mac_f32_e32 v17, v30, v40 ; 3E22511E v_mac_f32_e32 v12, v31, v40 ; 3E18511F v_mac_f32_e32 v11, v33, v13 ; 3E161B21 v_mac_f32_e32 v17, v34, v13 ; 3E221B22 v_mac_f32_e32 v12, v35, v13 ; 3E181B23 s_buffer_load_dword s5, s[8:11], 0xb ; C202890B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s2, v0 ; 10000002 v_mac_f32_e32 v0, s3, v1 ; 3E000203 v_mac_f32_e32 v0, s4, v6 ; 3E000C04 v_mac_f32_e32 v11, v14, v37 ; 3E164B0E v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_log_f32_e32 v0, v0 ; 7E004F00 v_mac_f32_e32 v17, v14, v38 ; 3E224D0E v_mac_f32_e32 v12, v14, v39 ; 3E184F0E v_mul_f32_e32 v1, s1, v15 ; 10021E01 v_mul_legacy_f32_e32 v0, s5, v0 ; 0E000005 v_exp_f32_e32 v0, v0 ; 7E004B00 v_mul_f32_e32 v0, v0, v1 ; 10000300 s_buffer_load_dword s1, s[12:15], 0x72 ; C2008D72 s_buffer_load_dword s2, s[12:15], 0x73 ; C2010D73 s_buffer_load_dword s3, s[12:15], 0x74 ; C2018D74 s_buffer_load_dword s4, s[12:15], 0x75 ; C2020D75 s_buffer_load_dword s5, s[12:15], 0x76 ; C2028D76 s_buffer_load_dword s6, s[8:11], 0x6 ; C2030906 s_buffer_load_dword s7, s[8:11], 0x17 ; C2038917 s_buffer_load_dword s12, s[8:11], 0x18 ; C2060918 s_buffer_load_dword s13, s[8:11], 0x19 ; C2068919 s_buffer_load_dword s14, s[8:11], 0x1a ; C207091A s_buffer_load_dword s15, s[8:11], 0x0 ; C2078900 s_buffer_load_dword s16, s[8:11], 0x1 ; C2080901 s_buffer_load_dword s17, s[8:11], 0x2 ; C2088902 s_buffer_load_dword s18, s[8:11], 0x4 ; C2090904 s_buffer_load_dword s19, s[8:11], 0x5 ; C2098905 s_buffer_load_dword s20, s[8:11], 0xc ; C20A090C s_buffer_load_dword s23, s[8:11], 0xd ; C20B890D s_buffer_load_dword s24, s[8:11], 0xe ; C20C090E s_buffer_load_dword s25, s[8:11], 0x10 ; C20C8910 s_buffer_load_dword s26, s[8:11], 0x11 ; C20D0911 s_buffer_load_dword s27, s[8:11], 0x12 ; C20D8912 s_buffer_load_dword s28, s[8:11], 0x13 ; C20E0913 s_buffer_load_dword s29, s[8:11], 0x14 ; C20E8914 s_buffer_load_dword s30, s[8:11], 0x15 ; C20F0915 s_buffer_load_dword s8, s[8:11], 0x16 ; C2040916 v_mul_f32_e32 v1, v5, v11 ; 10021705 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v1, s20, v0 ; 3E020014 v_mul_f32_e32 v4, v4, v17 ; 10082304 v_mac_f32_e32 v4, s23, v0 ; 3E080017 v_mul_f32_e32 v3, v3, v12 ; 10061903 v_mac_f32_e32 v3, s24, v0 ; 3E060018 v_mul_f32_e32 v0, s15, v9 ; 1000120F v_mad_f32 v0, -s16, v10, -v0 ; D2820000 A4021410 v_mul_f32_e32 v5, s25, v9 ; 100A1219 v_mac_f32_e32 v5, s26, v10 ; 3E0A141A v_mad_f32 v0, -s17, v7, v0 ; D2820000 24020E11 v_mac_f32_e32 v5, s27, v7 ; 3E0A0E1B v_mov_b32_e32 v6, 0x3d996887 ; 7E0C02FF 3D996887 v_mul_f32_e32 v7, v6, v55 ; 100E6F06 v_mac_f32_e32 v7, v6, v23 ; 3E0E2F06 v_mac_f32_e32 v7, v6, v49 ; 3E0E6306 v_mac_f32_e32 v7, v6, v50 ; 3E0E6506 v_mov_b32_e32 v6, 0x3dfd1fe6 ; 7E0C02FF 3DFD1FE6 v_mul_f32_e32 v9, v6, v28 ; 10123906 v_mac_f32_e32 v9, v6, v51 ; 3E126706 v_mac_f32_e32 v9, v6, v45 ; 3E125B06 v_mac_f32_e32 v9, v6, v27 ; 3E123706 v_mov_b32_e32 v6, s29 ; 7E0C021D v_mov_b32_e32 v10, s30 ; 7E14021E v_mov_b32_e32 v11, s8 ; 7E160208 v_mul_f32_e32 v6, s28, v6 ; 100C0C1C v_mul_f32_e32 v10, s28, v10 ; 1014141C v_mul_f32_e32 v11, s28, v11 ; 1016161C v_mov_b32_e32 v12, s12 ; 7E18020C v_mov_b32_e32 v13, s13 ; 7E1A020D v_mov_b32_e32 v14, s14 ; 7E1C020E v_mul_f32_e32 v12, s7, v12 ; 10181807 v_mul_f32_e32 v13, s7, v13 ; 101A1A07 v_mul_f32_e32 v14, s7, v14 ; 101C1C07 v_mul_f32_e32 v15, s0, v16 ; 101E2000 v_mad_f32 v16, -v16, s0, 1.0 ; D2820010 23C80110 v_mov_b32_e32 v17, 0x3e59999a ; 7E2202FF 3E59999A v_mul_f32_e32 v18, v17, v12 ; 10241911 v_mov_b32_e32 v19, 0x3f372474 ; 7E2602FF 3F372474 v_mac_f32_e32 v18, v19, v13 ; 3E241B13 v_mov_b32_e32 v20, 0x3d93a92a ; 7E2802FF 3D93A92A v_mac_f32_e32 v18, v20, v14 ; 3E241D14 v_mul_f32_e32 v12, v12, v16 ; 1018210C v_mac_f32_e32 v12, v18, v15 ; 3E181F12 v_mul_f32_e32 v13, v13, v16 ; 101A210D v_mac_f32_e32 v13, v18, v15 ; 3E1A1F12 v_mul_f32_e32 v14, v14, v16 ; 101C210E v_mac_f32_e32 v14, v18, v15 ; 3E1C1F12 v_mul_f32_e32 v17, v17, v6 ; 10220D11 v_mac_f32_e32 v17, v19, v10 ; 3E221513 v_mac_f32_e32 v17, v20, v11 ; 3E221714 v_mul_f32_e32 v6, v6, v16 ; 100C2106 v_mul_f32_e32 v10, v10, v16 ; 1014210A v_mul_f32_e32 v11, v11, v16 ; 1016210B v_mac_f32_e32 v6, v17, v15 ; 3E0C1F11 v_mac_f32_e32 v10, v17, v15 ; 3E141F11 v_mac_f32_e32 v11, v17, v15 ; 3E161F11 v_add_f32_e32 v7, v9, v7 ; 060E0F09 v_madmk_f32_e32 v7, v22, v7, 0x3e52efd4 ; 400E0F16 3E52EFD4 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_mul_f32_e32 v9, v0, v7 ; 10120F00 v_mul_f32_e32 v15, s18, v9 ; 101E1212 v_mul_f32_e32 v16, s19, v9 ; 10201213 v_mul_f32_e32 v9, s6, v9 ; 10121206 v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80 v_mac_f32_e32 v15, v6, v5 ; 3E1E0B06 v_mac_f32_e32 v16, v10, v5 ; 3E200B0A v_mac_f32_e32 v9, v11, v5 ; 3E120B0B v_mad_f32 v0, -v7, v0, 1.0 ; D2820000 23CA0107 v_mac_f32_e32 v15, v12, v0 ; 3E1E010C v_mac_f32_e32 v16, v13, v0 ; 3E20010D v_mac_f32_e32 v9, v14, v0 ; 3E12010E v_sqrt_f32_e32 v0, v8 ; 7E006708 v_mov_b32_e32 v5, s2 ; 7E0A0202 v_mac_f32_e32 v5, s3, v0 ; 3E0A0003 v_add_f32_e64 v0, 0, v5 clamp ; D2060800 00020A80 v_log_f32_e32 v0, v0 ; 7E004F00 v_mul_f32_e32 v1, v15, v1 ; 1002030F v_mul_f32_e32 v4, v16, v4 ; 10080910 v_mul_f32_e32 v3, v9, v3 ; 10060709 v_mul_legacy_f32_e32 v0, s5, v0 ; 0E000005 v_exp_f32_e32 v0, v0 ; 7E004B00 v_min_f32_e32 v0, s4, v0 ; 1E000004 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_sub_f32_e32 v5, 1.0, v0 ; 080A00F2 v_mul_f32_e32 v1, v1, v5 ; 10020B01 v_mul_f32_e32 v4, v4, v5 ; 10080B04 v_mul_f32_e32 v3, v3, v5 ; 10060B03 v_mac_f32_e32 v1, s21, v0 ; 3E020015 v_mac_f32_e32 v4, s22, v0 ; 3E080016 v_mac_f32_e32 v3, s1, v0 ; 3E060001 v_cvt_pkrtz_f16_f32_e32 v0, v1, v4 ; 5E000901 v_cvt_pkrtz_f16_f32_e64 v1, v3, 1.0 ; D25E0001 0001E503 exp 15, 0, 1, 0, 0, v0, v1, v0, v1 ; F800040F 01000100 s_waitcnt expcnt(0) ; BF8C070F v_cvt_pkrtz_f16_f32_e32 v0, v2, v2 ; 5E000502 exp 15, 1, 1, 1, 1, v0, v0, v0, v0 ; F8001C1F 00000000 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 96 VGPRS: 64 Code Size: 1944 bytes LDS: 0 blocks Scratch: 0 bytes per wave ********************