-------------------------------------------------------------- bytecode 12 dw -- 3 gprs --------------------- shader 0 -- 6 0000 00000002 81000400 VTX 2 @4 0004 7C00A000 88CD1001 00080000 VFETCH R1.xyzw, R0.x, RID:160 VERTEX MFC:31 UCF:0 FMT(DTA:35 NUM:0 COMP:0 MODE:1) 0008 7C00A000 88CD1002 00080010 VFETCH R2.xyzw, R0.x +16b, RID:160 VERTEX MFC:31 UCF:0 FMT(DTA:35 NUM:0 COMP:0 MODE:1) 0002 00000000 8A000000 RET @0 -------------------------------------- ______________________________________________________________ -------------------------------------------------------------- bytecode 12 dw -- 3 gprs --------------------- shader 1 -- 6 0000 00000002 81000400 VTX 2 @4 0004 7C00A000 88CD1001 00080000 VFETCH R1.xyzw, R0.x, RID:160 VERTEX MFC:31 UCF:0 FMT(DTA:35 NUM:0 COMP:0 MODE:1) 0008 7C00A000 D88D1002 00080010 VFETCH R2.xyzw, R0.x +16b, RID:160 VERTEX MFC:31 UCF:0 FMT(DTA:34 NUM:1 COMP:1 MODE:1) 0002 00000000 8A000000 RET @0 -------------------------------------- ______________________________________________________________ -------------------------------------------------------------- bytecode 12 dw -- 3 gprs --------------------- shader 2 -- 6 0000 00000002 81000400 VTX 2 @4 0004 7C00A000 88CD1001 00080000 VFETCH R1.xyzw, R0.x, RID:160 VERTEX MFC:31 UCF:0 FMT(DTA:35 NUM:0 COMP:0 MODE:1) 0008 7C00A000 988D1002 00080010 VFETCH R2.xyzw, R0.x +16b, RID:160 VERTEX MFC:31 UCF:0 FMT(DTA:34 NUM:1 COMP:0 MODE:1) 0002 00000000 8A000000 RET @0 -------------------------------------- ______________________________________________________________ -------------------------------------------------------------- bytecode 8 dw -- 2 gprs --------------------- shader 3 -- 6 0000 00000002 81000000 VTX 1 @4 0004 7C00A000 93564001 00080000 VFETCH R1.x001, R0.x, RID:160 VERTEX MFC:31 UCF:0 FMT(DTA:13 NUM:1 COMP:0 MODE:1) 0002 00000000 8A000000 RET @0 -------------------------------------- ______________________________________________________________ -------------------------------------------------------------- VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] 0: MOV OUT[0], IN[0] 1: MOV OUT[1], IN[1] 2: END ; ModuleID = 'tgsi' define void @main() { main_body: %0 = call float @llvm.R600.load.input(i32 4) %1 = call float @llvm.R600.load.input(i32 5) %2 = call float @llvm.R600.load.input(i32 6) %3 = call float @llvm.R600.load.input(i32 7) %4 = call float @llvm.R600.load.input(i32 8) %5 = call float @llvm.R600.load.input(i32 9) %6 = call float @llvm.R600.load.input(i32 10) %7 = call float @llvm.R600.load.input(i32 11) %8 = insertelement <4 x float> undef, float %0, i32 0 %9 = insertelement <4 x float> %8, float %1, i32 1 %10 = insertelement <4 x float> %9, float %2, i32 2 %11 = insertelement <4 x float> %10, float %3, i32 3 call void @llvm.R600.store.swizzle(<4 x float> %11, i32 60, i32 1) %12 = insertelement <4 x float> undef, float %4, i32 0 %13 = insertelement <4 x float> %12, float %5, i32 1 %14 = insertelement <4 x float> %13, float %6, i32 2 %15 = insertelement <4 x float> %14, float %7, i32 3 call void @llvm.R600.store.swizzle(<4 x float> %15, i32 0, i32 2) ret void } declare float @llvm.R600.load.input(i32) readnone declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) # Machine code for function main: Post SSA, not tracking liveness Function Live Ins: %T2_W in %vreg0, %T2_Z in %vreg1, %T2_Y in %vreg2, %T2_X in %vreg3, %T1_W in %vreg4, %T1_Z in %vreg5, %T1_Y in %vreg6, %T1_X in %vreg7 BB#0: derived from LLVM BB %main_body Live Ins: %T2_W %T2_Z %T2_Y %T2_X %T1_W %T1_Z %T1_Y %T1_X %T1_X = KILL %T1_X, %T1_XYZW %T1_Y = KILL %T1_Y, %T1_XYZW, %T1_XYZW %T1_Z = KILL %T1_Z, %T1_XYZW, %T1_XYZW %T1_W = KILL %T1_W, %T1_XYZW, %T1_XYZW %T2_X = KILL %T2_X, %T2_XYZW %T2_Y = KILL %T2_Y, %T2_XYZW, %T2_XYZW %T2_Z = KILL %T2_Z, %T2_XYZW, %T2_XYZW %T2_W = KILL %T2_W, %T2_XYZW, %T2_XYZW R600_ExportSwz %T1_XYZW, 1, 60, 0, 1, 2, 3, 40, 0 R600_ExportSwz %T2_XYZW, 2, 0, 0, 1, 2, 3, 40, 1 RETURN # End machine code for function main. -------------------------------------------------------------- bytecode 6 dw -- 3 gprs --------------------- shader 4 -- 6 0000 00000000 89800000 CALL_FS @0 0002 C000A03C 94000688 EXPORT_DONE POS 60 R1.xyzw ES:3 0004 C0014000 94200688 EXPORT_DONE PARAM 0 R2.xyzw ES:3 EOP -------------------------------------- ______________________________________________________________ -------------------------------------------------------------- VERT DCL IN[0] DCL OUT[0], POSITION 0: MOV OUT[0], IN[0] 1: END STREAMOUT 0: MEM_STREAM0_BUF0[0..0] <- OUT[0].x ; ModuleID = 'tgsi' define void @main() { main_body: %0 = call float @llvm.R600.load.input(i32 4) %1 = call float @llvm.R600.load.input(i32 5) %2 = call float @llvm.R600.load.input(i32 6) %3 = call float @llvm.R600.load.input(i32 7) %4 = insertelement <4 x float> undef, float %0, i32 0 %5 = insertelement <4 x float> %4, float %1, i32 1 %6 = insertelement <4 x float> %5, float %2, i32 2 %7 = insertelement <4 x float> %6, float %3, i32 3 call void @llvm.R600.store.stream.output(<4 x float> %7, i32 0, i32 0, i32 1) %8 = insertelement <4 x float> undef, float %0, i32 0 %9 = insertelement <4 x float> %8, float %1, i32 1 %10 = insertelement <4 x float> %9, float %2, i32 2 %11 = insertelement <4 x float> %10, float %3, i32 3 call void @llvm.R600.store.swizzle(<4 x float> %11, i32 60, i32 1) call void @llvm.R600.store.dummy(i32 2) ret void } declare float @llvm.R600.load.input(i32) readnone declare void @llvm.R600.store.stream.output(<4 x float>, i32, i32, i32) declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) declare void @llvm.R600.store.dummy(i32) # Machine code for function main: Post SSA, not tracking liveness Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3 BB#0: derived from LLVM BB %main_body Live Ins: %T1_W %T1_Z %T1_Y %T1_X %T1_X = KILL %T1_X, %T1_XYZW %T1_Y = KILL %T1_Y, %T1_XYZW, %T1_XYZW %T1_Z = KILL %T1_Z, %T1_XYZW, %T1_XYZW %T1_W = KILL %T1_W, %T1_XYZW, %T1_XYZW R600_ExportBuf %T1_XYZW, 0, 0, 4095, 1, 32, 0 R600_ExportSwz %T1_XYZW, 1, 60, 0, 1, 2, 3, 40, 0 R600_ExportSwz %T0_XYZW, 2, 0, 7, 7, 7, 7, 40, 1 RETURN # End machine code for function main. -------------------------------------------------------------- bytecode 8 dw -- 2 gprs --------------------- shader 5 -- 6 0000 00000000 89800000 CALL_FS @0 0002 00008000 90001FFF MEM_STREAM0 WRITE 0 R1.x___ ES:0 0004 C000A03C 94000688 EXPORT_DONE POS 60 R1.xyzw ES:3 0006 C0004000 94200FFF EXPORT_DONE PARAM 0 R0.____ ES:3 EOP -------------------------------------- ______________________________________________________________ -------------------------------------------------------------- FRAG DCL IN[0], GENERIC[0], CONSTANT 0: END ; ModuleID = 'tgsi' define void @main() { main_body: %0 = call float @llvm.R600.load.input(i32 0) %1 = call float @llvm.R600.load.input(i32 1) %2 = call float @llvm.R600.load.input(i32 2) %3 = call float @llvm.R600.load.input(i32 3) call void @llvm.R600.store.dummy(i32 0) ret void } declare float @llvm.R600.load.input(i32) readnone declare void @llvm.R600.store.dummy(i32) # Machine code for function main: Post SSA, not tracking liveness BB#0: derived from LLVM BB %main_body R600_ExportSwz %T0_XYZW, 0, 0, 7, 7, 7, 7, 40, 1 RETURN # End machine code for function main. -------------------------------------------------------------- bytecode 2 dw -- 1 gprs --------------------- shader 6 -- 6 0000 C0000000 94200FFF EXPORT_DONE PIXEL 0 R0.____ ES:3 EOP -------------------------------------- ______________________________________________________________ -------------------------------------------------------------- FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], COLOR, COLOR DCL OUT[0], COLOR 0: MOV_SAT OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main() { main_body: %0 = call float @llvm.R600.load.input(i32 0) %1 = call float @llvm.R600.load.input(i32 1) %2 = call float @llvm.R600.load.input(i32 2) %3 = call float @llvm.R600.load.input(i32 3) %4 = call float @llvm.AMDIL.clamp.(float %0, float 0.000000e+00, float 1.000000e+00) %5 = call float @llvm.AMDIL.clamp.(float %1, float 0.000000e+00, float 1.000000e+00) %6 = call float @llvm.AMDIL.clamp.(float %2, float 0.000000e+00, float 1.000000e+00) %7 = call float @llvm.AMDIL.clamp.(float %3, float 0.000000e+00, float 1.000000e+00) %8 = insertelement <4 x float> undef, float %4, i32 0 %9 = insertelement <4 x float> %8, float %5, i32 1 %10 = insertelement <4 x float> %9, float %6, i32 2 %11 = insertelement <4 x float> %10, float %7, i32 3 call void @llvm.R600.store.swizzle(<4 x float> %11, i32 0, i32 0) ret void } declare float @llvm.R600.load.input(i32) readnone declare float @llvm.AMDIL.clamp.(float, float, float) readnone declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) # Machine code for function main: Post SSA, not tracking liveness Function Live Ins: %T0_W in %vreg0, %T0_Z in %vreg1, %T0_Y in %vreg2, %T0_X in %vreg3 BB#0: derived from LLVM BB %main_body Live Ins: %T0_W %T0_Z %T0_Y %T0_X %T1_X = MOV 1, 0, 0, 1, %T0_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, %T1_XYZW %T1_Y = MOV 1, 0, 0, 1, %T0_Y, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, %T1_XYZW, %T1_XYZW %T1_Z = MOV 1, 0, 0, 1, %T0_Z, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, %T1_XYZW, %T1_XYZW %T1_W = MOV 1, 0, 0, 1, %T0_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, %T1_XYZW, %T1_XYZW R600_ExportSwz %T1_XYZW, 0, 0, 0, 1, 2, 3, 40, 1 RETURN # End machine code for function main. -------------------------------------------------------------- bytecode 12 dw -- 2 gprs --------------------- shader 7 -- 6 0000 00000002 A00C0000 ALU 4 @4 0004 00000000 80200C90 1 SETE_DX10*4_sat R1.x, R0.x, R0.x 0006 00000400 A0200C90 SETE_DX10*4_sat R1.y, R0.y, R0.x 0008 00000800 C0200C90 SETE_DX10*4_sat R1.z, R0.z, R0.x 0010 80000C00 E0200C90 SETE_DX10*4_sat R1.w, R0.w, R0.x 0002 C0008000 94200688 EXPORT_DONE PIXEL 0 R1.xyzw ES:3 EOP -------------------------------------- ______________________________________________________________ -------------------------------------------------------------- VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL IN[5] DCL IN[6] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL CONST[0..10] DCL TEMP[0..6] 0: MUL TEMP[0], IN[0].xxxx, CONST[0] 1: MAD TEMP[0], IN[0].yyyy, CONST[1], TEMP[0] 2: MAD TEMP[0], IN[0].zzzz, CONST[2], TEMP[0] 3: MAD OUT[0], IN[0].wwww, CONST[3], TEMP[0] 4: DP3 TEMP[1].x, IN[1], IN[1] 5: RSQ TEMP[1].x, TEMP[1] 6: MUL TEMP[0], IN[1], TEMP[1].xxxx 7: MOV TEMP[2].w, IN[6].xxxx 8: MOV TEMP[3], IN[3] 9: MAD TEMP[3].xyz, CONST[4], IN[2], IN[5] 10: MOV_SAT OUT[1], TEMP[3] 11: DP3 TEMP[2].x, TEMP[0], CONST[5] 12: DP3 TEMP[2].y, TEMP[0], CONST[7] 13: MUL TEMP[4], CONST[8], IN[2] 14: MUL TEMP[5], CONST[9], IN[3] 15: MUL TEMP[6], CONST[10], IN[4] 16: LIT TEMP[1], TEMP[2] 17: ADD TEMP[3], TEMP[4], TEMP[3] 18: MAD TEMP[3], TEMP[1].yyyy, TEMP[5], TEMP[3] 19: MAD_SAT OUT[1].xyz, TEMP[1].zzzz, TEMP[6], TEMP[3] 20: END ; ModuleID = 'tgsi' define void @main() { main_body: %0 = call float @llvm.R600.load.input(i32 4) %1 = call float @llvm.R600.load.input(i32 5) %2 = call float @llvm.R600.load.input(i32 6) %3 = call float @llvm.R600.load.input(i32 7) %4 = call float @llvm.R600.load.input(i32 8) %5 = call float @llvm.R600.load.input(i32 9) %6 = call float @llvm.R600.load.input(i32 10) %7 = call float @llvm.R600.load.input(i32 11) %8 = call float @llvm.R600.load.input(i32 12) %9 = call float @llvm.R600.load.input(i32 13) %10 = call float @llvm.R600.load.input(i32 14) %11 = call float @llvm.R600.load.input(i32 15) %12 = call float @llvm.R600.load.input(i32 16) %13 = call float @llvm.R600.load.input(i32 17) %14 = call float @llvm.R600.load.input(i32 18) %15 = call float @llvm.R600.load.input(i32 19) %16 = call float @llvm.R600.load.input(i32 20) %17 = call float @llvm.R600.load.input(i32 21) %18 = call float @llvm.R600.load.input(i32 22) %19 = call float @llvm.R600.load.input(i32 23) %20 = call float @llvm.R600.load.input(i32 24) %21 = call float @llvm.R600.load.input(i32 25) %22 = call float @llvm.R600.load.input(i32 26) %23 = call float @llvm.R600.load.input(i32 27) %24 = call float @llvm.R600.load.input(i32 28) %25 = call float @llvm.R600.load.input(i32 29) %26 = call float @llvm.R600.load.input(i32 30) %27 = call float @llvm.R600.load.input(i32 31) %28 = load <4 x float> addrspace(9)* null %29 = extractelement <4 x float> %28, i32 0 %30 = fmul float %0, %29 %31 = load <4 x float> addrspace(9)* null %32 = extractelement <4 x float> %31, i32 1 %33 = fmul float %0, %32 %34 = load <4 x float> addrspace(9)* null %35 = extractelement <4 x float> %34, i32 2 %36 = fmul float %0, %35 %37 = load <4 x float> addrspace(9)* null %38 = extractelement <4 x float> %37, i32 3 %39 = fmul float %0, %38 %40 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1) %41 = extractelement <4 x float> %40, i32 0 %42 = fmul float %1, %41 %43 = fadd float %42, %30 %44 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1) %45 = extractelement <4 x float> %44, i32 1 %46 = fmul float %1, %45 %47 = fadd float %46, %33 %48 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1) %49 = extractelement <4 x float> %48, i32 2 %50 = fmul float %1, %49 %51 = fadd float %50, %36 %52 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1) %53 = extractelement <4 x float> %52, i32 3 %54 = fmul float %1, %53 %55 = fadd float %54, %39 %56 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2) %57 = extractelement <4 x float> %56, i32 0 %58 = fmul float %2, %57 %59 = fadd float %58, %43 %60 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2) %61 = extractelement <4 x float> %60, i32 1 %62 = fmul float %2, %61 %63 = fadd float %62, %47 %64 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2) %65 = extractelement <4 x float> %64, i32 2 %66 = fmul float %2, %65 %67 = fadd float %66, %51 %68 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2) %69 = extractelement <4 x float> %68, i32 3 %70 = fmul float %2, %69 %71 = fadd float %70, %55 %72 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3) %73 = extractelement <4 x float> %72, i32 0 %74 = fmul float %3, %73 %75 = fadd float %74, %59 %76 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3) %77 = extractelement <4 x float> %76, i32 1 %78 = fmul float %3, %77 %79 = fadd float %78, %63 %80 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3) %81 = extractelement <4 x float> %80, i32 2 %82 = fmul float %3, %81 %83 = fadd float %82, %67 %84 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3) %85 = extractelement <4 x float> %84, i32 3 %86 = fmul float %3, %85 %87 = fadd float %86, %71 %88 = insertelement <4 x float> undef, float %4, i32 0 %89 = insertelement <4 x float> %88, float %5, i32 1 %90 = insertelement <4 x float> %89, float %6, i32 2 %91 = insertelement <4 x float> %90, float 0.000000e+00, i32 3 %92 = insertelement <4 x float> undef, float %4, i32 0 %93 = insertelement <4 x float> %92, float %5, i32 1 %94 = insertelement <4 x float> %93, float %6, i32 2 %95 = insertelement <4 x float> %94, float 0.000000e+00, i32 3 %96 = call float @llvm.AMDGPU.dp4(<4 x float> %91, <4 x float> %95) %97 = call float @fabs(float %96) %98 = call float @llvm.AMDGPU.rsq(float %97) %99 = fmul float %4, %98 %100 = fmul float %5, %98 %101 = fmul float %6, %98 %102 = fmul float %7, %98 %103 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 4) %104 = extractelement <4 x float> %103, i32 0 %105 = fmul float %104, %8 %106 = fadd float %105, %20 %107 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 4) %108 = extractelement <4 x float> %107, i32 1 %109 = fmul float %108, %9 %110 = fadd float %109, %21 %111 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 4) %112 = extractelement <4 x float> %111, i32 2 %113 = fmul float %112, %10 %114 = fadd float %113, %22 %115 = call float @llvm.AMDIL.clamp.(float %106, float 0.000000e+00, float 1.000000e+00) %116 = call float @llvm.AMDIL.clamp.(float %110, float 0.000000e+00, float 1.000000e+00) %117 = call float @llvm.AMDIL.clamp.(float %114, float 0.000000e+00, float 1.000000e+00) %118 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00) %119 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 5) %120 = extractelement <4 x float> %119, i32 0 %121 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 5) %122 = extractelement <4 x float> %121, i32 1 %123 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 5) %124 = extractelement <4 x float> %123, i32 2 %125 = insertelement <4 x float> undef, float %99, i32 0 %126 = insertelement <4 x float> %125, float %100, i32 1 %127 = insertelement <4 x float> %126, float %101, i32 2 %128 = insertelement <4 x float> %127, float 0.000000e+00, i32 3 %129 = insertelement <4 x float> undef, float %120, i32 0 %130 = insertelement <4 x float> %129, float %122, i32 1 %131 = insertelement <4 x float> %130, float %124, i32 2 %132 = insertelement <4 x float> %131, float 0.000000e+00, i32 3 %133 = call float @llvm.AMDGPU.dp4(<4 x float> %128, <4 x float> %132) %134 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 7) %135 = extractelement <4 x float> %134, i32 0 %136 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 7) %137 = extractelement <4 x float> %136, i32 1 %138 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 7) %139 = extractelement <4 x float> %138, i32 2 %140 = insertelement <4 x float> undef, float %99, i32 0 %141 = insertelement <4 x float> %140, float %100, i32 1 %142 = insertelement <4 x float> %141, float %101, i32 2 %143 = insertelement <4 x float> %142, float 0.000000e+00, i32 3 %144 = insertelement <4 x float> undef, float %135, i32 0 %145 = insertelement <4 x float> %144, float %137, i32 1 %146 = insertelement <4 x float> %145, float %139, i32 2 %147 = insertelement <4 x float> %146, float 0.000000e+00, i32 3 %148 = call float @llvm.AMDGPU.dp4(<4 x float> %143, <4 x float> %147) %149 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 8) %150 = extractelement <4 x float> %149, i32 0 %151 = fmul float %150, %8 %152 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 8) %153 = extractelement <4 x float> %152, i32 1 %154 = fmul float %153, %9 %155 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 8) %156 = extractelement <4 x float> %155, i32 2 %157 = fmul float %156, %10 %158 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 8) %159 = extractelement <4 x float> %158, i32 3 %160 = fmul float %159, %11 %161 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 9) %162 = extractelement <4 x float> %161, i32 0 %163 = fmul float %162, %12 %164 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 9) %165 = extractelement <4 x float> %164, i32 1 %166 = fmul float %165, %13 %167 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 9) %168 = extractelement <4 x float> %167, i32 2 %169 = fmul float %168, %14 %170 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 9) %171 = extractelement <4 x float> %170, i32 3 %172 = fmul float %171, %15 %173 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 10) %174 = extractelement <4 x float> %173, i32 0 %175 = fmul float %174, %16 %176 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 10) %177 = extractelement <4 x float> %176, i32 1 %178 = fmul float %177, %17 %179 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 10) %180 = extractelement <4 x float> %179, i32 2 %181 = fmul float %180, %18 %182 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 10) %183 = extractelement <4 x float> %182, i32 3 %184 = fmul float %183, %19 %185 = fcmp uge float %133, 0.000000e+00 %186 = select i1 %185, float %133, float 0.000000e+00 %187 = fcmp uge float %148, 0.000000e+00 %188 = select i1 %187, float %148, float 0.000000e+00 %189 = call float @llvm.pow.f32(float %188, float %24) %190 = fcmp ult float %133, 0.000000e+00 %191 = select i1 %190, float 0.000000e+00, float %189 %192 = fadd float %151, %106 %193 = fadd float %154, %110 %194 = fadd float %157, %114 %195 = fadd float %160, %15 %196 = fmul float %186, %163 %197 = fadd float %196, %192 %198 = fmul float %186, %166 %199 = fadd float %198, %193 %200 = fmul float %186, %169 %201 = fadd float %200, %194 %202 = fmul float %186, %172 %203 = fadd float %202, %195 %204 = fmul float %191, %175 %205 = fadd float %204, %197 %206 = fmul float %191, %178 %207 = fadd float %206, %199 %208 = fmul float %191, %181 %209 = fadd float %208, %201 %210 = call float @llvm.AMDIL.clamp.(float %205, float 0.000000e+00, float 1.000000e+00) %211 = call float @llvm.AMDIL.clamp.(float %207, float 0.000000e+00, float 1.000000e+00) %212 = call float @llvm.AMDIL.clamp.(float %209, float 0.000000e+00, float 1.000000e+00) %213 = insertelement <4 x float> undef, float %75, i32 0 %214 = insertelement <4 x float> %213, float %79, i32 1 %215 = insertelement <4 x float> %214, float %83, i32 2 %216 = insertelement <4 x float> %215, float %87, i32 3 call void @llvm.R600.store.swizzle(<4 x float> %216, i32 60, i32 1) %217 = insertelement <4 x float> undef, float %210, i32 0 %218 = insertelement <4 x float> %217, float %211, i32 1 %219 = insertelement <4 x float> %218, float %212, i32 2 %220 = insertelement <4 x float> %219, float %118, i32 3 call void @llvm.R600.store.swizzle(<4 x float> %220, i32 0, i32 2) ret void } declare float @llvm.R600.load.input(i32) readnone declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) readnone declare float @fabs(float) readonly declare float @llvm.AMDGPU.rsq(float) readnone declare float @llvm.AMDIL.clamp.(float, float, float) readnone declare float @llvm.pow.f32(float, float) nounwind readonly declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) # Machine code for function main: Post SSA, not tracking liveness Function Live Ins: %T7_X in %vreg0, %T6_Z in %vreg1, %T6_Y in %vreg2, %T6_X in %vreg3, %T5_Z in %vreg4, %T5_Y in %vreg5, %T5_X in %vreg6, %T4_W in %vreg7, %T4_Z in %vreg8, %T4_Y in %vreg9, %T4_X in %vreg10, %T3_Z in %vreg11, %T3_Y in %vreg12, %T3_X in %vreg13, %T2_Z in %vreg14, %T2_Y in %vreg15, %T2_X in %vreg16, %T1_W in %vreg17, %T1_Z in %vreg18, %T1_Y in %vreg19, %T1_X in %vreg20 BB#0: derived from LLVM BB %main_body Live Ins: %T7_X %T6_Z %T6_Y %T6_X %T5_Z %T5_Y %T5_X %T4_W %T4_Z %T4_Y %T4_X %T3_Z %T3_Y %T3_X %T2_Z %T2_Y %T2_X %T1_W %T1_Z %T1_Y %T1_X %T2_X = KILL %T2_X, %T2_XYZW %T2_Y = KILL %T2_Y, %T2_XYZW, %T2_XYZW %T2_Z = KILL %T2_Z, %T2_XYZW, %T2_XYZW %T2_W = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, %T2_XYZW, %T2_XYZW BUNDLE %T0_X, %T0_Y, %T0_Z, %T0_W, %T2_X, %PRED_SEL_OFF, %T2_Y, %T2_Z, %T2_W * %T0_X = DOT4_r600_real 0, 0, 1, 0, 0, 0, %T2_X, 0, 0, 0, -1, %T2_X, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0; flags: * %T0_Y = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_Y, 0, 0, 0, -1, %T2_Y, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0; flags: * %T0_Z = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_Z, 0, 0, 0, -1, %T2_Z, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0; flags: * %T0_W = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_W, 0, 0, 0, -1, %T2_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0; flags: %T3_W = RECIPSQRT_CLAMPED_r600 1, 0, 0, 0, %T0_X, 0, 0, 1, -1, 1, pred:%PRED_SEL_OFF, 0 %T0_X = MUL_IEEE 0, 0, 1, 0, 0, 0, %T2_X, 0, 0, 0, -1, %T3_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, %T0_XYZW %T0_Y = MUL_IEEE 0, 0, 1, 0, 0, 0, %T2_Y, 0, 0, 0, -1, %T3_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, %T0_XYZW, %T0_XYZW %T0_Z = MUL_IEEE 0, 0, 1, 0, 0, 0, %T2_Z, 0, 0, 0, -1, %T3_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, %T0_XYZW, %T0_XYZW %T0_W = MOV 1, 0, 0, 0, %T2_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, %T0_XYZW BUNDLE %T3_X, %T3_Y, %T3_Z, %T3_W, %T0_X, %T2_X, %PRED_SEL_OFF, %T0_Y, %T2_Y, %T0_Z, %T2_Z, %T0_W, %T2_W * %T3_X = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T0_X, 0, 0, 0, -1, %ALU_CONST, 0, 0, 0, 2076, 0, pred:%PRED_SEL_OFF, 0; flags: * %T3_Y = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T0_Y, 0, 0, 0, -1, %ALU_CONST, 0, 0, 0, 2077, 0, pred:%PRED_SEL_OFF, 0; flags: * %T3_Z = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T0_Z, 0, 0, 0, -1, %ALU_CONST, 0, 0, 0, 2078, 0, pred:%PRED_SEL_OFF, 0; flags: * %T3_W = DOT4_r600_real 0, 0, 1, 0, 0, 0, %T0_W, 0, 0, 0, -1, %T2_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0; flags: %T3_W = CNDGE_r600 0, 0, %T3_W, 0, 0, -1, %T3_W, 0, 0, -1, %ZERO, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T3_W = LOG_IEEE_r600 1, 0, 0, 0, %T3_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T8_W = MOV 1, 0, 0, 0, %T2_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, %T8_XYZW %T2_X = MUL_IEEE 0, 0, 1, 0, 0, 0, %T7_X, 0, 0, 0, -1, %T3_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 BUNDLE %T0_X, %T0_Y, %T0_Z, %T0_W, %T0_X, %T8_X, %PRED_SEL_OFF, %T0_Y, %T8_Y, %T0_Z, %T8_Z, %T0_W, %T8_W * %T0_X = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T0_X, 0, 0, 0, -1, %ALU_CONST, 0, 0, 0, 2068, 0, pred:%PRED_SEL_OFF, 0; flags: * %T0_Y = DOT4_r600_real 0, 0, 1, 0, 0, 0, %T0_Y, 0, 0, 0, -1, %ALU_CONST, 0, 0, 0, 2069, 0, pred:%PRED_SEL_OFF, 0; flags: * %T0_Z = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T0_Z, 0, 0, 0, -1, %ALU_CONST, 0, 0, 0, 2070, 0, pred:%PRED_SEL_OFF, 0; flags: * %T0_W = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T0_W, 0, 0, 0, -1, %T8_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0; flags: %T0_X = MUL_IEEE 0, 0, 1, 0, 0, 0, %ALU_CONST, 0, 0, 0, 2064, %T3_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T0_Z = ADD 0, 0, 1, 0, 0, 0, %T0_X, 0, 0, 0, -1, %T6_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T0_W = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_X, 0, 0, 0, -1, %ALU_CONST, 0, 0, 0, 2048, 1, pred:%PRED_SEL_OFF, 0 %T0_X = EXP_IEEE_r600 1, 0, 0, 0, %T2_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T0_X = CNDGE_r600 0, 0, %T0_Y, 0, 0, -1, %T0_X, 0, 0, -1, %ZERO, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T2_X = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_Y, 0, 0, 0, -1, %ALU_CONST, 0, 0, 0, 2052, 1, pred:%PRED_SEL_OFF, 0 %T2_X = ADD 0, 0, 1, 0, 0, 0, %T2_X, 0, 0, 0, -1, %T0_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T0_Y = CNDGE_r600 0, 0, %T0_Y, 0, 0, -1, %T0_Y, 0, 0, -1, %ZERO, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T2_Y = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_X, 0, 0, 0, -1, %ALU_CONST, 0, 0, 0, 2049, 1, pred:%PRED_SEL_OFF, 0 %T0_W = MUL_IEEE 0, 0, 1, 0, 0, 0, %ALU_CONST, 0, 0, 0, 2084, %T4_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T0_W = MUL_IEEE 0, 0, 1, 0, 0, 0, %T0_Y, 0, 0, 0, -1, %T0_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T2_Z = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_Y, 0, 0, 0, -1, %ALU_CONST, 0, 0, 0, 2053, 1, pred:%PRED_SEL_OFF, 0 %T3_W = ADD 0, 0, 1, 0, 0, 0, %T2_Z, 0, 0, 0, -1, %T2_Y, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T2_Y = MUL_IEEE 0, 0, 1, 0, 0, 0, %ALU_CONST, 0, 0, 0, 2080, %T3_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T0_Z = ADD 0, 0, 1, 0, 0, 0, %T2_Y, 0, 0, 0, -1, %T0_Z, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T0_W = ADD 0, 0, 1, 0, 0, 0, %T0_W, 0, 0, 0, -1, %T0_Z, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T0_Z = MUL_IEEE 0, 0, 1, 0, 0, 0, %ALU_CONST, 0, 0, 0, 2065, %T3_Y, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T4_X = ADD 0, 0, 1, 0, 0, 0, %T0_Z, 0, 0, 0, -1, %T6_Y, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T0_Z = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_Z, 0, 0, 0, -1, %ALU_CONST, 0, 0, 0, 2056, 1, pred:%PRED_SEL_OFF, 0 %T2_X = ADD 0, 0, 1, 0, 0, 0, %T0_Z, 0, 0, 0, -1, %T2_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T0_Z = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_X, 0, 0, 0, -1, %ALU_CONST, 0, 0, 0, 2050, 1, pred:%PRED_SEL_OFF, 0 %T2_Y = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_W, 0, 0, 0, -1, %ALU_CONST, 0, 0, 0, 2060, 1, pred:%PRED_SEL_OFF, 0 %T2_X = ADD 0, 0, 1, 0, 0, 0, %T2_Y, 0, 0, 0, -1, %T2_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, %T2_XYZW %T3_X = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_Y, 0, 0, 0, -1, %ALU_CONST, 0, 0, 0, 2054, 1, pred:%PRED_SEL_OFF, 0 %T0_Z = ADD 0, 0, 1, 0, 0, 0, %T3_X, 0, 0, 0, -1, %T0_Z, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T3_X = MUL_IEEE 0, 0, 1, 0, 0, 0, %ALU_CONST, 0, 0, 0, 2066, %T3_Z, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T3_X = ADD 0, 0, 1, 0, 0, 0, %T3_X, 0, 0, 0, -1, %T6_Z, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T4_Y = MUL_IEEE 0, 0, 1, 0, 0, 0, %ALU_CONST, 0, 0, 0, 2085, %T4_Y, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T4_Y = MUL_IEEE 0, 0, 1, 0, 0, 0, %T0_Y, 0, 0, 0, -1, %T4_Y, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T5_W = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_Z, 0, 0, 0, -1, %ALU_CONST, 0, 0, 0, 2057, 1, pred:%PRED_SEL_OFF, 0 %T3_W = ADD 0, 0, 1, 0, 0, 0, %T5_W, 0, 0, 0, -1, %T3_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T3_Y = MUL_IEEE 0, 0, 1, 0, 0, 0, %ALU_CONST, 0, 0, 0, 2081, %T3_Y, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T3_Y = ADD 0, 0, 1, 0, 0, 0, %T3_Y, 0, 0, 0, -1, %T4_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T3_Y = ADD 0, 0, 1, 0, 0, 0, %T4_Y, 0, 0, 0, -1, %T3_Y, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T4_X = MUL_IEEE 0, 0, 1, 0, 0, 0, %ALU_CONST, 0, 0, 0, 2088, %T5_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T4_X = MUL_IEEE 0, 0, 1, 0, 0, 0, %T0_X, 0, 0, 0, -1, %T4_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T6_X = ADD 0, 0, 1, 0, 0, 1, %T4_X, 0, 0, 0, -1, %T0_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, %T6_XYZW %T0_W = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_X, 0, 0, 0, -1, %ALU_CONST, 0, 0, 0, 2051, 1, pred:%PRED_SEL_OFF, 0 %T1_X = MUL_IEEE 0, 0, 1, 0, 0, 0, %ALU_CONST, 0, 0, 0, 2089, %T5_Y, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T1_X = MUL_IEEE 0, 0, 1, 0, 0, 0, %T0_X, 0, 0, 0, -1, %T1_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T6_Y = ADD 0, 0, 1, 0, 0, 1, %T1_X, 0, 0, 0, -1, %T3_Y, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, %T6_XYZW, %T6_XYZW %T1_X = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_W, 0, 0, 0, -1, %ALU_CONST, 0, 0, 0, 2061, 1, pred:%PRED_SEL_OFF, 0 %T2_Y = ADD 0, 0, 1, 0, 0, 0, %T1_X, 0, 0, 0, -1, %T3_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, %T2_XYZW, %T2_XYZW %T1_X = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_Y, 0, 0, 0, -1, %ALU_CONST, 0, 0, 0, 2055, 1, pred:%PRED_SEL_OFF, 0 %T0_W = ADD 0, 0, 1, 0, 0, 0, %T1_X, 0, 0, 0, -1, %T0_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T1_X = MUL_IEEE 0, 0, 1, 0, 0, 0, %ALU_CONST, 0, 0, 0, 2082, %T3_Z, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T1_X = ADD 0, 0, 1, 0, 0, 0, %T1_X, 0, 0, 0, -1, %T3_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T1_Y = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_Z, 0, 0, 0, -1, %ALU_CONST, 0, 0, 0, 2058, 1, pred:%PRED_SEL_OFF, 0 %T0_Z = ADD 0, 0, 1, 0, 0, 0, %T1_Y, 0, 0, 0, -1, %T0_Z, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T1_Y = MUL_IEEE 0, 0, 1, 0, 0, 0, %ALU_CONST, 0, 0, 0, 2086, %T4_Z, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T0_Y = MUL_IEEE 0, 0, 1, 0, 0, 0, %T0_Y, 0, 0, 0, -1, %T1_Y, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T1_X = ADD 0, 0, 1, 0, 0, 0, %T0_Y, 0, 0, 0, -1, %T1_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T0_Y = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_Z, 0, 0, 0, -1, %ALU_CONST, 0, 0, 0, 2059, 1, pred:%PRED_SEL_OFF, 0 %T0_Y = ADD 0, 0, 1, 0, 0, 0, %T0_Y, 0, 0, 0, -1, %T0_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T0_W = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_W, 0, 0, 0, -1, %ALU_CONST, 0, 0, 0, 2062, 1, pred:%PRED_SEL_OFF, 0 %T2_Z = ADD 0, 0, 1, 0, 0, 0, %T0_W, 0, 0, 0, -1, %T0_Z, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, %T2_XYZW, %T2_XYZW %T0_Z = MUL_IEEE 0, 0, 1, 0, 0, 0, %ALU_CONST, 0, 0, 0, 2090, %T5_Z, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T0_X = MUL_IEEE 0, 0, 1, 0, 0, 0, %T0_X, 0, 0, 0, -1, %T0_Z, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T6_Z = ADD 0, 0, 1, 0, 0, 1, %T0_X, 0, 0, 0, -1, %T1_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, %T6_XYZW, %T6_XYZW %T0_X = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_W, 0, 0, 0, -1, %ALU_CONST, 0, 0, 0, 2063, 1, pred:%PRED_SEL_OFF, 0 %T2_W = ADD 0, 0, 1, 0, 0, 0, %T0_X, 0, 0, 0, -1, %T0_Y, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, %T2_XYZW, %T2_XYZW %T6_W = MOV 1, 0, 0, 1, %T4_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, %T6_XYZW, %T6_XYZW R600_ExportSwz %T2_XYZW, 1, 60, 0, 1, 2, 3, 40, 0 R600_ExportSwz %T6_XYZW, 2, 0, 0, 1, 2, 3, 40, 1 RETURN # End machine code for function main. -------------------------------------------------------------- bytecode 176 dw -- 9 gprs --------------------- shader 8 -- 6 0000 00000000 89800000 CALL_FS @0 0002 40000004 A14C0000 ALU 84 @8 KC0[CB0:0-16] 0008 800000F8 60400C90 1 SETE_DX10*4 R2.w, 0, R0.x 0010 00004002 00002810 2 PRED_SETE_PUSH R0.x, R2.x, R2.x 0012 00804402 20002800 PRED_SETE_PUSH __.y, R2.y, R2.y 0014 01004802 40002800 PRED_SETE_PUSH __.z, R2.z, R2.z 0016 819FCCFE 60002800 PRED_SETE_PUSH __.w, PV.w, PV.w 0018 800000FE 60603391 3 NOT_INT*4 R3.w, |PV.x| 0020 019FC002 00000110 4 MUL R0.x, R2.x, PV.w 0022 019FC402 20000110 MUL R0.y, R2.y, PV.w 0024 819FC802 40000110 MUL R0.z, R2.z, PV.w 0026 801FCC02 60000C90 5 SETE_DX10*4 R0.w, R2.w, PV.x 0028 0010E000 00602800 6 PRED_SETE_PUSH __.x, R0.x, KC0[7].x 0030 0090E400 20602800 PRED_SETE_PUSH __.y, R0.y, KC0[7].y 0032 0110E800 40602800 PRED_SETE_PUSH __.z, R0.z, KC0[7].z 0034 81804CFE 60602810 PRED_SETE_PUSH R3.w, PV.w, R2.w 0036 819FCCFE 606340F8 7 CNDGE R3.w, PV.w, PV.w, 0 0038 00000CFE 60603190 8 OR_INT*4 R3.w, PV.w, R0.x 0040 80000C02 61000C90 SETE_DX10*4 R8.w, R2.w, R0.x 0042 019FC007 00400110 9 MUL R2.x, R7.x, PV.w 0044 0090A400 20002810 PRED_SETE_PUSH R0.y, R0.y, KC0[5].y 0046 0110A800 40002800 PRED_SETE_PUSH __.z, R0.z, KC0[5].z 0048 001FEC00 60002800 PRED_SETE_PUSH __.w, R0.w, PS 0050 8010A000 00002800 PRED_SETE_PUSH __.x, R0.x, KC0[5].x 0052 80006084 00000110 10 MUL R0.x, KC0[4].x, R3.x 0054 0000C0FE 40000010 11 ADD R0.z, PV.x, R6.x 0056 80100001 60000110 MUL R0.w, R1.x, KC0[0].x 0058 80000002 00003090 12 AND_INT*4 R0.x, R2.x, R0.x 0060 001FC400 000340F8 13 CNDGE R0.x, R0.y, PV.x, 0 0062 80102401 00400110 MUL R2.x, R1.y, KC0[1].x 0064 018000FF 00400010 14 ADD R2.x, PS, R0.w 0066 00800400 200340F8 CNDGE R0.y, R0.y, R0.y, 0 0068 00008089 60000110 MUL R0.w, KC0[9].x, R4.x 0070 80900001 20400110 MUL R2.y, R1.x, KC0[0].y 0072 00902401 40400110 15 MUL R2.z, R1.y, KC0[1].y 0074 819FC4FE 60000110 MUL R0.w, PV.y, PV.w 0076 00006088 20400110 16 MUL R2.y, KC0[8].x, R3.x 0078 808048FE 60600010 ADD R3.w, PV.z, R2.y 0080 810004FE 40000010 17 ADD R0.z, PV.y, R0.z 0082 00806484 40000110 18 MUL R0.z, KC0[4].y, R3.y 0084 811FCC00 60000010 ADD R0.w, R0.w, PV.z 0086 0080C8FE 00800010 19 ADD R4.x, PV.z, R6.y 0088 80104801 40000110 MUL R0.z, R1.z, KC0[2].x 0090 000048FE 00400010 20 ADD R2.x, PV.z, R2.x 0092 00106C01 20400110 MUL R2.y, R1.w, KC0[3].x 0094 81100001 40000110 MUL R0.z, R1.x, KC0[0].z 0096 001FC4FE 00400010 21 ADD R2.x, PV.y, PV.x 0098 81102401 00600110 MUL R3.x, R1.y, KC0[1].z 0100 01006884 00640110 22 MUL R3.x, KC0[4].z, R3.z BS:1 0102 810000FF 40000010 ADD R0.z, PS, R0.z 0104 0100C0FE 00600010 23 ADD R3.x, PV.x, R6.z 0106 80808489 20800110 MUL R4.y, KC0[9].y, R4.y 0108 009FC400 20800110 24 MUL R4.y, R0.y, PV.y 0110 80904801 60A00110 MUL R5.w, R1.z, KC0[2].y 0112 00806488 20600110 25 MUL R3.y, KC0[8].y, R3.y 0114 81806CFE 60600010 ADD R3.w, PV.w, R3.w 0116 800084FE 20600010 26 ADD R3.y, PV.y, R4.x 0118 0000A08A 00800110 27 MUL R4.x, KC0[10].x, R5.x 0120 809FC404 20600010 ADD R3.y, R4.y, PV.y 0122 801FC000 00800110 28 MUL R4.x, R0.x, PV.x 0124 018000FE 80C00010 29 ADD_sat R6.x, PV.x, R0.w 0126 01900001 60000110 MUL R0.w, R1.x, KC0[0].w 0128 8080A48A 00200110 MUL R1.x, KC0[10].y, R5.y 0130 801FE000 00200110 30 MUL R1.x, R0.x, PS 0132 00906C01 00200110 31 MUL R1.x, R1.w, KC0[3].y 0134 808060FE A0C00010 ADD_sat R6.y, PV.x, R3.y 0136 01902401 00200110 32 MUL R1.x, R1.y, KC0[1].w 0138 818060FE 20400010 ADD R2.y, PV.x, R3.w 0140 01006888 00200110 33 MUL R1.x, KC0[8].z, R3.z 0142 818000FE 60000010 ADD R0.w, PV.x, R0.w 0144 000060FE 00200010 34 ADD R1.x, PV.x, R3.x 0146 81104801 20200110 MUL R1.y, R1.z, KC0[2].z 0148 01008889 20240110 35 MUL R1.y, KC0[9].z, R4.z BS:1 0150 810004FE 40000010 ADD R0.z, PV.y, R0.z 0152 809FC400 20000110 36 MUL R0.y, R0.y, PV.y 0154 000024FE 00200010 37 ADD R1.x, PV.y, R1.x 0156 81904801 20000110 MUL R0.y, R1.z, KC0[2].w 0158 018004FE 20000010 38 ADD R0.y, PV.y, R0.w 0160 81106C01 60000110 MUL R0.w, R1.w, KC0[3].z 0162 01000CFE 40440010 39 ADD R2.z, PV.w, R0.z BS:1 0164 8100A88A 40000110 MUL R0.z, KC0[10].z, R5.z 0166 801FE000 00000110 40 MUL R0.x, R0.x, PS 0168 01906C01 00000110 41 MUL R0.x, R1.w, KC0[3].w 0170 800020FE C0C00010 ADD_sat R6.z, PV.x, R1.x 0172 008000FE 60400010 42 ADD R2.w, PV.x, R0.y 0174 801FCC04 E0C00C90 SETE_DX10*4_sat R6.w, R4.w, PV.x 0004 C001203C 94000688 EXPORT_DONE POS 60 R2.xyzw ES:3 0006 C0034000 94200688 EXPORT_DONE PARAM 0 R6.xyzw ES:3 EOP -------------------------------------- ______________________________________________________________ -------------------------------------------------------------- FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main() { main_body: %0 = call float @llvm.R600.load.input(i32 0) %1 = call float @llvm.R600.load.input(i32 1) %2 = call float @llvm.R600.load.input(i32 2) %3 = call float @llvm.R600.load.input(i32 3) %4 = insertelement <4 x float> undef, float %0, i32 0 %5 = insertelement <4 x float> %4, float %1, i32 1 %6 = insertelement <4 x float> %5, float %2, i32 2 %7 = insertelement <4 x float> %6, float %3, i32 3 call void @llvm.R600.store.swizzle(<4 x float> %7, i32 0, i32 0) ret void } declare float @llvm.R600.load.input(i32) readnone declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) # Machine code for function main: Post SSA, not tracking liveness Function Live Ins: %T0_W in %vreg0, %T0_Z in %vreg1, %T0_Y in %vreg2, %T0_X in %vreg3 BB#0: derived from LLVM BB %main_body Live Ins: %T0_W %T0_Z %T0_Y %T0_X %T0_X = KILL %T0_X, %T0_XYZW %T0_Y = KILL %T0_Y, %T0_XYZW, %T0_XYZW %T0_Z = KILL %T0_Z, %T0_XYZW, %T0_XYZW %T0_W = KILL %T0_W, %T0_XYZW, %T0_XYZW R600_ExportSwz %T0_XYZW, 0, 0, 0, 1, 2, 3, 40, 1 RETURN # End machine code for function main. -------------------------------------------------------------- bytecode 2 dw -- 1 gprs --------------------- shader 9 -- 6 0000 C0000000 94200688 EXPORT_DONE PIXEL 0 R0.xyzw ES:3 EOP -------------------------------------- ______________________________________________________________ -------------------------------------------------------------- VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL CONST[0..12] DCL TEMP[0..3] 0: MUL TEMP[0], IN[0].xxxx, CONST[0] 1: MAD TEMP[0], IN[0].yyyy, CONST[1], TEMP[0] 2: MAD TEMP[0], IN[0].zzzz, CONST[2], TEMP[0] 3: MAD OUT[0], IN[0].wwww, CONST[3], TEMP[0] 4: DP3 TEMP[1].x, CONST[4], CONST[4] 5: RSQ TEMP[1].x, TEMP[1] 6: MUL TEMP[0], CONST[4], TEMP[1].xxxx 7: MOV TEMP[2].w, CONST[5].xxxx 8: MOV TEMP[3], CONST[6] 9: MOV_SAT OUT[1], TEMP[3] 10: DP3 TEMP[2].x, TEMP[0], CONST[7] 11: DP3 TEMP[2].y, TEMP[0], CONST[9] 12: LIT TEMP[1], TEMP[2] 13: ADD TEMP[3], CONST[10], TEMP[3] 14: MAD TEMP[3], TEMP[1].yyyy, CONST[11], TEMP[3] 15: MAD_SAT OUT[1].xyz, TEMP[1].zzzz, CONST[12], TEMP[3] 16: END ; ModuleID = 'tgsi' define void @main() { main_body: %0 = call float @llvm.R600.load.input(i32 4) %1 = call float @llvm.R600.load.input(i32 5) %2 = call float @llvm.R600.load.input(i32 6) %3 = call float @llvm.R600.load.input(i32 7) %4 = load <4 x float> addrspace(9)* null %5 = extractelement <4 x float> %4, i32 0 %6 = fmul float %0, %5 %7 = load <4 x float> addrspace(9)* null %8 = extractelement <4 x float> %7, i32 1 %9 = fmul float %0, %8 %10 = load <4 x float> addrspace(9)* null %11 = extractelement <4 x float> %10, i32 2 %12 = fmul float %0, %11 %13 = load <4 x float> addrspace(9)* null %14 = extractelement <4 x float> %13, i32 3 %15 = fmul float %0, %14 %16 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1) %17 = extractelement <4 x float> %16, i32 0 %18 = fmul float %1, %17 %19 = fadd float %18, %6 %20 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1) %21 = extractelement <4 x float> %20, i32 1 %22 = fmul float %1, %21 %23 = fadd float %22, %9 %24 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1) %25 = extractelement <4 x float> %24, i32 2 %26 = fmul float %1, %25 %27 = fadd float %26, %12 %28 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1) %29 = extractelement <4 x float> %28, i32 3 %30 = fmul float %1, %29 %31 = fadd float %30, %15 %32 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2) %33 = extractelement <4 x float> %32, i32 0 %34 = fmul float %2, %33 %35 = fadd float %34, %19 %36 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2) %37 = extractelement <4 x float> %36, i32 1 %38 = fmul float %2, %37 %39 = fadd float %38, %23 %40 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2) %41 = extractelement <4 x float> %40, i32 2 %42 = fmul float %2, %41 %43 = fadd float %42, %27 %44 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2) %45 = extractelement <4 x float> %44, i32 3 %46 = fmul float %2, %45 %47 = fadd float %46, %31 %48 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3) %49 = extractelement <4 x float> %48, i32 0 %50 = fmul float %3, %49 %51 = fadd float %50, %35 %52 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3) %53 = extractelement <4 x float> %52, i32 1 %54 = fmul float %3, %53 %55 = fadd float %54, %39 %56 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3) %57 = extractelement <4 x float> %56, i32 2 %58 = fmul float %3, %57 %59 = fadd float %58, %43 %60 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3) %61 = extractelement <4 x float> %60, i32 3 %62 = fmul float %3, %61 %63 = fadd float %62, %47 %64 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 4) %65 = extractelement <4 x float> %64, i32 0 %66 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 4) %67 = extractelement <4 x float> %66, i32 0 %68 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 4) %69 = extractelement <4 x float> %68, i32 1 %70 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 4) %71 = extractelement <4 x float> %70, i32 1 %72 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 4) %73 = extractelement <4 x float> %72, i32 2 %74 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 4) %75 = extractelement <4 x float> %74, i32 2 %76 = insertelement <4 x float> undef, float %65, i32 0 %77 = insertelement <4 x float> %76, float %69, i32 1 %78 = insertelement <4 x float> %77, float %73, i32 2 %79 = insertelement <4 x float> %78, float 0.000000e+00, i32 3 %80 = insertelement <4 x float> undef, float %67, i32 0 %81 = insertelement <4 x float> %80, float %71, i32 1 %82 = insertelement <4 x float> %81, float %75, i32 2 %83 = insertelement <4 x float> %82, float 0.000000e+00, i32 3 %84 = call float @llvm.AMDGPU.dp4(<4 x float> %79, <4 x float> %83) %85 = call float @fabs(float %84) %86 = call float @llvm.AMDGPU.rsq(float %85) %87 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 4) %88 = extractelement <4 x float> %87, i32 0 %89 = fmul float %88, %86 %90 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 4) %91 = extractelement <4 x float> %90, i32 1 %92 = fmul float %91, %86 %93 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 4) %94 = extractelement <4 x float> %93, i32 2 %95 = fmul float %94, %86 %96 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 4) %97 = extractelement <4 x float> %96, i32 3 %98 = fmul float %97, %86 %99 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 5) %100 = extractelement <4 x float> %99, i32 0 %101 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 6) %102 = extractelement <4 x float> %101, i32 0 %103 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 6) %104 = extractelement <4 x float> %103, i32 1 %105 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 6) %106 = extractelement <4 x float> %105, i32 2 %107 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 6) %108 = extractelement <4 x float> %107, i32 3 %109 = call float @llvm.AMDIL.clamp.(float %102, float 0.000000e+00, float 1.000000e+00) %110 = call float @llvm.AMDIL.clamp.(float %104, float 0.000000e+00, float 1.000000e+00) %111 = call float @llvm.AMDIL.clamp.(float %106, float 0.000000e+00, float 1.000000e+00) %112 = call float @llvm.AMDIL.clamp.(float %108, float 0.000000e+00, float 1.000000e+00) %113 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 7) %114 = extractelement <4 x float> %113, i32 0 %115 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 7) %116 = extractelement <4 x float> %115, i32 1 %117 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 7) %118 = extractelement <4 x float> %117, i32 2 %119 = insertelement <4 x float> undef, float %89, i32 0 %120 = insertelement <4 x float> %119, float %92, i32 1 %121 = insertelement <4 x float> %120, float %95, i32 2 %122 = insertelement <4 x float> %121, float 0.000000e+00, i32 3 %123 = insertelement <4 x float> undef, float %114, i32 0 %124 = insertelement <4 x float> %123, float %116, i32 1 %125 = insertelement <4 x float> %124, float %118, i32 2 %126 = insertelement <4 x float> %125, float 0.000000e+00, i32 3 %127 = call float @llvm.AMDGPU.dp4(<4 x float> %122, <4 x float> %126) %128 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 9) %129 = extractelement <4 x float> %128, i32 0 %130 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 9) %131 = extractelement <4 x float> %130, i32 1 %132 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 9) %133 = extractelement <4 x float> %132, i32 2 %134 = insertelement <4 x float> undef, float %89, i32 0 %135 = insertelement <4 x float> %134, float %92, i32 1 %136 = insertelement <4 x float> %135, float %95, i32 2 %137 = insertelement <4 x float> %136, float 0.000000e+00, i32 3 %138 = insertelement <4 x float> undef, float %129, i32 0 %139 = insertelement <4 x float> %138, float %131, i32 1 %140 = insertelement <4 x float> %139, float %133, i32 2 %141 = insertelement <4 x float> %140, float 0.000000e+00, i32 3 %142 = call float @llvm.AMDGPU.dp4(<4 x float> %137, <4 x float> %141) %143 = fcmp uge float %127, 0.000000e+00 %144 = select i1 %143, float %127, float 0.000000e+00 %145 = fcmp uge float %142, 0.000000e+00 %146 = select i1 %145, float %142, float 0.000000e+00 %147 = call float @llvm.pow.f32(float %146, float %100) %148 = fcmp ult float %127, 0.000000e+00 %149 = select i1 %148, float 0.000000e+00, float %147 %150 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 10) %151 = extractelement <4 x float> %150, i32 0 %152 = fadd float %151, %102 %153 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 10) %154 = extractelement <4 x float> %153, i32 1 %155 = fadd float %154, %104 %156 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 10) %157 = extractelement <4 x float> %156, i32 2 %158 = fadd float %157, %106 %159 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 10) %160 = extractelement <4 x float> %159, i32 3 %161 = fadd float %160, %108 %162 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 11) %163 = extractelement <4 x float> %162, i32 0 %164 = fmul float %144, %163 %165 = fadd float %164, %152 %166 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 11) %167 = extractelement <4 x float> %166, i32 1 %168 = fmul float %144, %167 %169 = fadd float %168, %155 %170 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 11) %171 = extractelement <4 x float> %170, i32 2 %172 = fmul float %144, %171 %173 = fadd float %172, %158 %174 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 11) %175 = extractelement <4 x float> %174, i32 3 %176 = fmul float %144, %175 %177 = fadd float %176, %161 %178 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 12) %179 = extractelement <4 x float> %178, i32 0 %180 = fmul float %149, %179 %181 = fadd float %180, %165 %182 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 12) %183 = extractelement <4 x float> %182, i32 1 %184 = fmul float %149, %183 %185 = fadd float %184, %169 %186 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 12) %187 = extractelement <4 x float> %186, i32 2 %188 = fmul float %149, %187 %189 = fadd float %188, %173 %190 = call float @llvm.AMDIL.clamp.(float %181, float 0.000000e+00, float 1.000000e+00) %191 = call float @llvm.AMDIL.clamp.(float %185, float 0.000000e+00, float 1.000000e+00) %192 = call float @llvm.AMDIL.clamp.(float %189, float 0.000000e+00, float 1.000000e+00) %193 = insertelement <4 x float> undef, float %51, i32 0 %194 = insertelement <4 x float> %193, float %55, i32 1 %195 = insertelement <4 x float> %194, float %59, i32 2 %196 = insertelement <4 x float> %195, float %63, i32 3 call void @llvm.R600.store.swizzle(<4 x float> %196, i32 60, i32 1) %197 = insertelement <4 x float> undef, float %190, i32 0 %198 = insertelement <4 x float> %197, float %191, i32 1 %199 = insertelement <4 x float> %198, float %192, i32 2 %200 = insertelement <4 x float> %199, float %112, i32 3 call void @llvm.R600.store.swizzle(<4 x float> %200, i32 0, i32 2) ret void } declare float @llvm.R600.load.input(i32) readnone declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) readnone declare float @fabs(float) readonly declare float @llvm.AMDGPU.rsq(float) readnone declare float @llvm.AMDIL.clamp.(float, float, float) readnone declare float @llvm.pow.f32(float, float) nounwind readonly declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) # Machine code for function main: Post SSA, not tracking liveness Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3 BB#0: derived from LLVM BB %main_body Live Ins: %T1_W %T1_Z %T1_Y %T1_X %T0_W = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, %T0_XYZW, %T0_XYZW BUNDLE %T2_X, %T2_Y, %T2_Z, %T2_W, %T0_X, %PRED_SEL_OFF, %T0_Y, %T0_Z, %T0_W * %T2_X = DOT4_r600_real 0, 0, 1, 0, 0, 0, %ALU_CONST, 0, 0, 0, 2064, %ALU_CONST, 0, 0, 0, 2064, 0, pred:%PRED_SEL_OFF, 0; flags: * %T2_Y = DOT4_r600_real 0, 0, 0, 0, 0, 0, %ALU_CONST, 0, 0, 0, 2065, %ALU_CONST, 0, 0, 0, 2065, 0, pred:%PRED_SEL_OFF, 0; flags: * %T2_Z = DOT4_r600_real 0, 0, 0, 0, 0, 0, %ALU_CONST, 0, 0, 0, 2066, %ALU_CONST, 0, 0, 0, 2066, 0, pred:%PRED_SEL_OFF, 0; flags: * %T2_W = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T0_W, 0, 0, 0, -1, %T0_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0; flags: %T3_X = RECIPSQRT_CLAMPED_r600 1, 0, 0, 0, %T2_X, 0, 0, 1, -1, 1, pred:%PRED_SEL_OFF, 0 %T2_X = MUL_IEEE 0, 0, 1, 0, 0, 0, %ALU_CONST, 0, 0, 0, 2064, %T3_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, %T2_XYZW %T2_Y = MUL_IEEE 0, 0, 1, 0, 0, 0, %ALU_CONST, 0, 0, 0, 2065, %T3_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, %T2_XYZW, %T2_XYZW %T2_Z = MUL_IEEE 0, 0, 1, 0, 0, 0, %ALU_CONST, 0, 0, 0, 2066, %T3_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, %T2_XYZW, %T2_XYZW %T2_W = MOV 1, 0, 0, 0, %T0_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, %T2_XYZW BUNDLE %T4_X, %T4_Y, %T4_Z, %T4_W, %T2_X, %T0_X, %PRED_SEL_OFF, %T2_Y, %T0_Y, %T2_Z, %T0_Z, %T2_W, %T0_W * %T4_X = DOT4_r600_real 0, 0, 1, 0, 0, 0, %T2_X, 0, 0, 0, -1, %ALU_CONST, 0, 0, 0, 2084, 0, pred:%PRED_SEL_OFF, 0; flags: * %T4_Y = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_Y, 0, 0, 0, -1, %ALU_CONST, 0, 0, 0, 2085, 0, pred:%PRED_SEL_OFF, 0; flags: * %T4_Z = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_Z, 0, 0, 0, -1, %ALU_CONST, 0, 0, 0, 2086, 0, pred:%PRED_SEL_OFF, 0; flags: * %T4_W = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_W, 0, 0, 0, -1, %T0_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0; flags: %T4_X = CNDGE_r600 0, 0, %T4_X, 0, 0, -1, %T4_X, 0, 0, -1, %ZERO, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T4_X = LOG_IEEE_r600 1, 0, 0, 0, %T4_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T3_W = MOV 1, 0, 0, 0, %T0_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, %T3_XYZW %T0_X = MUL_IEEE 0, 0, 1, 0, 0, 0, %ALU_CONST, 0, 0, 0, 2068, %T4_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 BUNDLE %T0_X, %T0_Y, %T0_Z, %T0_W, %T2_X, %T3_X, %PRED_SEL_OFF, %T2_Y, %T3_Y, %T2_Z, %T3_Z, %T2_W, %T3_W * %T0_X = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_X, 0, 0, 0, -1, %ALU_CONST, 0, 0, 0, 2076, 0, pred:%PRED_SEL_OFF, 0; flags: * %T0_Y = DOT4_r600_real 0, 0, 1, 0, 0, 0, %T2_Y, 0, 0, 0, -1, %ALU_CONST, 0, 0, 0, 2077, 0, pred:%PRED_SEL_OFF, 0; flags: * %T0_Z = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_Z, 0, 0, 0, -1, %ALU_CONST, 0, 0, 0, 2078, 0, pred:%PRED_SEL_OFF, 0; flags: * %T0_W = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_W, 0, 0, 0, -1, %T3_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0; flags: %T3_Y = CNDGE_r600 0, 0, %T0_Y, 0, 0, -1, %T0_Y, 0, 0, -1, %ZERO, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T0_Z = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_X, 0, 0, 0, -1, %ALU_CONST, 0, 0, 0, 2048, 1, pred:%PRED_SEL_OFF, 0 %T0_X = EXP_IEEE_r600 1, 0, 0, 0, %T0_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T3_X = CNDGE_r600 0, 0, %T0_Y, 0, 0, -1, %T0_X, 0, 0, -1, %ZERO, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T0_X = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_Y, 0, 0, 0, -1, %ALU_CONST, 0, 0, 0, 2052, 1, pred:%PRED_SEL_OFF, 0 %T0_Y = ADD 0, 0, 1, 0, 0, 0, %T0_X, 0, 0, 0, -1, %T0_Z, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T0_X = MUL_IEEE 0, 0, 1, 0, 0, 0, %T3_Y, 0, 0, 0, -1, %ALU_CONST, 0, 0, 0, 2092, 1, pred:%PRED_SEL_OFF, 0 %T0_Z = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_X, 0, 0, 0, -1, %ALU_CONST, 0, 0, 0, 2049, 1, pred:%PRED_SEL_OFF, 0 %T0_W = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_Z, 0, 0, 0, -1, %ALU_CONST, 0, 0, 0, 2056, 1, pred:%PRED_SEL_OFF, 0 %T2_X = ADD 0, 0, 1, 0, 0, 0, %T0_W, 0, 0, 0, -1, %T0_Y, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T0_Y = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_Y, 0, 0, 0, -1, %ALU_CONST, 0, 0, 0, 2053, 1, pred:%PRED_SEL_OFF, 0 %T2_Y = ADD 0, 0, 1, 0, 0, 0, %T0_Y, 0, 0, 0, -1, %T0_Z, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T3_Z = MUL_IEEE 0, 0, 1, 0, 0, 0, %T3_Y, 0, 0, 0, -1, %ALU_CONST, 0, 0, 0, 2093, 1, pred:%PRED_SEL_OFF, 0 %T0_Y = MUL_IEEE 0, 0, 1, 0, 0, 0, %T3_X, 0, 0, 0, -1, %ALU_CONST, 0, 0, 0, 2096, 1, pred:%PRED_SEL_OFF, 0 %T2_Z = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_X, 0, 0, 0, -1, %ALU_CONST, 0, 0, 0, 2050, 1, pred:%PRED_SEL_OFF, 0 %T0_Z = ADD 0, 0, 1, 0, 0, 0, %ALU_CONST, 0, 0, 0, 2088, %ALU_CONST, 0, 0, 0, 2072, 1, pred:%PRED_SEL_OFF, 0 %T0_X = ADD 0, 0, 1, 0, 0, 0, %T0_X, 0, 0, 0, -1, %T0_Z, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0 %T0_X = ADD 0, 0, 1, 0, 0, 1, %T0_Y, 0, 0, 0, -1, %T0_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, %T0_XYZW %T2_W