===== SHADER #200 OPT ====================================== PS/RS880/R600 ===== ===== 120 dw ===== 5 gprs ===== 0 stack ======================================== 0000 00000006 a0080000 ALU 3 @12 0012 80000c00 6f806610 1 t: RECIP_IEEE T0.w, R0.w 0014 018f8000 00000210 2 x: MUL_IEEE R0.x, R0.x, T0.w 0016 818f8400 20000210 y: MUL_IEEE R0.y, R0.y, T0.w 0002 0000000a 80800000 TEX 1 @20 0020 00001110 f01f9004 fc808000 SAMPLE R4.xy__, R0.xy__, RID:17, SID:1 CT:NNNN 0004 0000000c a0700000 ALU 29 @24 0024 801fa404 600204fd 3 w: MULADD R0.w, R4.y, [0x40000000 2].x, [0xbf800000 -1].y 0026 40000000 0027 bf800000 0028 801fa004 6fe204fd 4 w: MULADD T3.w, R4.x, [0x40000000 2].x, [0xbf800000 -1].y 0030 40000000 0031 bf800000 0032 018fec7f 00005000 5 x: DOT4 __.x, T3.w, T3.w 0034 01800c00 20085000 y: DOT4 __.y, R0.w, R0.w VEC_120 0036 001f00f8 40005000 z: DOT4 __.z, 0, 0 0038 801f00f8 6f805010 w: DOT4 T0.w, 0, 0 0040 801f3c7c efc00010 6 w: ADD_sat T2.w, -T0.w, 1.0 0042 80000c7e 6f806710 7 t: RECIPSQRT_CLAMPED T0.w, T2.w 0044 018fe001 0f840210 8 x: MUL_IEEE T0.x, R1.x, T3.w VEC_021 0046 018fcc7c 6fa00210 w: MUL_IEEE T1.w, T0.w, T2.w 0048 818fe801 6f840210 t: MUL_IEEE T0.w, R1.z, T3.w SCL_122 0050 01800002 0fa2807c 9 x: MULADD_IEEE T1.x, R2.x, R0.w, T0.x 0052 018fe401 2f8c0210 y: MUL_IEEE T0.y, R1.y, T3.w VEC_102 0054 81800802 6f828c7c w: MULADD_IEEE T0.w, R2.z, R0.w, T0.w 0056 01800402 0f82847c 10 x: MULADD_IEEE T0.x, R2.y, R0.w, T0.y 0058 801f1c7e 4f834c7d z: CNDGE T0.z, -T2.w, 0, T1.w 0060 010f8403 0f86807c 11 x: MULADD_IEEE T0.x, R3.y, T0.z, T0.x VEC_021 0062 010f8003 2f82807d y: MULADD_IEEE T0.y, R3.x, T0.z, T1.x 0064 810f8803 6f828c7c w: MULADD_IEEE T0.w, R3.z, T0.z, T0.w 0066 000f8c7c 0f805210 12 x: CUBE T0.x, T0.w, T0.x 0068 008f8c7c 2f805210 y: CUBE T0.y, T0.w, T0.y 0070 018f847c 4f805210 z: CUBE T0.z, T0.y, T0.w 0072 818f807c 60205210 w: CUBE R1.w, T0.x, T0.w 0074 8000087c 6f806611 13 t: RECIP_IEEE T0.w, |T0.z| 0076 018f847c 002280fd 14 x: MULADD_IEEE R1.x, T0.y, T0.w, [0x3fc00000 1.5].x 0078 818f807c 202280fd y: MULADD_IEEE R1.y, T0.x, T0.w, [0x3fc00000 1.5].x 0080 3fc00000 0006 0000002a 80800400 TEX 2 @84 0084 00011210 f01d1002 0c810000 SAMPLE R2.xyz_, R1.xywx, RID:18, SID:2 CT:NNNN 0088 00001010 f00d1001 fc800000 SAMPLE R1.xyzw, R0.xy__, RID:16, SID:0 CT:NNNN 0008 4000002e a0340000 ALU 14 @92 KC0[CB0:0-15] 0092 01004801 0f800210 15 x: MUL_IEEE T0.x, R1.z, R2.z 0094 81100801 2f800210 y: MUL_IEEE T0.y, R1.z, KC0[0].z 0096 0010807c 0fa2847c 16 x: MULADD_IEEE T1.x, T0.x, KC0[4].x, T0.y 0098 00900401 2f800210 y: MUL_IEEE T0.y, R1.y, KC0[0].y 0100 00004001 4f880210 z: MUL_IEEE T0.z, R1.x, R2.x VEC_120 0102 00804401 6f800210 w: MUL_IEEE T0.w, R1.y, R2.y 0104 80100001 0f840210 t: MUL_IEEE T0.x, R1.x, KC0[0].x SCL_122 0106 0010887c 0f82807c 17 x: MULADD_IEEE T0.x, T0.z, KC0[4].x, T0.x 0108 00108c7c 2f82847c y: MULADD_IEEE T0.y, T0.w, KC0[4].x, T0.y 0110 81000c01 4f880210 z: MUL_IEEE T0.z, R1.w, R0.z VEC_120 0112 0010a07c 00000210 18 x: MUL_IEEE R0.x, T0.x, KC0[5].x 0114 0090a47c 20000210 y: MUL_IEEE R0.y, T0.y, KC0[5].y 0116 0110a07d 40080210 z: MUL_IEEE R0.z, T1.x, KC0[5].z VEC_120 0118 8190a87c 60000210 w: MUL_IEEE R0.w, T0.z, KC0[5].w 0010 c0000000 94200688 EXPORT_DONE PIXEL 0 R0.xyzw EOP ===== SHADER_END =============================================================== -------------------------------------------------------------- VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL CONST[0..11] DCL TEMP[0..6], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: DP4 TEMP[0].x, CONST[8], IN[0] 1: DP4 TEMP[1].x, CONST[9], IN[0] 2: DP4 TEMP[2].x, CONST[10], IN[0] 3: DP3 TEMP[3].x, CONST[8].xyzz, IN[2].xyzz 4: DP3 TEMP[4].x, CONST[9].xyzz, IN[2].xyzz 5: MOV TEMP[3].y, TEMP[4].xxxx 6: DP3 TEMP[4].x, CONST[10].xyzz, IN[2].xyzz 7: MOV TEMP[3].z, TEMP[4].xxxx 8: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 9: RSQ TEMP[4].x, TEMP[4].xxxx 10: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx 11: DP3 TEMP[4].x, CONST[8].xyzz, IN[3].xyzz 12: DP3 TEMP[5].x, CONST[9].xyzz, IN[3].xyzz 13: MOV TEMP[4].y, TEMP[5].xxxx 14: DP3 TEMP[5].x, CONST[10].xyzz, IN[3].xyzz 15: MOV TEMP[4].z, TEMP[5].xxxx 16: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 17: RSQ TEMP[5].x, TEMP[5].xxxx 18: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx 19: MOV TEMP[5].w, IN[1].wwww 20: MUL TEMP[6].xy, IN[1].xyyy, CONST[11].xyyy 21: MAD TEMP[5].xy, CONST[11].zwww, IN[1].wwww, TEMP[6].xyyy 22: MOV TEMP[5].z, IN[2].wwww 23: MUL TEMP[0], CONST[0], TEMP[0].xxxx 24: MAD TEMP[0], CONST[1], TEMP[1].xxxx, TEMP[0] 25: MAD TEMP[0], CONST[2], TEMP[2].xxxx, TEMP[0] 26: ADD TEMP[0], TEMP[0], CONST[3] 27: MUL TEMP[1], CONST[4], TEMP[4].xxxx 28: MAD TEMP[1], CONST[5], TEMP[4].yyyy, TEMP[1] 29: MAD TEMP[1].xyz, CONST[6], TEMP[4].zzzz, TEMP[1] 30: MOV TEMP[1].xyz, TEMP[1].xyzx 31: MUL TEMP[6].xyz, TEMP[3].zxyy, TEMP[4].yzxx 32: MAD TEMP[4].xyz, TEMP[3].yzxx, TEMP[4].zxyy, -TEMP[6].xyzz 33: MUL TEMP[2].xyz, TEMP[4].xyzz, IN[3].wwww 34: MUL TEMP[4], CONST[4], TEMP[2].xxxx 35: MAD TEMP[4], CONST[5], TEMP[2].yyyy, TEMP[4] 36: MAD TEMP[2].xyz, CONST[6], TEMP[2].zzzz, TEMP[4] 37: MOV TEMP[2].xyz, TEMP[2].xyzx 38: MUL TEMP[4], CONST[4], TEMP[3].xxxx 39: MAD TEMP[4], CONST[5], TEMP[3].yyyy, TEMP[4] 40: MAD TEMP[3].xyz, CONST[6], TEMP[3].zzzz, TEMP[4] 41: MOV TEMP[3].xyz, TEMP[3].xyzx 42: MOV OUT[1], TEMP[5] 43: MOV OUT[2], TEMP[1] 44: MOV OUT[3], TEMP[2] 45: MOV OUT[4], TEMP[3] 46: MOV OUT[0], TEMP[0] 47: END ; ModuleID = 'tgsi' define void @main(<4 x float> inreg, <4 x float> inreg, <4 x float> inreg, <4 x float> inreg, <4 x float> inreg) #0 { main_body: %5 = extractelement <4 x float> %1, i32 0 %6 = extractelement <4 x float> %1, i32 1 %7 = extractelement <4 x float> %1, i32 2 %8 = extractelement <4 x float> %1, i32 3 %9 = extractelement <4 x float> %2, i32 0 %10 = extractelement <4 x float> %2, i32 1 %11 = extractelement <4 x float> %2, i32 3 %12 = extractelement <4 x float> %3, i32 0 %13 = extractelement <4 x float> %3, i32 1 %14 = extractelement <4 x float> %3, i32 2 %15 = extractelement <4 x float> %3, i32 3 %16 = extractelement <4 x float> %4, i32 0 %17 = extractelement <4 x float> %4, i32 1 %18 = extractelement <4 x float> %4, i32 2 %19 = extractelement <4 x float> %4, i32 3 %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) %21 = extractelement <4 x float> %20, i32 0 %22 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) %23 = extractelement <4 x float> %22, i32 1 %24 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) %25 = extractelement <4 x float> %24, i32 2 %26 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) %27 = extractelement <4 x float> %26, i32 3 %28 = insertelement <4 x float> undef, float %21, i32 0 %29 = insertelement <4 x float> %28, float %23, i32 1 %30 = insertelement <4 x float> %29, float %25, i32 2 %31 = insertelement <4 x float> %30, float %27, i32 3 %32 = insertelement <4 x float> undef, float %5, i32 0 %33 = insertelement <4 x float> %32, float %6, i32 1 %34 = insertelement <4 x float> %33, float %7, i32 2 %35 = insertelement <4 x float> %34, float %8, i32 3 %36 = call float @llvm.AMDGPU.dp4(<4 x float> %31, <4 x float> %35) %37 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) %38 = extractelement <4 x float> %37, i32 0 %39 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) %40 = extractelement <4 x float> %39, i32 1 %41 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) %42 = extractelement <4 x float> %41, i32 2 %43 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) %44 = extractelement <4 x float> %43, i32 3 %45 = insertelement <4 x float> undef, float %38, i32 0 %46 = insertelement <4 x float> %45, float %40, i32 1 %47 = insertelement <4 x float> %46, float %42, i32 2 %48 = insertelement <4 x float> %47, float %44, i32 3 %49 = insertelement <4 x float> undef, float %5, i32 0 %50 = insertelement <4 x float> %49, float %6, i32 1 %51 = insertelement <4 x float> %50, float %7, i32 2 %52 = insertelement <4 x float> %51, float %8, i32 3 %53 = call float @llvm.AMDGPU.dp4(<4 x float> %48, <4 x float> %52) %54 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10) %55 = extractelement <4 x float> %54, i32 0 %56 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10) %57 = extractelement <4 x float> %56, i32 1 %58 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10) %59 = extractelement <4 x float> %58, i32 2 %60 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10) %61 = extractelement <4 x float> %60, i32 3 %62 = insertelement <4 x float> undef, float %55, i32 0 %63 = insertelement <4 x float> %62, float %57, i32 1 %64 = insertelement <4 x float> %63, float %59, i32 2 %65 = insertelement <4 x float> %64, float %61, i32 3 %66 = insertelement <4 x float> undef, float %5, i32 0 %67 = insertelement <4 x float> %66, float %6, i32 1 %68 = insertelement <4 x float> %67, float %7, i32 2 %69 = insertelement <4 x float> %68, float %8, i32 3 %70 = call float @llvm.AMDGPU.dp4(<4 x float> %65, <4 x float> %69) %71 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) %72 = extractelement <4 x float> %71, i32 0 %73 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) %74 = extractelement <4 x float> %73, i32 1 %75 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) %76 = extractelement <4 x float> %75, i32 2 %77 = insertelement <4 x float> undef, float %72, i32 0 %78 = insertelement <4 x float> %77, float %74, i32 1 %79 = insertelement <4 x float> %78, float %76, i32 2 %80 = insertelement <4 x float> %79, float 0.000000e+00, i32 3 %81 = insertelement <4 x float> undef, float %12, i32 0 %82 = insertelement <4 x float> %81, float %13, i32 1 %83 = insertelement <4 x float> %82, float %14, i32 2 %84 = insertelement <4 x float> %83, float 0.000000e+00, i32 3 %85 = call float @llvm.AMDGPU.dp4(<4 x float> %80, <4 x float> %84) %86 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) %87 = extractelement <4 x float> %86, i32 0 %88 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) %89 = extractelement <4 x float> %88, i32 1 %90 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) %91 = extractelement <4 x float> %90, i32 2 %92 = insertelement <4 x float> undef, float %87, i32 0 %93 = insertelement <4 x float> %92, float %89, i32 1 %94 = insertelement <4 x float> %93, float %91, i32 2 %95 = insertelement <4 x float> %94, float 0.000000e+00, i32 3 %96 = insertelement <4 x float> undef, float %12, i32 0 %97 = insertelement <4 x float> %96, float %13, i32 1 %98 = insertelement <4 x float> %97, float %14, i32 2 %99 = insertelement <4 x float> %98, float 0.000000e+00, i32 3 %100 = call float @llvm.AMDGPU.dp4(<4 x float> %95, <4 x float> %99) %101 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10) %102 = extractelement <4 x float> %101, i32 0 %103 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10) %104 = extractelement <4 x float> %103, i32 1 %105 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10) %106 = extractelement <4 x float> %105, i32 2 %107 = insertelement <4 x float> undef, float %102, i32 0 %108 = insertelement <4 x float> %107, float %104, i32 1 %109 = insertelement <4 x float> %108, float %106, i32 2 %110 = insertelement <4 x float> %109, float 0.000000e+00, i32 3 %111 = insertelement <4 x float> undef, float %12, i32 0 %112 = insertelement <4 x float> %111, float %13, i32 1 %113 = insertelement <4 x float> %112, float %14, i32 2 %114 = insertelement <4 x float> %113, float 0.000000e+00, i32 3 %115 = call float @llvm.AMDGPU.dp4(<4 x float> %110, <4 x float> %114) %116 = insertelement <4 x float> undef, float %85, i32 0 %117 = insertelement <4 x float> %116, float %100, i32 1 %118 = insertelement <4 x float> %117, float %115, i32 2 %119 = insertelement <4 x float> %118, float 0.000000e+00, i32 3 %120 = insertelement <4 x float> undef, float %85, i32 0 %121 = insertelement <4 x float> %120, float %100, i32 1 %122 = insertelement <4 x float> %121, float %115, i32 2 %123 = insertelement <4 x float> %122, float 0.000000e+00, i32 3 %124 = call float @llvm.AMDGPU.dp4(<4 x float> %119, <4 x float> %123) %125 = call float @llvm.AMDGPU.rsq(float %124) %126 = fmul float %85, %125 %127 = fmul float %100, %125 %128 = fmul float %115, %125 %129 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) %130 = extractelement <4 x float> %129, i32 0 %131 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) %132 = extractelement <4 x float> %131, i32 1 %133 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) %134 = extractelement <4 x float> %133, i32 2 %135 = insertelement <4 x float> undef, float %130, i32 0 %136 = insertelement <4 x float> %135, float %132, i32 1 %137 = insertelement <4 x float> %136, float %134, i32 2 %138 = insertelement <4 x float> %137, float 0.000000e+00, i32 3 %139 = insertelement <4 x float> undef, float %16, i32 0 %140 = insertelement <4 x float> %139, float %17, i32 1 %141 = insertelement <4 x float> %140, float %18, i32 2 %142 = insertelement <4 x float> %141, float 0.000000e+00, i32 3 %143 = call float @llvm.AMDGPU.dp4(<4 x float> %138, <4 x float> %142) %144 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) %145 = extractelement <4 x float> %144, i32 0 %146 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) %147 = extractelement <4 x float> %146, i32 1 %148 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) %149 = extractelement <4 x float> %148, i32 2 %150 = insertelement <4 x float> undef, float %145, i32 0 %151 = insertelement <4 x float> %150, float %147, i32 1 %152 = insertelement <4 x float> %151, float %149, i32 2 %153 = insertelement <4 x float> %152, float 0.000000e+00, i32 3 %154 = insertelement <4 x float> undef, float %16, i32 0 %155 = insertelement <4 x float> %154, float %17, i32 1 %156 = insertelement <4 x float> %155, float %18, i32 2 %157 = insertelement <4 x float> %156, float 0.000000e+00, i32 3 %158 = call float @llvm.AMDGPU.dp4(<4 x float> %153, <4 x float> %157) %159 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10) %160 = extractelement <4 x float> %159, i32 0 %161 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10) %162 = extractelement <4 x float> %161, i32 1 %163 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10) %164 = extractelement <4 x float> %163, i32 2 %165 = insertelement <4 x float> undef, float %160, i32 0 %166 = insertelement <4 x float> %165, float %162, i32 1 %167 = insertelement <4 x float> %166, float %164, i32 2 %168 = insertelement <4 x float> %167, float 0.000000e+00, i32 3 %169 = insertelement <4 x float> undef, float %16, i32 0 %170 = insertelement <4 x float> %169, float %17, i32 1 %171 = insertelement <4 x float> %170, float %18, i32 2 %172 = insertelement <4 x float> %171, float 0.000000e+00, i32 3 %173 = call float @llvm.AMDGPU.dp4(<4 x float> %168, <4 x float> %172) %174 = insertelement <4 x float> undef, float %143, i32 0 %175 = insertelement <4 x float> %174, float %158, i32 1 %176 = insertelement <4 x float> %175, float %173, i32 2 %177 = insertelement <4 x float> %176, float 0.000000e+00, i32 3 %178 = insertelement <4 x float> undef, float %143, i32 0 %179 = insertelement <4 x float> %178, float %158, i32 1 %180 = insertelement <4 x float> %179, float %173, i32 2 %181 = insertelement <4 x float> %180, float 0.000000e+00, i32 3 %182 = call float @llvm.AMDGPU.dp4(<4 x float> %177, <4 x float> %181) %183 = call float @llvm.AMDGPU.rsq(float %182) %184 = fmul float %143, %183 %185 = fmul float %158, %183 %186 = fmul float %173, %183 %187 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11) %188 = extractelement <4 x float> %187, i32 0 %189 = fmul float %9, %188 %190 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11) %191 = extractelement <4 x float> %190, i32 1 %192 = fmul float %10, %191 %193 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11) %194 = extractelement <4 x float> %193, i32 2 %195 = fmul float %194, %11 %196 = fadd float %195, %189 %197 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11) %198 = extractelement <4 x float> %197, i32 3 %199 = fmul float %198, %11 %200 = fadd float %199, %192 %201 = load <4 x float> addrspace(8)* null %202 = extractelement <4 x float> %201, i32 0 %203 = fmul float %202, %36 %204 = load <4 x float> addrspace(8)* null %205 = extractelement <4 x float> %204, i32 1 %206 = fmul float %205, %36 %207 = load <4 x float> addrspace(8)* null %208 = extractelement <4 x float> %207, i32 2 %209 = fmul float %208, %36 %210 = load <4 x float> addrspace(8)* null %211 = extractelement <4 x float> %210, i32 3 %212 = fmul float %211, %36 %213 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) %214 = extractelement <4 x float> %213, i32 0 %215 = fmul float %214, %53 %216 = fadd float %215, %203 %217 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) %218 = extractelement <4 x float> %217, i32 1 %219 = fmul float %218, %53 %220 = fadd float %219, %206 %221 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) %222 = extractelement <4 x float> %221, i32 2 %223 = fmul float %222, %53 %224 = fadd float %223, %209 %225 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) %226 = extractelement <4 x float> %225, i32 3 %227 = fmul float %226, %53 %228 = fadd float %227, %212 %229 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) %230 = extractelement <4 x float> %229, i32 0 %231 = fmul float %230, %70 %232 = fadd float %231, %216 %233 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) %234 = extractelement <4 x float> %233, i32 1 %235 = fmul float %234, %70 %236 = fadd float %235, %220 %237 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) %238 = extractelement <4 x float> %237, i32 2 %239 = fmul float %238, %70 %240 = fadd float %239, %224 %241 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) %242 = extractelement <4 x float> %241, i32 3 %243 = fmul float %242, %70 %244 = fadd float %243, %228 %245 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3) %246 = extractelement <4 x float> %245, i32 0 %247 = fadd float %232, %246 %248 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3) %249 = extractelement <4 x float> %248, i32 1 %250 = fadd float %236, %249 %251 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3) %252 = extractelement <4 x float> %251, i32 2 %253 = fadd float %240, %252 %254 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3) %255 = extractelement <4 x float> %254, i32 3 %256 = fadd float %244, %255 %257 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) %258 = extractelement <4 x float> %257, i32 0 %259 = fmul float %258, %184 %260 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) %261 = extractelement <4 x float> %260, i32 1 %262 = fmul float %261, %184 %263 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) %264 = extractelement <4 x float> %263, i32 2 %265 = fmul float %264, %184 %266 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) %267 = extractelement <4 x float> %266, i32 3 %268 = fmul float %267, %184 %269 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5) %270 = extractelement <4 x float> %269, i32 0 %271 = fmul float %270, %185 %272 = fadd float %271, %259 %273 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5) %274 = extractelement <4 x float> %273, i32 1 %275 = fmul float %274, %185 %276 = fadd float %275, %262 %277 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5) %278 = extractelement <4 x float> %277, i32 2 %279 = fmul float %278, %185 %280 = fadd float %279, %265 %281 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5) %282 = extractelement <4 x float> %281, i32 3 %283 = fmul float %282, %185 %284 = fadd float %283, %268 %285 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6) %286 = extractelement <4 x float> %285, i32 0 %287 = fmul float %286, %186 %288 = fadd float %287, %272 %289 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6) %290 = extractelement <4 x float> %289, i32 1 %291 = fmul float %290, %186 %292 = fadd float %291, %276 %293 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6) %294 = extractelement <4 x float> %293, i32 2 %295 = fmul float %294, %186 %296 = fadd float %295, %280 %297 = fmul float %128, %185 %298 = fmul float %126, %186 %299 = fmul float %127, %184 %300 = fsub float -0.000000e+00, %297 %301 = fmul float %127, %186 %302 = fadd float %301, %300 %303 = fsub float -0.000000e+00, %298 %304 = fmul float %128, %184 %305 = fadd float %304, %303 %306 = fsub float -0.000000e+00, %299 %307 = fmul float %126, %185 %308 = fadd float %307, %306 %309 = fmul float %302, %19 %310 = fmul float %305, %19 %311 = fmul float %308, %19 %312 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) %313 = extractelement <4 x float> %312, i32 0 %314 = fmul float %313, %309 %315 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) %316 = extractelement <4 x float> %315, i32 1 %317 = fmul float %316, %309 %318 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) %319 = extractelement <4 x float> %318, i32 2 %320 = fmul float %319, %309 %321 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5) %322 = extractelement <4 x float> %321, i32 0 %323 = fmul float %322, %310 %324 = fadd float %323, %314 %325 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5) %326 = extractelement <4 x float> %325, i32 1 %327 = fmul float %326, %310 %328 = fadd float %327, %317 %329 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5) %330 = extractelement <4 x float> %329, i32 2 %331 = fmul float %330, %310 %332 = fadd float %331, %320 %333 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6) %334 = extractelement <4 x float> %333, i32 0 %335 = fmul float %334, %311 %336 = fadd float %335, %324 %337 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6) %338 = extractelement <4 x float> %337, i32 1 %339 = fmul float %338, %311 %340 = fadd float %339, %328 %341 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6) %342 = extractelement <4 x float> %341, i32 2 %343 = fmul float %342, %311 %344 = fadd float %343, %332 %345 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) %346 = extractelement <4 x float> %345, i32 0 %347 = fmul float %346, %126 %348 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) %349 = extractelement <4 x float> %348, i32 1 %350 = fmul float %349, %126 %351 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) %352 = extractelement <4 x float> %351, i32 2 %353 = fmul float %352, %126 %354 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5) %355 = extractelement <4 x float> %354, i32 0 %356 = fmul float %355, %127 %357 = fadd float %356, %347 %358 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5) %359 = extractelement <4 x float> %358, i32 1 %360 = fmul float %359, %127 %361 = fadd float %360, %350 %362 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5) %363 = extractelement <4 x float> %362, i32 2 %364 = fmul float %363, %127 %365 = fadd float %364, %353 %366 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6) %367 = extractelement <4 x float> %366, i32 0 %368 = fmul float %367, %128 %369 = fadd float %368, %357 %370 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6) %371 = extractelement <4 x float> %370, i32 1 %372 = fmul float %371, %128 %373 = fadd float %372, %361 %374 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6) %375 = extractelement <4 x float> %374, i32 2 %376 = fmul float %375, %128 %377 = fadd float %376, %365 %378 = insertelement <4 x float> undef, float %247, i32 0 %379 = insertelement <4 x float> %378, float %250, i32 1 %380 = insertelement <4 x float> %379, float %253, i32 2 %381 = insertelement <4 x float> %380, float %256, i32 3 call void @llvm.R600.store.swizzle(<4 x float> %381, i32 60, i32 1) %382 = insertelement <4 x float> undef, float %196, i32 0 %383 = insertelement <4 x float> %382, float %200, i32 1 %384 = insertelement <4 x float> %383, float %15, i32 2 %385 = insertelement <4 x float> %384, float %11, i32 3 call void @llvm.R600.store.swizzle(<4 x float> %385, i32 0, i32 2) %386 = insertelement <4 x float> undef, float %288, i32 0 %387 = insertelement <4 x float> %386, float %292, i32 1 %388 = insertelement <4 x float> %387, float %296, i32 2 %389 = insertelement <4 x float> %388, float %284, i32 3 call void @llvm.R600.store.swizzle(<4 x float> %389, i32 1, i32 2) %390 = insertelement <4 x float> undef, float %336, i32 0 %391 = insertelement <4 x float> %390, float %340, i32 1 %392 = insertelement <4 x float> %391, float %344, i32 2 %393 = insertelement <4 x float> %392, float 0.000000e+00, i32 3 call void @llvm.R600.store.swizzle(<4 x float> %393, i32 2, i32 2) %394 = insertelement <4 x float> undef, float %369, i32 0 %395 = insertelement <4 x float> %394, float %373, i32 1 %396 = insertelement <4 x float> %395, float %377, i32 2 %397 = insertelement <4 x float> %396, float 0.000000e+00, i32 3 call void @llvm.R600.store.swizzle(<4 x float> %397, i32 3, i32 2) ret void } ; Function Attrs: readnone declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.rsq(float) #1 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) attributes #0 = { "ShaderType"="1" } attributes #1 = { readnone }