PROPERTY CS_FIXED_BLOCK_WIDTH 64 PROPERTY CS_FIXED_BLOCK_HEIGHT 1 PROPERTY CS_FIXED_BLOCK_DEPTH 1 DCL SV[0], THREAD_ID DCL SV[1], BLOCK_ID DCL IMAGE[0], BUFFER, PIPE_FORMAT_R32_UINT, WR DCL CONST[0..1] DCL TEMP[0], LOCAL IMM[0] UINT32 {64, 1, 0, 0} 0: UMAD TEMP[0].x, SV[1].xyzz, IMM[0].xyyy, SV[0].xyzz 1: STORE IMAGE[0], TEMP[0].xxxx, CONST[0].xxxx, PIPE_FORMAT_R32_UINT 2: END radeonsi: Compiling shader 28 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_cs void @main([11 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, <3 x i32>) #0 { main_body: %7 = extractelement <3 x i32> %6, i32 0 %8 = shl i32 %3, 6 %9 = add i32 %8, %7 %10 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %1, i64 0, i64 16, !amdgpu.uniform !0 %11 = load <4 x i32>, <4 x i32> addrspace(2)* %10, align 16, !invariant.load !0 %12 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %11, i32 0) %13 = insertelement <4 x float> undef, float %12, i32 0 %14 = shufflevector <4 x float> %13, <4 x float> undef, <4 x i32> zeroinitializer %15 = bitcast [80 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %16 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %15, i64 0, i64 31, !amdgpu.uniform !0 %17 = load <4 x i32>, <4 x i32> addrspace(2)* %16, align 16, !invariant.load !0 call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %14, <4 x i32> %17, i32 %9, i32 0, i1 true, i1 false) #3 ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1 ; Function Attrs: nounwind writeonly declare void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #2 attributes #0 = { "amdgpu-max-work-group-size"="64" "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind writeonly } attributes #3 = { inaccessiblememonly nounwind } !0 = !{} SHADER KEY Compute Shader: Shader main disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x40 ; C0800340 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100 s_lshl_b32 s1, s6, 6 ; 8F018606 v_add_i32_e32 v4, vcc, s1, v0 ; 4A080001 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s0 ; 7E000200 s_load_dwordx4 s[0:3], s[4:5], 0x7c ; C080057C v_mov_b32_e32 v1, v0 ; 7E020300 v_mov_b32_e32 v2, v0 ; 7E040300 v_mov_b32_e32 v3, v0 ; 7E060300 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_store_format_xyzw v[0:3], v4, s[0:3], 0 idxen glc ; E01C6000 80000004 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Spilled SGPRs: 0 Spilled VGPRs: 0 Private memory VGPRs: 0 Code Size: 60 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** COMP PROPERTY CS_FIXED_BLOCK_WIDTH 4 PROPERTY CS_FIXED_BLOCK_HEIGHT 4 PROPERTY CS_FIXED_BLOCK_DEPTH 4 DCL SV[0], THREAD_ID DCL SV[1], BLOCK_ID DCL SAMP[0] DCL SVIEW[0], BUFFER, FLOAT DCL IMAGE[0], BUFFER, PIPE_FORMAT_R32_UINT, WR DCL IMAGE[1], BUFFER, PIPE_FORMAT_R32_UINT, WR DCL IMAGE[2], BUFFER, PIPE_FORMAT_R32_UINT, WR DCL CONST[0..2] DCL CONST[1][0..174] DCL CONST[2][0..34] DCL CONST[3][0..2727] DCL TEMP[0..263], LOCAL DCL ADDR[0] IMM[0] UINT32 {4, 1, 16, 4294967295} IMM[1] FLT32 { 2.0000, -1.0000, 1.0000, 2000000.0000} IMM[2] UINT32 {48, 0, 432, 848} IMM[3] UINT32 {416, 496, 480, 448} IMM[4] UINT32 {464, 12, 28, 2} IMM[5] FLT32 { 0.5000, -2.0000, 1.2500, 0.0000} IMM[6] UINT32 {3, 928, 176, 160} IMM[7] UINT32 {128, 144, 0, 0} IMM[8] INT32 {0, 4, 0, 0} 0: UMAD TEMP[0].xyz, SV[1].xyzz, IMM[0].xxxx, SV[0].xyzz 1: MOV TEMP[1].xyz, CONST[2][1].xyzz 2: USLT TEMP[2].xyz, TEMP[0].xyzz, CONST[2][1].xyzz 3: MOV TEMP[3].xzw, TEMP[2].xyzz 4: AND TEMP[3].y, TEMP[2].yyyy, TEMP[2].zzzz 5: AND TEMP[4].x, TEMP[2].xxxx, TEMP[3].yyyy 6: UIF TEMP[4].xxxx 7: MOV TEMP[5].xyz, CONST[2][1].xyzz 8: UMAD TEMP[6].x, TEMP[0].zzzz, CONST[2][1].yyyy, TEMP[0].yyyy 9: MOV TEMP[7].xyz, CONST[2][1].xyzz 10: UMAD TEMP[8].x, TEMP[6].xxxx, CONST[2][1].xxxx, TEMP[0].xxxx 11: MOV TEMP[9].xyz, CONST[2][1].xyzz 12: I2F TEMP[10].xy, CONST[2][1].xyyy 13: RCP TEMP[11].x, TEMP[10].xxxx 14: RCP TEMP[11].y, TEMP[10].yyyy 15: MUL TEMP[12].xy, IMM[1].xxxx, TEMP[11].xyyy 16: U2F TEMP[13].xy, TEMP[0].xyyy 17: MAD TEMP[14].xy, TEMP[13].xyyy, TEMP[12].xyyy, IMM[1].yyyy 18: MUL TEMP[15].xy, TEMP[14].xyyy, IMM[1].zyyy 19: UADD TEMP[16].xy, TEMP[0].xyyy, IMM[0].yyyy 20: U2F TEMP[17].xy, TEMP[16].xyyy 21: MAD TEMP[18].xy, TEMP[17].xyyy, TEMP[12].xyyy, IMM[1].yyyy 22: MUL TEMP[19].xy, TEMP[18].xyyy, IMM[1].zyyy 23: U2F TEMP[20].x, TEMP[0].zzzz 24: MOV TEMP[21].xyz, CONST[2][3].xyzz 25: RCP TEMP[22].x, CONST[2][3].zzzz 26: MUL TEMP[23].x, TEMP[20].xxxx, TEMP[22].xxxx 27: EX2 TEMP[24].x, TEMP[23].xxxx 28: MOV TEMP[25].xyz, CONST[2][3].xyzz 29: ADD TEMP[26].x, TEMP[24].xxxx, -CONST[2][3].yyyy 30: MOV TEMP[27].xyz, CONST[2][3].xyzz 31: RCP TEMP[28].x, CONST[2][3].xxxx 32: MUL TEMP[29].x, TEMP[26].xxxx, TEMP[28].xxxx 33: MOV TEMP[30].xyz, CONST[2][1].xyzz 34: USEQ TEMP[31].x, TEMP[0].zzzz, CONST[2][1].zzzz 35: UCMP TEMP[29].x, TEMP[31].xxxx, IMM[1].wwww, TEMP[29].xxxx 36: UADD TEMP[32].x, TEMP[0].zzzz, IMM[0].yyyy 37: U2F TEMP[33].x, TEMP[32].xxxx 38: MOV TEMP[34].xyz, CONST[2][3].xyzz 39: RCP TEMP[35].x, CONST[2][3].zzzz 40: MUL TEMP[36].x, TEMP[33].xxxx, TEMP[35].xxxx 41: EX2 TEMP[37].x, TEMP[36].xxxx 42: MOV TEMP[38].xyz, CONST[2][3].xyzz 43: ADD TEMP[39].x, TEMP[37].xxxx, -CONST[2][3].yyyy 44: MOV TEMP[40].xyz, CONST[2][3].xyzz 45: RCP TEMP[41].x, CONST[2][3].xxxx 46: MUL TEMP[42].x, TEMP[39].xxxx, TEMP[41].xxxx 47: MOV TEMP[43].xyz, CONST[2][1].xyzz 48: USEQ TEMP[44].x, TEMP[32].xxxx, CONST[2][1].zzzz 49: UCMP TEMP[42].x, TEMP[44].xxxx, IMM[1].wwww, TEMP[42].xxxx 50: MOV TEMP[45], CONST[1][27] 51: FSLT TEMP[46].x, CONST[1][27].wwww, IMM[1].zzzz 52: UIF TEMP[46].xxxx 53: MOV TEMP[47], CONST[1][53] 54: ADD TEMP[48].x, TEMP[29].xxxx, CONST[1][53].wwww 55: MOV TEMP[49], CONST[1][53] 56: MUL TEMP[50].x, TEMP[48].xxxx, CONST[1][53].zzzz 57: RCP TEMP[51].x, TEMP[50].xxxx 58: ELSE 59: MOV TEMP[52], CONST[1][26] 60: MOV TEMP[53], CONST[1][27] 61: MAD TEMP[51].x, TEMP[29].xxxx, CONST[1][26].zzzz, CONST[1][27].zzzz 62: ENDIF 63: MOV TEMP[54], CONST[1][31] 64: MOV TEMP[55], CONST[1][30] 65: MOV TEMP[56], CONST[1][28] 66: MOV TEMP[57], CONST[1][29] 67: MUL TEMP[58], CONST[1][29], TEMP[15].yyyy 68: MAD TEMP[59], CONST[1][28], TEMP[15].xxxx, TEMP[58] 69: MAD TEMP[60], CONST[1][30], TEMP[51].xxxx, TEMP[59] 70: ADD TEMP[61], CONST[1][31], TEMP[60] 71: MOV TEMP[62], CONST[1][31] 72: MOV TEMP[63], CONST[1][30] 73: MOV TEMP[64], CONST[1][28] 74: MOV TEMP[65], CONST[1][29] 75: MUL TEMP[66], CONST[1][29], TEMP[19].yyyy 76: MAD TEMP[67], CONST[1][28], TEMP[19].xxxx, TEMP[66] 77: MAD TEMP[68], CONST[1][30], TEMP[51].xxxx, TEMP[67] 78: ADD TEMP[69], CONST[1][31], TEMP[68] 79: MOV TEMP[70], CONST[1][31] 80: MOV TEMP[71], CONST[1][30] 81: MOV TEMP[72], CONST[1][28] 82: MOV TEMP[73], CONST[1][29] 83: MUL TEMP[74], CONST[1][29], TEMP[19].yyyy 84: MAD TEMP[75], CONST[1][28], TEMP[15].xxxx, TEMP[74] 85: MAD TEMP[76], CONST[1][30], TEMP[51].xxxx, TEMP[75] 86: ADD TEMP[77], CONST[1][31], TEMP[76] 87: MOV TEMP[78], CONST[1][31] 88: MOV TEMP[79], CONST[1][30] 89: MOV TEMP[80], CONST[1][28] 90: MOV TEMP[81], CONST[1][29] 91: MUL TEMP[82], CONST[1][29], TEMP[15].yyyy 92: MAD TEMP[83], CONST[1][28], TEMP[19].xxxx, TEMP[82] 93: MAD TEMP[84], CONST[1][30], TEMP[51].xxxx, TEMP[83] 94: ADD TEMP[85], CONST[1][31], TEMP[84] 95: MOV TEMP[86], CONST[1][27] 96: FSLT TEMP[87].x, CONST[1][27].wwww, IMM[1].zzzz 97: UIF TEMP[87].xxxx 98: MOV TEMP[88], CONST[1][53] 99: ADD TEMP[89].x, TEMP[42].xxxx, CONST[1][53].wwww 100: MOV TEMP[90], CONST[1][53] 101: MUL TEMP[91].x, TEMP[89].xxxx, CONST[1][53].zzzz 102: RCP TEMP[92].x, TEMP[91].xxxx 103: ELSE 104: MOV TEMP[93], CONST[1][26] 105: MOV TEMP[94], CONST[1][27] 106: MAD TEMP[92].x, TEMP[42].xxxx, CONST[1][26].zzzz, CONST[1][27].zzzz 107: ENDIF 108: MOV TEMP[95], CONST[1][31] 109: MOV TEMP[96], CONST[1][30] 110: MOV TEMP[97], CONST[1][28] 111: MOV TEMP[98], CONST[1][29] 112: MUL TEMP[99], CONST[1][29], TEMP[15].yyyy 113: MAD TEMP[100], CONST[1][28], TEMP[15].xxxx, TEMP[99] 114: MAD TEMP[101], CONST[1][30], TEMP[92].xxxx, TEMP[100] 115: ADD TEMP[102], CONST[1][31], TEMP[101] 116: MOV TEMP[103], CONST[1][31] 117: MOV TEMP[104], CONST[1][30] 118: MOV TEMP[105], CONST[1][28] 119: MOV TEMP[106], CONST[1][29] 120: MUL TEMP[107], CONST[1][29], TEMP[19].yyyy 121: MAD TEMP[108], CONST[1][28], TEMP[19].xxxx, TEMP[107] 122: MAD TEMP[109], CONST[1][30], TEMP[92].xxxx, TEMP[108] 123: ADD TEMP[110], CONST[1][31], TEMP[109] 124: MOV TEMP[111], CONST[1][31] 125: MOV TEMP[112], CONST[1][30] 126: MOV TEMP[113], CONST[1][28] 127: MOV TEMP[114], CONST[1][29] 128: MUL TEMP[115], CONST[1][29], TEMP[19].yyyy 129: MAD TEMP[116], CONST[1][28], TEMP[15].xxxx, TEMP[115] 130: MAD TEMP[117], CONST[1][30], TEMP[92].xxxx, TEMP[116] 131: ADD TEMP[118], CONST[1][31], TEMP[117] 132: MOV TEMP[119], CONST[1][31] 133: MOV TEMP[120], CONST[1][30] 134: MOV TEMP[121], CONST[1][28] 135: MOV TEMP[122], CONST[1][29] 136: MUL TEMP[123], CONST[1][29], TEMP[15].yyyy 137: MAD TEMP[124], CONST[1][28], TEMP[19].xxxx, TEMP[123] 138: MAD TEMP[125], CONST[1][30], TEMP[92].xxxx, TEMP[124] 139: ADD TEMP[126], CONST[1][31], TEMP[125] 140: RCP TEMP[127].xy, TEMP[61].wwww 141: MUL TEMP[128].xy, TEMP[61].xyyy, TEMP[127].xyyy 142: RCP TEMP[129].xy, TEMP[69].wwww 143: MUL TEMP[130].xy, TEMP[69].xyyy, TEMP[129].xyyy 144: MIN TEMP[131].xy, TEMP[128].xyyy, TEMP[130].xyyy 145: RCP TEMP[132].xy, TEMP[77].wwww 146: MUL TEMP[133].xy, TEMP[77].xyyy, TEMP[132].xyyy 147: MIN TEMP[134].xy, TEMP[131].xyyy, TEMP[133].xyyy 148: RCP TEMP[135].xy, TEMP[85].wwww 149: MUL TEMP[136].xy, TEMP[85].xyyy, TEMP[135].xyyy 150: MIN TEMP[137].xy, TEMP[134].xyyy, TEMP[136].xyyy 151: RCP TEMP[138].xy, TEMP[102].wwww 152: MUL TEMP[139].xy, TEMP[102].xyyy, TEMP[138].xyyy 153: MIN TEMP[140].xy, TEMP[137].xyyy, TEMP[139].xyyy 154: RCP TEMP[141].xy, TEMP[110].wwww 155: MUL TEMP[142].xy, TEMP[110].xyyy, TEMP[141].xyyy 156: MIN TEMP[143].xy, TEMP[140].xyyy, TEMP[142].xyyy 157: RCP TEMP[144].xy, TEMP[118].wwww 158: MUL TEMP[145].xy, TEMP[118].xyyy, TEMP[144].xyyy 159: MIN TEMP[146].xy, TEMP[143].xyyy, TEMP[145].xyyy 160: RCP TEMP[147].xy, TEMP[126].wwww 161: MUL TEMP[148].xy, TEMP[126].xyyy, TEMP[147].xyyy 162: MIN TEMP[149].xy, TEMP[146].xyyy, TEMP[148].xyyy 163: RCP TEMP[150].xy, TEMP[61].wwww 164: MUL TEMP[151].xy, TEMP[61].xyyy, TEMP[150].xyyy 165: RCP TEMP[152].xy, TEMP[69].wwww 166: MUL TEMP[153].xy, TEMP[69].xyyy, TEMP[152].xyyy 167: MAX TEMP[154].xy, TEMP[151].xyyy, TEMP[153].xyyy 168: RCP TEMP[155].xy, TEMP[77].wwww 169: MUL TEMP[156].xy, TEMP[77].xyyy, TEMP[155].xyyy 170: MAX TEMP[157].xy, TEMP[154].xyyy, TEMP[156].xyyy 171: RCP TEMP[158].xy, TEMP[85].wwww 172: MUL TEMP[159].xy, TEMP[85].xyyy, TEMP[158].xyyy 173: MAX TEMP[160].xy, TEMP[157].xyyy, TEMP[159].xyyy 174: RCP TEMP[161].xy, TEMP[102].wwww 175: MUL TEMP[162].xy, TEMP[102].xyyy, TEMP[161].xyyy 176: MAX TEMP[163].xy, TEMP[160].xyyy, TEMP[162].xyyy 177: RCP TEMP[164].xy, TEMP[110].wwww 178: MUL TEMP[165].xy, TEMP[110].xyyy, TEMP[164].xyyy 179: MAX TEMP[166].xy, TEMP[163].xyyy, TEMP[165].xyyy 180: RCP TEMP[167].xy, TEMP[118].wwww 181: MUL TEMP[168].xy, TEMP[118].xyyy, TEMP[167].xyyy 182: MAX TEMP[169].xy, TEMP[166].xyyy, TEMP[168].xyyy 183: RCP TEMP[170].xy, TEMP[126].wwww 184: MUL TEMP[171].xy, TEMP[126].xyyy, TEMP[170].xyyy 185: MAX TEMP[172].xy, TEMP[169].xyyy, TEMP[171].xyyy 186: MOV TEMP[149].z, TEMP[29].xxxx 187: MOV TEMP[172].z, TEMP[42].xxxx 188: ADD TEMP[173].xyz, TEMP[149].xyzz, TEMP[172].xyzz 189: MUL TEMP[174].xyz, IMM[5].xxxx, TEMP[173].xyzz 190: ADD TEMP[175].xyz, TEMP[172].xyzz, -TEMP[174].xyzz 191: MOV TEMP[176].x, CONST[2][0].wwww 192: MOV TEMP[177].x, CONST[2][1].wwww 193: UMUL TEMP[178].x, CONST[2][0].wwww, CONST[2][1].wwww 194: UMUL TEMP[179].x, TEMP[178].xxxx, IMM[4].wwww 195: MOV TEMP[180].x, IMM[2].yyyy 196: BGNLOOP 197: MOV TEMP[181].x, CONST[2][0].xxxx 198: USGE TEMP[182].x, TEMP[180].xxxx, CONST[2][0].xxxx 199: UIF TEMP[182].xxxx 200: BRK 201: ENDIF 202: UMUL TEMP[183].x, TEMP[180].xxxx, IMM[0].xxxx 203: MOV TEMP[184].x, TEMP[183].xxxx 204: TXF_LZ TEMP[185], TEMP[184], SAMP[0], BUFFER 205: MOV TEMP[186], TEMP[185] 206: MOV TEMP[187], TEMP[185] 207: RCP TEMP[188].x, TEMP[185].wwww 208: MOV TEMP[189].x, TEMP[188].xxxx 209: UADD TEMP[190].x, TEMP[183].xxxx, IMM[6].xxxx 210: MOV TEMP[191].x, TEMP[190].xxxx 211: TXF_LZ TEMP[192], TEMP[191], SAMP[0], BUFFER 212: MOV TEMP[193], TEMP[192] 213: FSLT TEMP[194].x, IMM[5].yyyy, TEMP[192].xxxx 214: UIF TEMP[194].xxxx 215: UADD TEMP[195].x, TEMP[183].xxxx, IMM[4].wwww 216: MOV TEMP[196].x, TEMP[195].xxxx 217: TXF_LZ TEMP[197], TEMP[196], SAMP[0], BUFFER 218: MUL TEMP[198].xyz, IMM[5].xxxx, TEMP[197].xyzz 219: MUL TEMP[199].xyz, TEMP[198].xyzz, TEMP[188].xxxx 220: ADD TEMP[187].xyz, TEMP[185].xyzz, -TEMP[199].xyzz 221: MUL TEMP[200].x, IMM[5].zzzz, TEMP[188].xxxx 222: MUL TEMP[201].x, TEMP[188].xxxx, TEMP[188].xxxx 223: MUL TEMP[202].x, TEMP[201].xxxx, TEMP[192].xxxx 224: MAD TEMP[203].x, TEMP[200].xxxx, TEMP[188].xxxx, -TEMP[202].xxxx 225: SQRT TEMP[189].x, TEMP[203].xxxx 226: ENDIF 227: MOV TEMP[204].w, IMM[1].zzzz 228: MOV TEMP[205].xyz, CONST[1][58].xyzz 229: ADD TEMP[204].xyz, TEMP[187].xyzz, CONST[1][58].xyzz 230: MOV TEMP[206], CONST[1][11] 231: MOV TEMP[207], CONST[1][10] 232: MOV TEMP[208], CONST[1][8] 233: MOV TEMP[209], CONST[1][9] 234: MUL TEMP[210], CONST[1][9], TEMP[204].yyyy 235: MAD TEMP[211], CONST[1][8], TEMP[204].xxxx, TEMP[210] 236: MAD TEMP[212], CONST[1][10], TEMP[204].zzzz, TEMP[211] 237: ADD TEMP[213], CONST[1][11], TEMP[212] 238: ADD TEMP[214].xyz, TEMP[213].xyzz, -TEMP[174].xyzz 239: MOV TEMP[215].xyz, |TEMP[214].xyzz| 240: ADD TEMP[216].xyz, TEMP[215].xyzz, -TEMP[175].xyzz 241: MAX TEMP[217].xyz, TEMP[216].xyzz, IMM[5].wwww 242: DP3 TEMP[218].x, TEMP[217].xyzz, TEMP[217].xyzz 243: MUL TEMP[219].x, TEMP[189].xxxx, TEMP[189].xxxx 244: FSLT TEMP[220].x, TEMP[218].xxxx, TEMP[219].xxxx 245: UIF TEMP[220].xxxx 246: MOV TEMP[221].x, IMM[8].xxxx 247: ATOMUADD TEMP[222].x, IMAGE[2], IMM[8].xxxx, IMM[0].yyyy, PIPE_FORMAT_R32_UINT 248: USLT TEMP[223].x, TEMP[222].xxxx, TEMP[179].xxxx 249: UIF TEMP[223].xxxx 250: MOV TEMP[224].x, TEMP[8].xxxx 251: ATOMXCHG TEMP[225].x, IMAGE[1], TEMP[8].xxxx, TEMP[222].xxxx, PIPE_FORMAT_R32_UINT 252: UMUL TEMP[226].x, TEMP[222].xxxx, IMM[4].wwww 253: MOV TEMP[227].x, TEMP[226].xxxx 254: STORE IMAGE[0], TEMP[226].xxxx, TEMP[180].xxxx, PIPE_FORMAT_R32_UINT 255: UMAD TEMP[228].x, TEMP[222].xxxx, IMM[4].wwww, IMM[0].yyyy 256: MOV TEMP[229].x, TEMP[228].xxxx 257: STORE IMAGE[0], TEMP[228].xxxx, TEMP[225].xxxx, PIPE_FORMAT_R32_UINT 258: ENDIF 259: ENDIF 260: UADD TEMP[180].x, TEMP[180].xxxx, IMM[0].yyyy 261: ENDLOOP 262: MOV TEMP[230].x, IMM[2].yyyy 263: BGNLOOP 264: MOV TEMP[231].x, CONST[2][0].yyyy 265: USGE TEMP[232].x, TEMP[230].xxxx, CONST[2][0].yyyy 266: UIF TEMP[232].xxxx 267: BRK 268: ENDIF 269: UMUL TEMP[233].x, TEMP[230].xxxx, IMM[0].zzzz 270: USHR TEMP[234], TEMP[233].xxxx, IMM[8].yyyy 271: UARL ADDR[0].x, TEMP[234].xxxx 272: UARL ADDR[0].x, TEMP[234].xxxx 273: MOV TEMP[235], CONST[3][ADDR[0].x] 274: MOV TEMP[236].w, IMM[1].zzzz 275: MOV TEMP[237].xyz, CONST[1][58].xyzz 276: ADD TEMP[236].xyz, TEMP[235].xyzz, CONST[1][58].xyzz 277: MOV TEMP[238], CONST[1][11] 278: MOV TEMP[239], CONST[1][10] 279: MOV TEMP[240], CONST[1][8] 280: MOV TEMP[241], CONST[1][9] 281: MUL TEMP[242], CONST[1][9], TEMP[236].yyyy 282: MAD TEMP[243], CONST[1][8], TEMP[236].xxxx, TEMP[242] 283: MAD TEMP[244], CONST[1][10], TEMP[236].zzzz, TEMP[243] 284: ADD TEMP[245], CONST[1][11], TEMP[244] 285: ADD TEMP[246].xyz, TEMP[245].xyzz, -TEMP[174].xyzz 286: MOV TEMP[247].xyz, |TEMP[246].xyzz| 287: ADD TEMP[248].xyz, TEMP[247].xyzz, -TEMP[175].xyzz 288: MAX TEMP[249].xyz, TEMP[248].xyzz, IMM[5].wwww 289: DP3 TEMP[250].x, TEMP[249].xyzz, TEMP[249].xyzz 290: MUL TEMP[251].x, TEMP[235].wwww, TEMP[235].wwww 291: FSLT TEMP[252].x, TEMP[250].xxxx, TEMP[251].xxxx 292: UIF TEMP[252].xxxx 293: MOV TEMP[253].x, IMM[8].xxxx 294: ATOMUADD TEMP[254].x, IMAGE[2], IMM[8].xxxx, IMM[0].yyyy, PIPE_FORMAT_R32_UINT 295: USLT TEMP[255].x, TEMP[254].xxxx, TEMP[179].xxxx 296: UIF TEMP[255].xxxx 297: MOV TEMP[256].x, CONST[2][0].wwww 298: UADD TEMP[257].x, CONST[2][0].wwww, TEMP[8].xxxx 299: MOV TEMP[258].x, TEMP[257].xxxx 300: ATOMXCHG TEMP[259].x, IMAGE[1], TEMP[257].xxxx, TEMP[254].xxxx, PIPE_FORMAT_R32_UINT 301: UMUL TEMP[260].x, TEMP[254].xxxx, IMM[4].wwww 302: MOV TEMP[261].x, TEMP[260].xxxx 303: STORE IMAGE[0], TEMP[260].xxxx, TEMP[230].xxxx, PIPE_FORMAT_R32_UINT 304: UMAD TEMP[262].x, TEMP[254].xxxx, IMM[4].wwww, IMM[0].yyyy 305: MOV TEMP[263].x, TEMP[262].xxxx 306: STORE IMAGE[0], TEMP[262].xxxx, TEMP[259].xxxx, PIPE_FORMAT_R32_UINT 307: ENDIF 308: ENDIF 309: UADD TEMP[230].x, TEMP[230].xxxx, IMM[0].yyyy 310: ENDLOOP 311: ENDIF 312: END radeonsi: Compiling shader 29 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" target triple = "amdgcn--" define amdgpu_cs void @main([11 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [80 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, <3 x i32>) #0 { main_body: %7 = extractelement <3 x i32> %6, i32 0 %8 = shl i32 %3, 2 %9 = add i32 %8, %7 %10 = extractelement <3 x i32> %6, i32 1 %11 = shl i32 %4, 2 %12 = add i32 %11, %10 %13 = extractelement <3 x i32> %6, i32 2 %14 = shl i32 %5, 2 %15 = add i32 %14, %13 %16 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %1, i64 0, i64 18, !amdgpu.uniform !0 %17 = load <4 x i32>, <4 x i32> addrspace(2)* %16, align 16, !invariant.load !0 %18 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %17, i32 16) %19 = bitcast float %18 to i32 %20 = icmp ult i32 %9, %19 %21 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %17, i32 20) %22 = bitcast float %21 to i32 %23 = icmp ult i32 %12, %22 %24 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %17, i32 24) %25 = bitcast float %24 to i32 %26 = icmp ult i32 %15, %25 %27 = and i1 %23, %26 %28 = and i1 %20, %27 br i1 %28, label %if6, label %endif311 if6: ; preds = %main_body %29 = mul i32 %15, %22 %30 = add i32 %29, %12 %31 = mul i32 %30, %19 %32 = add i32 %31, %9 %33 = sitofp i32 %19 to float %34 = sitofp i32 %22 to float %35 = fdiv nsz float 1.000000e+00, %33, !fpmath !1 %36 = fdiv nsz float 1.000000e+00, %34, !fpmath !1 %37 = fmul nsz float %35, 2.000000e+00 %38 = fmul nsz float %36, 2.000000e+00 %39 = uitofp i32 %9 to float %40 = uitofp i32 %12 to float %41 = fmul nsz float %37, %39 %42 = fadd nsz float %41, -1.000000e+00 %43 = fmul nsz float %38, %40 %44 = fadd nsz float %43, -1.000000e+00 %45 = fsub nsz float -0.000000e+00, %44 %46 = add i32 %9, 1 %47 = add i32 %12, 1 %48 = uitofp i32 %46 to float %49 = uitofp i32 %47 to float %50 = fmul nsz float %37, %48 %51 = fadd nsz float %50, -1.000000e+00 %52 = fmul nsz float %38, %49 %53 = fadd nsz float %52, -1.000000e+00 %54 = fsub nsz float -0.000000e+00, %53 %55 = uitofp i32 %15 to float %56 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %17, i32 56) %57 = fdiv nsz float 1.000000e+00, %56, !fpmath !1 %58 = fmul nsz float %57, %55 %59 = call nsz float @llvm.exp2.f32(float %58) #1 %60 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %17, i32 52) %61 = fsub nsz float %59, %60 %62 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %17, i32 48) %63 = fdiv nsz float 1.000000e+00, %62, !fpmath !1 %64 = fmul nsz float %61, %63 %65 = icmp eq i32 %15, %25 %66 = select i1 %65, float 2.000000e+06, float %64 %67 = add i32 %15, 1 %68 = uitofp i32 %67 to float %69 = fmul nsz float %57, %68 %70 = call nsz float @llvm.exp2.f32(float %69) #1 %71 = fsub nsz float %70, %60 %72 = fmul nsz float %71, %63 %73 = icmp eq i32 %67, %25 %74 = select i1 %73, float 2.000000e+06, float %72 %75 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %1, i64 0, i64 17, !amdgpu.uniform !0 %76 = load <4 x i32>, <4 x i32> addrspace(2)* %75, align 16, !invariant.load !0 %77 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 444) %78 = fcmp nsz olt float %77, 1.000000e+00 br i1 %78, label %if52, label %else58 if52: ; preds = %if6 %79 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 860) %80 = fadd nsz float %66, %79 %81 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 856) %82 = fmul nsz float %80, %81 %83 = fdiv nsz float 1.000000e+00, %82, !fpmath !1 br label %endif62 else58: ; preds = %if6 %84 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 424) %85 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 440) %86 = fmul nsz float %66, %84 %87 = fadd nsz float %86, %85 br label %endif62 endif62: ; preds = %else58, %if52 %TEMP51.x.0 = phi float [ %83, %if52 ], [ %87, %else58 ] %88 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 464) %89 = fmul nsz float %88, %45 %90 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 468) %91 = fmul nsz float %90, %45 %92 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 476) %93 = fmul nsz float %92, %45 %94 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 448) %95 = fmul nsz float %94, %42 %96 = fadd nsz float %95, %89 %97 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 452) %98 = fmul nsz float %97, %42 %99 = fadd nsz float %98, %91 %100 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 460) %101 = fmul nsz float %100, %42 %102 = fadd nsz float %101, %93 %103 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 480) %104 = fmul nsz float %103, %TEMP51.x.0 %105 = fadd nsz float %104, %96 %106 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 484) %107 = fmul nsz float %106, %TEMP51.x.0 %108 = fadd nsz float %107, %99 %109 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 492) %110 = fmul nsz float %109, %TEMP51.x.0 %111 = fadd nsz float %110, %102 %112 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 496) %113 = fadd nsz float %112, %105 %114 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 500) %115 = fadd nsz float %114, %108 %116 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 508) %117 = fadd nsz float %116, %111 %118 = fmul nsz float %88, %54 %119 = fmul nsz float %90, %54 %120 = fmul nsz float %92, %54 %121 = fmul nsz float %94, %51 %122 = fadd nsz float %121, %118 %123 = fmul nsz float %97, %51 %124 = fadd nsz float %123, %119 %125 = fmul nsz float %100, %51 %126 = fadd nsz float %125, %120 %127 = fadd nsz float %104, %122 %128 = fadd nsz float %107, %124 %129 = fadd nsz float %110, %126 %130 = fadd nsz float %112, %127 %131 = fadd nsz float %114, %128 %132 = fadd nsz float %116, %129 %133 = fadd nsz float %95, %118 %134 = fadd nsz float %98, %119 %135 = fadd nsz float %101, %120 %136 = fadd nsz float %104, %133 %137 = fadd nsz float %107, %134 %138 = fadd nsz float %110, %135 %139 = fadd nsz float %112, %136 %140 = fadd nsz float %114, %137 %141 = fadd nsz float %116, %138 %142 = fadd nsz float %121, %89 %143 = fadd nsz float %123, %91 %144 = fadd nsz float %125, %93 %145 = fadd nsz float %104, %142 %146 = fadd nsz float %107, %143 %147 = fadd nsz float %110, %144 %148 = fadd nsz float %112, %145 %149 = fadd nsz float %114, %146 %150 = fadd nsz float %116, %147 br i1 %78, label %if97, label %else103 if97: ; preds = %endif62 %151 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 860) %152 = fadd nsz float %74, %151 %153 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 856) %154 = fmul nsz float %152, %153 %155 = fdiv nsz float 1.000000e+00, %154, !fpmath !1 br label %endif107 else103: ; preds = %endif62 %156 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 424) %157 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 440) %158 = fmul nsz float %74, %156 %159 = fadd nsz float %158, %157 br label %endif107 endif107: ; preds = %else103, %if97 %TEMP92.x.0 = phi float [ %155, %if97 ], [ %159, %else103 ] %160 = fmul nsz float %103, %TEMP92.x.0 %161 = fadd nsz float %160, %96 %162 = fmul nsz float %106, %TEMP92.x.0 %163 = fadd nsz float %162, %99 %164 = fmul nsz float %109, %TEMP92.x.0 %165 = fadd nsz float %164, %102 %166 = fadd nsz float %112, %161 %167 = fadd nsz float %114, %163 %168 = fadd nsz float %116, %165 %169 = fadd nsz float %160, %122 %170 = fadd nsz float %162, %124 %171 = fadd nsz float %164, %126 %172 = fadd nsz float %112, %169 %173 = fadd nsz float %114, %170 %174 = fadd nsz float %116, %171 %175 = fadd nsz float %160, %133 %176 = fadd nsz float %162, %134 %177 = fadd nsz float %164, %135 %178 = fadd nsz float %112, %175 %179 = fadd nsz float %114, %176 %180 = fadd nsz float %116, %177 %181 = fadd nsz float %160, %142 %182 = fadd nsz float %162, %143 %183 = fadd nsz float %164, %144 %184 = fadd nsz float %112, %181 %185 = fadd nsz float %114, %182 %186 = fadd nsz float %116, %183 %187 = fdiv nsz float 1.000000e+00, %117, !fpmath !1 %188 = fmul nsz float %113, %187 %189 = fmul nsz float %115, %187 %190 = fdiv nsz float 1.000000e+00, %132, !fpmath !1 %191 = fmul nsz float %130, %190 %192 = fmul nsz float %131, %190 %193 = call nsz float @llvm.minnum.f32(float %188, float %191) #1 %194 = call nsz float @llvm.minnum.f32(float %189, float %192) #1 %195 = fdiv nsz float 1.000000e+00, %141, !fpmath !1 %196 = fmul nsz float %139, %195 %197 = fmul nsz float %140, %195 %198 = call nsz float @llvm.minnum.f32(float %193, float %196) #1 %199 = call nsz float @llvm.minnum.f32(float %194, float %197) #1 %200 = fdiv nsz float 1.000000e+00, %150, !fpmath !1 %201 = fmul nsz float %148, %200 %202 = fmul nsz float %149, %200 %203 = call nsz float @llvm.minnum.f32(float %198, float %201) #1 %204 = call nsz float @llvm.minnum.f32(float %199, float %202) #1 %205 = fdiv nsz float 1.000000e+00, %168, !fpmath !1 %206 = fmul nsz float %166, %205 %207 = fmul nsz float %167, %205 %208 = call nsz float @llvm.minnum.f32(float %203, float %206) #1 %209 = call nsz float @llvm.minnum.f32(float %204, float %207) #1 %210 = fdiv nsz float 1.000000e+00, %174, !fpmath !1 %211 = fmul nsz float %172, %210 %212 = fmul nsz float %173, %210 %213 = call nsz float @llvm.minnum.f32(float %208, float %211) #1 %214 = call nsz float @llvm.minnum.f32(float %209, float %212) #1 %215 = fdiv nsz float 1.000000e+00, %180, !fpmath !1 %216 = fmul nsz float %178, %215 %217 = fmul nsz float %179, %215 %218 = call nsz float @llvm.minnum.f32(float %213, float %216) #1 %219 = call nsz float @llvm.minnum.f32(float %214, float %217) #1 %220 = fdiv nsz float 1.000000e+00, %186, !fpmath !1 %221 = fmul nsz float %184, %220 %222 = fmul nsz float %185, %220 %223 = call nsz float @llvm.minnum.f32(float %218, float %221) #1 %224 = call nsz float @llvm.minnum.f32(float %219, float %222) #1 %225 = call nsz float @llvm.maxnum.f32(float %188, float %191) #1 %226 = call nsz float @llvm.maxnum.f32(float %189, float %192) #1 %227 = call nsz float @llvm.maxnum.f32(float %225, float %196) #1 %228 = call nsz float @llvm.maxnum.f32(float %226, float %197) #1 %229 = call nsz float @llvm.maxnum.f32(float %227, float %201) #1 %230 = call nsz float @llvm.maxnum.f32(float %228, float %202) #1 %231 = call nsz float @llvm.maxnum.f32(float %229, float %206) #1 %232 = call nsz float @llvm.maxnum.f32(float %230, float %207) #1 %233 = call nsz float @llvm.maxnum.f32(float %231, float %211) #1 %234 = call nsz float @llvm.maxnum.f32(float %232, float %212) #1 %235 = call nsz float @llvm.maxnum.f32(float %233, float %216) #1 %236 = call nsz float @llvm.maxnum.f32(float %234, float %217) #1 %237 = call nsz float @llvm.maxnum.f32(float %235, float %221) #1 %238 = call nsz float @llvm.maxnum.f32(float %236, float %222) #1 %239 = fadd nsz float %223, %237 %240 = fadd nsz float %224, %238 %241 = fadd nsz float %66, %74 %242 = fmul nsz float %239, 5.000000e-01 %243 = fmul nsz float %240, 5.000000e-01 %244 = fmul nsz float %241, 5.000000e-01 %245 = fsub nsz float %237, %242 %246 = fsub nsz float %238, %243 %247 = fsub nsz float %74, %244 %248 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %17, i32 12) %249 = bitcast float %248 to i32 %250 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %17, i32 28) %251 = bitcast float %250 to i32 %252 = mul i32 %249, %251 %253 = shl i32 %252, 1 %254 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %17, i32 0) %255 = bitcast float %254 to i32 %256 = bitcast [80 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %257 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %256, i64 0, i64 33 %258 = load <4 x i32>, <4 x i32> addrspace(2)* %257, align 16 %259 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %256, i64 0, i64 27 %260 = load <4 x i32>, <4 x i32> addrspace(2)* %259, align 16 %261 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %256, i64 0, i64 29 %262 = load <4 x i32>, <4 x i32> addrspace(2)* %261, align 16 %263 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %256, i64 0, i64 31 %264 = load <4 x i32>, <4 x i32> addrspace(2)* %263, align 16 br label %loop196 loop196: ; preds = %endif259, %endif107 %265 = phi i32 [ 0, %endif107 ], [ %362, %endif259 ] %TEMP180.x.0 = phi float [ 0.000000e+00, %endif107 ], [ %363, %endif259 ] %266 = icmp ult i32 %265, %255 br i1 %266, label %endif201, label %endloop261 endif201: ; preds = %loop196 %267 = shl i32 %265, 2 %268 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %258, i32 %267, i32 0, i1 false, i1 false) #1 %269 = extractelement <4 x float> %268, i32 0 %270 = extractelement <4 x float> %268, i32 1 %271 = extractelement <4 x float> %268, i32 2 %272 = extractelement <4 x float> %268, i32 3 %273 = fdiv nsz float 1.000000e+00, %272, !fpmath !1 %274 = or i32 %267, 3 %275 = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %258, i32 %274, i32 0, i1 false, i1 false) %276 = fcmp nsz ogt float %275, -2.000000e+00 br i1 %276, label %if214, label %endif226 if214: ; preds = %endif201 %277 = or i32 %267, 2 %278 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %258, i32 %277, i32 0, i1 false, i1 false) #1 %279 = extractelement <4 x float> %278, i32 0 %280 = extractelement <4 x float> %278, i32 1 %281 = extractelement <4 x float> %278, i32 2 %282 = fmul nsz float %279, 5.000000e-01 %283 = fmul nsz float %280, 5.000000e-01 %284 = fmul nsz float %281, 5.000000e-01 %285 = fmul nsz float %282, %273 %286 = fmul nsz float %283, %273 %287 = fmul nsz float %284, %273 %288 = fsub nsz float %269, %285 %289 = fsub nsz float %270, %286 %290 = fsub nsz float %271, %287 %291 = fmul nsz float %273, 1.250000e+00 %292 = fmul nsz float %273, %273 %293 = fmul nsz float %292, %275 %294 = fmul nsz float %291, %273 %295 = fsub nsz float %294, %293 %296 = call nsz float @llvm.sqrt.f32(float %295) #1 br label %endif226 endif226: ; preds = %if214, %endif201 %TEMP189.x.0 = phi float [ %296, %if214 ], [ %273, %endif201 ] %TEMP187.z.0 = phi float [ %290, %if214 ], [ %271, %endif201 ] %TEMP187.y.0 = phi float [ %289, %if214 ], [ %270, %endif201 ] %TEMP187.x.0 = phi float [ %288, %if214 ], [ %269, %endif201 ] %297 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 928) %298 = fadd nsz float %TEMP187.x.0, %297 %299 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 932) %300 = fadd nsz float %TEMP187.y.0, %299 %301 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 936) %302 = fadd nsz float %TEMP187.z.0, %301 %303 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 144) %304 = fmul nsz float %303, %300 %305 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 148) %306 = fmul nsz float %305, %300 %307 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 152) %308 = fmul nsz float %307, %300 %309 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 128) %310 = fmul nsz float %309, %298 %311 = fadd nsz float %310, %304 %312 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 132) %313 = fmul nsz float %312, %298 %314 = fadd nsz float %313, %306 %315 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 136) %316 = fmul nsz float %315, %298 %317 = fadd nsz float %316, %308 %318 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 160) %319 = fmul nsz float %318, %302 %320 = fadd nsz float %319, %311 %321 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 164) %322 = fmul nsz float %321, %302 %323 = fadd nsz float %322, %314 %324 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 168) %325 = fmul nsz float %324, %302 %326 = fadd nsz float %325, %317 %327 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 176) %328 = fadd nsz float %327, %320 %329 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 180) %330 = fadd nsz float %329, %323 %331 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 184) %332 = fadd nsz float %331, %326 %333 = fsub nsz float %328, %242 %334 = fsub nsz float %330, %243 %335 = fsub nsz float %332, %244 %336 = call nsz float @llvm.fabs.f32(float %333) #4 %337 = call nsz float @llvm.fabs.f32(float %334) #4 %338 = call nsz float @llvm.fabs.f32(float %335) #4 %339 = fsub nsz float %336, %245 %340 = fsub nsz float %337, %246 %341 = fsub nsz float %338, %247 %342 = call nsz float @llvm.maxnum.f32(float %339, float 0.000000e+00) #1 %343 = call nsz float @llvm.maxnum.f32(float %340, float 0.000000e+00) #1 %344 = call nsz float @llvm.maxnum.f32(float %341, float 0.000000e+00) #1 %345 = fmul nsz float %342, %342 %346 = fmul nsz float %343, %343 %347 = fadd nsz float %346, %345 %348 = fmul nsz float %344, %344 %349 = fadd nsz float %347, %348 %350 = fmul nsz float %TEMP189.x.0, %TEMP189.x.0 %351 = fcmp nsz olt float %349, %350 br i1 %351, label %if245, label %endif259 if245: ; preds = %endif226 %352 = call i32 @llvm.amdgcn.buffer.atomic.add(i32 1, <4 x i32> %260, i32 0, i32 0, i1 false) #4 %353 = icmp ult i32 %352, %253 br i1 %353, label %if249, label %endif259 if249: ; preds = %if245 %354 = call i32 @llvm.amdgcn.buffer.atomic.swap(i32 %352, <4 x i32> %262, i32 %32, i32 0, i1 false) #4 %355 = bitcast i32 %354 to float %356 = shl i32 %352, 1 %357 = insertelement <4 x float> undef, float %TEMP180.x.0, i32 0 %358 = shufflevector <4 x float> %357, <4 x float> undef, <4 x i32> zeroinitializer call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %358, <4 x i32> %264, i32 %356, i32 0, i1 true, i1 false) #6 %359 = or i32 %356, 1 %360 = insertelement <4 x float> undef, float %355, i32 0 %361 = shufflevector <4 x float> %360, <4 x float> undef, <4 x i32> zeroinitializer call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %361, <4 x i32> %264, i32 %359, i32 0, i1 true, i1 false) #6 br label %endif259 endif259: ; preds = %if245, %if249, %endif226 %362 = add i32 %265, 1 %363 = bitcast i32 %362 to float br label %loop196 endloop261: ; preds = %loop196 %364 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %17, i32 4) %365 = bitcast float %364 to i32 %366 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %1, i64 0, i64 19 %367 = load <4 x i32>, <4 x i32> addrspace(2)* %366, align 16 br label %loop263 loop263: ; preds = %endif308, %endloop261 %368 = phi i32 [ 0, %endloop261 ], [ %444, %endif308 ] %TEMP230.x.0 = phi float [ 0.000000e+00, %endloop261 ], [ %445, %endif308 ] %369 = icmp ult i32 %368, %365 br i1 %369, label %endif268, label %endif311 endif268: ; preds = %loop263 %370 = shl i32 %368, 4 %371 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %367, i32 %370) %372 = or i32 %370, 4 %373 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %367, i32 %372) %374 = or i32 %370, 8 %375 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %367, i32 %374) %376 = or i32 %370, 12 %377 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %367, i32 %376) %378 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 928) %379 = fadd nsz float %371, %378 %380 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 932) %381 = fadd nsz float %373, %380 %382 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 936) %383 = fadd nsz float %375, %382 %384 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 144) %385 = fmul nsz float %384, %381 %386 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 148) %387 = fmul nsz float %386, %381 %388 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 152) %389 = fmul nsz float %388, %381 %390 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 128) %391 = fmul nsz float %390, %379 %392 = fadd nsz float %391, %385 %393 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 132) %394 = fmul nsz float %393, %379 %395 = fadd nsz float %394, %387 %396 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 136) %397 = fmul nsz float %396, %379 %398 = fadd nsz float %397, %389 %399 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 160) %400 = fmul nsz float %399, %383 %401 = fadd nsz float %400, %392 %402 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 164) %403 = fmul nsz float %402, %383 %404 = fadd nsz float %403, %395 %405 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 168) %406 = fmul nsz float %405, %383 %407 = fadd nsz float %406, %398 %408 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 176) %409 = fadd nsz float %408, %401 %410 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 180) %411 = fadd nsz float %410, %404 %412 = call nsz float @llvm.SI.load.const.v4i32(<4 x i32> %76, i32 184) %413 = fadd nsz float %412, %407 %414 = fsub nsz float %409, %242 %415 = fsub nsz float %411, %243 %416 = fsub nsz float %413, %244 %417 = call nsz float @llvm.fabs.f32(float %414) #4 %418 = call nsz float @llvm.fabs.f32(float %415) #4 %419 = call nsz float @llvm.fabs.f32(float %416) #4 %420 = fsub nsz float %417, %245 %421 = fsub nsz float %418, %246 %422 = fsub nsz float %419, %247 %423 = call nsz float @llvm.maxnum.f32(float %420, float 0.000000e+00) #1 %424 = call nsz float @llvm.maxnum.f32(float %421, float 0.000000e+00) #1 %425 = call nsz float @llvm.maxnum.f32(float %422, float 0.000000e+00) #1 %426 = fmul nsz float %423, %423 %427 = fmul nsz float %424, %424 %428 = fadd nsz float %427, %426 %429 = fmul nsz float %425, %425 %430 = fadd nsz float %428, %429 %431 = fmul nsz float %377, %377 %432 = fcmp nsz olt float %430, %431 br i1 %432, label %if292, label %endif308 if292: ; preds = %endif268 %433 = call i32 @llvm.amdgcn.buffer.atomic.add(i32 1, <4 x i32> %260, i32 0, i32 0, i1 false) #4 %434 = icmp ult i32 %433, %253 br i1 %434, label %if296, label %endif308 if296: ; preds = %if292 %435 = add i32 %32, %249 %436 = call i32 @llvm.amdgcn.buffer.atomic.swap(i32 %433, <4 x i32> %262, i32 %435, i32 0, i1 false) #4 %437 = bitcast i32 %436 to float %438 = shl i32 %433, 1 %439 = insertelement <4 x float> undef, float %TEMP230.x.0, i32 0 %440 = shufflevector <4 x float> %439, <4 x float> undef, <4 x i32> zeroinitializer call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %440, <4 x i32> %264, i32 %438, i32 0, i1 true, i1 false) #6 %441 = or i32 %438, 1 %442 = insertelement <4 x float> undef, float %437, i32 0 %443 = shufflevector <4 x float> %442, <4 x float> undef, <4 x i32> zeroinitializer call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %443, <4 x i32> %264, i32 %441, i32 0, i1 true, i1 false) #6 br label %endif308 endif308: ; preds = %if292, %if296, %endif268 %444 = add i32 %368, 1 %445 = bitcast i32 %444 to float br label %loop263 endif311: ; preds = %loop263, %main_body ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1 ; Function Attrs: nounwind readnone speculatable declare float @llvm.exp2.f32(float) #2 ; Function Attrs: nounwind readnone speculatable declare float @llvm.minnum.f32(float, float) #2 ; Function Attrs: nounwind readnone speculatable declare float @llvm.maxnum.f32(float, float) #2 ; Function Attrs: nounwind readonly declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #3 ; Function Attrs: nounwind readnone speculatable declare float @llvm.sqrt.f32(float) #2 ; Function Attrs: nounwind readnone speculatable declare float @llvm.fabs.f32(float) #2 ; Function Attrs: nounwind declare i32 @llvm.amdgcn.buffer.atomic.add(i32, <4 x i32>, i32, i32, i1) #4 ; Function Attrs: nounwind declare i32 @llvm.amdgcn.buffer.atomic.swap(i32, <4 x i32>, i32, i32, i1) #4 ; Function Attrs: nounwind writeonly declare void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #5 ; Function Attrs: nounwind readonly declare float @llvm.amdgcn.buffer.load.format.f32(<4 x i32>, i32, i32, i1, i1) #3 attributes #0 = { "amdgpu-max-work-group-size"="64" "no-signed-zeros-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone speculatable } attributes #3 = { nounwind readonly } attributes #4 = { nounwind } attributes #5 = { nounwind writeonly } attributes #6 = { inaccessiblememonly nounwind } !0 = !{} !1 = !{float 2.500000e+00}