ATTENTION: default value of option vblank_mode overridden by environment. FRAG DCL IN[0..1], ARRAY(1), GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL OUT[2], COLOR[2] DCL OUT[3], COLOR[3] DCL OUT[4], COLOR[4] DCL OUT[5], COLOR[5] DCL OUT[6], COLOR[6] DCL OUT[7], COLOR[7] DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL SVIEW[5], 2D, FLOAT DCL SVIEW[6], 2D, FLOAT DCL CONST[7] DCL CONST[1][0..15] DCL CONST[2][0..8] DCL CONST[3][0..1] DCL CONST[4][0..223] DCL CONST[5][0..15] DCL TEMP[0..26], LOCAL DCL TEMP[27..34], ARRAY(1), LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 2.0000, -2.0000} IMM[1] FLT32 { -1.0000, 1.0000, 7.9688, 32.0000} IMM[2] UINT32 {3, 1312, 416, 432} IMM[3] FLT32 { 255.0000, 340282346638528859811704183484516925440.0000, 0.5000, -340282346638528859811704183484516925440.0000} IMM[4] UINT32 {656, 1408, 192, 224} IMM[5] FLT32 { 1.0000, -0.5000, 0.8750, 0.4500} IMM[6] FLT32 { 0.2000, 0.7000, 0.1000, 0.4000} IMM[7] FLT32 { 0.0200, 0.0000, 0.0300, 0.0120} IMM[8] UINT32 {208, 0, 0, 0} 0: MUL TEMP[0].xyw, IMM[0].xxyy, IN[0].xyxx 1: MOV TEMP[1].xy, TEMP[0].xyyy 2: MOV TEMP[1].w, TEMP[0].wwww 3: TXL TEMP[1].x, TEMP[1], SAMP[1], 2D 4: MOV TEMP[1].x, TEMP[1].xxxx 5: MOV TEMP[2].xy, TEMP[0].xyyy 6: MOV TEMP[2].w, TEMP[0].wwww 7: TXL TEMP[2].xy, TEMP[2], SAMP[4], 2D 8: MAD TEMP[0].xy, TEMP[2].xyyy, IMM[0].zwww, IMM[1].xyyy 9: MOV TEMP[2].y, IN[0].yyyy 10: ADD TEMP[3].xy, CONST[4][82].zwww, IN[0].xyyy 11: MUL TEMP[4].xy, IMM[0].xyyy, IN[0].xxxx 12: MOV TEMP[3].zw, TEMP[4].yyxy 13: MOV TEMP[2].x, TEMP[3].xxww 14: MOV TEMP[5].xy, TEMP[2].xyyy 15: MOV TEMP[5].w, TEMP[4].yyyy 16: TXL TEMP[5].x, TEMP[5], SAMP[1], 2D 17: MOV TEMP[1].z, TEMP[5].xxxx 18: MOV TEMP[5].xy, TEMP[3].zyyy 19: MOV TEMP[5].w, TEMP[4].yyyy 20: TXL TEMP[5].x, TEMP[5], SAMP[1], 2D 21: MOV TEMP[6].xy, TEMP[3].xyyy 22: MOV TEMP[6].w, TEMP[4].yyyy 23: TXL TEMP[4].x, TEMP[6], SAMP[1], 2D 24: MOV TEMP[1].w, TEMP[4].xxxx 25: MOV TEMP[1].y, TEMP[5].xxxx 26: MUL TEMP[2], TEMP[1], IMM[1].zzzz 27: FRC TEMP[4], TEMP[2] 28: ADD TEMP[2], TEMP[2], -TEMP[4] 29: MUL TEMP[3], TEMP[2], IMM[1].wwww 30: MAD TEMP[1], TEMP[1], IMM[3].xxxx, -TEMP[3] 31: ADD TEMP[1], TEMP[1], TEMP[1] 32: MAD TEMP[3].xy, TEMP[1].xxxx, CONST[4][26].zwww, CONST[4][26].xyyy 33: MOV TEMP[4].xy, TEMP[3].xyyy 34: MOV TEMP[4].w, IMM[0].yyyy 35: TXL TEMP[4], TEMP[4], SAMP[6], 2D 36: ADD TEMP[3].xy, TEMP[3].xyyy, CONST[4][26].zwww 37: MUL TEMP[5].xy, TEMP[4].zzzz, CONST[4][27].zxxx 38: MUL TEMP[6].xy, TEMP[4].zzzz, CONST[4][41].xyyy 39: MUL TEMP[6].xy, TEMP[6].xyyy, CONST[4][27].zxxx 40: FSEQ TEMP[7].x, TEMP[6].xxxx, IMM[0].yyyy 41: UCMP TEMP[8].x, TEMP[7].xxxx, IMM[3].yyyy, TEMP[8].xxxx 42: RCP TEMP[9].x, TEMP[6].xxxx 43: NOT TEMP[7].x, TEMP[7].xxxx 44: UCMP TEMP[8].x, TEMP[7].xxxx, TEMP[9].xxxx, TEMP[8].xxxx 45: MOV TEMP[7].x, TEMP[8].xxxx 46: FSEQ TEMP[8].x, TEMP[6].yyyy, IMM[0].yyyy 47: UCMP TEMP[9].x, TEMP[8].xxxx, IMM[3].yyyy, TEMP[9].xxxx 48: RCP TEMP[10].x, TEMP[6].yyyy 49: NOT TEMP[8].x, TEMP[8].xxxx 50: UCMP TEMP[9].x, TEMP[8].xxxx, TEMP[10].xxxx, TEMP[9].xxxx 51: MOV TEMP[7].y, TEMP[9].xxxx 52: MUL TEMP[6].xy, TEMP[4].wwww, TEMP[6].xyyy 53: MUL TEMP[7].xy, TEMP[7].xyyy, IMM[3].zzzz 54: ADD TEMP[8], -TEMP[2].xxyy, IMM[0].xzxz 55: ADD TEMP[2], -TEMP[2].zzww, IMM[0].xzxz 56: ADD TEMP[9].x, CONST[4][88].xxxx, CONST[4][88].xxxx 57: MUL TEMP[9].x, TEMP[9].xxxx, IN[1].zzzz 58: MOV TEMP[9].z, TEMP[9].xxxx 59: MUL TEMP[9].xy, CONST[4][88].xxxx, IN[1].xyyy 60: MUL TEMP[10].xy, TEMP[9].xzzz, IMM[5].xyyy 61: MOV TEMP[11].z, TEMP[10].yxyy 62: MUL TEMP[12].xy, TEMP[9].yzzz, IMM[5].xyyy 63: MOV TEMP[13].z, TEMP[12].yxyy 64: MUL TEMP[14].xy, TEMP[9].xyyy, IMM[1].yxxx 65: FSLT TEMP[15].x, -TEMP[0].yyyy, IMM[0].yyyy 66: UCMP TEMP[16].x, TEMP[15].xxxx, IMM[1].xxxx, TEMP[16].xxxx 67: NOT TEMP[15].x, TEMP[15].xxxx 68: UCMP TEMP[16].x, TEMP[15].xxxx, IMM[0].xxxx, TEMP[16].xxxx 69: MOV TEMP[9].x, TEMP[16].xxxx 70: MUL TEMP[11].x, TEMP[10].xxxx, TEMP[16].xxxx 71: FSLT TEMP[10].x, TEMP[0].xxxx, IMM[0].yyyy 72: UCMP TEMP[15].x, TEMP[10].xxxx, IMM[1].xxxx, TEMP[15].xxxx 73: NOT TEMP[10].x, TEMP[10].xxxx 74: UCMP TEMP[15].x, TEMP[10].xxxx, IMM[0].xxxx, TEMP[15].xxxx 75: MOV TEMP[10].y, TEMP[15].xxxx 76: MUL TEMP[13].x, TEMP[12].xxxx, TEMP[15].xxxx 77: ABS TEMP[12], TEMP[8].yyww 78: FSLT TEMP[12], -TEMP[12], IMM[0].yyyy 79: ABS TEMP[15], TEMP[8].xxzz 80: FSLT TEMP[15], -TEMP[15], IMM[0].yyyy 81: UCMP TEMP[15], TEMP[15], TEMP[14].xyxy, TEMP[11].xzxz 82: UCMP TEMP[12], TEMP[12], TEMP[15], TEMP[13].xzxz 83: ABS TEMP[15], TEMP[2].yyww 84: FSLT TEMP[15], -TEMP[15], IMM[0].yyyy 85: ABS TEMP[16], TEMP[2].xxzz 86: FSLT TEMP[16], -TEMP[16], IMM[0].yyyy 87: UCMP TEMP[14], TEMP[16], TEMP[14].xyxy, TEMP[11].xzxz 88: UCMP TEMP[14], TEMP[15], TEMP[14], TEMP[13].xzxz 89: DDX TEMP[15], TEMP[12] 90: MUL TEMP[16].xy, TEMP[6].xyyy, TEMP[15].xyyy 91: DP2 TEMP[16].x, TEMP[16].xyyy, TEMP[16].xyyy 92: MUL TEMP[17], CONST[7].xxxx, TEMP[12] 93: DDY TEMP[17], TEMP[17] 94: MOV TEMP[17], -TEMP[17] 95: MUL TEMP[18].xy, TEMP[6].xyyy, TEMP[17].xyyy 96: DP2 TEMP[1].x, TEMP[18].xyyy, TEMP[18].xyyy 97: MAX TEMP[16].x, TEMP[16].xxxx, TEMP[1].xxxx 98: FSEQ TEMP[18].x, TEMP[16].xxxx, IMM[0].yyyy 99: UCMP TEMP[19].x, TEMP[18].xxxx, IMM[3].yyyy, TEMP[19].xxxx 100: ABS TEMP[16].x, TEMP[16].xxxx 101: RSQ TEMP[16].x, TEMP[16].xxxx 102: NOT TEMP[18].x, TEMP[18].xxxx 103: UCMP TEMP[19].x, TEMP[18].xxxx, TEMP[16].xxxx, TEMP[19].xxxx 104: FSEQ TEMP[16].x, TEMP[19].xxxx, IMM[0].yyyy 105: UCMP TEMP[18].x, TEMP[16].xxxx, IMM[3].yyyy, TEMP[18].xxxx 106: RCP TEMP[19].x, TEMP[19].xxxx 107: NOT TEMP[16].x, TEMP[16].xxxx 108: UCMP TEMP[18].x, TEMP[16].xxxx, TEMP[19].xxxx, TEMP[18].xxxx 109: FSEQ TEMP[16].x, TEMP[18].xxxx, IMM[0].yyyy 110: UCMP TEMP[19].x, TEMP[16].xxxx, IMM[3].wwww, TEMP[19].xxxx 111: ABS TEMP[18].x, TEMP[18].xxxx 112: LG2 TEMP[18].x, TEMP[18].xxxx 113: NOT TEMP[16].x, TEMP[16].xxxx 114: UCMP TEMP[19].x, TEMP[16].xxxx, TEMP[18].xxxx, TEMP[19].xxxx 115: MUL TEMP[16].x, TEMP[19].xxxx, IMM[5].zzzz 116: MIN TEMP[1].x, TEMP[16].xxxx, CONST[4][41].zzzz 117: MAX TEMP[16].x, TEMP[1].xxxx, IMM[0].yyyy 118: FRC TEMP[19].x, -TEMP[16].xxxx 119: ADD TEMP[19].x, TEMP[16].xxxx, TEMP[19].xxxx 120: EX2 TEMP[19].x, TEMP[19].xxxx 121: MUL TEMP[20].xy, TEMP[7].xyyy, TEMP[19].xxxx 122: MAD TEMP[7].xy, TEMP[7].xyyy, -TEMP[19].xxxx, IMM[0].xxxx 123: MUL TEMP[19].xy, TEMP[4].wwww, TEMP[12].xyyy 124: FRC TEMP[19].xy, TEMP[19].xyyy 125: MAX TEMP[19].xy, TEMP[19].xyyy, TEMP[20].xyyy 126: MIN TEMP[19].xy, TEMP[7].xyyy, TEMP[19].xyyy 127: MAD TEMP[18].xy, TEMP[19].xyyy, TEMP[5].xyyy, TEMP[4].xyyy 128: MOV TEMP[19].xy, TEMP[18].xyyy 129: MOV TEMP[19].w, TEMP[16].xxxx 130: TXL TEMP[19].xyz, TEMP[19], SAMP[2], 2D 131: MOV TEMP[18].xy, TEMP[18].xyyy 132: MOV TEMP[18].w, TEMP[16].xxxx 133: TXL TEMP[16].xyw, TEMP[18], SAMP[3], 2D 134: MAD TEMP[7].xy, TEMP[1].yyyy, CONST[4][26].zwww, CONST[4][26].xyyy 135: MOV TEMP[18].xy, TEMP[7].xyyy 136: MOV TEMP[18].w, IMM[0].yyyy 137: TXL TEMP[18], TEMP[18], SAMP[6], 2D 138: ADD TEMP[7].xy, TEMP[7].xyyy, CONST[4][26].zwww 139: MUL TEMP[1].xy, TEMP[18].zzzz, CONST[4][27].zxxx 140: MUL TEMP[13].xy, TEMP[18].zzzz, CONST[4][41].xyyy 141: MUL TEMP[13].xy, TEMP[13].xyyy, CONST[4][27].zxxx 142: FSEQ TEMP[20].x, TEMP[13].xxxx, IMM[0].yyyy 143: UCMP TEMP[21].x, TEMP[20].xxxx, IMM[3].yyyy, TEMP[21].xxxx 144: RCP TEMP[22].x, TEMP[13].xxxx 145: NOT TEMP[20].x, TEMP[20].xxxx 146: UCMP TEMP[21].x, TEMP[20].xxxx, TEMP[22].xxxx, TEMP[21].xxxx 147: MOV TEMP[6].x, TEMP[21].xxxx 148: FSEQ TEMP[20].x, TEMP[13].yyyy, IMM[0].yyyy 149: UCMP TEMP[21].x, TEMP[20].xxxx, IMM[3].yyyy, TEMP[21].xxxx 150: RCP TEMP[22].x, TEMP[13].yyyy 151: NOT TEMP[20].x, TEMP[20].xxxx 152: UCMP TEMP[21].x, TEMP[20].xxxx, TEMP[22].xxxx, TEMP[21].xxxx 153: MOV TEMP[6].y, TEMP[21].xxxx 154: MUL TEMP[13].xy, TEMP[18].wwww, TEMP[13].xyyy 155: MUL TEMP[6].xy, TEMP[6].xyyy, IMM[3].zzzz 156: MUL TEMP[15].xy, TEMP[15].zwww, TEMP[13].xyyy 157: MUL TEMP[13].xy, TEMP[17].zwww, TEMP[13].xyyy 158: DP2 TEMP[20].x, TEMP[13].xyyy, TEMP[13].xyyy 159: DP2 TEMP[15].x, TEMP[15].xyyy, TEMP[15].xyyy 160: MAX TEMP[15].x, TEMP[15].xxxx, TEMP[20].xxxx 161: FSEQ TEMP[20].x, TEMP[15].xxxx, IMM[0].yyyy 162: UCMP TEMP[21].x, TEMP[20].xxxx, IMM[3].yyyy, TEMP[21].xxxx 163: ABS TEMP[15].x, TEMP[15].xxxx 164: RSQ TEMP[15].x, TEMP[15].xxxx 165: NOT TEMP[20].x, TEMP[20].xxxx 166: UCMP TEMP[21].x, TEMP[20].xxxx, TEMP[15].xxxx, TEMP[21].xxxx 167: FSEQ TEMP[15].x, TEMP[21].xxxx, IMM[0].yyyy 168: UCMP TEMP[20].x, TEMP[15].xxxx, IMM[3].yyyy, TEMP[20].xxxx 169: RCP TEMP[21].x, TEMP[21].xxxx 170: NOT TEMP[15].x, TEMP[15].xxxx 171: UCMP TEMP[20].x, TEMP[15].xxxx, TEMP[21].xxxx, TEMP[20].xxxx 172: FSEQ TEMP[15].x, TEMP[20].xxxx, IMM[0].yyyy 173: UCMP TEMP[21].x, TEMP[15].xxxx, IMM[3].wwww, TEMP[21].xxxx 174: ABS TEMP[20].x, TEMP[20].xxxx 175: LG2 TEMP[20].x, TEMP[20].xxxx 176: NOT TEMP[15].x, TEMP[15].xxxx 177: UCMP TEMP[21].x, TEMP[15].xxxx, TEMP[20].xxxx, TEMP[21].xxxx 178: MUL TEMP[15].x, TEMP[21].xxxx, IMM[5].zzzz 179: MIN TEMP[15].x, TEMP[15].xxxx, CONST[4][41].zzzz 180: MAX TEMP[15].x, TEMP[15].xxxx, IMM[0].yyyy 181: FRC TEMP[20].x, -TEMP[15].xxxx 182: ADD TEMP[20].x, TEMP[15].xxxx, TEMP[20].xxxx 183: EX2 TEMP[20].x, TEMP[20].xxxx 184: MUL TEMP[17].xy, TEMP[6].xyyy, TEMP[20].xxxx 185: MAD TEMP[6].xy, TEMP[6].xyyy, -TEMP[20].xxxx, IMM[0].xxxx 186: MUL TEMP[12].xy, TEMP[12].zwww, TEMP[18].wwww 187: FRC TEMP[12].xy, TEMP[12].xyyy 188: MAX TEMP[12].xy, TEMP[12].xyyy, TEMP[17].xyyy 189: MIN TEMP[17].xy, TEMP[6].xyyy, TEMP[12].xyyy 190: MAD TEMP[13].xy, TEMP[17].xyyy, TEMP[1].xyyy, TEMP[18].xyyy 191: MOV TEMP[12].xy, TEMP[13].xyyy 192: MOV TEMP[12].w, TEMP[15].xxxx 193: TXL TEMP[12].xyz, TEMP[12], SAMP[2], 2D 194: MOV TEMP[13].xy, TEMP[13].xyyy 195: MOV TEMP[13].w, TEMP[15].xxxx 196: TXL TEMP[13].xyw, TEMP[13], SAMP[3], 2D 197: MUL TEMP[1].xy, CONST[4][82].xyyy, IN[0].xyyy 198: FRC TEMP[1].xy, TEMP[1].xyyy 199: LRP TEMP[17].xyz, TEMP[1].yyyy, TEMP[12].xyzz, TEMP[19].xyzz 200: MAD TEMP[4].xy, TEMP[1].zzzz, CONST[4][26].zwww, CONST[4][26].xyyy 201: MAD TEMP[6].xy, TEMP[1].wwww, CONST[4][26].zwww, CONST[4][26].xyyy 202: MOV TEMP[12].xy, TEMP[4].xyyy 203: MOV TEMP[12].w, IMM[0].yyyy 204: TXL TEMP[12], TEMP[12], SAMP[6], 2D 205: ADD TEMP[4].xy, TEMP[4].xyyy, CONST[4][26].zwww 206: MUL TEMP[15].xy, TEMP[12].zzzz, CONST[4][27].zxxx 207: MUL TEMP[18].xy, TEMP[12].zzzz, CONST[4][41].xyyy 208: MUL TEMP[18].xy, TEMP[18].xyyy, CONST[4][27].zxxx 209: FSEQ TEMP[19].x, TEMP[18].xxxx, IMM[0].yyyy 210: UCMP TEMP[20].x, TEMP[19].xxxx, IMM[3].yyyy, TEMP[20].xxxx 211: RCP TEMP[21].x, TEMP[18].xxxx 212: NOT TEMP[19].x, TEMP[19].xxxx 213: UCMP TEMP[20].x, TEMP[19].xxxx, TEMP[21].xxxx, TEMP[20].xxxx 214: MOV TEMP[19].x, TEMP[20].xxxx 215: FSEQ TEMP[20].x, TEMP[18].yyyy, IMM[0].yyyy 216: UCMP TEMP[21].x, TEMP[20].xxxx, IMM[3].yyyy, TEMP[21].xxxx 217: RCP TEMP[22].x, TEMP[18].yyyy 218: NOT TEMP[20].x, TEMP[20].xxxx 219: UCMP TEMP[21].x, TEMP[20].xxxx, TEMP[22].xxxx, TEMP[21].xxxx 220: MOV TEMP[19].y, TEMP[21].xxxx 221: MUL TEMP[18].xy, TEMP[12].wwww, TEMP[18].xyyy 222: MUL TEMP[20].xy, TEMP[19].xyyy, IMM[3].zzzz 223: DDX TEMP[21], TEMP[14] 224: MUL TEMP[19].xy, TEMP[18].xyyy, TEMP[21].xyyy 225: DP2 TEMP[22].x, TEMP[19].xyyy, TEMP[19].xyyy 226: MUL TEMP[23], CONST[7].xxxx, TEMP[14] 227: DDY TEMP[23], TEMP[23] 228: MOV TEMP[23], -TEMP[23] 229: MUL TEMP[18].xy, TEMP[18].xyyy, TEMP[23].xyyy 230: DP2 TEMP[24].x, TEMP[18].xyyy, TEMP[18].xyyy 231: MAX TEMP[22].x, TEMP[22].xxxx, TEMP[24].xxxx 232: FSEQ TEMP[24].x, TEMP[22].xxxx, IMM[0].yyyy 233: UCMP TEMP[25].x, TEMP[24].xxxx, IMM[3].yyyy, TEMP[25].xxxx 234: ABS TEMP[22].x, TEMP[22].xxxx 235: RSQ TEMP[22].x, TEMP[22].xxxx 236: NOT TEMP[24].x, TEMP[24].xxxx 237: UCMP TEMP[25].x, TEMP[24].xxxx, TEMP[22].xxxx, TEMP[25].xxxx 238: FSEQ TEMP[22].x, TEMP[25].xxxx, IMM[0].yyyy 239: UCMP TEMP[24].x, TEMP[22].xxxx, IMM[3].yyyy, TEMP[24].xxxx 240: RCP TEMP[25].x, TEMP[25].xxxx 241: NOT TEMP[22].x, TEMP[22].xxxx 242: UCMP TEMP[24].x, TEMP[22].xxxx, TEMP[25].xxxx, TEMP[24].xxxx 243: FSEQ TEMP[22].x, TEMP[24].xxxx, IMM[0].yyyy 244: UCMP TEMP[25].x, TEMP[22].xxxx, IMM[3].wwww, TEMP[25].xxxx 245: ABS TEMP[24].x, TEMP[24].xxxx 246: LG2 TEMP[24].x, TEMP[24].xxxx 247: NOT TEMP[22].x, TEMP[22].xxxx 248: UCMP TEMP[25].x, TEMP[22].xxxx, TEMP[24].xxxx, TEMP[25].xxxx 249: MUL TEMP[22].x, TEMP[25].xxxx, IMM[5].zzzz 250: MIN TEMP[22].x, TEMP[22].xxxx, CONST[4][41].zzzz 251: MAX TEMP[22].x, TEMP[22].xxxx, IMM[0].yyyy 252: FRC TEMP[24].x, -TEMP[22].xxxx 253: ADD TEMP[24].x, TEMP[22].xxxx, TEMP[24].xxxx 254: EX2 TEMP[24].x, TEMP[24].xxxx 255: MUL TEMP[18].xy, TEMP[20].xyyy, TEMP[24].xxxx 256: MAD TEMP[20].xy, TEMP[20].xyyy, -TEMP[24].xxxx, IMM[0].xxxx 257: MUL TEMP[11].xy, TEMP[14].xyyy, TEMP[12].wwww 258: FRC TEMP[11].xy, TEMP[11].xyyy 259: MAX TEMP[24].xy, TEMP[11].xyyy, TEMP[18].xyyy 260: MIN TEMP[11].xy, TEMP[20].xyyy, TEMP[24].xyyy 261: MAD TEMP[12].xy, TEMP[11].xyyy, TEMP[15].xyyy, TEMP[12].xyyy 262: MOV TEMP[15].xy, TEMP[12].xyyy 263: MOV TEMP[15].w, TEMP[22].xxxx 264: TXL TEMP[15].xyz, TEMP[15], SAMP[2], 2D 265: MOV TEMP[12].xy, TEMP[12].xyyy 266: MOV TEMP[12].w, TEMP[22].xxxx 267: TXL TEMP[12].xyw, TEMP[12], SAMP[3], 2D 268: MOV TEMP[20].xy, TEMP[6].xyyy 269: MOV TEMP[20].w, IMM[0].yyyy 270: TXL TEMP[20], TEMP[20], SAMP[6], 2D 271: ADD TEMP[6].xy, TEMP[6].xyyy, CONST[4][26].zwww 272: MUL TEMP[22].xy, TEMP[20].zzzz, CONST[4][27].zxxx 273: MUL TEMP[11].xy, TEMP[20].zzzz, CONST[4][41].xyyy 274: MUL TEMP[11].xy, TEMP[11].xyyy, CONST[4][27].zxxx 275: FSEQ TEMP[24].x, TEMP[11].xxxx, IMM[0].yyyy 276: UCMP TEMP[25].x, TEMP[24].xxxx, IMM[3].yyyy, TEMP[25].xxxx 277: RCP TEMP[26].x, TEMP[11].xxxx 278: NOT TEMP[24].x, TEMP[24].xxxx 279: UCMP TEMP[25].x, TEMP[24].xxxx, TEMP[26].xxxx, TEMP[25].xxxx 280: MOV TEMP[19].x, TEMP[25].xxxx 281: FSEQ TEMP[24].x, TEMP[11].yyyy, IMM[0].yyyy 282: UCMP TEMP[25].x, TEMP[24].xxxx, IMM[3].yyyy, TEMP[25].xxxx 283: RCP TEMP[26].x, TEMP[11].yyyy 284: NOT TEMP[24].x, TEMP[24].xxxx 285: UCMP TEMP[25].x, TEMP[24].xxxx, TEMP[26].xxxx, TEMP[25].xxxx 286: MOV TEMP[19].y, TEMP[25].xxxx 287: MUL TEMP[11].xy, TEMP[20].wwww, TEMP[11].xyyy 288: MUL TEMP[19].xy, TEMP[19].xyyy, IMM[3].zzzz 289: MUL TEMP[21].xy, TEMP[21].zwww, TEMP[11].xyyy 290: MUL TEMP[11].xy, TEMP[23].zwww, TEMP[11].xyyy 291: DP2 TEMP[24].x, TEMP[11].xyyy, TEMP[11].xyyy 292: DP2 TEMP[21].x, TEMP[21].xyyy, TEMP[21].xyyy 293: MAX TEMP[21].x, TEMP[21].xxxx, TEMP[24].xxxx 294: FSEQ TEMP[24].x, TEMP[21].xxxx, IMM[0].yyyy 295: UCMP TEMP[25].x, TEMP[24].xxxx, IMM[3].yyyy, TEMP[25].xxxx 296: ABS TEMP[21].x, TEMP[21].xxxx 297: RSQ TEMP[21].x, TEMP[21].xxxx 298: NOT TEMP[24].x, TEMP[24].xxxx 299: UCMP TEMP[25].x, TEMP[24].xxxx, TEMP[21].xxxx, TEMP[25].xxxx 300: FSEQ TEMP[21].x, TEMP[25].xxxx, IMM[0].yyyy 301: UCMP TEMP[24].x, TEMP[21].xxxx, IMM[3].yyyy, TEMP[24].xxxx 302: RCP TEMP[25].x, TEMP[25].xxxx 303: NOT TEMP[21].x, TEMP[21].xxxx 304: UCMP TEMP[24].x, TEMP[21].xxxx, TEMP[25].xxxx, TEMP[24].xxxx 305: FSEQ TEMP[21].x, TEMP[24].xxxx, IMM[0].yyyy 306: UCMP TEMP[25].x, TEMP[21].xxxx, IMM[3].wwww, TEMP[25].xxxx 307: ABS TEMP[24].x, TEMP[24].xxxx 308: LG2 TEMP[24].x, TEMP[24].xxxx 309: NOT TEMP[21].x, TEMP[21].xxxx 310: UCMP TEMP[25].x, TEMP[21].xxxx, TEMP[24].xxxx, TEMP[25].xxxx 311: MUL TEMP[21].x, TEMP[25].xxxx, IMM[5].zzzz 312: MIN TEMP[21].x, TEMP[21].xxxx, CONST[4][41].zzzz 313: MAX TEMP[21].x, TEMP[21].xxxx, IMM[0].yyyy 314: FRC TEMP[24].x, -TEMP[21].xxxx 315: ADD TEMP[24].x, TEMP[21].xxxx, TEMP[24].xxxx 316: EX2 TEMP[24].x, TEMP[24].xxxx 317: MUL TEMP[11].xy, TEMP[19].xyyy, TEMP[24].xxxx 318: MAD TEMP[19].xy, TEMP[19].xyyy, -TEMP[24].xxxx, IMM[0].xxxx 319: MUL TEMP[14].xy, TEMP[14].zwww, TEMP[20].wwww 320: FRC TEMP[14].xy, TEMP[14].xyyy 321: MAX TEMP[14].xy, TEMP[14].xyyy, TEMP[11].xyyy 322: MIN TEMP[11].xy, TEMP[19].xyyy, TEMP[14].xyyy 323: MAD TEMP[23].xy, TEMP[11].xyyy, TEMP[22].xyyy, TEMP[20].xyyy 324: MOV TEMP[14].xy, TEMP[23].xyyy 325: MOV TEMP[14].w, TEMP[21].xxxx 326: TXL TEMP[14].xyz, TEMP[14], SAMP[2], 2D 327: MOV TEMP[19].xy, TEMP[23].xyyy 328: MOV TEMP[19].w, TEMP[21].xxxx 329: TXL TEMP[19].xyw, TEMP[19], SAMP[3], 2D 330: LRP TEMP[23].xyz, TEMP[1].yyyy, TEMP[19].xyww, TEMP[12].xyww 331: LRP TEMP[18].xyz, TEMP[1].yyyy, TEMP[14].xyzz, TEMP[15].xyzz 332: LRP TEMP[11].xyz, TEMP[1].xxxx, TEMP[18].xyzz, TEMP[17].xyzz 333: DP3 TEMP[12].x, TEMP[11].xyzz, IMM[6].xyzz 334: MOV TEMP[14].xy, TEMP[3].xyyy 335: MOV TEMP[14].w, IMM[0].yyyy 336: TXL TEMP[14].xy, TEMP[14], SAMP[6], 2D 337: MOV TEMP[15].xy, TEMP[7].xyyy 338: MOV TEMP[15].w, IMM[0].yyyy 339: TXL TEMP[15].xy, TEMP[15], SAMP[6], 2D 340: LRP TEMP[17].xy, TEMP[1].yyyy, TEMP[15].xyyy, TEMP[14].xyyy 341: MOV TEMP[14].xy, TEMP[4].xyyy 342: MOV TEMP[14].w, IMM[0].yyyy 343: TXL TEMP[14].xy, TEMP[14], SAMP[6], 2D 344: MOV TEMP[6].xy, TEMP[6].xyyy 345: MOV TEMP[6].w, IMM[0].yyyy 346: TXL TEMP[6].xy, TEMP[6], SAMP[6], 2D 347: LRP TEMP[7].xy, TEMP[1].yyyy, TEMP[6].xyyy, TEMP[14].xyyy 348: LRP TEMP[3].xy, TEMP[1].xxxx, TEMP[7].xyyy, TEMP[17].xyyy 349: ADD TEMP[6].x, TEMP[12].xxxx, -TEMP[3].xxxx 350: MUL TEMP[6].x, TEMP[3].yyyy, TEMP[6].xxxx 351: MOV TEMP[28].xyz, TEMP[6].xxxx 352: MOV_SAT TEMP[28].xyz, TEMP[28].xyzz 353: MOV TEMP[6].xy, IN[0].xyyy 354: TEX TEMP[6].xyz, TEMP[6], SAMP[0], 2D 355: ADD TEMP[3].xyz, TEMP[11].xyzz, TEMP[6].xyzz 356: ADD TEMP[27].xyz, TEMP[3].xyzz, IMM[5].yyyy 357: MOV TEMP[9].yz, IMM[7].yxyy 358: ABS TEMP[3].x, TEMP[8].xxxx 359: FSLT TEMP[3].x, -TEMP[3].xxxx, IMM[0].yyyy 360: UCMP TEMP[6].xyz, TEMP[3].xxxx, IMM[0].xyyx, TEMP[6].xyzz 361: NOT TEMP[3].x, TEMP[3].xxxx 362: UCMP TEMP[6].xyz, TEMP[3].xxxx, TEMP[9].xyzx, TEMP[6].xyzz 363: MOV TEMP[10].xz, IMM[7].zzwz 364: ABS TEMP[3].x, TEMP[8].yyyy 365: FSLT TEMP[3].x, -TEMP[3].xxxx, IMM[0].yyyy 366: UCMP TEMP[6].xyz, TEMP[3].xxxx, TEMP[6].xyzx, TEMP[6].xyzz 367: NOT TEMP[3].x, TEMP[3].xxxx 368: UCMP TEMP[6].xyz, TEMP[3].xxxx, TEMP[10].xyzx, TEMP[6].xyzz 369: ABS TEMP[3].x, TEMP[8].zzzz 370: FSLT TEMP[3].x, -TEMP[3].xxxx, IMM[0].yyyy 371: UCMP TEMP[11].xyz, TEMP[3].xxxx, IMM[0].xyyx, TEMP[11].xyzz 372: NOT TEMP[3].x, TEMP[3].xxxx 373: UCMP TEMP[11].xyz, TEMP[3].xxxx, TEMP[9].xyzx, TEMP[11].xyzz 374: ABS TEMP[3].x, TEMP[8].wwww 375: FSLT TEMP[3].x, -TEMP[3].xxxx, IMM[0].yyyy 376: UCMP TEMP[8].xyz, TEMP[3].xxxx, TEMP[11].xyzx, TEMP[8].xyzz 377: NOT TEMP[3].x, TEMP[3].xxxx 378: UCMP TEMP[8].xyz, TEMP[3].xxxx, TEMP[10].xyzx, TEMP[8].xyzz 379: LRP TEMP[7].xyz, TEMP[1].yyyy, TEMP[8].xyzz, TEMP[6].xyzz 380: ABS TEMP[3].x, TEMP[2].xxxx 381: FSLT TEMP[3].x, -TEMP[3].xxxx, IMM[0].yyyy 382: UCMP TEMP[6].xyz, TEMP[3].xxxx, IMM[0].xyyx, TEMP[6].xyzz 383: NOT TEMP[3].x, TEMP[3].xxxx 384: UCMP TEMP[6].xyz, TEMP[3].xxxx, TEMP[9].xyzx, TEMP[6].xyzz 385: ABS TEMP[3].x, TEMP[2].zzzz 386: FSLT TEMP[3].x, -TEMP[3].xxxx, IMM[0].yyyy 387: UCMP TEMP[8].xyz, TEMP[3].xxxx, IMM[0].xyyx, TEMP[8].xyzz 388: NOT TEMP[3].x, TEMP[3].xxxx 389: UCMP TEMP[8].xyz, TEMP[3].xxxx, TEMP[9].xyzx, TEMP[8].xyzz 390: ABS TEMP[3].x, TEMP[2].wwww 391: FSLT TEMP[3].x, -TEMP[3].xxxx, IMM[0].yyyy 392: UCMP TEMP[8].xyz, TEMP[3].xxxx, TEMP[8].xyzx, TEMP[8].xyzz 393: NOT TEMP[3].x, TEMP[3].xxxx 394: UCMP TEMP[8].xyz, TEMP[3].xxxx, TEMP[10].xyzx, TEMP[8].xyzz 395: ABS TEMP[3].x, TEMP[2].yyyy 396: FSLT TEMP[3].x, -TEMP[3].xxxx, IMM[0].yyyy 397: UCMP TEMP[6].xyz, TEMP[3].xxxx, TEMP[6].xyzx, TEMP[6].xyzz 398: NOT TEMP[3].x, TEMP[3].xxxx 399: UCMP TEMP[6].xyz, TEMP[3].xxxx, TEMP[10].xyzx, TEMP[6].xyzz 400: LRP TEMP[4].xyz, TEMP[1].yyyy, TEMP[8].xyzz, TEMP[6].xyzz 401: LRP TEMP[2].xyz, TEMP[1].xxxx, TEMP[4].xyzz, TEMP[7].xyzz 402: MAD TEMP[3].x, TEMP[0].xxxx, -TEMP[0].xxxx, IMM[0].xxxx 403: MAD TEMP[3].x, TEMP[0].yyyy, -TEMP[0].yyyy, TEMP[3].xxxx 404: FSEQ TEMP[6].x, TEMP[3].xxxx, IMM[0].yyyy 405: UCMP TEMP[7].x, TEMP[6].xxxx, IMM[3].yyyy, TEMP[7].xxxx 406: ABS TEMP[3].x, TEMP[3].xxxx 407: RSQ TEMP[3].x, TEMP[3].xxxx 408: NOT TEMP[6].x, TEMP[6].xxxx 409: UCMP TEMP[7].x, TEMP[6].xxxx, TEMP[3].xxxx, TEMP[7].xxxx 410: FSEQ TEMP[3].x, TEMP[7].xxxx, IMM[0].yyyy 411: UCMP TEMP[6].x, TEMP[3].xxxx, IMM[3].yyyy, TEMP[6].xxxx 412: RCP TEMP[7].x, TEMP[7].xxxx 413: NOT TEMP[3].x, TEMP[3].xxxx 414: UCMP TEMP[6].x, TEMP[3].xxxx, TEMP[7].xxxx, TEMP[6].xxxx 415: MOV TEMP[0].z, TEMP[6].xxxx 416: DP3 TEMP[3].x, TEMP[0].xyzz, TEMP[2].xyzz 417: ADD TEMP[2].xyz, TEMP[2].xyzz, -TEMP[3].xxxx 418: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 419: SQRT TEMP[3].x, TEMP[3].xxxx 420: FSEQ TEMP[6].x, TEMP[3].xxxx, IMM[0].yyyy 421: MUL TEMP[7].xyz, TEMP[2].xyzz, IMM[3].yyyy 422: UCMP TEMP[7].xyz, TEMP[6].xxxx, TEMP[7].xyzx, TEMP[7].xyzz 423: RCP TEMP[8].x, TEMP[3].xxxx 424: MUL TEMP[8].xyz, TEMP[2].xyzz, TEMP[8].xxxx 425: NOT TEMP[6].x, TEMP[6].xxxx 426: UCMP TEMP[7].xyz, TEMP[6].xxxx, TEMP[8].xyzx, TEMP[7].xyzz 427: MUL TEMP[2].xyz, TEMP[0].yzxx, TEMP[7].zxyy 428: MAD TEMP[2].xyz, TEMP[7].yzxx, TEMP[0].zxyy, -TEMP[2].xyzz 429: LRP TEMP[4].xyz, TEMP[1].yyyy, TEMP[13].xyww, TEMP[16].xyww 430: LRP TEMP[5].xyz, TEMP[1].xxxx, TEMP[23].xyzz, TEMP[4].xyzz 431: MAD TEMP[1].xyz, TEMP[5].xyzz, IMM[0].zzzz, IMM[1].xxxx 432: MOV TEMP[4].w, TEMP[1].yyyy 433: MUL TEMP[4].x, TEMP[1].xxxx, TEMP[1].zzzz 434: MAD TEMP[5].x, TEMP[4].xxxx, -TEMP[4].xxxx, IMM[0].xxxx 435: MAD TEMP[5].x, TEMP[1].yyyy, -TEMP[1].yyyy, TEMP[5].xxxx 436: FSEQ TEMP[6].x, TEMP[5].xxxx, IMM[0].yyyy 437: UCMP TEMP[8].x, TEMP[6].xxxx, IMM[3].yyyy, TEMP[8].xxxx 438: ABS TEMP[5].x, TEMP[5].xxxx 439: RSQ TEMP[5].x, TEMP[5].xxxx 440: NOT TEMP[6].x, TEMP[6].xxxx 441: UCMP TEMP[8].x, TEMP[6].xxxx, TEMP[5].xxxx, TEMP[8].xxxx 442: FSEQ TEMP[5].x, TEMP[8].xxxx, IMM[0].yyyy 443: UCMP TEMP[6].x, TEMP[5].xxxx, IMM[3].yyyy, TEMP[6].xxxx 444: RCP TEMP[8].x, TEMP[8].xxxx 445: NOT TEMP[5].x, TEMP[5].xxxx 446: UCMP TEMP[6].x, TEMP[5].xxxx, TEMP[8].xxxx, TEMP[6].xxxx 447: MOV TEMP[4].z, TEMP[6].xxxx 448: MUL TEMP[1].xyz, TEMP[4].xwzz, IMM[5].xxww 449: DP3 TEMP[4].x, TEMP[1].xyzz, TEMP[1].xyzz 450: SQRT TEMP[3].x, TEMP[4].xxxx 451: FSEQ TEMP[4].x, TEMP[3].xxxx, IMM[0].yyyy 452: MUL TEMP[5].xyz, TEMP[1].xyzz, IMM[3].yyyy 453: UCMP TEMP[5].xyz, TEMP[4].xxxx, TEMP[5].xyzx, TEMP[5].xyzz 454: RCP TEMP[6].x, TEMP[3].xxxx 455: MUL TEMP[6].xyz, TEMP[1].xyzz, TEMP[6].xxxx 456: NOT TEMP[4].x, TEMP[4].xxxx 457: UCMP TEMP[5].xyz, TEMP[4].xxxx, TEMP[6].xyzx, TEMP[5].xyzz 458: MUL TEMP[1].xyz, TEMP[2].xyzz, TEMP[5].yyyy 459: MAD TEMP[1].xyz, TEMP[5].xxxx, TEMP[7].xyzz, TEMP[1].xyzz 460: MAD TEMP[0].xyz, TEMP[5].zzzz, TEMP[0].xyzz, TEMP[1].xyzz 461: DP3 TEMP[1].x, TEMP[0].xyzz, CONST[4][12].xyzz 462: DP3 TEMP[4].x, TEMP[0].xyzz, CONST[4][14].xyzz 463: MOV TEMP[1].y, TEMP[4].xxxx 464: DP3 TEMP[4].x, TEMP[0].xyzz, CONST[4][13].xyzz 465: MOV TEMP[1].z, TEMP[4].xxxx 466: DP3 TEMP[4].x, TEMP[1].xyzz, TEMP[1].xyzz 467: SQRT TEMP[3].x, TEMP[4].xxxx 468: FSEQ TEMP[4].x, TEMP[3].xxxx, IMM[0].yyyy 469: MUL TEMP[5].xyz, TEMP[1].xyzz, IMM[3].yyyy 470: UCMP TEMP[5].xyz, TEMP[4].xxxx, TEMP[5].xyzx, TEMP[5].xyzz 471: RCP TEMP[3].x, TEMP[3].xxxx 472: MUL TEMP[3].xyz, TEMP[1].xyzz, TEMP[3].xxxx 473: NOT TEMP[4].x, TEMP[4].xxxx 474: UCMP TEMP[5].xyz, TEMP[4].xxxx, TEMP[3].xyzx, TEMP[5].xyzz 475: ABS TEMP[3].x, TEMP[5].xxxx 476: ABS TEMP[4].x, TEMP[5].yyyy 477: MAX TEMP[1].x, TEMP[3].xxxx, TEMP[4].xxxx 478: ABS TEMP[3].x, TEMP[5].zzzz 479: MAX TEMP[2].x, TEMP[3].xxxx, TEMP[1].xxxx 480: ABS TEMP[3].xy, TEMP[5].zyyy 481: ADD TEMP[1].xy, TEMP[3].xyyy, -TEMP[2].xxxx 482: FSEQ TEMP[3].x, TEMP[2].xxxx, IMM[0].yyyy 483: UCMP TEMP[4].x, TEMP[3].xxxx, IMM[3].yyyy, TEMP[4].xxxx 484: RCP TEMP[6].x, TEMP[2].xxxx 485: NOT TEMP[3].x, TEMP[3].xxxx 486: UCMP TEMP[4].x, TEMP[3].xxxx, TEMP[6].xxxx, TEMP[4].xxxx 487: MUL TEMP[2].xyz, TEMP[5].xyzz, TEMP[4].xxxx 488: FSLT TEMP[3].x, TEMP[1].yyyy, IMM[0].yyyy 489: ABS TEMP[4].xy, TEMP[5].yzzz 490: UCMP TEMP[4].xy, TEMP[3].xxxx, TEMP[4].xyxx, TEMP[4].xyyy 491: ABS TEMP[6].xy, TEMP[5].xzzz 492: NOT TEMP[3].x, TEMP[3].xxxx 493: UCMP TEMP[4].xy, TEMP[3].xxxx, TEMP[6].xyxx, TEMP[4].xyyy 494: FSLT TEMP[1].x, TEMP[1].xxxx, IMM[0].yyyy 495: UCMP TEMP[3].xy, TEMP[1].xxxx, TEMP[4].xyxx, TEMP[3].xyyy 496: ABS TEMP[4].xy, TEMP[5].xyyy 497: NOT TEMP[1].x, TEMP[1].xxxx 498: UCMP TEMP[3].xy, TEMP[1].xxxx, TEMP[4].xyxx, TEMP[3].xyyy 499: ADD TEMP[1].x, -TEMP[3].yyyy, TEMP[3].xxxx 500: FSLT TEMP[1].x, TEMP[1].xxxx, IMM[0].yyyy 501: UCMP TEMP[4].xy, TEMP[1].xxxx, TEMP[3].yxyy, TEMP[4].xyyy 502: NOT TEMP[1].x, TEMP[1].xxxx 503: UCMP TEMP[4].xy, TEMP[1].xxxx, TEMP[3].xyxx, TEMP[4].xyyy 504: MOV TEMP[0].x, TEMP[4].xyxx 505: FSEQ TEMP[1].x, TEMP[4].xxxx, IMM[0].yyyy 506: UCMP TEMP[3].x, TEMP[1].xxxx, IMM[3].yyyy, TEMP[3].xxxx 507: RCP TEMP[5].x, TEMP[4].xxxx 508: NOT TEMP[1].x, TEMP[1].xxxx 509: UCMP TEMP[3].x, TEMP[1].xxxx, TEMP[5].xxxx, TEMP[3].xxxx 510: MUL TEMP[1].x, TEMP[4].yyyy, TEMP[3].xxxx 511: MOV TEMP[0].z, TEMP[1].xxxx 512: MOV TEMP[1].xy, TEMP[0].xzzz 513: TEX TEMP[1].x, TEMP[1], SAMP[5], 2D 514: MUL TEMP[0].xyz, TEMP[2].xyzz, TEMP[1].xxxx 515: MAD TEMP[29].xyz, TEMP[0].xyzz, IMM[3].zzzz, IMM[3].zzzz 516: MOV TEMP[27].w, IMM[0].yyyy 517: MOV TEMP[28].w, IMM[0].yyyy 518: MOV TEMP[29].w, IMM[6].wwww 519: MOV TEMP[30], IN[0].zzzz 520: MOV OUT[0], TEMP[27] 521: MOV OUT[1], TEMP[28] 522: MOV OUT[2], TEMP[29] 523: MOV OUT[3], TEMP[30] 524: MOV OUT[4], TEMP[31] 525: MOV OUT[5], TEMP[32] 526: MOV OUT[6], TEMP[33] 527: MOV OUT[7], TEMP[34] 528: END radeonsi: Compiling shader 4 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [24 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %23 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %24 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %25 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %26 = fmul float %25, 0.000000e+00 %27 = getelementptr [24 x <8 x i32>], [24 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2, !amdgpu.uniform !0 %28 = load <8 x i32>, <8 x i32> addrspace(2)* %27, align 32, !invariant.load !0 %29 = bitcast [24 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %30 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %29, i64 0, i64 7, !amdgpu.uniform !0 %31 = load <4 x i32>, <4 x i32> addrspace(2)* %30, align 16, !invariant.load !0 %32 = bitcast float %23 to i32 %33 = bitcast float %24 to i32 %34 = bitcast float %26 to i32 %35 = insertelement <4 x i32> undef, i32 %32, i32 0 %36 = insertelement <4 x i32> %35, i32 %33, i32 1 %37 = insertelement <4 x i32> %36, i32 %34, i32 2 %38 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %37, <8 x i32> %28, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %39 = extractelement <4 x float> %38, i32 0 %40 = getelementptr [24 x <8 x i32>], [24 x <8 x i32>] addrspace(2)* %2, i64 0, i64 8, !amdgpu.uniform !0 %41 = load <8 x i32>, <8 x i32> addrspace(2)* %40, align 32, !invariant.load !0 %42 = bitcast [24 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %43 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %42, i64 0, i64 19, !amdgpu.uniform !0 %44 = load <4 x i32>, <4 x i32> addrspace(2)* %43, align 16, !invariant.load !0 %45 = bitcast float %23 to i32 %46 = bitcast float %24 to i32 %47 = bitcast float %26 to i32 %48 = insertelement <4 x i32> undef, i32 %45, i32 0 %49 = insertelement <4 x i32> %48, i32 %46, i32 1 %50 = insertelement <4 x i32> %49, i32 %47, i32 2 %51 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %50, <8 x i32> %41, <4 x i32> %44, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %52 = extractelement <4 x float> %51, i32 0 %53 = extractelement <4 x float> %51, i32 1 %54 = fmul float %52, 2.000000e+00 %55 = fadd float %54, -1.000000e+00 %56 = fmul float %53, -2.000000e+00 %57 = fadd float %56, 1.000000e+00 %58 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %59 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4, !amdgpu.uniform !0 %60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !invariant.load !0 %61 = call float @llvm.SI.load.const(<16 x i8> %60, i32 1320) %62 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %63 = fadd float %61, %62 %64 = call float @llvm.SI.load.const(<16 x i8> %60, i32 1324) %65 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %66 = fadd float %64, %65 %67 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %68 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %69 = fmul float %68, 0.000000e+00 %70 = getelementptr [24 x <8 x i32>], [24 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2, !amdgpu.uniform !0 %71 = load <8 x i32>, <8 x i32> addrspace(2)* %70, align 32, !invariant.load !0 %72 = bitcast [24 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %73 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %72, i64 0, i64 7, !amdgpu.uniform !0 %74 = load <4 x i32>, <4 x i32> addrspace(2)* %73, align 16, !invariant.load !0 %75 = bitcast float %63 to i32 %76 = bitcast float %58 to i32 %77 = bitcast float %69 to i32 %78 = insertelement <4 x i32> undef, i32 %75, i32 0 %79 = insertelement <4 x i32> %78, i32 %76, i32 1 %80 = insertelement <4 x i32> %79, i32 %77, i32 2 %81 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %80, <8 x i32> %71, <4 x i32> %74, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %82 = extractelement <4 x float> %81, i32 0 %83 = getelementptr [24 x <8 x i32>], [24 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2, !amdgpu.uniform !0 %84 = load <8 x i32>, <8 x i32> addrspace(2)* %83, align 32, !invariant.load !0 %85 = bitcast [24 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %86 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %85, i64 0, i64 7, !amdgpu.uniform !0 %87 = load <4 x i32>, <4 x i32> addrspace(2)* %86, align 16, !invariant.load !0 %88 = bitcast float %67 to i32 %89 = bitcast float %66 to i32 %90 = bitcast float %69 to i32 %91 = insertelement <4 x i32> undef, i32 %88, i32 0 %92 = insertelement <4 x i32> %91, i32 %89, i32 1 %93 = insertelement <4 x i32> %92, i32 %90, i32 2 %94 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %93, <8 x i32> %84, <4 x i32> %87, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %95 = extractelement <4 x float> %94, i32 0 %96 = getelementptr [24 x <8 x i32>], [24 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2, !amdgpu.uniform !0 %97 = load <8 x i32>, <8 x i32> addrspace(2)* %96, align 32, !invariant.load !0 %98 = bitcast [24 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %99 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %98, i64 0, i64 7, !amdgpu.uniform !0 %100 = load <4 x i32>, <4 x i32> addrspace(2)* %99, align 16, !invariant.load !0 %101 = bitcast float %63 to i32 %102 = bitcast float %66 to i32 %103 = bitcast float %69 to i32 %104 = insertelement <4 x i32> undef, i32 %101, i32 0 %105 = insertelement <4 x i32> %104, i32 %102, i32 1 %106 = insertelement <4 x i32> %105, i32 %103, i32 2 %107 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %106, <8 x i32> %97, <4 x i32> %100, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %108 = extractelement <4 x float> %107, i32 0 %109 = fmul float %39, 7.968750e+00 %110 = fmul float %95, 7.968750e+00 %111 = fmul float %82, 7.968750e+00 %112 = fmul float %108, 7.968750e+00 %113 = call float @llvm.floor.f32(float %109) %114 = fsub float %109, %113 %115 = call float @llvm.floor.f32(float %110) %116 = fsub float %110, %115 %117 = call float @llvm.floor.f32(float %111) %118 = fsub float %111, %117 %119 = call float @llvm.floor.f32(float %112) %120 = fsub float %112, %119 %121 = fsub float %109, %114 %122 = fsub float %110, %116 %123 = fsub float %111, %118 %124 = fsub float %112, %120 %125 = fmul float %121, 3.200000e+01 %126 = fmul float %122, 3.200000e+01 %127 = fmul float %123, 3.200000e+01 %128 = fmul float %124, 3.200000e+01 %129 = fmul float %39, 2.550000e+02 %130 = fsub float %129, %125 %131 = fmul float %95, 2.550000e+02 %132 = fsub float %131, %126 %133 = fmul float %82, 2.550000e+02 %134 = fsub float %133, %127 %135 = fmul float %108, 2.550000e+02 %136 = fsub float %135, %128 %137 = fadd float %130, %130 %138 = fadd float %132, %132 %139 = fadd float %134, %134 %140 = fadd float %136, %136 %141 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4, !amdgpu.uniform !0 %142 = load <16 x i8>, <16 x i8> addrspace(2)* %141, align 16, !invariant.load !0 %143 = call float @llvm.SI.load.const(<16 x i8> %142, i32 424) %144 = call float @llvm.SI.load.const(<16 x i8> %142, i32 416) %145 = fmul float %137, %143 %146 = fadd float %145, %144 %147 = call float @llvm.SI.load.const(<16 x i8> %142, i32 428) %148 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4, !amdgpu.uniform !0 %149 = load <16 x i8>, <16 x i8> addrspace(2)* %148, align 16, !invariant.load !0 %150 = call float @llvm.SI.load.const(<16 x i8> %149, i32 420) %151 = fmul float %137, %147 %152 = fadd float %151, %150 %153 = getelementptr [24 x <8 x i32>], [24 x <8 x i32>] addrspace(2)* %2, i64 0, i64 12, !amdgpu.uniform !0 %154 = load <8 x i32>, <8 x i32> addrspace(2)* %153, align 32, !invariant.load !0 %155 = bitcast [24 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %156 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %155, i64 0, i64 27, !amdgpu.uniform !0 %157 = load <4 x i32>, <4 x i32> addrspace(2)* %156, align 16, !invariant.load !0 %158 = bitcast float %146 to i32 %159 = bitcast float %152 to i32 %160 = insertelement <4 x i32> undef, i32 %158, i32 0 %161 = insertelement <4 x i32> %160, i32 %159, i32 1 %162 = insertelement <4 x i32> %161, i32 0, i32 2 %163 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %162, <8 x i32> %154, <4 x i32> %157, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %164 = extractelement <4 x float> %163, i32 0 %165 = extractelement <4 x float> %163, i32 1 %166 = extractelement <4 x float> %163, i32 2 %167 = extractelement <4 x float> %163, i32 3 %168 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4, !amdgpu.uniform !0 %169 = load <16 x i8>, <16 x i8> addrspace(2)* %168, align 16, !invariant.load !0 %170 = call float @llvm.SI.load.const(<16 x i8> %169, i32 424) %171 = fadd float %146, %170 %172 = call float @llvm.SI.load.const(<16 x i8> %169, i32 428) %173 = fadd float %152, %172 %174 = call float @llvm.SI.load.const(<16 x i8> %169, i32 440) %175 = fmul float %166, %174 %176 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4, !amdgpu.uniform !0 %177 = load <16 x i8>, <16 x i8> addrspace(2)* %176, align 16, !invariant.load !0 %178 = call float @llvm.SI.load.const(<16 x i8> %177, i32 432) %179 = fmul float %166, %178 %180 = call float @llvm.SI.load.const(<16 x i8> %177, i32 656) %181 = fmul float %166, %180 %182 = call float @llvm.SI.load.const(<16 x i8> %177, i32 660) %183 = fmul float %166, %182 %184 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4, !amdgpu.uniform !0 %185 = load <16 x i8>, <16 x i8> addrspace(2)* %184, align 16, !invariant.load !0 %186 = call float @llvm.SI.load.const(<16 x i8> %185, i32 440) %187 = fmul float %181, %186 %188 = call float @llvm.SI.load.const(<16 x i8> %185, i32 432) %189 = fmul float %183, %188 %190 = fcmp une float %187, 0.000000e+00 %191 = fdiv float 1.000000e+00, %187, !fpmath !1 %192 = fcmp oeq float %189, 0.000000e+00 %193 = fdiv float 1.000000e+00, %189, !fpmath !1 %194 = fmul float %167, %187 %195 = fmul float %167, %189 %.op = fmul float %191, 5.000000e-01 %196 = select i1 %190, float %.op, float 0x47DFFFFFE0000000 %.op4 = fmul float %193, 5.000000e-01 %197 = select i1 %192, float 0x47DFFFFFE0000000, float %.op4 %198 = fsub float 1.000000e+00, %121 %199 = fsub float 2.000000e+00, %121 %200 = fsub float 1.000000e+00, %122 %201 = fsub float 2.000000e+00, %122 %202 = fsub float 1.000000e+00, %123 %203 = fsub float 2.000000e+00, %123 %204 = fsub float 1.000000e+00, %124 %205 = fsub float 2.000000e+00, %124 %206 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4, !amdgpu.uniform !0 %207 = load <16 x i8>, <16 x i8> addrspace(2)* %206, align 16, !invariant.load !0 %208 = call float @llvm.SI.load.const(<16 x i8> %207, i32 1408) %209 = call float @llvm.SI.load.const(<16 x i8> %207, i32 1408) %210 = fadd float %208, %209 %211 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) %212 = fmul float %210, %211 %213 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4, !amdgpu.uniform !0 %214 = load <16 x i8>, <16 x i8> addrspace(2)* %213, align 16, !invariant.load !0 %215 = call float @llvm.SI.load.const(<16 x i8> %214, i32 1408) %216 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %217 = fmul float %215, %216 %218 = call float @llvm.SI.load.const(<16 x i8> %214, i32 1408) %219 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %220 = fmul float %218, %219 %221 = fmul float %212, -5.000000e-01 %222 = fmul float %212, -5.000000e-01 %223 = fsub float -0.000000e+00, %220 %224 = fcmp ule float %57, -0.000000e+00 %225 = select i1 %224, float 1.000000e+00, float -1.000000e+00 %226 = fmul float %217, %225 %227 = fcmp olt float %55, 0.000000e+00 %228 = select i1 %227, float -1.000000e+00, float 1.000000e+00 %229 = fmul float %220, %228 %230 = call float @llvm.fabs.f32(float %199) %231 = call float @llvm.fabs.f32(float %199) %232 = call float @llvm.fabs.f32(float %201) %233 = call float @llvm.fabs.f32(float %201) %234 = fcmp ogt float %230, -0.000000e+00 %235 = fcmp ogt float %231, -0.000000e+00 %236 = fcmp ogt float %232, -0.000000e+00 %237 = fcmp ogt float %233, -0.000000e+00 %238 = call float @llvm.fabs.f32(float %198) %239 = call float @llvm.fabs.f32(float %198) %240 = call float @llvm.fabs.f32(float %200) %241 = call float @llvm.fabs.f32(float %200) %242 = fcmp ogt float %238, -0.000000e+00 %243 = fcmp ogt float %239, -0.000000e+00 %244 = fcmp ogt float %240, -0.000000e+00 %245 = fcmp ogt float %241, -0.000000e+00 %246 = select i1 %242, float %217, float %226 %247 = select i1 %243, float %223, float %221 %248 = select i1 %244, float %217, float %226 %249 = select i1 %245, float %223, float %221 %250 = select i1 %234, float %246, float %229 %251 = select i1 %235, float %247, float %222 %252 = select i1 %236, float %248, float %229 %253 = select i1 %237, float %249, float %222 %254 = call float @llvm.fabs.f32(float %203) %255 = call float @llvm.fabs.f32(float %203) %256 = call float @llvm.fabs.f32(float %205) %257 = call float @llvm.fabs.f32(float %205) %258 = fcmp ogt float %254, -0.000000e+00 %259 = fcmp ogt float %255, -0.000000e+00 %260 = fcmp ogt float %256, -0.000000e+00 %261 = fcmp ogt float %257, -0.000000e+00 %262 = call float @llvm.fabs.f32(float %202) %263 = call float @llvm.fabs.f32(float %202) %264 = call float @llvm.fabs.f32(float %204) %265 = call float @llvm.fabs.f32(float %204) %266 = fcmp ogt float %262, -0.000000e+00 %267 = fcmp ogt float %263, -0.000000e+00 %268 = fcmp ogt float %264, -0.000000e+00 %269 = fcmp ogt float %265, -0.000000e+00 %270 = select i1 %266, float %217, float %226 %271 = select i1 %267, float %223, float %221 %272 = select i1 %268, float %217, float %226 %273 = select i1 %269, float %223, float %221 %274 = select i1 %258, float %270, float %229 %275 = select i1 %259, float %271, float %222 %276 = select i1 %260, float %272, float %229 %277 = select i1 %261, float %273, float %222 %278 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) %279 = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %278), !range !2 %280 = and i32 %279, 60 %281 = bitcast float %250 to i32 %282 = shl nuw nsw i32 %280, 2 %283 = call i32 @llvm.amdgcn.ds.bpermute(i32 %282, i32 %281) %284 = shl nuw nsw i32 %280, 2 %285 = or i32 %284, 4 %286 = call i32 @llvm.amdgcn.ds.bpermute(i32 %285, i32 %281) %287 = bitcast i32 %283 to float %288 = bitcast i32 %286 to float %289 = fsub float %288, %287 %290 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) %291 = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %290), !range !2 %292 = and i32 %291, 60 %293 = bitcast float %251 to i32 %294 = shl nuw nsw i32 %292, 2 %295 = call i32 @llvm.amdgcn.ds.bpermute(i32 %294, i32 %293) %296 = shl nuw nsw i32 %292, 2 %297 = or i32 %296, 4 %298 = call i32 @llvm.amdgcn.ds.bpermute(i32 %297, i32 %293) %299 = bitcast i32 %295 to float %300 = bitcast i32 %298 to float %301 = fsub float %300, %299 %302 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) %303 = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %302), !range !2 %304 = and i32 %303, 60 %305 = bitcast float %252 to i32 %306 = shl nuw nsw i32 %304, 2 %307 = call i32 @llvm.amdgcn.ds.bpermute(i32 %306, i32 %305) %308 = shl nuw nsw i32 %304, 2 %309 = or i32 %308, 4 %310 = call i32 @llvm.amdgcn.ds.bpermute(i32 %309, i32 %305) %311 = bitcast i32 %307 to float %312 = bitcast i32 %310 to float %313 = fsub float %312, %311 %314 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) %315 = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %314), !range !2 %316 = and i32 %315, 60 %317 = bitcast float %253 to i32 %318 = shl nuw nsw i32 %316, 2 %319 = call i32 @llvm.amdgcn.ds.bpermute(i32 %318, i32 %317) %320 = shl nuw nsw i32 %316, 2 %321 = or i32 %320, 4 %322 = call i32 @llvm.amdgcn.ds.bpermute(i32 %321, i32 %317) %323 = bitcast i32 %319 to float %324 = bitcast i32 %322 to float %325 = fsub float %324, %323 %326 = fmul float %194, %289 %327 = fmul float %195, %301 %328 = fmul float %326, %326 %329 = fmul float %327, %327 %330 = fadd float %328, %329 %331 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %332 = load <16 x i8>, <16 x i8> addrspace(2)* %331, align 16, !invariant.load !0 %333 = call float @llvm.SI.load.const(<16 x i8> %332, i32 112) %334 = fmul float %333, %250 %335 = call float @llvm.SI.load.const(<16 x i8> %332, i32 112) %336 = fmul float %335, %251 %337 = call float @llvm.SI.load.const(<16 x i8> %332, i32 112) %338 = fmul float %337, %252 %339 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %340 = load <16 x i8>, <16 x i8> addrspace(2)* %339, align 16, !invariant.load !0 %341 = call float @llvm.SI.load.const(<16 x i8> %340, i32 112) %342 = fmul float %341, %253 %343 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) %344 = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %343), !range !2 %345 = and i32 %344, 60 %346 = bitcast float %334 to i32 %347 = shl nuw nsw i32 %345, 2 %348 = call i32 @llvm.amdgcn.ds.bpermute(i32 %347, i32 %346) %349 = shl nuw nsw i32 %345, 2 %350 = or i32 %349, 8 %351 = call i32 @llvm.amdgcn.ds.bpermute(i32 %350, i32 %346) %352 = bitcast i32 %348 to float %353 = bitcast i32 %351 to float %354 = fsub float %353, %352 %355 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) %356 = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %355), !range !2 %357 = and i32 %356, 60 %358 = bitcast float %336 to i32 %359 = shl nuw nsw i32 %357, 2 %360 = call i32 @llvm.amdgcn.ds.bpermute(i32 %359, i32 %358) %361 = shl nuw nsw i32 %357, 2 %362 = or i32 %361, 8 %363 = call i32 @llvm.amdgcn.ds.bpermute(i32 %362, i32 %358) %364 = bitcast i32 %360 to float %365 = bitcast i32 %363 to float %366 = fsub float %365, %364 %367 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) %368 = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %367), !range !2 %369 = and i32 %368, 60 %370 = bitcast float %338 to i32 %371 = shl nuw nsw i32 %369, 2 %372 = call i32 @llvm.amdgcn.ds.bpermute(i32 %371, i32 %370) %373 = shl nuw nsw i32 %369, 2 %374 = or i32 %373, 8 %375 = call i32 @llvm.amdgcn.ds.bpermute(i32 %374, i32 %370) %376 = bitcast i32 %372 to float %377 = bitcast i32 %375 to float %378 = fsub float %377, %376 %379 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) %380 = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %379), !range !2 %381 = and i32 %380, 60 %382 = bitcast float %342 to i32 %383 = shl nuw nsw i32 %381, 2 %384 = call i32 @llvm.amdgcn.ds.bpermute(i32 %383, i32 %382) %385 = shl nuw nsw i32 %381, 2 %386 = or i32 %385, 8 %387 = call i32 @llvm.amdgcn.ds.bpermute(i32 %386, i32 %382) %388 = bitcast i32 %384 to float %389 = bitcast i32 %387 to float %390 = fsub float %389, %388 %391 = fmul float %354, %194 %392 = fmul float %366, %195 %393 = fmul float %391, %391 %394 = fmul float %392, %392 %395 = fadd float %393, %394 %396 = call float @llvm.maxnum.f32(float %330, float %395) %397 = fcmp une float %396, 0.000000e+00 %398 = call float @llvm.fabs.f32(float %396) %399 = call float @llvm.sqrt.f32(float %398) %400 = fdiv float 1.000000e+00, %399, !fpmath !1 %401 = select i1 %397, float %400, float 0x47EFFFFFE0000000 %402 = fcmp oeq float %401, 0.000000e+00 %403 = fdiv float 1.000000e+00, %401, !fpmath !1 %404 = select i1 %402, float 0x47EFFFFFE0000000, float %403 %405 = fcmp oeq float %404, 0.000000e+00 %406 = call float @llvm.fabs.f32(float %404) %407 = call float @llvm.log2.f32(float %406) %.op5 = fmul float %407, 8.750000e-01 %408 = select i1 %405, float 0xC7EBFFFFE0000000, float %.op5 %409 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4, !amdgpu.uniform !0 %410 = load <16 x i8>, <16 x i8> addrspace(2)* %409, align 16, !invariant.load !0 %411 = call float @llvm.SI.load.const(<16 x i8> %410, i32 664) %412 = call float @llvm.minnum.f32(float %408, float %411) %413 = call float @llvm.maxnum.f32(float %412, float 0.000000e+00) %414 = fsub float -0.000000e+00, %413 %415 = call float @llvm.floor.f32(float %414) %416 = fsub float %414, %415 %417 = fadd float %413, %416 %418 = call float @llvm.exp2.f32(float %417) %419 = fmul float %196, %418 %420 = fmul float %197, %418 %421 = fmul float %418, %196 %422 = fsub float 1.000000e+00, %421 %423 = fmul float %418, %197 %424 = fsub float 1.000000e+00, %423 %425 = fmul float %167, %250 %426 = fmul float %167, %251 %427 = call float @llvm.floor.f32(float %425) %428 = fsub float %425, %427 %429 = call float @llvm.floor.f32(float %426) %430 = fsub float %426, %429 %431 = call float @llvm.maxnum.f32(float %428, float %419) %432 = call float @llvm.maxnum.f32(float %430, float %420) %433 = call float @llvm.minnum.f32(float %422, float %431) %434 = call float @llvm.minnum.f32(float %424, float %432) %435 = fmul float %433, %175 %436 = fadd float %435, %164 %437 = fmul float %434, %179 %438 = fadd float %437, %165 %439 = getelementptr [24 x <8 x i32>], [24 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4, !amdgpu.uniform !0 %440 = load <8 x i32>, <8 x i32> addrspace(2)* %439, align 32, !invariant.load !0 %441 = bitcast [24 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %442 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %441, i64 0, i64 11, !amdgpu.uniform !0 %443 = load <4 x i32>, <4 x i32> addrspace(2)* %442, align 16, !invariant.load !0 %444 = bitcast float %436 to i32 %445 = bitcast float %438 to i32 %446 = bitcast float %413 to i32 %447 = insertelement <4 x i32> undef, i32 %444, i32 0 %448 = insertelement <4 x i32> %447, i32 %445, i32 1 %449 = insertelement <4 x i32> %448, i32 %446, i32 2 %450 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %449, <8 x i32> %440, <4 x i32> %443, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %451 = extractelement <4 x float> %450, i32 0 %452 = extractelement <4 x float> %450, i32 1 %453 = extractelement <4 x float> %450, i32 2 %454 = getelementptr [24 x <8 x i32>], [24 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6, !amdgpu.uniform !0 %455 = load <8 x i32>, <8 x i32> addrspace(2)* %454, align 32, !invariant.load !0 %456 = bitcast [24 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %457 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %456, i64 0, i64 15, !amdgpu.uniform !0 %458 = load <4 x i32>, <4 x i32> addrspace(2)* %457, align 16, !invariant.load !0 %459 = bitcast float %436 to i32 %460 = bitcast float %438 to i32 %461 = bitcast float %413 to i32 %462 = insertelement <4 x i32> undef, i32 %459, i32 0 %463 = insertelement <4 x i32> %462, i32 %460, i32 1 %464 = insertelement <4 x i32> %463, i32 %461, i32 2 %465 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %464, <8 x i32> %455, <4 x i32> %458, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %466 = extractelement <4 x float> %465, i32 0 %467 = extractelement <4 x float> %465, i32 1 %468 = extractelement <4 x float> %465, i32 3 %469 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4, !amdgpu.uniform !0 %470 = load <16 x i8>, <16 x i8> addrspace(2)* %469, align 16, !invariant.load !0 %471 = call float @llvm.SI.load.const(<16 x i8> %470, i32 424) %472 = call float @llvm.SI.load.const(<16 x i8> %470, i32 416) %473 = fmul float %138, %471 %474 = fadd float %473, %472 %475 = call float @llvm.SI.load.const(<16 x i8> %470, i32 428) %476 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4, !amdgpu.uniform !0 %477 = load <16 x i8>, <16 x i8> addrspace(2)* %476, align 16, !invariant.load !0 %478 = call float @llvm.SI.load.const(<16 x i8> %477, i32 420) %479 = fmul float %138, %475 %480 = fadd float %479, %478 %481 = getelementptr [24 x <8 x i32>], [24 x <8 x i32>] addrspace(2)* %2, i64 0, i64 12, !amdgpu.uniform !0 %482 = load <8 x i32>, <8 x i32> addrspace(2)* %481, align 32, !invariant.load !0 %483 = bitcast [24 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %484 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %483, i64 0, i64 27, !amdgpu.uniform !0 %485 = load <4 x i32>, <4 x i32> addrspace(2)* %484, align 16, !invariant.load !0 %486 = bitcast float %474 to i32 %487 = bitcast float %480 to i32 %488 = insertelement <4 x i32> undef, i32 %486, i32 0 %489 = insertelement <4 x i32> %488, i32 %487, i32 1 %490 = insertelement <4 x i32> %489, i32 0, i32 2 %491 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %490, <8 x i32> %482, <4 x i32> %485, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %492 = extractelement <4 x float> %491, i32 0 %493 = extractelement <4 x float> %491, i32 1 %494 = extractelement <4 x float> %491, i32 2 %495 = extractelement <4 x float> %491, i32 3 %496 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4, !amdgpu.uniform !0 %497 = load <16 x i8>, <16 x i8> addrspace(2)* %496, align 16, !invariant.load !0 %498 = call float @llvm.SI.load.const(<16 x i8> %497, i32 424) %499 = fadd float %474, %498 %500 = call float @llvm.SI.load.const(<16 x i8> %497, i32 428) %501 = fadd float %480, %500 %502 = call float @llvm.SI.load.const(<16 x i8> %497, i32 440) %503 = fmul float %494, %502 %504 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4, !amdgpu.uniform !0 %505 = load <16 x i8>, <16 x i8> addrspace(2)* %504, align 16, !invariant.load !0 %506 = call float @llvm.SI.load.const(<16 x i8> %505, i32 432) %507 = fmul float %494, %506 %508 = call float @llvm.SI.load.const(<16 x i8> %505, i32 656) %509 = fmul float %494, %508 %510 = call float @llvm.SI.load.const(<16 x i8> %505, i32 660) %511 = fmul float %494, %510 %512 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4, !amdgpu.uniform !0 %513 = load <16 x i8>, <16 x i8> addrspace(2)* %512, align 16, !invariant.load !0 %514 = call float @llvm.SI.load.const(<16 x i8> %513, i32 440) %515 = fmul float %509, %514 %516 = call float @llvm.SI.load.const(<16 x i8> %513, i32 432) %517 = fmul float %511, %516 %518 = fcmp une float %515, 0.000000e+00 %519 = fdiv float 1.000000e+00, %515, !fpmath !1 %520 = fcmp oeq float %517, 0.000000e+00 %521 = fdiv float 1.000000e+00, %517, !fpmath !1 %522 = fmul float %495, %515 %523 = fmul float %495, %517 %.op18 = fmul float %519, 5.000000e-01 %524 = select i1 %518, float %.op18, float 0x47DFFFFFE0000000 %.op19 = fmul float %521, 5.000000e-01 %525 = select i1 %520, float 0x47DFFFFFE0000000, float %.op19 %526 = fmul float %313, %522 %527 = fmul float %325, %523 %528 = fmul float %378, %522 %529 = fmul float %390, %523 %530 = fmul float %528, %528 %531 = fmul float %529, %529 %532 = fadd float %530, %531 %533 = fmul float %526, %526 %534 = fmul float %527, %527 %535 = fadd float %533, %534 %536 = call float @llvm.maxnum.f32(float %535, float %532) %537 = fcmp oeq float %536, 0.000000e+00 %538 = call float @llvm.fabs.f32(float %536) %539 = call float @llvm.sqrt.f32(float %538) %540 = fdiv float 1.000000e+00, %539, !fpmath !1 %541 = select i1 %537, float 0x47EFFFFFE0000000, float %540 %542 = fcmp oeq float %541, 0.000000e+00 %543 = fdiv float 1.000000e+00, %541, !fpmath !1 %544 = select i1 %542, float 0x47EFFFFFE0000000, float %543 %545 = fcmp oeq float %544, 0.000000e+00 %546 = call float @llvm.fabs.f32(float %544) %547 = call float @llvm.log2.f32(float %546) %.op6 = fmul float %547, 8.750000e-01 %548 = select i1 %545, float 0xC7EBFFFFE0000000, float %.op6 %549 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4, !amdgpu.uniform !0 %550 = load <16 x i8>, <16 x i8> addrspace(2)* %549, align 16, !invariant.load !0 %551 = call float @llvm.SI.load.const(<16 x i8> %550, i32 664) %552 = call float @llvm.minnum.f32(float %548, float %551) %553 = call float @llvm.maxnum.f32(float %552, float 0.000000e+00) %554 = fsub float -0.000000e+00, %553 %555 = call float @llvm.floor.f32(float %554) %556 = fsub float %554, %555 %557 = fadd float %553, %556 %558 = call float @llvm.exp2.f32(float %557) %559 = fmul float %524, %558 %560 = fmul float %525, %558 %561 = fmul float %558, %524 %562 = fsub float 1.000000e+00, %561 %563 = fmul float %558, %525 %564 = fsub float 1.000000e+00, %563 %565 = fmul float %252, %495 %566 = fmul float %253, %495 %567 = call float @llvm.floor.f32(float %565) %568 = fsub float %565, %567 %569 = call float @llvm.floor.f32(float %566) %570 = fsub float %566, %569 %571 = call float @llvm.maxnum.f32(float %568, float %559) %572 = call float @llvm.maxnum.f32(float %570, float %560) %573 = call float @llvm.minnum.f32(float %562, float %571) %574 = call float @llvm.minnum.f32(float %564, float %572) %575 = fmul float %573, %503 %576 = fadd float %575, %492 %577 = fmul float %574, %507 %578 = fadd float %577, %493 %579 = getelementptr [24 x <8 x i32>], [24 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4, !amdgpu.uniform !0 %580 = load <8 x i32>, <8 x i32> addrspace(2)* %579, align 32, !invariant.load !0 %581 = bitcast [24 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %582 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %581, i64 0, i64 11, !amdgpu.uniform !0 %583 = load <4 x i32>, <4 x i32> addrspace(2)* %582, align 16, !invariant.load !0 %584 = bitcast float %576 to i32 %585 = bitcast float %578 to i32 %586 = bitcast float %553 to i32 %587 = insertelement <4 x i32> undef, i32 %584, i32 0 %588 = insertelement <4 x i32> %587, i32 %585, i32 1 %589 = insertelement <4 x i32> %588, i32 %586, i32 2 %590 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %589, <8 x i32> %580, <4 x i32> %583, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %591 = extractelement <4 x float> %590, i32 0 %592 = extractelement <4 x float> %590, i32 1 %593 = extractelement <4 x float> %590, i32 2 %594 = getelementptr [24 x <8 x i32>], [24 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6, !amdgpu.uniform !0 %595 = load <8 x i32>, <8 x i32> addrspace(2)* %594, align 32, !invariant.load !0 %596 = bitcast [24 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %597 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %596, i64 0, i64 15, !amdgpu.uniform !0 %598 = load <4 x i32>, <4 x i32> addrspace(2)* %597, align 16, !invariant.load !0 %599 = bitcast float %576 to i32 %600 = bitcast float %578 to i32 %601 = bitcast float %553 to i32 %602 = insertelement <4 x i32> undef, i32 %599, i32 0 %603 = insertelement <4 x i32> %602, i32 %600, i32 1 %604 = insertelement <4 x i32> %603, i32 %601, i32 2 %605 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %604, <8 x i32> %595, <4 x i32> %598, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %606 = extractelement <4 x float> %605, i32 0 %607 = extractelement <4 x float> %605, i32 1 %608 = extractelement <4 x float> %605, i32 3 %609 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4, !amdgpu.uniform !0 %610 = load <16 x i8>, <16 x i8> addrspace(2)* %609, align 16, !invariant.load !0 %611 = call float @llvm.SI.load.const(<16 x i8> %610, i32 1312) %612 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %613 = fmul float %611, %612 %614 = call float @llvm.SI.load.const(<16 x i8> %610, i32 1316) %615 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %616 = fmul float %614, %615 %617 = call float @llvm.floor.f32(float %613) %618 = fsub float %613, %617 %619 = call float @llvm.floor.f32(float %616) %620 = fsub float %616, %619 %621 = fsub float 1.000000e+00, %620 %622 = fmul float %591, %620 %623 = fmul float %451, %621 %624 = fadd float %622, %623 %625 = fsub float 1.000000e+00, %620 %626 = fmul float %592, %620 %627 = fmul float %452, %625 %628 = fadd float %626, %627 %629 = fsub float 1.000000e+00, %620 %630 = fmul float %593, %620 %631 = fmul float %453, %629 %632 = fadd float %630, %631 %633 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4, !amdgpu.uniform !0 %634 = load <16 x i8>, <16 x i8> addrspace(2)* %633, align 16, !invariant.load !0 %635 = call float @llvm.SI.load.const(<16 x i8> %634, i32 424) %636 = call float @llvm.SI.load.const(<16 x i8> %634, i32 416) %637 = fmul float %139, %635 %638 = fadd float %637, %636 %639 = call float @llvm.SI.load.const(<16 x i8> %634, i32 428) %640 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4, !amdgpu.uniform !0 %641 = load <16 x i8>, <16 x i8> addrspace(2)* %640, align 16, !invariant.load !0 %642 = call float @llvm.SI.load.const(<16 x i8> %641, i32 420) %643 = fmul float %139, %639 %644 = fadd float %643, %642 %645 = call float @llvm.SI.load.const(<16 x i8> %641, i32 424) %646 = call float @llvm.SI.load.const(<16 x i8> %641, i32 416) %647 = fmul float %140, %645 %648 = fadd float %647, %646 %649 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4, !amdgpu.uniform !0 %650 = load <16 x i8>, <16 x i8> addrspace(2)* %649, align 16, !invariant.load !0 %651 = call float @llvm.SI.load.const(<16 x i8> %650, i32 428) %652 = call float @llvm.SI.load.const(<16 x i8> %650, i32 420) %653 = fmul float %140, %651 %654 = fadd float %653, %652 %655 = getelementptr [24 x <8 x i32>], [24 x <8 x i32>] addrspace(2)* %2, i64 0, i64 12, !amdgpu.uniform !0 %656 = load <8 x i32>, <8 x i32> addrspace(2)* %655, align 32, !invariant.load !0 %657 = bitcast [24 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %658 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %657, i64 0, i64 27, !amdgpu.uniform !0 %659 = load <4 x i32>, <4 x i32> addrspace(2)* %658, align 16, !invariant.load !0 %660 = bitcast float %638 to i32 %661 = bitcast float %644 to i32 %662 = insertelement <4 x i32> undef, i32 %660, i32 0 %663 = insertelement <4 x i32> %662, i32 %661, i32 1 %664 = insertelement <4 x i32> %663, i32 0, i32 2 %665 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %664, <8 x i32> %656, <4 x i32> %659, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %666 = extractelement <4 x float> %665, i32 0 %667 = extractelement <4 x float> %665, i32 1 %668 = extractelement <4 x float> %665, i32 2 %669 = extractelement <4 x float> %665, i32 3 %670 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4, !amdgpu.uniform !0 %671 = load <16 x i8>, <16 x i8> addrspace(2)* %670, align 16, !invariant.load !0 %672 = call float @llvm.SI.load.const(<16 x i8> %671, i32 424) %673 = fadd float %638, %672 %674 = call float @llvm.SI.load.const(<16 x i8> %671, i32 428) %675 = fadd float %644, %674 %676 = call float @llvm.SI.load.const(<16 x i8> %671, i32 440) %677 = fmul float %668, %676 %678 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4, !amdgpu.uniform !0 %679 = load <16 x i8>, <16 x i8> addrspace(2)* %678, align 16, !invariant.load !0 %680 = call float @llvm.SI.load.const(<16 x i8> %679, i32 432) %681 = fmul float %668, %680 %682 = call float @llvm.SI.load.const(<16 x i8> %679, i32 656) %683 = fmul float %668, %682 %684 = call float @llvm.SI.load.const(<16 x i8> %679, i32 660) %685 = fmul float %668, %684 %686 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4, !amdgpu.uniform !0 %687 = load <16 x i8>, <16 x i8> addrspace(2)* %686, align 16, !invariant.load !0 %688 = call float @llvm.SI.load.const(<16 x i8> %687, i32 440) %689 = fmul float %683, %688 %690 = call float @llvm.SI.load.const(<16 x i8> %687, i32 432) %691 = fmul float %685, %690 %692 = fcmp oeq float %689, 0.000000e+00 %693 = fdiv float 1.000000e+00, %689, !fpmath !1 %694 = fcmp oeq float %691, 0.000000e+00 %695 = fdiv float 1.000000e+00, %691, !fpmath !1 %696 = fmul float %669, %689 %697 = fmul float %669, %691 %.op7 = fmul float %693, 5.000000e-01 %698 = select i1 %692, float 0x47DFFFFFE0000000, float %.op7 %.op8 = fmul float %695, 5.000000e-01 %699 = select i1 %694, float 0x47DFFFFFE0000000, float %.op8 %700 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) %701 = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %700), !range !2 %702 = and i32 %701, 60 %703 = bitcast float %274 to i32 %704 = shl nuw nsw i32 %702, 2 %705 = call i32 @llvm.amdgcn.ds.bpermute(i32 %704, i32 %703) %706 = shl nuw nsw i32 %702, 2 %707 = or i32 %706, 4 %708 = call i32 @llvm.amdgcn.ds.bpermute(i32 %707, i32 %703) %709 = bitcast i32 %705 to float %710 = bitcast i32 %708 to float %711 = fsub float %710, %709 %712 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) %713 = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %712), !range !2 %714 = and i32 %713, 60 %715 = bitcast float %275 to i32 %716 = shl nuw nsw i32 %714, 2 %717 = call i32 @llvm.amdgcn.ds.bpermute(i32 %716, i32 %715) %718 = shl nuw nsw i32 %714, 2 %719 = or i32 %718, 4 %720 = call i32 @llvm.amdgcn.ds.bpermute(i32 %719, i32 %715) %721 = bitcast i32 %717 to float %722 = bitcast i32 %720 to float %723 = fsub float %722, %721 %724 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) %725 = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %724), !range !2 %726 = and i32 %725, 60 %727 = bitcast float %276 to i32 %728 = shl nuw nsw i32 %726, 2 %729 = call i32 @llvm.amdgcn.ds.bpermute(i32 %728, i32 %727) %730 = shl nuw nsw i32 %726, 2 %731 = or i32 %730, 4 %732 = call i32 @llvm.amdgcn.ds.bpermute(i32 %731, i32 %727) %733 = bitcast i32 %729 to float %734 = bitcast i32 %732 to float %735 = fsub float %734, %733 %736 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) %737 = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %736), !range !2 %738 = and i32 %737, 60 %739 = bitcast float %277 to i32 %740 = shl nuw nsw i32 %738, 2 %741 = call i32 @llvm.amdgcn.ds.bpermute(i32 %740, i32 %739) %742 = shl nuw nsw i32 %738, 2 %743 = or i32 %742, 4 %744 = call i32 @llvm.amdgcn.ds.bpermute(i32 %743, i32 %739) %745 = bitcast i32 %741 to float %746 = bitcast i32 %744 to float %747 = fsub float %746, %745 %748 = fmul float %696, %711 %749 = fmul float %697, %723 %750 = fmul float %748, %748 %751 = fmul float %749, %749 %752 = fadd float %750, %751 %753 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %754 = load <16 x i8>, <16 x i8> addrspace(2)* %753, align 16, !invariant.load !0 %755 = call float @llvm.SI.load.const(<16 x i8> %754, i32 112) %756 = fmul float %755, %274 %757 = call float @llvm.SI.load.const(<16 x i8> %754, i32 112) %758 = fmul float %757, %275 %759 = call float @llvm.SI.load.const(<16 x i8> %754, i32 112) %760 = fmul float %759, %276 %761 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0 %762 = load <16 x i8>, <16 x i8> addrspace(2)* %761, align 16, !invariant.load !0 %763 = call float @llvm.SI.load.const(<16 x i8> %762, i32 112) %764 = fmul float %763, %277 %765 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) %766 = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %765), !range !2 %767 = and i32 %766, 60 %768 = bitcast float %756 to i32 %769 = shl nuw nsw i32 %767, 2 %770 = call i32 @llvm.amdgcn.ds.bpermute(i32 %769, i32 %768) %771 = shl nuw nsw i32 %767, 2 %772 = or i32 %771, 8 %773 = call i32 @llvm.amdgcn.ds.bpermute(i32 %772, i32 %768) %774 = bitcast i32 %770 to float %775 = bitcast i32 %773 to float %776 = fsub float %775, %774 %777 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) %778 = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %777), !range !2 %779 = and i32 %778, 60 %780 = bitcast float %758 to i32 %781 = shl nuw nsw i32 %779, 2 %782 = call i32 @llvm.amdgcn.ds.bpermute(i32 %781, i32 %780) %783 = shl nuw nsw i32 %779, 2 %784 = or i32 %783, 8 %785 = call i32 @llvm.amdgcn.ds.bpermute(i32 %784, i32 %780) %786 = bitcast i32 %782 to float %787 = bitcast i32 %785 to float %788 = fsub float %787, %786 %789 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) %790 = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %789), !range !2 %791 = and i32 %790, 60 %792 = bitcast float %760 to i32 %793 = shl nuw nsw i32 %791, 2 %794 = call i32 @llvm.amdgcn.ds.bpermute(i32 %793, i32 %792) %795 = shl nuw nsw i32 %791, 2 %796 = or i32 %795, 8 %797 = call i32 @llvm.amdgcn.ds.bpermute(i32 %796, i32 %792) %798 = bitcast i32 %794 to float %799 = bitcast i32 %797 to float %800 = fsub float %799, %798 %801 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) %802 = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %801), !range !2 %803 = and i32 %802, 60 %804 = bitcast float %764 to i32 %805 = shl nuw nsw i32 %803, 2 %806 = call i32 @llvm.amdgcn.ds.bpermute(i32 %805, i32 %804) %807 = shl nuw nsw i32 %803, 2 %808 = or i32 %807, 8 %809 = call i32 @llvm.amdgcn.ds.bpermute(i32 %808, i32 %804) %810 = bitcast i32 %806 to float %811 = bitcast i32 %809 to float %812 = fsub float %811, %810 %813 = fmul float %776, %696 %814 = fmul float %788, %697 %815 = fmul float %813, %813 %816 = fmul float %814, %814 %817 = fadd float %815, %816 %818 = call float @llvm.maxnum.f32(float %752, float %817) %819 = fcmp une float %818, 0.000000e+00 %820 = call float @llvm.fabs.f32(float %818) %821 = call float @llvm.sqrt.f32(float %820) %822 = fdiv float 1.000000e+00, %821, !fpmath !1 %823 = select i1 %819, float %822, float 0x47EFFFFFE0000000 %824 = fcmp oeq float %823, 0.000000e+00 %825 = fdiv float 1.000000e+00, %823, !fpmath !1 %826 = select i1 %824, float 0x47EFFFFFE0000000, float %825 %827 = fcmp oeq float %826, 0.000000e+00 %828 = call float @llvm.fabs.f32(float %826) %829 = call float @llvm.log2.f32(float %828) %.op20 = fmul float %829, 8.750000e-01 %830 = select i1 %827, float 0xC7EBFFFFE0000000, float %.op20 %831 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4, !amdgpu.uniform !0 %832 = load <16 x i8>, <16 x i8> addrspace(2)* %831, align 16, !invariant.load !0 %833 = call float @llvm.SI.load.const(<16 x i8> %832, i32 664) %834 = call float @llvm.minnum.f32(float %830, float %833) %835 = call float @llvm.maxnum.f32(float %834, float 0.000000e+00) %836 = fsub float -0.000000e+00, %835 %837 = call float @llvm.floor.f32(float %836) %838 = fsub float %836, %837 %839 = fadd float %835, %838 %840 = call float @llvm.exp2.f32(float %839) %841 = fmul float %698, %840 %842 = fmul float %699, %840 %843 = fmul float %840, %698 %844 = fsub float 1.000000e+00, %843 %845 = fmul float %840, %699 %846 = fsub float 1.000000e+00, %845 %847 = fmul float %274, %669 %848 = fmul float %275, %669 %849 = call float @llvm.floor.f32(float %847) %850 = fsub float %847, %849 %851 = call float @llvm.floor.f32(float %848) %852 = fsub float %848, %851 %853 = call float @llvm.maxnum.f32(float %850, float %841) %854 = call float @llvm.maxnum.f32(float %852, float %842) %855 = call float @llvm.minnum.f32(float %844, float %853) %856 = call float @llvm.minnum.f32(float %846, float %854) %857 = fmul float %855, %677 %858 = fadd float %857, %666 %859 = fmul float %856, %681 %860 = fadd float %859, %667 %861 = getelementptr [24 x <8 x i32>], [24 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4, !amdgpu.uniform !0 %862 = load <8 x i32>, <8 x i32> addrspace(2)* %861, align 32, !invariant.load !0 %863 = bitcast [24 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %864 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %863, i64 0, i64 11, !amdgpu.uniform !0 %865 = load <4 x i32>, <4 x i32> addrspace(2)* %864, align 16, !invariant.load !0 %866 = bitcast float %858 to i32 %867 = bitcast float %860 to i32 %868 = bitcast float %835 to i32 %869 = insertelement <4 x i32> undef, i32 %866, i32 0 %870 = insertelement <4 x i32> %869, i32 %867, i32 1 %871 = insertelement <4 x i32> %870, i32 %868, i32 2 %872 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %871, <8 x i32> %862, <4 x i32> %865, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %873 = extractelement <4 x float> %872, i32 0 %874 = extractelement <4 x float> %872, i32 1 %875 = extractelement <4 x float> %872, i32 2 %876 = getelementptr [24 x <8 x i32>], [24 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6, !amdgpu.uniform !0 %877 = load <8 x i32>, <8 x i32> addrspace(2)* %876, align 32, !invariant.load !0 %878 = bitcast [24 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %879 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %878, i64 0, i64 15, !amdgpu.uniform !0 %880 = load <4 x i32>, <4 x i32> addrspace(2)* %879, align 16, !invariant.load !0 %881 = bitcast float %858 to i32 %882 = bitcast float %860 to i32 %883 = bitcast float %835 to i32 %884 = insertelement <4 x i32> undef, i32 %881, i32 0 %885 = insertelement <4 x i32> %884, i32 %882, i32 1 %886 = insertelement <4 x i32> %885, i32 %883, i32 2 %887 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %886, <8 x i32> %877, <4 x i32> %880, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %888 = extractelement <4 x float> %887, i32 0 %889 = extractelement <4 x float> %887, i32 1 %890 = extractelement <4 x float> %887, i32 3 %891 = getelementptr [24 x <8 x i32>], [24 x <8 x i32>] addrspace(2)* %2, i64 0, i64 12, !amdgpu.uniform !0 %892 = load <8 x i32>, <8 x i32> addrspace(2)* %891, align 32, !invariant.load !0 %893 = bitcast [24 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %894 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %893, i64 0, i64 27, !amdgpu.uniform !0 %895 = load <4 x i32>, <4 x i32> addrspace(2)* %894, align 16, !invariant.load !0 %896 = bitcast float %648 to i32 %897 = bitcast float %654 to i32 %898 = insertelement <4 x i32> undef, i32 %896, i32 0 %899 = insertelement <4 x i32> %898, i32 %897, i32 1 %900 = insertelement <4 x i32> %899, i32 0, i32 2 %901 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %900, <8 x i32> %892, <4 x i32> %895, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %902 = extractelement <4 x float> %901, i32 0 %903 = extractelement <4 x float> %901, i32 1 %904 = extractelement <4 x float> %901, i32 2 %905 = extractelement <4 x float> %901, i32 3 %906 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4, !amdgpu.uniform !0 %907 = load <16 x i8>, <16 x i8> addrspace(2)* %906, align 16, !invariant.load !0 %908 = call float @llvm.SI.load.const(<16 x i8> %907, i32 424) %909 = fadd float %648, %908 %910 = call float @llvm.SI.load.const(<16 x i8> %907, i32 428) %911 = fadd float %654, %910 %912 = call float @llvm.SI.load.const(<16 x i8> %907, i32 440) %913 = fmul float %904, %912 %914 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4, !amdgpu.uniform !0 %915 = load <16 x i8>, <16 x i8> addrspace(2)* %914, align 16, !invariant.load !0 %916 = call float @llvm.SI.load.const(<16 x i8> %915, i32 432) %917 = fmul float %904, %916 %918 = call float @llvm.SI.load.const(<16 x i8> %915, i32 656) %919 = fmul float %904, %918 %920 = call float @llvm.SI.load.const(<16 x i8> %915, i32 660) %921 = fmul float %904, %920 %922 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4, !amdgpu.uniform !0 %923 = load <16 x i8>, <16 x i8> addrspace(2)* %922, align 16, !invariant.load !0 %924 = call float @llvm.SI.load.const(<16 x i8> %923, i32 440) %925 = fmul float %919, %924 %926 = call float @llvm.SI.load.const(<16 x i8> %923, i32 432) %927 = fmul float %921, %926 %928 = fcmp oeq float %925, 0.000000e+00 %929 = fdiv float 1.000000e+00, %925, !fpmath !1 %930 = fcmp oeq float %927, 0.000000e+00 %931 = fdiv float 1.000000e+00, %927, !fpmath !1 %932 = fmul float %905, %925 %933 = fmul float %905, %927 %.op21 = fmul float %929, 5.000000e-01 %934 = select i1 %928, float 0x47DFFFFFE0000000, float %.op21 %.op22 = fmul float %931, 5.000000e-01 %935 = select i1 %930, float 0x47DFFFFFE0000000, float %.op22 %936 = fmul float %735, %932 %937 = fmul float %747, %933 %938 = fmul float %800, %932 %939 = fmul float %812, %933 %940 = fmul float %938, %938 %941 = fmul float %939, %939 %942 = fadd float %940, %941 %943 = fmul float %936, %936 %944 = fmul float %937, %937 %945 = fadd float %943, %944 %946 = call float @llvm.maxnum.f32(float %945, float %942) %947 = fcmp oeq float %946, 0.000000e+00 %948 = call float @llvm.fabs.f32(float %946) %949 = call float @llvm.sqrt.f32(float %948) %950 = fdiv float 1.000000e+00, %949, !fpmath !1 %951 = select i1 %947, float 0x47EFFFFFE0000000, float %950 %952 = fcmp oeq float %951, 0.000000e+00 %953 = fdiv float 1.000000e+00, %951, !fpmath !1 %954 = select i1 %952, float 0x47EFFFFFE0000000, float %953 %955 = fcmp oeq float %954, 0.000000e+00 %956 = call float @llvm.fabs.f32(float %954) %957 = call float @llvm.log2.f32(float %956) %.op9 = fmul float %957, 8.750000e-01 %958 = select i1 %955, float 0xC7EBFFFFE0000000, float %.op9 %959 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4, !amdgpu.uniform !0 %960 = load <16 x i8>, <16 x i8> addrspace(2)* %959, align 16, !invariant.load !0 %961 = call float @llvm.SI.load.const(<16 x i8> %960, i32 664) %962 = call float @llvm.minnum.f32(float %958, float %961) %963 = call float @llvm.maxnum.f32(float %962, float 0.000000e+00) %964 = fsub float -0.000000e+00, %963 %965 = call float @llvm.floor.f32(float %964) %966 = fsub float %964, %965 %967 = fadd float %963, %966 %968 = call float @llvm.exp2.f32(float %967) %969 = fmul float %934, %968 %970 = fmul float %935, %968 %971 = fmul float %968, %934 %972 = fsub float 1.000000e+00, %971 %973 = fmul float %968, %935 %974 = fsub float 1.000000e+00, %973 %975 = fmul float %276, %905 %976 = fmul float %277, %905 %977 = call float @llvm.floor.f32(float %975) %978 = fsub float %975, %977 %979 = call float @llvm.floor.f32(float %976) %980 = fsub float %976, %979 %981 = call float @llvm.maxnum.f32(float %978, float %969) %982 = call float @llvm.maxnum.f32(float %980, float %970) %983 = call float @llvm.minnum.f32(float %972, float %981) %984 = call float @llvm.minnum.f32(float %974, float %982) %985 = fmul float %983, %913 %986 = fadd float %985, %902 %987 = fmul float %984, %917 %988 = fadd float %987, %903 %989 = getelementptr [24 x <8 x i32>], [24 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4, !amdgpu.uniform !0 %990 = load <8 x i32>, <8 x i32> addrspace(2)* %989, align 32, !invariant.load !0 %991 = bitcast [24 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %992 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %991, i64 0, i64 11, !amdgpu.uniform !0 %993 = load <4 x i32>, <4 x i32> addrspace(2)* %992, align 16, !invariant.load !0 %994 = bitcast float %986 to i32 %995 = bitcast float %988 to i32 %996 = bitcast float %963 to i32 %997 = insertelement <4 x i32> undef, i32 %994, i32 0 %998 = insertelement <4 x i32> %997, i32 %995, i32 1 %999 = insertelement <4 x i32> %998, i32 %996, i32 2 %1000 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %999, <8 x i32> %990, <4 x i32> %993, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %1001 = extractelement <4 x float> %1000, i32 0 %1002 = extractelement <4 x float> %1000, i32 1 %1003 = extractelement <4 x float> %1000, i32 2 %1004 = getelementptr [24 x <8 x i32>], [24 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6, !amdgpu.uniform !0 %1005 = load <8 x i32>, <8 x i32> addrspace(2)* %1004, align 32, !invariant.load !0 %1006 = bitcast [24 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %1007 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %1006, i64 0, i64 15, !amdgpu.uniform !0 %1008 = load <4 x i32>, <4 x i32> addrspace(2)* %1007, align 16, !invariant.load !0 %1009 = bitcast float %986 to i32 %1010 = bitcast float %988 to i32 %1011 = bitcast float %963 to i32 %1012 = insertelement <4 x i32> undef, i32 %1009, i32 0 %1013 = insertelement <4 x i32> %1012, i32 %1010, i32 1 %1014 = insertelement <4 x i32> %1013, i32 %1011, i32 2 %1015 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %1014, <8 x i32> %1005, <4 x i32> %1008, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %1016 = extractelement <4 x float> %1015, i32 0 %1017 = extractelement <4 x float> %1015, i32 1 %1018 = extractelement <4 x float> %1015, i32 3 %1019 = fsub float 1.000000e+00, %620 %1020 = fmul float %1016, %620 %1021 = fmul float %888, %1019 %1022 = fadd float %1020, %1021 %1023 = fsub float 1.000000e+00, %620 %1024 = fmul float %1017, %620 %1025 = fmul float %889, %1023 %1026 = fadd float %1024, %1025 %1027 = fsub float 1.000000e+00, %620 %1028 = fmul float %1018, %620 %1029 = fmul float %890, %1027 %1030 = fadd float %1028, %1029 %1031 = fsub float 1.000000e+00, %620 %1032 = fmul float %1001, %620 %1033 = fmul float %873, %1031 %1034 = fadd float %1032, %1033 %1035 = fsub float 1.000000e+00, %620 %1036 = fmul float %1002, %620 %1037 = fmul float %874, %1035 %1038 = fadd float %1036, %1037 %1039 = fsub float 1.000000e+00, %620 %1040 = fmul float %1003, %620 %1041 = fmul float %875, %1039 %1042 = fadd float %1040, %1041 %1043 = fsub float 1.000000e+00, %618 %1044 = fmul float %1034, %618 %1045 = fmul float %624, %1043 %1046 = fadd float %1044, %1045 %1047 = fsub float 1.000000e+00, %618 %1048 = fmul float %1038, %618 %1049 = fmul float %628, %1047 %1050 = fadd float %1048, %1049 %1051 = fsub float 1.000000e+00, %618 %1052 = fmul float %1042, %618 %1053 = fmul float %632, %1051 %1054 = fadd float %1052, %1053 %1055 = fmul float %1046, 0x3FC99999A0000000 %1056 = fmul float %1050, 0x3FE6666660000000 %1057 = fadd float %1056, %1055 %1058 = fmul float %1054, 0x3FB99999A0000000 %1059 = fadd float %1057, %1058 %1060 = getelementptr [24 x <8 x i32>], [24 x <8 x i32>] addrspace(2)* %2, i64 0, i64 12, !amdgpu.uniform !0 %1061 = load <8 x i32>, <8 x i32> addrspace(2)* %1060, align 32, !invariant.load !0 %1062 = bitcast [24 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %1063 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %1062, i64 0, i64 27, !amdgpu.uniform !0 %1064 = load <4 x i32>, <4 x i32> addrspace(2)* %1063, align 16, !invariant.load !0 %1065 = bitcast float %171 to i32 %1066 = bitcast float %173 to i32 %1067 = insertelement <4 x i32> undef, i32 %1065, i32 0 %1068 = insertelement <4 x i32> %1067, i32 %1066, i32 1 %1069 = insertelement <4 x i32> %1068, i32 0, i32 2 %1070 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %1069, <8 x i32> %1061, <4 x i32> %1064, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %1071 = extractelement <4 x float> %1070, i32 0 %1072 = extractelement <4 x float> %1070, i32 1 %1073 = getelementptr [24 x <8 x i32>], [24 x <8 x i32>] addrspace(2)* %2, i64 0, i64 12, !amdgpu.uniform !0 %1074 = load <8 x i32>, <8 x i32> addrspace(2)* %1073, align 32, !invariant.load !0 %1075 = bitcast [24 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %1076 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %1075, i64 0, i64 27, !amdgpu.uniform !0 %1077 = load <4 x i32>, <4 x i32> addrspace(2)* %1076, align 16, !invariant.load !0 %1078 = bitcast float %499 to i32 %1079 = bitcast float %501 to i32 %1080 = insertelement <4 x i32> undef, i32 %1078, i32 0 %1081 = insertelement <4 x i32> %1080, i32 %1079, i32 1 %1082 = insertelement <4 x i32> %1081, i32 0, i32 2 %1083 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %1082, <8 x i32> %1074, <4 x i32> %1077, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %1084 = extractelement <4 x float> %1083, i32 0 %1085 = extractelement <4 x float> %1083, i32 1 %1086 = fsub float 1.000000e+00, %620 %1087 = fmul float %1084, %620 %1088 = fmul float %1071, %1086 %1089 = fadd float %1087, %1088 %1090 = fsub float 1.000000e+00, %620 %1091 = fmul float %1085, %620 %1092 = fmul float %1072, %1090 %1093 = fadd float %1091, %1092 %1094 = getelementptr [24 x <8 x i32>], [24 x <8 x i32>] addrspace(2)* %2, i64 0, i64 12, !amdgpu.uniform !0 %1095 = load <8 x i32>, <8 x i32> addrspace(2)* %1094, align 32, !invariant.load !0 %1096 = bitcast [24 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %1097 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %1096, i64 0, i64 27, !amdgpu.uniform !0 %1098 = load <4 x i32>, <4 x i32> addrspace(2)* %1097, align 16, !invariant.load !0 %1099 = bitcast float %673 to i32 %1100 = bitcast float %675 to i32 %1101 = insertelement <4 x i32> undef, i32 %1099, i32 0 %1102 = insertelement <4 x i32> %1101, i32 %1100, i32 1 %1103 = insertelement <4 x i32> %1102, i32 0, i32 2 %1104 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %1103, <8 x i32> %1095, <4 x i32> %1098, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %1105 = extractelement <4 x float> %1104, i32 0 %1106 = extractelement <4 x float> %1104, i32 1 %1107 = getelementptr [24 x <8 x i32>], [24 x <8 x i32>] addrspace(2)* %2, i64 0, i64 12, !amdgpu.uniform !0 %1108 = load <8 x i32>, <8 x i32> addrspace(2)* %1107, align 32, !invariant.load !0 %1109 = bitcast [24 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %1110 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %1109, i64 0, i64 27, !amdgpu.uniform !0 %1111 = load <4 x i32>, <4 x i32> addrspace(2)* %1110, align 16, !invariant.load !0 %1112 = bitcast float %909 to i32 %1113 = bitcast float %911 to i32 %1114 = insertelement <4 x i32> undef, i32 %1112, i32 0 %1115 = insertelement <4 x i32> %1114, i32 %1113, i32 1 %1116 = insertelement <4 x i32> %1115, i32 0, i32 2 %1117 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %1116, <8 x i32> %1108, <4 x i32> %1111, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %1118 = extractelement <4 x float> %1117, i32 0 %1119 = extractelement <4 x float> %1117, i32 1 %1120 = fsub float 1.000000e+00, %620 %1121 = fmul float %1118, %620 %1122 = fmul float %1105, %1120 %1123 = fadd float %1121, %1122 %1124 = fsub float 1.000000e+00, %620 %1125 = fmul float %1119, %620 %1126 = fmul float %1106, %1124 %1127 = fadd float %1125, %1126 %1128 = fsub float 1.000000e+00, %618 %1129 = fmul float %1123, %618 %1130 = fmul float %1089, %1128 %1131 = fadd float %1129, %1130 %1132 = fsub float 1.000000e+00, %618 %1133 = fmul float %1127, %618 %1134 = fmul float %1093, %1132 %1135 = fadd float %1133, %1134 %1136 = fsub float %1059, %1131 %1137 = fmul float %1135, %1136 %1138 = call float @llvm.AMDGPU.clamp.(float %1137, float 0.000000e+00, float 1.000000e+00) %1139 = call float @llvm.AMDGPU.clamp.(float %1137, float 0.000000e+00, float 1.000000e+00) %1140 = call float @llvm.AMDGPU.clamp.(float %1137, float 0.000000e+00, float 1.000000e+00) %1141 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %1142 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %1143 = getelementptr [24 x <8 x i32>], [24 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0, !amdgpu.uniform !0 %1144 = load <8 x i32>, <8 x i32> addrspace(2)* %1143, align 32, !invariant.load !0 %1145 = bitcast [24 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %1146 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %1145, i64 0, i64 3, !amdgpu.uniform !0 %1147 = load <4 x i32>, <4 x i32> addrspace(2)* %1146, align 16, !invariant.load !0 %1148 = bitcast float %1141 to i32 %1149 = bitcast float %1142 to i32 %1150 = insertelement <2 x i32> undef, i32 %1148, i32 0 %1151 = insertelement <2 x i32> %1150, i32 %1149, i32 1 %1152 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %1151, <8 x i32> %1144, <4 x i32> %1147, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %1153 = extractelement <4 x float> %1152, i32 0 %1154 = extractelement <4 x float> %1152, i32 1 %1155 = extractelement <4 x float> %1152, i32 2 %1156 = fadd float %1046, %1153 %1157 = fadd float %1050, %1154 %1158 = fadd float %1054, %1155 %1159 = fadd float %1156, -5.000000e-01 %1160 = fadd float %1157, -5.000000e-01 %1161 = fadd float %1158, -5.000000e-01 %1162 = call float @llvm.fabs.f32(float %198) %1163 = fcmp ogt float %1162, -0.000000e+00 %1164 = select i1 %1163, float 1.000000e+00, float %225 %1165 = select i1 %1163, float 0.000000e+00, float 0x3F947AE140000000 %1166 = call float @llvm.fabs.f32(float %199) %1167 = fcmp ule float %1166, -0.000000e+00 %1168 = select i1 %1167, float 0x3F9EB851E0000000, float %1164 %1169 = select i1 %1167, float %228, float %1165 %1170 = select i1 %1167, float 0x3F889374C0000000, float 0.000000e+00 %1171 = call float @llvm.fabs.f32(float %200) %1172 = fcmp ogt float %1171, -0.000000e+00 %1173 = select i1 %1172, float 1.000000e+00, float %225 %1174 = select i1 %1172, float 0.000000e+00, float 0x3F947AE140000000 %1175 = call float @llvm.fabs.f32(float %201) %1176 = fcmp ogt float %1175, -0.000000e+00 %1177 = select i1 %1176, float %1173, float 0x3F9EB851E0000000 %1178 = select i1 %1176, float %1174, float %228 %1179 = select i1 %1176, float 0.000000e+00, float 0x3F889374C0000000 %1180 = fsub float 1.000000e+00, %620 %1181 = fmul float %1177, %620 %1182 = fmul float %1168, %1180 %1183 = fadd float %1181, %1182 %1184 = fsub float 1.000000e+00, %620 %1185 = fmul float %1178, %620 %1186 = fmul float %1169, %1184 %1187 = fadd float %1185, %1186 %1188 = fsub float 1.000000e+00, %620 %1189 = fmul float %1179, %620 %1190 = fmul float %1170, %1188 %1191 = fadd float %1189, %1190 %1192 = call float @llvm.fabs.f32(float %202) %1193 = fcmp ogt float %1192, -0.000000e+00 %1194 = select i1 %1193, float 1.000000e+00, float %225 %1195 = select i1 %1193, float 0.000000e+00, float 0x3F947AE140000000 %1196 = call float @llvm.fabs.f32(float %204) %1197 = fcmp ogt float %1196, -0.000000e+00 %1198 = select i1 %1197, float 1.000000e+00, float %225 %1199 = select i1 %1197, float 0.000000e+00, float 0x3F947AE140000000 %1200 = call float @llvm.fabs.f32(float %205) %1201 = fcmp ule float %1200, -0.000000e+00 %1202 = select i1 %1201, float 0x3F9EB851E0000000, float %1198 %1203 = select i1 %1201, float %228, float %1199 %1204 = select i1 %1201, float 0x3F889374C0000000, float 0.000000e+00 %1205 = call float @llvm.fabs.f32(float %203) %1206 = fcmp ule float %1205, -0.000000e+00 %1207 = select i1 %1206, float 0x3F9EB851E0000000, float %1194 %1208 = select i1 %1206, float %228, float %1195 %1209 = select i1 %1206, float 0x3F889374C0000000, float 0.000000e+00 %1210 = fsub float 1.000000e+00, %620 %1211 = fmul float %1202, %620 %1212 = fmul float %1207, %1210 %1213 = fadd float %1211, %1212 %1214 = fsub float 1.000000e+00, %620 %1215 = fmul float %1203, %620 %1216 = fmul float %1208, %1214 %1217 = fadd float %1215, %1216 %1218 = fsub float 1.000000e+00, %620 %1219 = fmul float %1204, %620 %1220 = fmul float %1209, %1218 %1221 = fadd float %1219, %1220 %1222 = fsub float 1.000000e+00, %618 %1223 = fmul float %1213, %618 %1224 = fmul float %1183, %1222 %1225 = fadd float %1223, %1224 %1226 = fsub float 1.000000e+00, %618 %1227 = fmul float %1217, %618 %1228 = fmul float %1187, %1226 %1229 = fadd float %1227, %1228 %1230 = fsub float 1.000000e+00, %618 %1231 = fmul float %1221, %618 %1232 = fmul float %1191, %1230 %1233 = fadd float %1231, %1232 %1234 = fmul float %55, %55 %1235 = fsub float 1.000000e+00, %1234 %1236 = fmul float %57, %57 %1237 = fsub float %1235, %1236 %1238 = fcmp oeq float %1237, 0.000000e+00 %1239 = call float @llvm.fabs.f32(float %1237) %1240 = call float @llvm.sqrt.f32(float %1239) %1241 = fdiv float 1.000000e+00, %1240, !fpmath !1 %1242 = select i1 %1238, float 0x47EFFFFFE0000000, float %1241 %1243 = fcmp oeq float %1242, 0.000000e+00 %1244 = fdiv float 1.000000e+00, %1242, !fpmath !1 %1245 = select i1 %1243, float 0x47EFFFFFE0000000, float %1244 %1246 = fmul float %55, %1225 %1247 = fmul float %57, %1229 %1248 = fadd float %1247, %1246 %1249 = fmul float %1245, %1233 %1250 = fadd float %1248, %1249 %1251 = fsub float %1225, %1250 %1252 = fsub float %1229, %1250 %1253 = fsub float %1233, %1250 %1254 = fmul float %1251, %1251 %1255 = fmul float %1252, %1252 %1256 = fadd float %1255, %1254 %1257 = fmul float %1253, %1253 %1258 = fadd float %1256, %1257 %1259 = call float @llvm.sqrt.f32(float %1258) %1260 = fcmp une float %1259, 0.000000e+00 %1261 = fdiv float 1.000000e+00, %1259, !fpmath !1 %.v23 = select i1 %1260, float %1261, float 0x47EFFFFFE0000000 %1262 = fmul float %1251, %.v23 %.v = select i1 %1260, float %1261, float 0x47EFFFFFE0000000 %1263 = fmul float %1252, %.v %.v10 = select i1 %1260, float %1261, float 0x47EFFFFFE0000000 %1264 = fmul float %1253, %.v10 %1265 = fmul float %57, %1264 %1266 = fmul float %1245, %1262 %1267 = fmul float %55, %1263 %1268 = fmul float %1263, %1245 %1269 = fsub float %1268, %1265 %1270 = fmul float %1264, %55 %1271 = fsub float %1270, %1266 %1272 = fmul float %1262, %57 %1273 = fsub float %1272, %1267 %1274 = fsub float 1.000000e+00, %620 %1275 = fmul float %606, %620 %1276 = fmul float %466, %1274 %1277 = fadd float %1275, %1276 %1278 = fsub float 1.000000e+00, %620 %1279 = fmul float %607, %620 %1280 = fmul float %467, %1278 %1281 = fadd float %1279, %1280 %1282 = fsub float 1.000000e+00, %620 %1283 = fmul float %608, %620 %1284 = fmul float %468, %1282 %1285 = fadd float %1283, %1284 %1286 = fsub float 1.000000e+00, %618 %1287 = fmul float %1022, %618 %1288 = fmul float %1277, %1286 %1289 = fadd float %1287, %1288 %1290 = fsub float 1.000000e+00, %618 %1291 = fmul float %1026, %618 %1292 = fmul float %1281, %1290 %1293 = fadd float %1291, %1292 %1294 = fsub float 1.000000e+00, %618 %1295 = fmul float %1030, %618 %1296 = fmul float %1285, %1294 %1297 = fadd float %1295, %1296 %1298 = fmul float %1289, 2.000000e+00 %1299 = fadd float %1298, -1.000000e+00 %1300 = fmul float %1293, 2.000000e+00 %1301 = fadd float %1300, -1.000000e+00 %1302 = fmul float %1297, 2.000000e+00 %1303 = fadd float %1302, -1.000000e+00 %1304 = fmul float %1299, %1303 %1305 = fmul float %1304, %1304 %1306 = fsub float 1.000000e+00, %1305 %1307 = fmul float %1301, %1301 %1308 = fsub float %1306, %1307 %1309 = fcmp oeq float %1308, 0.000000e+00 %1310 = call float @llvm.fabs.f32(float %1308) %1311 = call float @llvm.sqrt.f32(float %1310) %1312 = fdiv float 1.000000e+00, %1311, !fpmath !1 %1313 = select i1 %1309, float 0x47EFFFFFE0000000, float %1312 %1314 = fcmp oeq float %1313, 0.000000e+00 %1315 = fdiv float 1.000000e+00, %1313, !fpmath !1 %.op11 = fmul float %1315, 0x3FDCCCCCC0000000 %1316 = select i1 %1314, float 0x47DCCCCCA0000000, float %.op11 %1317 = fmul float %1304, %1304 %1318 = fmul float %1301, %1301 %1319 = fadd float %1318, %1317 %1320 = fmul float %1316, %1316 %1321 = fadd float %1319, %1320 %1322 = call float @llvm.sqrt.f32(float %1321) %1323 = fcmp une float %1322, 0.000000e+00 %1324 = fdiv float 1.000000e+00, %1322, !fpmath !1 %.v12 = select i1 %1323, float %1324, float 0x47EFFFFFE0000000 %1325 = fmul float %1304, %.v12 %.v13 = select i1 %1323, float %1324, float 0x47EFFFFFE0000000 %1326 = fmul float %1301, %.v13 %.v14 = select i1 %1323, float %1324, float 0x47EFFFFFE0000000 %1327 = fmul float %1316, %.v14 %1328 = fmul float %1269, %1326 %1329 = fmul float %1271, %1326 %1330 = fmul float %1273, %1326 %1331 = fmul float %1325, %1262 %1332 = fadd float %1331, %1328 %1333 = fmul float %1325, %1263 %1334 = fadd float %1333, %1329 %1335 = fmul float %1325, %1264 %1336 = fadd float %1335, %1330 %1337 = fmul float %1327, %55 %1338 = fadd float %1337, %1332 %1339 = fmul float %1327, %57 %1340 = fadd float %1339, %1334 %1341 = fmul float %1327, %1245 %1342 = fadd float %1341, %1336 %1343 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4, !amdgpu.uniform !0 %1344 = load <16 x i8>, <16 x i8> addrspace(2)* %1343, align 16, !invariant.load !0 %1345 = call float @llvm.SI.load.const(<16 x i8> %1344, i32 192) %1346 = call float @llvm.SI.load.const(<16 x i8> %1344, i32 196) %1347 = call float @llvm.SI.load.const(<16 x i8> %1344, i32 200) %1348 = fmul float %1338, %1345 %1349 = fmul float %1340, %1346 %1350 = fadd float %1349, %1348 %1351 = fmul float %1342, %1347 %1352 = fadd float %1350, %1351 %1353 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4, !amdgpu.uniform !0 %1354 = load <16 x i8>, <16 x i8> addrspace(2)* %1353, align 16, !invariant.load !0 %1355 = call float @llvm.SI.load.const(<16 x i8> %1354, i32 224) %1356 = call float @llvm.SI.load.const(<16 x i8> %1354, i32 228) %1357 = call float @llvm.SI.load.const(<16 x i8> %1354, i32 232) %1358 = fmul float %1338, %1355 %1359 = fmul float %1340, %1356 %1360 = fadd float %1359, %1358 %1361 = fmul float %1342, %1357 %1362 = fadd float %1360, %1361 %1363 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4, !amdgpu.uniform !0 %1364 = load <16 x i8>, <16 x i8> addrspace(2)* %1363, align 16, !invariant.load !0 %1365 = call float @llvm.SI.load.const(<16 x i8> %1364, i32 208) %1366 = call float @llvm.SI.load.const(<16 x i8> %1364, i32 212) %1367 = call float @llvm.SI.load.const(<16 x i8> %1364, i32 216) %1368 = fmul float %1338, %1365 %1369 = fmul float %1340, %1366 %1370 = fadd float %1369, %1368 %1371 = fmul float %1342, %1367 %1372 = fadd float %1370, %1371 %1373 = fmul float %1352, %1352 %1374 = fmul float %1362, %1362 %1375 = fadd float %1374, %1373 %1376 = fmul float %1372, %1372 %1377 = fadd float %1375, %1376 %1378 = call float @llvm.sqrt.f32(float %1377) %1379 = fcmp une float %1378, 0.000000e+00 %1380 = fdiv float 1.000000e+00, %1378, !fpmath !1 %.v15 = select i1 %1379, float %1380, float 0x47EFFFFFE0000000 %1381 = fmul float %1352, %.v15 %.v16 = select i1 %1379, float %1380, float 0x47EFFFFFE0000000 %1382 = fmul float %1362, %.v16 %.v17 = select i1 %1379, float %1380, float 0x47EFFFFFE0000000 %1383 = fmul float %1372, %.v17 %1384 = call float @llvm.fabs.f32(float %1381) %1385 = call float @llvm.fabs.f32(float %1382) %1386 = call float @llvm.maxnum.f32(float %1384, float %1385) %1387 = call float @llvm.fabs.f32(float %1383) %1388 = call float @llvm.maxnum.f32(float %1387, float %1386) %1389 = call float @llvm.fabs.f32(float %1383) %1390 = call float @llvm.fabs.f32(float %1382) %1391 = fsub float %1389, %1388 %1392 = fsub float %1390, %1388 %1393 = fcmp oeq float %1388, 0.000000e+00 %1394 = fdiv float 1.000000e+00, %1388, !fpmath !1 %1395 = select i1 %1393, float 0x47EFFFFFE0000000, float %1394 %1396 = fmul float %1381, %1395 %1397 = fmul float %1382, %1395 %1398 = fmul float %1383, %1395 %1399 = fcmp uge float %1392, 0.000000e+00 %1400 = call float @llvm.fabs.f32(float %1382) %1401 = call float @llvm.fabs.f32(float %1383) %1402 = call float @llvm.fabs.f32(float %1381) %1403 = call float @llvm.fabs.f32(float %1383) %1404 = select i1 %1399, float %1402, float %1400 %1405 = select i1 %1399, float %1403, float %1401 %1406 = fcmp olt float %1391, 0.000000e+00 %1407 = call float @llvm.fabs.f32(float %1381) %1408 = call float @llvm.fabs.f32(float %1382) %1409 = select i1 %1406, float %1404, float %1407 %1410 = select i1 %1406, float %1405, float %1408 %1411 = fsub float %1409, %1410 %1412 = fcmp olt float %1411, 0.000000e+00 %1413 = select i1 %1412, float %1410, float %1409 %1414 = select i1 %1412, float %1409, float %1410 %1415 = fcmp oeq float %1413, 0.000000e+00 %1416 = fdiv float 1.000000e+00, %1413, !fpmath !1 %1417 = select i1 %1415, float 0x47EFFFFFE0000000, float %1416 %1418 = fmul float %1414, %1417 %1419 = getelementptr [24 x <8 x i32>], [24 x <8 x i32>] addrspace(2)* %2, i64 0, i64 10, !amdgpu.uniform !0 %1420 = load <8 x i32>, <8 x i32> addrspace(2)* %1419, align 32, !invariant.load !0 %1421 = bitcast [24 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %1422 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %1421, i64 0, i64 23, !amdgpu.uniform !0 %1423 = load <4 x i32>, <4 x i32> addrspace(2)* %1422, align 16, !invariant.load !0 %1424 = bitcast float %1413 to i32 %1425 = bitcast float %1418 to i32 %1426 = insertelement <2 x i32> undef, i32 %1424, i32 0 %1427 = insertelement <2 x i32> %1426, i32 %1425, i32 1 %1428 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %1427, <8 x i32> %1420, <4 x i32> %1423, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %1429 = extractelement <4 x float> %1428, i32 0 %1430 = fmul float %1396, %1429 %1431 = fmul float %1397, %1429 %1432 = fmul float %1398, %1429 %1433 = fmul float %1430, 5.000000e-01 %1434 = fadd float %1433, 5.000000e-01 %1435 = fmul float %1431, 5.000000e-01 %1436 = fadd float %1435, 5.000000e-01 %1437 = fmul float %1432, 5.000000e-01 %1438 = fadd float %1437, 5.000000e-01 %1439 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %1440 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %1441 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %1442 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %1443 = bitcast float %5 to i32 %1444 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %1443, 10 %1445 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1444, float %1159, 11 %1446 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1445, float %1160, 12 %1447 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1446, float %1161, 13 %1448 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1447, float 0.000000e+00, 14 %1449 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1448, float %1138, 15 %1450 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1449, float %1139, 16 %1451 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1450, float %1140, 17 %1452 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1451, float 0.000000e+00, 18 %1453 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1452, float %1434, 19 %1454 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1453, float %1436, 20 %1455 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1454, float %1438, 21 %1456 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1455, float 0x3FD99999A0000000, 22 %1457 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1456, float %1439, 23 %1458 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1457, float %1440, 24 %1459 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1458, float %1441, 25 %1460 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1459, float %1442, 26 %1461 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1460, float undef, 27 %1462 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1461, float undef, 28 %1463 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1462, float undef, 29 %1464 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1463, float undef, 30 %1465 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1464, float undef, 31 %1466 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1465, float undef, 32 %1467 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1466, float undef, 33 %1468 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1467, float undef, 34 %1469 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1468, float undef, 35 %1470 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1469, float undef, 36 %1471 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1470, float undef, 37 %1472 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1471, float undef, 38 %1473 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1472, float undef, 39 %1474 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1473, float undef, 40 %1475 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1474, float undef, 41 %1476 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1475, float undef, 42 %1477 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1476, float %21, 43 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %1477 } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.floor.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #1 ; Function Attrs: convergent nounwind readnone declare i32 @llvm.amdgcn.ds.bpermute(i32, i32) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.exp2.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.clamp.(float, float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 attributes #0 = { "InitialPSInputAddr"="36983" } attributes #1 = { nounwind readnone } attributes #2 = { convergent nounwind readnone } !0 = !{} !1 = !{float 2.500000e+00} !2 = !{i32 0, i32 64} radeonsi: Compiling shader 5 Fragment Shader Epilog LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_ps void @ps_epilog(i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, float inreg, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float) #0 { main_body: %38 = call i32 @llvm.SI.packf16(float %6, float %7) %39 = bitcast i32 %38 to float %40 = call i32 @llvm.SI.packf16(float %8, float %9) %41 = bitcast i32 %40 to float %42 = call i32 @llvm.SI.packf16(float %10, float %11) %43 = bitcast i32 %42 to float %44 = call i32 @llvm.SI.packf16(float %12, float %13) %45 = bitcast i32 %44 to float %46 = call i32 @llvm.SI.packf16(float %14, float %15) %47 = bitcast i32 %46 to float %48 = call i32 @llvm.SI.packf16(float %16, float %17) %49 = bitcast i32 %48 to float %50 = call i32 @llvm.SI.packf16(float %18, float %19) %51 = bitcast i32 %50 to float %52 = call i32 @llvm.SI.packf16(float %20, float %21) %53 = bitcast i32 %52 to float %54 = call i32 @llvm.SI.packf16(float %22, float %23) %55 = bitcast i32 %54 to float %56 = call i32 @llvm.SI.packf16(float %24, float %25) %57 = bitcast i32 %56 to float %58 = call i32 @llvm.SI.packf16(float %26, float %27) %59 = bitcast i32 %58 to float %60 = call i32 @llvm.SI.packf16(float %28, float %29) %61 = bitcast i32 %60 to float %62 = call i32 @llvm.SI.packf16(float %30, float %31) %63 = bitcast i32 %62 to float %64 = call i32 @llvm.SI.packf16(float %32, float %33) %65 = bitcast i32 %64 to float %66 = call i32 @llvm.SI.packf16(float %34, float %35) %67 = bitcast i32 %66 to float %68 = call i32 @llvm.SI.packf16(float %36, float %37) %69 = bitcast i32 %68 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %39, float %41, float undef, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 1, i32 1, float %43, float %45, float undef, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 2, i32 1, float %47, float %49, float undef, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 3, i32 1, float %51, float %53, float undef, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 4, i32 1, float %55, float %57, float undef, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 5, i32 1, float %59, float %61, float undef, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 6, i32 1, float %63, float %65, float undef, float undef) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 7, i32 1, float %67, float %69, float undef, float undef) ret void } ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 ; Function Attrs: nounwind declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #2 attributes #0 = { "InitialPSInputAddr"="16777215" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind } SHADER KEY part.ps.prolog.color_two_side = 0 part.ps.prolog.flatshade_colors = 0 part.ps.prolog.poly_stipple = 0 part.ps.prolog.force_persp_sample_interp = 0 part.ps.prolog.force_linear_sample_interp = 0 part.ps.prolog.force_persp_center_interp = 0 part.ps.prolog.force_linear_center_interp = 0 part.ps.prolog.bc_optimize_for_persp = 0 part.ps.prolog.bc_optimize_for_linear = 0 part.ps.epilog.spi_shader_col_format = 0x44444444 part.ps.epilog.color_is_int8 = 0x0 part.ps.epilog.last_cbuf = 0 part.ps.epilog.alpha_func = 7 part.ps.epilog.alpha_to_one = 0 part.ps.epilog.poly_line_smoothing = 0 part.ps.epilog.clamp_color = 0 Pixel Shader: Shader main disassembly: s_mov_b32 s84, SCRATCH_RSRC_DWORD0 ; BED400FF 00000000 s_mov_b32 s85, SCRATCH_RSRC_DWORD1 ; BED500FF 00000000 s_mov_b32 s86, -1 ; BED600C1 v_writelane_b32 v58, exec_lo, 0 ; D28A003A 0001007E s_mov_b32 s89, s12 ; BED9000C s_mov_b32 s87, 0xe80000 ; BED700FF 00E80000 v_writelane_b32 v58, exec_hi, 1 ; D28A003A 0001027F v_mov_b32_e32 v16, v13 ; 7E20030D s_wqm_b64 exec, exec ; BEFE077E s_mov_b32 s19, s11 ; BE93000B s_mov_b32 s22, s10 ; BE96000A s_load_dwordx8 s[8:15], s[4:5], 0x140 ; C00E0202 00000140 s_mov_b32 m0, s19 ; BEFC0013 v_interp_p1_f32 v4, v2, 0, 0, [m0] ; D4100002 v_interp_p2_f32 v4, [v4], v3, 0, 0, [m0] ; D4110003 v_interp_p1_f32 v5, v2, 1, 0, [m0] ; D4140102 s_mov_b64 s[6:7], s[2:3] ; BE860102 v_interp_p2_f32 v5, [v5], v3, 1, 0, [m0] ; D4150103 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v58, s8, 2 ; D28A003A 00010408 s_load_dwordx4 s[28:31], s[6:7], 0x40 ; C00A0703 00000040 v_writelane_b32 v58, s9, 3 ; D28A003A 00010609 v_writelane_b32 v58, s10, 4 ; D28A003A 0001080A v_writelane_b32 v58, s11, 5 ; D28A003A 00010A0B s_load_dwordx4 s[8:11], s[6:7], 0x0 ; C00A0203 00000000 v_writelane_b32 v58, s12, 6 ; D28A003A 00010C0C v_writelane_b32 v58, s13, 7 ; D28A003A 00010E0D s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s7, s[28:31], 0x52c ; C02201CE 0000052C v_writelane_b32 v58, s14, 8 ; D28A003A 0001100E s_buffer_load_dword s6, s[28:31], 0x528 ; C022018E 00000528 s_load_dwordx4 s[0:3], s[4:5], 0x70 ; C00A0002 00000070 s_load_dwordx4 s[24:27], s[4:5], 0x130 ; C00A0602 00000130 v_writelane_b32 v58, s15, 9 ; D28A003A 0001120F s_buffer_load_dword s41, s[8:11], 0x70 ; C0220A44 00000070 s_load_dwordx8 s[8:15], s[4:5], 0x40 ; C00E0202 00000040 s_load_dwordx8 s[48:55], s[4:5], 0x100 ; C00E0C02 00000100 v_mul_f32_e32 v6, 0, v4 ; 0A0C0880 v_mov_b32_e32 v20, v7 ; 7E280307 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v13, s7, v5 ; 021A0A07 v_mov_b32_e32 v18, v5 ; 7E240305 v_add_f32_e32 v10, s6, v4 ; 02140806 v_mov_b32_e32 v11, v5 ; 7E160305 v_mov_b32_e32 v12, v6 ; 7E180306 v_mov_b32_e32 v19, v6 ; 7E260306 v_mov_b32_e32 v17, v4 ; 7E220304 v_mov_b32_e32 v7, 0xc2000000 ; 7E0E02FF C2000000 v_mov_b32_e32 v18, v13 ; 7E24030D image_sample_l v14, v[4:7], s[8:15], s[0:3] dmask:0x1 ; F0900100 00020E04 s_nop 0 ; BF800000 image_sample_l v[0:1], v[4:7], s[48:55], s[24:27] dmask:0x3 ; F0900300 00CC0004 s_nop 0 ; BF800000 image_sample_l v15, v[10:13], s[8:15], s[0:3] dmask:0x1 ; F0900100 00020F0A s_mov_b32 s16, 0x40ff0000 ; BE9000FF 40FF0000 image_sample_l v17, v[17:20], s[8:15], s[0:3] dmask:0x1 ; F0900100 00021111 v_mov_b32_e32 v11, v13 ; 7E16030D image_sample_l v6, v[10:13], s[8:15], s[0:3] dmask:0x1 ; F0900100 0002060A s_waitcnt vmcnt(1) ; BF8C0F71 v_mul_f32_e32 v12, s16, v17 ; 0A182210 v_floor_f32_e32 v12, v12 ; 7E183F0C v_mad_f32 v12, v17, s16, -v12 ; D1C1000C 84302111 v_mad_f32 v13, v17, s16, -v12 ; D1C1000D 84302111 v_mul_f32_e32 v12, s16, v15 ; 0A181E10 v_floor_f32_e32 v12, v12 ; 7E183F0C v_mul_f32_e32 v10, s16, v14 ; 0A141C10 v_mad_f32 v12, v15, s16, -v12 ; D1C1000C 8430210F v_mad_f32 v24, v15, s16, -v12 ; D1C10018 8430210F v_floor_f32_e32 v10, v10 ; 7E143F0A s_waitcnt vmcnt(0) ; BF8C0F70 v_mul_f32_e32 v12, s16, v6 ; 0A180C10 s_buffer_load_dword s21, s[28:31], 0x1a4 ; C022054E 000001A4 s_buffer_load_dword s0, s[28:31], 0x1a0 ; C022000E 000001A0 v_mad_f32 v10, v14, s16, -v10 ; D1C1000A 8428210E v_floor_f32_e32 v12, v12 ; 7E183F0C s_buffer_load_dword s43, s[28:31], 0x1a8 ; C0220ACE 000001A8 s_buffer_load_dword s42, s[28:31], 0x1ac ; C0220A8E 000001AC v_mad_f32 v10, v14, s16, -v10 ; D1C1000A 8428210E v_mov_b32_e32 v8, 0x437f0000 ; 7E1002FF 437F0000 v_mul_f32_e32 v25, v7, v24 ; 0A323107 s_load_dwordx4 s[44:47], s[4:5], 0x1b0 ; C00A0B02 000001B0 v_mad_f32 v12, v6, s16, -v12 ; D1C1000C 84302106 s_load_dwordx8 s[48:55], s[4:5], 0x180 ; C00E0C02 00000180 v_mul_f32_e32 v11, v7, v10 ; 0A161507 v_mac_f32_e32 v25, v8, v15 ; 2C321F08 v_mad_f32 v15, v6, s16, -v12 ; D1C1000F 84302106 s_buffer_load_dword s17, s[28:31], 0x580 ; C022044E 00000580 v_mul_f32_e32 v23, v7, v13 ; 0A2E1B07 v_mul_f32_e32 v40, v7, v15 ; 0A501F07 v_mac_f32_e32 v11, v8, v14 ; 2C161D08 v_mac_f32_e32 v23, v8, v17 ; 2C2E2308 v_mac_f32_e32 v40, v8, v6 ; 2C500D08 v_add_f32_e32 v6, v11, v11 ; 020C170B s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v17, s0 ; 7E220200 v_mov_b32_e32 v18, s21 ; 7E240215 v_mov_b32_e32 v9, 0 ; 7E120280 v_mad_f32 v7, s43, v6, v17 ; D1C10007 04460C2B v_mad_f32 v8, s42, v6, v18 ; D1C10008 044A0C2A image_sample_l v[19:22], v[7:10], s[48:55], s[44:47] dmask:0xf ; F0900F00 016C1307 v_interp_p1_f32 v6, v2, 2, 1, [m0] ; D4180602 v_interp_p2_f32 v6, [v6], v3, 2, 1, [m0] ; D4190603 v_add_f32_e64 v11, s17, s17 ; D101000B 00002211 v_mul_f32_e32 v6, v6, v11 ; 0A0C1706 s_brev_b32 s18, 1 ; BE920881 v_sub_f32_e32 v11, 2.0, v13 ; 04161AF4 v_cmp_gt_f32_e64 vcc, |v11|, s18 ; D044016A 0000250B v_sub_f32_e32 v11, 1.0, v10 ; 041614F2 v_sub_f32_e32 v12, 2.0, v10 ; 041814F4 v_sub_f32_e32 v10, 1.0, v13 ; 04141AF2 v_cmp_gt_f32_e64 s[10:11], |v11|, s18 ; D044010A 0000250B v_interp_p1_f32 v11, v2, 0, 1, [m0] ; D42C0402 v_cmp_gt_f32_e64 s[2:3], |v10|, s18 ; D0440102 0000250A v_sub_f32_e32 v10, 1.0, v24 ; 041430F2 v_interp_p2_f32 v11, [v11], v3, 0, 1, [m0] ; D42D0403 v_cmp_gt_f32_e64 s[32:33], |v10|, s18 ; D0440120 0000250A v_interp_p1_f32 v10, v2, 1, 1, [m0] ; D4280502 v_mul_f32_e32 v42, -0.5, v6 ; 0A540CF1 v_mad_f32 v6, v1, -2.0, 1.0 ; D1C10006 03C9EB01 v_interp_p2_f32 v10, [v10], v3, 1, 1, [m0] ; D4290503 v_mul_f32_e32 v41, s17, v11 ; 0A521611 v_mul_f32_e32 v11, s17, v10 ; 0A161411 v_mad_f32 v10, 2.0, v0, -1.0 ; D1C1000A 03CE00F4 v_cmp_nlt_f32_e64 s[8:9], s18, v6 ; D04E0008 00020C12 v_cmp_gt_f32_e64 s[12:13], 0, v10 ; D044000C 00021480 v_cndmask_b32_e64 v14, -1.0, 1.0, s[8:9] ; D100000E 0021E4F3 v_cndmask_b32_e64 v13, 1.0, -1.0, s[12:13] ; D100000D 0031E6F2 v_mul_f32_e32 v45, v14, v41 ; 0A5A530E v_xor_b32_e32 v43, 0x80000000, v11 ; 2A5616FF 80000000 v_mul_f32_e32 v44, v13, v11 ; 0A58170D v_cmp_gt_f32_e64 s[12:13], |v12|, s18 ; D044010C 0000250C v_cndmask_b32_e64 v11, v45, v41, s[10:11] ; D100000B 002A532D v_cndmask_b32_e64 v26, v44, v11, s[12:13] ; D100001A 0032172C v_cndmask_b32_e64 v11, v42, v43, s[10:11] ; D100000B 002A572A v_cndmask_b32_e64 v27, v42, v11, s[12:13] ; D100001B 0032172A v_cndmask_b32_e64 v11, v45, v41, s[2:3] ; D100000B 000A532D v_cndmask_b32_e32 v28, v44, v11, vcc ; 0038172C v_cndmask_b32_e64 v11, v42, v43, s[2:3] ; D100000B 000A572A v_cndmask_b32_e32 v29, v42, v11, vcc ; 003A172A v_mbcnt_lo_u32_b32_e64 v11, -1, 0 ; D28C000B 000100C1 v_mbcnt_hi_u32_b32_e64 v11, -1, v11 ; D28D000B 000216C1 v_and_b32_e32 v11, 60, v11 ; 261616BC v_lshlrev_b32_e32 v46, 2, v11 ; 245C1682 ds_bpermute_b32 v11, v46, v26 ; D87E0000 0B001A2E ds_bpermute_b32 v30, v46, v26 offset:4 ; D87E0004 1E001A2E ds_bpermute_b32 v31, v46, v27 offset:4 ; D87E0004 1F001B2E ds_bpermute_b32 v32, v46, v28 offset:4 ; D87E0004 20001C2E s_buffer_load_dword s83, s[28:31], 0x294 ; C02214CE 00000294 s_buffer_load_dword s80, s[28:31], 0x1b0 ; C022140E 000001B0 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v11, v11, v30 ; 06163D0B ds_bpermute_b32 v30, v46, v27 ; D87E0000 1E001B2E s_buffer_load_dword s88, s[28:31], 0x290 ; C022160E 00000290 s_buffer_load_dword s81, s[28:31], 0x1b8 ; C022144E 000001B8 v_mul_f32_e32 v35, s41, v27 ; 0A463629 v_add_f32_e32 v47, v25, v25 ; 025E3319 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v30, v30, v31 ; 063C3F1E ds_bpermute_b32 v31, v46, v28 ; D87E0000 1F001C2E v_mov_b32_e32 v48, 0x7effffff ; 7E6002FF 7EFFFFFF s_buffer_load_dword s82, s[28:31], 0x298 ; C022148E 00000298 v_mov_b32_e32 v50, 0x3f600000 ; 7E6402FF 3F600000 v_mov_b32_e32 v51, 0xff5fffff ; 7E6602FF FF5FFFFF s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v37, v31, v32 ; 064A411F ds_bpermute_b32 v31, v46, v29 ; D87E0000 1F001D2E ds_bpermute_b32 v32, v46, v29 offset:4 ; D87E0004 20001D2E v_mul_f32_e32 v36, s41, v28 ; 0A483829 s_load_dwordx8 s[56:63], s[4:5], 0x80 ; C00E0E02 00000080 s_load_dwordx4 s[76:79], s[4:5], 0xb0 ; C00A1302 000000B0 s_load_dwordx8 s[64:71], s[4:5], 0xc0 ; C00E1002 000000C0 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v38, v31, v32 ; 064C411F s_waitcnt vmcnt(0) ; BF8C0F70 v_mul_f32_e32 v32, s83, v21 ; 0A402A53 v_mul_f32_e32 v32, s80, v32 ; 0A404050 v_mul_f32_e32 v31, s88, v21 ; 0A3E2A58 v_mul_f32_e32 v31, s81, v31 ; 0A3E3E51 v_mul_f32_e32 v34, v32, v22 ; 0A442D20 v_mul_f32_e32 v30, v30, v34 ; 0A3C451E v_mul_f32_e32 v33, v31, v22 ; 0A422D1F v_mul_f32_e32 v11, v11, v33 ; 0A16430B v_mul_f32_e32 v30, v30, v30 ; 0A3C3D1E v_mac_f32_e32 v30, v11, v11 ; 2C3C170B v_mul_f32_e32 v11, s41, v26 ; 0A163429 ds_bpermute_b32 v39, v46, v11 ; D87E0000 27000B2E ds_bpermute_b32 v11, v46, v11 offset:8 ; D87E0008 0B000B2E v_rcp_f32_e32 v25, v32 ; 7E324520 v_cmp_eq_f32_e64 s[12:13], 0, v32 ; D042000C 00024080 v_cmp_neq_f32_e64 s[8:9], 0, v31 ; D04D0008 00023E80 s_load_dwordx4 s[72:75], s[4:5], 0xf0 ; C00A1202 000000F0 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v11, v39, v11 ; 06161727 ds_bpermute_b32 v39, v46, v35 ; D87E0000 2700232E ds_bpermute_b32 v35, v46, v35 offset:8 ; D87E0008 2300232E v_mul_f32_e32 v11, v33, v11 ; 0A161721 v_mul_f32_e32 v25, 0.5, v25 ; 0A3232F0 v_sub_f32_e32 v52, 2.0, v24 ; 046830F4 s_buffer_load_dword s6, s[28:31], 0x524 ; C022018E 00000524 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v35, v39, v35 ; 06464727 v_mul_f32_e32 v33, v34, v35 ; 0A424722 v_mul_f32_e32 v33, v33, v33 ; 0A424321 v_mac_f32_e32 v33, v11, v11 ; 2C42170B v_max_f32_e32 v11, v33, v30 ; 16163D21 v_add_f32_e32 v30, v23, v23 ; 023C2F17 v_rcp_f32_e32 v23, v31 ; 7E2E451F v_rsq_f32_e64 v32, |v11| ; D1640120 0000010B v_cndmask_b32_e64 v31, v25, v48, s[12:13] ; D100001F 00326119 v_mul_f32_e32 v25, s41, v29 ; 0A323A29 v_mul_f32_e32 v23, 0.5, v23 ; 0A2E2EF0 v_cndmask_b32_e64 v23, v48, v23, s[8:9] ; D1000017 00222F30 v_cmp_neq_f32_e64 s[8:9], 0, v11 ; D04D0008 00021680 v_mov_b32_e32 v11, 0x7f7fffff ; 7E1602FF 7F7FFFFF v_cndmask_b32_e64 v32, v11, v32, s[8:9] ; D1000020 0022410B v_rcp_f32_e32 v34, v32 ; 7E444520 v_cmp_eq_f32_e64 s[8:9], 0, v32 ; D0420008 00024080 ds_bpermute_b32 v33, v46, v25 ; D87E0000 2100192E ds_bpermute_b32 v25, v46, v25 offset:8 ; D87E0008 1900192E v_cndmask_b32_e64 v32, v34, v11, s[8:9] ; D1000020 00221722 v_log_f32_e64 v34, |v32| ; D1610122 00000120 v_cmp_eq_f32_e64 s[8:9], 0, v32 ; D0420008 00024080 v_mul_f32_e32 v35, v27, v22 ; 0A462D1B s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v49, v33, v25 ; 06623321 v_mul_f32_e32 v25, v50, v34 ; 0A324532 v_cndmask_b32_e64 v25, v25, v51, s[8:9] ; D1000019 00226719 v_min_f32_e32 v25, s82, v25 ; 14323252 v_max_f32_e32 v25, 0, v25 ; 16323280 v_floor_f32_e64 v32, -v25 ; D15F0020 20000119 v_sub_f32_e64 v32, -v25, v32 ; D1020020 20024119 v_add_f32_e32 v32, v32, v25 ; 02403320 v_exp_f32_e32 v32, v32 ; 7E404120 v_mul_f32_e32 v33, v32, v23 ; 0A422F20 v_mul_f32_e32 v34, v32, v31 ; 0A443F20 v_mad_f32 v23, -v23, v32, 1.0 ; D1C10017 23CA4117 v_mad_f32 v31, -v31, v32, 1.0 ; D1C1001F 23CA411F v_mul_f32_e32 v32, v26, v22 ; 0A402D1A v_floor_f32_e32 v32, v32 ; 7E403F20 ds_bpermute_b32 v39, v46, v36 ; D87E0000 2700242E ds_bpermute_b32 v36, v46, v36 offset:8 ; D87E0008 2400242E v_mad_f32 v32, v22, v26, -v32 ; D1C10020 84823516 v_floor_f32_e32 v26, v35 ; 7E343F23 v_mad_f32 v22, v22, v27, -v26 ; D1C10016 846A3716 v_max_f32_e32 v22, v34, v22 ; 162C2D22 v_add_f32_e32 v26, s43, v7 ; 02340E2B v_max_f32_e32 v7, v33, v32 ; 160E4121 v_add_f32_e32 v27, s42, v8 ; 0236102A v_mul_f32_e32 v8, s81, v21 ; 0A102A51 v_min_f32_e32 v7, v7, v23 ; 140E2F07 v_mul_f32_e32 v21, s80, v21 ; 0A2A2A50 v_min_f32_e32 v22, v22, v31 ; 142C3F16 v_mad_f32 v23, v8, v7, v19 ; D1C10017 044E0F08 v_mad_f32 v24, v21, v22, v20 ; D1C10018 04522D15 v_mad_f32 v7, s43, v30, v17 ; D1C10007 04463C2B v_mad_f32 v8, s42, v30, v18 ; D1C10008 044A3C2A s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v39, v39, v36 ; 064E4927 image_sample_l v[31:33], v[23:26], s[56:63], s[76:79] dmask:0x7 ; F0900700 026E1F17 s_nop 0 ; BF800000 image_sample_l v[34:36], v[23:26], s[64:71], s[72:75] dmask:0xb ; F0900B00 02502217 s_nop 0 ; BF800000 image_sample_l v[19:22], v[7:10], s[48:55], s[44:47] dmask:0xf ; F0900F00 016C1307 s_waitcnt vmcnt(0) ; BF8C0F70 v_mul_f32_e32 v24, s83, v21 ; 0A302A53 v_mul_f32_e32 v23, s88, v21 ; 0A2E2A58 v_mul_f32_e32 v24, s80, v24 ; 0A303050 v_mul_f32_e32 v23, s81, v23 ; 0A2E2E51 v_mul_f32_e32 v30, v24, v22 ; 0A3C2D18 v_rcp_f32_e32 v25, v23 ; 7E324517 v_cmp_neq_f32_e64 s[12:13], 0, v23 ; D04D000C 00022E80 v_mul_f32_e32 v23, v23, v22 ; 0A2E2D17 v_mul_f32_e32 v38, v30, v38 ; 0A4C4D1E v_mul_f32_e32 v30, v30, v49 ; 0A3C631E v_mul_f32_e32 v37, v23, v37 ; 0A4A4B17 v_mul_f32_e32 v23, v23, v39 ; 0A2E4F17 v_mul_f32_e32 v30, v30, v30 ; 0A3C3D1E v_mac_f32_e32 v30, v23, v23 ; 2C3C2F17 v_mul_f32_e32 v23, v38, v38 ; 0A2E4D26 v_mac_f32_e32 v23, v37, v37 ; 2C2E4B25 v_max_f32_e32 v23, v30, v23 ; 162E2F1E v_rsq_f32_e64 v30, |v23| ; D164011E 00000117 v_cmp_eq_f32_e64 s[16:17], 0, v23 ; D0420010 00022E80 v_mul_f32_e32 v25, 0.5, v25 ; 0A3232F0 v_cndmask_b32_e64 v25, v48, v25, s[12:13] ; D1000019 00323330 v_cndmask_b32_e64 v23, v30, v11, s[16:17] ; D1000017 0042171E v_rcp_f32_e32 v30, v23 ; 7E3C4517 v_cmp_eq_f32_e64 s[12:13], 0, v23 ; D042000C 00022E80 v_cmp_eq_f32_e64 s[14:15], 0, v24 ; D042000E 00023080 v_rcp_f32_e32 v24, v24 ; 7E304518 v_cndmask_b32_e64 v23, v30, v11, s[12:13] ; D1000017 0032171E v_log_f32_e64 v30, |v23| ; D161011E 00000117 v_cmp_eq_f32_e64 s[12:13], 0, v23 ; D042000C 00022E80 v_mul_f32_e32 v24, 0.5, v24 ; 0A3030F0 v_cndmask_b32_e64 v24, v24, v48, s[14:15] ; D1000018 003A6118 v_mul_f32_e32 v23, v50, v30 ; 0A2E3D32 v_cndmask_b32_e64 v23, v23, v51, s[12:13] ; D1000017 00326717 v_min_f32_e32 v23, s82, v23 ; 142E2E52 v_max_f32_e32 v30, 0, v23 ; 163C2E80 v_floor_f32_e64 v23, -v30 ; D15F0017 2000011E v_sub_f32_e64 v23, -v30, v23 ; D1020017 20022F1E v_add_f32_e32 v23, v23, v30 ; 022E3D17 v_exp_f32_e32 v23, v23 ; 7E2E4117 v_mul_f32_e32 v37, v23, v25 ; 0A4A3317 v_mul_f32_e32 v38, v23, v24 ; 0A4C3117 v_mad_f32 v25, -v25, v23, 1.0 ; D1C10019 23CA2F19 v_mad_f32 v23, -v24, v23, 1.0 ; D1C10017 23CA2F18 v_mul_f32_e32 v24, v22, v28 ; 0A303916 v_floor_f32_e32 v24, v24 ; 7E303F18 v_mad_f32 v24, v28, v22, -v24 ; D1C10018 84622D1C v_mul_f32_e32 v28, v22, v29 ; 0A383B16 v_floor_f32_e32 v28, v28 ; 7E383F1C v_mad_f32 v22, v29, v22, -v28 ; D1C10016 84722D1D v_max_f32_e32 v22, v38, v22 ; 162C2D26 v_cmp_gt_f32_e64 s[8:9], |v52|, s18 ; D0440108 00002534 v_cndmask_b32_e64 v28, v45, v41, s[32:33] ; D100001C 0082532D v_cndmask_b32_e64 v29, v42, v43, s[32:33] ; D100001D 0082572A v_cndmask_b32_e64 v49, v44, v28, s[8:9] ; D1000031 0022392C v_mul_f32_e32 v28, s81, v21 ; 0A382A51 v_mul_f32_e32 v21, s80, v21 ; 0A2A2A50 v_min_f32_e32 v22, v22, v23 ; 142C2F16 v_max_f32_e32 v24, v37, v24 ; 16303125 v_cndmask_b32_e64 v53, v42, v29, s[8:9] ; D1000035 00223B2A v_mad_f32 v29, v21, v22, v20 ; D1C1001D 04522D15 v_add_f32_e32 v20, s43, v7 ; 02280E2B v_mul_f32_e32 v7, s6, v5 ; 0A0E0A06 v_min_f32_e32 v24, v24, v25 ; 14303318 v_floor_f32_e32 v7, v7 ; 7E0E3F07 v_mad_f32 v54, s6, v5, -v7 ; D1C10036 841E0A06 v_mad_f32 v28, v28, v24, v19 ; D1C1001C 044E311C image_sample_l v[22:24], v[28:31], s[56:63], s[76:79] dmask:0x7 ; F0900700 026E161C v_mad_f32 v55, -v54, v31, v31 ; D1C10037 247E3F36 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v55, v54, v22 ; 2C6E2D36 ds_bpermute_b32 v19, v46, v49 ; D87E0000 1300312E ds_bpermute_b32 v22, v46, v49 offset:4 ; D87E0004 1600312E v_mad_f32 v56, -v54, v32, v32 ; D1C10038 24824136 v_mac_f32_e32 v56, v54, v23 ; 2C702F36 ds_bpermute_b32 v23, v46, v53 offset:4 ; D87E0004 1700352E v_mad_f32 v57, -v54, v33, v33 ; D1C10039 24864336 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v19, v19, v22 ; 06262D13 ds_bpermute_b32 v22, v46, v53 ; D87E0000 1600352E v_add_f32_e32 v21, s42, v8 ; 022A102A v_mad_f32 v7, s43, v47, v17 ; D1C10007 04465E2B v_mad_f32 v8, s42, v47, v18 ; D1C10008 044A5E2A image_sample_l v[37:39], v[28:31], s[64:71], s[72:75] dmask:0xb ; F0900B00 0250251C v_mac_f32_e32 v57, v54, v24 ; 2C723136 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v28, v22, v23 ; 06382F16 image_sample_l v[22:25], v[7:10], s[48:55], s[44:47] dmask:0xf ; F0900F00 016C1607 s_waitcnt vmcnt(0) ; BF8C0F70 v_mul_f32_e32 v30, s83, v24 ; 0A3C3053 v_mul_f32_e32 v29, s88, v24 ; 0A3A3058 v_mul_f32_e32 v31, s80, v30 ; 0A3E3C50 v_mul_f32_e32 v29, s81, v29 ; 0A3A3A51 v_mul_f32_e32 v32, v31, v25 ; 0A40331F v_mul_f32_e32 v28, v28, v32 ; 0A38411C v_mul_f32_e32 v30, v29, v25 ; 0A3C331D v_mul_f32_e32 v19, v19, v30 ; 0A263D13 v_mul_f32_e32 v28, v28, v28 ; 0A38391C v_mac_f32_e32 v28, v19, v19 ; 2C382713 v_mul_f32_e32 v19, s41, v49 ; 0A266229 ds_bpermute_b32 v47, v46, v19 ; D87E0000 2F00132E ds_bpermute_b32 v19, v46, v19 offset:8 ; D87E0008 1300132E v_mul_f32_e32 v33, s41, v53 ; 0A426A29 v_cmp_eq_f32_e64 s[12:13], 0, v29 ; D042000C 00023A80 v_add_f32_e32 v7, s43, v7 ; 020E0E2B v_add_f32_e32 v8, s42, v8 ; 0210102A s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v19, v47, v19 ; 0626272F ds_bpermute_b32 v47, v46, v33 ; D87E0000 2F00212E ds_bpermute_b32 v33, v46, v33 offset:8 ; D87E0008 2100212E v_mul_f32_e32 v19, v30, v19 ; 0A26271E s_buffer_load_dword s40, s[28:31], 0x520 ; C0220A0E 00000520 v_cmp_ngt_f32_e64 s[0:1], |v52|, s18 ; D04B0100 00002534 s_load_dwordx4 s[36:39], s[4:5], 0x30 ; C00A0902 00000030 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v33, v47, v33 ; 0642432F v_mul_f32_e32 v30, v32, v33 ; 0A3C4320 v_mul_f32_e32 v30, v30, v30 ; 0A3C3D1E v_mac_f32_e32 v30, v19, v19 ; 2C3C2713 v_max_f32_e32 v19, v30, v28 ; 1626391E v_cmp_neq_f32_e64 s[8:9], 0, v19 ; D04D0008 00022680 v_rsq_f32_e64 v19, |v19| ; D1640113 00000113 v_rcp_f32_e32 v28, v29 ; 7E38451D v_mul_f32_e32 v33, v25, v49 ; 0A426319 v_floor_f32_e32 v33, v33 ; 7E423F21 v_cndmask_b32_e64 v19, v11, v19, s[8:9] ; D1000013 0022270B v_cmp_eq_f32_e64 s[8:9], 0, v19 ; D0420008 00022680 v_rcp_f32_e32 v19, v19 ; 7E264513 v_mul_f32_e32 v28, 0.5, v28 ; 0A3838F0 v_cndmask_b32_e64 v28, v28, v48, s[12:13] ; D100001C 0032611C v_mad_f32 v33, v49, v25, -v33 ; D1C10021 84863331 v_cndmask_b32_e64 v19, v19, v11, s[8:9] ; D1000013 00221713 v_cmp_eq_f32_e64 s[8:9], 0, v19 ; D0420008 00022680 v_log_f32_e64 v19, |v19| ; D1610113 00000113 v_mul_f32_e32 v29, s81, v24 ; 0A3A3051 v_add_f32_e32 v0, v0, v0 ; 02000100 v_mul_f32_e32 v1, -2.0, v1 ; 0A0202F5 v_mul_f32_e32 v19, v50, v19 ; 0A262732 v_cndmask_b32_e64 v19, v19, v51, s[8:9] ; D1000013 00226713 v_min_f32_e32 v19, s82, v19 ; 14262652 v_max_f32_e32 v30, 0, v19 ; 163C2680 v_floor_f32_e64 v19, -v30 ; D15F0013 2000011E v_sub_f32_e64 v19, -v30, v19 ; D1020013 2002271E v_add_f32_e32 v19, v19, v30 ; 02263D13 v_exp_f32_e32 v19, v19 ; 7E264113 v_mul_f32_e32 v32, v19, v28 ; 0A403913 v_mad_f32 v28, -v28, v19, 1.0 ; D1C1001C 23CA271C v_max_f32_e32 v32, v32, v33 ; 16404320 v_min_f32_e32 v28, v32, v28 ; 14383920 v_mad_f32 v28, v29, v28, v22 ; D1C1001C 045A391D v_sub_f32_e32 v22, 1.0, v15 ; 042C1EF2 v_cmp_gt_f32_e64 s[8:9], |v22|, s18 ; D0440108 00002516 v_sub_f32_e32 v15, 2.0, v15 ; 041E1EF4 v_cndmask_b32_e64 v22, v45, v41, s[8:9] ; D1000016 0022532D v_cmp_gt_f32_e64 s[12:13], |v15|, s18 ; D044010C 0000250F v_cndmask_b32_e64 v32, v44, v22, s[12:13] ; D1000020 00322D2C v_cndmask_b32_e64 v22, v42, v43, s[8:9] ; D1000016 0022572A v_cndmask_b32_e64 v47, v42, v22, s[12:13] ; D100002F 00322D2A v_add_f32_e32 v22, v40, v40 ; 022C5128 v_mac_f32_e32 v17, s43, v22 ; 2C222C2B v_mac_f32_e32 v18, s42, v22 ; 2C242C2A ds_bpermute_b32 v22, v46, v32 ; D87E0000 1600202E ds_bpermute_b32 v29, v46, v32 offset:4 ; D87E0004 1D00202E v_cmp_eq_f32_e64 s[12:13], 0, v31 ; D042000C 00023E80 s_buffer_load_dword s20, s[28:31], 0xc0 ; C022050E 000000C0 s_buffer_load_dword s34, s[28:31], 0xc4 ; C022088E 000000C4 s_buffer_load_dword s6, s[28:31], 0xe0 ; C022018E 000000E0 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v33, v22, v29 ; 06423B16 ds_bpermute_b32 v22, v46, v47 ; D87E0000 16002F2E ds_bpermute_b32 v29, v46, v47 offset:4 ; D87E0004 1D002F2E s_buffer_load_dword s35, s[28:31], 0xc8 ; C02208CE 000000C8 s_buffer_load_dword s7, s[28:31], 0xd0 ; C02201CE 000000D0 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v49, v22, v29 ; 06623B16 v_rcp_f32_e32 v22, v31 ; 7E2C451F v_mul_f32_e32 v22, 0.5, v22 ; 0A2C2CF0 v_cndmask_b32_e64 v22, v22, v48, s[12:13] ; D1000016 00326116 v_mul_f32_e32 v29, v19, v22 ; 0A3A2D13 v_mad_f32 v19, -v22, v19, 1.0 ; D1C10013 23CA2716 v_mul_f32_e32 v22, v25, v53 ; 0A2C6B19 v_floor_f32_e32 v22, v22 ; 7E2C3F16 v_mad_f32 v22, v53, v25, -v22 ; D1C10016 845A3335 v_max_f32_e32 v22, v29, v22 ; 162C2D1D v_min_f32_e32 v19, v22, v19 ; 14262716 v_mul_f32_e32 v22, s80, v24 ; 0A2C3050 v_mad_f32 v29, v22, v19, v23 ; D1C1001D 045E2716 v_mul_f32_e32 v19, s41, v32 ; 0A264029 ds_bpermute_b32 v23, v46, v19 ; D87E0000 1700132E ds_bpermute_b32 v19, v46, v19 offset:8 ; D87E0008 1300132E v_mul_f32_e32 v22, s41, v47 ; 0A2C5E29 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v31, v23, v19 ; 063E2717 ds_bpermute_b32 v19, v46, v22 ; D87E0000 1300162E ds_bpermute_b32 v22, v46, v22 offset:8 ; D87E0008 1600162E image_sample_l v[40:42], v[28:31], s[56:63], s[76:79] dmask:0x7 ; F0900700 026E281C s_nop 0 ; BF800000 image_sample_l v[43:45], v[28:31], s[64:71], s[72:75] dmask:0xb ; F0900B00 02502B1C v_add_f32_e32 v29, s43, v17 ; 023A222B v_add_f32_e32 v30, s42, v18 ; 023C242A s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v46, v19, v22 ; 065C2D13 v_mov_b32_e32 v19, v9 ; 7E260309 image_sample_l v[22:25], v[17:20], s[48:55], s[44:47] dmask:0xf ; F0900F00 016C1611 s_waitcnt vmcnt(0) ; BF8C0F70 v_mul_f32_e32 v17, s88, v24 ; 0A223058 v_mul_f32_e32 v18, s83, v24 ; 0A243053 v_mul_f32_e32 v17, s81, v17 ; 0A222251 v_mul_f32_e32 v18, s80, v18 ; 0A242450 v_rcp_f32_e32 v19, v17 ; 7E264511 v_rcp_f32_e32 v28, v18 ; 7E384512 v_cmp_eq_f32_e64 s[12:13], 0, v17 ; D042000C 00022280 v_cmp_eq_f32_e64 s[14:15], 0, v18 ; D042000E 00022480 v_mul_f32_e32 v17, v17, v25 ; 0A223311 v_mul_f32_e32 v18, v18, v25 ; 0A243312 v_mul_f32_e32 v33, v17, v33 ; 0A424311 v_mul_f32_e32 v17, v17, v31 ; 0A223F11 v_mul_f32_e32 v31, v18, v49 ; 0A3E6312 v_mul_f32_e32 v18, v18, v46 ; 0A245D12 v_mul_f32_e32 v18, v18, v18 ; 0A242512 v_mac_f32_e32 v18, v17, v17 ; 2C242311 v_mul_f32_e32 v17, v31, v31 ; 0A223F1F v_mac_f32_e32 v17, v33, v33 ; 2C224321 v_max_f32_e32 v17, v18, v17 ; 16222312 v_rsq_f32_e64 v18, |v17| ; D1640112 00000111 v_mul_f32_e32 v19, 0.5, v19 ; 0A2626F0 v_cndmask_b32_e64 v19, v19, v48, s[12:13] ; D1000013 00326113 v_cmp_eq_f32_e64 s[12:13], 0, v17 ; D042000C 00022280 v_cndmask_b32_e64 v17, v18, v11, s[12:13] ; D1000011 00321712 v_rcp_f32_e32 v18, v17 ; 7E244511 v_cmp_eq_f32_e64 s[12:13], 0, v17 ; D042000C 00022280 v_mul_f32_e32 v28, 0.5, v28 ; 0A3838F0 v_cndmask_b32_e64 v28, v28, v48, s[14:15] ; D100001C 003A611C v_cndmask_b32_e64 v17, v18, v11, s[12:13] ; D1000011 00321712 v_log_f32_e64 v18, |v17| ; D1610112 00000111 v_cmp_eq_f32_e64 s[12:13], 0, v17 ; D042000C 00022280 v_mul_f32_e32 v17, v50, v18 ; 0A222532 v_cndmask_b32_e64 v17, v17, v51, s[12:13] ; D1000011 00326711 v_min_f32_e32 v17, s82, v17 ; 14222252 v_max_f32_e32 v33, 0, v17 ; 16422280 v_floor_f32_e64 v17, -v33 ; D15F0011 20000121 v_sub_f32_e64 v17, -v33, v17 ; D1020011 20022321 v_add_f32_e32 v17, v17, v33 ; 02224311 v_exp_f32_e32 v17, v17 ; 7E224111 v_mul_f32_e32 v18, v17, v19 ; 0A242711 v_mad_f32 v19, -v19, v17, 1.0 ; D1C10013 23CA2313 v_mul_f32_e32 v31, v17, v28 ; 0A3E3911 v_mad_f32 v17, -v28, v17, 1.0 ; D1C10011 23CA231C v_mul_f32_e32 v28, v25, v32 ; 0A384119 v_floor_f32_e32 v28, v28 ; 7E383F1C v_mad_f32 v28, v32, v25, -v28 ; D1C1001C 84723320 v_mul_f32_e32 v32, v25, v47 ; 0A405F19 v_floor_f32_e32 v32, v32 ; 7E403F20 v_mad_f32 v25, v47, v25, -v32 ; D1C10019 8482332F v_max_f32_e32 v18, v18, v28 ; 16243912 v_max_f32_e32 v25, v31, v25 ; 1632331F v_min_f32_e32 v18, v18, v19 ; 14242712 v_mul_f32_e32 v19, s81, v24 ; 0A263051 v_min_f32_e32 v17, v25, v17 ; 14222319 v_mul_f32_e32 v24, s80, v24 ; 0A303050 v_mad_f32 v31, v19, v18, v22 ; D1C1001F 045A2513 v_mad_f32 v32, v24, v17, v23 ; D1C10020 045E2318 image_sample_l v[17:19], v[31:34], s[56:63], s[76:79] dmask:0x7 ; F0900700 026E111F s_nop 0 ; BF800000 image_sample_l v[22:24], v[31:34], s[64:71], s[72:75] dmask:0xb ; F0900B00 0250161F v_mad_f32 v32, -v54, v44, v44 ; D1C10020 24B25936 v_mad_f32 v25, -v54, v43, v43 ; D1C10019 24AE5736 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v32, v54, v23 ; 2C402F36 v_mad_f32 v23, -v54, v45, v45 ; D1C10017 24B65B36 v_mad_f32 v33, -v54, v41, v41 ; D1C10021 24A65336 v_mac_f32_e32 v25, v54, v22 ; 2C322D36 v_mac_f32_e32 v23, v54, v24 ; 2C2E3136 v_mad_f32 v24, -v54, v40, v40 ; D1C10018 24A25136 v_mad_f32 v40, -v54, v42, v42 ; D1C10028 24AA5536 v_mov_b32_e32 v28, v9 ; 7E380309 v_mov_b32_e32 v22, v9 ; 7E2C0309 v_mac_f32_e32 v24, v54, v17 ; 2C302336 v_mac_f32_e32 v33, v54, v18 ; 2C422536 image_sample_l v[17:18], v[26:29], s[48:55], s[44:47] dmask:0x3 ; F0900300 016C111A v_mac_f32_e32 v40, v54, v19 ; 2C502736 image_sample_l v[19:20], v[20:23], s[48:55], s[44:47] dmask:0x3 ; F0900300 016C1314 s_nop 0 ; BF800000 image_sample_l v[7:8], v[7:10], s[48:55], s[44:47] dmask:0x3 ; F0900300 016C0707 v_mov_b32_e32 v31, v9 ; 7E3E0309 s_waitcnt vmcnt(2) ; BF8C0F72 v_mad_f32 v27, -v54, v17, v17 ; D1C1001B 24462336 v_mad_f32 v28, -v54, v18, v18 ; D1C1001C 244A2536 v_cmp_ngt_f32_e64 s[12:13], |v12|, s18 ; D04B010C 0000250C image_sample_l v[17:18], v[29:32], s[48:55], s[44:47] dmask:0x3 ; F0900300 016C111D s_waitcnt vmcnt(1) ; BF8C0F71 v_mad_f32 v7, -v54, v7, v7 ; D1C10007 241E0F36 v_cndmask_b32_e64 v9, v14, 1.0, s[10:11] ; D1000009 0029E50E v_mov_b32_e32 v12, 0x3cf5c28f ; 7E1802FF 3CF5C28F v_mad_f32 v8, -v54, v8, v8 ; D1C10008 24221136 s_waitcnt vmcnt(0) ; BF8C0F70 v_mac_f32_e32 v7, v54, v17 ; 2C0E2336 v_cndmask_b32_e64 v17, v14, 1.0, s[2:3] ; D1000011 0009E50E v_cndmask_b32_e64 v9, v9, v12, s[12:13] ; D1000009 00321909 v_mac_f32_e32 v8, v54, v18 ; 2C102536 v_mad_f32 v18, -v54, v9, v9 ; D1C10012 24261336 v_cndmask_b32_e32 v17, v12, v17, vcc ; 0022230C v_mov_b32_e32 v9, 0x3ca3d70a ; 7E1202FF 3CA3D70A v_mac_f32_e32 v18, v54, v17 ; 2C242336 v_cndmask_b32_e64 v17, v9, 0, s[10:11] ; D1000011 00290109 v_cndmask_b32_e64 v17, v17, v13, s[12:13] ; D1000011 00321B11 v_mac_f32_e32 v27, v54, v19 ; 2C362736 v_cndmask_b32_e64 v19, v9, 0, s[2:3] ; D1000013 00090109 v_cndmask_b32_e32 v19, v13, v19, vcc ; 0026270D v_mad_f32 v17, -v54, v17, v17 ; D1C10011 24462336 v_mac_f32_e32 v17, v54, v19 ; 2C222736 v_mov_b32_e32 v19, 0x3c449ba6 ; 7E2602FF 3C449BA6 v_cndmask_b32_e64 v29, v19, 0, vcc ; D100001D 01A90113 v_cmp_ngt_f32_e64 vcc, |v15|, s18 ; D04B016A 0000250F v_cndmask_b32_e64 v15, v14, 1.0, s[32:33] ; D100000F 0081E50E v_cndmask_b32_e64 v14, v14, 1.0, s[8:9] ; D100000E 0021E50E v_cndmask_b32_e32 v14, v14, v12, vcc ; 001C190E v_cndmask_b32_e64 v12, v15, v12, s[0:1] ; D100000C 0002190F v_cndmask_b32_e64 v15, v9, 0, s[32:33] ; D100000F 00810109 v_cndmask_b32_e64 v9, v9, 0, s[8:9] ; D1000009 00210109 v_mad_f32 v12, -v54, v12, v12 ; D1C1000C 24321936 v_cndmask_b32_e32 v9, v9, v13, vcc ; 00121B09 v_cndmask_b32_e64 v13, v15, v13, s[0:1] ; D100000D 00021B0F v_mad_f32 v13, -v54, v13, v13 ; D1C1000D 24361B36 v_mac_f32_e32 v12, v54, v14 ; 2C181D36 v_cndmask_b32_e64 v14, 0, v19, s[0:1] ; D100000E 00022680 v_mac_f32_e32 v13, v54, v9 ; 2C1A1336 v_cndmask_b32_e32 v9, 0, v19, vcc ; 00122680 v_mad_f32 v14, -v54, v14, v14 ; D1C1000E 243A1D36 v_mac_f32_e32 v14, v54, v9 ; 2C1C1336 v_mul_f32_e32 v9, s40, v4 ; 0A120828 v_floor_f32_e32 v9, v9 ; 7E123F09 v_mad_f32 v15, s40, v4, -v9 ; D1C1000F 84260828 v_mad_f32 v9, -v15, v55, v55 ; D1C10009 24DE6F0F v_mac_f32_e32 v9, v15, v24 ; 2C12310F v_mac_f32_e32 v28, v54, v20 ; 2C382936 v_cndmask_b32_e64 v20, 0, v19, s[12:13] ; D1000014 00322680 v_mad_f32 v19, -v15, v56, v56 ; D1C10013 24E2710F v_mac_f32_e32 v19, v15, v33 ; 2C26430F v_mul_f32_e32 v9, 0x3e4ccccd, v9 ; 0A1212FF 3E4CCCCD s_load_dwordx8 s[8:15], s[4:5], 0x0 ; C00E0202 00000000 v_mac_f32_e32 v9, 0x3f333333, v19 ; 2C1226FF 3F333333 v_mad_f32 v19, -v15, v57, v57 ; D1C10013 24E6730F v_mac_f32_e32 v19, v15, v40 ; 2C26510F v_mac_f32_e32 v9, 0x3dcccccd, v19 ; 2C1226FF 3DCCCCCD v_mad_f32 v19, -v15, v28, v28 ; D1C10013 2472390F v_sub_f32_e32 v28, 1.0, v15 ; 04381EF2 v_mac_f32_e32 v19, v15, v8 ; 2C26110F v_mad_f32 v8, -v27, v28, v9 ; D1C10008 2426391B v_mad_f32 v27, -v7, v15, v8 ; D1C1001B 24221F07 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[7:9], v[4:5], s[8:15], s[36:39] dmask:0x7 ; F0800700 01220704 v_mad_f32 v4, -v15, v18, v18 ; D1C10004 244A250F s_waitcnt vmcnt(0) ; BF8C0F70 v_mad_f32 v5, v28, v55, v7 ; D1C10005 041E6F1C v_mad_f32 v7, v28, v56, v8 ; D1C10007 0422711C v_mac_f32_e32 v4, v15, v12 ; 2C08190F v_mad_f32 v8, -v15, v17, v17 ; D1C10008 2446230F v_mac_f32_e32 v8, v15, v13 ; 2C101B0F v_mad_f32 v4, v0, v4, -v4 ; D1C10004 84120900 v_mac_f32_e32 v4, v8, v6 ; 2C080D08 v_mad_f32 v8, -v10, v10, 1.0 ; D1C10008 23CA150A v_mad_f32 v8, -v6, v6, v8 ; D1C10008 24220D06 v_cmp_eq_f32_e32 vcc, 0, v8 ; 7C841080 v_rsq_f32_e64 v8, |v8| ; D1640108 00000108 v_mad_f32 v20, -v54, v20, v20 ; D1C10014 24522936 v_mac_f32_e32 v20, v54, v29 ; 2C283B36 v_mac_f32_e32 v5, v15, v24 ; 2C0A310F v_cndmask_b32_e32 v8, v8, v11, vcc ; 00101708 v_cmp_eq_f32_e32 vcc, 0, v8 ; 7C841080 v_rcp_f32_e32 v8, v8 ; 7E104508 v_mad_f32 v24, -v15, v20, v20 ; D1C10018 2452290F v_mac_f32_e32 v24, v15, v14 ; 2C301D0F v_mad_f32 v21, -v54, v34, v34 ; D1C10015 248A4536 v_cndmask_b32_e32 v8, v8, v11, vcc ; 00101708 v_mac_f32_e32 v4, v24, v8 ; 2C081118 v_mad_f32 v22, -v54, v35, v35 ; D1C10016 248E4736 v_mad_f32 v18, v18, v28, -v4 ; D1C10012 84123912 v_mac_f32_e32 v18, v15, v12 ; 2C24190F v_mad_f32 v12, v17, v28, -v4 ; D1C1000C 84123911 v_mad_f32 v4, v20, v28, -v4 ; D1C10004 84123914 v_mac_f32_e32 v22, v54, v38 ; 2C2C4D36 v_mac_f32_e32 v21, v54, v37 ; 2C2A4B36 v_mad_f32 v26, -v54, v36, v36 ; D1C1001A 24924936 v_mad_f32 v17, -v15, v22, v22 ; D1C10011 245A2D0F v_mac_f32_e32 v4, v15, v14 ; 2C081D0F v_mad_f32 v14, -v15, v21, v21 ; D1C1000E 24562B0F v_mac_f32_e32 v14, v15, v25 ; 2C1C330F v_mac_f32_e32 v17, v15, v32 ; 2C22410F v_mac_f32_e32 v26, v54, v39 ; 2C344F36 v_mac_f32_e32 v9, v28, v57 ; 2C12731C v_mac_f32_e32 v14, v28, v21 ; 2C1C2B1C v_mad_f32 v20, v28, v22, v17 ; D1C10014 04462D1C v_mac_f32_e32 v12, v15, v13 ; 2C181B0F v_mad_f32 v13, -v15, v26, v26 ; D1C1000D 246A350F v_mac_f32_e32 v7, v15, v33 ; 2C0E430F v_mac_f32_e32 v9, v15, v40 ; 2C12510F v_mac_f32_e32 v13, v15, v23 ; 2C1A2F0F v_mac_f32_e32 v14, v15, v25 ; 2C1C330F v_mac_f32_e32 v20, v15, v32 ; 2C28410F v_mul_f32_e32 v15, v27, v19 ; 0A1E271B v_mul_f32_e32 v19, v18, v18 ; 0A262512 v_mac_f32_e32 v19, v12, v12 ; 2C26190C v_mac_f32_e32 v19, v4, v4 ; 2C260904 v_sqrt_f32_e32 v21, v19 ; 7E2A4F13 v_rsq_f32_e32 v19, v19 ; 7E264913 v_cmp_neq_f32_e32 vcc, 0, v21 ; 7C9A2A80 s_buffer_load_dword s8, s[28:31], 0xe4 ; C022020E 000000E4 s_load_dwordx4 s[0:3], s[4:5], 0x170 ; C00A0002 00000170 v_cndmask_b32_e32 v19, v11, v19, vcc ; 0026270B v_mul_f32_e32 v12, v19, v12 ; 0A181913 v_mul_f32_e32 v18, v19, v18 ; 0A242513 v_mul_f32_e32 v19, v19, v4 ; 0A260913 v_mad_f32 v4, 2.0, v13, -1.0 ; D1C10004 03CE1AF4 v_mad_f32 v4, v14, v4, -v4 ; D1C10004 8412090E v_mad_f32 v13, 2.0, v17, -1.0 ; D1C1000D 03CE22F4 v_mad_f32 v14, -v4, v4, 1.0 ; D1C1000E 23CA0904 v_mad_f32 v14, -v13, v13, v14 ; D1C1000E 243A1B0D v_cmp_eq_f32_e32 vcc, 0, v14 ; 7C841C80 v_rsq_f32_e64 v14, |v14| ; D164010E 0000010E v_mov_b32_e32 v17, 0x7ee66665 ; 7E2202FF 7EE66665 v_mad_f32 v1, v19, v1, v19 ; D1C10001 044E0313 v_mad_f32 v1, v12, v8, -v1 ; D1C10001 8406110C v_cndmask_b32_e32 v14, v14, v11, vcc ; 001C170E v_cmp_eq_f32_e32 vcc, 0, v14 ; 7C841C80 v_rcp_f32_e32 v14, v14 ; 7E1C450E v_mad_f32 v0, v0, v12, -v12 ; D1C10000 84321900 v_mad_f32 v0, v18, v6, -v0 ; D1C10000 84020D12 s_buffer_load_dword s9, s[28:31], 0xe8 ; C022024E 000000E8 v_mul_f32_e32 v14, 0x3ee66666, v14 ; 0A1C1CFF 3EE66666 v_cndmask_b32_e32 v14, v14, v17, vcc ; 001C230E v_mul_f32_e32 v17, v4, v4 ; 0A220904 v_mac_f32_e32 v17, v13, v13 ; 2C221B0D v_mac_f32_e32 v17, v14, v14 ; 2C221D0E v_sqrt_f32_e32 v13, v17 ; 7E1A4F11 v_cmp_neq_f32_e32 vcc, 0, v13 ; 7C9A1A80 v_rsq_f32_e32 v13, v17 ; 7E1A4911 s_buffer_load_dword s4, s[28:31], 0xd4 ; C022010E 000000D4 s_buffer_load_dword s10, s[28:31], 0xd8 ; C022028E 000000D8 v_cndmask_b32_e32 v13, v11, v13, vcc ; 001A1B0B v_mul_f32_e32 v17, v13, v4 ; 0A22090D v_mad_f32 v4, v20, v13, -v13 ; D1C10004 84361B14 v_mul_f32_e32 v13, v13, v14 ; 0A1A1D0D v_mul_f32_e32 v14, v18, v8 ; 0A1C1112 v_mul_f32_e32 v20, v4, v1 ; 0A280304 v_mad_f32 v14, v19, v10, -v14 ; D1C1000E 843A1513 v_mul_f32_e32 v14, v4, v14 ; 0A1C1D04 v_mac_f32_e32 v20, v18, v17 ; 2C282312 v_mul_f32_e32 v21, v4, v0 ; 0A2A0104 v_mac_f32_e32 v14, v12, v17 ; 2C1C230C v_mac_f32_e32 v20, v10, v13 ; 2C281B0A v_mac_f32_e32 v21, v19, v17 ; 2C2A2313 v_mac_f32_e32 v14, v6, v13 ; 2C1C1B06 v_mul_f32_e32 v6, s20, v20 ; 0A0C2814 v_add_f32_e32 v1, -0.5, v7 ; 02020EF1 v_mul_f32_e32 v7, s6, v20 ; 0A0E2806 v_mac_f32_e32 v21, v8, v13 ; 2C2A1B08 v_mac_f32_e32 v6, s34, v14 ; 2C0C1C22 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v7, s8, v14 ; 2C0E1C08 v_mul_f32_e32 v8, s7, v20 ; 0A102807 v_mac_f32_e32 v6, s35, v21 ; 2C0C2A23 v_mac_f32_e32 v8, s4, v14 ; 2C101C04 v_mac_f32_e32 v7, s9, v21 ; 2C0E2A09 v_mul_f32_e32 v10, v6, v6 ; 0A140D06 v_mac_f32_e32 v8, s10, v21 ; 2C102A0A v_mac_f32_e32 v10, v7, v7 ; 2C140F07 v_mac_f32_e32 v10, v8, v8 ; 2C141108 v_rsq_f32_e32 v12, v10 ; 7E18490A v_add_f32_e32 v0, -0.5, v5 ; 02000AF1 v_add_f32_e32 v5, -0.5, v9 ; 020A12F1 v_sqrt_f32_e32 v9, v10 ; 7E124F0A v_cmp_neq_f32_e32 vcc, 0, v9 ; 7C9A1280 v_cndmask_b32_e32 v9, v11, v12, vcc ; 0012190B v_mul_f32_e32 v6, v9, v6 ; 0A0C0D09 v_mul_f32_e32 v7, v9, v7 ; 0A0E0F09 v_mul_f32_e32 v8, v9, v8 ; 0A101109 v_max3_f32 v13, |v8|, |v6|, |v7| ; D1D3070D 041E0D08 v_rcp_f32_e32 v14, v13 ; 7E1C450D v_bfrev_b32_e32 v9, -2 ; 7E1258C2 v_sub_f32_e64 v17, |v7|, v13 ; D1020111 00021B07 v_cmp_eq_f32_e32 vcc, 0, v13 ; 7C841A80 v_add_f32_e64 v4, v15, 0 clamp ; D1018004 0001010F v_sub_f32_e64 v15, |v8|, v13 ; D102010F 00021B08 v_cndmask_b32_e32 v13, v14, v11, vcc ; 001A170E v_and_b32_e32 v10, v9, v6 ; 26140D09 v_and_b32_e32 v12, v9, v7 ; 26180F09 v_cmp_ngt_f32_e32 vcc, 0, v17 ; 7C962280 v_and_b32_e32 v9, v9, v8 ; 26121109 v_mul_f32_e32 v14, v13, v6 ; 0A1C0D0D v_cndmask_b32_e32 v6, v12, v10, vcc ; 000C150C v_cmp_gt_f32_e32 vcc, 0, v15 ; 7C881E80 v_mul_f32_e32 v18, v13, v7 ; 0A240F0D v_cndmask_b32_e32 v7, v10, v6, vcc ; 000E0D0A v_cndmask_b32_e32 v9, v12, v9, vcc ; 0012130C v_subrev_f32_e32 v6, v9, v7 ; 060C0F09 v_cmp_gt_f32_e32 vcc, 0, v6 ; 7C880C80 v_cndmask_b32_e32 v6, v7, v9, vcc ; 000C1307 v_rcp_f32_e32 v10, v6 ; 7E144506 v_readlane_b32 s4, v58, 0 ; D2890004 0001013A v_cndmask_b32_e32 v7, v9, v7, vcc ; 000E0F09 v_cmp_eq_f32_e32 vcc, 0, v6 ; 7C840C80 v_cndmask_b32_e32 v9, v10, v11, vcc ; 0012170A v_readlane_b32 s5, v58, 1 ; D2890005 0001033A v_mul_f32_e32 v8, v13, v8 ; 0A10110D v_mul_f32_e32 v7, v9, v7 ; 0A0E0F09 s_and_b64 exec, exec, s[4:5] ; 86FE047E v_readlane_b32 s4, v58, 2 ; D2890004 0001053A v_readlane_b32 s5, v58, 3 ; D2890005 0001073A v_readlane_b32 s6, v58, 4 ; D2890006 0001093A v_readlane_b32 s7, v58, 5 ; D2890007 00010B3A v_readlane_b32 s8, v58, 6 ; D2890008 00010D3A v_readlane_b32 s9, v58, 7 ; D2890009 00010F3A v_readlane_b32 s10, v58, 8 ; D289000A 0001113A v_readlane_b32 s11, v58, 9 ; D289000B 0001133A s_mov_b32 m0, s19 ; BEFC0013 v_mov_b32_e32 v11, 0x3ecccccd ; 7E1602FF 3ECCCCCD v_mov_b32_e32 v32, v16 ; 7E400310 s_nop 1 ; BF800001 image_sample v6, v[6:7], s[4:11], s[0:3] dmask:0x1 ; F0800100 00010606 v_interp_p1_f32 v12, v2, 2, 0, [m0] ; D4300202 v_interp_p2_f32 v12, [v12], v3, 2, 0, [m0] ; D4310203 s_waitcnt vmcnt(0) ; BF8C0F70 v_mul_f32_e32 v9, v6, v18 ; 0A122506 v_mul_f32_e32 v7, v6, v14 ; 0A0E1D06 v_mul_f32_e32 v6, v6, v8 ; 0A0C1106 v_mad_f32 v8, v7, 0.5, 0.5 ; D1C10008 03C1E107 v_mad_f32 v10, v6, 0.5, 0.5 ; D1C1000A 03C1E106 v_mov_b32_e32 v2, v5 ; 7E040305 v_mad_f32 v9, v9, 0.5, 0.5 ; D1C10009 03C1E109 v_mov_b32_e32 v3, 0 ; 7E060280 v_mov_b32_e32 v7, 0 ; 7E0E0280 s_mov_b32 s10, s22 ; BE8A0016 v_mov_b32_e32 v5, v4 ; 7E0A0304 v_mov_b32_e32 v6, v4 ; 7E0C0304 v_mov_b32_e32 v13, v12 ; 7E1A030C v_mov_b32_e32 v14, v12 ; 7E1C030C v_mov_b32_e32 v15, v12 ; 7E1E030C Shader epilog disassembly: v_cvt_pkrtz_f16_f32_e64 v0, v0, v1 ; D2960000 00020300 v_cvt_pkrtz_f16_f32_e64 v1, v2, v3 ; D2960001 00020702 v_cvt_pkrtz_f16_f32_e64 v2, v4, v5 ; D2960002 00020B04 v_cvt_pkrtz_f16_f32_e64 v3, v6, v7 ; D2960003 00020F06 exp 15, 0, 1, 0, 0, v0, v1, v0, v0 ; C400040F 00000100 v_cvt_pkrtz_f16_f32_e64 v4, v8, v9 ; D2960004 00021308 v_cvt_pkrtz_f16_f32_e64 v5, v10, v11 ; D2960005 0002170A exp 15, 1, 1, 0, 0, v2, v3, v0, v0 ; C400041F 00000302 v_cvt_pkrtz_f16_f32_e64 v6, v12, v13 ; D2960006 00021B0C v_cvt_pkrtz_f16_f32_e64 v7, v14, v15 ; D2960007 00021F0E exp 15, 2, 1, 0, 0, v4, v5, v0, v0 ; C400042F 00000504 v_cvt_pkrtz_f16_f32_e64 v8, v16, v17 ; D2960008 00022310 v_cvt_pkrtz_f16_f32_e64 v9, v18, v19 ; D2960009 00022712 exp 15, 3, 1, 0, 0, v6, v7, v0, v0 ; C400043F 00000706 v_cvt_pkrtz_f16_f32_e64 v10, v20, v21 ; D296000A 00022B14 v_cvt_pkrtz_f16_f32_e64 v11, v22, v23 ; D296000B 00022F16 exp 15, 4, 1, 0, 0, v8, v9, v0, v0 ; C400044F 00000908 v_cvt_pkrtz_f16_f32_e64 v12, v24, v25 ; D296000C 00023318 v_cvt_pkrtz_f16_f32_e64 v13, v26, v27 ; D296000D 0002371A exp 15, 5, 1, 0, 0, v10, v11, v0, v0 ; C400045F 00000B0A v_cvt_pkrtz_f16_f32_e64 v14, v28, v29 ; D296000E 00023B1C v_cvt_pkrtz_f16_f32_e64 v15, v30, v31 ; D296000F 00023F1E exp 15, 6, 1, 0, 0, v12, v13, v0, v0 ; C400046F 00000D0C exp 15, 7, 1, 1, 1, v14, v15, v0, v0 ; C4001C7F 00000F0E s_endpgm ; BF810000 *** SHADER CONFIG *** SPI_PS_INPUT_ADDR = 0xd077 SPI_PS_INPUT_ENA = 0x0002 *** SHADER STATS *** SGPRS: 96 VGPRS: 60 Spilled SGPRs: 10 Spilled VGPRs: 0 Code Size: 4772 bytes LDS: 0 blocks Scratch: 3072 bytes per wave Max Waves: 4 ********************