299: MUL TEMP[3].x, CONST[5][13].xxxx, IN[0].xxxx 300: MAD TEMP[3].x, CONST[5][13].yyyy, IN[0].yyyy, TEMP[3].xxxx 301: MAD TEMP[3].x, CONST[5][13].zzzz, IN[0].zzzz, TEMP[3].xxxx 302: ADD TEMP[3].x, TEMP[3].xxxx, CONST[5][13].wwww 303: MOV TEMP[2].y, TEMP[3].xxxx 304: MUL TEMP[3].x, CONST[5][14].xxxx, IN[0].xxxx 305: MAD TEMP[3].x, CONST[5][14].yyyy, IN[0].yyyy, TEMP[3].xxxx 306: MAD TEMP[3].x, CONST[5][14].zzzz, IN[0].zzzz, TEMP[3].xxxx 307: ADD TEMP[3].x, TEMP[3].xxxx, CONST[5][14].wwww 308: MOV TEMP[2].z, TEMP[3].xxxx 309: MUL TEMP[3].x, CONST[5][15].xxxx, IN[0].xxxx 310: MAD TEMP[3].x, CONST[5][15].yyyy, IN[0].yyyy, TEMP[3].xxxx 311: MAD TEMP[3].x, CONST[5][15].zzzz, IN[0].zzzz, TEMP[3].xxxx 312: ADD TEMP[3].x, TEMP[3].xxxx, CONST[5][15].wwww 313: RCP TEMP[3].xyz, TEMP[3].xxxx 314: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xyzz 315: MOV_SAT TEMP[3].x, -TEMP[2].zzzz 316: MOV TEMP[4].x, -CONST[5][16].xxxx 317: MOV TEMP[5].x, TEMP[4].xxxx 318: MOV TEMP[5].y, CONST[5][16].xxxx 319: MOV TEMP[6].x, CONST[5][16].xxxx 320: MOV TEMP[6].y, TEMP[4].xxxx 321: ADD TEMP[10].xy, TEMP[2].xyyy, CONST[5][16].xxxx 322: MOV TEMP[10].xy, TEMP[10].xyyy 323: MOV TEMP[10].z, TEMP[3].xxxx 324: MOV TEMP[10].w, IMM[0].wwww 325: TXL TEMP[10].x, TEMP[10], SAMP[0], SHADOW2D 326: MOV TEMP[10].x, TEMP[10].xxxx 327: ADD TEMP[5].xy, TEMP[5].xyyy, TEMP[2].xyyy 328: MOV TEMP[5].xy, TEMP[5].xyyy 329: MOV TEMP[5].z, TEMP[3].xxxx 330: MOV TEMP[5].w, IMM[0].wwww 331: TXL TEMP[5].x, TEMP[5], SAMP[0], SHADOW2D 332: MOV TEMP[10].y, TEMP[5].xxxx 333: ADD TEMP[5].xy, TEMP[2].xyyy, TEMP[6].xyyy 334: MOV TEMP[5].xy, TEMP[5].xyyy 335: MOV TEMP[5].z, TEMP[3].xxxx 336: MOV TEMP[5].w, IMM[0].wwww 337: TXL TEMP[5].x, TEMP[5], SAMP[0], SHADOW2D 338: MOV TEMP[10].z, TEMP[5].xxxx 339: ADD TEMP[5].xy, TEMP[2].xyyy, TEMP[4].xxxx 340: MOV TEMP[5].xy, TEMP[5].xyyy 341: MOV TEMP[5].z, TEMP[3].xxxx 342: MOV TEMP[5].w, IMM[0].wwww 343: TXL TEMP[5].x, TEMP[5], SAMP[0], SHADOW2D 344: MOV TEMP[10].w, TEMP[5].xxxx 345: MOV TEMP[5].y, IMM[0].wwww 346: MOV TEMP[5].x, CONST[5][16].xxxx 347: MOV TEMP[6].y, IMM[0].wwww 348: MOV TEMP[6].x, TEMP[4].xxxx 349: MOV TEMP[11].x, IMM[0].wwww 350: MOV TEMP[11].y, TEMP[4].xxxx 351: MOV TEMP[4].x, IMM[0].wwww 352: MOV TEMP[4].y, CONST[5][16].xxxx 353: ADD TEMP[5].xy, TEMP[5].xyyy, TEMP[2].xyyy 354: MOV TEMP[5].xy, TEMP[5].xyyy 355: MOV TEMP[5].z, TEMP[3].xxxx 356: MOV TEMP[5].w, IMM[0].wwww 357: TXL TEMP[5].x, TEMP[5], SAMP[0], SHADOW2D 358: MOV TEMP[5].x, TEMP[5].xxxx 359: ADD TEMP[6].xy, TEMP[6].xyyy, TEMP[2].xyyy 360: MOV TEMP[6].xy, TEMP[6].xyyy 361: MOV TEMP[6].z, TEMP[3].xxxx 362: MOV TEMP[6].w, IMM[0].wwww 363: TXL TEMP[6].x, TEMP[6], SAMP[0], SHADOW2D 364: MOV TEMP[5].y, TEMP[6].xxxx 365: ADD TEMP[6].xy, TEMP[11].xyyy, TEMP[2].xyyy 366: MOV TEMP[6].xy, TEMP[6].xyyy 367: MOV TEMP[6].z, TEMP[3].xxxx 368: MOV TEMP[6].w, IMM[0].wwww 369: TXL TEMP[6].x, TEMP[6], SAMP[0], SHADOW2D 370: MOV TEMP[5].z, TEMP[6].xxxx 371: ADD TEMP[4].xy, TEMP[4].xyyy, TEMP[2].xyyy 372: MOV TEMP[4].xy, TEMP[4].xyyy 373: MOV TEMP[4].z, TEMP[3].xxxx 374: MOV TEMP[4].w, IMM[0].wwww 375: TXL TEMP[4].x, TEMP[4], SAMP[0], SHADOW2D 376: MOV TEMP[5].w, TEMP[4].xxxx 377: MUL TEMP[4].xyz, CONST[5][4].wwww, CONST[5][5].xyzz 378: MUL TEMP[6].xyz, CONST[5][5].wwww, CONST[5][6].xyzz 379: MOV TEMP[2].xy, TEMP[2].xyyy 380: MOV TEMP[2].z, TEMP[3].xxxx 381: MOV TEMP[2].w, IMM[0].wwww 382: TXL TEMP[2].x, TEMP[2], SAMP[0], SHADOW2D 383: DP4 TEMP[3].x, TEMP[10], IMM[10].yyyy 384: DP4 TEMP[5].x, TEMP[5], IMM[10].zzzz 385: ADD TEMP[3].x, TEMP[3].xxxx, TEMP[5].xxxx 386: MAD TEMP[2].x, TEMP[2].xxxx, IMM[10].xxxx, TEMP[3].xxxx 387: DP3 TEMP[3].x, -CONST[5][0].xyzz, TEMP[0].xyzz 388: MOV_SAT TEMP[3].x, TEMP[3].xxxx 389: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[3].xxxx 390: DP3 TEMP[3].x, TEMP[0].xyzz, TEMP[1].xyzz 391: MUL TEMP[3].xyz, TEMP[3].xxxx, TEMP[0].xyzz 392: MUL TEMP[3].xyz, IMM[0].zzzz, TEMP[3].xyzz 393: ADD TEMP[1].xyz, TEMP[1].xyzz, -TEMP[3].xyzz 394: DP3 TEMP[1].x, CONST[5][2].xyzz, TEMP[1].xyzz 395: MOV_SAT TEMP[1].x, TEMP[1].xxxx 396: POW TEMP[1].x, TEMP[1].xxxx, CONST[5][2].wwww 397: MUL TEMP[1].x, TEMP[9].xxxx, TEMP[1].xxxx 398: MAD TEMP[1].xyz, TEMP[1].xxxx, CONST[5][3].xyzz, TEMP[7].xyzz 399: ADD TEMP[3].x, IMM[0].yyyy, -TEMP[2].xxxx 400: DP3 TEMP[5].x, TEMP[6].xyzz, IMM[12].xyzz 401: LRP TEMP[5].xyz, TEMP[8].xxxx, TEMP[5].xxxx, TEMP[6].xyzz 402: DP3 TEMP[0].x, CONST[5][4].xyzz, TEMP[0].xyzz 403: MOV_SAT TEMP[0].x, TEMP[0].xxxx 404: DP3 TEMP[6].x, TEMP[4].xyzz, IMM[12].xyzz 405: LRP TEMP[4].xyz, TEMP[8].xxxx, TEMP[6].xxxx, TEMP[4].xyzz 406: MUL TEMP[2].xyz, TEMP[2].xxxx, CONST[5][1].xyzz 407: MAD TEMP[0].xyz, TEMP[0].xxxx, TEMP[4].xyzz, TEMP[2].xyzz 408: MAD TEMP[0].xyz, TEMP[3].xxxx, TEMP[5].xyzz, TEMP[0].xyzz 409: MUL TEMP[0].xyz, TEMP[1].xyzz, TEMP[0].xyzz 410: MOV TEMP[1].w, IMM[0].yyyy 411: MOV TEMP[1].x, IN[0].xxxx 412: MOV TEMP[1].y, IN[0].yyyy 413: MOV TEMP[1].z, IN[0].zzzz 414: MOV TEMP[2].x, TEMP[0].xxxx 415: MOV TEMP[2].y, TEMP[0].yyyy 416: MOV TEMP[2].z, TEMP[0].zzzz 417: DP4 TEMP[0].x, CONST[4][17], TEMP[1] 418: MUL TEMP[0].x, TEMP[0].xxxx, IMM[10].wwww 419: MOV TEMP[2].w, TEMP[0].xxxx 420: MOV OUT[1], IN[0].wwww 421: MOV OUT[0], TEMP[2] 422: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 308) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 312) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 320) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 324) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 328) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 368) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 372) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 376) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 384) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 388) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 392) %51 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4 %52 = load <16 x i8>, <16 x i8> addrspace(2)* %51, align 16, !tbaa !0 %53 = call float @llvm.SI.load.const(<16 x i8> %52, i32 272) %54 = call float @llvm.SI.load.const(<16 x i8> %52, i32 276) %55 = call float @llvm.SI.load.const(<16 x i8> %52, i32 280) %56 = call float @llvm.SI.load.const(<16 x i8> %52, i32 284) %57 = call float @llvm.SI.load.const(<16 x i8> %52, i32 304) %58 = call float @llvm.SI.load.const(<16 x i8> %52, i32 308) %59 = call float @llvm.SI.load.const(<16 x i8> %52, i32 312) %60 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 5 %61 = load <16 x i8>, <16 x i8> addrspace(2)* %60, align 16, !tbaa !0 %62 = call float @llvm.SI.load.const(<16 x i8> %61, i32 0) %63 = call float @llvm.SI.load.const(<16 x i8> %61, i32 4) %64 = call float @llvm.SI.load.const(<16 x i8> %61, i32 8) %65 = call float @llvm.SI.load.const(<16 x i8> %61, i32 16) %66 = call float @llvm.SI.load.const(<16 x i8> %61, i32 20) %67 = call float @llvm.SI.load.const(<16 x i8> %61, i32 24) %68 = call float @llvm.SI.load.const(<16 x i8> %61, i32 32) %69 = call float @llvm.SI.load.const(<16 x i8> %61, i32 36) %70 = call float @llvm.SI.load.const(<16 x i8> %61, i32 40) %71 = call float @llvm.SI.load.const(<16 x i8> %61, i32 44) %72 = call float @llvm.SI.load.const(<16 x i8> %61, i32 48) %73 = call float @llvm.SI.load.const(<16 x i8> %61, i32 52) %74 = call float @llvm.SI.load.const(<16 x i8> %61, i32 56) %75 = call float @llvm.SI.load.const(<16 x i8> %61, i32 64) %76 = call float @llvm.SI.load.const(<16 x i8> %61, i32 68) %77 = call float @llvm.SI.load.const(<16 x i8> %61, i32 72) %78 = call float @llvm.SI.load.const(<16 x i8> %61, i32 76) %79 = call float @llvm.SI.load.const(<16 x i8> %61, i32 80) %80 = call float @llvm.SI.load.const(<16 x i8> %61, i32 84) %81 = call float @llvm.SI.load.const(<16 x i8> %61, i32 88) %82 = call float @llvm.SI.load.const(<16 x i8> %61, i32 92) %83 = call float @llvm.SI.load.const(<16 x i8> %61, i32 96) %84 = call float @llvm.SI.load.const(<16 x i8> %61, i32 100) %85 = call float @llvm.SI.load.const(<16 x i8> %61, i32 104) %86 = call float @llvm.SI.load.const(<16 x i8> %61, i32 192) %87 = call float @llvm.SI.load.const(<16 x i8> %61, i32 196) %88 = call float @llvm.SI.load.const(<16 x i8> %61, i32 200) %89 = call float @llvm.SI.load.const(<16 x i8> %61, i32 204) %90 = call float @llvm.SI.load.const(<16 x i8> %61, i32 208) %91 = call float @llvm.SI.load.const(<16 x i8> %61, i32 212) %92 = call float @llvm.SI.load.const(<16 x i8> %61, i32 216) %93 = call float @llvm.SI.load.const(<16 x i8> %61, i32 220) %94 = call float @llvm.SI.load.const(<16 x i8> %61, i32 224) %95 = call float @llvm.SI.load.const(<16 x i8> %61, i32 228) %96 = call float @llvm.SI.load.const(<16 x i8> %61, i32 232) %97 = call float @llvm.SI.load.const(<16 x i8> %61, i32 236) %98 = call float @llvm.SI.load.const(<16 x i8> %61, i32 240) %99 = call float @llvm.SI.load.const(<16 x i8> %61, i32 244) %100 = call float @llvm.SI.load.const(<16 x i8> %61, i32 248) %101 = call float @llvm.SI.load.const(<16 x i8> %61, i32 252) %102 = call float @llvm.SI.load.const(<16 x i8> %61, i32 256) %103 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %104 = load <8 x i32>, <8 x i32> addrspace(2)* %103, align 32, !tbaa !0 %105 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %106 = load <4 x i32>, <4 x i32> addrspace(2)* %105, align 16, !tbaa !0 %107 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %108 = load <8 x i32>, <8 x i32> addrspace(2)* %107, align 32, !tbaa !0 %109 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %110 = load <4 x i32>, <4 x i32> addrspace(2)* %109, align 16, !tbaa !0 %111 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %112 = load <8 x i32>, <8 x i32> addrspace(2)* %111, align 32, !tbaa !0 %113 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %114 = load <4 x i32>, <4 x i32> addrspace(2)* %113, align 16, !tbaa !0 %115 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %116 = load <8 x i32>, <8 x i32> addrspace(2)* %115, align 32, !tbaa !0 %117 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %118 = load <4 x i32>, <4 x i32> addrspace(2)* %117, align 16, !tbaa !0 %119 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %120 = load <8 x i32>, <8 x i32> addrspace(2)* %119, align 32, !tbaa !0 %121 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %122 = load <4 x i32>, <4 x i32> addrspace(2)* %121, align 16, !tbaa !0 %123 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %124 = load <8 x i32>, <8 x i32> addrspace(2)* %123, align 32, !tbaa !0 %125 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %126 = load <4 x i32>, <4 x i32> addrspace(2)* %125, align 16, !tbaa !0 %127 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 6 %128 = load <8 x i32>, <8 x i32> addrspace(2)* %127, align 32, !tbaa !0 %129 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 6 %130 = load <4 x i32>, <4 x i32> addrspace(2)* %129, align 16, !tbaa !0 %131 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 7 %132 = load <8 x i32>, <8 x i32> addrspace(2)* %131, align 32, !tbaa !0 %133 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 7 %134 = load <4 x i32>, <4 x i32> addrspace(2)* %133, align 16, !tbaa !0 %135 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 8 %136 = load <8 x i32>, <8 x i32> addrspace(2)* %135, align 32, !tbaa !0 %137 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 8 %138 = load <4 x i32>, <4 x i32> addrspace(2)* %137, align 16, !tbaa !0 %139 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 9 %140 = load <8 x i32>, <8 x i32> addrspace(2)* %139, align 32, !tbaa !0 %141 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 9 %142 = load <4 x i32>, <4 x i32> addrspace(2)* %141, align 16, !tbaa !0 %143 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 10 %144 = load <8 x i32>, <8 x i32> addrspace(2)* %143, align 32, !tbaa !0 %145 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 10 %146 = load <4 x i32>, <4 x i32> addrspace(2)* %145, align 16, !tbaa !0 %147 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 11 %148 = load <8 x i32>, <8 x i32> addrspace(2)* %147, align 32, !tbaa !0 %149 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 11 %150 = load <4 x i32>, <4 x i32> addrspace(2)* %149, align 16, !tbaa !0 %151 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 12 %152 = load <8 x i32>, <8 x i32> addrspace(2)* %151, align 32, !tbaa !0 %153 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 12 %154 = load <4 x i32>, <4 x i32> addrspace(2)* %153, align 16, !tbaa !0 %155 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %156 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %157 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %158 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %159 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %160 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %161 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %162 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %163 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %164 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %165 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %166 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %167 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %168 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %169 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %170 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %171 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %172 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %173 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %174 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %175 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %176 = call float @llvm.SI.fs.interp(i32 3, i32 5, i32 %5, <2 x i32> %7) %177 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %5, <2 x i32> %7) %178 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %5, <2 x i32> %7) %179 = call float @llvm.SI.fs.interp(i32 2, i32 6, i32 %5, <2 x i32> %7) %180 = call float @llvm.SI.fs.interp(i32 3, i32 6, i32 %5, <2 x i32> %7) %181 = call float @llvm.SI.fs.interp(i32 0, i32 7, i32 %5, <2 x i32> %7) %182 = call float @llvm.SI.fs.interp(i32 1, i32 7, i32 %5, <2 x i32> %7) %183 = call float @llvm.SI.fs.interp(i32 2, i32 7, i32 %5, <2 x i32> %7) %184 = call float @llvm.SI.fs.interp(i32 3, i32 7, i32 %5, <2 x i32> %7) %185 = call float @llvm.SI.fs.interp(i32 0, i32 8, i32 %5, <2 x i32> %7) %186 = call float @llvm.SI.fs.interp(i32 1, i32 8, i32 %5, <2 x i32> %7) %187 = call float @llvm.SI.fs.interp(i32 2, i32 8, i32 %5, <2 x i32> %7) %188 = call float @llvm.SI.fs.interp(i32 3, i32 8, i32 %5, <2 x i32> %7) %189 = fmul float %159, %159 %190 = fmul float %160, %160 %191 = fadd float %190, %189 %192 = fmul float %161, %161 %193 = fadd float %191, %192 %194 = call float @llvm.AMDGPU.rsq.clamped.f32(float %193) %195 = fmul float %159, %194 %196 = fmul float %160, %194 %197 = fmul float %161, %194 %198 = fmul float %155, 7.812500e-03 %199 = fmul float %156, 7.812500e-03 %200 = fmul float %157, 7.812500e-03 %201 = fsub float -0.000000e+00, %200 %202 = fsub float -0.000000e+00, %199 %203 = fmul float %45, %199 %204 = fmul float %46, %201 %205 = fadd float %204, %203 %206 = fadd float %205, %47 %207 = fmul float %48, %199 %208 = fmul float %49, %201 %209 = fadd float %208, %207 %210 = fadd float %209, %50 %211 = fmul float %45, %198 %212 = fmul float %46, %201 %213 = fadd float %212, %211 %214 = fadd float %213, %47 %215 = fmul float %48, %198 %216 = fmul float %49, %201 %217 = fadd float %216, %215 %218 = fadd float %217, %50 %219 = fmul float %45, %198 %220 = fmul float %46, %202 %221 = fadd float %220, %219 %222 = fadd float %221, %47 %223 = fmul float %48, %198 %224 = fmul float %49, %202 %225 = fadd float %224, %223 %226 = fadd float %225, %50 %227 = fmul float %39, %198 %228 = fmul float %40, %202 %229 = fadd float %228, %227 %230 = fadd float %229, %41 %231 = fmul float %42, %198 %232 = fmul float %43, %202 %233 = fadd float %232, %231 %234 = fadd float %233, %44 %235 = fmul float %168, 2.000000e+00 %236 = call float @llvm.AMDIL.clamp.(float %235, float 0.000000e+00, float 1.000000e+00) %237 = fmul float %172, 2.000000e+00 %238 = call float @llvm.AMDIL.clamp.(float %237, float 0.000000e+00, float 1.000000e+00) %239 = bitcast float %177 to i32 %240 = bitcast float %178 to i32 %241 = insertelement <2 x i32> undef, i32 %239, i32 0 %242 = insertelement <2 x i32> %241, i32 %240, i32 1 %243 = bitcast <8 x i32> %112 to <32 x i8> %244 = bitcast <4 x i32> %114 to <16 x i8> %245 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %242, <32 x i8> %243, <16 x i8> %244, i32 2) %246 = extractelement <4 x float> %245, i32 0 %247 = extractelement <4 x float> %245, i32 1 %248 = extractelement <4 x float> %245, i32 2 %249 = extractelement <4 x float> %245, i32 3 %250 = bitcast float %179 to i32 %251 = bitcast float %180 to i32 %252 = insertelement <2 x i32> undef, i32 %250, i32 0 %253 = insertelement <2 x i32> %252, i32 %251, i32 1 %254 = bitcast <8 x i32> %112 to <32 x i8> %255 = bitcast <4 x i32> %114 to <16 x i8> %256 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %253, <32 x i8> %254, <16 x i8> %255, i32 2) %257 = extractelement <4 x float> %256, i32 0 %258 = extractelement <4 x float> %256, i32 1 %259 = extractelement <4 x float> %256, i32 2 %260 = extractelement <4 x float> %256, i32 3 %261 = call float @llvm.AMDGPU.lrp(float %236, float %257, float %246) %262 = call float @llvm.AMDGPU.lrp(float %236, float %258, float %247) %263 = call float @llvm.AMDGPU.lrp(float %236, float %259, float %248) %264 = call float @llvm.AMDGPU.lrp(float %236, float %260, float %249) %265 = bitcast float %187 to i32 %266 = bitcast float %188 to i32 %267 = insertelement <2 x i32> undef, i32 %265, i32 0 %268 = insertelement <2 x i32> %267, i32 %266, i32 1 %269 = bitcast <8 x i32> %112 to <32 x i8> %270 = bitcast <4 x i32> %114 to <16 x i8> %271 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %268, <32 x i8> %269, <16 x i8> %270, i32 2) %272 = extractelement <4 x float> %271, i32 0 %273 = extractelement <4 x float> %271, i32 1 %274 = extractelement <4 x float> %271, i32 2 %275 = extractelement <4 x float> %271, i32 3 %276 = call float @llvm.AMDGPU.lrp(float %238, float %272, float %261) %277 = call float @llvm.AMDGPU.lrp(float %238, float %273, float %262) %278 = call float @llvm.AMDGPU.lrp(float %238, float %274, float %263) %279 = call float @llvm.AMDGPU.lrp(float %238, float %275, float %264) %280 = bitcast float %181 to i32 %281 = bitcast float %182 to i32 %282 = insertelement <2 x i32> undef, i32 %280, i32 0 %283 = insertelement <2 x i32> %282, i32 %281, i32 1 %284 = bitcast <8 x i32> %116 to <32 x i8> %285 = bitcast <4 x i32> %118 to <16 x i8> %286 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %283, <32 x i8> %284, <16 x i8> %285, i32 2) %287 = extractelement <4 x float> %286, i32 0 %288 = extractelement <4 x float> %286, i32 1 %289 = extractelement <4 x float> %286, i32 2 %290 = extractelement <4 x float> %286, i32 3 %291 = bitcast float %183 to i32 %292 = bitcast float %184 to i32 %293 = insertelement <2 x i32> undef, i32 %291, i32 0 %294 = insertelement <2 x i32> %293, i32 %292, i32 1 %295 = bitcast <8 x i32> %116 to <32 x i8> %296 = bitcast <4 x i32> %118 to <16 x i8> %297 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %294, <32 x i8> %295, <16 x i8> %296, i32 2) %298 = extractelement <4 x float> %297, i32 0 %299 = extractelement <4 x float> %297, i32 1 %300 = extractelement <4 x float> %297, i32 2 %301 = extractelement <4 x float> %297, i32 3 %302 = call float @llvm.AMDGPU.lrp(float %236, float %298, float %287) %303 = call float @llvm.AMDGPU.lrp(float %236, float %299, float %288) %304 = call float @llvm.AMDGPU.lrp(float %236, float %300, float %289) %305 = call float @llvm.AMDGPU.lrp(float %236, float %301, float %290) %306 = bitcast float %230 to i32 %307 = bitcast float %234 to i32 %308 = insertelement <2 x i32> undef, i32 %306, i32 0 %309 = insertelement <2 x i32> %308, i32 %307, i32 1 %310 = bitcast <8 x i32> %116 to <32 x i8> %311 = bitcast <4 x i32> %118 to <16 x i8> %312 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %309, <32 x i8> %310, <16 x i8> %311, i32 2) %313 = extractelement <4 x float> %312, i32 0 %314 = extractelement <4 x float> %312, i32 1 %315 = extractelement <4 x float> %312, i32 2 %316 = extractelement <4 x float> %312, i32 3 %317 = call float @llvm.AMDGPU.lrp(float %238, float %313, float %302) %318 = call float @llvm.AMDGPU.lrp(float %238, float %314, float %303) %319 = call float @llvm.AMDGPU.lrp(float %238, float %315, float %304) %320 = call float @llvm.AMDGPU.lrp(float %238, float %316, float %305) %321 = bitcast float %206 to i32 %322 = bitcast float %210 to i32 %323 = insertelement <2 x i32> undef, i32 %321, i32 0 %324 = insertelement <2 x i32> %323, i32 %322, i32 1 %325 = bitcast <8 x i32> %120 to <32 x i8> %326 = bitcast <4 x i32> %122 to <16 x i8> %327 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %324, <32 x i8> %325, <16 x i8> %326, i32 2) %328 = extractelement <4 x float> %327, i32 0 %329 = extractelement <4 x float> %327, i32 1 %330 = extractelement <4 x float> %327, i32 2 %331 = extractelement <4 x float> %327, i32 3 %332 = bitcast float %214 to i32 %333 = bitcast float %218 to i32 %334 = insertelement <2 x i32> undef, i32 %332, i32 0 %335 = insertelement <2 x i32> %334, i32 %333, i32 1 %336 = bitcast <8 x i32> %120 to <32 x i8> %337 = bitcast <4 x i32> %122 to <16 x i8> %338 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %335, <32 x i8> %336, <16 x i8> %337, i32 2) %339 = extractelement <4 x float> %338, i32 0 %340 = extractelement <4 x float> %338, i32 1 %341 = extractelement <4 x float> %338, i32 2 %342 = extractelement <4 x float> %338, i32 3 %343 = call float @llvm.AMDGPU.lrp(float %236, float %339, float %328) %344 = call float @llvm.AMDGPU.lrp(float %236, float %340, float %329) %345 = call float @llvm.AMDGPU.lrp(float %236, float %341, float %330) %346 = call float @llvm.AMDGPU.lrp(float %236, float %342, float %331) %347 = bitcast float %222 to i32 %348 = bitcast float %226 to i32 %349 = insertelement <2 x i32> undef, i32 %347, i32 0 %350 = insertelement <2 x i32> %349, i32 %348, i32 1 %351 = bitcast <8 x i32> %120 to <32 x i8> %352 = bitcast <4 x i32> %122 to <16 x i8> %353 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %350, <32 x i8> %351, <16 x i8> %352, i32 2) %354 = extractelement <4 x float> %353, i32 0 %355 = extractelement <4 x float> %353, i32 1 %356 = extractelement <4 x float> %353, i32 2 %357 = extractelement <4 x float> %353, i32 3 %358 = call float @llvm.AMDGPU.lrp(float %238, float %354, float %343) %359 = call float @llvm.AMDGPU.lrp(float %238, float %355, float %344) %360 = call float @llvm.AMDGPU.lrp(float %238, float %356, float %345) %361 = call float @llvm.AMDGPU.lrp(float %238, float %357, float %346) %362 = fsub float %279, %169 %363 = call float @llvm.maxnum.f32(float %362, float 0.000000e+00) %364 = fsub float %165, %363 %365 = fadd float %169, %279 %366 = call float @llvm.minnum.f32(float %365, float 1.000000e+00) %367 = fsub float %366, %363 %368 = fdiv float 1.000000e+00, %367 %369 = fmul float %364, %368 %370 = call float @llvm.AMDIL.clamp.(float %369, float 0.000000e+00, float 1.000000e+00) %371 = fmul float %370, 2.000000e+00 %372 = fsub float 3.000000e+00, %371 %373 = fmul float %370, %372 %374 = fmul float %370, %373 %375 = fsub float 1.000000e+00, %374 %376 = fsub float %320, %170 %377 = call float @llvm.maxnum.f32(float %376, float 0.000000e+00) %378 = fsub float %166, %377 %379 = fadd float %170, %320 %380 = call float @llvm.minnum.f32(float %379, float 1.000000e+00) %381 = fsub float %380, %377 %382 = fdiv float 1.000000e+00, %381 %383 = fmul float %378, %382 %384 = call float @llvm.AMDIL.clamp.(float %383, float 0.000000e+00, float 1.000000e+00) %385 = fmul float %384, 2.000000e+00 %386 = fsub float 3.000000e+00, %385 %387 = fmul float %384, %386 %388 = fmul float %384, %387 %389 = call float @llvm.maxnum.f32(float %388, float 0.000000e+00) %390 = call float @llvm.minnum.f32(float %389, float %375) %391 = fsub float %375, %390 %392 = fsub float %361, %171 %393 = call float @llvm.maxnum.f32(float %392, float 0.000000e+00) %394 = fsub float %167, %393 %395 = fadd float %171, %361 %396 = call float @llvm.minnum.f32(float %395, float 1.000000e+00) %397 = fsub float %396, %393 %398 = fdiv float 1.000000e+00, %397 %399 = fmul float %394, %398 %400 = call float @llvm.AMDIL.clamp.(float %399, float 0.000000e+00, float 1.000000e+00) %401 = fmul float %400, 2.000000e+00 %402 = fsub float 3.000000e+00, %401 %403 = fmul float %400, %402 %404 = fmul float %400, %403 %405 = call float @llvm.maxnum.f32(float %404, float 0.000000e+00) %406 = call float @llvm.minnum.f32(float %405, float %391) %407 = fsub float %391, %406 %408 = fmul float %34, %358 %409 = fmul float %35, %359 %410 = fmul float %36, %360 %411 = fmul float %31, %317 %412 = fmul float %32, %318 %413 = fmul float %33, %319 %414 = fmul float %28, %276 %415 = fmul float %29, %277 %416 = fmul float %30, %278 %417 = bitcast float %173 to i32 %418 = bitcast float %174 to i32 %419 = insertelement <2 x i32> undef, i32 %417, i32 0 %420 = insertelement <2 x i32> %419, i32 %418, i32 1 %421 = bitcast <8 x i32> %108 to <32 x i8> %422 = bitcast <4 x i32> %110 to <16 x i8> %423 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %420, <32 x i8> %421, <16 x i8> %422, i32 2) %424 = extractelement <4 x float> %423, i32 0 %425 = extractelement <4 x float> %423, i32 1 %426 = extractelement <4 x float> %423, i32 2 %427 = extractelement <4 x float> %423, i32 3 %428 = bitcast float %175 to i32 %429 = bitcast float %176 to i32 %430 = insertelement <2 x i32> undef, i32 %428, i32 0 %431 = insertelement <2 x i32> %430, i32 %429, i32 1 %432 = bitcast <8 x i32> %108 to <32 x i8> %433 = bitcast <4 x i32> %110 to <16 x i8> %434 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %431, <32 x i8> %432, <16 x i8> %433, i32 2) %435 = extractelement <4 x float> %434, i32 0 %436 = extractelement <4 x float> %434, i32 1 %437 = extractelement <4 x float> %434, i32 2 %438 = extractelement <4 x float> %434, i32 3 %439 = call float @llvm.AMDGPU.lrp(float %236, float %435, float %424) %440 = call float @llvm.AMDGPU.lrp(float %236, float %436, float %425) %441 = call float @llvm.AMDGPU.lrp(float %236, float %437, float %426) %442 = call float @llvm.AMDGPU.lrp(float %236, float %438, float %427) %443 = bitcast float %185 to i32 %444 = bitcast float %186 to i32 %445 = insertelement <2 x i32> undef, i32 %443, i32 0 %446 = insertelement <2 x i32> %445, i32 %444, i32 1 %447 = bitcast <8 x i32> %108 to <32 x i8> %448 = bitcast <4 x i32> %110 to <16 x i8> %449 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %446, <32 x i8> %447, <16 x i8> %448, i32 2) %450 = extractelement <4 x float> %449, i32 0 %451 = extractelement <4 x float> %449, i32 1 %452 = extractelement <4 x float> %449, i32 2 %453 = call float @llvm.AMDGPU.lrp(float %238, float %450, float %439) %454 = call float @llvm.AMDGPU.lrp(float %238, float %451, float %440) %455 = call float @llvm.AMDGPU.lrp(float %238, float %452, float %441) %456 = fmul float %25, %453 %457 = fmul float %26, %454 %458 = fmul float %27, %455 %459 = fmul float %456, %407 %460 = fmul float %457, %407 %461 = fmul float %458, %407 %462 = fmul float %414, %374 %463 = fadd float %462, %459 %464 = fmul float %415, %374 %465 = fadd float %464, %460 %466 = fmul float %416, %374 %467 = fadd float %466, %461 %468 = fmul float %390, %411 %469 = fadd float %468, %463 %470 = fmul float %390, %412 %471 = fadd float %470, %465 %472 = fmul float %390, %413 %473 = fadd float %472, %467 %474 = fmul float %408, %406 %475 = fadd float %474, %469 %476 = fmul float %409, %406 %477 = fadd float %476, %471 %478 = fmul float %410, %406 %479 = fadd float %478, %473 %480 = fmul float %475, %162 %481 = fmul float %477, %163 %482 = fmul float %479, %164 %483 = bitcast float %206 to i32 %484 = bitcast float %210 to i32 %485 = insertelement <2 x i32> undef, i32 %483, i32 0 %486 = insertelement <2 x i32> %485, i32 %484, i32 1 %487 = bitcast <8 x i32> %136 to <32 x i8> %488 = bitcast <4 x i32> %138 to <16 x i8> %489 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %486, <32 x i8> %487, <16 x i8> %488, i32 2) %490 = extractelement <4 x float> %489, i32 0 %491 = extractelement <4 x float> %489, i32 1 %492 = extractelement <4 x float> %489, i32 2 %493 = extractelement <4 x float> %489, i32 3 %494 = bitcast float %214 to i32 %495 = bitcast float %218 to i32 %496 = insertelement <2 x i32> undef, i32 %494, i32 0 %497 = insertelement <2 x i32> %496, i32 %495, i32 1 %498 = bitcast <8 x i32> %136 to <32 x i8> %499 = bitcast <4 x i32> %138 to <16 x i8> %500 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %497, <32 x i8> %498, <16 x i8> %499, i32 2) %501 = extractelement <4 x float> %500, i32 0 %502 = extractelement <4 x float> %500, i32 1 %503 = extractelement <4 x float> %500, i32 2 %504 = extractelement <4 x float> %500, i32 3 %505 = call float @llvm.AMDGPU.lrp(float %236, float %501, float %490) %506 = call float @llvm.AMDGPU.lrp(float %236, float %502, float %491) %507 = call float @llvm.AMDGPU.lrp(float %236, float %503, float %492) %508 = call float @llvm.AMDGPU.lrp(float %236, float %504, float %493) %509 = bitcast float %222 to i32 %510 = bitcast float %226 to i32 %511 = insertelement <2 x i32> undef, i32 %509, i32 0 %512 = insertelement <2 x i32> %511, i32 %510, i32 1 %513 = bitcast <8 x i32> %136 to <32 x i8> %514 = bitcast <4 x i32> %138 to <16 x i8> %515 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %512, <32 x i8> %513, <16 x i8> %514, i32 2) %516 = extractelement <4 x float> %515, i32 0 %517 = extractelement <4 x float> %515, i32 1 %518 = extractelement <4 x float> %515, i32 2 %519 = call float @llvm.AMDGPU.lrp(float %238, float %516, float %505) %520 = call float @llvm.AMDGPU.lrp(float %238, float %517, float %506) %521 = call float @llvm.AMDGPU.lrp(float %238, float %518, float %507) %522 = bitcast float %181 to i32 %523 = bitcast float %182 to i32 %524 = insertelement <2 x i32> undef, i32 %522, i32 0 %525 = insertelement <2 x i32> %524, i32 %523, i32 1 %526 = bitcast <8 x i32> %132 to <32 x i8> %527 = bitcast <4 x i32> %134 to <16 x i8> %528 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %525, <32 x i8> %526, <16 x i8> %527, i32 2) %529 = extractelement <4 x float> %528, i32 0 %530 = extractelement <4 x float> %528, i32 1 %531 = extractelement <4 x float> %528, i32 2 %532 = extractelement <4 x float> %528, i32 3 %533 = bitcast float %183 to i32 %534 = bitcast float %184 to i32 %535 = insertelement <2 x i32> undef, i32 %533, i32 0 %536 = insertelement <2 x i32> %535, i32 %534, i32 1 %537 = bitcast <8 x i32> %132 to <32 x i8> %538 = bitcast <4 x i32> %134 to <16 x i8> %539 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %536, <32 x i8> %537, <16 x i8> %538, i32 2) %540 = extractelement <4 x float> %539, i32 0 %541 = extractelement <4 x float> %539, i32 1 %542 = extractelement <4 x float> %539, i32 2 %543 = extractelement <4 x float> %539, i32 3 %544 = call float @llvm.AMDGPU.lrp(float %236, float %540, float %529) %545 = call float @llvm.AMDGPU.lrp(float %236, float %541, float %530) %546 = call float @llvm.AMDGPU.lrp(float %236, float %542, float %531) %547 = call float @llvm.AMDGPU.lrp(float %236, float %543, float %532) %548 = bitcast float %230 to i32 %549 = bitcast float %234 to i32 %550 = insertelement <2 x i32> undef, i32 %548, i32 0 %551 = insertelement <2 x i32> %550, i32 %549, i32 1 %552 = bitcast <8 x i32> %132 to <32 x i8> %553 = bitcast <4 x i32> %134 to <16 x i8> %554 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %551, <32 x i8> %552, <16 x i8> %553, i32 2) %555 = extractelement <4 x float> %554, i32 0 %556 = extractelement <4 x float> %554, i32 1 %557 = extractelement <4 x float> %554, i32 2 %558 = call float @llvm.AMDGPU.lrp(float %238, float %555, float %544) %559 = call float @llvm.AMDGPU.lrp(float %238, float %556, float %545) %560 = call float @llvm.AMDGPU.lrp(float %238, float %557, float %546) %561 = bitcast float %177 to i32 %562 = bitcast float %178 to i32 %563 = insertelement <2 x i32> undef, i32 %561, i32 0 %564 = insertelement <2 x i32> %563, i32 %562, i32 1 %565 = bitcast <8 x i32> %128 to <32 x i8> %566 = bitcast <4 x i32> %130 to <16 x i8> %567 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %564, <32 x i8> %565, <16 x i8> %566, i32 2) %568 = extractelement <4 x float> %567, i32 0 %569 = extractelement <4 x float> %567, i32 1 %570 = extractelement <4 x float> %567, i32 2 %571 = extractelement <4 x float> %567, i32 3 %572 = bitcast float %179 to i32 %573 = bitcast float %180 to i32 %574 = insertelement <2 x i32> undef, i32 %572, i32 0 %575 = insertelement <2 x i32> %574, i32 %573, i32 1 %576 = bitcast <8 x i32> %128 to <32 x i8> %577 = bitcast <4 x i32> %130 to <16 x i8> %578 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %575, <32 x i8> %576, <16 x i8> %577, i32 2) %579 = extractelement <4 x float> %578, i32 0 %580 = extractelement <4 x float> %578, i32 1 %581 = extractelement <4 x float> %578, i32 2 %582 = extractelement <4 x float> %578, i32 3 %583 = call float @llvm.AMDGPU.lrp(float %236, float %579, float %568) %584 = call float @llvm.AMDGPU.lrp(float %236, float %580, float %569) %585 = call float @llvm.AMDGPU.lrp(float %236, float %581, float %570) %586 = call float @llvm.AMDGPU.lrp(float %236, float %582, float %571) %587 = bitcast float %187 to i32 %588 = bitcast float %188 to i32 %589 = insertelement <2 x i32> undef, i32 %587, i32 0 %590 = insertelement <2 x i32> %589, i32 %588, i32 1 %591 = bitcast <8 x i32> %128 to <32 x i8> %592 = bitcast <4 x i32> %130 to <16 x i8> %593 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %590, <32 x i8> %591, <16 x i8> %592, i32 2) %594 = extractelement <4 x float> %593, i32 0 %595 = extractelement <4 x float> %593, i32 1 %596 = extractelement <4 x float> %593, i32 2 %597 = call float @llvm.AMDGPU.lrp(float %238, float %594, float %583) %598 = call float @llvm.AMDGPU.lrp(float %238, float %595, float %584) %599 = call float @llvm.AMDGPU.lrp(float %238, float %596, float %585) %600 = bitcast float %173 to i32 %601 = bitcast float %174 to i32 %602 = insertelement <2 x i32> undef, i32 %600, i32 0 %603 = insertelement <2 x i32> %602, i32 %601, i32 1 %604 = bitcast <8 x i32> %124 to <32 x i8> %605 = bitcast <4 x i32> %126 to <16 x i8> %606 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %603, <32 x i8> %604, <16 x i8> %605, i32 2) %607 = extractelement <4 x float> %606, i32 0 %608 = extractelement <4 x float> %606, i32 1 %609 = extractelement <4 x float> %606, i32 2 %610 = extractelement <4 x float> %606, i32 3 %611 = bitcast float %175 to i32 %612 = bitcast float %176 to i32 %613 = insertelement <2 x i32> undef, i32 %611, i32 0 %614 = insertelement <2 x i32> %613, i32 %612, i32 1 %615 = bitcast <8 x i32> %124 to <32 x i8> %616 = bitcast <4 x i32> %126 to <16 x i8> %617 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %614, <32 x i8> %615, <16 x i8> %616, i32 2) %618 = extractelement <4 x float> %617, i32 0 %619 = extractelement <4 x float> %617, i32 1 %620 = extractelement <4 x float> %617, i32 2 %621 = extractelement <4 x float> %617, i32 3 %622 = call float @llvm.AMDGPU.lrp(float %236, float %618, float %607) %623 = call float @llvm.AMDGPU.lrp(float %236, float %619, float %608) %624 = call float @llvm.AMDGPU.lrp(float %236, float %620, float %609) %625 = call float @llvm.AMDGPU.lrp(float %236, float %621, float %610) %626 = bitcast float %185 to i32 %627 = bitcast float %186 to i32 %628 = insertelement <2 x i32> undef, i32 %626, i32 0 %629 = insertelement <2 x i32> %628, i32 %627, i32 1 %630 = bitcast <8 x i32> %124 to <32 x i8> %631 = bitcast <4 x i32> %126 to <16 x i8> %632 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %629, <32 x i8> %630, <16 x i8> %631, i32 2) %633 = extractelement <4 x float> %632, i32 0 %634 = extractelement <4 x float> %632, i32 1 %635 = extractelement <4 x float> %632, i32 2 %636 = call float @llvm.AMDGPU.lrp(float %238, float %633, float %622) %637 = call float @llvm.AMDGPU.lrp(float %238, float %634, float %623) %638 = call float @llvm.AMDGPU.lrp(float %238, float %635, float %624) %639 = fmul float %407, %636 %640 = fmul float %407, %638 %641 = fmul float %374, %597 %642 = fadd float %641, %639 %643 = fmul float %374, %599 %644 = fadd float %643, %640 %645 = fmul float %390, %558 %646 = fadd float %645, %642 %647 = fmul float %390, %560 %648 = fadd float %647, %644 %649 = fmul float %406, %519 %650 = fadd float %649, %646 %651 = fmul float %406, %521 %652 = fadd float %651, %648 %653 = fmul float %650, %37 %654 = fmul float %652, %38 %655 = bitcast float %206 to i32 %656 = bitcast float %210 to i32 %657 = insertelement <2 x i32> undef, i32 %655, i32 0 %658 = insertelement <2 x i32> %657, i32 %656, i32 1 %659 = bitcast <8 x i32> %152 to <32 x i8> %660 = bitcast <4 x i32> %154 to <16 x i8> %661 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %658, <32 x i8> %659, <16 x i8> %660, i32 2) %662 = extractelement <4 x float> %661, i32 1 %663 = extractelement <4 x float> %661, i32 3 %664 = bitcast float %181 to i32 %665 = bitcast float %182 to i32 %666 = insertelement <2 x i32> undef, i32 %664, i32 0 %667 = insertelement <2 x i32> %666, i32 %665, i32 1 %668 = bitcast <8 x i32> %148 to <32 x i8> %669 = bitcast <4 x i32> %150 to <16 x i8> %670 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %667, <32 x i8> %668, <16 x i8> %669, i32 2) %671 = extractelement <4 x float> %670, i32 1 %672 = extractelement <4 x float> %670, i32 3 %673 = bitcast float %173 to i32 %674 = bitcast float %174 to i32 %675 = insertelement <2 x i32> undef, i32 %673, i32 0 %676 = insertelement <2 x i32> %675, i32 %674, i32 1 %677 = bitcast <8 x i32> %140 to <32 x i8> %678 = bitcast <4 x i32> %142 to <16 x i8> %679 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %676, <32 x i8> %677, <16 x i8> %678, i32 2) %680 = extractelement <4 x float> %679, i32 1 %681 = extractelement <4 x float> %679, i32 3 %682 = bitcast float %177 to i32 %683 = bitcast float %178 to i32 %684 = insertelement <2 x i32> undef, i32 %682, i32 0 %685 = insertelement <2 x i32> %684, i32 %683, i32 1 %686 = bitcast <8 x i32> %144 to <32 x i8> %687 = bitcast <4 x i32> %146 to <16 x i8> %688 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %685, <32 x i8> %686, <16 x i8> %687, i32 2) %689 = extractelement <4 x float> %688, i32 1 %690 = extractelement <4 x float> %688, i32 3 %691 = fmul float %374, %690 %692 = fmul float %374, %689 %693 = fmul float %407, %681 %694 = fadd float %693, %691 %695 = fmul float %407, %680 %696 = fadd float %695, %692 %697 = fmul float %390, %672 %698 = fadd float %697, %694 %699 = fmul float %390, %671 %700 = fadd float %699, %696 %701 = fmul float %406, %663 %702 = fadd float %701, %698 %703 = fmul float %406, %662 %704 = fadd float %703, %700 %705 = fmul float %702, 2.000000e+00 %706 = fadd float %705, -1.000000e+00 %707 = fmul float %704, 2.000000e+00 %708 = fadd float %707, -1.000000e+00 %709 = fmul float %706, %24 %710 = fmul float %708, %24 %711 = fmul float %706, %706 %712 = fmul float %708, %708 %713 = fadd float %711, %712 %714 = fsub float 1.000000e+00, %713 %715 = call float @llvm.AMDIL.clamp.(float %714, float 0.000000e+00, float 1.000000e+00) %716 = call float @llvm.sqrt.f32(float %715) %717 = fmul float %709, %709 %718 = fmul float %710, %710 %719 = fadd float %718, %717 %720 = fmul float %716, %716 %721 = fadd float %719, %720 %722 = call float @llvm.AMDGPU.rsq.clamped.f32(float %721) %723 = fmul float %709, %722 %724 = fmul float %710, %722 %725 = fsub float -0.000000e+00, %724 %726 = fmul float %716, %722 %727 = fmul float %195, 0.000000e+00 %728 = fadd float %196, %727 %729 = fmul float %197, 0.000000e+00 %730 = fadd float %728, %729 %731 = fmul float %730, %195 %732 = fmul float %730, %196 %733 = fmul float %730, %197 %734 = fsub float 0.000000e+00, %731 %735 = fsub float 1.000000e+00, %732 %736 = fsub float 0.000000e+00, %733 %737 = fmul float %734, %734 %738 = fmul float %735, %735 %739 = fadd float %738, %737 %740 = fmul float %736, %736 %741 = fadd float %739, %740 %742 = call float @llvm.sqrt.f32(float %741) %743 = fcmp olt float %742, 0x3EE4F8B580000000 %. = select i1 %743, float 0.000000e+00, float %734 %.81 = select i1 %743, float 1.000000e+00, float %735 %.82 = select i1 %743, float 0.000000e+00, float %736 %744 = fmul float %195, 0.000000e+00 %745 = fmul float %196, 0.000000e+00 %746 = fadd float %745, %744 %747 = fsub float %746, %197 %748 = fmul float %747, %195 %749 = fmul float %747, %196 %750 = fmul float %747, %197 %751 = fsub float 0.000000e+00, %748 %752 = fsub float 0.000000e+00, %749 %753 = fsub float -1.000000e+00, %750 %754 = fmul float %751, %751 %755 = fmul float %752, %752 %756 = fadd float %755, %754 %757 = fmul float %753, %753 %758 = fadd float %756, %757 %759 = call float @llvm.sqrt.f32(float %758) %760 = fcmp olt float %759, 0x3EE4F8B580000000 %temp64.0 = select i1 %760, float 0.000000e+00, float %751 %temp65.0 = select i1 %760, float 0.000000e+00, float %752 %temp66.0 = select i1 %760, float -1.000000e+00, float %753 %761 = fmul float %temp64.0, %725 %762 = fmul float %temp65.0, %725 %763 = fmul float %temp66.0, %725 %764 = fmul float %., %723 %765 = fadd float %764, %761 %766 = fmul float %.81, %723 %767 = fadd float %766, %762 %768 = fmul float %.82, %723 %769 = fadd float %768, %763 %770 = fmul float %195, %726 %771 = fadd float %770, %765 %772 = fmul float %196, %726 %773 = fadd float %772, %767 %774 = fmul float %197, %726 %775 = fadd float %774, %769 %776 = bitcast float %214 to i32 %777 = bitcast float %218 to i32 %778 = insertelement <2 x i32> undef, i32 %776, i32 0 %779 = insertelement <2 x i32> %778, i32 %777, i32 1 %780 = bitcast <8 x i32> %152 to <32 x i8> %781 = bitcast <4 x i32> %154 to <16 x i8> %782 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %779, <32 x i8> %780, <16 x i8> %781, i32 2) %783 = extractelement <4 x float> %782, i32 1 %784 = extractelement <4 x float> %782, i32 3 %785 = bitcast float %183 to i32 %786 = bitcast float %184 to i32 %787 = insertelement <2 x i32> undef, i32 %785, i32 0 %788 = insertelement <2 x i32> %787, i32 %786, i32 1 %789 = bitcast <8 x i32> %148 to <32 x i8> %790 = bitcast <4 x i32> %150 to <16 x i8> %791 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %788, <32 x i8> %789, <16 x i8> %790, i32 2) %792 = extractelement <4 x float> %791, i32 1 %793 = extractelement <4 x float> %791, i32 3 %794 = bitcast float %175 to i32 %795 = bitcast float %176 to i32 %796 = insertelement <2 x i32> undef, i32 %794, i32 0 %797 = insertelement <2 x i32> %796, i32 %795, i32 1 %798 = bitcast <8 x i32> %140 to <32 x i8> %799 = bitcast <4 x i32> %142 to <16 x i8> %800 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %797, <32 x i8> %798, <16 x i8> %799, i32 2) %801 = extractelement <4 x float> %800, i32 1 %802 = extractelement <4 x float> %800, i32 3 %803 = bitcast float %179 to i32 %804 = bitcast float %180 to i32 %805 = insertelement <2 x i32> undef, i32 %803, i32 0 %806 = insertelement <2 x i32> %805, i32 %804, i32 1 %807 = bitcast <8 x i32> %144 to <32 x i8> %808 = bitcast <4 x i32> %146 to <16 x i8> %809 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %806, <32 x i8> %807, <16 x i8> %808, i32 2) %810 = extractelement <4 x float> %809, i32 1 %811 = extractelement <4 x float> %809, i32 3 %812 = fmul float %374, %811 %813 = fmul float %374, %810 %814 = fmul float %407, %802 %815 = fadd float %814, %812 %816 = fmul float %407, %801 %817 = fadd float %816, %813 %818 = fmul float %390, %793 %819 = fadd float %818, %815 %820 = fmul float %390, %792 %821 = fadd float %820, %817 %822 = fmul float %406, %784 %823 = fadd float %822, %819 %824 = fmul float %406, %783 %825 = fadd float %824, %821 %826 = fmul float %823, 2.000000e+00 %827 = fadd float %826, -1.000000e+00 %828 = fmul float %825, 2.000000e+00 %829 = fadd float %828, -1.000000e+00 %830 = fmul float %24, %827 %831 = fmul float %829, %24 %832 = fmul float %827, %827 %833 = fmul float %829, %829 %834 = fadd float %832, %833 %835 = fsub float 1.000000e+00, %834 %836 = call float @llvm.AMDIL.clamp.(float %835, float 0.000000e+00, float 1.000000e+00) %837 = call float @llvm.sqrt.f32(float %836) %838 = fmul float %830, %830 %839 = fmul float %831, %831 %840 = fadd float %839, %838 %841 = fmul float %837, %837 %842 = fadd float %840, %841 %843 = call float @llvm.AMDGPU.rsq.clamped.f32(float %842) %844 = fmul float %830, %843 %845 = fmul float %831, %843 %846 = fsub float -0.000000e+00, %845 %847 = fmul float %837, %843 %848 = fmul float %196, 0.000000e+00 %849 = fadd float %848, %195 %850 = fmul float %197, 0.000000e+00 %851 = fadd float %849, %850 %852 = fmul float %851, %195 %853 = fmul float %851, %196 %854 = fmul float %851, %197 %855 = fsub float 1.000000e+00, %852 %856 = fsub float 0.000000e+00, %853 %857 = fsub float 0.000000e+00, %854 %858 = fmul float %855, %855 %859 = fmul float %856, %856 %860 = fadd float %859, %858 %861 = fmul float %857, %857 %862 = fadd float %860, %861 %863 = call float @llvm.sqrt.f32(float %862) %864 = fcmp olt float %863, 0x3EE4F8B580000000 %.83 = select i1 %864, float 1.000000e+00, float %855 %.84 = select i1 %864, float 0.000000e+00, float %856 %.85 = select i1 %864, float 0.000000e+00, float %857 %865 = fmul float %temp64.0, %846 %866 = fmul float %temp65.0, %846 %867 = fmul float %temp66.0, %846 %868 = fmul float %.83, %844 %869 = fadd float %868, %865 %870 = fmul float %.84, %844 %871 = fadd float %870, %866 %872 = fmul float %.85, %844 %873 = fadd float %872, %867 %874 = fmul float %195, %847 %875 = fadd float %874, %869 %876 = fmul float %196, %847 %877 = fadd float %876, %871 %878 = fmul float %197, %847 %879 = fadd float %878, %873 %880 = bitcast float %222 to i32 %881 = bitcast float %226 to i32 %882 = insertelement <2 x i32> undef, i32 %880, i32 0 %883 = insertelement <2 x i32> %882, i32 %881, i32 1 %884 = bitcast <8 x i32> %152 to <32 x i8> %885 = bitcast <4 x i32> %154 to <16 x i8> %886 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %883, <32 x i8> %884, <16 x i8> %885, i32 2) %887 = extractelement <4 x float> %886, i32 1 %888 = extractelement <4 x float> %886, i32 3 %889 = bitcast float %230 to i32 %890 = bitcast float %234 to i32 %891 = insertelement <2 x i32> undef, i32 %889, i32 0 %892 = insertelement <2 x i32> %891, i32 %890, i32 1 %893 = bitcast <8 x i32> %148 to <32 x i8> %894 = bitcast <4 x i32> %150 to <16 x i8> %895 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %892, <32 x i8> %893, <16 x i8> %894, i32 2) %896 = extractelement <4 x float> %895, i32 1 %897 = extractelement <4 x float> %895, i32 3 %898 = bitcast float %187 to i32 %899 = bitcast float %188 to i32 %900 = insertelement <2 x i32> undef, i32 %898, i32 0 %901 = insertelement <2 x i32> %900, i32 %899, i32 1 %902 = bitcast <8 x i32> %144 to <32 x i8> %903 = bitcast <4 x i32> %146 to <16 x i8> %904 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %901, <32 x i8> %902, <16 x i8> %903, i32 2) %905 = extractelement <4 x float> %904, i32 1 %906 = extractelement <4 x float> %904, i32 3 %907 = bitcast float %185 to i32 %908 = bitcast float %186 to i32 %909 = insertelement <2 x i32> undef, i32 %907, i32 0 %910 = insertelement <2 x i32> %909, i32 %908, i32 1 %911 = bitcast <8 x i32> %140 to <32 x i8> %912 = bitcast <4 x i32> %142 to <16 x i8> %913 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %910, <32 x i8> %911, <16 x i8> %912, i32 2) %914 = extractelement <4 x float> %913, i32 1 %915 = extractelement <4 x float> %913, i32 3 %916 = fmul float %407, %915 %917 = fmul float %407, %914 %918 = fmul float %374, %906 %919 = fadd float %918, %916 %920 = fmul float %374, %905 %921 = fadd float %920, %917 %922 = fmul float %390, %897 %923 = fadd float %922, %919 %924 = fmul float %390, %896 %925 = fadd float %924, %921 %926 = fmul float %406, %888 %927 = fadd float %926, %923 %928 = fmul float %406, %887 %929 = fadd float %928, %925 %930 = fmul float %927, 2.000000e+00 %931 = fadd float %930, -1.000000e+00 %932 = fmul float %929, 2.000000e+00 %933 = fadd float %932, -1.000000e+00 %934 = fmul float %931, %24 %935 = fmul float %933, %24 %936 = fmul float %931, %931 %937 = fmul float %933, %933 %938 = fadd float %936, %937 %939 = fsub float 1.000000e+00, %938 %940 = call float @llvm.AMDIL.clamp.(float %939, float 0.000000e+00, float 1.000000e+00) %941 = call float @llvm.sqrt.f32(float %940) %942 = fmul float %934, %934 %943 = fmul float %935, %935 %944 = fadd float %943, %942 %945 = fmul float %941, %941 %946 = fadd float %944, %945 %947 = call float @llvm.AMDGPU.rsq.clamped.f32(float %946) %948 = fmul float %934, %947 %949 = fmul float %935, %947 %950 = fsub float -0.000000e+00, %949 %951 = fmul float %941, %947 %952 = fmul float %195, 0.000000e+00 %953 = fsub float %952, %196 %954 = fmul float %197, 0.000000e+00 %955 = fadd float %953, %954 %956 = fmul float %955, %195 %957 = fmul float %955, %196 %958 = fmul float %955, %197 %959 = fsub float 0.000000e+00, %956 %960 = fsub float -1.000000e+00, %957 %961 = fsub float 0.000000e+00, %958 %962 = fmul float %959, %959 %963 = fmul float %960, %960 %964 = fadd float %963, %962 %965 = fmul float %961, %961 %966 = fadd float %964, %965 %967 = call float @llvm.sqrt.f32(float %966) %968 = fcmp olt float %967, 0x3EE4F8B580000000 %temp40.0 = select i1 %968, float 0.000000e+00, float %959 %temp41.0 = select i1 %968, float -1.000000e+00, float %960 %temp42.0 = select i1 %968, float 0.000000e+00, float %961 %969 = call float @llvm.AMDGPU.lrp(float %236, float %875, float %771) %970 = call float @llvm.AMDGPU.lrp(float %236, float %877, float %773) %971 = call float @llvm.AMDGPU.lrp(float %236, float %879, float %775) %972 = fmul float %temp40.0, %950 %973 = fmul float %temp41.0, %950 %974 = fmul float %temp42.0, %950 %975 = fmul float %.83, %948 %976 = fadd float %975, %972 %977 = fmul float %.84, %948 %978 = fadd float %977, %973 %979 = fmul float %.85, %948 %980 = fadd float %979, %974 %981 = fmul float %195, %951 %982 = fadd float %981, %976 %983 = fmul float %196, %951 %984 = fadd float %983, %978 %985 = fmul float %197, %951 %986 = fadd float %985, %980 %987 = call float @llvm.AMDGPU.lrp(float %238, float %982, float %969) %988 = call float @llvm.AMDGPU.lrp(float %238, float %984, float %970) %989 = call float @llvm.AMDGPU.lrp(float %238, float %986, float %971) %990 = fsub float %155, %57 %991 = fsub float %156, %58 %992 = fsub float %157, %59 %993 = fmul float %990, %990 %994 = fmul float %991, %991 %995 = fadd float %994, %993 %996 = fmul float %992, %992 %997 = fadd float %995, %996 %998 = call float @llvm.AMDGPU.rsq.clamped.f32(float %997) %999 = fmul float %990, %998 %1000 = fmul float %991, %998 %1001 = fmul float %992, %998 %1002 = fmul float %86, %155 %1003 = fmul float %87, %156 %1004 = fadd float %1003, %1002 %1005 = fmul float %88, %157 %1006 = fadd float %1005, %1004 %1007 = fadd float %1006, %89 %1008 = fmul float %90, %155 %1009 = fmul float %91, %156 %1010 = fadd float %1009, %1008 %1011 = fmul float %92, %157 %1012 = fadd float %1011, %1010 %1013 = fadd float %1012, %93 %1014 = fmul float %94, %155 %1015 = fmul float %95, %156 %1016 = fadd float %1015, %1014 %1017 = fmul float %96, %157 %1018 = fadd float %1017, %1016 %1019 = fadd float %1018, %97 %1020 = fmul float %98, %155 %1021 = fmul float %99, %156 %1022 = fadd float %1021, %1020 %1023 = fmul float %100, %157 %1024 = fadd float %1023, %1022 %1025 = fadd float %1024, %101 %1026 = fdiv float 1.000000e+00, %1025 %1027 = fmul float %1007, %1026 %1028 = fmul float %1013, %1026 %1029 = fmul float %1019, %1026 %1030 = fsub float -0.000000e+00, %1029 %1031 = call float @llvm.AMDIL.clamp.(float %1030, float 0.000000e+00, float 1.000000e+00) %1032 = fadd float %1027, %102 %1033 = fadd float %1028, %102 %1034 = bitcast float %1031 to i32 %1035 = bitcast float %1032 to i32 %1036 = bitcast float %1033 to i32 %1037 = insertelement <4 x i32> undef, i32 %1034, i32 0 %1038 = insertelement <4 x i32> %1037, i32 %1035, i32 1 %1039 = insertelement <4 x i32> %1038, i32 %1036, i32 2 %1040 = insertelement <4 x i32> %1039, i32 0, i32 3 %1041 = bitcast <8 x i32> %104 to <32 x i8> %1042 = bitcast <4 x i32> %106 to <16 x i8> %1043 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1040, <32 x i8> %1041, <16 x i8> %1042, i32 7) %1044 = extractelement <4 x float> %1043, i32 0 %1045 = fsub float %1027, %102 %1046 = fadd float %102, %1028 %1047 = bitcast float %1031 to i32 %1048 = bitcast float %1045 to i32 %1049 = bitcast float %1046 to i32 %1050 = insertelement <4 x i32> undef, i32 %1047, i32 0 %1051 = insertelement <4 x i32> %1050, i32 %1048, i32 1 %1052 = insertelement <4 x i32> %1051, i32 %1049, i32 2 %1053 = insertelement <4 x i32> %1052, i32 0, i32 3 %1054 = bitcast <8 x i32> %104 to <32 x i8> %1055 = bitcast <4 x i32> %106 to <16 x i8> %1056 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1053, <32 x i8> %1054, <16 x i8> %1055, i32 7) %1057 = extractelement <4 x float> %1056, i32 0 %1058 = fadd float %1027, %102 %1059 = fsub float %1028, %102 %1060 = bitcast float %1031 to i32 %1061 = bitcast float %1058 to i32 %1062 = bitcast float %1059 to i32 %1063 = insertelement <4 x i32> undef, i32 %1060, i32 0 %1064 = insertelement <4 x i32> %1063, i32 %1061, i32 1 %1065 = insertelement <4 x i32> %1064, i32 %1062, i32 2 %1066 = insertelement <4 x i32> %1065, i32 0, i32 3 %1067 = bitcast <8 x i32> %104 to <32 x i8> %1068 = bitcast <4 x i32> %106 to <16 x i8> %1069 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1066, <32 x i8> %1067, <16 x i8> %1068, i32 7) %1070 = extractelement <4 x float> %1069, i32 0 %1071 = fsub float %1027, %102 %1072 = fsub float %1028, %102 %1073 = bitcast float %1031 to i32 %1074 = bitcast float %1071 to i32 %1075 = bitcast float %1072 to i32 %1076 = insertelement <4 x i32> undef, i32 %1073, i32 0 %1077 = insertelement <4 x i32> %1076, i32 %1074, i32 1 %1078 = insertelement <4 x i32> %1077, i32 %1075, i32 2 %1079 = insertelement <4 x i32> %1078, i32 0, i32 3 %1080 = bitcast <8 x i32> %104 to <32 x i8> %1081 = bitcast <4 x i32> %106 to <16 x i8> %1082 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1079, <32 x i8> %1080, <16 x i8> %1081, i32 7) %1083 = extractelement <4 x float> %1082, i32 0 %1084 = fadd float %102, %1027 %1085 = fadd float %1028, 0.000000e+00 %1086 = bitcast float %1031 to i32 %1087 = bitcast float %1084 to i32 %1088 = bitcast float %1085 to i32 %1089 = insertelement <4 x i32> undef, i32 %1086, i32 0 %1090 = insertelement <4 x i32> %1089, i32 %1087, i32 1 %1091 = insertelement <4 x i32> %1090, i32 %1088, i32 2 %1092 = insertelement <4 x i32> %1091, i32 0, i32 3 %1093 = bitcast <8 x i32> %104 to <32 x i8> %1094 = bitcast <4 x i32> %106 to <16 x i8> %1095 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1092, <32 x i8> %1093, <16 x i8> %1094, i32 7) %1096 = extractelement <4 x float> %1095, i32 0 %1097 = fsub float %1027, %102 %1098 = fadd float %1028, 0.000000e+00 %1099 = bitcast float %1031 to i32 %1100 = bitcast float %1097 to i32 %1101 = bitcast float %1098 to i32 %1102 = insertelement <4 x i32> undef, i32 %1099, i32 0 %1103 = insertelement <4 x i32> %1102, i32 %1100, i32 1 %1104 = insertelement <4 x i32> %1103, i32 %1101, i32 2 %1105 = insertelement <4 x i32> %1104, i32 0, i32 3 %1106 = bitcast <8 x i32> %104 to <32 x i8> %1107 = bitcast <4 x i32> %106 to <16 x i8> %1108 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1105, <32 x i8> %1106, <16 x i8> %1107, i32 7) %1109 = extractelement <4 x float> %1108, i32 0 %1110 = fadd float %1027, 0.000000e+00 %1111 = fsub float %1028, %102 %1112 = bitcast float %1031 to i32 %1113 = bitcast float %1110 to i32 %1114 = bitcast float %1111 to i32 %1115 = insertelement <4 x i32> undef, i32 %1112, i32 0 %1116 = insertelement <4 x i32> %1115, i32 %1113, i32 1 %1117 = insertelement <4 x i32> %1116, i32 %1114, i32 2 %1118 = insertelement <4 x i32> %1117, i32 0, i32 3 %1119 = bitcast <8 x i32> %104 to <32 x i8> %1120 = bitcast <4 x i32> %106 to <16 x i8> %1121 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1118, <32 x i8> %1119, <16 x i8> %1120, i32 7) %1122 = extractelement <4 x float> %1121, i32 0 %1123 = fadd float %1027, 0.000000e+00 %1124 = fadd float %102, %1028 %1125 = bitcast float %1031 to i32 %1126 = bitcast float %1123 to i32 %1127 = bitcast float %1124 to i32 %1128 = insertelement <4 x i32> undef, i32 %1125, i32 0 %1129 = insertelement <4 x i32> %1128, i32 %1126, i32 1 %1130 = insertelement <4 x i32> %1129, i32 %1127, i32 2 %1131 = insertelement <4 x i32> %1130, i32 0, i32 3 %1132 = bitcast <8 x i32> %104 to <32 x i8> %1133 = bitcast <4 x i32> %106 to <16 x i8> %1134 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1131, <32 x i8> %1132, <16 x i8> %1133, i32 7) %1135 = extractelement <4 x float> %1134, i32 0 %1136 = fmul float %78, %79 %1137 = fmul float %78, %80 %1138 = fmul float %78, %81 %1139 = fmul float %82, %83 %1140 = fmul float %82, %84 %1141 = fmul float %82, %85 %1142 = bitcast float %1031 to i32 %1143 = bitcast float %1027 to i32 %1144 = bitcast float %1028 to i32 %1145 = insertelement <4 x i32> undef, i32 %1142, i32 0 %1146 = insertelement <4 x i32> %1145, i32 %1143, i32 1 %1147 = insertelement <4 x i32> %1146, i32 %1144, i32 2 %1148 = insertelement <4 x i32> %1147, i32 0, i32 3 %1149 = bitcast <8 x i32> %104 to <32 x i8> %1150 = bitcast <4 x i32> %106 to <16 x i8> %1151 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1148, <32 x i8> %1149, <16 x i8> %1150, i32 7) %1152 = extractelement <4 x float> %1151, i32 0 %1153 = fmul float %1044, 0x3FB32D10E0000000 %1154 = fmul float %1057, 0x3FB32D10E0000000 %1155 = fadd float %1153, %1154 %1156 = fmul float %1070, 0x3FB32D10E0000000 %1157 = fadd float %1155, %1156 %1158 = fmul float %1083, 0x3FB32D10E0000000 %1159 = fadd float %1157, %1158 %1160 = fmul float %1096, 0x3FBFA3FCC0000000 %1161 = fmul float %1109, 0x3FBFA3FCC0000000 %1162 = fadd float %1160, %1161 %1163 = fmul float %1122, 0x3FBFA3FCC0000000 %1164 = fadd float %1162, %1163 %1165 = fmul float %1135, 0x3FBFA3FCC0000000 %1166 = fadd float %1164, %1165 %1167 = fadd float %1159, %1166 %1168 = fmul float %1152, 0x3FCA5DFA80000000 %1169 = fadd float %1168, %1167 %1170 = fmul float %62, %987 %1171 = fsub float -0.000000e+00, %1170 %1172 = fmul float %63, %988 %1173 = fsub float %1171, %1172 %1174 = fmul float %64, %989 %1175 = fsub float %1173, %1174 %1176 = call float @llvm.AMDIL.clamp.(float %1175, float 0.000000e+00, float 1.000000e+00) %1177 = fmul float %1169, %1176 %1178 = fmul float %999, %987 %1179 = fsub float -0.000000e+00, %1178 %1180 = fmul float %1000, %988 %1181 = fsub float %1179, %1180 %1182 = fmul float %1001, %989 %1183 = fsub float %1181, %1182 %1184 = fmul float %1183, %987 %1185 = fmul float %1183, %988 %1186 = fmul float %1183, %989 %1187 = fmul float %1184, 2.000000e+00 %1188 = fmul float %1185, 2.000000e+00 %1189 = fmul float %1186, 2.000000e+00 %1190 = fsub float -0.000000e+00, %1187 %1191 = fsub float %1190, %999 %1192 = fsub float -0.000000e+00, %1188 %1193 = fsub float %1192, %1000 %1194 = fsub float -0.000000e+00, %1189 %1195 = fsub float %1194, %1001 %1196 = fmul float %68, %1191 %1197 = fmul float %69, %1193 %1198 = fadd float %1197, %1196 %1199 = fmul float %70, %1195 %1200 = fadd float %1198, %1199 %1201 = call float @llvm.AMDIL.clamp.(float %1200, float 0.000000e+00, float 1.000000e+00) %1202 = call float @llvm.pow.f32(float %1201, float %71) %1203 = fmul float %653, %1202 %1204 = fmul float %1203, %72 %1205 = fadd float %1204, %480 %1206 = fmul float %1203, %73 %1207 = fadd float %1206, %481 %1208 = fmul float %1203, %74 %1209 = fadd float %1208, %482 %1210 = fsub float 1.000000e+00, %1177 %1211 = fmul float %1139, 0x3FCB333340000000 %1212 = fmul float %1140, 0x3FE6E48E80000000 %1213 = fadd float %1212, %1211 %1214 = fmul float %1141, 0x3FB2752540000000 %1215 = fadd float %1213, %1214 %1216 = call float @llvm.AMDGPU.lrp(float %654, float %1215, float %1139) %1217 = call float @llvm.AMDGPU.lrp(float %654, float %1215, float %1140) %1218 = call float @llvm.AMDGPU.lrp(float %654, float %1215, float %1141) %1219 = fmul float %75, %987 %1220 = fmul float %76, %988 %1221 = fadd float %1220, %1219 %1222 = fmul float %77, %989 %1223 = fadd float %1221, %1222 %1224 = call float @llvm.AMDIL.clamp.(float %1223, float 0.000000e+00, float 1.000000e+00) %1225 = fmul float %1136, 0x3FCB333340000000 %1226 = fmul float %1137, 0x3FE6E48E80000000 %1227 = fadd float %1226, %1225 %1228 = fmul float %1138, 0x3FB2752540000000 %1229 = fadd float %1227, %1228 %1230 = call float @llvm.AMDGPU.lrp(float %654, float %1229, float %1136) %1231 = call float @llvm.AMDGPU.lrp(float %654, float %1229, float %1137) %1232 = call float @llvm.AMDGPU.lrp(float %654, float %1229, float %1138) %1233 = fmul float %1177, %65 %1234 = fmul float %1177, %66 %1235 = fmul float %1177, %67 %1236 = fmul float %1224, %1230 %1237 = fadd float %1236, %1233 %1238 = fmul float %1224, %1231 %1239 = fadd float %1238, %1234 %1240 = fmul float %1224, %1232 %1241 = fadd float %1240, %1235 %1242 = fmul float %1210, %1216 %1243 = fadd float %1242, %1237 %1244 = fmul float %1210, %1217 %1245 = fadd float %1244, %1239 %1246 = fmul float %1210, %1218 %1247 = fadd float %1246, %1241 %1248 = fmul float %1205, %1243 %1249 = fmul float %1207, %1245 %1250 = fmul float %1209, %1247 %1251 = fmul float %53, %155 %1252 = fmul float %54, %156 %1253 = fadd float %1251, %1252 %1254 = fmul float %55, %157 %1255 = fadd float %1253, %1254 %1256 = fadd float %1255, %56 %1257 = fmul float %1256, 0x3F9EB851E0000000 %1258 = call i32 @llvm.SI.packf16(float %1248, float %1249) %1259 = bitcast i32 %1258 to float %1260 = call i32 @llvm.SI.packf16(float %1250, float %1257) %1261 = bitcast i32 %1260 to float %1262 = call i32 @llvm.SI.packf16(float %158, float %158) %1263 = bitcast i32 %1262 to float %1264 = call i32 @llvm.SI.packf16(float %158, float %158) %1265 = bitcast i32 %1264 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %1259, float %1261, float %1259, float %1261) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 1, i32 1, float %1263, float %1265, float %1263, float %1265) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[16:19], s[2:3], 0x4 ; C0880304 v_interp_p1_f32 v3, v0, 0, 0, [m0] ; C80C0000 v_interp_p2_f32 v3, [v3], v1, 0, 0, [m0] ; C80D0001 v_interp_p1_f32 v5, v0, 1, 0, [m0] ; C8140100 v_interp_p2_f32 v5, [v5], v1, 1, 0, [m0] ; C8150101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v9, v0, 0, 1, [m0] ; C8240400 v_interp_p2_f32 v9, [v9], v1, 0, 1, [m0] ; C8250401 v_interp_p1_f32 v10, v0, 1, 1, [m0] ; C8280500 v_interp_p2_f32 v10, [v10], v1, 1, 1, [m0] ; C8290501 v_interp_p1_f32 v11, v0, 2, 1, [m0] ; C82C0600 v_interp_p2_f32 v11, [v11], v1, 2, 1, [m0] ; C82D0601 v_interp_p1_f32 v6, v0, 0, 2, [m0] ; C8180800 v_interp_p2_f32 v6, [v6], v1, 0, 2, [m0] ; C8190801 v_interp_p1_f32 v7, v0, 1, 2, [m0] ; C81C0900 v_interp_p2_f32 v7, [v7], v1, 1, 2, [m0] ; C81D0901 v_interp_p1_f32 v8, v0, 2, 2, [m0] ; C8200A00 v_interp_p2_f32 v8, [v8], v1, 2, 2, [m0] ; C8210A01 v_interp_p1_f32 v15, v0, 0, 3, [m0] ; C83C0C00 v_interp_p2_f32 v15, [v15], v1, 0, 3, [m0] ; C83D0C01 v_interp_p1_f32 v13, v0, 1, 3, [m0] ; C8340D00 v_interp_p2_f32 v13, [v13], v1, 1, 3, [m0] ; C8350D01 v_interp_p1_f32 v12, v0, 2, 3, [m0] ; C8300E00 v_interp_p2_f32 v12, [v12], v1, 2, 3, [m0] ; C8310E01 v_interp_p1_f32 v19, v0, 3, 3, [m0] ; C84C0F00 v_interp_p2_f32 v19, [v19], v1, 3, 3, [m0] ; C84D0F01 v_interp_p1_f32 v17, v0, 0, 4, [m0] ; C8441000 v_interp_p2_f32 v17, [v17], v1, 0, 4, [m0] ; C8451001 v_interp_p1_f32 v16, v0, 1, 4, [m0] ; C8401100 v_interp_p2_f32 v16, [v16], v1, 1, 4, [m0] ; C8411101 v_interp_p1_f32 v14, v0, 2, 4, [m0] ; C8381200 v_interp_p2_f32 v14, [v14], v1, 2, 4, [m0] ; C8391201 v_interp_p1_f32 v18, v0, 3, 4, [m0] ; C8481300 v_interp_p2_f32 v18, [v18], v1, 3, 4, [m0] ; C8491301 v_interp_p1_f32 v36, v0, 0, 5, [m0] ; C8901400 v_interp_p2_f32 v36, [v36], v1, 0, 5, [m0] ; C8911401 v_interp_p1_f32 v37, v0, 1, 5, [m0] ; C8941500 v_interp_p2_f32 v37, [v37], v1, 1, 5, [m0] ; C8951501 s_load_dwordx4 s[12:15], s[2:3], 0x10 ; C0860310 s_load_dwordx4 s[8:11], s[2:3], 0x14 ; C0840314 v_interp_p1_f32 v62, v0, 2, 5, [m0] ; C8F81600 v_interp_p2_f32 v62, [v62], v1, 2, 5, [m0] ; C8F91601 v_interp_p1_f32 v63, v0, 3, 5, [m0] ; C8FC1700 v_interp_p2_f32 v63, [v63], v1, 3, 5, [m0] ; C8FD1701 v_interp_p1_f32 v74, v0, 0, 6, [m0] ; C9281800 v_interp_p2_f32 v74, [v74], v1, 0, 6, [m0] ; C9291801 v_interp_p1_f32 v75, v0, 1, 6, [m0] ; C92C1900 v_interp_p2_f32 v75, [v75], v1, 1, 6, [m0] ; C92D1901 v_interp_p1_f32 v64, v0, 2, 6, [m0] ; C9001A00 v_interp_p2_f32 v64, [v64], v1, 2, 6, [m0] ; C9011A01 v_interp_p1_f32 v65, v0, 3, 6, [m0] ; C9041B00 v_interp_p2_f32 v65, [v65], v1, 3, 6, [m0] ; C9051B01 v_interp_p1_f32 v76, v0, 0, 7, [m0] ; C9301C00 v_interp_p2_f32 v76, [v76], v1, 0, 7, [m0] ; C9311C01 v_interp_p1_f32 v77, v0, 1, 7, [m0] ; C9341D00 v_interp_p2_f32 v77, [v77], v1, 1, 7, [m0] ; C9351D01 v_interp_p1_f32 v69, v0, 2, 7, [m0] ; C9141E00 v_interp_p2_f32 v69, [v69], v1, 2, 7, [m0] ; C9151E01 v_interp_p1_f32 v70, v0, 3, 7, [m0] ; C9181F00 v_interp_p2_f32 v70, [v70], v1, 3, 7, [m0] ; C9191F01 v_interp_p1_f32 v20, v0, 0, 8, [m0] ; C8502000 v_interp_p2_f32 v20, [v20], v1, 0, 8, [m0] ; C8512001 v_interp_p1_f32 v21, v0, 1, 8, [m0] ; C8542100 v_interp_p2_f32 v21, [v21], v1, 1, 8, [m0] ; C8552101 s_load_dwordx4 s[40:43], s[4:5], 0x8 ; C0940508 s_load_dwordx8 s[44:51], s[6:7], 0x10 ; C0D60710 v_interp_p1_f32 v22, v0, 2, 8, [m0] ; C8582200 v_interp_p2_f32 v22, [v22], v1, 2, 8, [m0] ; C8592201 v_interp_p1_f32 v23, v0, 3, 8, [m0] ; C85C2300 v_interp_p2_f32 v23, [v23], v1, 3, 8, [m0] ; C85D2301 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s52, s[16:19], 0x5c ; C21A115C s_buffer_load_dword s64, s[16:19], 0x5e ; C220115E s_load_dwordx4 s[28:31], s[4:5], 0xc ; C08E050C s_load_dwordx8 s[32:39], s[6:7], 0x18 ; C0D00718 s_load_dwordx8 s[20:27], s[6:7], 0x20 ; C0CA0720 s_load_dwordx4 s[0:3], s[4:5], 0x10 ; C0800510 image_sample v[24:27], 15, 0, 0, 0, 0, 0, 0, 0, v[74:75], s[44:51], s[40:43] ; F0800F00 014B184A s_buffer_load_dword s53, s[16:19], 0x5d ; C21A915D image_sample v[28:31], 15, 0, 0, 0, 0, 0, 0, 0, v[64:65], s[44:51], s[40:43] ; F0800F00 014B1C40 image_sample v[32:35], 15, 0, 0, 0, 0, 0, 0, 0, v[22:23], s[44:51], s[40:43] ; F0800F00 014B2016 s_buffer_load_dword s40, s[16:19], 0x62 ; C2141162 v_mov_b32_e32 v0, 0x3c000000 ; 7E0002FF 3C000000 v_mul_f32_e32 v1, v0, v5 ; 10020B00 s_buffer_load_dword s41, s[16:19], 0x60 ; C2149160 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v38, s64 ; 7E4C0240 s_buffer_load_dword s42, s[16:19], 0x61 ; C2151161 v_mad_f32 v38, s52, v1, v38 ; D2820026 049A0234 v_mul_f32_e32 v39, v0, v4 ; 104E0900 s_waitcnt vmcnt(2) ; BF8C0772 v_mul_f32_e32 v40, s53, v39 ; 10504E35 v_mad_f32 v78, s53, -v39, v38 ; D282004E 449A4E35 v_mul_f32_e32 v38, s53, v1 ; 104C0235 v_mul_f32_e32 v0, v0, v3 ; 10000700 v_mad_f32 v40, s52, v0, -v40 ; D2820028 84A20034 v_mad_f32 v50, s52, v0, -v38 ; D2820032 849A0034 s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v38, s40 ; 7E4C0228 v_mad_f32 v38, s41, v1, v38 ; D2820026 049A0229 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v79, s42, -v39, v38 ; D282004F 449A4E2A s_buffer_load_dword s43, s[16:19], 0x4d ; C215914D s_buffer_load_dword s44, s[16:19], 0x4c ; C216114C s_buffer_load_dword s45, s[16:19], 0x51 ; C2169151 s_buffer_load_dword s46, s[16:19], 0x50 ; C2171150 v_mul_f32_e32 v38, s42, v39 ; 104C4E2A v_mul_f32_e32 v39, s42, v1 ; 104E022A s_buffer_load_dword s42, s[16:19], 0x4e ; C215114E s_buffer_load_dword s47, s[16:19], 0x52 ; C2179152 v_mad_f32 v38, s41, v0, -v38 ; D2820026 849A0029 v_mad_f32 v51, s41, v0, -v39 ; D2820033 849E0029 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v39, s43, v1 ; 104E022B v_mad_f32 v39, s44, v0, -v39 ; D2820027 849E002C v_mul_f32_e32 v1, s45, v1 ; 1002022D v_mad_f32 v0, s46, v0, -v1 ; D2820000 8406002E v_add_f32_e32 v80, s64, v40 ; 06A05040 v_add_f32_e32 v81, s40, v38 ; 06A24C28 v_add_f32_e32 v82, s42, v39 ; 06A44E2A v_add_f32_e32 v83, s47, v0 ; 06A6002F image_sample v[42:45], 15, 0, 0, 0, 0, 0, 0, 0, v[76:77], s[32:39], s[28:31] ; F0800F00 00E82A4C image_sample v[46:49], 15, 0, 0, 0, 0, 0, 0, 0, v[69:70], s[32:39], s[28:31] ; F0800F00 00E82E45 image_sample v[38:41], 15, 0, 0, 0, 0, 0, 0, 0, v[82:83], s[32:39], s[28:31] ; F0800F00 00E82652 s_load_dwordx4 s[52:55], s[4:5], 0x4 ; C09A0504 s_load_dwordx8 s[56:63], s[6:7], 0x8 ; C0DC0708 v_add_f32_e32 v0, s64, v50 ; 06006440 v_add_f32_e32 v1, s40, v51 ; 06026628 s_load_dwordx4 s[28:31], s[4:5], 0x20 ; C08E0520 image_sample v[54:57], 15, 0, 0, 0, 0, 0, 0, 0, v[78:79], s[20:27], s[0:3] ; F0800F00 0005364E s_load_dwordx8 s[32:39], s[6:7], 0x40 ; C0D00740 image_sample v[58:61], 15, 0, 0, 0, 0, 0, 0, 0, v[80:81], s[20:27], s[0:3] ; F0800F00 00053A50 image_sample v[50:53], 15, 0, 0, 0, 0, 0, 0, 0, v[0:1], s[20:27], s[0:3] ; F0800F00 00053200 s_load_dwordx4 s[40:43], s[4:5], 0x1c ; C094051C s_load_dwordx8 s[44:51], s[6:7], 0x38 ; C0D60738 s_waitcnt vmcnt(3) lgkmcnt(0) ; BF8C0073 image_sample v[66:68], 7, 0, 0, 0, 0, 0, 0, 0, v[36:37], s[56:63], s[52:55] ; F0800700 01AE4224 image_sample v[71:73], 7, 0, 0, 0, 0, 0, 0, 0, v[62:63], s[56:63], s[52:55] ; F0800700 01AE473E image_sample v[84:86], 7, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[56:63], s[52:55] ; F0800700 01AE5414 s_load_dwordx4 s[0:3], s[4:5], 0x24 ; C0800524 s_load_dwordx8 s[20:27], s[6:7], 0x48 ; C0CA0748 s_waitcnt vmcnt(5) ; BF8C0775 image_sample v[87:88], 5, 0, 0, 0, 0, 0, 0, 0, v[78:79], s[32:39], s[28:31] ; F0800500 00E8574E image_sample v[89:90], 5, 0, 0, 0, 0, 0, 0, 0, v[80:81], s[32:39], s[28:31] ; F0800500 00E85950 image_sample v[91:92], 5, 0, 0, 0, 0, 0, 0, 0, v[0:1], s[32:39], s[28:31] ; F0800500 00E85B00 s_load_dwordx4 s[64:67], s[4:5], 0x18 ; C0A00518 s_waitcnt vmcnt(6) ; BF8C0776 image_sample v[93:94], 5, 0, 0, 0, 0, 0, 0, 0, v[76:77], s[44:51], s[40:43] ; F0800500 014B5D4C image_sample v[95:96], 5, 0, 0, 0, 0, 0, 0, 0, v[69:70], s[44:51], s[40:43] ; F0800500 014B5F45 s_load_dwordx8 s[68:75], s[6:7], 0x30 ; C0E20730 image_sample v[97:98], 5, 0, 0, 0, 0, 0, 0, 0, v[82:83], s[44:51], s[40:43] ; F0800500 014B6152 s_load_dwordx4 s[76:79], s[4:5], 0x14 ; C0A60514 s_load_dwordx8 s[80:87], s[6:7], 0x28 ; C0E80728 s_load_dwordx4 s[40:43], s[4:5], 0x2c ; C094052C s_load_dwordx4 s[52:55], s[4:5], 0x30 ; C09A0530 s_load_dwordx8 s[56:63], s[6:7], 0x60 ; C0DC0760 s_load_dwordx8 s[44:51], s[6:7], 0x58 ; C0D60758 s_load_dwordx4 s[28:31], s[4:5], 0x28 ; C08E0528 s_load_dwordx8 s[32:39], s[6:7], 0x50 ; C0D00750 s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 image_sample v[99:100], 5, 0, 0, 0, 0, 0, 0, 0, v[74:75], s[68:75], s[64:67] ; F0800500 0211634A image_sample v[101:102], 5, 0, 0, 0, 0, 0, 0, 0, v[64:65], s[68:75], s[64:67] ; F0800500 02116540 image_sample v[103:104], 5, 0, 0, 0, 0, 0, 0, 0, v[22:23], s[68:75], s[64:67] ; F0800500 02116716 s_waitcnt vmcnt(3) ; BF8C0773 image_sample v[105:106], 5, 0, 0, 0, 0, 0, 0, 0, v[36:37], s[80:87], s[76:79] ; F0800500 02746924 image_sample v[107:108], 5, 0, 0, 0, 0, 0, 0, 0, v[62:63], s[80:87], s[76:79] ; F0800500 02746B3E image_sample v[109:110], 5, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[80:87], s[76:79] ; F0800500 02746D14 image_sample v[78:79], 10, 0, 0, 0, 0, 0, 0, 0, v[78:79], s[56:63], s[52:55] ; F0800A00 01AE4E4E image_sample v[76:77], 10, 0, 0, 0, 0, 0, 0, 0, v[76:77], s[44:51], s[40:43] ; F0800A00 014B4C4C image_sample v[36:37], 10, 0, 0, 0, 0, 0, 0, 0, v[36:37], s[20:27], s[0:3] ; F0800A00 00052424 image_sample v[74:75], 10, 0, 0, 0, 0, 0, 0, 0, v[74:75], s[32:39], s[28:31] ; F0800A00 00E84A4A image_sample v[80:81], 10, 0, 0, 0, 0, 0, 0, 0, v[80:81], s[56:63], s[52:55] ; F0800A00 01AE5050 s_buffer_load_dword s64, s[8:11], 0x30 ; C2200930 image_sample v[69:70], 10, 0, 0, 0, 0, 0, 0, 0, v[69:70], s[44:51], s[40:43] ; F0800A00 014B4545 s_buffer_load_dword s65, s[8:11], 0x31 ; C2208931 s_buffer_load_dword s66, s[8:11], 0x32 ; C2210932 s_buffer_load_dword s67, s[8:11], 0x33 ; C2218933 image_sample v[62:63], 10, 0, 0, 0, 0, 0, 0, 0, v[62:63], s[20:27], s[0:3] ; F0800A00 00053E3E image_sample v[64:65], 10, 0, 0, 0, 0, 0, 0, 0, v[64:65], s[32:39], s[28:31] ; F0800A00 00E84040 s_buffer_load_dword s68, s[8:11], 0x34 ; C2220934 image_sample v[0:1], 10, 0, 0, 0, 0, 0, 0, 0, v[0:1], s[56:63], s[52:55] ; F0800A00 01AE0000 image_sample v[82:83], 10, 0, 0, 0, 0, 0, 0, 0, v[82:83], s[44:51], s[40:43] ; F0800A00 014B5252 s_waitcnt vmcnt(5) lgkmcnt(0) ; BF8C0075 v_mul_f32_e32 v111, s64, v3 ; 10DE0640 s_buffer_load_dword s40, s[8:11], 0x35 ; C2140935 s_waitcnt vmcnt(4) ; BF8C0774 v_mac_f32_e32 v111, s65, v5 ; 3EDE0A41 v_mac_f32_e32 v111, s66, v4 ; 3EDE0842 v_add_f32_e32 v111, s67, v111 ; 06DEDE43 s_buffer_load_dword s41, s[8:11], 0x36 ; C2148936 s_buffer_load_dword s42, s[8:11], 0x37 ; C2150937 s_waitcnt vmcnt(2) ; BF8C0772 v_mul_f32_e32 v112, s68, v3 ; 10E00644 s_buffer_load_dword s43, s[8:11], 0x3c ; C215893C s_buffer_load_dword s44, s[8:11], 0x3d ; C216093D s_buffer_load_dword s45, s[8:11], 0x3e ; C216893E s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mac_f32_e32 v112, s40, v5 ; 3EE00A28 s_buffer_load_dword s40, s[8:11], 0x38 ; C2140938 s_buffer_load_dword s46, s[8:11], 0x39 ; C2170939 s_buffer_load_dword s47, s[8:11], 0x3a ; C217893A v_mac_f32_e32 v112, s41, v4 ; 3EE00829 v_add_f32_e32 v112, s42, v112 ; 06E0E02A s_buffer_load_dword s41, s[8:11], 0x3f ; C214893F v_mul_f32_e32 v113, s43, v3 ; 10E2062B v_mac_f32_e32 v113, s44, v5 ; 3EE20A2C v_mac_f32_e32 v113, s45, v4 ; 3EE2082D v_add_f32_e32 v19, v19, v19 ; 06262713 v_add_f32_e64 v19, 0, v19 clamp ; D2060813 00022680 v_sub_f32_e32 v114, 1.0, v19 ; 08E426F2 v_mul_f32_e32 v24, v24, v114 ; 1030E518 v_mul_f32_e32 v25, v25, v114 ; 1032E519 v_mul_f32_e32 v26, v26, v114 ; 1034E51A v_mul_f32_e32 v27, v27, v114 ; 1036E51B v_mac_f32_e32 v24, v28, v19 ; 3E30271C v_mac_f32_e32 v25, v29, v19 ; 3E32271D v_mac_f32_e32 v26, v30, v19 ; 3E34271E v_mac_f32_e32 v27, v31, v19 ; 3E36271F v_mul_f32_e32 v28, v42, v114 ; 1038E52A v_mul_f32_e32 v29, v43, v114 ; 103AE52B v_mul_f32_e32 v30, v44, v114 ; 103CE52C v_mul_f32_e32 v31, v45, v114 ; 103EE52D v_mac_f32_e32 v28, v46, v19 ; 3E38272E v_mac_f32_e32 v29, v47, v19 ; 3E3A272F v_mac_f32_e32 v30, v48, v19 ; 3E3C2730 v_mac_f32_e32 v31, v49, v19 ; 3E3E2731 v_mul_f32_e32 v42, v54, v114 ; 1054E536 v_mul_f32_e32 v43, v55, v114 ; 1056E537 v_mul_f32_e32 v44, v56, v114 ; 1058E538 v_mul_f32_e32 v45, v57, v114 ; 105AE539 v_mac_f32_e32 v42, v58, v19 ; 3E54273A v_mac_f32_e32 v43, v59, v19 ; 3E56273B v_mac_f32_e32 v44, v60, v19 ; 3E58273C v_mac_f32_e32 v45, v61, v19 ; 3E5A273D v_add_f32_e32 v18, v18, v18 ; 06242512 v_add_f32_e64 v18, 0, v18 clamp ; D2060812 00022480 v_sub_f32_e32 v46, 1.0, v18 ; 085C24F2 v_mad_f32 v47, v46, v27, -v17 ; D282002F 8446372E v_mac_f32_e32 v17, v27, v46 ; 3E225D1B s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v27, s41, v113 ; 0636E229 v_mul_f32_e32 v24, v24, v46 ; 10305D18 v_mac_f32_e32 v24, v32, v18 ; 3E302520 v_mul_f32_e32 v25, v25, v46 ; 10325D19 v_mac_f32_e32 v25, v33, v18 ; 3E322521 s_buffer_load_dword s41, s[8:11], 0x3b ; C214893B v_mul_f32_e32 v26, v26, v46 ; 10345D1A v_mul_f32_e32 v32, s40, v3 ; 10400628 s_buffer_load_dword s40, s[8:11], 0x40 ; C2140940 v_mac_f32_e32 v26, v34, v18 ; 3E342522 v_mac_f32_e32 v47, v35, v18 ; 3E5E2523 v_mac_f32_e32 v17, v35, v18 ; 3E222523 v_rcp_f32_e32 v27, v27 ; 7E36551B v_mac_f32_e32 v32, s46, v5 ; 3E400A2E v_mac_f32_e32 v32, s47, v4 ; 3E40082F s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v32, s41, v32 ; 06404029 v_mul_f32_e32 v32, v27, v32 ; 1040411B v_add_f32_e64 v32, 0, -v32 clamp ; D2060820 40024080 v_mad_f32 v33, v111, v27, s40 ; D2820021 00A2376F v_mad_f32 v34, v112, v27, s40 ; D2820022 00A23770 v_mov_b32_e32 v35, 0 ; 7E460280 v_mad_f32 v48, v46, v31, -v16 ; D2820030 84423F2E v_mad_f32 v49, v111, v27, -s40 ; D2820031 80A2376F v_mov_b32_e32 v54, v32 ; 7E6C0320 v_mov_b32_e32 v55, v33 ; 7E6E0321 v_mov_b32_e32 v56, v34 ; 7E700322 v_mov_b32_e32 v57, v35 ; 7E720323 v_mac_f32_e32 v16, v31, v46 ; 3E205D1F v_mov_b32_e32 v55, v49 ; 7E6E0331 image_sample v[22:23], 10, 0, 0, 0, 0, 0, 0, 0, v[22:23], s[32:39], s[28:31] ; F0800A00 00E81616 v_mul_f32_e32 v28, v28, v46 ; 10385D1C s_load_dwordx4 s[28:31], s[4:5], 0x0 ; C08E0500 s_load_dwordx8 s[32:39], s[6:7], 0x0 ; C0D00700 v_mac_f32_e32 v28, v38, v18 ; 3E382526 v_mul_f32_e32 v29, v29, v46 ; 103A5D1D v_mac_f32_e32 v29, v39, v18 ; 3E3A2527 v_mul_f32_e32 v30, v30, v46 ; 103C5D1E v_mac_f32_e32 v30, v40, v18 ; 3E3C2528 v_mac_f32_e32 v48, v41, v18 ; 3E602529 v_mad_f32 v31, v112, v27, -s40 ; D282001F 80A23770 v_mov_b32_e32 v58, v32 ; 7E740320 v_mov_b32_e32 v59, v33 ; 7E760321 v_mov_b32_e32 v60, v34 ; 7E780322 v_mov_b32_e32 v61, v35 ; 7E7A0323 v_mac_f32_e32 v16, v41, v18 ; 3E202529 v_mov_b32_e32 v56, v34 ; 7E700322 v_mov_b32_e32 v60, v31 ; 7E78031F image_sample v[20:21], 10, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[20:27], s[0:3] ; F0800A00 00051414 v_mov_b32_e32 v57, v35 ; 7E720323 v_mov_b32_e32 v61, v35 ; 7E7A0323 s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 image_sample_c_l v38, 1, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[32:39], s[28:31] ; F0B00100 00E82620 image_sample_c_l v39, 1, 0, 0, 0, 0, 0, 0, 0, v[54:57], s[32:39], s[28:31] ; F0B00100 00E82736 image_sample_c_l v40, 1, 0, 0, 0, 0, 0, 0, 0, v[58:61], s[32:39], s[28:31] ; F0B00100 00E8283A v_mad_f32 v41, v46, v45, -v14 ; D2820029 843A5B2E v_mac_f32_e32 v14, v45, v46 ; 3E1C5D2D v_mul_f32_e32 v45, v27, v111 ; 105ADF1B v_mul_f32_e32 v27, v27, v112 ; 1036E11B v_mul_f32_e32 v42, v42, v46 ; 10545D2A v_mac_f32_e32 v42, v50, v18 ; 3E542532 v_mul_f32_e32 v43, v43, v46 ; 10565D2B v_mac_f32_e32 v43, v51, v18 ; 3E562533 v_mul_f32_e32 v44, v44, v46 ; 10585D2C v_mac_f32_e32 v44, v52, v18 ; 3E582534 v_mov_b32_e32 v56, v31 ; 7E70031F v_mov_b32_e32 v49, v32 ; 7E620320 v_mov_b32_e32 v50, v33 ; 7E640321 v_mov_b32_e32 v51, v34 ; 7E660322 v_mov_b32_e32 v52, v35 ; 7E680323 v_mac_f32_e32 v41, v53, v18 ; 3E522535 v_mov_b32_e32 v57, v35 ; 7E720323 v_mov_b32_e32 v51, v27 ; 7E66031B image_sample_c_l v58, 1, 0, 0, 0, 0, 0, 0, 0, v[54:57], s[32:39], s[28:31] ; F0B00100 00E83A36 v_mov_b32_e32 v56, v27 ; 7E70031B v_mac_f32_e32 v14, v53, v18 ; 3E1C2535 v_mov_b32_e32 v52, v35 ; 7E680323 v_mov_b32_e32 v57, v35 ; 7E720323 v_mov_b32_e32 v33, v45 ; 7E42032D v_mov_b32_e32 v115, v32 ; 7EE60320 v_mov_b32_e32 v116, v33 ; 7EE80321 v_mov_b32_e32 v117, v34 ; 7EEA0322 v_mov_b32_e32 v118, v35 ; 7EEC0323 image_sample_c_l v45, 1, 0, 0, 0, 0, 0, 0, 0, v[49:52], s[32:39], s[28:31] ; F0B00100 00E82D31 v_mov_b32_e32 v117, v31 ; 7EEA031F image_sample_c_l v31, 1, 0, 0, 0, 0, 0, 0, 0, v[54:57], s[32:39], s[28:31] ; F0B00100 00E81F36 v_mov_b32_e32 v118, v35 ; 7EEC0323 image_sample_c_l v49, 1, 0, 0, 0, 0, 0, 0, 0, v[115:118], s[32:39], s[28:31] ; F0B00100 00E83173 image_sample_c_l v50, 1, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[32:39], s[28:31] ; F0B00100 00E83220 v_mov_b32_e32 v34, v27 ; 7E44031B image_sample_c_l v27, 1, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[32:39], s[28:31] ; F0B00100 00E81B20 v_mul_f32_e32 v32, v66, v114 ; 1040E542 v_mul_f32_e32 v33, v67, v114 ; 1042E543 v_mul_f32_e32 v34, v68, v114 ; 1044E544 v_mac_f32_e32 v32, v71, v19 ; 3E402747 v_mac_f32_e32 v33, v72, v19 ; 3E422748 v_mac_f32_e32 v34, v73, v19 ; 3E442749 v_mul_f32_e32 v32, v32, v46 ; 10405D20 v_mac_f32_e32 v32, v84, v18 ; 3E402554 v_mul_f32_e32 v33, v33, v46 ; 10425D21 v_mac_f32_e32 v33, v85, v18 ; 3E422555 v_mul_f32_e32 v34, v34, v46 ; 10445D22 v_mac_f32_e32 v34, v86, v18 ; 3E442556 v_mul_f32_e32 v35, v87, v114 ; 1046E557 v_mul_f32_e32 v51, v88, v114 ; 1066E558 v_mac_f32_e32 v35, v89, v19 ; 3E462759 v_mac_f32_e32 v51, v90, v19 ; 3E66275A v_mul_f32_e32 v35, v35, v46 ; 10465D23 v_mac_f32_e32 v35, v91, v18 ; 3E46255B v_mul_f32_e32 v51, v51, v46 ; 10665D33 v_mac_f32_e32 v51, v92, v18 ; 3E66255C v_mul_f32_e32 v52, v93, v114 ; 1068E55D v_mul_f32_e32 v53, v94, v114 ; 106AE55E v_mac_f32_e32 v52, v95, v19 ; 3E68275F v_mac_f32_e32 v53, v96, v19 ; 3E6A2760 v_mul_f32_e32 v52, v52, v46 ; 10685D34 v_mac_f32_e32 v52, v97, v18 ; 3E682561 v_mul_f32_e32 v53, v53, v46 ; 106A5D35 v_mac_f32_e32 v53, v98, v18 ; 3E6A2562 v_mul_f32_e32 v54, v99, v114 ; 106CE563 v_mul_f32_e32 v55, v100, v114 ; 106EE564 v_mac_f32_e32 v54, v101, v19 ; 3E6C2765 v_mac_f32_e32 v55, v102, v19 ; 3E6E2766 v_mul_f32_e32 v54, v54, v46 ; 106C5D36 v_mac_f32_e32 v54, v103, v18 ; 3E6C2567 v_mul_f32_e32 v55, v55, v46 ; 106E5D37 v_mac_f32_e32 v55, v104, v18 ; 3E6E2568 v_mul_f32_e32 v56, v105, v114 ; 1070E569 v_mul_f32_e32 v57, v106, v114 ; 1072E56A v_mac_f32_e32 v56, v107, v19 ; 3E70276B v_mac_f32_e32 v57, v108, v19 ; 3E72276C v_max_f32_e32 v47, 0, v47 ; 205E5E80 v_min_f32_e32 v17, 1.0, v17 ; 1E2222F2 v_subrev_f32_e32 v17, v47, v17 ; 0A22232F v_rcp_f32_e32 v17, v17 ; 7E225511 v_mul_f32_e32 v56, v56, v46 ; 10705D38 v_mac_f32_e32 v56, v109, v18 ; 3E70256D v_mul_f32_e32 v57, v57, v46 ; 10725D39 v_mac_f32_e32 v57, v110, v18 ; 3E72256E v_subrev_f32_e32 v15, v47, v15 ; 0A1E1F2F v_mul_f32_e32 v15, v17, v15 ; 101E1F11 v_add_f32_e64 v15, 0, v15 clamp ; D206080F 00021E80 v_mov_b32_e32 v17, 0x40400000 ; 7E2202FF 40400000 v_mad_f32 v47, -2.0, v15, v17 ; D282002F 04461EF5 v_mul_f32_e32 v47, v47, v15 ; 105E1F2F v_max_f32_e32 v48, 0, v48 ; 20606080 v_min_f32_e32 v16, 1.0, v16 ; 1E2020F2 v_subrev_f32_e32 v16, v48, v16 ; 0A202130 v_rcp_f32_e32 v16, v16 ; 7E205510 v_mul_f32_e32 v59, v47, v15 ; 10761F2F v_mad_f32 v15, -v15, v47, 1.0 ; D282000F 23CA5F0F v_subrev_f32_e32 v13, v48, v13 ; 0A1A1B30 v_mul_f32_e32 v13, v16, v13 ; 101A1B10 v_add_f32_e64 v13, 0, v13 clamp ; D206080D 00021A80 v_max_f32_e32 v16, 0, v41 ; 20205280 v_min_f32_e32 v14, 1.0, v14 ; 1E1C1CF2 v_subrev_f32_e32 v14, v16, v14 ; 0A1C1D10 v_rcp_f32_e32 v14, v14 ; 7E1C550E v_mad_f32 v41, -2.0, v13, v17 ; D2820029 04461AF5 v_mul_f32_e32 v41, v41, v13 ; 10521B29 v_mul_f32_e32 v13, v41, v13 ; 101A1B29 v_subrev_f32_e32 v12, v16, v12 ; 0A181910 v_mul_f32_e32 v12, v14, v12 ; 1018190E v_add_f32_e64 v12, 0, v12 clamp ; D206080C 00021880 v_mac_f32_e32 v17, -2.0, v12 ; 3E2218F5 v_mul_f32_e32 v14, v17, v12 ; 101C1911 v_mul_f32_e32 v12, v14, v12 ; 1018190E v_mul_f32_e32 v14, v75, v59 ; 101C774B v_mul_f32_e32 v16, v74, v59 ; 1020774A v_max_f32_e32 v13, 0, v13 ; 201A1A80 v_min_f32_e32 v13, v15, v13 ; 1E1A1B0F v_subrev_f32_e32 v15, v13, v15 ; 0A1E1F0D v_max_f32_e32 v12, 0, v12 ; 20181880 v_min_f32_e32 v12, v15, v12 ; 1E18190F v_subrev_f32_e32 v15, v12, v15 ; 0A1E1F0C v_mac_f32_e32 v14, v37, v15 ; 3E1C1F25 v_mac_f32_e32 v16, v36, v15 ; 3E201F24 s_buffer_load_dword s0, s[16:19], 0x8 ; C2001108 v_mul_f32_e32 v17, v65, v59 ; 10227741 v_mul_f32_e32 v36, v64, v59 ; 10487740 v_mac_f32_e32 v17, v63, v15 ; 3E221F3F s_buffer_load_dword s1, s[16:19], 0x4 ; C2009104 v_mac_f32_e32 v36, v62, v15 ; 3E481F3E s_waitcnt vmcnt(9) ; BF8C0779 v_mul_f32_e32 v21, v21, v15 ; 102A1F15 v_mul_f32_e32 v20, v20, v15 ; 10281F14 v_mac_f32_e32 v21, v23, v59 ; 3E2A7717 v_mac_f32_e32 v20, v22, v59 ; 3E287716 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v22, s0, v24 ; 102C3000 s_buffer_load_dword s0, s[16:19], 0x9 ; C2001109 s_buffer_load_dword s2, s[16:19], 0x5 ; C2011105 s_buffer_load_dword s3, s[16:19], 0x6 ; C2019106 v_mul_f32_e32 v23, s1, v32 ; 102E4001 s_buffer_load_dword s1, s[16:19], 0xa ; C200910A v_mul_f32_e32 v23, v15, v23 ; 102E2F0F v_mac_f32_e32 v23, v59, v22 ; 3E2E2D3B s_buffer_load_dword s4, s[16:19], 0xc ; C202110C s_buffer_load_dword s5, s[16:19], 0xd ; C202910D s_buffer_load_dword s6, s[16:19], 0xe ; C203110E s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v22, s0, v25 ; 102C3200 v_mul_f32_e32 v24, s2, v33 ; 10304202 v_mul_f32_e32 v24, v15, v24 ; 1030310F v_mac_f32_e32 v24, v59, v22 ; 3E302D3B v_mul_f32_e32 v22, s1, v26 ; 102C3401 v_mul_f32_e32 v25, s3, v34 ; 10324403 v_mul_f32_e32 v25, v15, v25 ; 1032330F v_mac_f32_e32 v25, v59, v22 ; 3E322D3B v_mul_f32_e32 v22, v56, v15 ; 102C1F38 v_mul_f32_e32 v15, v57, v15 ; 101E1F39 v_mac_f32_e32 v22, v54, v59 ; 3E2C7736 v_mac_f32_e32 v15, v55, v59 ; 3E1E7737 v_mac_f32_e32 v14, v77, v13 ; 3E1C1B4D v_mac_f32_e32 v16, v76, v13 ; 3E201B4C v_mac_f32_e32 v17, v70, v13 ; 3E221B46 v_mac_f32_e32 v36, v69, v13 ; 3E481B45 v_mac_f32_e32 v21, v83, v13 ; 3E2A1B53 v_mac_f32_e32 v20, v82, v13 ; 3E281B52 v_mul_f32_e32 v26, s4, v28 ; 10343804 v_mac_f32_e32 v23, v26, v13 ; 3E2E1B1A v_mul_f32_e32 v26, s5, v29 ; 10343A05 v_mac_f32_e32 v24, v26, v13 ; 3E301B1A v_mul_f32_e32 v26, s6, v30 ; 10343C06 v_mac_f32_e32 v25, v26, v13 ; 3E321B1A v_mac_f32_e32 v22, v52, v13 ; 3E2C1B34 v_mac_f32_e32 v15, v53, v13 ; 3E1E1B35 v_mac_f32_e32 v14, v79, v12 ; 3E1C194F s_buffer_load_dword s0, s[16:19], 0x10 ; C2001110 v_mac_f32_e32 v16, v78, v12 ; 3E20194E s_buffer_load_dword s1, s[16:19], 0x11 ; C2009111 v_mac_f32_e32 v17, v81, v12 ; 3E221951 s_buffer_load_dword s2, s[16:19], 0x12 ; C2011112 v_mac_f32_e32 v36, v80, v12 ; 3E481950 v_mac_f32_e32 v21, v1, v12 ; 3E2A1901 v_mac_f32_e32 v20, v0, v12 ; 3E281900 s_buffer_load_dword s3, s[16:19], 0x38 ; C2019138 s_buffer_load_dword s4, s[16:19], 0x39 ; C2021139 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s0, v42 ; 10005400 v_mac_f32_e32 v23, v12, v0 ; 3E2E010C v_mul_f32_e32 v0, s1, v43 ; 10005601 v_mac_f32_e32 v24, v12, v0 ; 3E30010C v_mul_f32_e32 v0, s2, v44 ; 10005802 v_mul_f32_e32 v1, v9, v9 ; 10021309 v_mac_f32_e32 v1, v10, v10 ; 3E02150A v_mac_f32_e32 v1, v11, v11 ; 3E02170B v_rsq_clamp_f32_e32 v1, v1 ; 7E025901 v_mac_f32_e32 v25, v12, v0 ; 3E32010C v_mac_f32_e32 v22, v35, v12 ; 3E2C1923 v_mac_f32_e32 v15, v51, v12 ; 3E1E1933 v_mul_f32_e32 v0, v1, v9 ; 10001301 v_mul_f32_e32 v9, v1, v10 ; 10121501 v_mul_f32_e32 v1, v1, v11 ; 10021701 s_buffer_load_dword s2, s[16:19], 0x0 ; C2011100 v_mul_f32_e32 v10, v0, v9 ; 10141300 v_mad_f32 v11, -v9, v9, 1.0 ; D282000B 23CA1309 v_mul_f32_e32 v12, v10, v10 ; 1018150A v_mac_f32_e32 v12, v11, v11 ; 3E18170B v_mul_f32_e32 v13, v1, v9 ; 101A1301 v_mac_f32_e32 v12, v13, v13 ; 3E181B0D v_sqrt_f32_e32 v12, v12 ; 7E18670C v_mov_b32_e32 v26, 0x3727c5ac ; 7E3402FF 3727C5AC v_cmp_lt_f32_e32 vcc, v12, v26 ; 7C02350C v_mad_f32 v12, 2.0, v14, -1.0 ; D282000C 03CE1CF4 v_mad_f32 v14, 2.0, v16, -1.0 ; D282000E 03CE20F4 v_mad_f32 v16, -v14, v14, 1.0 ; D2820010 23CA1D0E v_mad_f32 v16, -v12, v12, v16 ; D2820010 2442190C s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v12, s2, v12 ; 10181802 v_mul_f32_e32 v14, s2, v14 ; 101C1C02 v_add_f32_e64 v16, 0, v16 clamp ; D2060810 00022080 v_sqrt_f32_e32 v16, v16 ; 7E206710 v_mul_f32_e32 v28, v12, v12 ; 1038190C v_mac_f32_e32 v28, v14, v14 ; 3E381D0E v_mac_f32_e32 v28, v16, v16 ; 3E382110 v_rsq_clamp_f32_e32 v28, v28 ; 7E38591C v_mul_f32_e64 v29, -v1, v0 ; D210001D 20020101 v_mul_f32_e64 v30, -v1, v9 ; D210001E 20021301 v_mul_f32_e32 v32, v29, v29 ; 10403B1D v_mac_f32_e32 v32, v30, v30 ; 3E403D1E v_mad_f32 v33, v1, v1, -1.0 ; D2820021 03CE0301 v_mac_f32_e32 v32, v33, v33 ; 3E404321 v_sqrt_f32_e32 v32, v32 ; 7E406720 v_cmp_lt_f32_e64 s[0:1], v32, v26 ; D0020000 00023520 v_mul_f32_e32 v12, v28, v12 ; 1018191C v_mul_f32_e32 v14, v28, v14 ; 101C1D1C v_mov_b32_e32 v32, 0x80000000 ; 7E4002FF 80000000 v_xor_b32_e32 v29, v29, v32 ; 3A3A411D v_cndmask_b32_e64 v29, v29, 0, s[0:1] ; D200001D 0001011D v_xor_b32_e32 v34, v10, v32 ; 3A44410A v_cndmask_b32_e64 v35, v34, 0, vcc ; D2000023 01A90122 v_mul_f32_e32 v37, v14, v29 ; 104A3B0E v_mad_f32 v35, v35, v12, -v37 ; D2820023 84961923 v_cndmask_b32_e64 v11, v11, 1.0, vcc ; D200000B 01A9E50B v_xor_b32_e32 v30, v30, v32 ; 3A3C411E v_cndmask_b32_e64 v30, v30, 0, s[0:1] ; D200001E 0001011E v_mul_f32_e32 v37, v14, v30 ; 104A3D0E v_mad_f32 v11, v11, v12, -v37 ; D282000B 8496190B v_xor_b32_e32 v13, v13, v32 ; 3A1A410D v_cndmask_b32_e64 v13, v13, 0, vcc ; D200000D 01A9010D v_cndmask_b32_e64 v33, v33, -1.0, s[0:1] ; D2000021 0001E721 v_mul_f32_e32 v14, v14, v33 ; 101C430E v_mad_f32 v12, v13, v12, -v14 ; D282000C 843A190D v_mad_f32 v13, -v0, v0, 1.0 ; D282000D 23CA0100 v_mul_f32_e32 v14, v13, v13 ; 101C1B0D v_mac_f32_e32 v14, v10, v10 ; 3E1C150A v_mul_f32_e32 v10, v1, v0 ; 10140101 v_mac_f32_e32 v14, v10, v10 ; 3E1C150A v_sqrt_f32_e32 v14, v14 ; 7E1C670E v_cmp_lt_f32_e32 vcc, v14, v26 ; 7C02350E v_mul_f32_e64 v14, -v9, v0 ; D210000E 20020109 v_mad_f32 v37, v9, v9, -1.0 ; D2820025 03CE1309 v_mul_f32_e32 v41, v14, v14 ; 10521D0E v_mac_f32_e32 v41, v37, v37 ; 3E524B25 v_mul_f32_e64 v42, -v9, v1 ; D210002A 20020309 v_mac_f32_e32 v41, v42, v42 ; 3E52552A v_sqrt_f32_e32 v41, v41 ; 7E526729 v_cmp_lt_f32_e64 s[0:1], v41, v26 ; D0020000 00023529 v_mul_f32_e32 v16, v28, v16 ; 1020211C v_mad_f32 v17, 2.0, v17, -1.0 ; D2820011 03CE22F4 v_mad_f32 v26, 2.0, v36, -1.0 ; D282001A 03CE48F4 v_mad_f32 v28, -v26, v26, 1.0 ; D282001C 23CA351A v_mad_f32 v28, -v17, v17, v28 ; D282001C 24722311 v_mul_f32_e32 v17, s2, v17 ; 10222202 v_mul_f32_e32 v26, s2, v26 ; 10343402 v_add_f32_e64 v28, 0, v28 clamp ; D206081C 00023880 v_sqrt_f32_e32 v28, v28 ; 7E38671C v_mul_f32_e32 v36, v17, v17 ; 10482311 v_mac_f32_e32 v36, v26, v26 ; 3E48351A v_mac_f32_e32 v36, v28, v28 ; 3E48391C v_rsq_clamp_f32_e32 v36, v36 ; 7E485924 v_mac_f32_e32 v35, v16, v0 ; 3E460110 v_mac_f32_e32 v11, v16, v9 ; 3E161310 v_mac_f32_e32 v12, v16, v1 ; 3E180310 v_mul_f32_e32 v16, v36, v17 ; 10202324 v_mul_f32_e32 v17, v36, v26 ; 10223524 v_mul_f32_e32 v26, v36, v28 ; 10343924 v_mul_f32_e32 v28, v17, v29 ; 10383B11 v_mul_f32_e32 v29, v17, v30 ; 103A3D11 v_mul_f32_e32 v17, v17, v33 ; 10224311 v_xor_b32_e32 v10, v10, v32 ; 3A14410A v_cndmask_b32_e64 v13, v13, 1.0, vcc ; D200000D 01A9E50D v_cndmask_b32_e64 v30, v34, 0, vcc ; D200001E 01A90122 v_cndmask_b32_e64 v10, v10, 0, vcc ; D200000A 01A9010A v_mad_f32 v28, v13, v16, -v28 ; D282001C 8472210D v_mad_f32 v29, v30, v16, -v29 ; D282001D 8476211E v_mad_f32 v16, v10, v16, -v17 ; D2820010 8446210A v_mac_f32_e32 v28, v26, v0 ; 3E38011A v_mac_f32_e32 v29, v26, v9 ; 3E3A131A v_mac_f32_e32 v16, v26, v1 ; 3E20031A v_mul_f32_e32 v17, v35, v114 ; 1022E523 v_mac_f32_e32 v17, v28, v19 ; 3E22271C v_mul_f32_e32 v11, v11, v114 ; 1016E50B v_mac_f32_e32 v11, v29, v19 ; 3E16271D v_mul_f32_e32 v12, v12, v114 ; 1018E50C v_mac_f32_e32 v12, v16, v19 ; 3E182710 v_xor_b32_e32 v14, v14, v32 ; 3A1C410E v_xor_b32_e32 v16, v42, v32 ; 3A20412A v_mad_f32 v19, 2.0, v21, -1.0 ; D2820013 03CE2AF4 v_mad_f32 v20, 2.0, v20, -1.0 ; D2820014 03CE28F4 v_mad_f32 v21, -v20, v20, 1.0 ; D2820015 23CA2914 v_mad_f32 v21, -v19, v19, v21 ; D2820015 24562713 v_mul_f32_e32 v19, s2, v19 ; 10262602 v_mul_f32_e32 v20, s2, v20 ; 10282802 v_add_f32_e64 v21, 0, v21 clamp ; D2060815 00022A80 v_sqrt_f32_e32 v21, v21 ; 7E2A6715 v_mul_f32_e32 v26, v19, v19 ; 10342713 v_mac_f32_e32 v26, v20, v20 ; 3E342914 v_mac_f32_e32 v26, v21, v21 ; 3E342B15 v_rsq_clamp_f32_e32 v26, v26 ; 7E34591A v_cndmask_b32_e64 v14, v14, 0, s[0:1] ; D200000E 0001010E v_cndmask_b32_e64 v28, v37, -1.0, s[0:1] ; D200001C 0001E725 v_cndmask_b32_e64 v16, v16, 0, s[0:1] ; D2000010 00010110 v_mul_f32_e32 v20, v26, v20 ; 1028291A v_mul_f32_e32 v14, v20, v14 ; 101C1D14 v_mul_f32_e32 v28, v20, v28 ; 10383914 v_mul_f32_e32 v16, v20, v16 ; 10202114 v_mul_f32_e32 v19, v26, v19 ; 1026271A v_mad_f32 v13, v13, v19, -v14 ; D282000D 843A270D v_mad_f32 v14, v30, v19, -v28 ; D282000E 8472271E v_mad_f32 v10, v10, v19, -v16 ; D282000A 8442270A v_mul_f32_e32 v16, v26, v21 ; 10202B1A v_mac_f32_e32 v13, v16, v0 ; 3E1A0110 v_mac_f32_e32 v14, v16, v9 ; 3E1C1310 v_mac_f32_e32 v10, v16, v1 ; 3E140310 v_mul_f32_e32 v0, v17, v46 ; 10005D11 v_mac_f32_e32 v0, v13, v18 ; 3E00250D v_mul_f32_e32 v1, v11, v46 ; 10025D0B v_mac_f32_e32 v1, v14, v18 ; 3E02250E s_buffer_load_dword s0, s[12:15], 0x4c ; C2000D4C s_buffer_load_dword s1, s[12:15], 0x4d ; C2008D4D v_mul_f32_e32 v9, v12, v46 ; 10125D0C v_mac_f32_e32 v9, v10, v18 ; 3E12250A s_buffer_load_dword s2, s[12:15], 0x4e ; C2010D4E v_mov_b32_e32 v10, 0x3d996887 ; 7E1402FF 3D996887 v_mul_f32_e32 v11, v10, v39 ; 10164F0A v_mac_f32_e32 v11, v10, v38 ; 3E164D0A v_mac_f32_e32 v11, v10, v40 ; 3E16510A v_mac_f32_e32 v11, v10, v58 ; 3E16750A s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v10, s0, v3 ; 0A140600 v_subrev_f32_e32 v12, s1, v5 ; 0A180A01 v_mul_f32_e32 v13, v10, v10 ; 101A150A v_mac_f32_e32 v13, v12, v12 ; 3E1A190C v_subrev_f32_e32 v14, s2, v4 ; 0A1C0802 v_mac_f32_e32 v13, v14, v14 ; 3E1A1D0E v_rsq_clamp_f32_e32 v13, v13 ; 7E1A590D v_mov_b32_e32 v16, 0x3dfd1fe6 ; 7E2002FF 3DFD1FE6 v_mul_f32_e32 v17, v16, v31 ; 10223F10 v_mac_f32_e32 v17, v16, v45 ; 3E225B10 v_mac_f32_e32 v17, v16, v49 ; 3E226310 v_mac_f32_e32 v17, v16, v50 ; 3E226510 v_mul_f32_e32 v10, v13, v10 ; 1014150D v_mul_f32_e32 v12, v13, v12 ; 1018190D v_mul_f32_e32 v10, v0, v10 ; 10141500 v_mad_f32 v10, -v12, v1, -v10 ; D282000A A42A030C v_mul_f32_e32 v12, v13, v14 ; 10181D0D v_mad_f32 v10, -v12, v9, v10 ; D282000A 242A130C v_mul_f32_e32 v12, v0, v10 ; 10181500 v_mac_f32_e32 v12, v0, v10 ; 3E181500 v_mul_f32_e32 v14, v1, v10 ; 101C1501 v_mac_f32_e32 v14, v1, v10 ; 3E1C1501 s_buffer_load_dword s5, s[8:11], 0x17 ; C2028917 s_buffer_load_dword s6, s[8:11], 0x18 ; C2030918 s_buffer_load_dword s7, s[8:11], 0x19 ; C2038919 s_buffer_load_dword s16, s[8:11], 0x1a ; C208091A v_mul_f32_e32 v16, v9, v10 ; 10201509 v_mac_f32_e32 v16, v9, v10 ; 3E201509 v_sub_f32_e32 v10, s0, v3 ; 08140600 v_mad_f32 v10, v10, v13, -v12 ; D282000A 84321B0A v_sub_f32_e32 v12, s1, v5 ; 08180A01 v_mad_f32 v12, v12, v13, -v14 ; D282000C 843A1B0C v_sub_f32_e32 v14, s2, v4 ; 081C0802 v_mad_f32 v13, v14, v13, -v16 ; D282000D 84421B0E s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v14, s6 ; 7E1C0206 v_mul_f32_e32 v14, s5, v14 ; 101C1C05 v_mov_b32_e32 v16, s7 ; 7E200207 v_mul_f32_e32 v16, s5, v16 ; 10202005 v_mov_b32_e32 v18, s16 ; 7E240210 v_mul_f32_e32 v18, s5, v18 ; 10242405 v_mul_f32_e32 v19, s4, v15 ; 10261E04 v_mad_f32 v15, -v15, s4, 1.0 ; D282000F 23C8090F v_mov_b32_e32 v20, 0x3e59999a ; 7E2802FF 3E59999A v_mul_f32_e32 v21, v20, v14 ; 102A1D14 v_mov_b32_e32 v26, 0x3f372474 ; 7E3402FF 3F372474 v_mac_f32_e32 v21, v26, v16 ; 3E2A211A s_buffer_load_dword s0, s[8:11], 0x13 ; C2000913 s_buffer_load_dword s1, s[8:11], 0x14 ; C2008914 s_buffer_load_dword s2, s[8:11], 0x15 ; C2010915 v_mov_b32_e32 v28, 0x3d93a92a ; 7E3802FF 3D93A92A v_mac_f32_e32 v21, v28, v18 ; 3E2A251C s_buffer_load_dword s4, s[8:11], 0x16 ; C2020916 v_mul_f32_e32 v14, v14, v15 ; 101C1F0E v_mac_f32_e32 v14, v21, v19 ; 3E1C2715 v_mul_f32_e32 v16, v16, v15 ; 10201F10 v_mac_f32_e32 v16, v21, v19 ; 3E202715 v_mul_f32_e32 v18, v18, v15 ; 10241F12 v_mac_f32_e32 v18, v21, v19 ; 3E242715 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v21, s1 ; 7E2A0201 v_mul_f32_e32 v21, s0, v21 ; 102A2A00 v_mov_b32_e32 v29, s2 ; 7E3A0202 v_mul_f32_e32 v29, s0, v29 ; 103A3A00 v_mul_f32_e32 v20, v20, v21 ; 10282B14 v_mac_f32_e32 v20, v26, v29 ; 3E283B1A v_mov_b32_e32 v26, s4 ; 7E340204 v_mul_f32_e32 v26, s0, v26 ; 10343400 v_mac_f32_e32 v20, v28, v26 ; 3E28351C s_buffer_load_dword s0, s[8:11], 0x8 ; C2000908 v_mul_f32_e32 v21, v21, v15 ; 102A1F15 v_mul_f32_e32 v28, v29, v15 ; 10381F1D v_mul_f32_e32 v15, v26, v15 ; 101E1F1A v_mac_f32_e32 v21, v20, v19 ; 3E2A2714 s_buffer_load_dword s1, s[8:11], 0x9 ; C2008909 v_mac_f32_e32 v28, v20, v19 ; 3E382714 v_mac_f32_e32 v15, v20, v19 ; 3E1E2714 s_buffer_load_dword s2, s[8:11], 0xa ; C201090A s_buffer_load_dword s4, s[8:11], 0xb ; C202090B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v10, s0, v10 ; 10141400 s_buffer_load_dword s0, s[8:11], 0x0 ; C2000900 s_buffer_load_dword s5, s[8:11], 0x1 ; C2028901 s_buffer_load_dword s6, s[8:11], 0x10 ; C2030910 s_buffer_load_dword s7, s[8:11], 0x11 ; C2038911 v_mac_f32_e32 v10, s1, v12 ; 3E141801 s_buffer_load_dword s1, s[8:11], 0x2 ; C2008902 s_buffer_load_dword s16, s[8:11], 0x12 ; C2080912 v_mac_f32_e32 v10, s2, v13 ; 3E141A02 s_buffer_load_dword s2, s[8:11], 0x4 ; C2010904 s_buffer_load_dword s17, s[8:11], 0x5 ; C2088905 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v12, s0, v0 ; 10180000 v_mad_f32 v12, -s5, v1, -v12 ; D282000C A4320205 v_mul_f32_e32 v0, s6, v0 ; 10000006 v_mac_f32_e32 v0, s7, v1 ; 3E000207 v_add_f32_e32 v1, v17, v11 ; 06021711 v_mad_f32 v11, -s1, v9, v12 ; D282000B 24321201 v_mac_f32_e32 v0, s16, v9 ; 3E001210 v_madmk_f32_e32 v1, v27, v1, 0x3e52efd4 ; 4002031B 3E52EFD4 v_add_f32_e64 v9, 0, v11 clamp ; D2060809 00021680 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_mul_f32_e32 v11, v9, v1 ; 10160309 v_mul_f32_e32 v12, s2, v11 ; 10181602 v_mac_f32_e32 v12, v21, v0 ; 3E180115 s_buffer_load_dword s0, s[8:11], 0x6 ; C2000906 v_mul_f32_e32 v13, s17, v11 ; 101A1611 v_mac_f32_e32 v13, v28, v0 ; 3E1A011C s_buffer_load_dword s1, s[12:15], 0x44 ; C2008D44 s_buffer_load_dword s2, s[12:15], 0x45 ; C2010D45 s_buffer_load_dword s5, s[12:15], 0x46 ; C2028D46 s_buffer_load_dword s6, s[12:15], 0x47 ; C2030D47 s_buffer_load_dword s7, s[8:11], 0xc ; C203890C s_buffer_load_dword s12, s[8:11], 0xd ; C206090D s_buffer_load_dword s8, s[8:11], 0xe ; C204090E s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v11, s0, v11 ; 10161600 v_mac_f32_e32 v11, v15, v0 ; 3E16010F v_mad_f32 v0, -v1, v9, 1.0 ; D2820000 23CA1301 v_mac_f32_e32 v12, v14, v0 ; 3E18010E v_add_f32_e64 v1, 0, v10 clamp ; D2060801 00021480 v_log_f32_e32 v1, v1 ; 7E024F01 v_mac_f32_e32 v13, v16, v0 ; 3E1A0110 v_mac_f32_e32 v11, v18, v0 ; 3E160112 v_mul_f32_e32 v0, s3, v22 ; 10002C03 v_mul_legacy_f32_e32 v1, s4, v1 ; 0E020204 v_mul_f32_e32 v6, v6, v23 ; 100C2F06 v_mul_f32_e32 v7, v7, v24 ; 100E3107 v_mul_f32_e32 v8, v8, v25 ; 10103308 v_exp_f32_e32 v1, v1 ; 7E024B01 v_mul_f32_e32 v0, v1, v0 ; 10000101 v_mac_f32_e32 v6, s7, v0 ; 3E0C0007 v_mac_f32_e32 v7, s12, v0 ; 3E0E000C v_mac_f32_e32 v8, s8, v0 ; 3E100008 v_mul_f32_e32 v0, v12, v6 ; 10000D0C v_mul_f32_e32 v1, v13, v7 ; 10020F0D v_mul_f32_e32 v6, v11, v8 ; 100C110B v_mul_f32_e32 v5, s2, v5 ; 100A0A02 v_mac_f32_e32 v5, s1, v3 ; 3E0A0601 v_mac_f32_e32 v5, s5, v4 ; 3E0A0805 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_add_f32_e32 v1, s6, v5 ; 06020A06 v_mul_f32_e32 v1, 0x3cf5c28f, v1 ; 100202FF 3CF5C28F v_cvt_pkrtz_f16_f32_e32 v1, v6, v1 ; 5E020306 exp 15, 0, 1, 0, 0, v0, v1, v0, v1 ; F800040F 01000100 s_waitcnt expcnt(0) ; BF8C070F v_cvt_pkrtz_f16_f32_e32 v0, v2, v2 ; 5E000502 exp 15, 1, 1, 1, 1, v0, v0, v0, v0 ; F8001C1F 00000000 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 96 VGPRS: 120 Code Size: 3748 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL IN[5] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL OUT[4], GENERIC[2] DCL OUT[5], GENERIC[3] DCL OUT[6], GENERIC[4] DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0..2] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL DCL TEMP[2..9], LOCAL IMM[0] FLT32 { 0.0000, 0.5000, 255.0000, -128.0000} IMM[1] UINT32 {3, 400, 304, 320} IMM[2] INT32 {2, 8, 1, 0} IMM[3] FLT32 { 1.0000, -64.0000, 0.0159, 2.0000} IMM[4] UINT32 {272, 4, 0, 12} IMM[5] FLT32 { 16.0000, 0.0000, 0.0000, 0.0000} IMM[6] UINT32 {28, 44, 60, 24} IMM[7] UINT32 {32, 16, 48, 20} IMM[8] INT32 {3, 0, 0, 0} IMM[9] UINT32 {36, 52, 8, 40} IMM[10] UINT32 {56, 0, 0, 0} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MOV TEMP[0].y, IMM[0].xxxx 4: SHL TEMP[2].x, IN[3].xxxx, IMM[2].xxxx 5: UADD TEMP[2].x, TEMP[2].xxxx, IMM[2].yyyy 6: I2F TEMP[2].x, TEMP[2].xxxx 7: ADD TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy 8: MUL TEMP[0].x, TEMP[2].xxxx, CONST[4][25].zzzz 9: ADD TEMP[0].xy, TEMP[0].xyyy, IN[5].xyyy 10: FLR TEMP[2].x, TEMP[0].xxxx 11: ADD TEMP[3].x, TEMP[0].xxxx, -TEMP[2].xxxx 12: MAD TEMP[0].x, TEMP[2].xxxx, CONST[4][25].wwww, TEMP[0].yyyy 13: MOV TEMP[3].y, TEMP[0].xxxx 14: MOV TEMP[0].y, IMM[0].xxxx 15: SHL TEMP[2].x, IN[3].yyyy, IMM[2].xxxx 16: UADD TEMP[2].x, IMM[2].yyyy, TEMP[2].xxxx 17: I2F TEMP[2].x, TEMP[2].xxxx 18: ADD TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx 19: MUL TEMP[0].x, TEMP[2].xxxx, CONST[4][25].zzzz 20: ADD TEMP[0].xy, TEMP[0].xyyy, IN[5].xyyy 21: FLR TEMP[2].x, TEMP[0].xxxx 22: ADD TEMP[4].x, TEMP[0].xxxx, -TEMP[2].xxxx 23: MAD TEMP[0].x, TEMP[2].xxxx, CONST[4][25].wwww, TEMP[0].yyyy 24: MOV TEMP[4].y, TEMP[0].xxxx 25: MOV TEMP[0].y, IMM[0].xxxx 26: SHL TEMP[2].x, IN[3].zzzz, IMM[2].xxxx 27: UADD TEMP[2].x, IMM[2].yyyy, TEMP[2].xxxx 28: I2F TEMP[2].x, TEMP[2].xxxx 29: ADD TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx 30: MUL TEMP[0].x, TEMP[2].xxxx, CONST[4][25].zzzz 31: ADD TEMP[0].xy, TEMP[0].xyyy, IN[5].xyyy 32: FLR TEMP[2].x, TEMP[0].xxxx 33: ADD TEMP[5].x, TEMP[0].xxxx, -TEMP[2].xxxx 34: MAD TEMP[0].x, TEMP[2].xxxx, CONST[4][25].wwww, TEMP[0].yyyy 35: MOV TEMP[5].y, TEMP[0].xxxx 36: MOV TEMP[0].xy, TEMP[5].xyyy 37: MOV TEMP[0].w, IMM[0].xxxx 38: TXL TEMP[0], TEMP[0], SAMP[0], 2D 39: MOV TEMP[2].xy, TEMP[4].xyyy 40: MOV TEMP[2].w, IMM[0].xxxx 41: TXL TEMP[2], TEMP[2], SAMP[0], 2D 42: MOV TEMP[6].xy, TEMP[3].xyyy 43: MOV TEMP[6].w, IMM[0].xxxx 44: TXL TEMP[6], TEMP[6], SAMP[0], 2D 45: MUL TEMP[6], IN[4].xxxx, TEMP[6] 46: MAD TEMP[2], IN[4].yyyy, TEMP[2], TEMP[6] 47: MAD TEMP[0], IN[4].zzzz, TEMP[0], TEMP[2] 48: MOV TEMP[2].xy, TEMP[5].xyyy 49: MOV TEMP[2].w, IMM[0].xxxx 50: TXL TEMP[2], TEMP[2], SAMP[0], 2D, IMM[2].zwz 51: MOV TEMP[6].xy, TEMP[3].xyyy 52: MOV TEMP[6].w, IMM[0].xxxx 53: TXL TEMP[6], TEMP[6], SAMP[0], 2D, IMM[2].zwz 54: MOV TEMP[7].xy, TEMP[4].xyyy 55: MOV TEMP[7].w, IMM[0].xxxx 56: TXL TEMP[7], TEMP[7], SAMP[0], 2D, IMM[2].zwz 57: MUL TEMP[7], IN[4].yyyy, TEMP[7] 58: MAD TEMP[6], IN[4].xxxx, TEMP[6], TEMP[7] 59: MAD TEMP[2], IN[4].zzzz, TEMP[2], TEMP[6] 60: MOV TEMP[5].xy, TEMP[5].xyyy 61: MOV TEMP[5].w, IMM[0].xxxx 62: TXL TEMP[5], TEMP[5], SAMP[0], 2D, IMM[2].xwx 63: MOV TEMP[4].xy, TEMP[4].xyyy 64: MOV TEMP[4].w, IMM[0].xxxx 65: TXL TEMP[4], TEMP[4], SAMP[0], 2D, IMM[2].xwx 66: MOV TEMP[3].xy, TEMP[3].xyyy 67: MOV TEMP[3].w, IMM[0].xxxx 68: TXL TEMP[3], TEMP[3], SAMP[0], 2D, IMM[2].xwx 69: MUL TEMP[3], IN[4].xxxx, TEMP[3] 70: MAD TEMP[3], IN[4].yyyy, TEMP[4], TEMP[3] 71: MAD TEMP[3], IN[4].zzzz, TEMP[5], TEMP[3] 72: MAD TEMP[4], IN[1], IMM[0].zzzz, IMM[0].wwww 73: FSLT TEMP[5], TEMP[4], IMM[0].xxxx 74: AND TEMP[5], TEMP[5], IMM[3].xxxx 75: ABS TEMP[4], TEMP[4] 76: ADD TEMP[4], TEMP[4], -TEMP[5] 77: ADD TEMP[4], TEMP[4], IMM[3].yyyy 78: FSLT TEMP[6], TEMP[4], IMM[0].xxxx 79: AND TEMP[6], TEMP[6], IMM[3].xxxx 80: ABS TEMP[4], TEMP[4] 81: ADD TEMP[4], TEMP[4], -TEMP[6] 82: MUL TEMP[4].xy, TEMP[4], IMM[3].zzzz 83: MOV TEMP[7].x, TEMP[4].xxxx 84: MOV TEMP[7].y, TEMP[4].yyyy 85: ADD TEMP[8].x, IMM[3].xxxx, -TEMP[4].xxxx 86: ADD TEMP[4].x, TEMP[8].xxxx, -TEMP[4].yyyy 87: MOV TEMP[7].z, TEMP[4].xxxx 88: DP3 TEMP[4].x, TEMP[7].xyzz, TEMP[7].xyzz 89: RSQ TEMP[4].x, TEMP[4].xxxx 90: MUL TEMP[4].xyz, TEMP[7].xyzz, TEMP[4].xxxx 91: MUL TEMP[6], TEMP[6], IMM[3].wwww 92: ADD TEMP[6].xy, IMM[3].xxxx, -TEMP[6] 93: MUL TEMP[6].xy, TEMP[4].xyyy, TEMP[6].xyyy 94: MOV TEMP[7].w, IMM[0].xxxx 95: MOV TEMP[7].x, TEMP[6].xxxx 96: MOV TEMP[7].y, TEMP[6].yyyy 97: MUL TEMP[5].x, TEMP[5].xxxx, IMM[3].wwww 98: ADD TEMP[5].x, IMM[3].xxxx, -TEMP[5].xxxx 99: MUL TEMP[4].x, TEMP[5].xxxx, TEMP[4].zzzz 100: MOV TEMP[7].z, TEMP[4].xxxx 101: DP4 TEMP[4].x, TEMP[7], TEMP[0] 102: DP4 TEMP[5].x, TEMP[7], TEMP[2] 103: MOV TEMP[4].y, TEMP[5].xxxx 104: DP4 TEMP[5].x, TEMP[7], TEMP[3] 105: MOV TEMP[4].z, TEMP[5].xxxx 106: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 107: RSQ TEMP[5].x, TEMP[5].xxxx 108: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx 109: MOV TEMP[5].xy, IN[5].xyyy 110: MOV TEMP[5].w, IMM[0].xxxx 111: TXL TEMP[5].z, TEMP[5], SAMP[0], 2D 112: MUL TEMP[5].xyz, IN[0].xyzz, TEMP[5].zzzz 113: MOV TEMP[6].w, IMM[3].xxxx 114: MOV TEMP[6].x, TEMP[5].xxxx 115: MOV TEMP[6].y, TEMP[5].yyyy 116: MOV TEMP[6].z, TEMP[5].zzzz 117: DP4 TEMP[0].x, TEMP[6], TEMP[0] 118: DP4 TEMP[2].x, TEMP[6], TEMP[2] 119: DP4 TEMP[3].x, TEMP[6], TEMP[3] 120: MOV TEMP[5].x, TEMP[0].xxxx 121: MOV TEMP[5].y, TEMP[2].xxxx 122: MOV TEMP[5].z, TEMP[3].xxxx 123: ADD TEMP[5].xyz, TEMP[5].xyzz, -CONST[4][19].xyzz 124: MOV TEMP[6].x, TEMP[0].xxxx 125: MOV TEMP[6].y, TEMP[2].xxxx 126: MOV TEMP[6].z, TEMP[3].xxxx 127: DP3 TEMP[7].x, CONST[4][20].xyzz, TEMP[5].xyzz 128: MOV TEMP[6].w, TEMP[7].xxxx 129: MOV TEMP[7].w, IMM[3].xxxx 130: MOV TEMP[7].x, TEMP[0].xxxx 131: MOV TEMP[7].y, TEMP[2].xxxx 132: MOV TEMP[7].z, TEMP[3].xxxx 133: DP4 TEMP[0].x, TEMP[7], CONST[4][17] 134: MOV TEMP[2].x, TEMP[4].xxxx 135: MOV TEMP[2].y, TEMP[4].yyyy 136: MOV TEMP[2].z, TEMP[4].zzzz 137: DP3 TEMP[3].x, TEMP[5].xyzz, TEMP[5].xyzz 138: RSQ TEMP[3].x, TEMP[3].xxxx 139: MUL TEMP[3].xyz, TEMP[5].xyzz, TEMP[3].xxxx 140: DP3 TEMP[5].x, TEMP[4].xyzz, CONST[5][0].xyzz 141: MUL TEMP[4].xyz, TEMP[5].xxxx, TEMP[4].xyzz 142: MUL TEMP[4].xyz, IMM[3].wwww, TEMP[4].xyzz 143: ADD TEMP[4].xyz, CONST[5][0].xyzz, -TEMP[4].xyzz 144: DP3 TEMP[3].x, -TEMP[3].xyzz, TEMP[4].xyzz 145: MOV_SAT TEMP[3].x, TEMP[3].xxxx 146: POW TEMP[3].x, TEMP[3].xxxx, IMM[5].xxxx 147: MOV_SAT TEMP[3].x, TEMP[3].xxxx 148: MOV TEMP[2].w, TEMP[3].xxxx 149: MOV TEMP[3].x, CONST[4][0].wwww 150: MOV TEMP[3].y, CONST[4][1].wwww 151: MOV TEMP[3].z, CONST[4][2].wwww 152: MOV TEMP[3].w, CONST[4][3].wwww 153: DP4 TEMP[3].x, TEMP[7], TEMP[3] 154: MAD TEMP[4].xy, IN[2].xyyy, CONST[1][1].zwww, CONST[1][2].xyyy 155: MOV TEMP[5].xy, IN[5].xyyy 156: MOV TEMP[5].w, IMM[0].xxxx 157: TXL TEMP[5], TEMP[5], SAMP[0], 2D, IMM[8].xyx 158: MOV TEMP[8].x, CONST[4][0].xxxx 159: MOV TEMP[8].y, CONST[4][1].xxxx 160: MOV TEMP[8].z, CONST[4][2].xxxx 161: MOV TEMP[8].w, CONST[4][3].xxxx 162: DP4 TEMP[8].x, TEMP[7], TEMP[8] 163: MOV TEMP[9].x, CONST[4][0].yyyy 164: MOV TEMP[9].y, CONST[4][1].yyyy 165: MOV TEMP[9].z, CONST[4][2].yyyy 166: MOV TEMP[9].w, CONST[4][3].yyyy 167: DP4 TEMP[9].x, TEMP[7], TEMP[9] 168: MOV TEMP[8].y, -TEMP[9].xxxx 169: MOV TEMP[9].x, CONST[4][0].zzzz 170: MOV TEMP[9].y, CONST[4][1].zzzz 171: MOV TEMP[9].z, CONST[4][2].zzzz 172: MOV TEMP[9].w, CONST[4][3].zzzz 173: DP4 TEMP[7].x, TEMP[7], TEMP[9] 174: MAD TEMP[7].x, IMM[3].wwww, TEMP[7].xxxx, -TEMP[3].xxxx 175: MOV TEMP[8].z, TEMP[7].xxxx 176: MOV TEMP[8].w, TEMP[3].xxxx 177: MOV TEMP[3].yzw, TEMP[1].zyzw 178: MOV TEMP[3].x, TEMP[0].xxxx 179: MOV TEMP[1], TEMP[3] 180: MOV OUT[1], TEMP[1] 181: MOV OUT[2].xy, TEMP[4].xyxx 182: MOV OUT[4], TEMP[2] 183: MOV OUT[6], TEMP[0].xxxx 184: MOV OUT[5], TEMP[5] 185: MOV OUT[0], TEMP[8] 186: MOV OUT[3], TEMP[6] 187: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %17 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4 %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 %19 = call float @llvm.SI.load.const(<16 x i8> %18, i32 0) %20 = call float @llvm.SI.load.const(<16 x i8> %18, i32 4) %21 = call float @llvm.SI.load.const(<16 x i8> %18, i32 8) %22 = call float @llvm.SI.load.const(<16 x i8> %18, i32 12) %23 = call float @llvm.SI.load.const(<16 x i8> %18, i32 16) %24 = call float @llvm.SI.load.const(<16 x i8> %18, i32 20) %25 = call float @llvm.SI.load.const(<16 x i8> %18, i32 24) %26 = call float @llvm.SI.load.const(<16 x i8> %18, i32 28) %27 = call float @llvm.SI.load.const(<16 x i8> %18, i32 32) %28 = call float @llvm.SI.load.const(<16 x i8> %18, i32 36) %29 = call float @llvm.SI.load.const(<16 x i8> %18, i32 40) %30 = call float @llvm.SI.load.const(<16 x i8> %18, i32 44) %31 = call float @llvm.SI.load.const(<16 x i8> %18, i32 48) %32 = call float @llvm.SI.load.const(<16 x i8> %18, i32 52) %33 = call float @llvm.SI.load.const(<16 x i8> %18, i32 56) %34 = call float @llvm.SI.load.const(<16 x i8> %18, i32 60) %35 = call float @llvm.SI.load.const(<16 x i8> %18, i32 272) %36 = call float @llvm.SI.load.const(<16 x i8> %18, i32 276) %37 = call float @llvm.SI.load.const(<16 x i8> %18, i32 280) %38 = call float @llvm.SI.load.const(<16 x i8> %18, i32 284) %39 = call float @llvm.SI.load.const(<16 x i8> %18, i32 304) %40 = call float @llvm.SI.load.const(<16 x i8> %18, i32 308) %41 = call float @llvm.SI.load.const(<16 x i8> %18, i32 312) %42 = call float @llvm.SI.load.const(<16 x i8> %18, i32 320) %43 = call float @llvm.SI.load.const(<16 x i8> %18, i32 324) %44 = call float @llvm.SI.load.const(<16 x i8> %18, i32 328) %45 = call float @llvm.SI.load.const(<16 x i8> %18, i32 408) %46 = call float @llvm.SI.load.const(<16 x i8> %18, i32 412) %47 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 5 %48 = load <16 x i8>, <16 x i8> addrspace(2)* %47, align 16, !tbaa !0 %49 = call float @llvm.SI.load.const(<16 x i8> %48, i32 0) %50 = call float @llvm.SI.load.const(<16 x i8> %48, i32 4) %51 = call float @llvm.SI.load.const(<16 x i8> %48, i32 8) %52 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %53 = load <8 x i32>, <8 x i32> addrspace(2)* %52, align 32, !tbaa !0 %54 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %55 = load <4 x i32>, <4 x i32> addrspace(2)* %54, align 16, !tbaa !0 %56 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %57 = load <16 x i8>, <16 x i8> addrspace(2)* %56, align 16, !tbaa !0 %58 = add i32 %5, %7 %59 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %57, i32 0, i32 %58) %60 = extractelement <4 x float> %59, i32 0 %61 = extractelement <4 x float> %59, i32 1 %62 = extractelement <4 x float> %59, i32 2 %63 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %64 = load <16 x i8>, <16 x i8> addrspace(2)* %63, align 16, !tbaa !0 %65 = add i32 %5, %7 %66 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %64, i32 0, i32 %65) %67 = extractelement <4 x float> %66, i32 0 %68 = extractelement <4 x float> %66, i32 1 %69 = extractelement <4 x float> %66, i32 2 %70 = extractelement <4 x float> %66, i32 3 %71 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0 %73 = add i32 %5, %7 %74 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %72, i32 0, i32 %73) %75 = extractelement <4 x float> %74, i32 0 %76 = extractelement <4 x float> %74, i32 1 %77 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %78 = load <16 x i8>, <16 x i8> addrspace(2)* %77, align 16, !tbaa !0 %79 = add i32 %5, %7 %80 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %78, i32 0, i32 %79) %81 = extractelement <4 x float> %80, i32 0 %82 = extractelement <4 x float> %80, i32 1 %83 = extractelement <4 x float> %80, i32 2 %84 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %85 = load <16 x i8>, <16 x i8> addrspace(2)* %84, align 16, !tbaa !0 %86 = add i32 %5, %7 %87 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %85, i32 0, i32 %86) %88 = extractelement <4 x float> %87, i32 0 %89 = extractelement <4 x float> %87, i32 1 %90 = extractelement <4 x float> %87, i32 2 %91 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 5 %92 = load <16 x i8>, <16 x i8> addrspace(2)* %91, align 16, !tbaa !0 %93 = add i32 %10, %6 %94 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %92, i32 0, i32 %93) %95 = extractelement <4 x float> %94, i32 0 %96 = extractelement <4 x float> %94, i32 1 %97 = bitcast float %81 to i32 %98 = shl i32 %97, 2 %99 = add i32 %98, 8 %100 = sitofp i32 %99 to float %101 = fadd float %100, 5.000000e-01 %102 = fmul float %101, %45 %103 = fadd float %102, %95 %104 = fadd float %96, 0.000000e+00 %105 = call float @floor(float %103) %106 = fsub float %103, %105 %107 = fmul float %105, %46 %108 = fadd float %107, %104 %109 = bitcast float %82 to i32 %110 = shl i32 %109, 2 %111 = add i32 %110, 8 %112 = sitofp i32 %111 to float %113 = fadd float %112, 5.000000e-01 %114 = fmul float %113, %45 %115 = fadd float %114, %95 %116 = fadd float %96, 0.000000e+00 %117 = call float @floor(float %115) %118 = fsub float %115, %117 %119 = fmul float %117, %46 %120 = fadd float %119, %116 %121 = bitcast float %83 to i32 %122 = shl i32 %121, 2 %123 = add i32 %122, 8 %124 = sitofp i32 %123 to float %125 = fadd float %124, 5.000000e-01 %126 = fmul float %125, %45 %127 = fadd float %126, %95 %128 = fadd float %96, 0.000000e+00 %129 = call float @floor(float %127) %130 = fsub float %127, %129 %131 = fmul float %129, %46 %132 = fadd float %131, %128 %133 = bitcast float %130 to i32 %134 = bitcast float %132 to i32 %135 = insertelement <4 x i32> undef, i32 %133, i32 0 %136 = insertelement <4 x i32> %135, i32 %134, i32 1 %137 = insertelement <4 x i32> %136, i32 0, i32 2 %138 = bitcast <8 x i32> %53 to <32 x i8> %139 = bitcast <4 x i32> %55 to <16 x i8> %140 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %137, <32 x i8> %138, <16 x i8> %139, i32 2) %141 = extractelement <4 x float> %140, i32 0 %142 = extractelement <4 x float> %140, i32 1 %143 = extractelement <4 x float> %140, i32 2 %144 = extractelement <4 x float> %140, i32 3 %145 = bitcast float %118 to i32 %146 = bitcast float %120 to i32 %147 = insertelement <4 x i32> undef, i32 %145, i32 0 %148 = insertelement <4 x i32> %147, i32 %146, i32 1 %149 = insertelement <4 x i32> %148, i32 0, i32 2 %150 = bitcast <8 x i32> %53 to <32 x i8> %151 = bitcast <4 x i32> %55 to <16 x i8> %152 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %149, <32 x i8> %150, <16 x i8> %151, i32 2) %153 = extractelement <4 x float> %152, i32 0 %154 = extractelement <4 x float> %152, i32 1 %155 = extractelement <4 x float> %152, i32 2 %156 = extractelement <4 x float> %152, i32 3 %157 = bitcast float %106 to i32 %158 = bitcast float %108 to i32 %159 = insertelement <4 x i32> undef, i32 %157, i32 0 %160 = insertelement <4 x i32> %159, i32 %158, i32 1 %161 = insertelement <4 x i32> %160, i32 0, i32 2 %162 = bitcast <8 x i32> %53 to <32 x i8> %163 = bitcast <4 x i32> %55 to <16 x i8> %164 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %161, <32 x i8> %162, <16 x i8> %163, i32 2) %165 = extractelement <4 x float> %164, i32 0 %166 = extractelement <4 x float> %164, i32 1 %167 = extractelement <4 x float> %164, i32 2 %168 = extractelement <4 x float> %164, i32 3 %169 = fmul float %88, %165 %170 = fmul float %88, %166 %171 = fmul float %88, %167 %172 = fmul float %88, %168 %173 = fmul float %89, %153 %174 = fadd float %173, %169 %175 = fmul float %89, %154 %176 = fadd float %175, %170 %177 = fmul float %89, %155 %178 = fadd float %177, %171 %179 = fmul float %89, %156 %180 = fadd float %179, %172 %181 = fmul float %90, %141 %182 = fadd float %181, %174 %183 = fmul float %90, %142 %184 = fadd float %183, %176 %185 = fmul float %90, %143 %186 = fadd float %185, %178 %187 = fmul float %90, %144 %188 = fadd float %187, %180 %189 = bitcast float %130 to i32 %190 = bitcast float %132 to i32 %191 = insertelement <4 x i32> , i32 %189, i32 1 %192 = insertelement <4 x i32> %191, i32 %190, i32 2 %193 = insertelement <4 x i32> %192, i32 0, i32 3 %194 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %193, <8 x i32> %53, <4 x i32> %55, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %195 = extractelement <4 x float> %194, i32 0 %196 = extractelement <4 x float> %194, i32 1 %197 = extractelement <4 x float> %194, i32 2 %198 = extractelement <4 x float> %194, i32 3 %199 = bitcast float %106 to i32 %200 = bitcast float %108 to i32 %201 = insertelement <4 x i32> , i32 %199, i32 1 %202 = insertelement <4 x i32> %201, i32 %200, i32 2 %203 = insertelement <4 x i32> %202, i32 0, i32 3 %204 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %203, <8 x i32> %53, <4 x i32> %55, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %205 = extractelement <4 x float> %204, i32 0 %206 = extractelement <4 x float> %204, i32 1 %207 = extractelement <4 x float> %204, i32 2 %208 = extractelement <4 x float> %204, i32 3 %209 = bitcast float %118 to i32 %210 = bitcast float %120 to i32 %211 = insertelement <4 x i32> , i32 %209, i32 1 %212 = insertelement <4 x i32> %211, i32 %210, i32 2 %213 = insertelement <4 x i32> %212, i32 0, i32 3 %214 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %213, <8 x i32> %53, <4 x i32> %55, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %215 = extractelement <4 x float> %214, i32 0 %216 = extractelement <4 x float> %214, i32 1 %217 = extractelement <4 x float> %214, i32 2 %218 = extractelement <4 x float> %214, i32 3 %219 = fmul float %89, %215 %220 = fmul float %89, %216 %221 = fmul float %89, %217 %222 = fmul float %89, %218 %223 = fmul float %88, %205 %224 = fadd float %223, %219 %225 = fmul float %88, %206 %226 = fadd float %225, %220 %227 = fmul float %88, %207 %228 = fadd float %227, %221 %229 = fmul float %88, %208 %230 = fadd float %229, %222 %231 = fmul float %90, %195 %232 = fadd float %231, %224 %233 = fmul float %90, %196 %234 = fadd float %233, %226 %235 = fmul float %90, %197 %236 = fadd float %235, %228 %237 = fmul float %90, %198 %238 = fadd float %237, %230 %239 = bitcast float %130 to i32 %240 = bitcast float %132 to i32 %241 = insertelement <4 x i32> , i32 %239, i32 1 %242 = insertelement <4 x i32> %241, i32 %240, i32 2 %243 = insertelement <4 x i32> %242, i32 0, i32 3 %244 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %243, <8 x i32> %53, <4 x i32> %55, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %245 = extractelement <4 x float> %244, i32 0 %246 = extractelement <4 x float> %244, i32 1 %247 = extractelement <4 x float> %244, i32 2 %248 = extractelement <4 x float> %244, i32 3 %249 = bitcast float %118 to i32 %250 = bitcast float %120 to i32 %251 = insertelement <4 x i32> , i32 %249, i32 1 %252 = insertelement <4 x i32> %251, i32 %250, i32 2 %253 = insertelement <4 x i32> %252, i32 0, i32 3 %254 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %253, <8 x i32> %53, <4 x i32> %55, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %255 = extractelement <4 x float> %254, i32 0 %256 = extractelement <4 x float> %254, i32 1 %257 = extractelement <4 x float> %254, i32 2 %258 = extractelement <4 x float> %254, i32 3 %259 = bitcast float %106 to i32 %260 = bitcast float %108 to i32 %261 = insertelement <4 x i32> , i32 %259, i32 1 %262 = insertelement <4 x i32> %261, i32 %260, i32 2 %263 = insertelement <4 x i32> %262, i32 0, i32 3 %264 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %263, <8 x i32> %53, <4 x i32> %55, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %265 = extractelement <4 x float> %264, i32 0 %266 = extractelement <4 x float> %264, i32 1 %267 = extractelement <4 x float> %264, i32 2 %268 = extractelement <4 x float> %264, i32 3 %269 = fmul float %88, %265 %270 = fmul float %88, %266 %271 = fmul float %88, %267 %272 = fmul float %88, %268 %273 = fmul float %89, %255 %274 = fadd float %273, %269 %275 = fmul float %89, %256 %276 = fadd float %275, %270 %277 = fmul float %89, %257 %278 = fadd float %277, %271 %279 = fmul float %89, %258 %280 = fadd float %279, %272 %281 = fmul float %90, %245 %282 = fadd float %281, %274 %283 = fmul float %90, %246 %284 = fadd float %283, %276 %285 = fmul float %90, %247 %286 = fadd float %285, %278 %287 = fmul float %90, %248 %288 = fadd float %287, %280 %289 = fmul float %67, 2.550000e+02 %290 = fadd float %289, -1.280000e+02 %291 = fmul float %68, 2.550000e+02 %292 = fadd float %291, -1.280000e+02 %293 = fmul float %69, 2.550000e+02 %294 = fadd float %293, -1.280000e+02 %295 = fmul float %70, 2.550000e+02 %296 = fadd float %295, -1.280000e+02 %297 = fcmp olt float %290, 0.000000e+00 %298 = fcmp olt float %292, 0.000000e+00 %299 = fcmp olt float %294, 0.000000e+00 %300 = fcmp olt float %296, 0.000000e+00 %301 = select i1 %297, float 1.000000e+00, float 0.000000e+00 %302 = call float @fabs(float %290) %303 = call float @fabs(float %292) %304 = call float @fabs(float %294) %305 = call float @fabs(float %296) %306 = fsub float %302, %301 %307 = select i1 %298, float -1.000000e+00, float -0.000000e+00 %308 = fadd float %303, %307 %309 = select i1 %299, float -1.000000e+00, float -0.000000e+00 %310 = fadd float %304, %309 %311 = select i1 %300, float -1.000000e+00, float -0.000000e+00 %312 = fadd float %305, %311 %313 = fadd float %306, -6.400000e+01 %314 = fadd float %308, -6.400000e+01 %315 = fadd float %310, -6.400000e+01 %316 = fadd float %312, -6.400000e+01 %317 = fcmp olt float %313, 0.000000e+00 %318 = fcmp olt float %314, 0.000000e+00 %319 = select i1 %317, float 1.000000e+00, float 0.000000e+00 %320 = select i1 %318, float 1.000000e+00, float 0.000000e+00 %321 = call float @fabs(float %313) %322 = call float @fabs(float %314) %323 = call float @fabs(float %315) %324 = call float @fabs(float %316) %325 = fsub float %321, %319 %326 = fsub float %322, %320 %327 = fmul float %325, 0x3F90410420000000 %328 = fmul float %326, 0x3F90410420000000 %329 = fsub float 1.000000e+00, %327 %330 = fsub float %329, %328 %331 = fmul float %327, %327 %332 = fmul float %328, %328 %333 = fadd float %332, %331 %334 = fmul float %330, %330 %335 = fadd float %333, %334 %336 = call float @llvm.AMDGPU.rsq.clamped.f32(float %335) %337 = fmul float %327, %336 %338 = fmul float %328, %336 %339 = fmul float %330, %336 %340 = fmul float %319, 2.000000e+00 %341 = fmul float %320, 2.000000e+00 %342 = fsub float 1.000000e+00, %340 %343 = fsub float 1.000000e+00, %341 %344 = fmul float %337, %342 %345 = fmul float %338, %343 %346 = fmul float %301, 2.000000e+00 %347 = fsub float 1.000000e+00, %346 %348 = fmul float %347, %339 %349 = fmul float %344, %182 %350 = fmul float %345, %184 %351 = fadd float %349, %350 %352 = fmul float %348, %186 %353 = fadd float %351, %352 %354 = fmul float %188, 0.000000e+00 %355 = fadd float %353, %354 %356 = fmul float %344, %232 %357 = fmul float %345, %234 %358 = fadd float %356, %357 %359 = fmul float %348, %236 %360 = fadd float %358, %359 %361 = fmul float %238, 0.000000e+00 %362 = fadd float %360, %361 %363 = fmul float %344, %282 %364 = fmul float %345, %284 %365 = fadd float %363, %364 %366 = fmul float %348, %286 %367 = fadd float %365, %366 %368 = fmul float %288, 0.000000e+00 %369 = fadd float %367, %368 %370 = fmul float %355, %355 %371 = fmul float %362, %362 %372 = fadd float %371, %370 %373 = fmul float %369, %369 %374 = fadd float %372, %373 %375 = call float @llvm.AMDGPU.rsq.clamped.f32(float %374) %376 = fmul float %355, %375 %377 = fmul float %362, %375 %378 = fmul float %369, %375 %379 = bitcast float %95 to i32 %380 = bitcast float %96 to i32 %381 = insertelement <4 x i32> undef, i32 %379, i32 0 %382 = insertelement <4 x i32> %381, i32 %380, i32 1 %383 = insertelement <4 x i32> %382, i32 0, i32 2 %384 = bitcast <8 x i32> %53 to <32 x i8> %385 = bitcast <4 x i32> %55 to <16 x i8> %386 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %383, <32 x i8> %384, <16 x i8> %385, i32 2) %387 = extractelement <4 x float> %386, i32 2 %388 = fmul float %60, %387 %389 = fmul float %61, %387 %390 = fmul float %62, %387 %391 = fmul float %388, %182 %392 = fmul float %389, %184 %393 = fadd float %391, %392 %394 = fmul float %390, %186 %395 = fadd float %393, %394 %396 = fadd float %395, %188 %397 = fmul float %388, %232 %398 = fmul float %389, %234 %399 = fadd float %397, %398 %400 = fmul float %390, %236 %401 = fadd float %399, %400 %402 = fadd float %401, %238 %403 = fmul float %388, %282 %404 = fmul float %389, %284 %405 = fadd float %403, %404 %406 = fmul float %390, %286 %407 = fadd float %405, %406 %408 = fadd float %407, %288 %409 = fsub float %396, %39 %410 = fsub float %402, %40 %411 = fsub float %408, %41 %412 = fmul float %42, %409 %413 = fmul float %43, %410 %414 = fadd float %413, %412 %415 = fmul float %44, %411 %416 = fadd float %414, %415 %417 = fmul float %396, %35 %418 = fmul float %402, %36 %419 = fadd float %417, %418 %420 = fmul float %408, %37 %421 = fadd float %419, %420 %422 = fadd float %421, %38 %423 = fmul float %409, %409 %424 = fmul float %410, %410 %425 = fadd float %424, %423 %426 = fmul float %411, %411 %427 = fadd float %425, %426 %428 = call float @llvm.AMDGPU.rsq.clamped.f32(float %427) %429 = fmul float %409, %428 %430 = fmul float %410, %428 %431 = fmul float %411, %428 %432 = fmul float %376, %49 %433 = fmul float %377, %50 %434 = fadd float %433, %432 %435 = fmul float %378, %51 %436 = fadd float %434, %435 %437 = fmul float %436, %376 %438 = fmul float %436, %377 %439 = fmul float %436, %378 %440 = fmul float %437, 2.000000e+00 %441 = fmul float %438, 2.000000e+00 %442 = fmul float %439, 2.000000e+00 %443 = fsub float %49, %440 %444 = fsub float %50, %441 %445 = fsub float %51, %442 %446 = fmul float %429, %443 %447 = fsub float -0.000000e+00, %446 %448 = fmul float %430, %444 %449 = fsub float %447, %448 %450 = fmul float %431, %445 %451 = fsub float %449, %450 %452 = call float @llvm.AMDIL.clamp.(float %451, float 0.000000e+00, float 1.000000e+00) %453 = call float @llvm.pow.f32(float %452, float 1.600000e+01) %454 = call float @llvm.AMDIL.clamp.(float %453, float 0.000000e+00, float 1.000000e+00) %455 = fmul float %396, %22 %456 = fmul float %402, %26 %457 = fadd float %455, %456 %458 = fmul float %408, %30 %459 = fadd float %457, %458 %460 = fadd float %459, %34 %461 = fmul float %75, %13 %462 = fadd float %461, %15 %463 = fmul float %76, %14 %464 = fadd float %463, %16 %465 = bitcast float %95 to i32 %466 = bitcast float %96 to i32 %467 = insertelement <4 x i32> , i32 %465, i32 1 %468 = insertelement <4 x i32> %467, i32 %466, i32 2 %469 = insertelement <4 x i32> %468, i32 0, i32 3 %470 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %469, <8 x i32> %53, <4 x i32> %55, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %471 = extractelement <4 x float> %470, i32 0 %472 = extractelement <4 x float> %470, i32 1 %473 = extractelement <4 x float> %470, i32 2 %474 = extractelement <4 x float> %470, i32 3 %475 = fmul float %396, %19 %476 = fmul float %402, %23 %477 = fadd float %475, %476 %478 = fmul float %408, %27 %479 = fadd float %477, %478 %480 = fadd float %479, %31 %481 = fmul float %396, %20 %482 = fmul float %402, %24 %483 = fadd float %481, %482 %484 = fmul float %408, %28 %485 = fadd float %483, %484 %486 = fadd float %485, %32 %487 = fsub float -0.000000e+00, %486 %488 = fmul float %396, %21 %489 = fmul float %402, %25 %490 = fadd float %488, %489 %491 = fmul float %408, %29 %492 = fadd float %490, %491 %493 = fadd float %492, %33 %494 = fmul float %493, 2.000000e+00 %495 = fsub float %494, %460 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %422, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %462, float %464, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %396, float %402, float %408, float %416) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %376, float %377, float %378, float %454) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %471, float %472, float %473, float %474) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %422, float %422, float %422, float %422) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %480, float %487, float %495, float %460) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float %422, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @floor(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0xc3000000 ; 7E0202FF C3000000 v_mov_b32_e32 v2, 0x437f0000 ; 7E0402FF 437F0000 v_mov_b32_e32 v4, 0x80000000 ; 7E0802FF 80000000 v_mov_b32_e32 v5, 0xc2800000 ; 7E0A02FF C2800000 v_mov_b32_e32 v6, 0x3c820821 ; 7E0C02FF 3C820821 v_add_i32_e32 v0, s10, v0 ; 4A00000A v_add_i32_e32 v3, s11, v3 ; 4A06060B s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908 s_load_dwordx4 s[24:27], s[8:9], 0xc ; C08C090C s_load_dwordx4 s[28:31], s[8:9], 0x10 ; C08E0910 s_load_dwordx4 s[8:11], s[8:9], 0x14 ; C0840914 s_load_dwordx4 s[44:47], s[2:3], 0x4 ; C0960304 s_load_dwordx4 s[48:51], s[2:3], 0x10 ; C0980310 s_load_dwordx4 s[52:55], s[2:3], 0x14 ; C09A0314 s_load_dwordx8 s[36:43], s[6:7], 0x0 ; C0D20700 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[7:10], v0, s[12:15], 0 idxen ; E00C2000 80030700 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[10:13], v0, s[16:19], 0 idxen ; E00C2000 80040A00 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[12:15], v0, s[20:23], 0 idxen ; E00C2000 80050C00 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[14:17], v0, s[24:27], 0 idxen ; E00C2000 80060E00 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[17:20], v0, s[28:31], 0 idxen ; E00C2000 80071100 buffer_load_format_xyzw v[24:27], v3, s[8:11], 0 idxen ; E00C2000 80021803 s_buffer_load_dword s30, s[44:47], 0x6 ; C20F2D06 s_buffer_load_dword s29, s[44:47], 0x7 ; C20EAD07 s_buffer_load_dword s11, s[44:47], 0x8 ; C205AD08 s_buffer_load_dword s12, s[44:47], 0x9 ; C2062D09 s_buffer_load_dword s3, s[48:51], 0x52 ; C201B152 s_buffer_load_dword s31, s[48:51], 0x66 ; C20FB166 s_buffer_load_dword s44, s[48:51], 0x67 ; C2163167 s_buffer_load_dword s24, s[48:51], 0x4c ; C20C314C s_buffer_load_dword s23, s[48:51], 0x4d ; C20BB14D s_buffer_load_dword s22, s[48:51], 0x4e ; C20B314E s_buffer_load_dword s16, s[48:51], 0x50 ; C2083150 s_buffer_load_dword s9, s[48:51], 0x51 ; C204B151 s_buffer_load_dword s21, s[52:55], 0x0 ; C20AB500 s_buffer_load_dword s20, s[52:55], 0x1 ; C20A3501 s_buffer_load_dword s19, s[52:55], 0x2 ; C209B502 s_buffer_load_dword s0, s[48:51], 0xf ; C200310F s_buffer_load_dword s26, s[48:51], 0x44 ; C20D3144 s_buffer_load_dword s28, s[48:51], 0x45 ; C20E3145 s_buffer_load_dword s27, s[48:51], 0x46 ; C20DB146 s_buffer_load_dword s25, s[48:51], 0x47 ; C20CB147 s_buffer_load_dword s4, s[48:51], 0x5 ; C2023105 s_buffer_load_dword s5, s[48:51], 0x6 ; C202B106 s_buffer_load_dword s10, s[48:51], 0x7 ; C2053107 s_buffer_load_dword s2, s[48:51], 0x8 ; C2013108 s_buffer_load_dword s1, s[48:51], 0x9 ; C200B109 s_buffer_load_dword s6, s[48:51], 0x0 ; C2033100 s_buffer_load_dword s7, s[48:51], 0x1 ; C203B101 s_buffer_load_dword s8, s[48:51], 0x2 ; C2043102 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v0, s11 ; 7E00020B s_buffer_load_dword s11, s[48:51], 0x3 ; C205B103 v_mov_b32_e32 v3, s12 ; 7E06020C s_buffer_load_dword s17, s[48:51], 0x4 ; C208B104 s_buffer_load_dword s12, s[48:51], 0xa ; C206310A s_buffer_load_dword s18, s[48:51], 0xb ; C209310B s_buffer_load_dword s13, s[48:51], 0xc ; C206B10C s_buffer_load_dword s14, s[48:51], 0xd ; C207310D s_buffer_load_dword s15, s[48:51], 0xe ; C207B10E v_mad_f32 v10, v2, v10, v1 ; D282000A 04061502 v_mac_f32_e32 v1, v2, v11 ; 3E021702 v_lshlrev_b32_e32 v2, 2, v14 ; 34041C82 v_lshlrev_b32_e32 v11, 2, v15 ; 34161E82 v_lshlrev_b32_e32 v14, 2, v16 ; 341C2082 v_add_i32_e32 v2, 8, v2 ; 4A040488 v_add_i32_e32 v11, 8, v11 ; 4A161688 v_add_i32_e32 v14, 8, v14 ; 4A1C1C88 v_cvt_f32_i32_e32 v2, v2 ; 7E040B02 v_cvt_f32_i32_e32 v11, v11 ; 7E160B0B v_cvt_f32_i32_e32 v14, v14 ; 7E1C0B0E v_add_f32_e32 v2, 0.5, v2 ; 060404F0 v_add_f32_e32 v11, 0.5, v11 ; 061616F0 v_add_f32_e32 v14, 0.5, v14 ; 061C1CF0 v_mad_f32 v2, s31, v2, v24 ; D2820002 0462041F v_mad_f32 v11, s31, v11, v24 ; D282000B 0462161F v_mad_f32 v14, s31, v14, v24 ; D282000E 04621C1F v_floor_f32_e32 v15, v2 ; 7E1E4902 v_subrev_f32_e32 v20, v15, v2 ; 0A28050F v_mad_f32 v21, s44, v15, v25 ; D2820015 04661E2C v_floor_f32_e32 v2, v11 ; 7E04490B v_subrev_f32_e32 v26, v2, v11 ; 0A341702 v_mad_f32 v27, s44, v2, v25 ; D282001B 0466042C v_floor_f32_e32 v2, v14 ; 7E04490E v_subrev_f32_e32 v33, v2, v14 ; 0A421D02 v_mad_f32 v34, s44, v2, v25 ; D2820022 0466042C v_mov_b32_e32 v35, 0 ; 7E460280 v_mov_b32_e32 v28, v35 ; 7E380323 v_mov_b32_e32 v22, v35 ; 7E2C0323 image_sample_l v[36:39], 15, 0, 0, 0, 0, 0, 0, 0, v[33:36], s[36:43], s[32:35] ; F0900F00 01092421 image_sample_l v[28:31], 15, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[36:43], s[32:35] ; F0900F00 01091C1A image_sample_l v[40:43], 15, 0, 0, 0, 0, 0, 0, 0, v[20:23], s[36:43], s[32:35] ; F0900F00 01092814 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v2, v40, v17 ; 10042328 v_mul_f32_e32 v11, v41, v17 ; 10162329 v_mul_f32_e32 v14, v42, v17 ; 101C232A v_mul_f32_e32 v15, v43, v17 ; 101E232B v_mov_b32_e32 v32, 0x10001 ; 7E4002FF 00010001 v_mac_f32_e32 v2, v28, v18 ; 3E04251C v_mac_f32_e32 v11, v29, v18 ; 3E16251D v_mac_f32_e32 v14, v30, v18 ; 3E1C251E v_mov_b32_e32 v40, v32 ; 7E500320 v_mov_b32_e32 v41, v33 ; 7E520321 v_mov_b32_e32 v42, v34 ; 7E540322 v_mov_b32_e32 v43, v35 ; 7E560323 v_mov_b32_e32 v44, v32 ; 7E580320 v_mov_b32_e32 v45, v33 ; 7E5A0321 v_mov_b32_e32 v46, v34 ; 7E5C0322 v_mov_b32_e32 v47, v35 ; 7E5E0323 v_mac_f32_e32 v15, v31, v18 ; 3E1E251F v_mov_b32_e32 v41, v20 ; 7E520314 v_mov_b32_e32 v45, v26 ; 7E5A031A v_mac_f32_e32 v2, v36, v19 ; 3E042724 v_mac_f32_e32 v11, v37, v19 ; 3E162725 v_mac_f32_e32 v14, v38, v19 ; 3E1C2726 v_mov_b32_e32 v42, v21 ; 7E540315 v_mac_f32_e32 v15, v39, v19 ; 3E1E2727 v_mov_b32_e32 v46, v27 ; 7E5C031B v_mov_b32_e32 v43, v35 ; 7E560323 image_sample_l_o v[28:31], 15, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[36:43], s[32:35] ; F0D00F00 01091C20 image_sample_l_o v[36:39], 15, 0, 0, 0, 0, 0, 0, 0, v[40:43], s[36:43], s[32:35] ; F0D00F00 01092428 v_mov_b32_e32 v47, v35 ; 7E5E0323 image_sample_l_o v[40:43], 15, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[36:43], s[32:35] ; F0D00F00 0109282C s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v16, v40, v18 ; 10202528 v_mul_f32_e32 v22, v41, v18 ; 102C2529 v_mul_f32_e32 v40, v42, v18 ; 1050252A v_mul_f32_e32 v41, v43, v18 ; 1052252B v_mov_b32_e32 v32, 0x20002 ; 7E4002FF 00020002 image_sample_l_o v[42:45], 15, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[36:43], s[32:35] ; F0D00F00 01092A20 v_mov_b32_e32 v33, v26 ; 7E42031A v_mac_f32_e32 v16, v36, v17 ; 3E202324 v_mac_f32_e32 v22, v37, v17 ; 3E2C2325 v_mac_f32_e32 v40, v38, v17 ; 3E502326 v_mac_f32_e32 v41, v39, v17 ; 3E522327 v_mov_b32_e32 v34, v27 ; 7E44031B v_mac_f32_e32 v16, v28, v19 ; 3E20271C v_mac_f32_e32 v22, v29, v19 ; 3E2C271D v_mac_f32_e32 v40, v30, v19 ; 3E50271E image_sample_l_o v[26:29], 15, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[36:43], s[32:35] ; F0D00F00 01091A20 v_mov_b32_e32 v33, v20 ; 7E420314 v_mac_f32_e32 v41, v31, v19 ; 3E52271F v_mov_b32_e32 v34, v21 ; 7E440315 image_sample_l_o v[30:33], 15, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[36:43], s[32:35] ; F0D00F00 01091E20 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v20, v30, v17 ; 1028231E v_mul_f32_e32 v21, v31, v17 ; 102A231F v_mul_f32_e32 v30, v32, v17 ; 103C2320 v_mul_f32_e32 v17, v33, v17 ; 10222321 v_mac_f32_e32 v20, v26, v18 ; 3E28251A v_mac_f32_e32 v21, v27, v18 ; 3E2A251B v_mac_f32_e32 v30, v28, v18 ; 3E3C251C v_mac_f32_e32 v17, v29, v18 ; 3E22251D v_mac_f32_e32 v20, v42, v19 ; 3E28272A v_mac_f32_e32 v21, v43, v19 ; 3E2A272B v_mac_f32_e32 v30, v44, v19 ; 3E3C272C v_mac_f32_e32 v17, v45, v19 ; 3E22272D v_mac_f32_e32 v0, s30, v12 ; 3E00181E v_mac_f32_e32 v3, s29, v13 ; 3E061A1D v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v4, v4, -1.0, vcc ; D2000004 01A9E704 v_add_f32_e64 v1, |v1|, v4 ; D2060101 00020901 v_cmp_gt_f32_e32 vcc, 0, v10 ; 7C081480 v_cndmask_b32_e64 v4, 0, 1.0, vcc ; D2000004 01A9E480 v_sub_f32_e64 v10, |v10|, v4 ; D208010A 0002090A v_add_f32_e32 v10, v5, v10 ; 06141505 v_add_f32_e32 v1, v5, v1 ; 06020305 v_cmp_gt_f32_e32 vcc, 0, v10 ; 7C081480 v_cndmask_b32_e64 v5, 0, 1.0, vcc ; D2000005 01A9E480 v_sub_f32_e64 v12, v5, |v10| ; D208020C 00021505 v_mad_f32 v12, v12, v6, 1.0 ; D282000C 03CA0D0C v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v13, 0, 1.0, vcc ; D200000D 01A9E480 v_sub_f32_e64 v18, v13, |v1| ; D2080212 0002030D v_mac_f32_e32 v12, v6, v18 ; 3E182506 v_sub_f32_e64 v10, |v10|, v5 ; D208010A 00020B0A v_sub_f32_e64 v1, |v1|, v13 ; D2080101 00021B01 v_mul_f32_e32 v10, v6, v10 ; 10141506 v_mul_f32_e32 v1, v6, v1 ; 10020306 v_mul_f32_e32 v6, v10, v10 ; 100C150A v_mac_f32_e32 v6, v1, v1 ; 3E0C0301 v_mac_f32_e32 v6, v12, v12 ; 3E0C190C v_rsq_clamp_f32_e32 v6, v6 ; 7E0C5906 v_mov_b32_e32 v26, v35 ; 7E340323 image_sample_l v18, 4, 0, 0, 0, 0, 0, 0, 0, v[24:27], s[36:43], s[32:35] ; F0900400 01091218 v_mov_b32_e32 v23, 0x30003 ; 7E2E02FF 00030003 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v7, v18, v7 ; 100E0F12 v_mul_f32_e32 v8, v18, v8 ; 10101112 v_mul_f32_e32 v9, v18, v9 ; 10121312 v_mov_b32_e32 v26, v35 ; 7E340323 image_sample_l_o v[23:26], 15, 0, 0, 0, 0, 0, 0, 0, v[23:26], s[36:43], s[32:35] ; F0D00F00 01091717 v_mad_f32 v5, -2.0, v5, 1.0 ; D2820005 03CA0AF5 v_mul_f32_e32 v10, v6, v10 ; 10141506 v_mul_f32_e32 v5, v5, v10 ; 100A1505 v_mad_f32 v10, -2.0, v13, 1.0 ; D282000A 03CA1AF5 v_mul_f32_e32 v1, v6, v1 ; 10020306 v_mul_f32_e32 v1, v10, v1 ; 1002030A v_mul_f32_e32 v10, v11, v1 ; 1014030B v_mac_f32_e32 v10, v2, v5 ; 3E140B02 v_mul_f32_e32 v11, v11, v8 ; 1016110B v_mac_f32_e32 v11, v2, v7 ; 3E160F02 v_mul_f32_e32 v2, v22, v1 ; 10040316 v_mac_f32_e32 v2, v16, v5 ; 3E040B10 v_mul_f32_e32 v13, v22, v8 ; 101A1116 v_mac_f32_e32 v13, v16, v7 ; 3E1A0F10 v_mul_f32_e32 v1, v21, v1 ; 10020315 v_mul_f32_e32 v8, v21, v8 ; 10101115 v_mac_f32_e32 v1, v20, v5 ; 3E020B14 v_mac_f32_e32 v8, v20, v7 ; 3E100F14 v_mul_f32_e32 v5, v6, v12 ; 100A1906 v_mad_f32 v4, -2.0, v4, 1.0 ; D2820004 03CA08F5 v_mul_f32_e32 v4, v5, v4 ; 10080905 v_mac_f32_e32 v10, v14, v4 ; 3E14090E v_mac_f32_e32 v11, v14, v9 ; 3E16130E v_mac_f32_e32 v2, v40, v4 ; 3E040928 v_mac_f32_e32 v13, v40, v9 ; 3E1A1328 v_mac_f32_e32 v1, v30, v4 ; 3E02091E v_mac_f32_e32 v8, v30, v9 ; 3E10131E v_add_f32_e32 v4, v15, v11 ; 0608170F v_add_f32_e32 v5, v41, v13 ; 060A1B29 v_add_f32_e32 v6, v17, v8 ; 060C1111 v_mul_f32_e32 v7, s28, v5 ; 100E0A1C v_mac_f32_e32 v7, s26, v4 ; 3E0E081A v_mac_f32_e32 v7, s27, v6 ; 3E0E0C1B v_mul_f32_e32 v8, v10, v10 ; 1010150A v_mac_f32_e32 v8, v2, v2 ; 3E100502 v_mac_f32_e32 v8, v1, v1 ; 3E100301 v_rsq_clamp_f32_e32 v8, v8 ; 7E105908 v_add_f32_e32 v7, s25, v7 ; 060E0E19 exp 15, 32, 0, 0, 0, v7, v35, v35, v35 ; F800020F 23232307 exp 15, 33, 0, 0, 0, v0, v3, v35, v35 ; F800021F 23230300 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_mul_f32_e32 v0, v8, v10 ; 10001508 v_mul_f32_e32 v2, v8, v2 ; 10040508 v_mul_f32_e32 v1, v8, v1 ; 10020308 v_subrev_f32_e32 v3, s24, v4 ; 0A060818 v_subrev_f32_e32 v8, s23, v5 ; 0A100A17 v_mul_f32_e32 v9, v3, v3 ; 10120703 v_mac_f32_e32 v9, v8, v8 ; 3E121108 v_subrev_f32_e32 v10, s22, v6 ; 0A140C16 v_mac_f32_e32 v9, v10, v10 ; 3E12150A v_rsq_clamp_f32_e32 v9, v9 ; 7E125909 v_mul_f32_e32 v11, s21, v0 ; 10160015 v_mac_f32_e32 v11, s20, v2 ; 3E160414 v_mac_f32_e32 v11, s19, v1 ; 3E160213 v_mul_f32_e32 v12, v0, v11 ; 10181700 v_mad_f32 v12, 2.0, v12, -s21 ; D282000C 805618F4 v_mul_f32_e32 v13, v9, v3 ; 101A0709 v_mul_f32_e32 v12, v12, v13 ; 10181B0C v_mul_f32_e32 v13, v2, v11 ; 101A1702 v_mad_f32 v13, 2.0, v13, -s20 ; D282000D 80521AF4 v_mul_f32_e32 v14, v9, v8 ; 101C1109 v_mac_f32_e32 v12, v13, v14 ; 3E181D0D v_mul_f32_e32 v11, v1, v11 ; 10161701 v_mad_f32 v11, 2.0, v11, -s19 ; D282000B 804E16F4 v_mul_f32_e32 v9, v9, v10 ; 10121509 v_mac_f32_e32 v12, v11, v9 ; 3E18130B v_mul_f32_e32 v3, s16, v3 ; 10060610 v_add_f32_e64 v9, 0, v12 clamp ; D2060809 00021880 v_log_f32_e32 v9, v9 ; 7E124F09 v_mac_f32_e32 v3, s9, v8 ; 3E061009 v_mac_f32_e32 v3, s3, v10 ; 3E061403 exp 15, 34, 0, 0, 0, v4, v5, v6, v3 ; F800022F 03060504 s_waitcnt expcnt(0) ; BF8C070F v_mul_legacy_f32_e32 v3, 0x41800000, v9 ; 0E0612FF 41800000 v_exp_f32_e32 v3, v3 ; 7E064B03 v_add_f32_e64 v3, 0, v3 clamp ; D2060803 00020680 exp 15, 35, 0, 0, 0, v0, v2, v1, v3 ; F800023F 03010200 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s10, v5 ; 10000A0A v_mul_f32_e32 v1, s17, v5 ; 10020A11 v_mul_f32_e32 v2, s4, v5 ; 10040A04 v_mul_f32_e32 v3, s5, v5 ; 10060A05 v_mac_f32_e32 v0, s11, v4 ; 3E00080B v_mac_f32_e32 v1, s6, v4 ; 3E020806 v_mac_f32_e32 v2, s7, v4 ; 3E040807 v_mac_f32_e32 v3, s8, v4 ; 3E060808 v_mac_f32_e32 v0, s18, v6 ; 3E000C12 v_mac_f32_e32 v1, s2, v6 ; 3E020C02 v_mac_f32_e32 v2, s1, v6 ; 3E040C01 v_mac_f32_e32 v3, s12, v6 ; 3E060C0C v_add_f32_e32 v0, s0, v0 ; 06000000 v_add_f32_e32 v1, s13, v1 ; 0602020D v_add_f32_e32 v2, s14, v2 ; 0604040E v_add_f32_e32 v3, s15, v3 ; 0606060F v_xor_b32_e32 v2, 0x80000000, v2 ; 3A0404FF 80000000 v_mad_f32 v3, 2.0, v3, -v0 ; D2820003 840206F4 exp 15, 36, 0, 0, 0, v23, v24, v25, v26 ; F800024F 1A191817 exp 15, 37, 0, 0, 0, v7, v7, v7, v7 ; F800025F 07070707 exp 15, 12, 0, 0, 0, v1, v2, v3, v0 ; F80000CF 00030201 exp 15, 13, 0, 1, 0, v7, v35, v35, v35 ; F80008DF 23232307 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 48 Code Size: 1448 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[5], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SVIEW[0], SHADOW2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL CONST[1][0..3] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0..8], LOCAL IMM[0] UINT32 {0, 16, 4, 200} IMM[1] FLT32 { 2.0000, -1.0000, 1.0000, 0.0000} IMM[2] UINT32 {196, 192, 204, 216} IMM[3] UINT32 {212, 208, 220, 232} IMM[4] UINT32 {228, 224, 236, 248} IMM[5] UINT32 {244, 240, 252, 256} IMM[6] FLT32 { 0.2060, 0.0749, 0.1236, 0.0000} IMM[7] UINT32 {92, 96, 64, 76} IMM[8] UINT32 {80, 0, 0, 0} 0: DP3 TEMP[0].x, IN[2].xyzz, IN[2].xyzz 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MUL TEMP[0].xyz, IN[2].xyzz, TEMP[0].xxxx 3: MOV TEMP[1].xy, IN[0].xyyy 4: TEX TEMP[1], TEMP[1], SAMP[1], 2D 5: MOV TEMP[2].w, TEMP[1].wwww 6: MUL TEMP[2].xyz, CONST[1][1].xyzz, TEMP[1].xyzz 7: MUL TEMP[1], TEMP[2], IN[3] 8: MUL TEMP[2].xyz, TEMP[0].zxyy, IN[4].yzxx 9: MAD TEMP[2].xyz, TEMP[0].yzxx, IN[4].zxyy, -TEMP[2].xyzz 10: MOV TEMP[3].xy, IN[0].xyyy 11: TEX TEMP[3].yw, TEMP[3], SAMP[2], 2D 12: MAD TEMP[3].xy, TEMP[3].wyyy, IMM[1].xxxx, IMM[1].yyyy 13: MOV TEMP[4].x, TEMP[3].xxxx 14: MOV TEMP[4].y, -TEMP[3].yyyy 15: MUL TEMP[4].xy, TEMP[4].xyyy, CONST[1][0].xxxx 16: MOV TEMP[5].x, TEMP[4].xxxx 17: MOV TEMP[5].y, TEMP[4].yyyy 18: DP2 TEMP[3].x, TEMP[3].xyyy, TEMP[3].xyyy 19: ADD TEMP[3].x, IMM[1].zzzz, -TEMP[3].xxxx 20: MOV_SAT TEMP[3].x, TEMP[3].xxxx 21: SQRT TEMP[3].x, TEMP[3].xxxx 22: MOV TEMP[5].z, TEMP[3].xxxx 23: DP3 TEMP[3].x, TEMP[5].xyzz, TEMP[5].xyzz 24: RSQ TEMP[3].x, TEMP[3].xxxx 25: MUL TEMP[3].xyz, TEMP[5].xyzz, TEMP[3].xxxx 26: DP3 TEMP[4].x, IN[4].xyzz, IN[4].xyzz 27: RSQ TEMP[4].x, TEMP[4].xxxx 28: MUL TEMP[4].xyz, IN[4].xyzz, TEMP[4].xxxx 29: DP3 TEMP[5].x, TEMP[2].xyzz, TEMP[2].xyzz 30: RSQ TEMP[5].x, TEMP[5].xxxx 31: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xxxx 32: MUL TEMP[2].xyz, IN[4].wwww, TEMP[2].xyzz 33: MUL TEMP[2].xyz, TEMP[3].yyyy, TEMP[2].xyzz 34: MAD TEMP[2].xyz, TEMP[3].xxxx, TEMP[4].xyzz, TEMP[2].xyzz 35: MAD TEMP[0].xyz, TEMP[0].xyzz, TEMP[3].zzzz, TEMP[2].xyzz 36: DP3 TEMP[2].x, TEMP[0].xyzz, TEMP[0].xyzz 37: RSQ TEMP[2].x, TEMP[2].xxxx 38: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xxxx 39: MUL TEMP[2].x, IN[1].xxxx, CONST[5][12].xxxx 40: MAD TEMP[2].x, IN[1].yyyy, CONST[5][12].yyyy, TEMP[2].xxxx 41: MAD TEMP[2].x, IN[1].zzzz, CONST[5][12].zzzz, TEMP[2].xxxx 42: ADD TEMP[2].x, TEMP[2].xxxx, CONST[5][12].wwww 43: MUL TEMP[3].x, IN[1].xxxx, CONST[5][13].xxxx 44: MAD TEMP[3].x, IN[1].yyyy, CONST[5][13].yyyy, TEMP[3].xxxx 45: MAD TEMP[3].x, IN[1].zzzz, CONST[5][13].zzzz, TEMP[3].xxxx 46: ADD TEMP[3].x, TEMP[3].xxxx, CONST[5][13].wwww 47: MOV TEMP[2].y, TEMP[3].xxxx 48: MUL TEMP[3].x, IN[1].xxxx, CONST[5][14].xxxx 49: MAD TEMP[3].x, IN[1].yyyy, CONST[5][14].yyyy, TEMP[3].xxxx 50: MAD TEMP[3].x, IN[1].zzzz, CONST[5][14].zzzz, TEMP[3].xxxx 51: ADD TEMP[3].x, TEMP[3].xxxx, CONST[5][14].wwww 52: MOV TEMP[2].z, TEMP[3].xxxx 53: MUL TEMP[3].x, IN[1].xxxx, CONST[5][15].xxxx 54: MAD TEMP[3].x, IN[1].yyyy, CONST[5][15].yyyy, TEMP[3].xxxx 55: MAD TEMP[3].x, IN[1].zzzz, CONST[5][15].zzzz, TEMP[3].xxxx 56: ADD TEMP[3].x, TEMP[3].xxxx, CONST[5][15].wwww 57: RCP TEMP[3].xyz, TEMP[3].xxxx 58: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xyzz 59: MOV_SAT TEMP[3].x, -TEMP[2].zzzz 60: MOV TEMP[4].x, -CONST[5][16].xxxx 61: MOV TEMP[5].x, TEMP[4].xxxx 62: MOV TEMP[5].y, CONST[5][16].xxxx 63: MOV TEMP[6].x, CONST[5][16].xxxx 64: MOV TEMP[6].y, TEMP[4].xxxx 65: ADD TEMP[7].xy, TEMP[2].xyyy, CONST[5][16].xxxx 66: MOV TEMP[7].xy, TEMP[7].xyyy 67: MOV TEMP[7].z, TEMP[3].xxxx 68: MOV TEMP[7].w, IMM[1].wwww 69: TXL TEMP[7].x, TEMP[7], SAMP[0], SHADOW2D 70: MOV TEMP[7].x, TEMP[7].xxxx 71: ADD TEMP[5].xy, TEMP[5].xyyy, TEMP[2].xyyy 72: MOV TEMP[5].xy, TEMP[5].xyyy 73: MOV TEMP[5].z, TEMP[3].xxxx 74: MOV TEMP[5].w, IMM[1].wwww 75: TXL TEMP[5].x, TEMP[5], SAMP[0], SHADOW2D 76: MOV TEMP[7].y, TEMP[5].xxxx 77: ADD TEMP[5].xy, TEMP[2].xyyy, TEMP[6].xyyy 78: MOV TEMP[5].xy, TEMP[5].xyyy 79: MOV TEMP[5].z, TEMP[3].xxxx 80: MOV TEMP[5].w, IMM[1].wwww 81: TXL TEMP[5].x, TEMP[5], SAMP[0], SHADOW2D 82: MOV TEMP[7].z, TEMP[5].xxxx 83: ADD TEMP[5].xy, TEMP[2].xyyy, TEMP[4].xxxx 84: MOV TEMP[5].xy, TEMP[5].xyyy 85: MOV TEMP[5].z, TEMP[3].xxxx 86: MOV TEMP[5].w, IMM[1].wwww 87: TXL TEMP[5].x, TEMP[5], SAMP[0], SHADOW2D 88: MOV TEMP[7].w, TEMP[5].xxxx 89: MOV TEMP[5].y, IMM[1].wwww 90: MOV TEMP[5].x, CONST[5][16].xxxx 91: MOV TEMP[6].y, IMM[1].wwww 92: MOV TEMP[6].x, TEMP[4].xxxx 93: MOV TEMP[8].x, IMM[1].wwww 94: MOV TEMP[8].y, TEMP[4].xxxx 95: MOV TEMP[4].x, IMM[1].wwww 96: MOV TEMP[4].y, CONST[5][16].xxxx 97: ADD TEMP[5].xy, TEMP[5].xyyy, TEMP[2].xyyy 98: MOV TEMP[5].xy, TEMP[5].xyyy 99: MOV TEMP[5].z, TEMP[3].xxxx 100: MOV TEMP[5].w, IMM[1].wwww 101: TXL TEMP[5].x, TEMP[5], SAMP[0], SHADOW2D 102: MOV TEMP[5].x, TEMP[5].xxxx 103: ADD TEMP[6].xy, TEMP[6].xyyy, TEMP[2].xyyy 104: MOV TEMP[6].xy, TEMP[6].xyyy 105: MOV TEMP[6].z, TEMP[3].xxxx 106: MOV TEMP[6].w, IMM[1].wwww 107: TXL TEMP[6].x, TEMP[6], SAMP[0], SHADOW2D 108: MOV TEMP[5].y, TEMP[6].xxxx 109: ADD TEMP[6].xy, TEMP[8].xyyy, TEMP[2].xyyy 110: MOV TEMP[6].xy, TEMP[6].xyyy 111: MOV TEMP[6].z, TEMP[3].xxxx 112: MOV TEMP[6].w, IMM[1].wwww 113: TXL TEMP[6].x, TEMP[6], SAMP[0], SHADOW2D 114: MOV TEMP[5].z, TEMP[6].xxxx 115: ADD TEMP[4].xy, TEMP[4].xyyy, TEMP[2].xyyy 116: MOV TEMP[4].xy, TEMP[4].xyyy 117: MOV TEMP[4].z, TEMP[3].xxxx 118: MOV TEMP[4].w, IMM[1].wwww 119: TXL TEMP[4].x, TEMP[4], SAMP[0], SHADOW2D 120: MOV TEMP[5].w, TEMP[4].xxxx 121: MOV TEMP[2].xy, TEMP[2].xyyy 122: MOV TEMP[2].z, TEMP[3].xxxx 123: MOV TEMP[2].w, IMM[1].wwww 124: TXL TEMP[2].x, TEMP[2], SAMP[0], SHADOW2D 125: DP4 TEMP[3].x, TEMP[7], IMM[6].yyyy 126: DP4 TEMP[4].x, TEMP[5], IMM[6].zzzz 127: ADD TEMP[3].x, TEMP[3].xxxx, TEMP[4].xxxx 128: MAD TEMP[2].x, TEMP[2].xxxx, IMM[6].xxxx, TEMP[3].xxxx 129: DP3 TEMP[3].x, -CONST[5][0].xyzz, TEMP[0].xyzz 130: MOV_SAT TEMP[3].x, TEMP[3].xxxx 131: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[3].xxxx 132: ADD TEMP[3].x, IMM[1].zzzz, -TEMP[2].xxxx 133: MUL TEMP[4].xyz, CONST[5][5].wwww, CONST[5][6].xyzz 134: DP3 TEMP[0].x, CONST[5][4].xyzz, TEMP[0].xyzz 135: MOV_SAT TEMP[0].x, TEMP[0].xxxx 136: MUL TEMP[5].xyz, CONST[5][4].wwww, CONST[5][5].xyzz 137: MUL TEMP[2].xyz, TEMP[2].xxxx, CONST[5][1].xyzz 138: MAD TEMP[0].xyz, TEMP[0].xxxx, TEMP[5].xyzz, TEMP[2].xyzz 139: MAD TEMP[0].xyz, TEMP[3].xxxx, TEMP[4].xyzz, TEMP[0].xyzz 140: MUL TEMP[0].xyz, TEMP[1].xyzz, TEMP[0].xyzz 141: MOV TEMP[2].x, TEMP[0].xxxx 142: MOV TEMP[2].y, TEMP[0].yyyy 143: MOV TEMP[2].z, TEMP[0].zzzz 144: MUL TEMP[0].x, IN[3].wwww, TEMP[1].wwww 145: MUL TEMP[0].x, TEMP[0].xxxx, CONST[1][0].yyyy 146: MOV TEMP[2].w, TEMP[0].xxxx 147: MOV OUT[0], TEMP[2] 148: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %29 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 5 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = call float @llvm.SI.load.const(<16 x i8> %30, i32 0) %32 = call float @llvm.SI.load.const(<16 x i8> %30, i32 4) %33 = call float @llvm.SI.load.const(<16 x i8> %30, i32 8) %34 = call float @llvm.SI.load.const(<16 x i8> %30, i32 16) %35 = call float @llvm.SI.load.const(<16 x i8> %30, i32 20) %36 = call float @llvm.SI.load.const(<16 x i8> %30, i32 24) %37 = call float @llvm.SI.load.const(<16 x i8> %30, i32 64) %38 = call float @llvm.SI.load.const(<16 x i8> %30, i32 68) %39 = call float @llvm.SI.load.const(<16 x i8> %30, i32 72) %40 = call float @llvm.SI.load.const(<16 x i8> %30, i32 76) %41 = call float @llvm.SI.load.const(<16 x i8> %30, i32 80) %42 = call float @llvm.SI.load.const(<16 x i8> %30, i32 84) %43 = call float @llvm.SI.load.const(<16 x i8> %30, i32 88) %44 = call float @llvm.SI.load.const(<16 x i8> %30, i32 92) %45 = call float @llvm.SI.load.const(<16 x i8> %30, i32 96) %46 = call float @llvm.SI.load.const(<16 x i8> %30, i32 100) %47 = call float @llvm.SI.load.const(<16 x i8> %30, i32 104) %48 = call float @llvm.SI.load.const(<16 x i8> %30, i32 192) %49 = call float @llvm.SI.load.const(<16 x i8> %30, i32 196) %50 = call float @llvm.SI.load.const(<16 x i8> %30, i32 200) %51 = call float @llvm.SI.load.const(<16 x i8> %30, i32 204) %52 = call float @llvm.SI.load.const(<16 x i8> %30, i32 208) %53 = call float @llvm.SI.load.const(<16 x i8> %30, i32 212) %54 = call float @llvm.SI.load.const(<16 x i8> %30, i32 216) %55 = call float @llvm.SI.load.const(<16 x i8> %30, i32 220) %56 = call float @llvm.SI.load.const(<16 x i8> %30, i32 224) %57 = call float @llvm.SI.load.const(<16 x i8> %30, i32 228) %58 = call float @llvm.SI.load.const(<16 x i8> %30, i32 232) %59 = call float @llvm.SI.load.const(<16 x i8> %30, i32 236) %60 = call float @llvm.SI.load.const(<16 x i8> %30, i32 240) %61 = call float @llvm.SI.load.const(<16 x i8> %30, i32 244) %62 = call float @llvm.SI.load.const(<16 x i8> %30, i32 248) %63 = call float @llvm.SI.load.const(<16 x i8> %30, i32 252) %64 = call float @llvm.SI.load.const(<16 x i8> %30, i32 256) %65 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %66 = load <8 x i32>, <8 x i32> addrspace(2)* %65, align 32, !tbaa !0 %67 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %68 = load <4 x i32>, <4 x i32> addrspace(2)* %67, align 16, !tbaa !0 %69 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %70 = bitcast <8 x i32> addrspace(2)* %69 to <32 x i8> addrspace(2)* %71 = load <32 x i8>, <32 x i8> addrspace(2)* %70, align 32, !tbaa !0 %72 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %73 = bitcast <4 x i32> addrspace(2)* %72 to <16 x i8> addrspace(2)* %74 = load <16 x i8>, <16 x i8> addrspace(2)* %73, align 16, !tbaa !0 %75 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %76 = bitcast <8 x i32> addrspace(2)* %75 to <32 x i8> addrspace(2)* %77 = load <32 x i8>, <32 x i8> addrspace(2)* %76, align 32, !tbaa !0 %78 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %79 = bitcast <4 x i32> addrspace(2)* %78 to <16 x i8> addrspace(2)* %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0 %81 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %82 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %83 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %84 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %85 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %86 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %87 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %88 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %89 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %90 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %91 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %92 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %93 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %94 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %95 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %96 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %97 = fmul float %86, %86 %98 = fmul float %87, %87 %99 = fadd float %98, %97 %100 = fmul float %88, %88 %101 = fadd float %99, %100 %102 = call float @llvm.AMDGPU.rsq.clamped.f32(float %101) %103 = fmul float %86, %102 %104 = fmul float %87, %102 %105 = fmul float %88, %102 %106 = bitcast float %81 to i32 %107 = bitcast float %82 to i32 %108 = insertelement <2 x i32> undef, i32 %106, i32 0 %109 = insertelement <2 x i32> %108, i32 %107, i32 1 %110 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %109, <32 x i8> %71, <16 x i8> %74, i32 2) %111 = extractelement <4 x float> %110, i32 0 %112 = extractelement <4 x float> %110, i32 1 %113 = extractelement <4 x float> %110, i32 2 %114 = extractelement <4 x float> %110, i32 3 %115 = fmul float %26, %111 %116 = fmul float %27, %112 %117 = fmul float %28, %113 %118 = fmul float %115, %89 %119 = fmul float %116, %90 %120 = fmul float %117, %91 %121 = fmul float %114, %92 %122 = fmul float %105, %94 %123 = fmul float %103, %95 %124 = fmul float %104, %93 %125 = fmul float %104, %95 %126 = fsub float %125, %122 %127 = fmul float %105, %93 %128 = fsub float %127, %123 %129 = fmul float %103, %94 %130 = fsub float %129, %124 %131 = bitcast float %81 to i32 %132 = bitcast float %82 to i32 %133 = insertelement <2 x i32> undef, i32 %131, i32 0 %134 = insertelement <2 x i32> %133, i32 %132, i32 1 %135 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %134, <32 x i8> %77, <16 x i8> %80, i32 2) %136 = extractelement <4 x float> %135, i32 1 %137 = extractelement <4 x float> %135, i32 3 %138 = fmul float %137, 2.000000e+00 %139 = fadd float %138, -1.000000e+00 %140 = fmul float %136, 2.000000e+00 %141 = fadd float %140, -1.000000e+00 %142 = fmul float %139, %24 %143 = fmul float %141, %24 %144 = fmul float %139, %139 %145 = fmul float %141, %141 %146 = fadd float %144, %145 %147 = fsub float 1.000000e+00, %146 %148 = call float @llvm.AMDIL.clamp.(float %147, float 0.000000e+00, float 1.000000e+00) %149 = call float @llvm.sqrt.f32(float %148) %150 = fmul float %142, %142 %151 = fmul float %143, %143 %152 = fadd float %151, %150 %153 = fmul float %149, %149 %154 = fadd float %152, %153 %155 = call float @llvm.AMDGPU.rsq.clamped.f32(float %154) %156 = fmul float %142, %155 %157 = fmul float %143, %155 %158 = fsub float -0.000000e+00, %157 %159 = fmul float %149, %155 %160 = fmul float %93, %93 %161 = fmul float %94, %94 %162 = fadd float %161, %160 %163 = fmul float %95, %95 %164 = fadd float %162, %163 %165 = call float @llvm.AMDGPU.rsq.clamped.f32(float %164) %166 = fmul float %93, %165 %167 = fmul float %94, %165 %168 = fmul float %95, %165 %169 = fmul float %126, %126 %170 = fmul float %128, %128 %171 = fadd float %170, %169 %172 = fmul float %130, %130 %173 = fadd float %171, %172 %174 = call float @llvm.AMDGPU.rsq.clamped.f32(float %173) %175 = fmul float %126, %174 %176 = fmul float %128, %174 %177 = fmul float %130, %174 %178 = fmul float %96, %175 %179 = fmul float %96, %176 %180 = fmul float %96, %177 %181 = fmul float %178, %158 %182 = fmul float %179, %158 %183 = fmul float %180, %158 %184 = fmul float %156, %166 %185 = fadd float %184, %181 %186 = fmul float %156, %167 %187 = fadd float %186, %182 %188 = fmul float %156, %168 %189 = fadd float %188, %183 %190 = fmul float %103, %159 %191 = fadd float %190, %185 %192 = fmul float %104, %159 %193 = fadd float %192, %187 %194 = fmul float %105, %159 %195 = fadd float %194, %189 %196 = fmul float %191, %191 %197 = fmul float %193, %193 %198 = fadd float %197, %196 %199 = fmul float %195, %195 %200 = fadd float %198, %199 %201 = call float @llvm.AMDGPU.rsq.clamped.f32(float %200) %202 = fmul float %191, %201 %203 = fmul float %193, %201 %204 = fmul float %195, %201 %205 = fmul float %83, %48 %206 = fmul float %84, %49 %207 = fadd float %206, %205 %208 = fmul float %85, %50 %209 = fadd float %208, %207 %210 = fadd float %209, %51 %211 = fmul float %83, %52 %212 = fmul float %84, %53 %213 = fadd float %212, %211 %214 = fmul float %85, %54 %215 = fadd float %214, %213 %216 = fadd float %215, %55 %217 = fmul float %83, %56 %218 = fmul float %84, %57 %219 = fadd float %218, %217 %220 = fmul float %85, %58 %221 = fadd float %220, %219 %222 = fadd float %221, %59 %223 = fmul float %83, %60 %224 = fmul float %84, %61 %225 = fadd float %224, %223 %226 = fmul float %85, %62 %227 = fadd float %226, %225 %228 = fadd float %227, %63 %229 = fdiv float 1.000000e+00, %228 %230 = fmul float %210, %229 %231 = fmul float %216, %229 %232 = fmul float %222, %229 %233 = fsub float -0.000000e+00, %232 %234 = call float @llvm.AMDIL.clamp.(float %233, float 0.000000e+00, float 1.000000e+00) %235 = fadd float %230, %64 %236 = fadd float %231, %64 %237 = bitcast float %234 to i32 %238 = bitcast float %235 to i32 %239 = bitcast float %236 to i32 %240 = insertelement <4 x i32> undef, i32 %237, i32 0 %241 = insertelement <4 x i32> %240, i32 %238, i32 1 %242 = insertelement <4 x i32> %241, i32 %239, i32 2 %243 = insertelement <4 x i32> %242, i32 0, i32 3 %244 = bitcast <8 x i32> %66 to <32 x i8> %245 = bitcast <4 x i32> %68 to <16 x i8> %246 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %243, <32 x i8> %244, <16 x i8> %245, i32 7) %247 = extractelement <4 x float> %246, i32 0 %248 = fsub float %230, %64 %249 = fadd float %64, %231 %250 = bitcast float %234 to i32 %251 = bitcast float %248 to i32 %252 = bitcast float %249 to i32 %253 = insertelement <4 x i32> undef, i32 %250, i32 0 %254 = insertelement <4 x i32> %253, i32 %251, i32 1 %255 = insertelement <4 x i32> %254, i32 %252, i32 2 %256 = insertelement <4 x i32> %255, i32 0, i32 3 %257 = bitcast <8 x i32> %66 to <32 x i8> %258 = bitcast <4 x i32> %68 to <16 x i8> %259 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %256, <32 x i8> %257, <16 x i8> %258, i32 7) %260 = extractelement <4 x float> %259, i32 0 %261 = fadd float %230, %64 %262 = fsub float %231, %64 %263 = bitcast float %234 to i32 %264 = bitcast float %261 to i32 %265 = bitcast float %262 to i32 %266 = insertelement <4 x i32> undef, i32 %263, i32 0 %267 = insertelement <4 x i32> %266, i32 %264, i32 1 %268 = insertelement <4 x i32> %267, i32 %265, i32 2 %269 = insertelement <4 x i32> %268, i32 0, i32 3 %270 = bitcast <8 x i32> %66 to <32 x i8> %271 = bitcast <4 x i32> %68 to <16 x i8> %272 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %269, <32 x i8> %270, <16 x i8> %271, i32 7) %273 = extractelement <4 x float> %272, i32 0 %274 = fsub float %230, %64 %275 = fsub float %231, %64 %276 = bitcast float %234 to i32 %277 = bitcast float %274 to i32 %278 = bitcast float %275 to i32 %279 = insertelement <4 x i32> undef, i32 %276, i32 0 %280 = insertelement <4 x i32> %279, i32 %277, i32 1 %281 = insertelement <4 x i32> %280, i32 %278, i32 2 %282 = insertelement <4 x i32> %281, i32 0, i32 3 %283 = bitcast <8 x i32> %66 to <32 x i8> %284 = bitcast <4 x i32> %68 to <16 x i8> %285 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %282, <32 x i8> %283, <16 x i8> %284, i32 7) %286 = extractelement <4 x float> %285, i32 0 %287 = fadd float %64, %230 %288 = fadd float %231, 0.000000e+00 %289 = bitcast float %234 to i32 %290 = bitcast float %287 to i32 %291 = bitcast float %288 to i32 %292 = insertelement <4 x i32> undef, i32 %289, i32 0 %293 = insertelement <4 x i32> %292, i32 %290, i32 1 %294 = insertelement <4 x i32> %293, i32 %291, i32 2 %295 = insertelement <4 x i32> %294, i32 0, i32 3 %296 = bitcast <8 x i32> %66 to <32 x i8> %297 = bitcast <4 x i32> %68 to <16 x i8> %298 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %295, <32 x i8> %296, <16 x i8> %297, i32 7) %299 = extractelement <4 x float> %298, i32 0 %300 = fsub float %230, %64 %301 = fadd float %231, 0.000000e+00 %302 = bitcast float %234 to i32 %303 = bitcast float %300 to i32 %304 = bitcast float %301 to i32 %305 = insertelement <4 x i32> undef, i32 %302, i32 0 %306 = insertelement <4 x i32> %305, i32 %303, i32 1 %307 = insertelement <4 x i32> %306, i32 %304, i32 2 %308 = insertelement <4 x i32> %307, i32 0, i32 3 %309 = bitcast <8 x i32> %66 to <32 x i8> %310 = bitcast <4 x i32> %68 to <16 x i8> %311 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %308, <32 x i8> %309, <16 x i8> %310, i32 7) %312 = extractelement <4 x float> %311, i32 0 %313 = fadd float %230, 0.000000e+00 %314 = fsub float %231, %64 %315 = bitcast float %234 to i32 %316 = bitcast float %313 to i32 %317 = bitcast float %314 to i32 %318 = insertelement <4 x i32> undef, i32 %315, i32 0 %319 = insertelement <4 x i32> %318, i32 %316, i32 1 %320 = insertelement <4 x i32> %319, i32 %317, i32 2 %321 = insertelement <4 x i32> %320, i32 0, i32 3 %322 = bitcast <8 x i32> %66 to <32 x i8> %323 = bitcast <4 x i32> %68 to <16 x i8> %324 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %321, <32 x i8> %322, <16 x i8> %323, i32 7) %325 = extractelement <4 x float> %324, i32 0 %326 = fadd float %230, 0.000000e+00 %327 = fadd float %64, %231 %328 = bitcast float %234 to i32 %329 = bitcast float %326 to i32 %330 = bitcast float %327 to i32 %331 = insertelement <4 x i32> undef, i32 %328, i32 0 %332 = insertelement <4 x i32> %331, i32 %329, i32 1 %333 = insertelement <4 x i32> %332, i32 %330, i32 2 %334 = insertelement <4 x i32> %333, i32 0, i32 3 %335 = bitcast <8 x i32> %66 to <32 x i8> %336 = bitcast <4 x i32> %68 to <16 x i8> %337 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %334, <32 x i8> %335, <16 x i8> %336, i32 7) %338 = extractelement <4 x float> %337, i32 0 %339 = bitcast float %234 to i32 %340 = bitcast float %230 to i32 %341 = bitcast float %231 to i32 %342 = insertelement <4 x i32> undef, i32 %339, i32 0 %343 = insertelement <4 x i32> %342, i32 %340, i32 1 %344 = insertelement <4 x i32> %343, i32 %341, i32 2 %345 = insertelement <4 x i32> %344, i32 0, i32 3 %346 = bitcast <8 x i32> %66 to <32 x i8> %347 = bitcast <4 x i32> %68 to <16 x i8> %348 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %345, <32 x i8> %346, <16 x i8> %347, i32 7) %349 = extractelement <4 x float> %348, i32 0 %350 = fmul float %247, 0x3FB32D10E0000000 %351 = fmul float %260, 0x3FB32D10E0000000 %352 = fadd float %350, %351 %353 = fmul float %273, 0x3FB32D10E0000000 %354 = fadd float %352, %353 %355 = fmul float %286, 0x3FB32D10E0000000 %356 = fadd float %354, %355 %357 = fmul float %299, 0x3FBFA3FCC0000000 %358 = fmul float %312, 0x3FBFA3FCC0000000 %359 = fadd float %357, %358 %360 = fmul float %325, 0x3FBFA3FCC0000000 %361 = fadd float %359, %360 %362 = fmul float %338, 0x3FBFA3FCC0000000 %363 = fadd float %361, %362 %364 = fadd float %356, %363 %365 = fmul float %349, 0x3FCA5DFA80000000 %366 = fadd float %365, %364 %367 = fmul float %31, %202 %368 = fsub float -0.000000e+00, %367 %369 = fmul float %32, %203 %370 = fsub float %368, %369 %371 = fmul float %33, %204 %372 = fsub float %370, %371 %373 = call float @llvm.AMDIL.clamp.(float %372, float 0.000000e+00, float 1.000000e+00) %374 = fmul float %366, %373 %375 = fsub float 1.000000e+00, %374 %376 = fmul float %44, %45 %377 = fmul float %44, %46 %378 = fmul float %44, %47 %379 = fmul float %37, %202 %380 = fmul float %38, %203 %381 = fadd float %380, %379 %382 = fmul float %39, %204 %383 = fadd float %381, %382 %384 = call float @llvm.AMDIL.clamp.(float %383, float 0.000000e+00, float 1.000000e+00) %385 = fmul float %40, %41 %386 = fmul float %40, %42 %387 = fmul float %40, %43 %388 = fmul float %374, %34 %389 = fmul float %374, %35 %390 = fmul float %374, %36 %391 = fmul float %384, %385 %392 = fadd float %391, %388 %393 = fmul float %384, %386 %394 = fadd float %393, %389 %395 = fmul float %384, %387 %396 = fadd float %395, %390 %397 = fmul float %375, %376 %398 = fadd float %397, %392 %399 = fmul float %375, %377 %400 = fadd float %399, %394 %401 = fmul float %375, %378 %402 = fadd float %401, %396 %403 = fmul float %118, %398 %404 = fmul float %119, %400 %405 = fmul float %120, %402 %406 = fmul float %92, %121 %407 = fmul float %406, %25 %408 = call i32 @llvm.SI.packf16(float %403, float %404) %409 = bitcast i32 %408 to float %410 = call i32 @llvm.SI.packf16(float %405, float %407) %411 = bitcast i32 %410 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %409, float %411, float %409, float %411) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_mov_b32_e32 v2, 0x3d996887 ; 7E0402FF 3D996887 v_mov_b32_e32 v3, 0x3dfd1fe6 ; 7E0602FF 3DFD1FE6 v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 v_interp_p1_f32 v5, v0, 1, 0, [m0] ; C8140100 v_interp_p2_f32 v5, [v5], v1, 1, 0, [m0] ; C8150101 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v9, v0, 0, 2, [m0] ; C8240800 v_interp_p2_f32 v9, [v9], v1, 0, 2, [m0] ; C8250801 v_interp_p1_f32 v10, v0, 1, 2, [m0] ; C8280900 v_interp_p2_f32 v10, [v10], v1, 1, 2, [m0] ; C8290901 v_interp_p1_f32 v11, v0, 2, 2, [m0] ; C82C0A00 v_interp_p2_f32 v11, [v11], v1, 2, 2, [m0] ; C82D0A01 v_interp_p1_f32 v12, v0, 0, 3, [m0] ; C8300C00 v_interp_p2_f32 v12, [v12], v1, 0, 3, [m0] ; C8310C01 v_interp_p1_f32 v13, v0, 1, 3, [m0] ; C8340D00 v_interp_p2_f32 v13, [v13], v1, 1, 3, [m0] ; C8350D01 v_interp_p1_f32 v14, v0, 2, 3, [m0] ; C8380E00 v_interp_p2_f32 v14, [v14], v1, 2, 3, [m0] ; C8390E01 v_interp_p1_f32 v15, v0, 3, 3, [m0] ; C83C0F00 v_interp_p2_f32 v15, [v15], v1, 3, 3, [m0] ; C83D0F01 v_interp_p1_f32 v16, v0, 0, 4, [m0] ; C8401000 v_interp_p2_f32 v16, [v16], v1, 0, 4, [m0] ; C8411001 v_interp_p1_f32 v17, v0, 1, 4, [m0] ; C8441100 v_interp_p2_f32 v17, [v17], v1, 1, 4, [m0] ; C8451101 v_interp_p1_f32 v18, v0, 2, 4, [m0] ; C8481200 v_interp_p2_f32 v18, [v18], v1, 2, 4, [m0] ; C8491201 v_interp_p1_f32 v0, v0, 3, 4, [m0] ; C8001300 v_interp_p2_f32 v0, [v0], v1, 3, 4, [m0] ; C8011301 s_load_dwordx4 s[20:23], s[2:3], 0x4 ; C08A0304 s_load_dwordx4 s[0:3], s[2:3], 0x14 ; C0800314 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx4 s[36:39], s[4:5], 0x4 ; C0920504 s_load_dwordx4 s[24:27], s[4:5], 0x8 ; C08C0508 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx8 s[40:47], s[6:7], 0x8 ; C0D40708 s_load_dwordx8 s[28:35], s[6:7], 0x10 ; C0CE0710 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[20:23], 0x4 ; C2021504 s_buffer_load_dword s6, s[0:3], 0x33 ; C2030133 s_buffer_load_dword s5, s[0:3], 0x34 ; C2028134 s_buffer_load_dword s7, s[0:3], 0x35 ; C2038135 s_buffer_load_dword s48, s[0:3], 0x36 ; C2180136 s_buffer_load_dword s49, s[0:3], 0x37 ; C2188137 s_buffer_load_dword s50, s[0:3], 0x38 ; C2190138 s_buffer_load_dword s51, s[0:3], 0x39 ; C2198139 s_buffer_load_dword s52, s[0:3], 0x3a ; C21A013A s_buffer_load_dword s53, s[0:3], 0x3b ; C21A813B s_buffer_load_dword s54, s[0:3], 0x3c ; C21B013C s_buffer_load_dword s55, s[0:3], 0x3d ; C21B813D s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s5, v6 ; 10020C05 v_mac_f32_e32 v1, s7, v7 ; 3E020E07 v_mac_f32_e32 v1, s48, v8 ; 3E021030 v_add_f32_e32 v1, s49, v1 ; 06020231 v_mul_f32_e32 v19, s50, v6 ; 10260C32 v_mac_f32_e32 v19, s51, v7 ; 3E260E33 v_mac_f32_e32 v19, s52, v8 ; 3E261034 v_add_f32_e32 v19, s53, v19 ; 06262635 v_mul_f32_e32 v20, s54, v6 ; 10280C36 s_buffer_load_dword s7, s[0:3], 0x3e ; C203813E s_buffer_load_dword s48, s[0:3], 0x3f ; C218013F s_buffer_load_dword s5, s[0:3], 0x40 ; C2028140 v_mac_f32_e32 v20, s55, v7 ; 3E280E37 s_buffer_load_dword s49, s[0:3], 0x30 ; C2188130 s_buffer_load_dword s50, s[0:3], 0x31 ; C2190131 s_buffer_load_dword s51, s[0:3], 0x32 ; C2198132 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s49, v6 ; 100C0C31 v_mac_f32_e32 v20, s7, v8 ; 3E281007 v_add_f32_e32 v20, s48, v20 ; 06282830 v_rcp_f32_e32 v20, v20 ; 7E285514 v_mac_f32_e32 v6, s50, v7 ; 3E0C0E32 v_mac_f32_e32 v6, s51, v8 ; 3E0C1033 v_add_f32_e32 v6, s6, v6 ; 060C0C06 v_mul_f32_e32 v7, v20, v19 ; 100E2714 v_add_f32_e64 v21, 0, -v7 clamp ; D2060815 40020E80 v_mad_f32 v22, v6, v20, s5 ; D2820016 00162906 v_mad_f32 v23, v1, v20, s5 ; D2820017 00162901 v_mov_b32_e32 v24, 0 ; 7E300280 v_mad_f32 v7, v6, v20, -s5 ; D2820007 80162906 v_mov_b32_e32 v25, v21 ; 7E320315 v_mov_b32_e32 v26, v22 ; 7E340316 v_mov_b32_e32 v27, v23 ; 7E360317 v_mov_b32_e32 v28, v24 ; 7E380318 v_mad_f32 v8, v1, v20, -s5 ; D2820008 80162901 v_mov_b32_e32 v26, v7 ; 7E340307 v_mov_b32_e32 v29, v21 ; 7E3A0315 v_mov_b32_e32 v30, v22 ; 7E3C0316 v_mov_b32_e32 v31, v23 ; 7E3E0317 v_mov_b32_e32 v32, v24 ; 7E400318 image_sample v[33:36], 15, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[40:47], s[36:39] ; F0800F00 012A2104 v_mov_b32_e32 v27, v23 ; 7E360317 v_mov_b32_e32 v31, v8 ; 7E3E0308 s_buffer_load_dword s5, s[20:23], 0x5 ; C2029505 s_buffer_load_dword s6, s[20:23], 0x6 ; C2031506 image_sample v[4:5], 10, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[28:35], s[24:27] ; F0800A00 00C70404 v_mov_b32_e32 v28, v24 ; 7E380318 v_mov_b32_e32 v32, v24 ; 7E400318 image_sample_c_l v7, 1, 0, 0, 0, 0, 0, 0, 0, v[21:24], s[12:19], s[8:11] ; F0B00100 00430715 image_sample_c_l v19, 1, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[12:19], s[8:11] ; F0B00100 00431319 image_sample_c_l v29, 1, 0, 0, 0, 0, 0, 0, 0, v[29:32], s[12:19], s[8:11] ; F0B00100 00431D1D v_mul_f32_e32 v1, v20, v1 ; 10020314 v_mov_b32_e32 v27, v8 ; 7E360308 v_mov_b32_e32 v37, v21 ; 7E4A0315 v_mov_b32_e32 v38, v22 ; 7E4C0316 v_mov_b32_e32 v39, v23 ; 7E4E0317 v_mov_b32_e32 v40, v24 ; 7E500318 v_mul_f32_e32 v6, v20, v6 ; 100C0D14 v_mov_b32_e32 v28, v24 ; 7E380318 v_mov_b32_e32 v39, v1 ; 7E4E0301 image_sample_c_l v20, 1, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[12:19], s[8:11] ; F0B00100 00431419 v_mov_b32_e32 v27, v1 ; 7E360301 s_waitcnt vmcnt(5) ; BF8C0775 v_mul_f32_e32 v30, s4, v33 ; 103C4204 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v31, s5, v34 ; 103E4405 v_mul_f32_e32 v32, s6, v35 ; 10404606 v_mul_f32_e32 v33, v15, v36 ; 1042490F v_mov_b32_e32 v40, v24 ; 7E500318 v_mov_b32_e32 v28, v24 ; 7E380318 v_mov_b32_e32 v22, v6 ; 7E2C0306 v_mul_f32_e32 v6, v9, v9 ; 100C1309 v_mac_f32_e32 v6, v10, v10 ; 3E0C150A v_mac_f32_e32 v6, v11, v11 ; 3E0C170B v_rsq_clamp_f32_e32 v6, v6 ; 7E0C5906 v_mov_b32_e32 v41, v21 ; 7E520315 v_mov_b32_e32 v42, v22 ; 7E540316 v_mov_b32_e32 v43, v23 ; 7E560317 v_mov_b32_e32 v44, v24 ; 7E580318 image_sample_c_l v34, 1, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[12:19], s[8:11] ; F0B00100 00432225 v_mov_b32_e32 v43, v8 ; 7E560308 image_sample_c_l v8, 1, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[12:19], s[8:11] ; F0B00100 00430819 v_mov_b32_e32 v44, v24 ; 7E580318 image_sample_c_l v25, 1, 0, 0, 0, 0, 0, 0, 0, v[41:44], s[12:19], s[8:11] ; F0B00100 00431929 v_mul_f32_e32 v9, v6, v9 ; 10121306 v_mul_f32_e32 v10, v6, v10 ; 10141506 v_mul_f32_e32 v6, v6, v11 ; 100C1706 s_waitcnt vmcnt(7) ; BF8C0777 v_mad_f32 v5, 2.0, v5, -1.0 ; D2820005 03CE0AF4 v_mad_f32 v4, 2.0, v4, -1.0 ; D2820004 03CE08F4 v_mul_f32_e32 v11, v17, v6 ; 10160D11 v_mad_f32 v11, v10, v18, -v11 ; D282000B 842E250A v_mul_f32_e32 v26, v18, v9 ; 10341312 v_mad_f32 v26, v6, v16, -v26 ; D282001A 846A2106 v_mul_f32_e32 v27, v16, v10 ; 10361510 v_mad_f32 v27, v9, v17, -v27 ; D282001B 846E2309 v_mul_f32_e32 v28, v16, v16 ; 10382110 v_mac_f32_e32 v28, v17, v17 ; 3E382311 v_mac_f32_e32 v28, v18, v18 ; 3E382512 v_rsq_clamp_f32_e32 v28, v28 ; 7E38591C v_mul_f32_e32 v35, v11, v11 ; 1046170B v_mac_f32_e32 v35, v26, v26 ; 3E46351A v_mac_f32_e32 v35, v27, v27 ; 3E46371B v_rsq_clamp_f32_e32 v35, v35 ; 7E465923 s_buffer_load_dword s4, s[20:23], 0x0 ; C2021500 v_mul_f32_e32 v16, v28, v16 ; 1020211C v_mul_f32_e32 v17, v28, v17 ; 1022231C v_mul_f32_e32 v18, v28, v18 ; 1024251C v_mul_f32_e32 v11, v35, v11 ; 10161723 v_mul_f32_e32 v26, v35, v26 ; 10343523 v_mul_f32_e32 v27, v35, v27 ; 10363723 s_buffer_load_dword s5, s[20:23], 0x1 ; C2029501 v_mad_f32 v28, -v4, v4, 1.0 ; D282001C 23CA0904 v_mad_f32 v28, -v5, v5, v28 ; D282001C 24720B05 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v5, s4, v5 ; 100A0A04 v_mul_f32_e32 v4, s4, v4 ; 10080804 v_add_f32_e64 v28, 0, v28 clamp ; D206081C 00023880 v_sqrt_f32_e32 v28, v28 ; 7E38671C v_mul_f32_e32 v35, v5, v5 ; 10460B05 v_mac_f32_e32 v35, v4, v4 ; 3E460904 v_mac_f32_e32 v35, v28, v28 ; 3E46391C v_rsq_clamp_f32_e32 v35, v35 ; 7E465923 v_mul_f32_e32 v11, v11, v0 ; 1016010B v_mul_f32_e32 v26, v26, v0 ; 1034011A v_mul_f32_e32 v0, v27, v0 ; 1000011B v_mul_f32_e32 v4, v35, v4 ; 10080923 v_mul_f32_e32 v11, v4, v11 ; 10161704 v_mul_f32_e32 v26, v4, v26 ; 10343504 v_mul_f32_e32 v0, v4, v0 ; 10000104 v_mul_f32_e32 v4, v35, v5 ; 10080B23 v_mad_f32 v5, v4, v16, -v11 ; D2820005 842E2104 v_mad_f32 v11, v4, v17, -v26 ; D282000B 846A2304 v_mad_f32 v0, v4, v18, -v0 ; D2820000 84022504 v_mul_f32_e32 v4, v35, v28 ; 10083923 v_mac_f32_e32 v5, v4, v9 ; 3E0A1304 v_mac_f32_e32 v11, v4, v10 ; 3E161504 v_mac_f32_e32 v0, v4, v6 ; 3E000D04 image_sample_c_l v4, 1, 0, 0, 0, 0, 0, 0, 0, v[21:24], s[12:19], s[8:11] ; F0B00100 00430415 v_mov_b32_e32 v23, v1 ; 7E2E0301 v_mul_f32_e32 v1, v5, v5 ; 10020B05 v_mac_f32_e32 v1, v11, v11 ; 3E02170B v_mac_f32_e32 v1, v0, v0 ; 3E020100 v_rsq_clamp_f32_e32 v1, v1 ; 7E025901 image_sample_c_l v6, 1, 0, 0, 0, 0, 0, 0, 0, v[21:24], s[12:19], s[8:11] ; F0B00100 00430615 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101 v_mul_f32_e32 v5, v1, v5 ; 100A0B01 v_mul_f32_e32 v9, v1, v11 ; 10121701 s_buffer_load_dword s7, s[0:3], 0x10 ; C2038110 s_buffer_load_dword s8, s[0:3], 0x11 ; C2040111 v_mul_f32_e32 v0, v1, v0 ; 10000101 s_buffer_load_dword s9, s[0:3], 0x2 ; C2048102 s_buffer_load_dword s10, s[0:3], 0x4 ; C2050104 s_buffer_load_dword s11, s[0:3], 0x5 ; C2058105 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v1, s4, v5 ; 10020A04 v_mad_f32 v1, -s6, v9, -v1 ; D2820001 A4061206 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_buffer_load_dword s6, s[0:3], 0x13 ; C2030113 v_mul_f32_e32 v5, s7, v5 ; 100A0A07 v_mac_f32_e32 v5, s8, v9 ; 3E0A1208 s_buffer_load_dword s7, s[0:3], 0x6 ; C2038106 s_buffer_load_dword s8, s[0:3], 0x19 ; C2040119 s_buffer_load_dword s12, s[0:3], 0x1a ; C206011A s_buffer_load_dword s13, s[0:3], 0x14 ; C2068114 s_buffer_load_dword s14, s[0:3], 0x15 ; C2070115 s_buffer_load_dword s15, s[0:3], 0x16 ; C2078116 s_buffer_load_dword s16, s[0:3], 0x17 ; C2080117 s_buffer_load_dword s0, s[0:3], 0x18 ; C2000118 v_mad_f32 v1, -s9, v0, v1 ; D2820001 24060009 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v5, s4, v0 ; 3E0A0004 v_mul_f32_e32 v0, v2, v19 ; 10002702 v_mac_f32_e32 v0, v2, v7 ; 3E000F02 v_mac_f32_e32 v0, v2, v29 ; 3E003B02 v_mac_f32_e32 v0, v2, v20 ; 3E002902 v_mul_f32_e32 v2, v3, v8 ; 10041103 v_mac_f32_e32 v2, v3, v34 ; 3E044503 v_mac_f32_e32 v2, v3, v25 ; 3E043303 v_mac_f32_e32 v2, v3, v4 ; 3E040903 v_add_f32_e32 v0, v2, v0 ; 06000102 v_madmk_f32_e32 v0, v6, v0, 0x3e52efd4 ; 40000106 3E52EFD4 v_mov_b32_e32 v2, s13 ; 7E04020D v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_mul_f32_e32 v3, v1, v0 ; 10060101 v_mul_f32_e32 v4, s10, v3 ; 1008060A v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80 v_mul_f32_e32 v2, s6, v2 ; 10040406 v_mac_f32_e32 v4, v2, v5 ; 3E080B02 v_mov_b32_e32 v2, s14 ; 7E04020E v_mul_f32_e32 v6, s11, v3 ; 100C060B v_mul_f32_e32 v2, s6, v2 ; 10040406 v_mac_f32_e32 v6, v2, v5 ; 3E0C0B02 v_mov_b32_e32 v2, s15 ; 7E04020F v_mul_f32_e32 v2, s6, v2 ; 10040406 v_mul_f32_e32 v3, s7, v3 ; 10060607 v_mac_f32_e32 v3, v2, v5 ; 3E060B02 v_mul_f32_e32 v2, v12, v30 ; 10043D0C v_mul_f32_e32 v5, v13, v31 ; 100A3F0D v_mul_f32_e32 v7, v14, v32 ; 100E410E v_mad_f32 v0, -v0, v1, 1.0 ; D2820000 23CA0300 v_mov_b32_e32 v1, s0 ; 7E020200 v_mul_f32_e32 v1, s16, v1 ; 10020210 v_mac_f32_e32 v4, v1, v0 ; 3E080101 v_mov_b32_e32 v1, s8 ; 7E020208 v_mul_f32_e32 v1, s16, v1 ; 10020210 v_mac_f32_e32 v6, v1, v0 ; 3E0C0101 v_mov_b32_e32 v1, s12 ; 7E02020C v_mul_f32_e32 v1, s16, v1 ; 10020210 v_mac_f32_e32 v3, v1, v0 ; 3E060101 v_mul_f32_e32 v0, v4, v2 ; 10000504 v_mul_f32_e32 v1, v6, v5 ; 10020B06 v_mul_f32_e32 v2, v3, v7 ; 10040F03 v_mul_f32_e32 v3, v33, v15 ; 10061F21 v_mul_f32_e32 v3, s5, v3 ; 10060605 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 48 Code Size: 1256 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL OUT[4], GENERIC[2] DCL OUT[5], GENERIC[3] DCL OUT[6], GENERIC[4] DCL OUT[7], GENERIC[5] DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0] DCL CONST[2][0..15] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..7] DCL CONST[6][0] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL DCL TEMP[2..13], LOCAL IMM[0] FLT32 { 0.0000, 0.5000, -0.5000, 0.0774} IMM[1] UINT32 {4, 96, 80, 112} IMM[2] FLT32 { 0.9479, 0.0521, 2.4000, 0.0404} IMM[3] UINT32 {3, 320, 336, 48} IMM[4] FLT32 { 1.0000, 0.0039, 0.0000, 0.0000} IMM[5] UINT32 {304, 64, 0, 512} IMM[6] UINT32 {528, 544, 560, 516} IMM[7] UINT32 {532, 548, 564, 524} IMM[8] UINT32 {540, 556, 572, 364} IMM[9] UINT32 {372, 520, 536, 552} IMM[10] UINT32 {568, 0, 0, 0} IMM[11] FLT32 { 0.0010, 2.0000, 0.0000, 0.0000} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MAD TEMP[0].x, IN[0].xxxx, IMM[0].yyyy, IMM[0].yyyy 4: MAD TEMP[2].x, IN[0].yyyy, IMM[0].zzzz, IMM[0].yyyy 5: MOV TEMP[3].x, TEMP[0].xxxx 6: MOV TEMP[3].y, TEMP[2].xxxx 7: MOV TEMP[3].z, TEMP[0].xxxx 8: MOV TEMP[3].w, TEMP[2].xxxx 9: MAD TEMP[0], TEMP[3], CONST[5][6].zwzw, CONST[5][6].xyxy 10: MOV TEMP[2].xz, TEMP[0].xxzx 11: MAD TEMP[3].x, IMM[0].yyyy, IN[0].yyyy, IMM[0].yyyy 12: LRP TEMP[3].x, TEMP[3].xxxx, IN[1].yyyy, IN[1].xxxx 13: ADD TEMP[3].x, TEMP[3].xxxx, CONST[5][5].zzzz 14: MUL TEMP[3].x, TEMP[3].xxxx, CONST[5][5].wwww 15: MAD TEMP[3].x, TEMP[3].xxxx, CONST[5][6].wwww, CONST[5][6].yyyy 16: LRP TEMP[0].xy, CONST[5][5].xxxx, TEMP[3].xxxx, TEMP[0].ywww 17: MOV TEMP[2].yw, TEMP[0].yxyy 18: MUL TEMP[0].xyz, IN[2].xyzz, IMM[0].wwww 19: MAD TEMP[3].xyz, IN[2].xyzz, IMM[2].xxxx, IMM[2].yyyy 20: POW TEMP[4].x, TEMP[3].xxxx, IMM[2].zzzz 21: POW TEMP[4].y, TEMP[3].yyyy, IMM[2].zzzz 22: POW TEMP[4].z, TEMP[3].zzzz, IMM[2].zzzz 23: FSLT TEMP[3].x, IMM[2].wwww, IN[2].xxxx 24: UIF TEMP[3].xxxx :0 25: MOV TEMP[3].x, TEMP[4].xxxx 26: ELSE :0 27: MOV TEMP[3].x, TEMP[0].xxxx 28: ENDIF 29: FSLT TEMP[5].x, IMM[2].wwww, IN[2].yyyy 30: UIF TEMP[5].xxxx :0 31: MOV TEMP[5].x, TEMP[4].yyyy 32: ELSE :0 33: MOV TEMP[5].x, TEMP[0].yyyy 34: ENDIF 35: FSLT TEMP[6].x, IMM[2].wwww, IN[2].zzzz 36: UIF TEMP[6].xxxx :0 37: MOV TEMP[4].x, TEMP[4].zzzz 38: ELSE :0 39: MOV TEMP[4].x, TEMP[0].zzzz 40: ENDIF 41: MOV TEMP[0].x, TEMP[3].xxxx 42: MOV TEMP[0].y, TEMP[5].xxxx 43: MOV TEMP[0].z, TEMP[4].xxxx 44: MOV TEMP[0].w, IN[2].wwww 45: LRP TEMP[0], CONST[5][7].xxxx, TEMP[0], IN[2] 46: MUL TEMP[3].xyz, CONST[4][20].zxyy, CONST[4][21].yzxx 47: MAD TEMP[3].xyz, CONST[4][20].yzxx, CONST[4][21].zxyy, -TEMP[3].xyzz 48: COS TEMP[4].x, IN[4].xxxx 49: SIN TEMP[5].x, IN[4].xxxx 50: MUL TEMP[6].xyz, TEMP[4].xxxx, TEMP[3].xyzz 51: MAD TEMP[6].xyz, TEMP[5].xxxx, CONST[4][21].xyzz, TEMP[6].xyzz 52: MUL TEMP[3].xyz, TEMP[5].xxxx, TEMP[3].xyzz 53: MAD TEMP[3].xyz, TEMP[4].xxxx, CONST[4][21].xyzz, -TEMP[3].xyzz 54: SIN TEMP[4].x, IN[4].yyyy 55: COS TEMP[5].x, IN[4].yyyy 56: ADD TEMP[7].x, IMM[4].xxxx, -TEMP[5].xxxx 57: MUL TEMP[8].x, TEMP[3].xxxx, TEMP[3].yyyy 58: MUL TEMP[8].x, TEMP[7].xxxx, TEMP[8].xxxx 59: MUL TEMP[9].x, TEMP[4].xxxx, TEMP[3].zzzz 60: MUL TEMP[10].x, TEMP[3].xxxx, TEMP[3].zzzz 61: MUL TEMP[10].x, TEMP[7].xxxx, TEMP[10].xxxx 62: MUL TEMP[11].x, TEMP[4].xxxx, TEMP[3].yyyy 63: MUL TEMP[12].x, TEMP[3].yyyy, TEMP[3].zzzz 64: MUL TEMP[7].x, TEMP[12].xxxx, TEMP[7].xxxx 65: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[3].xxxx 66: MUL TEMP[12].x, TEMP[3].xxxx, TEMP[3].xxxx 67: LRP TEMP[12].x, TEMP[5].xxxx, IMM[4].xxxx, TEMP[12].xxxx 68: ADD TEMP[13].x, TEMP[8].xxxx, -TEMP[9].xxxx 69: MOV TEMP[12].y, TEMP[13].xxxx 70: ADD TEMP[13].x, TEMP[10].xxxx, TEMP[11].xxxx 71: MOV TEMP[12].z, TEMP[13].xxxx 72: ADD TEMP[8].x, TEMP[8].xxxx, TEMP[9].xxxx 73: MUL TEMP[9].x, TEMP[3].yyyy, TEMP[3].yyyy 74: LRP TEMP[9].x, TEMP[5].xxxx, IMM[4].xxxx, TEMP[9].xxxx 75: MOV TEMP[8].y, TEMP[9].xxxx 76: ADD TEMP[9].x, TEMP[7].xxxx, -TEMP[4].xxxx 77: MOV TEMP[8].z, TEMP[9].xxxx 78: ADD TEMP[9].x, TEMP[10].xxxx, -TEMP[11].xxxx 79: ADD TEMP[4].x, TEMP[7].xxxx, TEMP[4].xxxx 80: MOV TEMP[9].y, TEMP[4].xxxx 81: MUL TEMP[4].x, TEMP[3].zzzz, TEMP[3].zzzz 82: LRP TEMP[4].x, TEMP[5].xxxx, IMM[4].xxxx, TEMP[4].xxxx 83: MOV TEMP[9].z, TEMP[4].xxxx 84: DP3 TEMP[4].x, TEMP[6].xyzz, TEMP[12].xyzz 85: DP3 TEMP[5].x, TEMP[6].xyzz, TEMP[8].xyzz 86: MOV TEMP[4].y, TEMP[5].xxxx 87: DP3 TEMP[5].x, TEMP[6].xyzz, TEMP[9].xyzz 88: MOV TEMP[4].z, TEMP[5].xxxx 89: ADD TEMP[5].x, IN[0].xxxx, CONST[5][3].yyyy 90: ADD TEMP[6].x, IN[0].yyyy, CONST[5][3].zzzz 91: MUL TEMP[3].xyz, TEMP[6].xxxx, TEMP[3].xyzz 92: MAD TEMP[3].xyz, TEMP[4].xyzz, TEMP[5].xxxx, TEMP[3].xyzz 93: ADD TEMP[4].xyz, CONST[4][19].xyzz, -IN[3].xyzz 94: DP3 TEMP[4].x, TEMP[4].xyzz, TEMP[4].xyzz 95: SQRT TEMP[4].x, TEMP[4].xxxx 96: MOV TEMP[5], TEMP[0] 97: MOV TEMP[6].x, IN[4].wwww 98: FSLT TEMP[7].x, IMM[0].xxxx, CONST[5][4].zzzz 99: UIF TEMP[7].xxxx :0 100: MUL TEMP[7].x, TEMP[4].xxxx, CONST[5][0].yyyy 101: FSLT TEMP[8].x, TEMP[7].xxxx, IN[4].wwww 102: ADD TEMP[7].x, IN[4].wwww, -TEMP[7].xxxx 103: ADD TEMP[9].x, CONST[5][0].zzzz, -CONST[5][0].yyyy 104: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[4].xxxx 105: RCP TEMP[9].x, TEMP[9].xxxx 106: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[9].xxxx 107: ADD TEMP[7].x, IMM[4].xxxx, -TEMP[7].xxxx 108: MUL TEMP[7], TEMP[7].xxxx, TEMP[0] 109: MUL TEMP[9].x, TEMP[4].xxxx, CONST[5][0].zzzz 110: FSLT TEMP[9].x, TEMP[9].xxxx, IN[4].wwww 111: UIF TEMP[9].xxxx :0 112: MOV TEMP[9], IMM[0].xxxx 113: ELSE :0 114: MOV TEMP[9], TEMP[7] 115: ENDIF 116: UIF TEMP[8].xxxx :0 117: MOV TEMP[7], TEMP[9] 118: ELSE :0 119: MOV TEMP[7], TEMP[0] 120: ENDIF 121: MOV TEMP[5], TEMP[7] 122: MUL TEMP[0].x, TEMP[4].xxxx, CONST[5][0].xxxx 123: MAX TEMP[0].x, IN[4].wwww, TEMP[0].xxxx 124: MUL TEMP[4].x, TEMP[4].xxxx, CONST[5][0].wwww 125: MIN TEMP[6].x, TEMP[0].xxxx, TEMP[4].xxxx 126: ENDIF 127: FSLT TEMP[0].x, TEMP[5].wwww, IMM[4].yyyy 128: UIF TEMP[0].xxxx :0 129: MOV TEMP[0].x, IMM[0].xxxx 130: ELSE :0 131: MOV TEMP[0].x, TEMP[6].xxxx 132: ENDIF 133: MOV TEMP[4].xy, IMM[0].xxxx 134: MOV TEMP[4].w, IMM[0].xxxx 135: TXL TEMP[4], TEMP[4], SAMP[0], 2D 136: MUL TEMP[4].xyz, TEMP[4], IMM[4].zzzz 137: MAD TEMP[0].xyz, TEMP[3].xyzz, TEMP[0].xxxx, IN[3].xyzz 138: ADD TEMP[0].xyz, TEMP[4].xyzz, TEMP[0].xyzz 139: MOV TEMP[3].w, IMM[4].xxxx 140: MOV TEMP[3].x, TEMP[0].xxxx 141: MOV TEMP[3].y, TEMP[0].yyyy 142: MOV TEMP[3].z, TEMP[0].zzzz 143: MOV TEMP[4].x, CONST[4][32].xxxx 144: MOV TEMP[4].y, CONST[4][33].xxxx 145: MOV TEMP[4].z, CONST[4][34].xxxx 146: MOV TEMP[4].w, CONST[4][35].xxxx 147: DP4 TEMP[4].x, TEMP[3], TEMP[4] 148: MOV TEMP[6].x, CONST[4][32].yyyy 149: MOV TEMP[6].y, CONST[4][33].yyyy 150: MOV TEMP[6].z, CONST[4][34].yyyy 151: MOV TEMP[6].w, CONST[4][35].yyyy 152: DP4 TEMP[6].x, TEMP[3], TEMP[6] 153: MOV TEMP[7].x, CONST[4][32].wwww 154: MOV TEMP[7].y, CONST[4][33].wwww 155: MOV TEMP[7].z, CONST[4][34].wwww 156: MOV TEMP[7].w, CONST[4][35].wwww 157: DP4 TEMP[7].x, TEMP[3], TEMP[7] 158: MAD TEMP[8].xyz, CONST[4][20].xyzz, CONST[5][3].xxxx, TEMP[0].xyzz 159: MOV TEMP[9].w, IMM[4].xxxx 160: MOV TEMP[9].x, TEMP[8].xxxx 161: MOV TEMP[9].y, TEMP[8].yyyy 162: MOV TEMP[9].z, TEMP[8].zzzz 163: MOV TEMP[8].xyz, -CONST[4][19].xyzx 164: ADD TEMP[10].xyz, TEMP[0].xyzz, TEMP[8].xyzz 165: MOV TEMP[11].x, TEMP[0].xxxx 166: MOV TEMP[11].y, TEMP[0].yyyy 167: MOV TEMP[11].z, TEMP[0].zzzz 168: DP3 TEMP[12].x, CONST[4][20].xyzz, TEMP[10].xyzz 169: MOV TEMP[11].w, TEMP[12].xxxx 170: MOV TEMP[12].x, TEMP[4].xxxx 171: MOV TEMP[12].y, TEMP[6].xxxx 172: MOV TEMP[13].x, -CONST[4][22].wwww 173: DP3 TEMP[10].x, TEMP[10].xyzz, CONST[4][20].xyzz 174: ADD TEMP[10].x, TEMP[10].xxxx, TEMP[13].xxxx 175: ADD TEMP[13].x, CONST[4][23].yyyy, TEMP[13].xxxx 176: RCP TEMP[13].x, TEMP[13].xxxx 177: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[13].xxxx 178: MOV TEMP[12].z, TEMP[10].xxxx 179: MOV TEMP[12].w, TEMP[7].xxxx 180: ADD TEMP[0].xyz, TEMP[0].xyzz, TEMP[8].xyzz 181: MOV TEMP[0].xyz, -TEMP[0].xyzx 182: DP3 TEMP[8].x, TEMP[0].xyzz, TEMP[0].xyzz 183: RSQ TEMP[8].x, TEMP[8].xxxx 184: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[8].xxxx 185: MOV TEMP[4].x, TEMP[4].xxxx 186: MOV TEMP[4].y, -TEMP[6].xxxx 187: MOV TEMP[6].x, CONST[4][32].zzzz 188: MOV TEMP[6].y, CONST[4][33].zzzz 189: MOV TEMP[6].z, CONST[4][34].zzzz 190: MOV TEMP[6].w, CONST[4][35].zzzz 191: MOV TEMP[8].x, CONST[4][32].wwww 192: MOV TEMP[8].y, CONST[4][33].wwww 193: MOV TEMP[8].z, CONST[4][34].wwww 194: MOV TEMP[8].w, CONST[4][35].wwww 195: MOV TEMP[10].x, CONST[4][32].zzzz 196: MOV TEMP[10].y, CONST[4][33].zzzz 197: MOV TEMP[10].z, CONST[4][34].zzzz 198: MOV TEMP[10].w, CONST[4][35].zzzz 199: DP4 TEMP[6].x, TEMP[9], TEMP[6] 200: DP4 TEMP[8].x, TEMP[9], TEMP[8] 201: RCP TEMP[8].x, TEMP[8].xxxx 202: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[8].xxxx 203: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].xxxx 204: DP4 TEMP[3].x, TEMP[3], TEMP[10] 205: MIN TEMP[3].x, IMM[11].xxxx, TEMP[3].xxxx 206: MAX TEMP[3].x, TEMP[6].xxxx, TEMP[3].xxxx 207: MAD TEMP[3].x, TEMP[3].xxxx, IMM[11].yyyy, -TEMP[7].xxxx 208: MOV TEMP[4].z, TEMP[3].xxxx 209: MOV TEMP[4].w, TEMP[7].xxxx 210: MOV OUT[1], TEMP[1] 211: MOV OUT[6].xyz, TEMP[0].xyzx 212: MOV OUT[2], TEMP[2] 213: MOV OUT[3], TEMP[5] 214: MOV OUT[4], TEMP[12] 215: MOV OUT[0], TEMP[4] 216: MOV OUT[5], TEMP[11] 217: MOV OUT[7].xy, IMM[4].xwxx 218: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 336) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 340) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 344) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 372) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 512) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 516) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 520) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 524) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 528) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 532) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 536) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 540) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 544) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 548) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 552) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 556) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 560) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 564) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 568) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 572) %40 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 5 %41 = load <16 x i8>, <16 x i8> addrspace(2)* %40, align 16, !tbaa !0 %42 = call float @llvm.SI.load.const(<16 x i8> %41, i32 4) %43 = call float @llvm.SI.load.const(<16 x i8> %41, i32 8) %44 = call float @llvm.SI.load.const(<16 x i8> %41, i32 48) %45 = call float @llvm.SI.load.const(<16 x i8> %41, i32 52) %46 = call float @llvm.SI.load.const(<16 x i8> %41, i32 56) %47 = call float @llvm.SI.load.const(<16 x i8> %41, i32 72) %48 = call float @llvm.SI.load.const(<16 x i8> %41, i32 80) %49 = call float @llvm.SI.load.const(<16 x i8> %41, i32 88) %50 = call float @llvm.SI.load.const(<16 x i8> %41, i32 92) %51 = call float @llvm.SI.load.const(<16 x i8> %41, i32 96) %52 = call float @llvm.SI.load.const(<16 x i8> %41, i32 100) %53 = call float @llvm.SI.load.const(<16 x i8> %41, i32 104) %54 = call float @llvm.SI.load.const(<16 x i8> %41, i32 108) %55 = call float @llvm.SI.load.const(<16 x i8> %41, i32 112) %56 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %57 = load <32 x i8>, <32 x i8> addrspace(2)* %56, align 32, !tbaa !0 %58 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %59 = load <16 x i8>, <16 x i8> addrspace(2)* %58, align 16, !tbaa !0 %60 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %61 = load <16 x i8>, <16 x i8> addrspace(2)* %60, align 16, !tbaa !0 %62 = add i32 %5, %7 %63 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %61, i32 0, i32 %62) %64 = extractelement <4 x float> %63, i32 0 %65 = extractelement <4 x float> %63, i32 1 %66 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %67 = load <16 x i8>, <16 x i8> addrspace(2)* %66, align 16, !tbaa !0 %68 = add i32 %10, %6 %69 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %67, i32 0, i32 %68) %70 = extractelement <4 x float> %69, i32 0 %71 = extractelement <4 x float> %69, i32 1 %72 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %73 = load <16 x i8>, <16 x i8> addrspace(2)* %72, align 16, !tbaa !0 %74 = add i32 %10, %6 %75 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %73, i32 0, i32 %74) %76 = extractelement <4 x float> %75, i32 0 %77 = extractelement <4 x float> %75, i32 1 %78 = extractelement <4 x float> %75, i32 2 %79 = extractelement <4 x float> %75, i32 3 %80 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0 %82 = add i32 %10, %6 %83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %82) %84 = extractelement <4 x float> %83, i32 0 %85 = extractelement <4 x float> %83, i32 1 %86 = extractelement <4 x float> %83, i32 2 %87 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %88 = load <16 x i8>, <16 x i8> addrspace(2)* %87, align 16, !tbaa !0 %89 = add i32 %10, %6 %90 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %88, i32 0, i32 %89) %91 = extractelement <4 x float> %90, i32 0 %92 = extractelement <4 x float> %90, i32 1 %93 = extractelement <4 x float> %90, i32 3 %94 = fmul float %64, 5.000000e-01 %95 = fadd float %94, 5.000000e-01 %96 = fmul float %65, -5.000000e-01 %97 = fadd float %96, 5.000000e-01 %98 = fmul float %95, %53 %99 = fadd float %98, %51 %100 = fmul float %97, %54 %101 = fadd float %100, %52 %102 = fmul float %95, %53 %103 = fadd float %102, %51 %104 = fmul float %97, %54 %105 = fadd float %104, %52 %106 = fmul float %65, 5.000000e-01 %107 = fadd float %106, 5.000000e-01 %108 = call float @llvm.AMDGPU.lrp(float %107, float %71, float %70) %109 = fadd float %108, %49 %110 = fmul float %109, %50 %111 = fmul float %110, %54 %112 = fadd float %111, %52 %113 = call float @llvm.AMDGPU.lrp(float %48, float %112, float %101) %114 = call float @llvm.AMDGPU.lrp(float %48, float %112, float %105) %115 = fmul float %76, 0x3FB3D07220000000 %116 = fmul float %77, 0x3FB3D07220000000 %117 = fmul float %78, 0x3FB3D07220000000 %118 = fmul float %76, 0x3FEE54EDE0000000 %119 = fadd float %118, 0x3FAAB12320000000 %120 = fmul float %77, 0x3FEE54EDE0000000 %121 = fadd float %120, 0x3FAAB12320000000 %122 = fmul float %78, 0x3FEE54EDE0000000 %123 = fadd float %122, 0x3FAAB12320000000 %124 = call float @llvm.pow.f32(float %119, float 0x4003333340000000) %125 = call float @llvm.pow.f32(float %121, float 0x4003333340000000) %126 = call float @llvm.pow.f32(float %123, float 0x4003333340000000) %127 = fcmp ogt float %76, 0x3FA4B5DCC0000000 %. = select i1 %127, float %124, float %115 %128 = fcmp ogt float %77, 0x3FA4B5DCC0000000 %temp20.0 = select i1 %128, float %125, float %116 %129 = fcmp ogt float %78, 0x3FA4B5DCC0000000 %.74 = select i1 %129, float %126, float %117 %130 = call float @llvm.AMDGPU.lrp(float %55, float %., float %76) %131 = call float @llvm.AMDGPU.lrp(float %55, float %temp20.0, float %77) %132 = call float @llvm.AMDGPU.lrp(float %55, float %.74, float %78) %133 = call float @llvm.AMDGPU.lrp(float %55, float %79, float %79) %134 = fmul float %18, %20 %135 = fmul float %16, %21 %136 = fmul float %17, %19 %137 = fmul float %17, %21 %138 = fsub float %137, %134 %139 = fmul float %18, %19 %140 = fsub float %139, %135 %141 = fmul float %16, %20 %142 = fsub float %141, %136 %143 = call float @llvm.cos.f32(float %91) %144 = call float @llvm.sin.f32(float %91) %145 = fmul float %143, %138 %146 = fmul float %143, %140 %147 = fmul float %143, %142 %148 = fmul float %144, %19 %149 = fadd float %148, %145 %150 = fmul float %144, %20 %151 = fadd float %150, %146 %152 = fmul float %144, %21 %153 = fadd float %152, %147 %154 = fmul float %144, %138 %155 = fmul float %144, %140 %156 = fmul float %144, %142 %157 = fmul float %143, %19 %158 = fsub float %157, %154 %159 = fmul float %143, %20 %160 = fsub float %159, %155 %161 = fmul float %143, %21 %162 = fsub float %161, %156 %163 = call float @llvm.sin.f32(float %92) %164 = call float @llvm.cos.f32(float %92) %165 = fsub float 1.000000e+00, %164 %166 = fmul float %158, %160 %167 = fmul float %165, %166 %168 = fmul float %163, %162 %169 = fmul float %158, %162 %170 = fmul float %165, %169 %171 = fmul float %163, %160 %172 = fmul float %160, %162 %173 = fmul float %172, %165 %174 = fmul float %163, %158 %175 = fmul float %158, %158 %176 = call float @llvm.AMDGPU.lrp(float %164, float 1.000000e+00, float %175) %177 = fsub float %167, %168 %178 = fadd float %170, %171 %179 = fadd float %167, %168 %180 = fmul float %160, %160 %181 = call float @llvm.AMDGPU.lrp(float %164, float 1.000000e+00, float %180) %182 = fsub float %173, %174 %183 = fsub float %170, %171 %184 = fadd float %173, %174 %185 = fmul float %162, %162 %186 = call float @llvm.AMDGPU.lrp(float %164, float 1.000000e+00, float %185) %187 = fmul float %149, %176 %188 = fmul float %151, %177 %189 = fadd float %188, %187 %190 = fmul float %153, %178 %191 = fadd float %189, %190 %192 = fmul float %149, %179 %193 = fmul float %151, %181 %194 = fadd float %193, %192 %195 = fmul float %153, %182 %196 = fadd float %194, %195 %197 = fmul float %149, %183 %198 = fmul float %151, %184 %199 = fadd float %198, %197 %200 = fmul float %153, %186 %201 = fadd float %199, %200 %202 = fadd float %64, %45 %203 = fadd float %65, %46 %204 = fmul float %203, %158 %205 = fmul float %203, %160 %206 = fmul float %203, %162 %207 = fmul float %191, %202 %208 = fadd float %207, %204 %209 = fmul float %196, %202 %210 = fadd float %209, %205 %211 = fmul float %201, %202 %212 = fadd float %211, %206 %213 = fsub float %13, %84 %214 = fsub float %14, %85 %215 = fsub float %15, %86 %216 = fmul float %213, %213 %217 = fmul float %214, %214 %218 = fadd float %217, %216 %219 = fmul float %215, %215 %220 = fadd float %218, %219 %221 = call float @llvm.sqrt.f32(float %220) %222 = fcmp ogt float %47, 0.000000e+00 br i1 %222, label %IF63, label %ENDIF62 IF63: ; preds = %main_body %223 = call float @llvm.SI.load.const(<16 x i8> %41, i32 12) %224 = call float @llvm.SI.load.const(<16 x i8> %41, i32 0) %225 = fmul float %221, %42 %226 = fcmp olt float %225, %93 %227 = fsub float %93, %225 %228 = fsub float %43, %42 %229 = fmul float %228, %221 %230 = fdiv float 1.000000e+00, %229 %231 = fmul float %227, %230 %232 = fsub float 1.000000e+00, %231 %233 = fmul float %232, %130 %234 = fmul float %232, %131 %235 = fmul float %232, %132 %236 = fmul float %232, %133 %237 = fmul float %221, %43 %238 = fcmp olt float %237, %93 %.75 = select i1 %238, float 0.000000e+00, float %233 %.76 = select i1 %238, float 0.000000e+00, float %234 %.77 = select i1 %238, float 0.000000e+00, float %235 %.78 = select i1 %238, float 0.000000e+00, float %236 %.75. = select i1 %226, float %.75, float %130 %.76. = select i1 %226, float %.76, float %131 %.77. = select i1 %226, float %.77, float %132 %.78. = select i1 %226, float %.78, float %133 %239 = fmul float %221, %224 %240 = call float @llvm.maxnum.f32(float %93, float %239) %241 = fmul float %221, %223 %242 = call float @llvm.minnum.f32(float %240, float %241) br label %ENDIF62 ENDIF62: ; preds = %main_body, %IF63 %temp20.1 = phi float [ %.75., %IF63 ], [ %130, %main_body ] %temp21.0 = phi float [ %.76., %IF63 ], [ %131, %main_body ] %temp22.0 = phi float [ %.77., %IF63 ], [ %132, %main_body ] %temp23.0 = phi float [ %.78., %IF63 ], [ %133, %main_body ] %temp24.0 = phi float [ %242, %IF63 ], [ %93, %main_body ] %243 = fcmp olt float %temp23.0, 0x3F70101060000000 %.temp24.0 = select i1 %243, float 0.000000e+00, float %temp24.0 %244 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> , <32 x i8> %57, <16 x i8> %59, i32 2) %245 = extractelement <4 x float> %244, i32 0 %246 = extractelement <4 x float> %244, i32 1 %247 = extractelement <4 x float> %244, i32 2 %248 = fmul float %245, 0x3E7AD7F2A0000000 %249 = fmul float %246, 0x3E7AD7F2A0000000 %250 = fmul float %247, 0x3E7AD7F2A0000000 %251 = fmul float %208, %.temp24.0 %252 = fadd float %251, %84 %253 = fmul float %210, %.temp24.0 %254 = fadd float %253, %85 %255 = fmul float %212, %.temp24.0 %256 = fadd float %255, %86 %257 = fadd float %248, %252 %258 = fadd float %249, %254 %259 = fadd float %250, %256 %260 = fmul float %257, %24 %261 = fmul float %258, %28 %262 = fadd float %260, %261 %263 = fmul float %259, %32 %264 = fadd float %262, %263 %265 = fadd float %264, %36 %266 = fmul float %257, %25 %267 = fmul float %258, %29 %268 = fadd float %266, %267 %269 = fmul float %259, %33 %270 = fadd float %268, %269 %271 = fadd float %270, %37 %272 = fmul float %257, %27 %273 = fmul float %258, %31 %274 = fadd float %272, %273 %275 = fmul float %259, %35 %276 = fadd float %274, %275 %277 = fadd float %276, %39 %278 = fmul float %16, %44 %279 = fadd float %278, %257 %280 = fmul float %17, %44 %281 = fadd float %280, %258 %282 = fmul float %18, %44 %283 = fadd float %282, %259 %284 = fsub float %257, %13 %285 = fsub float %258, %14 %286 = fsub float %259, %15 %287 = fmul float %16, %284 %288 = fmul float %17, %285 %289 = fadd float %288, %287 %290 = fmul float %18, %286 %291 = fadd float %289, %290 %292 = fmul float %284, %16 %293 = fmul float %285, %17 %294 = fadd float %293, %292 %295 = fmul float %286, %18 %296 = fadd float %294, %295 %297 = fsub float %296, %22 %298 = fsub float %23, %22 %299 = fdiv float 1.000000e+00, %298 %300 = fmul float %297, %299 %301 = fsub float %257, %13 %302 = fsub float %258, %14 %303 = fsub float %259, %15 %304 = fmul float %301, %301 %305 = fmul float %302, %302 %306 = fadd float %305, %304 %307 = fmul float %303, %303 %308 = fadd float %306, %307 %309 = call float @llvm.AMDGPU.rsq.clamped.f32(float %308) %310 = fmul float %301, %309 %311 = fsub float -0.000000e+00, %310 %312 = fmul float %302, %309 %313 = fsub float -0.000000e+00, %312 %314 = fmul float %303, %309 %315 = fsub float -0.000000e+00, %314 %316 = fsub float -0.000000e+00, %271 %317 = fmul float %279, %26 %318 = fmul float %281, %30 %319 = fadd float %317, %318 %320 = fmul float %283, %34 %321 = fadd float %319, %320 %322 = fadd float %321, %38 %323 = fmul float %279, %27 %324 = fmul float %281, %31 %325 = fadd float %323, %324 %326 = fmul float %283, %35 %327 = fadd float %325, %326 %328 = fadd float %327, %39 %329 = fdiv float 1.000000e+00, %328 %330 = fmul float %322, %329 %331 = fmul float %330, %277 %332 = fmul float %257, %26 %333 = fmul float %258, %30 %334 = fadd float %332, %333 %335 = fmul float %259, %34 %336 = fadd float %334, %335 %337 = fadd float %336, %38 %338 = call float @llvm.minnum.f32(float %337, float 0x3F50624DE0000000) %339 = call float @llvm.maxnum.f32(float %331, float %338) %340 = fmul float %339, 2.000000e+00 %341 = fsub float %340, %277 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %99, float %113, float %103, float %114) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %temp20.1, float %temp21.0, float %temp22.0, float %temp23.0) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %265, float %271, float %300, float %277) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %257, float %258, float %259, float %291) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %311, float %313, float %315, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %265, float %316, float %341, float %277) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.cos.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sin.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[36:39], s[2:3], 0x10 ; C0920310 s_load_dwordx4 s[32:35], s[2:3], 0x14 ; C0900314 v_add_i32_e32 v0, s10, v0 ; 4A00000A v_add_i32_e32 v4, s11, v3 ; 4A08060B s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 s_load_dwordx4 s[20:23], s[8:9], 0xc ; C08A090C s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s24, s[32:35], 0x16 ; C20C2116 s_buffer_load_dword s41, s[32:35], 0x17 ; C214A117 s_buffer_load_dword s29, s[32:35], 0x18 ; C20EA118 buffer_load_format_xyzw v[11:14], v0, s[0:3], 0 idxen ; E00C2000 80000B00 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[13:16], v4, s[12:15], 0 idxen ; E00C2000 80030D04 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[15:18], v4, s[16:19], 0 idxen ; E00C2000 80040F04 buffer_load_format_xyzw v[0:3], v4, s[20:23], 0 idxen ; E00C2000 80050004 buffer_load_format_xyzw v[7:10], v4, s[8:11], 0 idxen ; E00C2000 80020704 s_buffer_load_dword s42, s[32:35], 0x19 ; C2152119 s_buffer_load_dword s43, s[32:35], 0x1a ; C215A11A s_waitcnt vmcnt(1) ; BF8C0771 v_mad_f32 v3, 0.5, v12, 0.5 ; D2820003 03C218F0 v_sub_f32_e32 v4, 1.0, v3 ; 080806F2 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v9, v4, v13, s24 ; D2820009 00621B04 v_mac_f32_e32 v9, v14, v3 ; 3E12070E v_mov_b32_e32 v3, 0x3d558919 ; 7E0602FF 3D558919 v_mov_b32_e32 v4, 0x3f72a76f ; 7E0802FF 3F72A76F v_mad_f32 v5, v4, v15, v3 ; D2820005 040E1F04 v_log_f32_e32 v5, v5 ; 7E0A4F05 v_mad_f32 v6, v4, v16, v3 ; D2820006 040E2104 v_mac_f32_e32 v3, v4, v17 ; 3E062304 v_mov_b32_e32 v13, 0x4019999a ; 7E1A02FF 4019999A v_mul_legacy_f32_e32 v4, v13, v5 ; 0E080B0D v_exp_f32_e32 v4, v4 ; 7E084B04 v_mov_b32_e32 v5, 0x3d25aee6 ; 7E0A02FF 3D25AEE6 v_cmp_gt_f32_e32 vcc, v15, v5 ; 7C080B0F v_mov_b32_e32 v14, 0x3d9e8391 ; 7E1C02FF 3D9E8391 v_mul_f32_e32 v19, v14, v15 ; 10261F0E v_cndmask_b32_e32 v19, v19, v4 ; 00260913 s_buffer_load_dword s17, s[36:39], 0x4c ; C208A54C s_buffer_load_dword s18, s[36:39], 0x4d ; C209254D s_buffer_load_dword s19, s[36:39], 0x4e ; C209A54E s_buffer_load_dword s8, s[36:39], 0x50 ; C2042550 s_buffer_load_dword s9, s[36:39], 0x51 ; C204A551 s_buffer_load_dword s12, s[36:39], 0x52 ; C2062552 s_buffer_load_dword s40, s[36:39], 0x54 ; C2142554 s_buffer_load_dword s30, s[36:39], 0x55 ; C20F2555 s_buffer_load_dword s31, s[36:39], 0x56 ; C20FA556 s_buffer_load_dword s20, s[36:39], 0x5b ; C20A255B s_buffer_load_dword s44, s[36:39], 0x5d ; C216255D s_buffer_load_dword s22, s[36:39], 0x80 ; C20B2580 s_buffer_load_dword s21, s[36:39], 0x81 ; C20AA581 s_buffer_load_dword s11, s[36:39], 0x82 ; C205A582 s_buffer_load_dword s10, s[36:39], 0x83 ; C2052583 s_buffer_load_dword s25, s[36:39], 0x84 ; C20CA584 s_buffer_load_dword s23, s[36:39], 0x85 ; C20BA585 s_buffer_load_dword s15, s[36:39], 0x86 ; C207A586 s_buffer_load_dword s14, s[36:39], 0x87 ; C2072587 s_buffer_load_dword s26, s[36:39], 0x88 ; C20D2588 s_buffer_load_dword s24, s[36:39], 0x89 ; C20C2589 s_buffer_load_dword s2, s[36:39], 0x8a ; C201258A s_buffer_load_dword s13, s[36:39], 0x8b ; C206A58B s_buffer_load_dword s28, s[36:39], 0x8c ; C20E258C s_buffer_load_dword s27, s[36:39], 0x8d ; C20DA58D s_buffer_load_dword s3, s[36:39], 0x8e ; C201A58E s_buffer_load_dword s16, s[36:39], 0x8f ; C208258F v_log_f32_e32 v4, v6 ; 7E084F06 s_buffer_load_dword s36, s[32:35], 0x1c ; C212211C v_cmp_gt_f32_e32 vcc, v16, v5 ; 7C080B10 v_cmp_gt_f32_e64 s[0:1], v17, v5 ; D0080000 00020B11 v_mul_legacy_f32_e32 v4, v13, v4 ; 0E08090D v_exp_f32_e32 v4, v4 ; 7E084B04 v_mul_f32_e32 v5, v14, v16 ; 100A210E v_cndmask_b32_e32 v20, v5, v4 ; 00280905 v_mul_f32_e32 v14, v14, v17 ; 101C230E v_log_f32_e32 v21, v3 ; 7E2A4F03 s_buffer_load_dword s37, s[32:35], 0x1b ; C212A11B s_buffer_load_dword s38, s[32:35], 0x14 ; C2132114 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e64 v6, 1.0, s36 ; D2080006 000048F2 v_mul_f32_e32 v3, v15, v6 ; 10060D0F v_mul_f32_e32 v4, v16, v6 ; 10080D10 v_mul_f32_e32 v5, v17, v6 ; 100A0D11 v_mul_f32_e32 v6, v18, v6 ; 100C0D12 v_mac_f32_e32 v6, s36, v18 ; 3E0C2424 v_mul_legacy_f32_e32 v13, v13, v21 ; 0E1A2B0D v_exp_f32_e32 v13, v13 ; 7E1A4B0D v_cndmask_b32_e64 v13, v14, v13, s[0:1] ; D200000D 00021B0E v_mul_f32_e32 v9, s41, v9 ; 10121229 v_mov_b32_e32 v14, s42 ; 7E1C022A v_mad_f32 v15, -0.5, v12, 0.5 ; D282000F 03C218F1 v_mad_f32 v14, s37, v15, v14 ; D282000E 043A1E25 v_mov_b32_e32 v15, s42 ; 7E1E022A v_mac_f32_e32 v15, s37, v9 ; 3E1E1225 v_sub_f32_e64 v9, 1.0, s38 ; D2080009 00004CF2 v_mul_f32_e32 v9, v14, v9 ; 1012130E v_mac_f32_e32 v9, s38, v15 ; 3E121E26 v_mac_f32_e32 v3, s36, v19 ; 3E062624 v_mac_f32_e32 v4, s36, v20 ; 3E082824 v_mac_f32_e32 v5, s36, v13 ; 3E0A1A24 v_mov_b32_e32 v13, s30 ; 7E1A021E v_mul_f32_e32 v13, s12, v13 ; 101A1A0C v_mov_b32_e32 v14, s31 ; 7E1C021F v_mad_f32 v17, v14, s9, -v13 ; D2820011 8434130E v_mov_b32_e32 v13, s31 ; 7E1A021F v_mul_f32_e32 v13, s8, v13 ; 101A1A08 v_mov_b32_e32 v14, s40 ; 7E1C0228 v_mad_f32 v18, v14, s12, -v13 ; D2820012 8434190E v_mov_b32_e32 v13, s40 ; 7E1A0228 v_mul_f32_e32 v13, s9, v13 ; 101A1A09 v_mov_b32_e32 v14, s30 ; 7E1C021E v_mad_f32 v20, v14, s8, -v13 ; D2820014 8434110E v_mov_b32_e32 v13, 0x3e22f983 ; 7E1A02FF 3E22F983 v_mul_f32_e32 v7, v13, v7 ; 100E0F0D v_mul_f32_e32 v8, v13, v8 ; 1010110D v_fract_f32_e32 v7, v7 ; 7E0E4107 v_fract_f32_e32 v19, v8 ; 7E264108 v_cos_f32_e32 v22, v7 ; 7E2C6D07 v_sin_f32_e32 v21, v7 ; 7E2A6B07 v_mul_f32_e32 v7, v17, v21 ; 100E2B11 v_mad_f32 v8, v22, s40, -v7 ; D2820008 841C5116 v_mul_f32_e32 v7, v18, v21 ; 100E2B12 v_mad_f32 v13, v22, s30, -v7 ; D282000D 841C3D16 v_mul_f32_e32 v7, v20, v21 ; 100E2B14 v_mad_f32 v14, v22, s31, -v7 ; D282000E 841C3F16 s_buffer_load_dword s0, s[32:35], 0xc ; C200210C s_buffer_load_dword s37, s[32:35], 0xd ; C212A10D s_buffer_load_dword s36, s[32:35], 0xe ; C212210E s_buffer_load_dword s1, s[32:35], 0x12 ; C200A112 v_cos_f32_e32 v23, v19 ; 7E2E6D13 v_sub_f32_e32 v7, 1.0, v23 ; 080E2EF2 v_mul_f32_e32 v15, v8, v8 ; 101E1108 v_mad_f32 v24, v15, v7, v23 ; D2820018 045E0F0F v_mul_f32_e32 v15, v13, v13 ; 101E1B0D v_mad_f32 v16, v15, v7, v23 ; D2820010 045E0F0F v_mul_f32_e32 v15, v14, v14 ; 101E1D0E v_mad_f32 v15, v15, v7, v23 ; D282000F 045E0F0F v_sin_f32_e32 v19, v19 ; 7E266B13 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_lt_f32_e64 s[38:39], 0, s1 ; D0020026 00000280 v_mov_b32_e32 v26, s44 ; 7E34022C v_mov_b32_e32 v7, s0 ; 7E0E0200 v_mov_b32_e32 v25, s43 ; 7E32022B s_and_saveexec_b64 s[38:39], s[38:39] ; BEA62426 s_xor_b64 s[38:39], exec, s[38:39] ; 89A6267E s_cbranch_execz BB0_2 ; BF880000 v_sub_f32_e32 v27, s17, v0 ; 08360011 v_sub_f32_e32 v28, s18, v1 ; 08380212 v_sub_f32_e32 v29, s19, v2 ; 083A0413 v_mul_f32_e32 v27, v27, v27 ; 1036371B s_buffer_load_dword s0, s[32:35], 0x1 ; C2002101 s_buffer_load_dword s1, s[32:35], 0x2 ; C200A102 s_buffer_load_dword s41, s[32:35], 0x0 ; C214A100 s_buffer_load_dword s42, s[32:35], 0x3 ; C2152103 v_mac_f32_e32 v27, v28, v28 ; 3E36391C v_mac_f32_e32 v27, v29, v29 ; 3E363B1D v_rsq_f32_e32 v28, v27 ; 7E385D1B s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v29, s0 ; 7E3A0200 v_sub_f32_e32 v29, s1, v29 ; 083A3A01 v_rcp_f32_e32 v29, v29 ; 7E3A551D v_sqrt_f32_e32 v27, v27 ; 7E36671B v_mul_f32_e32 v28, v29, v28 ; 1038391D v_mul_f32_e32 v29, s0, v27 ; 103A3600 v_mad_f32 v30, v27, s0, -v10 ; D282001E 8428011B v_mad_f32 v28, v30, v28, 1.0 ; D282001C 03CA391E v_cmp_lt_f32_e32 vcc, v29, v10 ; 7C02151D v_mul_f32_e32 v29, s1, v27 ; 103A3601 v_mul_f32_e32 v30, s41, v27 ; 103C3629 v_max_f32_e32 v30, v30, v10 ; 203C151E v_cmp_lt_f32_e64 s[0:1], v29, v10 ; D0020000 0002151D v_mul_f32_e32 v10, v3, v28 ; 10143903 v_mul_f32_e32 v29, v4, v28 ; 103A3904 v_mul_f32_e32 v31, v5, v28 ; 103E3905 v_mul_f32_e32 v28, v6, v28 ; 10383906 v_cndmask_b32_e64 v10, v10, 0, s[0:1] ; D200000A 0001010A v_cndmask_b32_e64 v29, v29, 0, s[0:1] ; D200001D 0001011D v_cndmask_b32_e64 v31, v31, 0, s[0:1] ; D200001F 0001011F v_cndmask_b32_e64 v28, v28, 0, s[0:1] ; D200001C 0001011C v_cndmask_b32_e32 v3, v3, v10 ; 00061503 v_cndmask_b32_e32 v4, v4, v29 ; 00083B04 v_cndmask_b32_e32 v5, v5, v31 ; 000A3F05 v_cndmask_b32_e32 v6, v6, v28 ; 000C3906 v_mul_f32_e32 v10, s42, v27 ; 1014362A v_min_f32_e32 v10, v10, v30 ; 1E143D0A s_or_b64 exec, exec, s[38:39] ; 88FE267E v_mul_f32_e32 v17, v17, v22 ; 10222D11 v_mac_f32_e32 v17, s40, v21 ; 3E222A28 v_mul_f32_e32 v18, v18, v22 ; 10242D12 v_mul_f32_e32 v20, v20, v22 ; 10282D14 v_mac_f32_e32 v18, s30, v21 ; 3E242A1E v_mac_f32_e32 v20, s31, v21 ; 3E282A1F v_sub_f32_e32 v21, 1.0, v23 ; 082A2EF2 v_mul_f32_e32 v22, v24, v17 ; 102C2318 v_subrev_f32_e32 v23, s20, v26 ; 0A2E3414 v_rcp_f32_e32 v23, v23 ; 7E2E5517 s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 s_load_dwordx8 s[40:47], s[6:7], 0x0 ; C0D40700 v_mad_f32 v24, 0.5, v11, 0.5 ; D2820018 03C216F0 v_mad_f32 v24, v25, v24, s29 ; D2820018 00763119 v_mov_b32_e32 v25, 1.0 ; 7E3202F2 v_mul_f32_e32 v26, v13, v8 ; 1034110D v_mul_f32_e32 v27, v14, v19 ; 1036270E v_mul_f32_e32 v28, v14, v8 ; 1038110E v_mul_f32_e32 v29, v13, v19 ; 103A270D v_mul_f32_e32 v19, v8, v19 ; 10262708 v_mul_f32_e32 v30, v14, v13 ; 103C1B0E v_mad_f32 v31, v21, v26, -v27 ; D282001F 846E3515 v_mac_f32_e32 v27, v26, v21 ; 3E362B1A v_mac_f32_e32 v22, v31, v18 ; 3E2C251F v_mul_f32_e32 v26, v27, v17 ; 1034231B v_mac_f32_e32 v26, v16, v18 ; 3E342510 v_mad_f32 v16, v28, v21, v29 ; D2820010 04762B1C v_mad_f32 v27, v21, v28, -v29 ; D282001B 84763915 v_mac_f32_e32 v22, v16, v20 ; 3E2C2910 v_mul_f32_e32 v16, v27, v17 ; 1020231B v_mad_f32 v17, v30, v21, -v19 ; D2820011 844E2B1E v_mac_f32_e32 v19, v21, v30 ; 3E263D15 v_mac_f32_e32 v16, v19, v18 ; 3E202513 v_mac_f32_e32 v26, v17, v20 ; 3E342911 v_mac_f32_e32 v16, v15, v20 ; 3E20290F v_add_f32_e32 v11, s37, v11 ; 06161625 v_add_f32_e32 v12, s36, v12 ; 06181824 v_mov_b32_e32 v15, 0x3b808083 ; 7E1E02FF 3B808083 v_cmp_gt_f32_e32 vcc, v15, v6 ; 7C080D0F v_cndmask_b32_e64 v10, v10, 0, vcc ; D200000A 01A9010A v_mul_f32_e32 v8, v8, v12 ; 10101908 v_mul_f32_e32 v13, v13, v12 ; 101A190D v_mul_f32_e32 v12, v14, v12 ; 1018190E v_mac_f32_e32 v8, v11, v22 ; 3E102D0B v_mac_f32_e32 v13, v11, v26 ; 3E1A350B v_mov_b32_e32 v17, 0 ; 7E220280 v_mac_f32_e32 v12, v11, v16 ; 3E18210B v_mov_b32_e32 v18, v17 ; 7E240311 v_mad_f32 v0, v10, v8, v0 ; D2820000 0402110A v_mad_f32 v1, v10, v13, v1 ; D2820001 04061B0A v_mac_f32_e32 v2, v10, v12 ; 3E04190A v_mov_b32_e32 v19, v17 ; 7E260311 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[10:12], 7, 0, 0, 0, 0, 0, 0, 0, v[17:20], s[40:47], s[32:35] ; F0900700 010A0A11 v_mov_b32_e32 v8, 0x33d6bf95 ; 7E1002FF 33D6BF95 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v0, v8, v10 ; 3E001508 v_mac_f32_e32 v1, v8, v11 ; 3E021708 v_mac_f32_e32 v2, v8, v12 ; 3E041908 exp 15, 32, 0, 0, 0, v17, v17, v17, v17 ; F800020F 11111111 exp 15, 33, 0, 0, 0, v24, v9, v24, v9 ; F800021F 09180918 exp 15, 34, 0, 0, 0, v3, v4, v5, v6 ; F800022F 06050403 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v3, s25, v1 ; 10060219 v_mac_f32_e32 v3, s22, v0 ; 3E060016 v_mac_f32_e32 v3, s26, v2 ; 3E06041A v_add_f32_e32 v3, s28, v3 ; 0606061C v_mul_f32_e32 v4, s23, v1 ; 10080217 v_mac_f32_e32 v4, s21, v0 ; 3E080015 v_mac_f32_e32 v4, s24, v2 ; 3E080418 v_add_f32_e32 v4, s27, v4 ; 0608081B v_subrev_f32_e32 v5, s17, v0 ; 0A0A0011 v_subrev_f32_e32 v6, s18, v1 ; 0A0C0212 v_subrev_f32_e32 v8, s19, v2 ; 0A100413 v_mul_f32_e32 v9, s8, v5 ; 10120A08 v_mac_f32_e32 v9, s9, v6 ; 3E120C09 v_mac_f32_e32 v9, s12, v8 ; 3E12100C v_subrev_f32_e32 v10, s20, v9 ; 0A141214 v_mul_f32_e32 v10, v23, v10 ; 10141517 v_mul_f32_e32 v11, s14, v1 ; 1016020E v_mac_f32_e32 v11, s10, v0 ; 3E16000A v_mac_f32_e32 v11, s13, v2 ; 3E16040D v_add_f32_e32 v11, s16, v11 ; 06161610 exp 15, 35, 0, 0, 0, v3, v4, v10, v11 ; F800023F 0B0A0403 exp 15, 36, 0, 0, 0, v0, v1, v2, v9 ; F800024F 09020100 s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v9, s12, v7, v2 ; D2820009 040A0E0C v_mul_f32_e32 v10, s15, v1 ; 1014020F v_mac_f32_e32 v10, s11, v0 ; 3E14000B v_mac_f32_e32 v10, s2, v2 ; 3E140402 v_mad_f32 v0, s8, v7, v0 ; D2820000 04020E08 v_mad_f32 v1, s9, v7, v1 ; D2820001 04060E09 v_mul_f32_e32 v2, v5, v5 ; 10040B05 v_mac_f32_e32 v2, v6, v6 ; 3E040D06 v_mac_f32_e32 v2, v8, v8 ; 3E041108 v_rsq_clamp_f32_e32 v2, v2 ; 7E045902 v_mul_f32_e32 v7, s15, v1 ; 100E020F v_mac_f32_e32 v7, s11, v0 ; 3E0E000B v_mul_f32_e32 v1, s14, v1 ; 1002020E v_mac_f32_e32 v1, s10, v0 ; 3E02000A v_mul_f32_e32 v0, v2, v5 ; 10000B02 v_mul_f32_e32 v5, v2, v6 ; 100A0D02 v_mul_f32_e32 v2, v2, v8 ; 10041102 v_mac_f32_e32 v1, s13, v9 ; 3E02120D v_add_f32_e32 v1, s16, v1 ; 06020210 v_mov_b32_e32 v6, 0x80000000 ; 7E0C02FF 80000000 v_xor_b32_e32 v0, v0, v6 ; 3A000D00 v_xor_b32_e32 v5, v5, v6 ; 3A0A0D05 v_xor_b32_e32 v2, v2, v6 ; 3A040D02 v_rcp_f32_e32 v1, v1 ; 7E025501 v_xor_b32_e32 v4, v4, v6 ; 3A080D04 v_mac_f32_e32 v7, s2, v9 ; 3E0E1202 v_add_f32_e32 v6, s3, v7 ; 060C0E03 v_mul_f32_e32 v1, v1, v6 ; 10020D01 v_mul_f32_e32 v1, v11, v1 ; 1002030B v_add_f32_e32 v6, s3, v10 ; 060C1403 v_min_f32_e32 v6, 0x3a83126f, v6 ; 1E0C0CFF 3A83126F v_max_f32_e32 v1, v6, v1 ; 20020306 v_mad_f32 v1, 2.0, v1, -v11 ; D2820001 842E02F4 exp 15, 37, 0, 0, 0, v0, v5, v2, v17 ; F800025F 11020500 exp 15, 38, 0, 0, 0, v25, v17, v17, v17 ; F800026F 11111119 exp 15, 12, 0, 0, 0, v3, v4, v1, v11 ; F80000CF 0B010403 exp 15, 13, 0, 1, 0, v17, v17, v17, v17 ; F80008DF 11111111 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 32 Code Size: 1512 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0..37] DCL CONST[2][0..15] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..7] DCL CONST[6][0] DCL CONST[7][0..1] DCL TEMP[0..2], LOCAL IMM[0] UINT32 {1, 160, 176, 192} IMM[1] UINT32 {208, 164, 180, 196} IMM[2] UINT32 {212, 168, 184, 200} IMM[3] UINT32 {216, 172, 188, 204} IMM[4] UINT32 {220, 144, 32, 16} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MAD TEMP[0], CONST[2][10], TEMP[0], CONST[2][9] 3: MOV TEMP[1].xyz, TEMP[0].xyzx 4: MOV_SAT TEMP[0].x, TEMP[0].wwww 5: MOV TEMP[1].w, TEMP[0].xxxx 6: MUL TEMP[0], TEMP[1], IN[1] 7: MUL TEMP[1].xyz, CONST[2][2].zzzz, TEMP[0].xyzz 8: MUL TEMP[1].xyz, TEMP[1].xyzz, CONST[2][1].zzzz 9: MOV_SAT TEMP[2].xyz, TEMP[1].xyzz 10: LRP TEMP[1].xyz, CONST[2][2].yyyy, TEMP[2].xyzz, TEMP[1].xyzz 11: MUL TEMP[1].xyz, TEMP[0].wwww, TEMP[1].xyzz 12: MOV TEMP[2].x, TEMP[1].xxxx 13: MOV TEMP[2].y, TEMP[1].yyyy 14: MOV TEMP[2].z, TEMP[1].zzzz 15: MUL TEMP[0].x, CONST[2][1].wwww, TEMP[0].wwww 16: MOV TEMP[2].w, TEMP[0].xxxx 17: MOV OUT[0], TEMP[2] 18: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172) %36 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %37 = load <32 x i8>, <32 x i8> addrspace(2)* %36, align 32, !tbaa !0 %38 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 %40 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %44 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %45 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %46 = bitcast float %40 to i32 %47 = bitcast float %41 to i32 %48 = insertelement <2 x i32> undef, i32 %46, i32 0 %49 = insertelement <2 x i32> %48, i32 %47, i32 1 %50 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %49, <32 x i8> %37, <16 x i8> %39, i32 2) %51 = extractelement <4 x float> %50, i32 0 %52 = extractelement <4 x float> %50, i32 1 %53 = extractelement <4 x float> %50, i32 2 %54 = extractelement <4 x float> %50, i32 3 %55 = fmul float %32, %51 %56 = fadd float %55, %28 %57 = fmul float %33, %52 %58 = fadd float %57, %29 %59 = fmul float %34, %53 %60 = fadd float %59, %30 %61 = fmul float %35, %54 %62 = fadd float %61, %31 %63 = call float @llvm.AMDIL.clamp.(float %62, float 0.000000e+00, float 1.000000e+00) %64 = fmul float %56, %42 %65 = fmul float %58, %43 %66 = fmul float %60, %44 %67 = fmul float %63, %45 %68 = fmul float %27, %64 %69 = fmul float %27, %65 %70 = fmul float %27, %66 %71 = fmul float %68, %24 %72 = fmul float %69, %24 %73 = fmul float %70, %24 %74 = call float @llvm.AMDIL.clamp.(float %71, float 0.000000e+00, float 1.000000e+00) %75 = call float @llvm.AMDIL.clamp.(float %72, float 0.000000e+00, float 1.000000e+00) %76 = call float @llvm.AMDIL.clamp.(float %73, float 0.000000e+00, float 1.000000e+00) %77 = call float @llvm.AMDGPU.lrp(float %26, float %74, float %71) %78 = call float @llvm.AMDGPU.lrp(float %26, float %75, float %72) %79 = call float @llvm.AMDGPU.lrp(float %26, float %76, float %73) %80 = fmul float %67, %77 %81 = fmul float %67, %78 %82 = fmul float %67, %79 %83 = fmul float %25, %67 %84 = call i32 @llvm.SI.packf16(float %80, float %81) %85 = bitcast i32 %84 to float %86 = call i32 @llvm.SI.packf16(float %82, float %83) %87 = bitcast i32 %86 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %85, float %87, float %85, float %87) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x8 ; C0800308 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x24 ; C2040124 s_buffer_load_dword s9, s[0:3], 0x28 ; C2048128 v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501 s_buffer_load_dword s10, s[0:3], 0x25 ; C2050125 s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 s_buffer_load_dword s4, s[0:3], 0x26 ; C2020126 s_buffer_load_dword s5, s[0:3], 0x27 ; C2028127 s_buffer_load_dword s6, s[0:3], 0x29 ; C2030129 s_buffer_load_dword s7, s[0:3], 0x2a ; C203812A v_interp_p1_f32 v6, v0, 2, 1, [m0] ; C8180600 s_buffer_load_dword s11, s[0:3], 0x2b ; C205812B v_interp_p2_f32 v6, [v6], v1, 2, 1, [m0] ; C8190601 v_interp_p1_f32 v0, v0, 3, 1, [m0] ; C8000700 v_interp_p2_f32 v0, [v0], v1, 3, 1, [m0] ; C8010701 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[7:10], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[12:15] ; F0800F00 00640702 v_mov_b32_e32 v1, s8 ; 7E020208 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v1, s9, v7 ; 3E020E09 v_mov_b32_e32 v2, s10 ; 7E04020A s_buffer_load_dword s8, s[0:3], 0xa ; C204010A v_mac_f32_e32 v2, s6, v8 ; 3E041006 v_mov_b32_e32 v3, s4 ; 7E060204 v_mac_f32_e32 v3, s7, v9 ; 3E061207 v_mov_b32_e32 v7, s5 ; 7E0E0205 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 v_mac_f32_e32 v7, s11, v10 ; 3E0E140B s_buffer_load_dword s5, s[0:3], 0x7 ; C2028107 s_buffer_load_dword s0, s[0:3], 0x9 ; C2000109 v_mul_f32_e32 v1, v4, v1 ; 10020304 v_mul_f32_e32 v2, v5, v2 ; 10040505 v_mul_f32_e32 v3, v6, v3 ; 10060706 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s8, v1 ; 10020208 v_mul_f32_e32 v2, s8, v2 ; 10040408 v_mul_f32_e32 v3, s8, v3 ; 10060608 v_mul_f32_e32 v1, s4, v1 ; 10020204 v_mul_f32_e32 v2, s4, v2 ; 10040404 v_mul_f32_e32 v3, s4, v3 ; 10060604 v_add_f32_e64 v4, 0, v1 clamp ; D2060804 00020280 v_sub_f32_e64 v5, 1.0, s0 ; D2080005 000000F2 v_mul_f32_e32 v1, v1, v5 ; 10020B01 v_mac_f32_e32 v1, s0, v4 ; 3E020800 v_add_f32_e64 v4, 0, v2 clamp ; D2060804 00020480 v_mul_f32_e32 v2, v2, v5 ; 10040B02 v_mac_f32_e32 v2, s0, v4 ; 3E040800 v_mul_f32_e32 v4, v3, v5 ; 10080B03 v_add_f32_e64 v3, 0, v3 clamp ; D2060803 00020680 v_mac_f32_e32 v4, s0, v3 ; 3E080600 v_add_f32_e64 v3, 0, v7 clamp ; D2060803 00020E80 v_mul_f32_e32 v0, v0, v3 ; 10000700 v_mul_f32_e32 v1, v1, v0 ; 10020101 v_mul_f32_e32 v2, v2, v0 ; 10040102 v_mul_f32_e32 v3, v4, v0 ; 10060104 v_mul_f32_e32 v0, s5, v0 ; 10000005 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e32 v0, v3, v0 ; 5E000103 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 312 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL CONST[1][0..41] DCL CONST[2][0..13] DCL CONST[3][0] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MOV TEMP[0].zw, IMM[0].yyxy 4: MOV TEMP[0].x, IN[0].xxxx 5: MOV TEMP[0].y, -IN[0].yyyy 6: MOV OUT[1], TEMP[1] 7: MOV OUT[0], TEMP[0] 8: MOV OUT[2], IN[1] 9: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = add i32 %5, %7 %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13) %15 = extractelement <4 x float> %14, i32 0 %16 = extractelement <4 x float> %14, i32 1 %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 %19 = add i32 %5, %7 %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19) %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = extractelement <4 x float> %20, i32 2 %24 = extractelement <4 x float> %20, i32 3 %25 = fsub float -0.000000e+00, %16 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %21, float %22, float %23, float %24) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %15, float %25, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 v_mov_b32_e32 v1, 0 ; 7E020280 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[4:7], v0, s[4:7], 0 idxen ; E00C2000 80010400 exp 15, 32, 0, 0, 0, v1, v1, v1, v1 ; F800020F 01010101 v_xor_b32_e32 v0, 0x80000000, v3 ; 3A0006FF 80000000 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 33, 0, 0, 0, v4, v5, v6, v7 ; F800021F 07060504 v_mov_b32_e32 v3, 1.0 ; 7E0602F2 exp 15, 12, 0, 0, 0, v2, v0, v1, v3 ; F80000CF 03010002 exp 15, 13, 0, 1, 0, v1, v1, v1, v1 ; F80008DF 01010101 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 92 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL CONST[1][0..41] DCL CONST[2][0..13] DCL CONST[3][0] 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %23 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %24 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %25 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %26 = call i32 @llvm.SI.packf16(float %22, float %23) %27 = bitcast i32 %26 to float %28 = call i32 @llvm.SI.packf16(float %24, float %25) %29 = bitcast i32 %28 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %27, float %29, float %27, float %29) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v0, v0, 3, 0, [m0] ; C8000300 v_interp_p2_f32 v0, [v0], v1, 3, 0, [m0] ; C8010301 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 v_cvt_pkrtz_f16_f32_e32 v0, v4, v0 ; 5E000104 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 56 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL CONST[1][0..41] DCL CONST[2][0..13] DCL CONST[3][0] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL IMM[0] FLT32 { 0.0000, -1.0000, 1.0000, 0.0000} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MOV TEMP[0].zw, IMM[0].zzyz 4: MOV TEMP[0].x, IN[0].xxxx 5: MOV TEMP[0].y, -IN[0].yyyy 6: MOV OUT[1], TEMP[1] 7: MOV OUT[2].xy, IN[1].xyxx 8: MOV OUT[0], TEMP[0] 9: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = add i32 %5, %7 %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13) %15 = extractelement <4 x float> %14, i32 0 %16 = extractelement <4 x float> %14, i32 1 %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 %19 = add i32 %5, %7 %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19) %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = fsub float -0.000000e+00, %16 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %21, float %22, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %15, float %23, float -1.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 v_mov_b32_e32 v1, 0 ; 7E020280 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[4:7], v0, s[4:7], 0 idxen ; E00C2000 80010400 v_mov_b32_e32 v0, 1.0 ; 7E0002F2 exp 15, 32, 0, 0, 0, v1, v1, v1, v1 ; F800020F 01010101 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 33, 0, 0, 0, v4, v5, v1, v1 ; F800021F 01010504 v_xor_b32_e32 v3, 0x80000000, v3 ; 3A0606FF 80000000 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v4, -1.0 ; 7E0802F3 exp 15, 12, 0, 0, 0, v2, v3, v4, v0 ; F80000CF 00040302 exp 15, 13, 0, 1, 0, v1, v1, v1, v1 ; F80008DF 01010101 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 100 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0] DCL CONST[2][0..41] DCL CONST[3][0..13] DCL CONST[4][0] DCL TEMP[0..4], LOCAL IMM[0] FLT32 { 0.0000, -3.0000, -1.1824, 1.1824} IMM[1] UINT32 {0, 0, 0, 0} IMM[2] FLT32 { 3.0000, 0.0044, 0.2960, 0.3990} IMM[3] FLT32 { 0.8000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].y, IMM[0].xxxx 1: MUL TEMP[0].x, CONST[1][0].xxxx, IMM[0].yyyy 2: MOV TEMP[1].y, IMM[0].xxxx 3: MUL TEMP[1].x, CONST[1][0].xxxx, IMM[0].zzzz 4: MOV TEMP[2].y, IMM[0].xxxx 5: MUL TEMP[2].x, CONST[1][0].xxxx, IMM[0].wwww 6: MOV TEMP[3].y, IMM[0].xxxx 7: MUL TEMP[3].x, CONST[1][0].xxxx, IMM[2].xxxx 8: MOV TEMP[4].xy, IN[0].xyyy 9: TEX TEMP[4], TEMP[4], SAMP[0], 2D 10: ADD TEMP[3].xy, TEMP[3].xyyy, IN[0].xyyy 11: MOV TEMP[3].xy, TEMP[3].xyyy 12: TEX TEMP[3], TEMP[3], SAMP[0], 2D 13: ADD TEMP[2].xy, TEMP[2].xyyy, IN[0].xyyy 14: MOV TEMP[2].xy, TEMP[2].xyyy 15: TEX TEMP[2], TEMP[2], SAMP[0], 2D 16: ADD TEMP[0].xy, TEMP[0].xyyy, IN[0].xyyy 17: MOV TEMP[0].xy, TEMP[0].xyyy 18: TEX TEMP[0], TEMP[0], SAMP[0], 2D 19: ADD TEMP[1].xy, TEMP[1].xyyy, IN[0].xyyy 20: MOV TEMP[1].xy, TEMP[1].xyyy 21: TEX TEMP[1], TEMP[1], SAMP[0], 2D 22: MUL TEMP[1], IMM[2].zzzz, TEMP[1] 23: MAD TEMP[0], IMM[2].yyyy, TEMP[0], TEMP[1] 24: MAD TEMP[0], IMM[2].wwww, TEMP[4], TEMP[0] 25: MAD TEMP[0], IMM[2].zzzz, TEMP[2], TEMP[0] 26: MAD TEMP[0], IMM[2].yyyy, TEMP[3], TEMP[0] 27: MOV TEMP[1].w, TEMP[0].wwww 28: MUL TEMP[2].xyz, TEMP[0].xyzz, TEMP[0].xyzz 29: LRP TEMP[0].xyz, IMM[3].xxxx, TEMP[2].xyzz, TEMP[0].xyzz 30: MIN TEMP[1].xyz, TEMP[4].xyzz, TEMP[0].xyzz 31: MOV OUT[0], TEMP[1] 32: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %26 = load <8 x i32>, <8 x i32> addrspace(2)* %25, align 32, !tbaa !0 %27 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %28 = load <4 x i32>, <4 x i32> addrspace(2)* %27, align 16, !tbaa !0 %29 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %31 = fmul float %24, -3.000000e+00 %32 = fmul float %24, 0xBFF2EB3BC0000000 %33 = fmul float %24, 0x3FF2EB3BC0000000 %34 = fmul float %24, 3.000000e+00 %35 = bitcast float %29 to i32 %36 = bitcast float %30 to i32 %37 = insertelement <2 x i32> undef, i32 %35, i32 0 %38 = insertelement <2 x i32> %37, i32 %36, i32 1 %39 = bitcast <8 x i32> %26 to <32 x i8> %40 = bitcast <4 x i32> %28 to <16 x i8> %41 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %38, <32 x i8> %39, <16 x i8> %40, i32 2) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = extractelement <4 x float> %41, i32 2 %45 = extractelement <4 x float> %41, i32 3 %46 = fadd float %34, %29 %47 = fadd float %30, 0.000000e+00 %48 = bitcast float %46 to i32 %49 = bitcast float %47 to i32 %50 = insertelement <2 x i32> undef, i32 %48, i32 0 %51 = insertelement <2 x i32> %50, i32 %49, i32 1 %52 = bitcast <8 x i32> %26 to <32 x i8> %53 = bitcast <4 x i32> %28 to <16 x i8> %54 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %51, <32 x i8> %52, <16 x i8> %53, i32 2) %55 = extractelement <4 x float> %54, i32 0 %56 = extractelement <4 x float> %54, i32 1 %57 = extractelement <4 x float> %54, i32 2 %58 = extractelement <4 x float> %54, i32 3 %59 = fadd float %33, %29 %60 = fadd float %30, 0.000000e+00 %61 = bitcast float %59 to i32 %62 = bitcast float %60 to i32 %63 = insertelement <2 x i32> undef, i32 %61, i32 0 %64 = insertelement <2 x i32> %63, i32 %62, i32 1 %65 = bitcast <8 x i32> %26 to <32 x i8> %66 = bitcast <4 x i32> %28 to <16 x i8> %67 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %64, <32 x i8> %65, <16 x i8> %66, i32 2) %68 = extractelement <4 x float> %67, i32 0 %69 = extractelement <4 x float> %67, i32 1 %70 = extractelement <4 x float> %67, i32 2 %71 = extractelement <4 x float> %67, i32 3 %72 = fadd float %31, %29 %73 = fadd float %30, 0.000000e+00 %74 = bitcast float %72 to i32 %75 = bitcast float %73 to i32 %76 = insertelement <2 x i32> undef, i32 %74, i32 0 %77 = insertelement <2 x i32> %76, i32 %75, i32 1 %78 = bitcast <8 x i32> %26 to <32 x i8> %79 = bitcast <4 x i32> %28 to <16 x i8> %80 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %77, <32 x i8> %78, <16 x i8> %79, i32 2) %81 = extractelement <4 x float> %80, i32 0 %82 = extractelement <4 x float> %80, i32 1 %83 = extractelement <4 x float> %80, i32 2 %84 = extractelement <4 x float> %80, i32 3 %85 = fadd float %32, %29 %86 = fadd float %30, 0.000000e+00 %87 = bitcast float %85 to i32 %88 = bitcast float %86 to i32 %89 = insertelement <2 x i32> undef, i32 %87, i32 0 %90 = insertelement <2 x i32> %89, i32 %88, i32 1 %91 = bitcast <8 x i32> %26 to <32 x i8> %92 = bitcast <4 x i32> %28 to <16 x i8> %93 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %90, <32 x i8> %91, <16 x i8> %92, i32 2) %94 = extractelement <4 x float> %93, i32 0 %95 = extractelement <4 x float> %93, i32 1 %96 = extractelement <4 x float> %93, i32 2 %97 = extractelement <4 x float> %93, i32 3 %98 = fmul float %94, 0x3FD2F25A20000000 %99 = fmul float %95, 0x3FD2F25A20000000 %100 = fmul float %96, 0x3FD2F25A20000000 %101 = fmul float %97, 0x3FD2F25A20000000 %102 = fmul float %81, 0x3F72285660000000 %103 = fadd float %102, %98 %104 = fmul float %82, 0x3F72285660000000 %105 = fadd float %104, %99 %106 = fmul float %83, 0x3F72285660000000 %107 = fadd float %106, %100 %108 = fmul float %84, 0x3F72285660000000 %109 = fadd float %108, %101 %110 = fmul float %42, 0x3FD98A0900000000 %111 = fadd float %110, %103 %112 = fmul float %43, 0x3FD98A0900000000 %113 = fadd float %112, %105 %114 = fmul float %44, 0x3FD98A0900000000 %115 = fadd float %114, %107 %116 = fmul float %45, 0x3FD98A0900000000 %117 = fadd float %116, %109 %118 = fmul float %68, 0x3FD2F25A20000000 %119 = fadd float %118, %111 %120 = fmul float %69, 0x3FD2F25A20000000 %121 = fadd float %120, %113 %122 = fmul float %70, 0x3FD2F25A20000000 %123 = fadd float %122, %115 %124 = fmul float %71, 0x3FD2F25A20000000 %125 = fadd float %124, %117 %126 = fmul float %55, 0x3F72285660000000 %127 = fadd float %126, %119 %128 = fmul float %56, 0x3F72285660000000 %129 = fadd float %128, %121 %130 = fmul float %57, 0x3F72285660000000 %131 = fadd float %130, %123 %132 = fmul float %58, 0x3F72285660000000 %133 = fadd float %132, %125 %134 = fmul float %127, %127 %135 = fmul float %129, %129 %136 = fmul float %131, %131 %137 = call float @llvm.AMDGPU.lrp(float 0x3FE99999A0000000, float %134, float %127) %138 = call float @llvm.AMDGPU.lrp(float 0x3FE99999A0000000, float %135, float %129) %139 = call float @llvm.AMDGPU.lrp(float 0x3FE99999A0000000, float %136, float %131) %140 = call float @llvm.minnum.f32(float %42, float %137) %141 = call float @llvm.minnum.f32(float %43, float %138) %142 = call float @llvm.minnum.f32(float %44, float %139) %143 = call i32 @llvm.SI.packf16(float %140, float %141) %144 = bitcast i32 %143 to float %145 = call i32 @llvm.SI.packf16(float %142, float %133) %146 = bitcast i32 %145 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %144, float %146, float %144, float %146) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[8:11], s[2:3], 0x4 ; C0840304 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[8:11], 0x0 ; C2060900 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 v_mov_b32_e32 v0, 0x40400000 ; 7E0002FF 40400000 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_mov_b32_e32 v1, 0x3f9759de ; 7E0202FF 3F9759DE v_mov_b32_e32 v4, 0xc0400000 ; 7E0802FF C0400000 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, s12, v0, v2 ; D2820005 040A000C v_mad_f32 v0, s12, v1, v2 ; D2820000 040A020C v_mad_f32 v7, s12, v4, v2 ; D2820007 040A080C v_mov_b32_e32 v6, v3 ; 7E0C0303 v_mov_b32_e32 v1, v3 ; 7E020303 v_mov_b32_e32 v8, v3 ; 7E100303 v_mov_b32_e32 v4, 0xbf9759de ; 7E0802FF BF9759DE image_sample v[9:12], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010902 v_mac_f32_e32 v2, s12, v4 ; 3E04080C image_sample v[13:16], 15, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[4:11], s[0:3] ; F0800F00 00010D05 image_sample v[17:20], 15, 0, 0, 0, 0, 0, 0, 0, v[0:1], s[4:11], s[0:3] ; F0800F00 00011100 image_sample v[4:7], 15, 0, 0, 0, 0, 0, 0, 0, v[7:8], s[4:11], s[0:3] ; F0800F00 00010407 image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010002 v_mov_b32_e32 v8, 0x3e9792d1 ; 7E1002FF 3E9792D1 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v8, v0 ; 10000108 v_mul_f32_e32 v1, v8, v1 ; 10020308 v_mul_f32_e32 v2, v8, v2 ; 10040508 v_mul_f32_e32 v3, v8, v3 ; 10060708 v_mov_b32_e32 v21, 0x3b9142b3 ; 7E2A02FF 3B9142B3 v_mac_f32_e32 v0, v21, v4 ; 3E000915 v_mac_f32_e32 v1, v21, v5 ; 3E020B15 v_mac_f32_e32 v2, v21, v6 ; 3E040D15 v_mac_f32_e32 v3, v21, v7 ; 3E060F15 v_mov_b32_e32 v4, 0x3ecc5048 ; 7E0802FF 3ECC5048 v_mac_f32_e32 v0, v4, v9 ; 3E001304 v_mac_f32_e32 v1, v4, v10 ; 3E021504 v_mac_f32_e32 v2, v4, v11 ; 3E041704 v_mac_f32_e32 v3, v4, v12 ; 3E061904 v_mac_f32_e32 v0, v8, v17 ; 3E002308 v_mac_f32_e32 v1, v8, v18 ; 3E022508 v_mac_f32_e32 v2, v8, v19 ; 3E042708 v_mac_f32_e32 v3, v8, v20 ; 3E062908 v_mac_f32_e32 v0, v21, v13 ; 3E001B15 v_mac_f32_e32 v1, v21, v14 ; 3E021D15 v_mac_f32_e32 v2, v21, v15 ; 3E041F15 v_mac_f32_e32 v3, v21, v16 ; 3E062115 v_mul_f32_e32 v4, v0, v0 ; 10080100 v_mov_b32_e32 v5, 0x3e4ccccc ; 7E0A02FF 3E4CCCCC v_mul_f32_e32 v0, v5, v0 ; 10000105 v_mov_b32_e32 v6, 0x3f4ccccd ; 7E0C02FF 3F4CCCCD v_mac_f32_e32 v0, v6, v4 ; 3E000906 v_mul_f32_e32 v4, v1, v1 ; 10080301 v_mul_f32_e32 v1, v5, v1 ; 10020305 v_mac_f32_e32 v1, v6, v4 ; 3E020906 v_mul_f32_e32 v4, v5, v2 ; 10080505 v_mul_f32_e32 v2, v2, v2 ; 10040502 v_mac_f32_e32 v4, v6, v2 ; 3E080506 v_min_f32_e32 v0, v0, v9 ; 1E001300 v_min_f32_e32 v1, v1, v10 ; 1E021501 v_min_f32_e32 v2, v4, v11 ; 1E041704 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 24 Code Size: 352 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0] DCL CONST[2][0..41] DCL CONST[3][0..13] DCL CONST[4][0] DCL TEMP[0..4], LOCAL IMM[0] FLT32 { 0.0000, -3.0000, -1.1824, 1.1824} IMM[1] UINT32 {0, 0, 0, 0} IMM[2] FLT32 { 3.0000, 0.0044, 0.2960, 0.3990} 0: MOV TEMP[0].x, IMM[0].xxxx 1: MUL TEMP[1].x, CONST[1][0].yyyy, IMM[0].yyyy 2: MOV TEMP[0].y, TEMP[1].xxxx 3: MOV TEMP[1].x, IMM[0].xxxx 4: MUL TEMP[2].x, CONST[1][0].yyyy, IMM[0].zzzz 5: MOV TEMP[1].y, TEMP[2].xxxx 6: MOV TEMP[2].x, IMM[0].xxxx 7: MUL TEMP[3].x, CONST[1][0].yyyy, IMM[0].wwww 8: MOV TEMP[2].y, TEMP[3].xxxx 9: MOV TEMP[3].x, IMM[0].xxxx 10: MUL TEMP[4].x, CONST[1][0].yyyy, IMM[2].xxxx 11: MOV TEMP[3].y, TEMP[4].xxxx 12: MOV TEMP[4].xy, IN[0].xyyy 13: TEX TEMP[4], TEMP[4], SAMP[0], 2D 14: ADD TEMP[3].xy, TEMP[3].xyyy, IN[0].xyyy 15: MOV TEMP[3].xy, TEMP[3].xyyy 16: TEX TEMP[3], TEMP[3], SAMP[0], 2D 17: ADD TEMP[2].xy, TEMP[2].xyyy, IN[0].xyyy 18: MOV TEMP[2].xy, TEMP[2].xyyy 19: TEX TEMP[2], TEMP[2], SAMP[0], 2D 20: ADD TEMP[0].xy, TEMP[0].xyyy, IN[0].xyyy 21: MOV TEMP[0].xy, TEMP[0].xyyy 22: TEX TEMP[0], TEMP[0], SAMP[0], 2D 23: ADD TEMP[1].xy, TEMP[1].xyyy, IN[0].xyyy 24: MOV TEMP[1].xy, TEMP[1].xyyy 25: TEX TEMP[1], TEMP[1], SAMP[0], 2D 26: MUL TEMP[1], IMM[2].zzzz, TEMP[1] 27: MAD TEMP[0], IMM[2].yyyy, TEMP[0], TEMP[1] 28: MAD TEMP[0], IMM[2].wwww, TEMP[4], TEMP[0] 29: MAD TEMP[0], IMM[2].zzzz, TEMP[2], TEMP[0] 30: MAD TEMP[0], IMM[2].yyyy, TEMP[3], TEMP[0] 31: MOV TEMP[1].w, TEMP[0].wwww 32: MIN TEMP[1].xyz, TEMP[4].xyzz, TEMP[0].xyzz 33: MOV OUT[0], TEMP[1] 34: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %25 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %26 = load <8 x i32>, <8 x i32> addrspace(2)* %25, align 32, !tbaa !0 %27 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %28 = load <4 x i32>, <4 x i32> addrspace(2)* %27, align 16, !tbaa !0 %29 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %31 = fmul float %24, -3.000000e+00 %32 = fmul float %24, 0xBFF2EB3BC0000000 %33 = fmul float %24, 0x3FF2EB3BC0000000 %34 = fmul float %24, 3.000000e+00 %35 = bitcast float %29 to i32 %36 = bitcast float %30 to i32 %37 = insertelement <2 x i32> undef, i32 %35, i32 0 %38 = insertelement <2 x i32> %37, i32 %36, i32 1 %39 = bitcast <8 x i32> %26 to <32 x i8> %40 = bitcast <4 x i32> %28 to <16 x i8> %41 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %38, <32 x i8> %39, <16 x i8> %40, i32 2) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = extractelement <4 x float> %41, i32 2 %45 = extractelement <4 x float> %41, i32 3 %46 = fadd float %29, 0.000000e+00 %47 = fadd float %34, %30 %48 = bitcast float %46 to i32 %49 = bitcast float %47 to i32 %50 = insertelement <2 x i32> undef, i32 %48, i32 0 %51 = insertelement <2 x i32> %50, i32 %49, i32 1 %52 = bitcast <8 x i32> %26 to <32 x i8> %53 = bitcast <4 x i32> %28 to <16 x i8> %54 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %51, <32 x i8> %52, <16 x i8> %53, i32 2) %55 = extractelement <4 x float> %54, i32 0 %56 = extractelement <4 x float> %54, i32 1 %57 = extractelement <4 x float> %54, i32 2 %58 = extractelement <4 x float> %54, i32 3 %59 = fadd float %29, 0.000000e+00 %60 = fadd float %33, %30 %61 = bitcast float %59 to i32 %62 = bitcast float %60 to i32 %63 = insertelement <2 x i32> undef, i32 %61, i32 0 %64 = insertelement <2 x i32> %63, i32 %62, i32 1 %65 = bitcast <8 x i32> %26 to <32 x i8> %66 = bitcast <4 x i32> %28 to <16 x i8> %67 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %64, <32 x i8> %65, <16 x i8> %66, i32 2) %68 = extractelement <4 x float> %67, i32 0 %69 = extractelement <4 x float> %67, i32 1 %70 = extractelement <4 x float> %67, i32 2 %71 = extractelement <4 x float> %67, i32 3 %72 = fadd float %29, 0.000000e+00 %73 = fadd float %31, %30 %74 = bitcast float %72 to i32 %75 = bitcast float %73 to i32 %76 = insertelement <2 x i32> undef, i32 %74, i32 0 %77 = insertelement <2 x i32> %76, i32 %75, i32 1 %78 = bitcast <8 x i32> %26 to <32 x i8> %79 = bitcast <4 x i32> %28 to <16 x i8> %80 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %77, <32 x i8> %78, <16 x i8> %79, i32 2) %81 = extractelement <4 x float> %80, i32 0 %82 = extractelement <4 x float> %80, i32 1 %83 = extractelement <4 x float> %80, i32 2 %84 = extractelement <4 x float> %80, i32 3 %85 = fadd float %29, 0.000000e+00 %86 = fadd float %32, %30 %87 = bitcast float %85 to i32 %88 = bitcast float %86 to i32 %89 = insertelement <2 x i32> undef, i32 %87, i32 0 %90 = insertelement <2 x i32> %89, i32 %88, i32 1 %91 = bitcast <8 x i32> %26 to <32 x i8> %92 = bitcast <4 x i32> %28 to <16 x i8> %93 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %90, <32 x i8> %91, <16 x i8> %92, i32 2) %94 = extractelement <4 x float> %93, i32 0 %95 = extractelement <4 x float> %93, i32 1 %96 = extractelement <4 x float> %93, i32 2 %97 = extractelement <4 x float> %93, i32 3 %98 = fmul float %94, 0x3FD2F25A20000000 %99 = fmul float %95, 0x3FD2F25A20000000 %100 = fmul float %96, 0x3FD2F25A20000000 %101 = fmul float %97, 0x3FD2F25A20000000 %102 = fmul float %81, 0x3F72285660000000 %103 = fadd float %102, %98 %104 = fmul float %82, 0x3F72285660000000 %105 = fadd float %104, %99 %106 = fmul float %83, 0x3F72285660000000 %107 = fadd float %106, %100 %108 = fmul float %84, 0x3F72285660000000 %109 = fadd float %108, %101 %110 = fmul float %42, 0x3FD98A0900000000 %111 = fadd float %110, %103 %112 = fmul float %43, 0x3FD98A0900000000 %113 = fadd float %112, %105 %114 = fmul float %44, 0x3FD98A0900000000 %115 = fadd float %114, %107 %116 = fmul float %45, 0x3FD98A0900000000 %117 = fadd float %116, %109 %118 = fmul float %68, 0x3FD2F25A20000000 %119 = fadd float %118, %111 %120 = fmul float %69, 0x3FD2F25A20000000 %121 = fadd float %120, %113 %122 = fmul float %70, 0x3FD2F25A20000000 %123 = fadd float %122, %115 %124 = fmul float %71, 0x3FD2F25A20000000 %125 = fadd float %124, %117 %126 = fmul float %55, 0x3F72285660000000 %127 = fadd float %126, %119 %128 = fmul float %56, 0x3F72285660000000 %129 = fadd float %128, %121 %130 = fmul float %57, 0x3F72285660000000 %131 = fadd float %130, %123 %132 = fmul float %58, 0x3F72285660000000 %133 = fadd float %132, %125 %134 = call float @llvm.minnum.f32(float %42, float %127) %135 = call float @llvm.minnum.f32(float %43, float %129) %136 = call float @llvm.minnum.f32(float %44, float %131) %137 = call i32 @llvm.SI.packf16(float %134, float %135) %138 = bitcast i32 %137 to float %139 = call i32 @llvm.SI.packf16(float %136, float %133) %140 = bitcast i32 %139 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %138, float %140, float %138, float %140) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[12:15], s[2:3], 0x4 ; C0860304 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_mov_b32_e32 v0, 0x40400000 ; 7E0002FF 40400000 v_mov_b32_e32 v4, v2 ; 7E080302 v_mov_b32_e32 v5, v3 ; 7E0A0303 v_mov_b32_e32 v1, 0x3f9759de ; 7E0202FF 3F9759DE v_mov_b32_e32 v6, v2 ; 7E0C0302 v_mov_b32_e32 v7, v3 ; 7E0E0303 v_mov_b32_e32 v5, 0xc0400000 ; 7E0A02FF C0400000 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[12:15], 0x1 ; C2060D01 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, s12, v0, v3 ; D2820000 040E000C v_mad_f32 v1, s12, v1, v3 ; D2820001 040E020C v_mad_f32 v8, s12, v5, v3 ; D2820008 040E0A0C v_mov_b32_e32 v5, 0xbf9759de ; 7E0A02FF BF9759DE v_mov_b32_e32 v9, v2 ; 7E120302 v_mov_b32_e32 v10, v3 ; 7E140303 image_sample v[11:14], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010B02 v_mac_f32_e32 v3, s12, v5 ; 3E060A0C v_mov_b32_e32 v5, v0 ; 7E0A0300 v_mov_b32_e32 v7, v1 ; 7E0E0301 v_mov_b32_e32 v10, v8 ; 7E140308 image_sample v[15:18], 15, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[4:11], s[0:3] ; F0800F00 00010F04 image_sample v[4:7], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[4:11], s[0:3] ; F0800F00 00010406 image_sample v[19:22], 15, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[4:11], s[0:3] ; F0800F00 00011309 image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010002 v_mov_b32_e32 v8, 0x3e9792d1 ; 7E1002FF 3E9792D1 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v8, v0 ; 10000108 v_mul_f32_e32 v1, v8, v1 ; 10020308 v_mul_f32_e32 v2, v8, v2 ; 10040508 v_mul_f32_e32 v3, v8, v3 ; 10060708 v_mov_b32_e32 v9, 0x3b9142b3 ; 7E1202FF 3B9142B3 v_mac_f32_e32 v0, v9, v19 ; 3E002709 v_mac_f32_e32 v1, v9, v20 ; 3E022909 v_mac_f32_e32 v2, v9, v21 ; 3E042B09 v_mac_f32_e32 v3, v9, v22 ; 3E062D09 v_mov_b32_e32 v10, 0x3ecc5048 ; 7E1402FF 3ECC5048 v_mac_f32_e32 v0, v10, v11 ; 3E00170A v_mac_f32_e32 v1, v10, v12 ; 3E02190A v_mac_f32_e32 v2, v10, v13 ; 3E041B0A v_mac_f32_e32 v3, v10, v14 ; 3E061D0A v_mac_f32_e32 v0, v8, v4 ; 3E000908 v_mac_f32_e32 v1, v8, v5 ; 3E020B08 v_mac_f32_e32 v2, v8, v6 ; 3E040D08 v_mac_f32_e32 v3, v8, v7 ; 3E060F08 v_mac_f32_e32 v0, v9, v15 ; 3E001F09 v_mac_f32_e32 v1, v9, v16 ; 3E022109 v_mac_f32_e32 v2, v9, v17 ; 3E042309 v_mac_f32_e32 v3, v9, v18 ; 3E062509 v_min_f32_e32 v0, v0, v11 ; 1E001700 v_min_f32_e32 v1, v1, v12 ; 1E021901 v_min_f32_e32 v2, v2, v13 ; 1E041B02 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 24 Code Size: 324 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0xB last_cbuf = 3 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %5) %23 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %5) %24 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %5) %25 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %5) %26 = call i32 @llvm.SI.packf16(float %22, float %23) %27 = bitcast i32 %26 to float %28 = call i32 @llvm.SI.packf16(float %24, float %25) %29 = bitcast i32 %28 to float %30 = call i32 @llvm.SI.packf16(float %22, float %23) %31 = bitcast i32 %30 to float %32 = call i32 @llvm.SI.packf16(float %24, float %25) %33 = bitcast i32 %32 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 1, i32 1, float %31, float %33, float %31, float %33) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 2, i32 0, float %22, float %23, float %24, float %25) %34 = call i32 @llvm.SI.packf16(float %22, float %23) %35 = bitcast i32 %34 to float %36 = call i32 @llvm.SI.packf16(float %24, float %25) %37 = bitcast i32 %36 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 3, i32 1, float %35, float %37, float %35, float %37) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %27, float %29, float %27, float %29) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s9 ; BEFC0309 v_interp_mov_f32 v0, P0, 0, 0, [m0] ; C8020002 v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102 v_interp_mov_f32 v2, P0, 2, 0, [m0] ; C80A0202 v_interp_mov_f32 v3, P0, 3, 0, [m0] ; C80E0302 v_cvt_pkrtz_f16_f32_e32 v4, v0, v1 ; 5E080300 v_cvt_pkrtz_f16_f32_e32 v5, v2, v3 ; 5E0A0702 exp 15, 1, 1, 0, 0, v4, v5, v4, v5 ; F800041F 05040504 exp 15, 2, 0, 0, 0, v0, v1, v2, v3 ; F800002F 03020100 exp 15, 3, 1, 0, 0, v4, v5, v4, v5 ; F800043F 05040504 exp 15, 0, 1, 1, 1, v4, v5, v4, v5 ; F8001C0F 05040504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 64 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL OUT[4], GENERIC[2] DCL OUT[5], GENERIC[3] DCL OUT[6], GENERIC[4] DCL OUT[7], GENERIC[5] DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0..2] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL DCL TEMP[2..10], LOCAL IMM[0] FLT32 { 0.0000, 255.0000, -128.0000, 1.0000} IMM[1] INT32 {1, 0, 2, 3} IMM[2] FLT32 { -64.0000, 0.0159, 2.0000, 16.0000} IMM[3] UINT32 {3, 304, 320, 4} IMM[4] UINT32 {0, 12, 28, 44} IMM[5] UINT32 {60, 24, 32, 16} IMM[6] UINT32 {48, 20, 36, 52} IMM[7] UINT32 {8, 40, 56, 0} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MOV TEMP[0].xy, IN[3].xyyy 4: MOV TEMP[0].w, IMM[0].xxxx 5: TXL TEMP[0], TEMP[0], SAMP[0], 2D 6: MOV TEMP[2].xy, IN[3].xyyy 7: MOV TEMP[2].w, IMM[0].xxxx 8: TXL TEMP[2], TEMP[2], SAMP[0], 2D, IMM[1].xyx 9: MOV TEMP[3].xy, IN[3].xyyy 10: MOV TEMP[3].w, IMM[0].xxxx 11: TXL TEMP[3], TEMP[3], SAMP[0], 2D, IMM[1].zyz 12: MAD TEMP[4], IN[1], IMM[0].yyyy, IMM[0].zzzz 13: FSLT TEMP[5], TEMP[4], IMM[0].xxxx 14: AND TEMP[5], TEMP[5], IMM[0].wwww 15: ABS TEMP[4], TEMP[4] 16: ADD TEMP[4], TEMP[4], -TEMP[5] 17: ADD TEMP[4], TEMP[4], IMM[2].xxxx 18: FSLT TEMP[6], TEMP[4], IMM[0].xxxx 19: AND TEMP[6], TEMP[6], IMM[0].wwww 20: ABS TEMP[4], TEMP[4] 21: ADD TEMP[4], TEMP[4], -TEMP[6] 22: MUL TEMP[4], TEMP[4], IMM[2].yyyy 23: MUL TEMP[6], TEMP[6], IMM[2].zzzz 24: ADD TEMP[6], IMM[0].wwww, -TEMP[6] 25: MUL TEMP[5], IMM[2].zzzz, TEMP[5] 26: ADD TEMP[5].xzw, IMM[0].wwww, -TEMP[5] 27: MOV TEMP[7].x, TEMP[4].xxxx 28: MOV TEMP[7].y, TEMP[4].yyyy 29: ADD TEMP[8].x, IMM[0].wwww, -TEMP[4].xxxx 30: ADD TEMP[8].x, TEMP[8].xxxx, -TEMP[4].yyyy 31: MOV TEMP[7].z, TEMP[8].xxxx 32: DP3 TEMP[8].x, TEMP[7].xyzz, TEMP[7].xyzz 33: RSQ TEMP[8].x, TEMP[8].xxxx 34: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[8].xxxx 35: MUL TEMP[8].xy, TEMP[7].xyyy, TEMP[6].xyyy 36: MOV TEMP[9].x, TEMP[4].zzzz 37: MOV TEMP[9].y, TEMP[4].wwww 38: ADD TEMP[10].x, IMM[0].wwww, -TEMP[4].zzzz 39: ADD TEMP[4].x, TEMP[10].xxxx, -TEMP[4].wwww 40: MOV TEMP[9].z, TEMP[4].xxxx 41: DP3 TEMP[4].x, TEMP[9].xyzz, TEMP[9].xyzz 42: RSQ TEMP[4].x, TEMP[4].xxxx 43: MUL TEMP[4].xyz, TEMP[9].xyzz, TEMP[4].xxxx 44: MUL TEMP[6].xy, TEMP[4].xyyy, TEMP[6].zwww 45: MOV TEMP[9].w, IMM[0].xxxx 46: MOV TEMP[9].x, TEMP[8].xxxx 47: MOV TEMP[9].y, TEMP[8].yyyy 48: MUL TEMP[7].x, TEMP[7].zzzz, TEMP[5].xxxx 49: MOV TEMP[9].z, TEMP[7].xxxx 50: DP4 TEMP[7].x, TEMP[9], TEMP[0] 51: DP4 TEMP[8].x, TEMP[9], TEMP[2] 52: MOV TEMP[7].y, TEMP[8].xxxx 53: DP4 TEMP[8].x, TEMP[9], TEMP[3] 54: MOV TEMP[7].z, TEMP[8].xxxx 55: DP3 TEMP[8].x, TEMP[7].xyzz, TEMP[7].xyzz 56: RSQ TEMP[8].x, TEMP[8].xxxx 57: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[8].xxxx 58: MOV TEMP[8].w, IMM[0].xxxx 59: MOV TEMP[8].x, TEMP[6].xxxx 60: MOV TEMP[8].y, TEMP[6].yyyy 61: MUL TEMP[4].x, TEMP[4].zzzz, TEMP[5].zzzz 62: MOV TEMP[8].z, TEMP[4].xxxx 63: DP4 TEMP[4].x, TEMP[8], TEMP[0] 64: DP4 TEMP[6].x, TEMP[8], TEMP[2] 65: MOV TEMP[4].y, TEMP[6].xxxx 66: DP4 TEMP[6].x, TEMP[8], TEMP[3] 67: MOV TEMP[4].z, TEMP[6].xxxx 68: DP3 TEMP[6].x, TEMP[4].xyzz, TEMP[7].xyzz 69: MUL TEMP[6].xyz, TEMP[6].xxxx, TEMP[7].xyzz 70: ADD TEMP[4].xyz, TEMP[4].xyzz, -TEMP[6].xyzz 71: DP3 TEMP[6].x, TEMP[4].xyzz, TEMP[4].xyzz 72: RSQ TEMP[6].x, TEMP[6].xxxx 73: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[6].xxxx 74: MOV TEMP[6].x, TEMP[4].xxxx 75: MOV TEMP[6].y, TEMP[4].yyyy 76: MOV TEMP[6].z, TEMP[4].zzzz 77: MOV TEMP[6].w, TEMP[5].wwww 78: MOV TEMP[4].w, IMM[0].wwww 79: MOV TEMP[4].x, IN[0].xxxx 80: MOV TEMP[4].y, IN[0].yyyy 81: MOV TEMP[4].z, IN[0].zzzz 82: DP4 TEMP[0].x, TEMP[4], TEMP[0] 83: DP4 TEMP[2].x, TEMP[4], TEMP[2] 84: DP4 TEMP[3].x, TEMP[4], TEMP[3] 85: MOV TEMP[4].x, TEMP[0].xxxx 86: MOV TEMP[4].y, TEMP[2].xxxx 87: MOV TEMP[4].z, TEMP[3].xxxx 88: ADD TEMP[4].xyz, TEMP[4].xyzz, -CONST[4][19].xyzz 89: MOV TEMP[5].x, TEMP[0].xxxx 90: MOV TEMP[5].y, TEMP[2].xxxx 91: MOV TEMP[5].z, TEMP[3].xxxx 92: DP3 TEMP[8].x, CONST[4][20].xyzz, TEMP[4].xyzz 93: MOV TEMP[5].w, TEMP[8].xxxx 94: MOV TEMP[8].x, TEMP[7].xxxx 95: MOV TEMP[8].y, TEMP[7].yyyy 96: MOV TEMP[8].z, TEMP[7].zzzz 97: DP3 TEMP[9].x, TEMP[4].xyzz, TEMP[4].xyzz 98: RSQ TEMP[9].x, TEMP[9].xxxx 99: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[9].xxxx 100: DP3 TEMP[9].x, TEMP[7].xyzz, CONST[5][0].xyzz 101: MUL TEMP[7].xyz, TEMP[9].xxxx, TEMP[7].xyzz 102: MUL TEMP[7].xyz, IMM[2].zzzz, TEMP[7].xyzz 103: ADD TEMP[7].xyz, CONST[5][0].xyzz, -TEMP[7].xyzz 104: DP3 TEMP[4].x, -TEMP[4].xyzz, TEMP[7].xyzz 105: MOV_SAT TEMP[4].x, TEMP[4].xxxx 106: POW TEMP[4].x, TEMP[4].xxxx, IMM[2].wwww 107: MOV_SAT TEMP[4].x, TEMP[4].xxxx 108: MOV TEMP[8].w, TEMP[4].xxxx 109: MOV TEMP[4].w, IMM[0].wwww 110: MOV TEMP[4].x, TEMP[0].xxxx 111: MOV TEMP[4].y, TEMP[2].xxxx 112: MOV TEMP[4].z, TEMP[3].xxxx 113: MOV TEMP[0].x, CONST[4][0].wwww 114: MOV TEMP[0].y, CONST[4][1].wwww 115: MOV TEMP[0].z, CONST[4][2].wwww 116: MOV TEMP[0].w, CONST[4][3].wwww 117: DP4 TEMP[0].x, TEMP[4], TEMP[0] 118: MAD TEMP[2].xy, IN[2].xyyy, CONST[1][1].zwww, CONST[1][2].xyyy 119: MOV TEMP[3].xy, IN[3].xyyy 120: MOV TEMP[3].w, IMM[0].xxxx 121: TXL TEMP[3], TEMP[3], SAMP[0], 2D, IMM[1].wyw 122: MOV TEMP[7].x, CONST[4][0].xxxx 123: MOV TEMP[7].y, CONST[4][1].xxxx 124: MOV TEMP[7].z, CONST[4][2].xxxx 125: MOV TEMP[7].w, CONST[4][3].xxxx 126: DP4 TEMP[7].x, TEMP[4], TEMP[7] 127: MOV TEMP[9].x, CONST[4][0].yyyy 128: MOV TEMP[9].y, CONST[4][1].yyyy 129: MOV TEMP[9].z, CONST[4][2].yyyy 130: MOV TEMP[9].w, CONST[4][3].yyyy 131: DP4 TEMP[9].x, TEMP[4], TEMP[9] 132: MOV TEMP[7].y, -TEMP[9].xxxx 133: MOV TEMP[9].x, CONST[4][0].zzzz 134: MOV TEMP[9].y, CONST[4][1].zzzz 135: MOV TEMP[9].z, CONST[4][2].zzzz 136: MOV TEMP[9].w, CONST[4][3].zzzz 137: DP4 TEMP[4].x, TEMP[4], TEMP[9] 138: MAD TEMP[4].x, TEMP[4].xxxx, IMM[2].zzzz, -TEMP[0].xxxx 139: MOV TEMP[7].z, TEMP[4].xxxx 140: MOV TEMP[7].w, TEMP[0].xxxx 141: MOV OUT[1], TEMP[1] 142: MOV OUT[2].xy, TEMP[2].xyxx 143: MOV OUT[4], TEMP[8] 144: MOV OUT[6], IMM[0].xxxx 145: MOV OUT[7], TEMP[6] 146: MOV OUT[5], TEMP[3] 147: MOV OUT[0], TEMP[7] 148: MOV OUT[3], TEMP[5] 149: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %17 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4 %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 %19 = call float @llvm.SI.load.const(<16 x i8> %18, i32 0) %20 = call float @llvm.SI.load.const(<16 x i8> %18, i32 4) %21 = call float @llvm.SI.load.const(<16 x i8> %18, i32 8) %22 = call float @llvm.SI.load.const(<16 x i8> %18, i32 12) %23 = call float @llvm.SI.load.const(<16 x i8> %18, i32 16) %24 = call float @llvm.SI.load.const(<16 x i8> %18, i32 20) %25 = call float @llvm.SI.load.const(<16 x i8> %18, i32 24) %26 = call float @llvm.SI.load.const(<16 x i8> %18, i32 28) %27 = call float @llvm.SI.load.const(<16 x i8> %18, i32 32) %28 = call float @llvm.SI.load.const(<16 x i8> %18, i32 36) %29 = call float @llvm.SI.load.const(<16 x i8> %18, i32 40) %30 = call float @llvm.SI.load.const(<16 x i8> %18, i32 44) %31 = call float @llvm.SI.load.const(<16 x i8> %18, i32 48) %32 = call float @llvm.SI.load.const(<16 x i8> %18, i32 52) %33 = call float @llvm.SI.load.const(<16 x i8> %18, i32 56) %34 = call float @llvm.SI.load.const(<16 x i8> %18, i32 60) %35 = call float @llvm.SI.load.const(<16 x i8> %18, i32 304) %36 = call float @llvm.SI.load.const(<16 x i8> %18, i32 308) %37 = call float @llvm.SI.load.const(<16 x i8> %18, i32 312) %38 = call float @llvm.SI.load.const(<16 x i8> %18, i32 320) %39 = call float @llvm.SI.load.const(<16 x i8> %18, i32 324) %40 = call float @llvm.SI.load.const(<16 x i8> %18, i32 328) %41 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 5 %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 %43 = call float @llvm.SI.load.const(<16 x i8> %42, i32 0) %44 = call float @llvm.SI.load.const(<16 x i8> %42, i32 4) %45 = call float @llvm.SI.load.const(<16 x i8> %42, i32 8) %46 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %47 = load <8 x i32>, <8 x i32> addrspace(2)* %46, align 32, !tbaa !0 %48 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %49 = load <4 x i32>, <4 x i32> addrspace(2)* %48, align 16, !tbaa !0 %50 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %51 = load <16 x i8>, <16 x i8> addrspace(2)* %50, align 16, !tbaa !0 %52 = add i32 %5, %7 %53 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %51, i32 0, i32 %52) %54 = extractelement <4 x float> %53, i32 0 %55 = extractelement <4 x float> %53, i32 1 %56 = extractelement <4 x float> %53, i32 2 %57 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %58 = load <16 x i8>, <16 x i8> addrspace(2)* %57, align 16, !tbaa !0 %59 = add i32 %5, %7 %60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %58, i32 0, i32 %59) %61 = extractelement <4 x float> %60, i32 0 %62 = extractelement <4 x float> %60, i32 1 %63 = extractelement <4 x float> %60, i32 2 %64 = extractelement <4 x float> %60, i32 3 %65 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %66 = load <16 x i8>, <16 x i8> addrspace(2)* %65, align 16, !tbaa !0 %67 = add i32 %5, %7 %68 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %66, i32 0, i32 %67) %69 = extractelement <4 x float> %68, i32 0 %70 = extractelement <4 x float> %68, i32 1 %71 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0 %73 = add i32 %10, %6 %74 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %72, i32 0, i32 %73) %75 = extractelement <4 x float> %74, i32 0 %76 = extractelement <4 x float> %74, i32 1 %77 = bitcast float %75 to i32 %78 = bitcast float %76 to i32 %79 = insertelement <4 x i32> undef, i32 %77, i32 0 %80 = insertelement <4 x i32> %79, i32 %78, i32 1 %81 = insertelement <4 x i32> %80, i32 0, i32 2 %82 = bitcast <8 x i32> %47 to <32 x i8> %83 = bitcast <4 x i32> %49 to <16 x i8> %84 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %81, <32 x i8> %82, <16 x i8> %83, i32 2) %85 = extractelement <4 x float> %84, i32 0 %86 = extractelement <4 x float> %84, i32 1 %87 = extractelement <4 x float> %84, i32 2 %88 = extractelement <4 x float> %84, i32 3 %89 = bitcast float %75 to i32 %90 = bitcast float %76 to i32 %91 = insertelement <4 x i32> , i32 %89, i32 1 %92 = insertelement <4 x i32> %91, i32 %90, i32 2 %93 = insertelement <4 x i32> %92, i32 0, i32 3 %94 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %93, <8 x i32> %47, <4 x i32> %49, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %95 = extractelement <4 x float> %94, i32 0 %96 = extractelement <4 x float> %94, i32 1 %97 = extractelement <4 x float> %94, i32 2 %98 = extractelement <4 x float> %94, i32 3 %99 = bitcast float %75 to i32 %100 = bitcast float %76 to i32 %101 = insertelement <4 x i32> , i32 %99, i32 1 %102 = insertelement <4 x i32> %101, i32 %100, i32 2 %103 = insertelement <4 x i32> %102, i32 0, i32 3 %104 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %103, <8 x i32> %47, <4 x i32> %49, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %105 = extractelement <4 x float> %104, i32 0 %106 = extractelement <4 x float> %104, i32 1 %107 = extractelement <4 x float> %104, i32 2 %108 = extractelement <4 x float> %104, i32 3 %109 = fmul float %61, 2.550000e+02 %110 = fadd float %109, -1.280000e+02 %111 = fmul float %62, 2.550000e+02 %112 = fadd float %111, -1.280000e+02 %113 = fmul float %63, 2.550000e+02 %114 = fadd float %113, -1.280000e+02 %115 = fmul float %64, 2.550000e+02 %116 = fadd float %115, -1.280000e+02 %117 = fcmp olt float %110, 0.000000e+00 %118 = fcmp olt float %112, 0.000000e+00 %119 = fcmp olt float %114, 0.000000e+00 %120 = fcmp olt float %116, 0.000000e+00 %121 = select i1 %117, float 1.000000e+00, float 0.000000e+00 %122 = select i1 %119, float 1.000000e+00, float 0.000000e+00 %123 = select i1 %120, float 1.000000e+00, float 0.000000e+00 %124 = call float @fabs(float %110) %125 = call float @fabs(float %112) %126 = call float @fabs(float %114) %127 = call float @fabs(float %116) %128 = fsub float %124, %121 %129 = select i1 %118, float -1.000000e+00, float -0.000000e+00 %130 = fadd float %125, %129 %131 = fsub float %126, %122 %132 = fsub float %127, %123 %133 = fadd float %128, -6.400000e+01 %134 = fadd float %130, -6.400000e+01 %135 = fadd float %131, -6.400000e+01 %136 = fadd float %132, -6.400000e+01 %137 = fcmp olt float %133, 0.000000e+00 %138 = fcmp olt float %134, 0.000000e+00 %139 = fcmp olt float %135, 0.000000e+00 %140 = fcmp olt float %136, 0.000000e+00 %141 = select i1 %137, float 1.000000e+00, float 0.000000e+00 %142 = select i1 %138, float 1.000000e+00, float 0.000000e+00 %143 = select i1 %139, float 1.000000e+00, float 0.000000e+00 %144 = select i1 %140, float 1.000000e+00, float 0.000000e+00 %145 = call float @fabs(float %133) %146 = call float @fabs(float %134) %147 = call float @fabs(float %135) %148 = call float @fabs(float %136) %149 = fsub float %145, %141 %150 = fsub float %146, %142 %151 = fsub float %147, %143 %152 = fsub float %148, %144 %153 = fmul float %149, 0x3F90410420000000 %154 = fmul float %150, 0x3F90410420000000 %155 = fmul float %151, 0x3F90410420000000 %156 = fmul float %152, 0x3F90410420000000 %157 = fmul float %141, 2.000000e+00 %158 = fmul float %142, 2.000000e+00 %159 = fmul float %143, 2.000000e+00 %160 = fmul float %144, 2.000000e+00 %161 = fsub float 1.000000e+00, %157 %162 = fsub float 1.000000e+00, %158 %163 = fsub float 1.000000e+00, %159 %164 = fsub float 1.000000e+00, %160 %165 = fmul float %121, 2.000000e+00 %166 = fmul float %122, 2.000000e+00 %167 = fmul float %123, 2.000000e+00 %168 = fsub float 1.000000e+00, %165 %169 = fsub float 1.000000e+00, %166 %170 = fsub float 1.000000e+00, %167 %171 = fsub float 1.000000e+00, %153 %172 = fsub float %171, %154 %173 = fmul float %153, %153 %174 = fmul float %154, %154 %175 = fadd float %174, %173 %176 = fmul float %172, %172 %177 = fadd float %175, %176 %178 = call float @llvm.AMDGPU.rsq.clamped.f32(float %177) %179 = fmul float %153, %178 %180 = fmul float %154, %178 %181 = fmul float %172, %178 %182 = fmul float %179, %161 %183 = fmul float %180, %162 %184 = fsub float 1.000000e+00, %155 %185 = fsub float %184, %156 %186 = fmul float %155, %155 %187 = fmul float %156, %156 %188 = fadd float %187, %186 %189 = fmul float %185, %185 %190 = fadd float %188, %189 %191 = call float @llvm.AMDGPU.rsq.clamped.f32(float %190) %192 = fmul float %155, %191 %193 = fmul float %156, %191 %194 = fmul float %185, %191 %195 = fmul float %192, %163 %196 = fmul float %193, %164 %197 = fmul float %181, %168 %198 = fmul float %182, %85 %199 = fmul float %183, %86 %200 = fadd float %198, %199 %201 = fmul float %197, %87 %202 = fadd float %200, %201 %203 = fmul float %88, 0.000000e+00 %204 = fadd float %202, %203 %205 = fmul float %182, %95 %206 = fmul float %183, %96 %207 = fadd float %205, %206 %208 = fmul float %197, %97 %209 = fadd float %207, %208 %210 = fmul float %98, 0.000000e+00 %211 = fadd float %209, %210 %212 = fmul float %182, %105 %213 = fmul float %183, %106 %214 = fadd float %212, %213 %215 = fmul float %197, %107 %216 = fadd float %214, %215 %217 = fmul float %108, 0.000000e+00 %218 = fadd float %216, %217 %219 = fmul float %204, %204 %220 = fmul float %211, %211 %221 = fadd float %220, %219 %222 = fmul float %218, %218 %223 = fadd float %221, %222 %224 = call float @llvm.AMDGPU.rsq.clamped.f32(float %223) %225 = fmul float %204, %224 %226 = fmul float %211, %224 %227 = fmul float %218, %224 %228 = fmul float %194, %169 %229 = fmul float %195, %85 %230 = fmul float %196, %86 %231 = fadd float %229, %230 %232 = fmul float %228, %87 %233 = fadd float %231, %232 %234 = fmul float %88, 0.000000e+00 %235 = fadd float %233, %234 %236 = fmul float %195, %95 %237 = fmul float %196, %96 %238 = fadd float %236, %237 %239 = fmul float %228, %97 %240 = fadd float %238, %239 %241 = fmul float %98, 0.000000e+00 %242 = fadd float %240, %241 %243 = fmul float %195, %105 %244 = fmul float %196, %106 %245 = fadd float %243, %244 %246 = fmul float %228, %107 %247 = fadd float %245, %246 %248 = fmul float %108, 0.000000e+00 %249 = fadd float %247, %248 %250 = fmul float %235, %225 %251 = fmul float %242, %226 %252 = fadd float %251, %250 %253 = fmul float %249, %227 %254 = fadd float %252, %253 %255 = fmul float %254, %225 %256 = fmul float %254, %226 %257 = fmul float %254, %227 %258 = fsub float %235, %255 %259 = fsub float %242, %256 %260 = fsub float %249, %257 %261 = fmul float %258, %258 %262 = fmul float %259, %259 %263 = fadd float %262, %261 %264 = fmul float %260, %260 %265 = fadd float %263, %264 %266 = call float @llvm.AMDGPU.rsq.clamped.f32(float %265) %267 = fmul float %258, %266 %268 = fmul float %259, %266 %269 = fmul float %260, %266 %270 = fmul float %54, %85 %271 = fmul float %55, %86 %272 = fadd float %270, %271 %273 = fmul float %56, %87 %274 = fadd float %272, %273 %275 = fadd float %274, %88 %276 = fmul float %54, %95 %277 = fmul float %55, %96 %278 = fadd float %276, %277 %279 = fmul float %56, %97 %280 = fadd float %278, %279 %281 = fadd float %280, %98 %282 = fmul float %54, %105 %283 = fmul float %55, %106 %284 = fadd float %282, %283 %285 = fmul float %56, %107 %286 = fadd float %284, %285 %287 = fadd float %286, %108 %288 = fsub float %275, %35 %289 = fsub float %281, %36 %290 = fsub float %287, %37 %291 = fmul float %38, %288 %292 = fmul float %39, %289 %293 = fadd float %292, %291 %294 = fmul float %40, %290 %295 = fadd float %293, %294 %296 = fmul float %288, %288 %297 = fmul float %289, %289 %298 = fadd float %297, %296 %299 = fmul float %290, %290 %300 = fadd float %298, %299 %301 = call float @llvm.AMDGPU.rsq.clamped.f32(float %300) %302 = fmul float %288, %301 %303 = fmul float %289, %301 %304 = fmul float %290, %301 %305 = fmul float %225, %43 %306 = fmul float %226, %44 %307 = fadd float %306, %305 %308 = fmul float %227, %45 %309 = fadd float %307, %308 %310 = fmul float %309, %225 %311 = fmul float %309, %226 %312 = fmul float %309, %227 %313 = fmul float %310, 2.000000e+00 %314 = fmul float %311, 2.000000e+00 %315 = fmul float %312, 2.000000e+00 %316 = fsub float %43, %313 %317 = fsub float %44, %314 %318 = fsub float %45, %315 %319 = fmul float %302, %316 %320 = fsub float -0.000000e+00, %319 %321 = fmul float %303, %317 %322 = fsub float %320, %321 %323 = fmul float %304, %318 %324 = fsub float %322, %323 %325 = call float @llvm.AMDIL.clamp.(float %324, float 0.000000e+00, float 1.000000e+00) %326 = call float @llvm.pow.f32(float %325, float 1.600000e+01) %327 = call float @llvm.AMDIL.clamp.(float %326, float 0.000000e+00, float 1.000000e+00) %328 = fmul float %275, %22 %329 = fmul float %281, %26 %330 = fadd float %328, %329 %331 = fmul float %287, %30 %332 = fadd float %330, %331 %333 = fadd float %332, %34 %334 = fmul float %69, %13 %335 = fadd float %334, %15 %336 = fmul float %70, %14 %337 = fadd float %336, %16 %338 = bitcast float %75 to i32 %339 = bitcast float %76 to i32 %340 = insertelement <4 x i32> , i32 %338, i32 1 %341 = insertelement <4 x i32> %340, i32 %339, i32 2 %342 = insertelement <4 x i32> %341, i32 0, i32 3 %343 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %342, <8 x i32> %47, <4 x i32> %49, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %344 = extractelement <4 x float> %343, i32 0 %345 = extractelement <4 x float> %343, i32 1 %346 = extractelement <4 x float> %343, i32 2 %347 = extractelement <4 x float> %343, i32 3 %348 = fmul float %275, %19 %349 = fmul float %281, %23 %350 = fadd float %348, %349 %351 = fmul float %287, %27 %352 = fadd float %350, %351 %353 = fadd float %352, %31 %354 = fmul float %275, %20 %355 = fmul float %281, %24 %356 = fadd float %354, %355 %357 = fmul float %287, %28 %358 = fadd float %356, %357 %359 = fadd float %358, %32 %360 = fsub float -0.000000e+00, %359 %361 = fmul float %275, %21 %362 = fmul float %281, %25 %363 = fadd float %361, %362 %364 = fmul float %287, %29 %365 = fadd float %363, %364 %366 = fadd float %365, %33 %367 = fmul float %366, 2.000000e+00 %368 = fsub float %367, %333 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %335, float %337, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %275, float %281, float %287, float %295) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %225, float %226, float %227, float %327) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %344, float %345, float %346, float %347) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %267, float %268, float %269, float %170) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %353, float %360, float %368, float %333) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0xc3000000 ; 7E0202FF C3000000 v_mov_b32_e32 v2, 0x437f0000 ; 7E0402FF 437F0000 v_mov_b32_e32 v4, 0x80000000 ; 7E0802FF 80000000 v_mov_b32_e32 v5, 0xc2800000 ; 7E0A02FF C2800000 v_mov_b32_e32 v6, 0x3c820821 ; 7E0C02FF 3C820821 v_add_i32_e32 v0, s10, v0 ; 4A00000A v_add_i32_e32 v3, s11, v3 ; 4A06060B s_load_dwordx4 s[28:31], s[4:5], 0x0 ; C08E0500 s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908 s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C s_load_dwordx4 s[40:43], s[2:3], 0x4 ; C0940304 s_load_dwordx4 s[44:47], s[2:3], 0x10 ; C0960310 s_load_dwordx4 s[48:51], s[2:3], 0x14 ; C0980314 s_load_dwordx8 s[32:39], s[6:7], 0x0 ; C0D00700 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[7:10], v0, s[12:15], 0 idxen ; E00C2000 80030700 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[10:13], v0, s[16:19], 0 idxen ; E00C2000 80040A00 buffer_load_format_xyzw v[14:17], v0, s[20:23], 0 idxen ; E00C2000 80050E00 buffer_load_format_xyzw v[20:23], v3, s[8:11], 0 idxen ; E00C2000 80021403 s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v22, 0 ; 7E2C0280 s_buffer_load_dword s0, s[44:47], 0xf ; C2002D0F s_buffer_load_dword s24, s[44:47], 0x4c ; C20C2D4C s_buffer_load_dword s23, s[44:47], 0x4d ; C20BAD4D s_buffer_load_dword s22, s[44:47], 0x4e ; C20B2D4E s_buffer_load_dword s14, s[44:47], 0x50 ; C2072D50 s_buffer_load_dword s21, s[48:51], 0x0 ; C20AB100 s_buffer_load_dword s20, s[48:51], 0x1 ; C20A3101 s_buffer_load_dword s19, s[48:51], 0x2 ; C209B102 s_buffer_load_dword s26, s[40:43], 0x6 ; C20D2906 s_buffer_load_dword s25, s[40:43], 0x7 ; C20CA907 s_buffer_load_dword s27, s[40:43], 0x8 ; C20DA908 s_buffer_load_dword s40, s[40:43], 0x9 ; C2142909 s_buffer_load_dword s17, s[44:47], 0x51 ; C208AD51 s_buffer_load_dword s18, s[44:47], 0x52 ; C2092D52 s_buffer_load_dword s6, s[44:47], 0x5 ; C2032D05 s_buffer_load_dword s7, s[44:47], 0x6 ; C203AD06 s_buffer_load_dword s11, s[44:47], 0x7 ; C205AD07 s_buffer_load_dword s4, s[44:47], 0x8 ; C2022D08 s_buffer_load_dword s3, s[44:47], 0x9 ; C201AD09 s_buffer_load_dword s8, s[44:47], 0x0 ; C2042D00 s_buffer_load_dword s9, s[44:47], 0x1 ; C204AD01 s_buffer_load_dword s10, s[44:47], 0x2 ; C2052D02 s_buffer_load_dword s12, s[44:47], 0x3 ; C2062D03 s_buffer_load_dword s15, s[44:47], 0x4 ; C207AD04 s_buffer_load_dword s13, s[44:47], 0xa ; C206AD0A s_buffer_load_dword s16, s[44:47], 0xb ; C2082D0B s_buffer_load_dword s5, s[44:47], 0xc ; C202AD0C s_buffer_load_dword s2, s[44:47], 0xd ; C2012D0D s_buffer_load_dword s1, s[44:47], 0xe ; C200AD0E s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s27 ; 7E00021B v_mov_b32_e32 v3, s40 ; 7E060228 image_sample_l v[23:26], 15, 0, 0, 0, 0, 0, 0, 0, v[20:23], s[32:39], s[28:31] ; F0900F00 00E81714 v_mov_b32_e32 v19, 0x10001 ; 7E2602FF 00010001 image_sample_l_o v[27:30], 15, 0, 0, 0, 0, 0, 0, 0, v[19:22], s[32:39], s[28:31] ; F0D00F00 00E81B13 v_mov_b32_e32 v19, 0x20002 ; 7E2602FF 00020002 image_sample_l_o v[31:34], 15, 0, 0, 0, 0, 0, 0, 0, v[19:22], s[32:39], s[28:31] ; F0D00F00 00E81F13 v_mov_b32_e32 v19, 0x30003 ; 7E2602FF 00030003 image_sample_l_o v[16:19], 15, 0, 0, 0, 0, 0, 0, 0, v[19:22], s[32:39], s[28:31] ; F0D00F00 00E81013 exp 15, 32, 0, 0, 0, v22, v22, v22, v22 ; F800020F 16161616 s_waitcnt vmcnt(3) ; BF8C0773 v_mul_f32_e32 v20, v24, v8 ; 10281118 v_mad_f32 v10, v2, v10, v1 ; D282000A 04061502 v_mad_f32 v11, v2, v11, v1 ; D282000B 04061702 v_mad_f32 v12, v2, v12, v1 ; D282000C 04061902 v_mac_f32_e32 v1, v2, v13 ; 3E021B02 v_mac_f32_e32 v0, s26, v14 ; 3E001C1A v_mac_f32_e32 v3, s25, v15 ; 3E061E19 v_mac_f32_e32 v20, v23, v7 ; 3E280F17 s_waitcnt vmcnt(2) ; BF8C0772 v_mul_f32_e32 v2, v28, v8 ; 1004111C v_mac_f32_e32 v2, v27, v7 ; 3E040F1B s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v8, v32, v8 ; 10101120 v_mac_f32_e32 v8, v31, v7 ; 3E100F1F v_mac_f32_e32 v20, v25, v9 ; 3E281319 v_mac_f32_e32 v2, v29, v9 ; 3E04131D v_mac_f32_e32 v8, v33, v9 ; 3E101321 v_cmp_gt_f32_e32 vcc, 0, v11 ; 7C081680 v_cndmask_b32_e64 v4, v4, -1.0, vcc ; D2000004 01A9E704 v_add_f32_e64 v4, |v11|, v4 ; D2060104 0002090B v_cmp_gt_f32_e32 vcc, 0, v10 ; 7C081480 v_cndmask_b32_e64 v7, 0, 1.0, vcc ; D2000007 01A9E480 v_sub_f32_e64 v9, |v10|, v7 ; D2080109 00020F0A v_cmp_gt_f32_e32 vcc, 0, v12 ; 7C081880 v_cndmask_b32_e64 v10, 0, 1.0, vcc ; D200000A 01A9E480 v_sub_f32_e64 v11, |v12|, v10 ; D208010B 0002150C v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v12, 0, 1.0, vcc ; D200000C 01A9E480 v_sub_f32_e64 v1, |v1|, v12 ; D2080101 00021901 v_add_f32_e32 v9, v5, v9 ; 06121305 v_add_f32_e32 v4, v5, v4 ; 06080905 v_add_f32_e32 v11, v5, v11 ; 06161705 v_add_f32_e32 v1, v5, v1 ; 06020305 v_cmp_gt_f32_e32 vcc, 0, v9 ; 7C081280 v_cndmask_b32_e64 v5, 0, 1.0, vcc ; D2000005 01A9E480 v_sub_f32_e64 v13, v5, |v9| ; D208020D 00021305 v_mad_f32 v13, v13, v6, 1.0 ; D282000D 03CA0D0D v_cmp_gt_f32_e32 vcc, 0, v4 ; 7C080880 v_cndmask_b32_e64 v14, 0, 1.0, vcc ; D200000E 01A9E480 v_sub_f32_e64 v15, v14, |v4| ; D208020F 0002090E v_mac_f32_e32 v13, v6, v15 ; 3E1A1F06 v_cmp_gt_f32_e32 vcc, 0, v11 ; 7C081680 v_cndmask_b32_e64 v15, 0, 1.0, vcc ; D200000F 01A9E480 v_sub_f32_e64 v21, v15, |v11| ; D2080215 0002170F v_mad_f32 v21, v21, v6, 1.0 ; D2820015 03CA0D15 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v35, 0, 1.0, vcc ; D2000023 01A9E480 v_sub_f32_e64 v36, v35, |v1| ; D2080224 00020323 v_mac_f32_e32 v21, v6, v36 ; 3E2A4906 v_sub_f32_e64 v9, |v9|, v5 ; D2080109 00020B09 v_sub_f32_e64 v4, |v4|, v14 ; D2080104 00021D04 v_sub_f32_e64 v11, |v11|, v15 ; D208010B 00021F0B v_sub_f32_e64 v1, |v1|, v35 ; D2080101 00024701 v_mul_f32_e32 v9, v6, v9 ; 10121306 v_mul_f32_e32 v4, v6, v4 ; 10080906 v_mul_f32_e32 v11, v6, v11 ; 10161706 v_mul_f32_e32 v1, v6, v1 ; 10020306 v_mul_f32_e32 v6, v9, v9 ; 100C1309 v_mac_f32_e32 v6, v4, v4 ; 3E0C0904 v_mac_f32_e32 v6, v13, v13 ; 3E0C1B0D v_rsq_clamp_f32_e32 v6, v6 ; 7E0C5906 v_mul_f32_e32 v36, v11, v11 ; 1048170B v_mac_f32_e32 v36, v1, v1 ; 3E480301 v_mac_f32_e32 v36, v21, v21 ; 3E482B15 v_rsq_clamp_f32_e32 v36, v36 ; 7E485924 v_mul_f32_e32 v9, v6, v9 ; 10121306 v_mul_f32_e32 v4, v6, v4 ; 10080906 v_mul_f32_e32 v6, v6, v13 ; 100C1B06 v_mul_f32_e32 v11, v36, v11 ; 10161724 v_mul_f32_e32 v1, v36, v1 ; 10020324 v_mul_f32_e32 v13, v36, v21 ; 101A2B24 v_mad_f32 v5, -2.0, v5, 1.0 ; D2820005 03CA0AF5 v_mul_f32_e32 v5, v5, v9 ; 100A1305 v_mad_f32 v9, -2.0, v14, 1.0 ; D2820009 03CA1CF5 v_mul_f32_e32 v4, v9, v4 ; 10080909 v_mad_f32 v9, -2.0, v15, 1.0 ; D2820009 03CA1EF5 v_mul_f32_e32 v9, v9, v11 ; 10121709 v_mad_f32 v11, -2.0, v35, 1.0 ; D282000B 03CA46F5 v_mul_f32_e32 v1, v11, v1 ; 1002030B v_mad_f32 v7, -2.0, v7, 1.0 ; D2820007 03CA0EF5 v_mul_f32_e32 v6, v7, v6 ; 100C0D07 v_mad_f32 v7, -2.0, v10, 1.0 ; D2820007 03CA14F5 v_mul_f32_e32 v7, v7, v13 ; 100E1B07 v_add_f32_e32 v10, v26, v20 ; 0614291A v_mul_f32_e32 v11, v24, v4 ; 10160918 v_mac_f32_e32 v11, v23, v5 ; 3E160B17 v_mul_f32_e32 v13, v24, v1 ; 101A0318 v_mac_f32_e32 v13, v23, v9 ; 3E1A1317 v_mac_f32_e32 v11, v25, v6 ; 3E160D19 v_mac_f32_e32 v13, v25, v7 ; 3E1A0F19 v_add_f32_e32 v2, v30, v2 ; 0604051E v_mul_f32_e32 v14, v28, v4 ; 101C091C v_mac_f32_e32 v14, v27, v5 ; 3E1C0B1B v_mul_f32_e32 v15, v28, v1 ; 101E031C v_mac_f32_e32 v15, v27, v9 ; 3E1E131B v_mac_f32_e32 v14, v29, v6 ; 3E1C0D1D v_mac_f32_e32 v15, v29, v7 ; 3E1E0F1D v_mul_f32_e32 v4, v32, v4 ; 10080920 v_mac_f32_e32 v4, v31, v5 ; 3E080B1F v_mul_f32_e32 v1, v32, v1 ; 10020320 v_mac_f32_e32 v1, v31, v9 ; 3E02131F v_mac_f32_e32 v4, v33, v6 ; 3E080D21 v_mul_f32_e32 v5, v11, v11 ; 100A170B v_mac_f32_e32 v5, v14, v14 ; 3E0A1D0E v_mac_f32_e32 v5, v4, v4 ; 3E0A0904 v_rsq_clamp_f32_e32 v5, v5 ; 7E0A5905 v_mac_f32_e32 v1, v33, v7 ; 3E020F21 v_add_f32_e32 v6, v34, v8 ; 060C1122 exp 15, 33, 0, 0, 0, v0, v3, v22, v22 ; F800021F 16160300 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_mul_f32_e32 v0, v5, v11 ; 10001705 v_mul_f32_e32 v3, v5, v14 ; 10061D05 v_mul_f32_e32 v4, v5, v4 ; 10080905 v_subrev_f32_e32 v5, s24, v10 ; 0A0A1418 v_subrev_f32_e32 v7, s23, v2 ; 0A0E0417 v_mul_f32_e32 v8, v5, v5 ; 10100B05 v_mac_f32_e32 v8, v7, v7 ; 3E100F07 v_subrev_f32_e32 v9, s22, v6 ; 0A120C16 v_mac_f32_e32 v8, v9, v9 ; 3E101309 v_rsq_clamp_f32_e32 v8, v8 ; 7E105908 v_mul_f32_e32 v11, s21, v0 ; 10160015 v_mac_f32_e32 v11, s20, v3 ; 3E160614 v_mac_f32_e32 v11, s19, v4 ; 3E160813 v_mul_f32_e32 v14, v0, v11 ; 101C1700 v_mad_f32 v14, 2.0, v14, -s21 ; D282000E 80561CF4 v_mul_f32_e32 v20, v8, v5 ; 10280B08 v_mul_f32_e32 v14, v14, v20 ; 101C290E v_mul_f32_e32 v20, v3, v11 ; 10281703 v_mad_f32 v20, 2.0, v20, -s20 ; D2820014 805228F4 v_mul_f32_e32 v21, v8, v7 ; 102A0F08 v_mac_f32_e32 v14, v20, v21 ; 3E1C2B14 v_mul_f32_e32 v11, v4, v11 ; 10161704 v_mad_f32 v11, 2.0, v11, -s19 ; D282000B 804E16F4 v_mul_f32_e32 v8, v8, v9 ; 10101308 v_mac_f32_e32 v14, v11, v8 ; 3E1C110B v_mul_f32_e32 v5, s14, v5 ; 100A0A0E v_add_f32_e64 v8, 0, v14 clamp ; D2060808 00021C80 v_log_f32_e32 v8, v8 ; 7E104F08 v_mac_f32_e32 v5, s17, v7 ; 3E0A0E11 v_mac_f32_e32 v5, s18, v9 ; 3E0A1212 exp 15, 34, 0, 0, 0, v10, v2, v6, v5 ; F800022F 0506020A s_waitcnt expcnt(0) ; BF8C070F v_mul_legacy_f32_e32 v5, 0x41800000, v8 ; 0E0A10FF 41800000 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80 exp 15, 35, 0, 0, 0, v0, v3, v4, v5 ; F800023F 05040300 exp 15, 36, 0, 0, 0, v16, v17, v18, v19 ; F800024F 13121110 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v5, s11, v2 ; 100A040B v_mul_f32_e32 v7, s15, v2 ; 100E040F v_mul_f32_e32 v8, s6, v2 ; 10100406 v_mul_f32_e32 v2, s7, v2 ; 10040407 v_mac_f32_e32 v5, s12, v10 ; 3E0A140C v_mac_f32_e32 v7, s8, v10 ; 3E0E1408 v_mac_f32_e32 v8, s9, v10 ; 3E101409 v_mac_f32_e32 v2, s10, v10 ; 3E04140A v_mac_f32_e32 v5, s16, v6 ; 3E0A0C10 v_mac_f32_e32 v7, s4, v6 ; 3E0E0C04 v_mac_f32_e32 v8, s3, v6 ; 3E100C03 v_mac_f32_e32 v2, s13, v6 ; 3E040C0D v_mul_f32_e32 v6, v0, v13 ; 100C1B00 v_mac_f32_e32 v6, v3, v15 ; 3E0C1F03 v_mac_f32_e32 v6, v4, v1 ; 3E0C0304 v_mad_f32 v0, -v6, v0, v13 ; D2820000 24360106 v_mad_f32 v3, -v6, v3, v15 ; D2820003 243E0706 v_mad_f32 v1, -v6, v4, v1 ; D2820001 24060906 v_add_f32_e32 v4, s0, v5 ; 06080A00 v_add_f32_e32 v5, s5, v7 ; 060A0E05 v_mul_f32_e32 v6, v0, v0 ; 100C0100 v_mac_f32_e32 v6, v3, v3 ; 3E0C0703 v_mac_f32_e32 v6, v1, v1 ; 3E0C0301 v_rsq_clamp_f32_e32 v6, v6 ; 7E0C5906 v_add_f32_e32 v7, s2, v8 ; 060E1002 v_mad_f32 v8, -2.0, v12, 1.0 ; D2820008 03CA18F5 v_add_f32_e32 v2, s1, v2 ; 06040401 v_mul_f32_e32 v0, v6, v0 ; 10000106 v_mul_f32_e32 v3, v6, v3 ; 10060706 v_mul_f32_e32 v1, v6, v1 ; 10020306 exp 15, 37, 0, 0, 0, v22, v22, v22, v22 ; F800025F 16161616 exp 15, 38, 0, 0, 0, v0, v3, v1, v8 ; F800026F 08010300 s_waitcnt expcnt(0) ; BF8C070F v_xor_b32_e32 v0, 0x80000000, v7 ; 3A000EFF 80000000 v_mad_f32 v1, 2.0, v2, -v4 ; D2820001 841204F4 exp 15, 12, 0, 0, 0, v5, v0, v1, v4 ; F80000CF 04010005 exp 15, 13, 0, 1, 0, v22, v22, v22, v22 ; F80008DF 16161616 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 40 Code Size: 1276 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0xB last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[5], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL OUT[2], COLOR[2] DCL OUT[3], COLOR[3] DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL CONST[1][0..3] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0..9], LOCAL IMM[0] UINT32 {0, 16, 48, 44} IMM[1] FLT32 { 2.0000, -1.0000, 1.0000, 0.5000} IMM[2] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} IMM[3] UINT32 {4, 0, 0, 0} 0: DP3 TEMP[0].x, IN[2].xyzz, IN[2].xyzz 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MUL TEMP[0].xyz, IN[2].xyzz, TEMP[0].xxxx 3: MOV TEMP[1].xy, IN[0].xyyy 4: TEX TEMP[1], TEMP[1], SAMP[0], 2D 5: MUL TEMP[2].xyz, CONST[1][1].xyzz, TEMP[1].xyzz 6: MUL TEMP[2].xyz, TEMP[2].xyzz, IN[3].xyzz 7: MOV TEMP[3].xy, IN[0].xyyy 8: TEX TEMP[3].xyz, TEMP[3], SAMP[1], 2D 9: MUL TEMP[4].x, CONST[1][3].xxxx, TEMP[3].xxxx 10: MUL TEMP[5].x, CONST[1][2].wwww, TEMP[3].zzzz 11: MUL TEMP[6].xyz, TEMP[0].zxyy, IN[4].yzxx 12: MAD TEMP[6].xyz, TEMP[0].yzxx, IN[4].zxyy, -TEMP[6].xyzz 13: MOV TEMP[7].xy, IN[0].xyyy 14: TEX TEMP[7].yw, TEMP[7], SAMP[2], 2D 15: MAD TEMP[7].xy, TEMP[7].wyyy, IMM[1].xxxx, IMM[1].yyyy 16: MOV TEMP[8].x, TEMP[7].xxxx 17: MOV TEMP[8].y, -TEMP[7].yyyy 18: MUL TEMP[8].xy, TEMP[8].xyyy, CONST[1][0].xxxx 19: MOV TEMP[9].x, TEMP[8].xxxx 20: MOV TEMP[9].y, TEMP[8].yyyy 21: DP2 TEMP[7].x, TEMP[7].xyyy, TEMP[7].xyyy 22: ADD TEMP[7].x, IMM[1].zzzz, -TEMP[7].xxxx 23: MOV_SAT TEMP[7].x, TEMP[7].xxxx 24: SQRT TEMP[7].x, TEMP[7].xxxx 25: MOV TEMP[9].z, TEMP[7].xxxx 26: DP3 TEMP[7].x, TEMP[9].xyzz, TEMP[9].xyzz 27: RSQ TEMP[7].x, TEMP[7].xxxx 28: MUL TEMP[7].xyz, TEMP[9].xyzz, TEMP[7].xxxx 29: DP3 TEMP[8].x, IN[4].xyzz, IN[4].xyzz 30: RSQ TEMP[8].x, TEMP[8].xxxx 31: MUL TEMP[8].xyz, IN[4].xyzz, TEMP[8].xxxx 32: DP3 TEMP[9].x, TEMP[6].xyzz, TEMP[6].xyzz 33: RSQ TEMP[9].x, TEMP[9].xxxx 34: MUL TEMP[6].xyz, TEMP[6].xyzz, TEMP[9].xxxx 35: MUL TEMP[6].xyz, IN[4].wwww, TEMP[6].xyzz 36: MUL TEMP[6].xyz, TEMP[7].yyyy, TEMP[6].xyzz 37: MAD TEMP[6].xyz, TEMP[7].xxxx, TEMP[8].xyzz, TEMP[6].xyzz 38: MAD TEMP[0].xyz, TEMP[0].xyzz, TEMP[7].zzzz, TEMP[6].xyzz 39: DP3 TEMP[6].x, TEMP[0].xyzz, TEMP[0].xyzz 40: RSQ TEMP[6].x, TEMP[6].xxxx 41: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[6].xxxx 42: MAD TEMP[0].xyz, TEMP[0].xyzz, IMM[1].wwww, IMM[1].wwww 43: MOV TEMP[6].w, IMM[2].xxxx 44: MOV TEMP[6].x, TEMP[0].xxxx 45: MOV TEMP[6].y, TEMP[0].yyyy 46: MOV TEMP[6].z, TEMP[0].zzzz 47: FSLT TEMP[0].x, TEMP[1].wwww, CONST[1][0].yyyy 48: AND TEMP[0].x, TEMP[0].xxxx, IMM[1].zzzz 49: KILL_IF -TEMP[0].xxxx 50: MOV TEMP[0].w, IMM[2].xxxx 51: MOV TEMP[0].x, TEMP[2].xxxx 52: MOV TEMP[0].y, TEMP[2].yyyy 53: MOV TEMP[0].z, TEMP[2].zzzz 54: MOV TEMP[1].w, IMM[2].xxxx 55: MOV TEMP[1].x, TEMP[4].xxxx 56: MOV TEMP[1].y, TEMP[3].yyyy 57: MOV TEMP[1].z, TEMP[5].xxxx 58: MOV OUT[2], IN[1].wwww 59: MOV OUT[0], TEMP[0] 60: MOV OUT[3], TEMP[6] 61: MOV OUT[1], TEMP[1] 62: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %31 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %32 = load <32 x i8>, <32 x i8> addrspace(2)* %31, align 32, !tbaa !0 %33 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %34 = load <16 x i8>, <16 x i8> addrspace(2)* %33, align 16, !tbaa !0 %35 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %36 = bitcast <8 x i32> addrspace(2)* %35 to <32 x i8> addrspace(2)* %37 = load <32 x i8>, <32 x i8> addrspace(2)* %36, align 32, !tbaa !0 %38 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %39 = bitcast <4 x i32> addrspace(2)* %38 to <16 x i8> addrspace(2)* %40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, !tbaa !0 %41 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %42 = bitcast <8 x i32> addrspace(2)* %41 to <32 x i8> addrspace(2)* %43 = load <32 x i8>, <32 x i8> addrspace(2)* %42, align 32, !tbaa !0 %44 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %45 = bitcast <4 x i32> addrspace(2)* %44 to <16 x i8> addrspace(2)* %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %48 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %49 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %50 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %51 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %52 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %53 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %54 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %55 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %56 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %57 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %58 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %59 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %60 = fmul float %50, %50 %61 = fmul float %51, %51 %62 = fadd float %61, %60 %63 = fmul float %52, %52 %64 = fadd float %62, %63 %65 = call float @llvm.AMDGPU.rsq.clamped.f32(float %64) %66 = fmul float %50, %65 %67 = fmul float %51, %65 %68 = fmul float %52, %65 %69 = bitcast float %47 to i32 %70 = bitcast float %48 to i32 %71 = insertelement <2 x i32> undef, i32 %69, i32 0 %72 = insertelement <2 x i32> %71, i32 %70, i32 1 %73 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %72, <32 x i8> %32, <16 x i8> %34, i32 2) %74 = extractelement <4 x float> %73, i32 0 %75 = extractelement <4 x float> %73, i32 1 %76 = extractelement <4 x float> %73, i32 2 %77 = extractelement <4 x float> %73, i32 3 %78 = fmul float %26, %74 %79 = fmul float %27, %75 %80 = fmul float %28, %76 %81 = fmul float %78, %53 %82 = fmul float %79, %54 %83 = fmul float %80, %55 %84 = bitcast float %47 to i32 %85 = bitcast float %48 to i32 %86 = insertelement <2 x i32> undef, i32 %84, i32 0 %87 = insertelement <2 x i32> %86, i32 %85, i32 1 %88 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %87, <32 x i8> %37, <16 x i8> %40, i32 2) %89 = extractelement <4 x float> %88, i32 0 %90 = extractelement <4 x float> %88, i32 1 %91 = extractelement <4 x float> %88, i32 2 %92 = fmul float %30, %89 %93 = fmul float %29, %91 %94 = fmul float %68, %57 %95 = fmul float %66, %58 %96 = fmul float %67, %56 %97 = fmul float %67, %58 %98 = fsub float %97, %94 %99 = fmul float %68, %56 %100 = fsub float %99, %95 %101 = fmul float %66, %57 %102 = fsub float %101, %96 %103 = bitcast float %47 to i32 %104 = bitcast float %48 to i32 %105 = insertelement <2 x i32> undef, i32 %103, i32 0 %106 = insertelement <2 x i32> %105, i32 %104, i32 1 %107 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %106, <32 x i8> %43, <16 x i8> %46, i32 2) %108 = extractelement <4 x float> %107, i32 1 %109 = extractelement <4 x float> %107, i32 3 %110 = fmul float %109, 2.000000e+00 %111 = fadd float %110, -1.000000e+00 %112 = fmul float %108, 2.000000e+00 %113 = fadd float %112, -1.000000e+00 %114 = fmul float %111, %24 %115 = fmul float %113, %24 %116 = fmul float %111, %111 %117 = fmul float %113, %113 %118 = fadd float %116, %117 %119 = fsub float 1.000000e+00, %118 %120 = call float @llvm.AMDIL.clamp.(float %119, float 0.000000e+00, float 1.000000e+00) %121 = call float @llvm.sqrt.f32(float %120) %122 = fmul float %114, %114 %123 = fmul float %115, %115 %124 = fadd float %123, %122 %125 = fmul float %121, %121 %126 = fadd float %124, %125 %127 = call float @llvm.AMDGPU.rsq.clamped.f32(float %126) %128 = fmul float %114, %127 %129 = fmul float %115, %127 %130 = fsub float -0.000000e+00, %129 %131 = fmul float %121, %127 %132 = fmul float %56, %56 %133 = fmul float %57, %57 %134 = fadd float %133, %132 %135 = fmul float %58, %58 %136 = fadd float %134, %135 %137 = call float @llvm.AMDGPU.rsq.clamped.f32(float %136) %138 = fmul float %56, %137 %139 = fmul float %57, %137 %140 = fmul float %58, %137 %141 = fmul float %98, %98 %142 = fmul float %100, %100 %143 = fadd float %142, %141 %144 = fmul float %102, %102 %145 = fadd float %143, %144 %146 = call float @llvm.AMDGPU.rsq.clamped.f32(float %145) %147 = fmul float %98, %146 %148 = fmul float %100, %146 %149 = fmul float %102, %146 %150 = fmul float %59, %147 %151 = fmul float %59, %148 %152 = fmul float %59, %149 %153 = fmul float %150, %130 %154 = fmul float %151, %130 %155 = fmul float %152, %130 %156 = fmul float %128, %138 %157 = fadd float %156, %153 %158 = fmul float %128, %139 %159 = fadd float %158, %154 %160 = fmul float %128, %140 %161 = fadd float %160, %155 %162 = fmul float %66, %131 %163 = fadd float %162, %157 %164 = fmul float %67, %131 %165 = fadd float %164, %159 %166 = fmul float %68, %131 %167 = fadd float %166, %161 %168 = fmul float %163, %163 %169 = fmul float %165, %165 %170 = fadd float %169, %168 %171 = fmul float %167, %167 %172 = fadd float %170, %171 %173 = call float @llvm.AMDGPU.rsq.clamped.f32(float %172) %174 = fmul float %163, %173 %175 = fmul float %165, %173 %176 = fmul float %167, %173 %177 = fmul float %174, 5.000000e-01 %178 = fadd float %177, 5.000000e-01 %179 = fmul float %175, 5.000000e-01 %180 = fadd float %179, 5.000000e-01 %181 = fmul float %176, 5.000000e-01 %182 = fadd float %181, 5.000000e-01 %183 = fcmp olt float %77, %25 %184 = select i1 %183, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %184) %185 = call i32 @llvm.SI.packf16(float %81, float %82) %186 = bitcast i32 %185 to float %187 = call i32 @llvm.SI.packf16(float %83, float 0.000000e+00) %188 = bitcast i32 %187 to float %189 = call i32 @llvm.SI.packf16(float %92, float %90) %190 = bitcast i32 %189 to float %191 = call i32 @llvm.SI.packf16(float %93, float 0.000000e+00) %192 = bitcast i32 %191 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %186, float %188, float %186, float %188) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 1, i32 1, float %190, float %192, float %190, float %192) %193 = call i32 @llvm.SI.packf16(float %178, float %180) %194 = bitcast i32 %193 to float %195 = call i32 @llvm.SI.packf16(float %182, float 0.000000e+00) %196 = bitcast i32 %195 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 2, i32 0, float %49, float %49, float %49, float %49) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 3, i32 1, float %194, float %196, float %194, float %196) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 3, 1, [m0] ; C8100700 v_interp_p2_f32 v4, [v4], v1, 3, 1, [m0] ; C8110701 v_interp_p1_f32 v5, v0, 0, 2, [m0] ; C8140800 v_interp_p2_f32 v5, [v5], v1, 0, 2, [m0] ; C8150801 v_interp_p1_f32 v6, v0, 1, 2, [m0] ; C8180900 v_interp_p2_f32 v6, [v6], v1, 1, 2, [m0] ; C8190901 v_interp_p1_f32 v7, v0, 2, 2, [m0] ; C81C0A00 v_interp_p2_f32 v7, [v7], v1, 2, 2, [m0] ; C81D0A01 v_interp_p1_f32 v8, v0, 0, 3, [m0] ; C8200C00 v_interp_p2_f32 v8, [v8], v1, 0, 3, [m0] ; C8210C01 v_interp_p1_f32 v9, v0, 1, 3, [m0] ; C8240D00 v_interp_p2_f32 v9, [v9], v1, 1, 3, [m0] ; C8250D01 v_interp_p1_f32 v10, v0, 2, 3, [m0] ; C8280E00 v_interp_p2_f32 v10, [v10], v1, 2, 3, [m0] ; C8290E01 s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 v_interp_p1_f32 v11, v0, 0, 4, [m0] ; C82C1000 v_interp_p2_f32 v11, [v11], v1, 0, 4, [m0] ; C82D1001 v_interp_p1_f32 v12, v0, 1, 4, [m0] ; C8301100 v_interp_p2_f32 v12, [v12], v1, 1, 4, [m0] ; C8311101 v_interp_p1_f32 v13, v0, 2, 4, [m0] ; C8341200 v_interp_p2_f32 v13, [v13], v1, 2, 4, [m0] ; C8351201 v_interp_p1_f32 v0, v0, 3, 4, [m0] ; C8001300 s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 s_load_dwordx8 s[36:43], s[6:7], 0x0 ; C0D20700 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s44, s[0:3], 0x4 ; C2160104 s_buffer_load_dword s45, s[0:3], 0x5 ; C2168105 v_interp_p2_f32 v0, [v0], v1, 3, 4, [m0] ; C8011301 s_buffer_load_dword s46, s[0:3], 0x1 ; C2170101 s_buffer_load_dword s47, s[0:3], 0x6 ; C2178106 s_load_dwordx4 s[8:11], s[4:5], 0x4 ; C0840504 s_load_dwordx4 s[12:15], s[4:5], 0x8 ; C0860508 s_load_dwordx8 s[16:23], s[6:7], 0x8 ; C0C80708 s_load_dwordx8 s[24:31], s[6:7], 0x10 ; C0CC0710 image_sample v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[36:43], s[32:35] ; F0800F00 01090E02 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v1, s44, v14 ; 10021C2C v_mul_f32_e32 v14, s45, v15 ; 101C1E2D v_mul_f32_e32 v15, v5, v5 ; 101E0B05 v_mac_f32_e32 v15, v6, v6 ; 3E1E0D06 v_mac_f32_e32 v15, v7, v7 ; 3E1E0F07 v_rsq_clamp_f32_e32 v15, v15 ; 7E1E590F v_mul_f32_e32 v16, s47, v16 ; 1020202F v_cmp_gt_f32_e32 vcc, s46, v17 ; 7C08222E image_sample v[17:19], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[8:11] ; F0800700 00441102 image_sample v[2:3], 10, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[12:15] ; F0800A00 00660202 v_mul_f32_e32 v5, v15, v5 ; 100A0B0F v_mul_f32_e32 v6, v15, v6 ; 100C0D0F v_mul_f32_e32 v7, v15, v7 ; 100E0F0F s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v3, 2.0, v3, -1.0 ; D2820003 03CE06F4 v_mad_f32 v2, 2.0, v2, -1.0 ; D2820002 03CE04F4 v_mul_f32_e32 v15, v12, v7 ; 101E0F0C v_mad_f32 v15, v6, v13, -v15 ; D282000F 843E1B06 v_mul_f32_e32 v20, v13, v5 ; 10280B0D v_mad_f32 v20, v7, v11, -v20 ; D2820014 84521707 v_mul_f32_e32 v21, v11, v6 ; 102A0D0B v_mad_f32 v21, v5, v12, -v21 ; D2820015 84561905 v_mul_f32_e32 v22, v11, v11 ; 102C170B v_mac_f32_e32 v22, v12, v12 ; 3E2C190C v_mac_f32_e32 v22, v13, v13 ; 3E2C1B0D v_rsq_clamp_f32_e32 v22, v22 ; 7E2C5916 v_mul_f32_e32 v23, v15, v15 ; 102E1F0F v_mac_f32_e32 v23, v20, v20 ; 3E2E2914 v_mac_f32_e32 v23, v21, v21 ; 3E2E2B15 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 v_rsq_clamp_f32_e32 v23, v23 ; 7E2E5917 v_mul_f32_e32 v11, v22, v11 ; 10161716 v_mul_f32_e32 v12, v22, v12 ; 10181916 v_mul_f32_e32 v13, v22, v13 ; 101A1B16 v_mul_f32_e32 v15, v23, v15 ; 101E1F17 v_mul_f32_e32 v20, v23, v20 ; 10282917 v_mul_f32_e32 v21, v23, v21 ; 102A2B17 v_mad_f32 v22, -v2, v2, 1.0 ; D2820016 23CA0502 v_mad_f32 v22, -v3, v3, v22 ; D2820016 245A0703 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v3 ; 10060604 v_mul_f32_e32 v2, s4, v2 ; 10040404 v_add_f32_e64 v22, 0, v22 clamp ; D2060816 00022C80 v_sqrt_f32_e32 v22, v22 ; 7E2C6716 v_mul_f32_e32 v23, v3, v3 ; 102E0703 v_mac_f32_e32 v23, v2, v2 ; 3E2E0502 v_mac_f32_e32 v23, v22, v22 ; 3E2E2D16 v_rsq_clamp_f32_e32 v23, v23 ; 7E2E5917 v_mul_f32_e32 v15, v15, v0 ; 101E010F v_mul_f32_e32 v20, v20, v0 ; 10280114 v_mul_f32_e32 v0, v21, v0 ; 10000115 v_mul_f32_e32 v2, v23, v2 ; 10040517 v_mul_f32_e32 v15, v2, v15 ; 101E1F02 v_mul_f32_e32 v20, v2, v20 ; 10282902 v_mul_f32_e32 v0, v2, v0 ; 10000102 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_buffer_load_dword s0, s[0:3], 0xb ; C200010B v_mul_f32_e32 v2, v23, v3 ; 10040717 v_mad_f32 v3, v2, v11, -v15 ; D2820003 843E1702 v_mad_f32 v11, v2, v12, -v20 ; D282000B 84521902 v_mad_f32 v0, v2, v13, -v0 ; D2820000 84021B02 v_mul_f32_e32 v2, v23, v22 ; 10042D17 v_mac_f32_e32 v3, v2, v5 ; 3E060B02 v_mac_f32_e32 v11, v2, v6 ; 3E160D02 v_mac_f32_e32 v0, v2, v7 ; 3E000F02 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s4, v17 ; 10042204 v_mul_f32_e32 v5, s0, v19 ; 100A2600 v_mul_f32_e32 v6, v3, v3 ; 100C0703 v_mac_f32_e32 v6, v11, v11 ; 3E0C170B v_mac_f32_e32 v6, v0, v0 ; 3E0C0100 v_rsq_clamp_f32_e32 v6, v6 ; 7E0C5906 v_mul_f32_e32 v1, v8, v1 ; 10020308 v_mul_f32_e32 v7, v9, v14 ; 100E1D09 v_mul_f32_e32 v8, v10, v16 ; 1010210A v_mul_f32_e32 v3, v6, v3 ; 10060706 v_mul_f32_e32 v9, v6, v11 ; 10121706 v_mul_f32_e32 v0, v6, v0 ; 10000106 v_mad_f32 v3, 0.5, v3, 0.5 ; D2820003 03C206F0 v_mad_f32 v6, 0.5, v9, 0.5 ; D2820006 03C212F0 v_mad_f32 v0, 0.5, v0, 0.5 ; D2820000 03C200F0 v_cndmask_b32_e64 v9, 0, -1.0, vcc ; D2000009 01A9E680 v_cmpx_le_f32_e32 vcc, 0, v9 ; 7C261280 v_cvt_pkrtz_f16_f32_e32 v2, v2, v18 ; 5E042502 v_cvt_pkrtz_f16_f32_e32 v1, v1, v7 ; 5E020F01 v_cvt_pkrtz_f16_f32_e64 v7, v8, 0 ; D25E0007 00010108 exp 15, 0, 1, 0, 0, v1, v7, v1, v7 ; F800040F 07010701 s_waitcnt expcnt(0) ; BF8C070F v_cvt_pkrtz_f16_f32_e64 v1, v5, 0 ; D25E0001 00010105 exp 15, 1, 1, 0, 0, v2, v1, v2, v1 ; F800041F 01020102 s_waitcnt expcnt(0) ; BF8C070F v_cvt_pkrtz_f16_f32_e32 v1, v3, v6 ; 5E020D03 exp 15, 2, 0, 0, 0, v4, v4, v4, v4 ; F800002F 04040404 v_cvt_pkrtz_f16_f32_e64 v0, v0, 0 ; D25E0000 00010100 exp 15, 3, 1, 1, 1, v1, v0, v1, v0 ; F8001C3F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 24 Code Size: 652 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0xB last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[5], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL OUT[2], COLOR[2] DCL OUT[3], COLOR[3] DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL CONST[1][0..3] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0..6], LOCAL IMM[0] UINT32 {0, 16, 48, 44} IMM[1] FLT32 { 2.0000, -1.0000, 1.0000, 0.5000} IMM[2] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: DP3 TEMP[0].x, IN[2].xyzz, IN[2].xyzz 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MUL TEMP[0].xyz, IN[2].xyzz, TEMP[0].xxxx 3: MOV TEMP[1].xy, IN[0].xyyy 4: TEX TEMP[1], TEMP[1], SAMP[0], 2D 5: MOV TEMP[2].w, TEMP[1].wwww 6: MUL TEMP[2].xyz, CONST[1][1].xyzz, TEMP[1].xyzz 7: MUL TEMP[1].xyz, TEMP[2], IN[3] 8: MOV TEMP[2].xy, IN[0].xyyy 9: TEX TEMP[2].xyz, TEMP[2], SAMP[1], 2D 10: MUL TEMP[3].xyz, TEMP[0].zxyy, IN[4].yzxx 11: MAD TEMP[3].xyz, TEMP[0].yzxx, IN[4].zxyy, -TEMP[3].xyzz 12: MOV TEMP[4].xy, IN[0].xyyy 13: TEX TEMP[4].yw, TEMP[4], SAMP[2], 2D 14: MAD TEMP[4].xy, TEMP[4].wyyy, IMM[1].xxxx, IMM[1].yyyy 15: MOV TEMP[5].x, TEMP[4].xxxx 16: MOV TEMP[5].y, -TEMP[4].yyyy 17: MUL TEMP[5].xy, TEMP[5].xyyy, CONST[1][0].xxxx 18: MOV TEMP[6].x, TEMP[5].xxxx 19: MOV TEMP[6].y, TEMP[5].yyyy 20: DP2 TEMP[4].x, TEMP[4].xyyy, TEMP[4].xyyy 21: ADD TEMP[4].x, IMM[1].zzzz, -TEMP[4].xxxx 22: MOV_SAT TEMP[4].x, TEMP[4].xxxx 23: SQRT TEMP[4].x, TEMP[4].xxxx 24: MOV TEMP[6].z, TEMP[4].xxxx 25: DP3 TEMP[4].x, TEMP[6].xyzz, TEMP[6].xyzz 26: RSQ TEMP[4].x, TEMP[4].xxxx 27: MUL TEMP[4].xyz, TEMP[6].xyzz, TEMP[4].xxxx 28: DP3 TEMP[5].x, IN[4].xyzz, IN[4].xyzz 29: RSQ TEMP[5].x, TEMP[5].xxxx 30: MUL TEMP[5].xyz, IN[4].xyzz, TEMP[5].xxxx 31: DP3 TEMP[6].x, TEMP[3].xyzz, TEMP[3].xyzz 32: RSQ TEMP[6].x, TEMP[6].xxxx 33: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[6].xxxx 34: MUL TEMP[3].xyz, IN[4].wwww, TEMP[3].xyzz 35: MUL TEMP[3].xyz, TEMP[4].yyyy, TEMP[3].xyzz 36: MAD TEMP[3].xyz, TEMP[4].xxxx, TEMP[5].xyzz, TEMP[3].xyzz 37: MAD TEMP[0].xyz, TEMP[0].xyzz, TEMP[4].zzzz, TEMP[3].xyzz 38: DP3 TEMP[3].x, TEMP[0].xyzz, TEMP[0].xyzz 39: RSQ TEMP[3].x, TEMP[3].xxxx 40: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[3].xxxx 41: MAD TEMP[0].xyz, TEMP[0].xyzz, IMM[1].wwww, IMM[1].wwww 42: MOV TEMP[3].w, IMM[2].xxxx 43: MOV TEMP[3].x, TEMP[0].xxxx 44: MOV TEMP[3].y, TEMP[0].yyyy 45: MOV TEMP[3].z, TEMP[0].zzzz 46: MOV TEMP[0].w, IMM[2].xxxx 47: MOV TEMP[0].x, TEMP[1].xxxx 48: MOV TEMP[0].y, TEMP[1].yyyy 49: MOV TEMP[0].z, TEMP[1].zzzz 50: MOV TEMP[1].w, IMM[2].xxxx 51: MUL TEMP[4].x, CONST[1][3].xxxx, TEMP[2].xxxx 52: MUL TEMP[1].x, TEMP[4].xxxx, IN[3].wwww 53: MOV TEMP[1].y, TEMP[2].yyyy 54: MUL TEMP[2].x, CONST[1][2].wwww, TEMP[2].zzzz 55: MOV TEMP[1].z, TEMP[2].xxxx 56: MOV OUT[2], IN[1].wwww 57: MOV OUT[0], TEMP[0] 58: MOV OUT[3], TEMP[3] 59: MOV OUT[1], TEMP[1] 60: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %30 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %31 = load <32 x i8>, <32 x i8> addrspace(2)* %30, align 32, !tbaa !0 %32 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0 %34 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %35 = bitcast <8 x i32> addrspace(2)* %34 to <32 x i8> addrspace(2)* %36 = load <32 x i8>, <32 x i8> addrspace(2)* %35, align 32, !tbaa !0 %37 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %38 = bitcast <4 x i32> addrspace(2)* %37 to <16 x i8> addrspace(2)* %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 %40 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %41 = bitcast <8 x i32> addrspace(2)* %40 to <32 x i8> addrspace(2)* %42 = load <32 x i8>, <32 x i8> addrspace(2)* %41, align 32, !tbaa !0 %43 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %44 = bitcast <4 x i32> addrspace(2)* %43 to <16 x i8> addrspace(2)* %45 = load <16 x i8>, <16 x i8> addrspace(2)* %44, align 16, !tbaa !0 %46 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %47 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %48 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %49 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %50 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %51 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %52 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %53 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %54 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %55 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %56 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %57 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %58 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %59 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %60 = fmul float %49, %49 %61 = fmul float %50, %50 %62 = fadd float %61, %60 %63 = fmul float %51, %51 %64 = fadd float %62, %63 %65 = call float @llvm.AMDGPU.rsq.clamped.f32(float %64) %66 = fmul float %49, %65 %67 = fmul float %50, %65 %68 = fmul float %51, %65 %69 = bitcast float %46 to i32 %70 = bitcast float %47 to i32 %71 = insertelement <2 x i32> undef, i32 %69, i32 0 %72 = insertelement <2 x i32> %71, i32 %70, i32 1 %73 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %72, <32 x i8> %31, <16 x i8> %33, i32 2) %74 = extractelement <4 x float> %73, i32 0 %75 = extractelement <4 x float> %73, i32 1 %76 = extractelement <4 x float> %73, i32 2 %77 = fmul float %25, %74 %78 = fmul float %26, %75 %79 = fmul float %27, %76 %80 = fmul float %77, %52 %81 = fmul float %78, %53 %82 = fmul float %79, %54 %83 = bitcast float %46 to i32 %84 = bitcast float %47 to i32 %85 = insertelement <2 x i32> undef, i32 %83, i32 0 %86 = insertelement <2 x i32> %85, i32 %84, i32 1 %87 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %86, <32 x i8> %36, <16 x i8> %39, i32 2) %88 = extractelement <4 x float> %87, i32 0 %89 = extractelement <4 x float> %87, i32 1 %90 = extractelement <4 x float> %87, i32 2 %91 = fmul float %68, %57 %92 = fmul float %66, %58 %93 = fmul float %67, %56 %94 = fmul float %67, %58 %95 = fsub float %94, %91 %96 = fmul float %68, %56 %97 = fsub float %96, %92 %98 = fmul float %66, %57 %99 = fsub float %98, %93 %100 = bitcast float %46 to i32 %101 = bitcast float %47 to i32 %102 = insertelement <2 x i32> undef, i32 %100, i32 0 %103 = insertelement <2 x i32> %102, i32 %101, i32 1 %104 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %103, <32 x i8> %42, <16 x i8> %45, i32 2) %105 = extractelement <4 x float> %104, i32 1 %106 = extractelement <4 x float> %104, i32 3 %107 = fmul float %106, 2.000000e+00 %108 = fadd float %107, -1.000000e+00 %109 = fmul float %105, 2.000000e+00 %110 = fadd float %109, -1.000000e+00 %111 = fmul float %108, %24 %112 = fmul float %110, %24 %113 = fmul float %108, %108 %114 = fmul float %110, %110 %115 = fadd float %113, %114 %116 = fsub float 1.000000e+00, %115 %117 = call float @llvm.AMDIL.clamp.(float %116, float 0.000000e+00, float 1.000000e+00) %118 = call float @llvm.sqrt.f32(float %117) %119 = fmul float %111, %111 %120 = fmul float %112, %112 %121 = fadd float %120, %119 %122 = fmul float %118, %118 %123 = fadd float %121, %122 %124 = call float @llvm.AMDGPU.rsq.clamped.f32(float %123) %125 = fmul float %111, %124 %126 = fmul float %112, %124 %127 = fsub float -0.000000e+00, %126 %128 = fmul float %118, %124 %129 = fmul float %56, %56 %130 = fmul float %57, %57 %131 = fadd float %130, %129 %132 = fmul float %58, %58 %133 = fadd float %131, %132 %134 = call float @llvm.AMDGPU.rsq.clamped.f32(float %133) %135 = fmul float %56, %134 %136 = fmul float %57, %134 %137 = fmul float %58, %134 %138 = fmul float %95, %95 %139 = fmul float %97, %97 %140 = fadd float %139, %138 %141 = fmul float %99, %99 %142 = fadd float %140, %141 %143 = call float @llvm.AMDGPU.rsq.clamped.f32(float %142) %144 = fmul float %95, %143 %145 = fmul float %97, %143 %146 = fmul float %99, %143 %147 = fmul float %59, %144 %148 = fmul float %59, %145 %149 = fmul float %59, %146 %150 = fmul float %147, %127 %151 = fmul float %148, %127 %152 = fmul float %149, %127 %153 = fmul float %125, %135 %154 = fadd float %153, %150 %155 = fmul float %125, %136 %156 = fadd float %155, %151 %157 = fmul float %125, %137 %158 = fadd float %157, %152 %159 = fmul float %66, %128 %160 = fadd float %159, %154 %161 = fmul float %67, %128 %162 = fadd float %161, %156 %163 = fmul float %68, %128 %164 = fadd float %163, %158 %165 = fmul float %160, %160 %166 = fmul float %162, %162 %167 = fadd float %166, %165 %168 = fmul float %164, %164 %169 = fadd float %167, %168 %170 = call float @llvm.AMDGPU.rsq.clamped.f32(float %169) %171 = fmul float %160, %170 %172 = fmul float %162, %170 %173 = fmul float %164, %170 %174 = fmul float %171, 5.000000e-01 %175 = fadd float %174, 5.000000e-01 %176 = fmul float %172, 5.000000e-01 %177 = fadd float %176, 5.000000e-01 %178 = fmul float %173, 5.000000e-01 %179 = fadd float %178, 5.000000e-01 %180 = fmul float %29, %88 %181 = fmul float %180, %55 %182 = fmul float %28, %90 %183 = call i32 @llvm.SI.packf16(float %80, float %81) %184 = bitcast i32 %183 to float %185 = call i32 @llvm.SI.packf16(float %82, float 0.000000e+00) %186 = bitcast i32 %185 to float %187 = call i32 @llvm.SI.packf16(float %181, float %89) %188 = bitcast i32 %187 to float %189 = call i32 @llvm.SI.packf16(float %182, float 0.000000e+00) %190 = bitcast i32 %189 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %184, float %186, float %184, float %186) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 1, i32 1, float %188, float %190, float %188, float %190) %191 = call i32 @llvm.SI.packf16(float %175, float %177) %192 = bitcast i32 %191 to float %193 = call i32 @llvm.SI.packf16(float %179, float 0.000000e+00) %194 = bitcast i32 %193 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 2, i32 0, float %48, float %48, float %48, float %48) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 3, i32 1, float %192, float %194, float %192, float %194) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 3, 1, [m0] ; C8100700 v_interp_p2_f32 v4, [v4], v1, 3, 1, [m0] ; C8110701 v_interp_p1_f32 v5, v0, 0, 2, [m0] ; C8140800 v_interp_p2_f32 v5, [v5], v1, 0, 2, [m0] ; C8150801 v_interp_p1_f32 v6, v0, 1, 2, [m0] ; C8180900 v_interp_p2_f32 v6, [v6], v1, 1, 2, [m0] ; C8190901 v_interp_p1_f32 v7, v0, 2, 2, [m0] ; C81C0A00 v_interp_p2_f32 v7, [v7], v1, 2, 2, [m0] ; C81D0A01 v_interp_p1_f32 v8, v0, 0, 3, [m0] ; C8200C00 v_interp_p2_f32 v8, [v8], v1, 0, 3, [m0] ; C8210C01 v_interp_p1_f32 v9, v0, 1, 3, [m0] ; C8240D00 v_interp_p2_f32 v9, [v9], v1, 1, 3, [m0] ; C8250D01 v_interp_p1_f32 v10, v0, 2, 3, [m0] ; C8280E00 v_interp_p2_f32 v10, [v10], v1, 2, 3, [m0] ; C8290E01 v_interp_p1_f32 v11, v0, 3, 3, [m0] ; C82C0F00 v_interp_p2_f32 v11, [v11], v1, 3, 3, [m0] ; C82D0F01 v_interp_p1_f32 v12, v0, 0, 4, [m0] ; C8301000 v_interp_p2_f32 v12, [v12], v1, 0, 4, [m0] ; C8311001 v_interp_p1_f32 v13, v0, 1, 4, [m0] ; C8341100 v_interp_p2_f32 v13, [v13], v1, 1, 4, [m0] ; C8351101 s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx4 s[16:19], s[4:5], 0x8 ; C0880508 s_load_dwordx8 s[20:27], s[6:7], 0x0 ; C0CA0700 s_load_dwordx8 s[28:35], s[6:7], 0x8 ; C0CE0708 s_load_dwordx8 s[36:43], s[6:7], 0x10 ; C0D20710 v_interp_p1_f32 v14, v0, 2, 4, [m0] ; C8381200 v_interp_p2_f32 v14, [v14], v1, 2, 4, [m0] ; C8391201 v_interp_p1_f32 v0, v0, 3, 4, [m0] ; C8001300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_buffer_load_dword s6, s[0:3], 0x6 ; C2030106 v_interp_p2_f32 v0, [v0], v1, 3, 4, [m0] ; C8011301 image_sample v[15:17], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[20:27], s[8:11] ; F0800700 00450F02 image_sample v[18:20], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[28:35], s[12:15] ; F0800700 00671202 image_sample v[1:2], 10, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[36:43], s[16:19] ; F0800A00 00890102 s_buffer_load_dword s7, s[0:3], 0xc ; C203810C s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100 s_buffer_load_dword s0, s[0:3], 0xb ; C200010B s_waitcnt vmcnt(2) lgkmcnt(0) ; BF8C0072 v_mul_f32_e32 v3, s4, v15 ; 10061E04 v_mul_f32_e32 v15, s5, v16 ; 101E2005 v_mul_f32_e32 v16, s6, v17 ; 10202206 v_mul_f32_e32 v17, v5, v5 ; 10220B05 v_mac_f32_e32 v17, v6, v6 ; 3E220D06 v_mac_f32_e32 v17, v7, v7 ; 3E220F07 v_rsq_clamp_f32_e32 v17, v17 ; 7E225911 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v18, s7, v18 ; 10242407 v_mul_f32_e32 v11, v11, v18 ; 1016250B v_mul_f32_e32 v18, s0, v20 ; 10242800 v_cvt_pkrtz_f16_f32_e32 v11, v11, v19 ; 5E16270B v_mul_f32_e32 v5, v17, v5 ; 100A0B11 v_mul_f32_e32 v6, v17, v6 ; 100C0D11 v_mul_f32_e32 v7, v17, v7 ; 100E0F11 v_mad_f32 v2, 2.0, v2, -1.0 ; D2820002 03CE04F4 v_mad_f32 v1, 2.0, v1, -1.0 ; D2820001 03CE02F4 v_mul_f32_e32 v17, v13, v7 ; 10220F0D v_mad_f32 v17, v6, v14, -v17 ; D2820011 84461D06 v_mul_f32_e32 v19, v14, v5 ; 10260B0E v_mad_f32 v19, v7, v12, -v19 ; D2820013 844E1907 v_mul_f32_e32 v20, v12, v6 ; 10280D0C v_mad_f32 v20, v5, v13, -v20 ; D2820014 84521B05 v_mul_f32_e32 v21, v12, v12 ; 102A190C v_mac_f32_e32 v21, v13, v13 ; 3E2A1B0D v_mac_f32_e32 v21, v14, v14 ; 3E2A1D0E v_rsq_clamp_f32_e32 v21, v21 ; 7E2A5915 v_mul_f32_e32 v22, v17, v17 ; 102C2311 v_mac_f32_e32 v22, v19, v19 ; 3E2C2713 v_mac_f32_e32 v22, v20, v20 ; 3E2C2914 v_rsq_clamp_f32_e32 v22, v22 ; 7E2C5916 v_mul_f32_e32 v12, v21, v12 ; 10181915 v_mul_f32_e32 v13, v21, v13 ; 101A1B15 v_mul_f32_e32 v14, v21, v14 ; 101C1D15 v_mul_f32_e32 v17, v22, v17 ; 10222316 v_mul_f32_e32 v19, v22, v19 ; 10262716 v_mul_f32_e32 v20, v22, v20 ; 10282916 v_mad_f32 v21, -v1, v1, 1.0 ; D2820015 23CA0301 v_mad_f32 v21, -v2, v2, v21 ; D2820015 24560502 v_mul_f32_e32 v2, s8, v2 ; 10040408 v_mul_f32_e32 v1, s8, v1 ; 10020208 v_add_f32_e64 v21, 0, v21 clamp ; D2060815 00022A80 v_sqrt_f32_e32 v21, v21 ; 7E2A6715 v_mul_f32_e32 v22, v2, v2 ; 102C0502 v_mac_f32_e32 v22, v1, v1 ; 3E2C0301 v_mac_f32_e32 v22, v21, v21 ; 3E2C2B15 v_rsq_clamp_f32_e32 v22, v22 ; 7E2C5916 v_mul_f32_e32 v17, v17, v0 ; 10220111 v_mul_f32_e32 v19, v19, v0 ; 10260113 v_mul_f32_e32 v0, v20, v0 ; 10000114 v_mul_f32_e32 v1, v22, v1 ; 10020316 v_mul_f32_e32 v17, v1, v17 ; 10222301 v_mul_f32_e32 v19, v1, v19 ; 10262701 v_mul_f32_e32 v0, v1, v0 ; 10000101 v_mul_f32_e32 v1, v22, v2 ; 10020516 v_mad_f32 v2, v1, v12, -v17 ; D2820002 84461901 v_mad_f32 v12, v1, v13, -v19 ; D282000C 844E1B01 v_mad_f32 v0, v1, v14, -v0 ; D2820000 84021D01 v_mul_f32_e32 v1, v22, v21 ; 10022B16 v_mac_f32_e32 v2, v1, v5 ; 3E040B01 v_mac_f32_e32 v12, v1, v6 ; 3E180D01 v_mac_f32_e32 v0, v1, v7 ; 3E000F01 v_mul_f32_e32 v1, v8, v3 ; 10020708 v_mul_f32_e32 v3, v9, v15 ; 10061F09 v_mul_f32_e32 v5, v10, v16 ; 100A210A v_cvt_pkrtz_f16_f32_e32 v1, v1, v3 ; 5E020701 v_mul_f32_e32 v3, v2, v2 ; 10060502 v_mac_f32_e32 v3, v12, v12 ; 3E06190C v_mac_f32_e32 v3, v0, v0 ; 3E060100 v_rsq_clamp_f32_e32 v3, v3 ; 7E065903 v_cvt_pkrtz_f16_f32_e64 v5, v5, 0 ; D25E0005 00010105 exp 15, 0, 1, 0, 0, v1, v5, v1, v5 ; F800040F 05010501 s_waitcnt expcnt(0) ; BF8C070F v_cvt_pkrtz_f16_f32_e64 v1, v18, 0 ; D25E0001 00010112 exp 15, 1, 1, 0, 0, v11, v1, v11, v1 ; F800041F 010B010B s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v1, v3, v2 ; 10020503 v_mul_f32_e32 v2, v3, v12 ; 10041903 v_mul_f32_e32 v0, v3, v0 ; 10000103 v_mad_f32 v1, 0.5, v1, 0.5 ; D2820001 03C202F0 v_mad_f32 v2, 0.5, v2, 0.5 ; D2820002 03C204F0 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 exp 15, 2, 0, 0, 0, v4, v4, v4, v4 ; F800002F 04040404 v_mad_f32 v0, 0.5, v0, 0.5 ; D2820000 03C200F0 v_cvt_pkrtz_f16_f32_e64 v0, v0, 0 ; D25E0000 00010100 exp 15, 3, 1, 1, 1, v1, v0, v1, v0 ; F8001C3F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 24 Code Size: 636 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL IN[5] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL OUT[4], GENERIC[2] DCL OUT[5], GENERIC[3] DCL OUT[6], GENERIC[4] DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0..2] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL DCL TEMP[2..8], LOCAL IMM[0] FLT32 { 0.0000, 0.5000, 255.0000, -128.0000} IMM[1] UINT32 {3, 400, 304, 320} IMM[2] INT32 {2, 8, 1, 0} IMM[3] FLT32 { 1.0000, -64.0000, 0.0159, 2.0000} IMM[4] UINT32 {4, 0, 12, 28} IMM[5] FLT32 { 16.0000, 0.0000, 0.0000, 0.0000} IMM[6] UINT32 {44, 60, 24, 32} IMM[7] INT32 {3, 0, 0, 0} IMM[8] UINT32 {16, 48, 20, 36} IMM[9] UINT32 {52, 8, 40, 56} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MOV TEMP[0].y, IMM[0].xxxx 4: SHL TEMP[2].x, IN[3].xxxx, IMM[2].xxxx 5: UADD TEMP[2].x, TEMP[2].xxxx, IMM[2].yyyy 6: I2F TEMP[2].x, TEMP[2].xxxx 7: ADD TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy 8: MUL TEMP[0].x, TEMP[2].xxxx, CONST[4][25].zzzz 9: ADD TEMP[0].xy, TEMP[0].xyyy, IN[5].xyyy 10: FLR TEMP[2].x, TEMP[0].xxxx 11: ADD TEMP[3].x, TEMP[0].xxxx, -TEMP[2].xxxx 12: MAD TEMP[0].x, TEMP[2].xxxx, CONST[4][25].wwww, TEMP[0].yyyy 13: MOV TEMP[3].y, TEMP[0].xxxx 14: MOV TEMP[0].y, IMM[0].xxxx 15: SHL TEMP[2].x, IN[3].yyyy, IMM[2].xxxx 16: UADD TEMP[2].x, IMM[2].yyyy, TEMP[2].xxxx 17: I2F TEMP[2].x, TEMP[2].xxxx 18: ADD TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx 19: MUL TEMP[0].x, TEMP[2].xxxx, CONST[4][25].zzzz 20: ADD TEMP[0].xy, TEMP[0].xyyy, IN[5].xyyy 21: FLR TEMP[2].x, TEMP[0].xxxx 22: ADD TEMP[4].x, TEMP[0].xxxx, -TEMP[2].xxxx 23: MAD TEMP[0].x, TEMP[2].xxxx, CONST[4][25].wwww, TEMP[0].yyyy 24: MOV TEMP[4].y, TEMP[0].xxxx 25: MOV TEMP[0].y, IMM[0].xxxx 26: SHL TEMP[2].x, IN[3].zzzz, IMM[2].xxxx 27: UADD TEMP[2].x, IMM[2].yyyy, TEMP[2].xxxx 28: I2F TEMP[2].x, TEMP[2].xxxx 29: ADD TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx 30: MUL TEMP[0].x, TEMP[2].xxxx, CONST[4][25].zzzz 31: ADD TEMP[0].xy, TEMP[0].xyyy, IN[5].xyyy 32: FLR TEMP[2].x, TEMP[0].xxxx 33: ADD TEMP[5].x, TEMP[0].xxxx, -TEMP[2].xxxx 34: MAD TEMP[0].x, TEMP[2].xxxx, CONST[4][25].wwww, TEMP[0].yyyy 35: MOV TEMP[5].y, TEMP[0].xxxx 36: MOV TEMP[0].xy, TEMP[5].xyyy 37: MOV TEMP[0].w, IMM[0].xxxx 38: TXL TEMP[0], TEMP[0], SAMP[0], 2D 39: MOV TEMP[2].xy, TEMP[4].xyyy 40: MOV TEMP[2].w, IMM[0].xxxx 41: TXL TEMP[2], TEMP[2], SAMP[0], 2D 42: MOV TEMP[6].xy, TEMP[3].xyyy 43: MOV TEMP[6].w, IMM[0].xxxx 44: TXL TEMP[6], TEMP[6], SAMP[0], 2D 45: MUL TEMP[6], IN[4].xxxx, TEMP[6] 46: MAD TEMP[2], IN[4].yyyy, TEMP[2], TEMP[6] 47: MAD TEMP[0], IN[4].zzzz, TEMP[0], TEMP[2] 48: MOV TEMP[2].xy, TEMP[5].xyyy 49: MOV TEMP[2].w, IMM[0].xxxx 50: TXL TEMP[2], TEMP[2], SAMP[0], 2D, IMM[2].zwz 51: MOV TEMP[6].xy, TEMP[3].xyyy 52: MOV TEMP[6].w, IMM[0].xxxx 53: TXL TEMP[6], TEMP[6], SAMP[0], 2D, IMM[2].zwz 54: MOV TEMP[7].xy, TEMP[4].xyyy 55: MOV TEMP[7].w, IMM[0].xxxx 56: TXL TEMP[7], TEMP[7], SAMP[0], 2D, IMM[2].zwz 57: MUL TEMP[7], IN[4].yyyy, TEMP[7] 58: MAD TEMP[6], IN[4].xxxx, TEMP[6], TEMP[7] 59: MAD TEMP[2], IN[4].zzzz, TEMP[2], TEMP[6] 60: MOV TEMP[5].xy, TEMP[5].xyyy 61: MOV TEMP[5].w, IMM[0].xxxx 62: TXL TEMP[5], TEMP[5], SAMP[0], 2D, IMM[2].xwx 63: MOV TEMP[4].xy, TEMP[4].xyyy 64: MOV TEMP[4].w, IMM[0].xxxx 65: TXL TEMP[4], TEMP[4], SAMP[0], 2D, IMM[2].xwx 66: MOV TEMP[3].xy, TEMP[3].xyyy 67: MOV TEMP[3].w, IMM[0].xxxx 68: TXL TEMP[3], TEMP[3], SAMP[0], 2D, IMM[2].xwx 69: MUL TEMP[3], IN[4].xxxx, TEMP[3] 70: MAD TEMP[3], IN[4].yyyy, TEMP[4], TEMP[3] 71: MAD TEMP[3], IN[4].zzzz, TEMP[5], TEMP[3] 72: MAD TEMP[4], IN[1], IMM[0].zzzz, IMM[0].wwww 73: FSLT TEMP[5], TEMP[4], IMM[0].xxxx 74: AND TEMP[5], TEMP[5], IMM[3].xxxx 75: ABS TEMP[4], TEMP[4] 76: ADD TEMP[4], TEMP[4], -TEMP[5] 77: ADD TEMP[4], TEMP[4], IMM[3].yyyy 78: FSLT TEMP[6], TEMP[4], IMM[0].xxxx 79: AND TEMP[6], TEMP[6], IMM[3].xxxx 80: ABS TEMP[4], TEMP[4] 81: ADD TEMP[4], TEMP[4], -TEMP[6] 82: MUL TEMP[4].xy, TEMP[4], IMM[3].zzzz 83: MOV TEMP[7].x, TEMP[4].xxxx 84: MOV TEMP[7].y, TEMP[4].yyyy 85: ADD TEMP[8].x, IMM[3].xxxx, -TEMP[4].xxxx 86: ADD TEMP[4].x, TEMP[8].xxxx, -TEMP[4].yyyy 87: MOV TEMP[7].z, TEMP[4].xxxx 88: DP3 TEMP[4].x, TEMP[7].xyzz, TEMP[7].xyzz 89: RSQ TEMP[4].x, TEMP[4].xxxx 90: MUL TEMP[4].xyz, TEMP[7].xyzz, TEMP[4].xxxx 91: MUL TEMP[6], TEMP[6], IMM[3].wwww 92: ADD TEMP[6].xy, IMM[3].xxxx, -TEMP[6] 93: MUL TEMP[6].xy, TEMP[4].xyyy, TEMP[6].xyyy 94: MOV TEMP[7].w, IMM[0].xxxx 95: MOV TEMP[7].x, TEMP[6].xxxx 96: MOV TEMP[7].y, TEMP[6].yyyy 97: MUL TEMP[5].x, TEMP[5].xxxx, IMM[3].wwww 98: ADD TEMP[5].x, IMM[3].xxxx, -TEMP[5].xxxx 99: MUL TEMP[4].x, TEMP[5].xxxx, TEMP[4].zzzz 100: MOV TEMP[7].z, TEMP[4].xxxx 101: DP4 TEMP[4].x, TEMP[7], TEMP[0] 102: DP4 TEMP[5].x, TEMP[7], TEMP[2] 103: MOV TEMP[4].y, TEMP[5].xxxx 104: DP4 TEMP[5].x, TEMP[7], TEMP[3] 105: MOV TEMP[4].z, TEMP[5].xxxx 106: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 107: RSQ TEMP[5].x, TEMP[5].xxxx 108: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx 109: MOV TEMP[5].xy, IN[5].xyyy 110: MOV TEMP[5].w, IMM[0].xxxx 111: TXL TEMP[5].z, TEMP[5], SAMP[0], 2D 112: MUL TEMP[5].xyz, IN[0].xyzz, TEMP[5].zzzz 113: MOV TEMP[6].w, IMM[3].xxxx 114: MOV TEMP[6].x, TEMP[5].xxxx 115: MOV TEMP[6].y, TEMP[5].yyyy 116: MOV TEMP[6].z, TEMP[5].zzzz 117: DP4 TEMP[0].x, TEMP[6], TEMP[0] 118: DP4 TEMP[2].x, TEMP[6], TEMP[2] 119: DP4 TEMP[3].x, TEMP[6], TEMP[3] 120: MOV TEMP[5].x, TEMP[0].xxxx 121: MOV TEMP[5].y, TEMP[2].xxxx 122: MOV TEMP[5].z, TEMP[3].xxxx 123: ADD TEMP[5].xyz, TEMP[5].xyzz, -CONST[4][19].xyzz 124: MOV TEMP[6].x, TEMP[0].xxxx 125: MOV TEMP[6].y, TEMP[2].xxxx 126: MOV TEMP[6].z, TEMP[3].xxxx 127: DP3 TEMP[7].x, CONST[4][20].xyzz, TEMP[5].xyzz 128: MOV TEMP[6].w, TEMP[7].xxxx 129: MOV TEMP[7].x, TEMP[4].xxxx 130: MOV TEMP[7].y, TEMP[4].yyyy 131: MOV TEMP[7].z, TEMP[4].zzzz 132: DP3 TEMP[8].x, TEMP[5].xyzz, TEMP[5].xyzz 133: RSQ TEMP[8].x, TEMP[8].xxxx 134: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[8].xxxx 135: DP3 TEMP[8].x, TEMP[4].xyzz, CONST[5][0].xyzz 136: MUL TEMP[4].xyz, TEMP[8].xxxx, TEMP[4].xyzz 137: MUL TEMP[4].xyz, IMM[3].wwww, TEMP[4].xyzz 138: ADD TEMP[4].xyz, CONST[5][0].xyzz, -TEMP[4].xyzz 139: DP3 TEMP[4].x, -TEMP[5].xyzz, TEMP[4].xyzz 140: MOV_SAT TEMP[4].x, TEMP[4].xxxx 141: POW TEMP[4].x, TEMP[4].xxxx, IMM[5].xxxx 142: MOV_SAT TEMP[4].x, TEMP[4].xxxx 143: MOV TEMP[7].w, TEMP[4].xxxx 144: MOV TEMP[4].w, IMM[3].xxxx 145: MOV TEMP[4].x, TEMP[0].xxxx 146: MOV TEMP[4].y, TEMP[2].xxxx 147: MOV TEMP[4].z, TEMP[3].xxxx 148: MOV TEMP[0].x, CONST[4][0].wwww 149: MOV TEMP[0].y, CONST[4][1].wwww 150: MOV TEMP[0].z, CONST[4][2].wwww 151: MOV TEMP[0].w, CONST[4][3].wwww 152: DP4 TEMP[0].x, TEMP[4], TEMP[0] 153: MAD TEMP[2].xy, IN[2].xyyy, CONST[1][1].zwww, CONST[1][2].xyyy 154: MOV TEMP[3].xy, IN[5].xyyy 155: MOV TEMP[3].w, IMM[0].xxxx 156: TXL TEMP[3], TEMP[3], SAMP[0], 2D, IMM[7].xyx 157: MOV TEMP[5].x, CONST[4][0].xxxx 158: MOV TEMP[5].y, CONST[4][1].xxxx 159: MOV TEMP[5].z, CONST[4][2].xxxx 160: MOV TEMP[5].w, CONST[4][3].xxxx 161: DP4 TEMP[5].x, TEMP[4], TEMP[5] 162: MOV TEMP[8].x, CONST[4][0].yyyy 163: MOV TEMP[8].y, CONST[4][1].yyyy 164: MOV TEMP[8].z, CONST[4][2].yyyy 165: MOV TEMP[8].w, CONST[4][3].yyyy 166: DP4 TEMP[8].x, TEMP[4], TEMP[8] 167: MOV TEMP[5].y, -TEMP[8].xxxx 168: MOV TEMP[8].x, CONST[4][0].zzzz 169: MOV TEMP[8].y, CONST[4][1].zzzz 170: MOV TEMP[8].z, CONST[4][2].zzzz 171: MOV TEMP[8].w, CONST[4][3].zzzz 172: DP4 TEMP[4].x, TEMP[4], TEMP[8] 173: MAD TEMP[4].x, IMM[3].wwww, TEMP[4].xxxx, -TEMP[0].xxxx 174: MOV TEMP[5].z, TEMP[4].xxxx 175: MOV TEMP[5].w, TEMP[0].xxxx 176: MOV OUT[1], TEMP[1] 177: MOV OUT[2].xy, TEMP[2].xyxx 178: MOV OUT[4], TEMP[7] 179: MOV OUT[6], IMM[0].xxxx 180: MOV OUT[5], TEMP[3] 181: MOV OUT[0], TEMP[5] 182: MOV OUT[3], TEMP[6] 183: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %17 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4 %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 %19 = call float @llvm.SI.load.const(<16 x i8> %18, i32 0) %20 = call float @llvm.SI.load.const(<16 x i8> %18, i32 4) %21 = call float @llvm.SI.load.const(<16 x i8> %18, i32 8) %22 = call float @llvm.SI.load.const(<16 x i8> %18, i32 12) %23 = call float @llvm.SI.load.const(<16 x i8> %18, i32 16) %24 = call float @llvm.SI.load.const(<16 x i8> %18, i32 20) %25 = call float @llvm.SI.load.const(<16 x i8> %18, i32 24) %26 = call float @llvm.SI.load.const(<16 x i8> %18, i32 28) %27 = call float @llvm.SI.load.const(<16 x i8> %18, i32 32) %28 = call float @llvm.SI.load.const(<16 x i8> %18, i32 36) %29 = call float @llvm.SI.load.const(<16 x i8> %18, i32 40) %30 = call float @llvm.SI.load.const(<16 x i8> %18, i32 44) %31 = call float @llvm.SI.load.const(<16 x i8> %18, i32 48) %32 = call float @llvm.SI.load.const(<16 x i8> %18, i32 52) %33 = call float @llvm.SI.load.const(<16 x i8> %18, i32 56) %34 = call float @llvm.SI.load.const(<16 x i8> %18, i32 60) %35 = call float @llvm.SI.load.const(<16 x i8> %18, i32 304) %36 = call float @llvm.SI.load.const(<16 x i8> %18, i32 308) %37 = call float @llvm.SI.load.const(<16 x i8> %18, i32 312) %38 = call float @llvm.SI.load.const(<16 x i8> %18, i32 320) %39 = call float @llvm.SI.load.const(<16 x i8> %18, i32 324) %40 = call float @llvm.SI.load.const(<16 x i8> %18, i32 328) %41 = call float @llvm.SI.load.const(<16 x i8> %18, i32 408) %42 = call float @llvm.SI.load.const(<16 x i8> %18, i32 412) %43 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 5 %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = call float @llvm.SI.load.const(<16 x i8> %44, i32 0) %46 = call float @llvm.SI.load.const(<16 x i8> %44, i32 4) %47 = call float @llvm.SI.load.const(<16 x i8> %44, i32 8) %48 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %49 = load <8 x i32>, <8 x i32> addrspace(2)* %48, align 32, !tbaa !0 %50 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %51 = load <4 x i32>, <4 x i32> addrspace(2)* %50, align 16, !tbaa !0 %52 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !tbaa !0 %54 = add i32 %5, %7 %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %54) %56 = extractelement <4 x float> %55, i32 0 %57 = extractelement <4 x float> %55, i32 1 %58 = extractelement <4 x float> %55, i32 2 %59 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !tbaa !0 %61 = add i32 %5, %7 %62 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %60, i32 0, i32 %61) %63 = extractelement <4 x float> %62, i32 0 %64 = extractelement <4 x float> %62, i32 1 %65 = extractelement <4 x float> %62, i32 2 %66 = extractelement <4 x float> %62, i32 3 %67 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %68 = load <16 x i8>, <16 x i8> addrspace(2)* %67, align 16, !tbaa !0 %69 = add i32 %5, %7 %70 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %68, i32 0, i32 %69) %71 = extractelement <4 x float> %70, i32 0 %72 = extractelement <4 x float> %70, i32 1 %73 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %74 = load <16 x i8>, <16 x i8> addrspace(2)* %73, align 16, !tbaa !0 %75 = add i32 %5, %7 %76 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %74, i32 0, i32 %75) %77 = extractelement <4 x float> %76, i32 0 %78 = extractelement <4 x float> %76, i32 1 %79 = extractelement <4 x float> %76, i32 2 %80 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0 %82 = add i32 %5, %7 %83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %82) %84 = extractelement <4 x float> %83, i32 0 %85 = extractelement <4 x float> %83, i32 1 %86 = extractelement <4 x float> %83, i32 2 %87 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 5 %88 = load <16 x i8>, <16 x i8> addrspace(2)* %87, align 16, !tbaa !0 %89 = add i32 %10, %6 %90 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %88, i32 0, i32 %89) %91 = extractelement <4 x float> %90, i32 0 %92 = extractelement <4 x float> %90, i32 1 %93 = bitcast float %77 to i32 %94 = shl i32 %93, 2 %95 = add i32 %94, 8 %96 = sitofp i32 %95 to float %97 = fadd float %96, 5.000000e-01 %98 = fmul float %97, %41 %99 = fadd float %98, %91 %100 = fadd float %92, 0.000000e+00 %101 = call float @floor(float %99) %102 = fsub float %99, %101 %103 = fmul float %101, %42 %104 = fadd float %103, %100 %105 = bitcast float %78 to i32 %106 = shl i32 %105, 2 %107 = add i32 %106, 8 %108 = sitofp i32 %107 to float %109 = fadd float %108, 5.000000e-01 %110 = fmul float %109, %41 %111 = fadd float %110, %91 %112 = fadd float %92, 0.000000e+00 %113 = call float @floor(float %111) %114 = fsub float %111, %113 %115 = fmul float %113, %42 %116 = fadd float %115, %112 %117 = bitcast float %79 to i32 %118 = shl i32 %117, 2 %119 = add i32 %118, 8 %120 = sitofp i32 %119 to float %121 = fadd float %120, 5.000000e-01 %122 = fmul float %121, %41 %123 = fadd float %122, %91 %124 = fadd float %92, 0.000000e+00 %125 = call float @floor(float %123) %126 = fsub float %123, %125 %127 = fmul float %125, %42 %128 = fadd float %127, %124 %129 = bitcast float %126 to i32 %130 = bitcast float %128 to i32 %131 = insertelement <4 x i32> undef, i32 %129, i32 0 %132 = insertelement <4 x i32> %131, i32 %130, i32 1 %133 = insertelement <4 x i32> %132, i32 0, i32 2 %134 = bitcast <8 x i32> %49 to <32 x i8> %135 = bitcast <4 x i32> %51 to <16 x i8> %136 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %133, <32 x i8> %134, <16 x i8> %135, i32 2) %137 = extractelement <4 x float> %136, i32 0 %138 = extractelement <4 x float> %136, i32 1 %139 = extractelement <4 x float> %136, i32 2 %140 = extractelement <4 x float> %136, i32 3 %141 = bitcast float %114 to i32 %142 = bitcast float %116 to i32 %143 = insertelement <4 x i32> undef, i32 %141, i32 0 %144 = insertelement <4 x i32> %143, i32 %142, i32 1 %145 = insertelement <4 x i32> %144, i32 0, i32 2 %146 = bitcast <8 x i32> %49 to <32 x i8> %147 = bitcast <4 x i32> %51 to <16 x i8> %148 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %145, <32 x i8> %146, <16 x i8> %147, i32 2) %149 = extractelement <4 x float> %148, i32 0 %150 = extractelement <4 x float> %148, i32 1 %151 = extractelement <4 x float> %148, i32 2 %152 = extractelement <4 x float> %148, i32 3 %153 = bitcast float %102 to i32 %154 = bitcast float %104 to i32 %155 = insertelement <4 x i32> undef, i32 %153, i32 0 %156 = insertelement <4 x i32> %155, i32 %154, i32 1 %157 = insertelement <4 x i32> %156, i32 0, i32 2 %158 = bitcast <8 x i32> %49 to <32 x i8> %159 = bitcast <4 x i32> %51 to <16 x i8> %160 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %157, <32 x i8> %158, <16 x i8> %159, i32 2) %161 = extractelement <4 x float> %160, i32 0 %162 = extractelement <4 x float> %160, i32 1 %163 = extractelement <4 x float> %160, i32 2 %164 = extractelement <4 x float> %160, i32 3 %165 = fmul float %84, %161 %166 = fmul float %84, %162 %167 = fmul float %84, %163 %168 = fmul float %84, %164 %169 = fmul float %85, %149 %170 = fadd float %169, %165 %171 = fmul float %85, %150 %172 = fadd float %171, %166 %173 = fmul float %85, %151 %174 = fadd float %173, %167 %175 = fmul float %85, %152 %176 = fadd float %175, %168 %177 = fmul float %86, %137 %178 = fadd float %177, %170 %179 = fmul float %86, %138 %180 = fadd float %179, %172 %181 = fmul float %86, %139 %182 = fadd float %181, %174 %183 = fmul float %86, %140 %184 = fadd float %183, %176 %185 = bitcast float %126 to i32 %186 = bitcast float %128 to i32 %187 = insertelement <4 x i32> , i32 %185, i32 1 %188 = insertelement <4 x i32> %187, i32 %186, i32 2 %189 = insertelement <4 x i32> %188, i32 0, i32 3 %190 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %189, <8 x i32> %49, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %191 = extractelement <4 x float> %190, i32 0 %192 = extractelement <4 x float> %190, i32 1 %193 = extractelement <4 x float> %190, i32 2 %194 = extractelement <4 x float> %190, i32 3 %195 = bitcast float %102 to i32 %196 = bitcast float %104 to i32 %197 = insertelement <4 x i32> , i32 %195, i32 1 %198 = insertelement <4 x i32> %197, i32 %196, i32 2 %199 = insertelement <4 x i32> %198, i32 0, i32 3 %200 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %199, <8 x i32> %49, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %201 = extractelement <4 x float> %200, i32 0 %202 = extractelement <4 x float> %200, i32 1 %203 = extractelement <4 x float> %200, i32 2 %204 = extractelement <4 x float> %200, i32 3 %205 = bitcast float %114 to i32 %206 = bitcast float %116 to i32 %207 = insertelement <4 x i32> , i32 %205, i32 1 %208 = insertelement <4 x i32> %207, i32 %206, i32 2 %209 = insertelement <4 x i32> %208, i32 0, i32 3 %210 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %209, <8 x i32> %49, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %211 = extractelement <4 x float> %210, i32 0 %212 = extractelement <4 x float> %210, i32 1 %213 = extractelement <4 x float> %210, i32 2 %214 = extractelement <4 x float> %210, i32 3 %215 = fmul float %85, %211 %216 = fmul float %85, %212 %217 = fmul float %85, %213 %218 = fmul float %85, %214 %219 = fmul float %84, %201 %220 = fadd float %219, %215 %221 = fmul float %84, %202 %222 = fadd float %221, %216 %223 = fmul float %84, %203 %224 = fadd float %223, %217 %225 = fmul float %84, %204 %226 = fadd float %225, %218 %227 = fmul float %86, %191 %228 = fadd float %227, %220 %229 = fmul float %86, %192 %230 = fadd float %229, %222 %231 = fmul float %86, %193 %232 = fadd float %231, %224 %233 = fmul float %86, %194 %234 = fadd float %233, %226 %235 = bitcast float %126 to i32 %236 = bitcast float %128 to i32 %237 = insertelement <4 x i32> , i32 %235, i32 1 %238 = insertelement <4 x i32> %237, i32 %236, i32 2 %239 = insertelement <4 x i32> %238, i32 0, i32 3 %240 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %239, <8 x i32> %49, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %241 = extractelement <4 x float> %240, i32 0 %242 = extractelement <4 x float> %240, i32 1 %243 = extractelement <4 x float> %240, i32 2 %244 = extractelement <4 x float> %240, i32 3 %245 = bitcast float %114 to i32 %246 = bitcast float %116 to i32 %247 = insertelement <4 x i32> , i32 %245, i32 1 %248 = insertelement <4 x i32> %247, i32 %246, i32 2 %249 = insertelement <4 x i32> %248, i32 0, i32 3 %250 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %249, <8 x i32> %49, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %251 = extractelement <4 x float> %250, i32 0 %252 = extractelement <4 x float> %250, i32 1 %253 = extractelement <4 x float> %250, i32 2 %254 = extractelement <4 x float> %250, i32 3 %255 = bitcast float %102 to i32 %256 = bitcast float %104 to i32 %257 = insertelement <4 x i32> , i32 %255, i32 1 %258 = insertelement <4 x i32> %257, i32 %256, i32 2 %259 = insertelement <4 x i32> %258, i32 0, i32 3 %260 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %259, <8 x i32> %49, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %261 = extractelement <4 x float> %260, i32 0 %262 = extractelement <4 x float> %260, i32 1 %263 = extractelement <4 x float> %260, i32 2 %264 = extractelement <4 x float> %260, i32 3 %265 = fmul float %84, %261 %266 = fmul float %84, %262 %267 = fmul float %84, %263 %268 = fmul float %84, %264 %269 = fmul float %85, %251 %270 = fadd float %269, %265 %271 = fmul float %85, %252 %272 = fadd float %271, %266 %273 = fmul float %85, %253 %274 = fadd float %273, %267 %275 = fmul float %85, %254 %276 = fadd float %275, %268 %277 = fmul float %86, %241 %278 = fadd float %277, %270 %279 = fmul float %86, %242 %280 = fadd float %279, %272 %281 = fmul float %86, %243 %282 = fadd float %281, %274 %283 = fmul float %86, %244 %284 = fadd float %283, %276 %285 = fmul float %63, 2.550000e+02 %286 = fadd float %285, -1.280000e+02 %287 = fmul float %64, 2.550000e+02 %288 = fadd float %287, -1.280000e+02 %289 = fmul float %65, 2.550000e+02 %290 = fadd float %289, -1.280000e+02 %291 = fmul float %66, 2.550000e+02 %292 = fadd float %291, -1.280000e+02 %293 = fcmp olt float %286, 0.000000e+00 %294 = fcmp olt float %288, 0.000000e+00 %295 = fcmp olt float %290, 0.000000e+00 %296 = fcmp olt float %292, 0.000000e+00 %297 = select i1 %293, float 1.000000e+00, float 0.000000e+00 %298 = call float @fabs(float %286) %299 = call float @fabs(float %288) %300 = call float @fabs(float %290) %301 = call float @fabs(float %292) %302 = fsub float %298, %297 %303 = select i1 %294, float -1.000000e+00, float -0.000000e+00 %304 = fadd float %299, %303 %305 = select i1 %295, float -1.000000e+00, float -0.000000e+00 %306 = fadd float %300, %305 %307 = select i1 %296, float -1.000000e+00, float -0.000000e+00 %308 = fadd float %301, %307 %309 = fadd float %302, -6.400000e+01 %310 = fadd float %304, -6.400000e+01 %311 = fadd float %306, -6.400000e+01 %312 = fadd float %308, -6.400000e+01 %313 = fcmp olt float %309, 0.000000e+00 %314 = fcmp olt float %310, 0.000000e+00 %315 = select i1 %313, float 1.000000e+00, float 0.000000e+00 %316 = select i1 %314, float 1.000000e+00, float 0.000000e+00 %317 = call float @fabs(float %309) %318 = call float @fabs(float %310) %319 = call float @fabs(float %311) %320 = call float @fabs(float %312) %321 = fsub float %317, %315 %322 = fsub float %318, %316 %323 = fmul float %321, 0x3F90410420000000 %324 = fmul float %322, 0x3F90410420000000 %325 = fsub float 1.000000e+00, %323 %326 = fsub float %325, %324 %327 = fmul float %323, %323 %328 = fmul float %324, %324 %329 = fadd float %328, %327 %330 = fmul float %326, %326 %331 = fadd float %329, %330 %332 = call float @llvm.AMDGPU.rsq.clamped.f32(float %331) %333 = fmul float %323, %332 %334 = fmul float %324, %332 %335 = fmul float %326, %332 %336 = fmul float %315, 2.000000e+00 %337 = fmul float %316, 2.000000e+00 %338 = fsub float 1.000000e+00, %336 %339 = fsub float 1.000000e+00, %337 %340 = fmul float %333, %338 %341 = fmul float %334, %339 %342 = fmul float %297, 2.000000e+00 %343 = fsub float 1.000000e+00, %342 %344 = fmul float %343, %335 %345 = fmul float %340, %178 %346 = fmul float %341, %180 %347 = fadd float %345, %346 %348 = fmul float %344, %182 %349 = fadd float %347, %348 %350 = fmul float %184, 0.000000e+00 %351 = fadd float %349, %350 %352 = fmul float %340, %228 %353 = fmul float %341, %230 %354 = fadd float %352, %353 %355 = fmul float %344, %232 %356 = fadd float %354, %355 %357 = fmul float %234, 0.000000e+00 %358 = fadd float %356, %357 %359 = fmul float %340, %278 %360 = fmul float %341, %280 %361 = fadd float %359, %360 %362 = fmul float %344, %282 %363 = fadd float %361, %362 %364 = fmul float %284, 0.000000e+00 %365 = fadd float %363, %364 %366 = fmul float %351, %351 %367 = fmul float %358, %358 %368 = fadd float %367, %366 %369 = fmul float %365, %365 %370 = fadd float %368, %369 %371 = call float @llvm.AMDGPU.rsq.clamped.f32(float %370) %372 = fmul float %351, %371 %373 = fmul float %358, %371 %374 = fmul float %365, %371 %375 = bitcast float %91 to i32 %376 = bitcast float %92 to i32 %377 = insertelement <4 x i32> undef, i32 %375, i32 0 %378 = insertelement <4 x i32> %377, i32 %376, i32 1 %379 = insertelement <4 x i32> %378, i32 0, i32 2 %380 = bitcast <8 x i32> %49 to <32 x i8> %381 = bitcast <4 x i32> %51 to <16 x i8> %382 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %379, <32 x i8> %380, <16 x i8> %381, i32 2) %383 = extractelement <4 x float> %382, i32 2 %384 = fmul float %56, %383 %385 = fmul float %57, %383 %386 = fmul float %58, %383 %387 = fmul float %384, %178 %388 = fmul float %385, %180 %389 = fadd float %387, %388 %390 = fmul float %386, %182 %391 = fadd float %389, %390 %392 = fadd float %391, %184 %393 = fmul float %384, %228 %394 = fmul float %385, %230 %395 = fadd float %393, %394 %396 = fmul float %386, %232 %397 = fadd float %395, %396 %398 = fadd float %397, %234 %399 = fmul float %384, %278 %400 = fmul float %385, %280 %401 = fadd float %399, %400 %402 = fmul float %386, %282 %403 = fadd float %401, %402 %404 = fadd float %403, %284 %405 = fsub float %392, %35 %406 = fsub float %398, %36 %407 = fsub float %404, %37 %408 = fmul float %38, %405 %409 = fmul float %39, %406 %410 = fadd float %409, %408 %411 = fmul float %40, %407 %412 = fadd float %410, %411 %413 = fmul float %405, %405 %414 = fmul float %406, %406 %415 = fadd float %414, %413 %416 = fmul float %407, %407 %417 = fadd float %415, %416 %418 = call float @llvm.AMDGPU.rsq.clamped.f32(float %417) %419 = fmul float %405, %418 %420 = fmul float %406, %418 %421 = fmul float %407, %418 %422 = fmul float %372, %45 %423 = fmul float %373, %46 %424 = fadd float %423, %422 %425 = fmul float %374, %47 %426 = fadd float %424, %425 %427 = fmul float %426, %372 %428 = fmul float %426, %373 %429 = fmul float %426, %374 %430 = fmul float %427, 2.000000e+00 %431 = fmul float %428, 2.000000e+00 %432 = fmul float %429, 2.000000e+00 %433 = fsub float %45, %430 %434 = fsub float %46, %431 %435 = fsub float %47, %432 %436 = fmul float %419, %433 %437 = fsub float -0.000000e+00, %436 %438 = fmul float %420, %434 %439 = fsub float %437, %438 %440 = fmul float %421, %435 %441 = fsub float %439, %440 %442 = call float @llvm.AMDIL.clamp.(float %441, float 0.000000e+00, float 1.000000e+00) %443 = call float @llvm.pow.f32(float %442, float 1.600000e+01) %444 = call float @llvm.AMDIL.clamp.(float %443, float 0.000000e+00, float 1.000000e+00) %445 = fmul float %392, %22 %446 = fmul float %398, %26 %447 = fadd float %445, %446 %448 = fmul float %404, %30 %449 = fadd float %447, %448 %450 = fadd float %449, %34 %451 = fmul float %71, %13 %452 = fadd float %451, %15 %453 = fmul float %72, %14 %454 = fadd float %453, %16 %455 = bitcast float %91 to i32 %456 = bitcast float %92 to i32 %457 = insertelement <4 x i32> , i32 %455, i32 1 %458 = insertelement <4 x i32> %457, i32 %456, i32 2 %459 = insertelement <4 x i32> %458, i32 0, i32 3 %460 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %459, <8 x i32> %49, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %461 = extractelement <4 x float> %460, i32 0 %462 = extractelement <4 x float> %460, i32 1 %463 = extractelement <4 x float> %460, i32 2 %464 = extractelement <4 x float> %460, i32 3 %465 = fmul float %392, %19 %466 = fmul float %398, %23 %467 = fadd float %465, %466 %468 = fmul float %404, %27 %469 = fadd float %467, %468 %470 = fadd float %469, %31 %471 = fmul float %392, %20 %472 = fmul float %398, %24 %473 = fadd float %471, %472 %474 = fmul float %404, %28 %475 = fadd float %473, %474 %476 = fadd float %475, %32 %477 = fsub float -0.000000e+00, %476 %478 = fmul float %392, %21 %479 = fmul float %398, %25 %480 = fadd float %478, %479 %481 = fmul float %404, %29 %482 = fadd float %480, %481 %483 = fadd float %482, %33 %484 = fmul float %483, 2.000000e+00 %485 = fsub float %484, %450 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %452, float %454, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %392, float %398, float %404, float %412) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %372, float %373, float %374, float %444) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %461, float %462, float %463, float %464) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %470, float %477, float %485, float %450) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @floor(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0xc3000000 ; 7E0202FF C3000000 v_mov_b32_e32 v2, 0x437f0000 ; 7E0402FF 437F0000 v_mov_b32_e32 v4, 0x80000000 ; 7E0802FF 80000000 v_mov_b32_e32 v5, 0xc2800000 ; 7E0A02FF C2800000 v_mov_b32_e32 v6, 0x3c820821 ; 7E0C02FF 3C820821 v_add_i32_e32 v0, s10, v0 ; 4A00000A v_add_i32_e32 v3, s11, v3 ; 4A06060B s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908 s_load_dwordx4 s[24:27], s[8:9], 0xc ; C08C090C s_load_dwordx4 s[28:31], s[8:9], 0x10 ; C08E0910 s_load_dwordx4 s[8:11], s[8:9], 0x14 ; C0840914 s_load_dwordx4 s[44:47], s[2:3], 0x4 ; C0960304 s_load_dwordx4 s[48:51], s[2:3], 0x10 ; C0980310 s_load_dwordx4 s[0:3], s[2:3], 0x14 ; C0800314 s_load_dwordx8 s[36:43], s[6:7], 0x0 ; C0D20700 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[7:10], v0, s[12:15], 0 idxen ; E00C2000 80030700 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[10:13], v0, s[16:19], 0 idxen ; E00C2000 80040A00 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[12:15], v0, s[20:23], 0 idxen ; E00C2000 80050C00 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[14:17], v0, s[24:27], 0 idxen ; E00C2000 80060E00 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[17:20], v0, s[28:31], 0 idxen ; E00C2000 80071100 buffer_load_format_xyzw v[24:27], v3, s[8:11], 0 idxen ; E00C2000 80021803 s_buffer_load_dword s29, s[44:47], 0x6 ; C20EAD06 s_buffer_load_dword s30, s[44:47], 0x7 ; C20F2D07 s_buffer_load_dword s17, s[44:47], 0x8 ; C208AD08 s_buffer_load_dword s18, s[44:47], 0x9 ; C2092D09 s_buffer_load_dword s9, s[48:51], 0x51 ; C204B151 s_buffer_load_dword s10, s[48:51], 0x52 ; C2053152 s_buffer_load_dword s31, s[48:51], 0x66 ; C20FB166 s_buffer_load_dword s44, s[48:51], 0x67 ; C2163167 s_buffer_load_dword s4, s[48:51], 0xf ; C202310F s_buffer_load_dword s28, s[48:51], 0x4c ; C20E314C s_buffer_load_dword s26, s[48:51], 0x4d ; C20D314D s_buffer_load_dword s27, s[48:51], 0x4e ; C20DB14E s_buffer_load_dword s22, s[48:51], 0x50 ; C20B3150 s_buffer_load_dword s25, s[0:3], 0x0 ; C20C8100 s_buffer_load_dword s24, s[0:3], 0x1 ; C20C0101 s_buffer_load_dword s23, s[0:3], 0x2 ; C20B8102 s_buffer_load_dword s7, s[48:51], 0x5 ; C203B105 s_buffer_load_dword s8, s[48:51], 0x6 ; C2043106 s_buffer_load_dword s14, s[48:51], 0x7 ; C2073107 s_buffer_load_dword s6, s[48:51], 0x8 ; C2033108 s_buffer_load_dword s5, s[48:51], 0x9 ; C202B109 s_buffer_load_dword s11, s[48:51], 0x0 ; C205B100 s_buffer_load_dword s12, s[48:51], 0x1 ; C2063101 s_buffer_load_dword s13, s[48:51], 0x2 ; C206B102 s_buffer_load_dword s15, s[48:51], 0x3 ; C207B103 s_buffer_load_dword s20, s[48:51], 0x4 ; C20A3104 s_buffer_load_dword s16, s[48:51], 0xa ; C208310A s_buffer_load_dword s21, s[48:51], 0xb ; C20AB10B s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v0, s17 ; 7E000211 s_buffer_load_dword s17, s[48:51], 0xc ; C208B10C v_mov_b32_e32 v3, s18 ; 7E060212 s_buffer_load_dword s18, s[48:51], 0xd ; C209310D s_buffer_load_dword s19, s[48:51], 0xe ; C209B10E v_mad_f32 v10, v2, v10, v1 ; D282000A 04061502 v_mac_f32_e32 v1, v2, v11 ; 3E021702 v_mac_f32_e32 v0, s29, v12 ; 3E00181D v_mac_f32_e32 v3, s30, v13 ; 3E061A1E v_cmp_gt_f32_e64 s[2:3], 0, v10 ; D0080002 00021480 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v2, v4, -1.0, vcc ; D2000002 01A9E704 v_add_f32_e64 v1, |v1|, v2 ; D2060101 00020501 v_add_f32_e32 v1, v5, v1 ; 06020305 v_cmp_gt_f32_e64 s[0:1], 0, v1 ; D0080000 00020280 v_lshlrev_b32_e32 v2, 2, v14 ; 34041C82 v_add_i32_e32 v2, 8, v2 ; 4A040488 v_cvt_f32_i32_e32 v2, v2 ; 7E040B02 v_lshlrev_b32_e32 v4, 2, v15 ; 34081E82 v_lshlrev_b32_e32 v11, 2, v16 ; 34162082 v_add_i32_e32 v4, 8, v4 ; 4A080888 v_cvt_f32_i32_e32 v4, v4 ; 7E080B04 v_add_f32_e32 v2, 0.5, v2 ; 060404F0 v_mad_f32 v2, s31, v2, v24 ; D2820002 0462041F v_floor_f32_e32 v12, v2 ; 7E184902 v_subrev_f32_e32 v13, v12, v2 ; 0A1A050C v_mad_f32 v14, s44, v12, v25 ; D282000E 0466182C v_add_i32_e32 v2, 8, v11 ; 4A041688 v_cvt_f32_i32_e32 v2, v2 ; 7E040B02 v_add_f32_e32 v4, 0.5, v4 ; 060808F0 v_mad_f32 v4, s31, v4, v24 ; D2820004 0462081F v_floor_f32_e32 v11, v4 ; 7E164904 v_subrev_f32_e32 v20, v11, v4 ; 0A28090B v_mad_f32 v21, s44, v11, v25 ; D2820015 0466162C v_add_f32_e32 v2, 0.5, v2 ; 060404F0 v_mad_f32 v2, s31, v2, v24 ; D2820002 0462041F v_floor_f32_e32 v4, v2 ; 7E084902 v_subrev_f32_e32 v31, v4, v2 ; 0A3E0504 v_mad_f32 v32, s44, v4, v25 ; D2820020 0466082C v_mov_b32_e32 v33, 0 ; 7E420280 v_mov_b32_e32 v22, v33 ; 7E2C0321 v_mov_b32_e32 v15, v33 ; 7E1E0321 image_sample_l v[26:29], 15, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[36:43], s[32:35] ; F0900F00 01091A1F image_sample_l v[34:37], 15, 0, 0, 0, 0, 0, 0, 0, v[20:23], s[36:43], s[32:35] ; F0900F00 01092214 image_sample_l v[38:41], 15, 0, 0, 0, 0, 0, 0, 0, v[13:16], s[36:43], s[32:35] ; F0900F00 0109260D s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v2, v38, v17 ; 10042326 v_mul_f32_e32 v4, v39, v17 ; 10082327 v_mul_f32_e32 v11, v40, v17 ; 10162328 v_mul_f32_e32 v12, v41, v17 ; 10182329 v_mov_b32_e32 v30, 0x10001 ; 7E3C02FF 00010001 v_mac_f32_e32 v2, v34, v18 ; 3E042522 v_mac_f32_e32 v4, v35, v18 ; 3E082523 v_mac_f32_e32 v11, v36, v18 ; 3E162524 v_mov_b32_e32 v38, v30 ; 7E4C031E v_mov_b32_e32 v39, v31 ; 7E4E031F v_mov_b32_e32 v40, v32 ; 7E500320 v_mov_b32_e32 v41, v33 ; 7E520321 v_mov_b32_e32 v42, v30 ; 7E54031E v_mov_b32_e32 v43, v31 ; 7E56031F v_mov_b32_e32 v44, v32 ; 7E580320 v_mov_b32_e32 v45, v33 ; 7E5A0321 v_mac_f32_e32 v12, v37, v18 ; 3E182525 v_mov_b32_e32 v39, v13 ; 7E4E030D v_mov_b32_e32 v43, v20 ; 7E560314 v_mac_f32_e32 v2, v26, v19 ; 3E04271A v_mac_f32_e32 v4, v27, v19 ; 3E08271B v_mac_f32_e32 v11, v28, v19 ; 3E16271C v_mov_b32_e32 v40, v14 ; 7E50030E v_mac_f32_e32 v12, v29, v19 ; 3E18271D v_mov_b32_e32 v44, v21 ; 7E580315 v_mov_b32_e32 v41, v33 ; 7E520321 image_sample_l_o v[26:29], 15, 0, 0, 0, 0, 0, 0, 0, v[30:33], s[36:43], s[32:35] ; F0D00F00 01091A1E image_sample_l_o v[34:37], 15, 0, 0, 0, 0, 0, 0, 0, v[38:41], s[36:43], s[32:35] ; F0D00F00 01092226 v_mov_b32_e32 v45, v33 ; 7E5A0321 image_sample_l_o v[38:41], 15, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[36:43], s[32:35] ; F0D00F00 0109262A s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v15, v38, v18 ; 101E2526 v_mul_f32_e32 v16, v39, v18 ; 10202527 v_mul_f32_e32 v22, v40, v18 ; 102C2528 v_mul_f32_e32 v38, v41, v18 ; 104C2529 v_mov_b32_e32 v30, 0x20002 ; 7E3C02FF 00020002 image_sample_l_o v[39:42], 15, 0, 0, 0, 0, 0, 0, 0, v[30:33], s[36:43], s[32:35] ; F0D00F00 0109271E v_mov_b32_e32 v31, v20 ; 7E3E0314 v_mac_f32_e32 v15, v34, v17 ; 3E1E2322 v_mac_f32_e32 v16, v35, v17 ; 3E202323 v_mac_f32_e32 v22, v36, v17 ; 3E2C2324 v_mac_f32_e32 v38, v37, v17 ; 3E4C2325 v_mov_b32_e32 v32, v21 ; 7E400315 v_mac_f32_e32 v15, v26, v19 ; 3E1E271A v_mac_f32_e32 v16, v27, v19 ; 3E20271B v_mac_f32_e32 v22, v28, v19 ; 3E2C271C image_sample_l_o v[34:37], 15, 0, 0, 0, 0, 0, 0, 0, v[30:33], s[36:43], s[32:35] ; F0D00F00 0109221E v_mov_b32_e32 v31, v13 ; 7E3E030D v_mac_f32_e32 v38, v29, v19 ; 3E4C271D v_mov_b32_e32 v32, v14 ; 7E40030E image_sample_l_o v[26:29], 15, 0, 0, 0, 0, 0, 0, 0, v[30:33], s[36:43], s[32:35] ; F0D00F00 01091A1E s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v13, v26, v17 ; 101A231A v_mul_f32_e32 v14, v27, v17 ; 101C231B v_mul_f32_e32 v20, v28, v17 ; 1028231C v_mul_f32_e32 v17, v29, v17 ; 1022231D v_mac_f32_e32 v13, v34, v18 ; 3E1A2522 v_mac_f32_e32 v14, v35, v18 ; 3E1C2523 v_mac_f32_e32 v20, v36, v18 ; 3E282524 v_mac_f32_e32 v17, v37, v18 ; 3E222525 v_mac_f32_e32 v13, v39, v19 ; 3E1A2727 v_mac_f32_e32 v14, v40, v19 ; 3E1C2728 v_mac_f32_e32 v20, v41, v19 ; 3E282729 v_mac_f32_e32 v17, v42, v19 ; 3E22272A v_mov_b32_e32 v26, v33 ; 7E340321 image_sample_l v18, 4, 0, 0, 0, 0, 0, 0, 0, v[24:27], s[36:43], s[32:35] ; F0900400 01091218 v_mov_b32_e32 v23, 0x30003 ; 7E2E02FF 00030003 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v7, v18, v7 ; 100E0F12 v_mul_f32_e32 v8, v18, v8 ; 10101112 v_mul_f32_e32 v9, v18, v9 ; 10121312 v_mov_b32_e32 v26, v33 ; 7E340321 image_sample_l_o v[23:26], 15, 0, 0, 0, 0, 0, 0, 0, v[23:26], s[36:43], s[32:35] ; F0D00F00 01091717 v_cndmask_b32_e64 v18, 0, 1.0, s[2:3] ; D2000012 0009E480 v_sub_f32_e64 v10, |v10|, v18 ; D208010A 0002250A v_add_f32_e32 v5, v5, v10 ; 060A1505 v_cmp_gt_f32_e32 vcc, 0, v5 ; 7C080A80 v_cndmask_b32_e64 v10, 0, 1.0, vcc ; D200000A 01A9E480 v_sub_f32_e64 v19, v10, |v5| ; D2080213 00020B0A v_mad_f32 v19, v19, v6, 1.0 ; D2820013 03CA0D13 v_cndmask_b32_e64 v21, 0, 1.0, s[0:1] ; D2000015 0001E480 v_sub_f32_e64 v27, v21, |v1| ; D208021B 00020315 v_mac_f32_e32 v19, v6, v27 ; 3E263706 v_sub_f32_e64 v5, |v5|, v10 ; D2080105 00021505 v_sub_f32_e64 v1, |v1|, v21 ; D2080101 00022B01 v_mul_f32_e32 v5, v6, v5 ; 100A0B06 v_mul_f32_e32 v1, v6, v1 ; 10020306 v_mul_f32_e32 v6, v5, v5 ; 100C0B05 v_mac_f32_e32 v6, v1, v1 ; 3E0C0301 v_mac_f32_e32 v6, v19, v19 ; 3E0C2713 v_rsq_clamp_f32_e32 v6, v6 ; 7E0C5906 exp 15, 32, 0, 0, 0, v33, v33, v33, v33 ; F800020F 21212121 exp 15, 33, 0, 0, 0, v0, v3, v33, v33 ; F800021F 21210300 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_mad_f32 v0, -2.0, v10, 1.0 ; D2820000 03CA14F5 v_mul_f32_e32 v3, v6, v5 ; 10060B06 v_mul_f32_e32 v0, v0, v3 ; 10000700 v_mad_f32 v3, -2.0, v21, 1.0 ; D2820003 03CA2AF5 v_mul_f32_e32 v1, v6, v1 ; 10020306 v_mul_f32_e32 v1, v3, v1 ; 10020303 v_mul_f32_e32 v3, v4, v1 ; 10060304 v_mac_f32_e32 v3, v2, v0 ; 3E060102 v_mul_f32_e32 v4, v4, v8 ; 10081104 v_mac_f32_e32 v4, v2, v7 ; 3E080F02 v_mul_f32_e32 v2, v16, v1 ; 10040310 v_mac_f32_e32 v2, v15, v0 ; 3E04010F v_mul_f32_e32 v5, v16, v8 ; 100A1110 v_mac_f32_e32 v5, v15, v7 ; 3E0A0F0F v_mul_f32_e32 v1, v14, v1 ; 1002030E v_mul_f32_e32 v8, v14, v8 ; 1010110E v_mac_f32_e32 v1, v13, v0 ; 3E02010D v_mac_f32_e32 v8, v13, v7 ; 3E100F0D v_mul_f32_e32 v0, v6, v19 ; 10002706 v_mad_f32 v6, -2.0, v18, 1.0 ; D2820006 03CA24F5 v_mul_f32_e32 v0, v0, v6 ; 10000D00 v_mac_f32_e32 v3, v11, v0 ; 3E06010B v_mac_f32_e32 v4, v11, v9 ; 3E08130B v_mac_f32_e32 v2, v22, v0 ; 3E040116 v_mac_f32_e32 v5, v22, v9 ; 3E0A1316 v_mac_f32_e32 v1, v20, v0 ; 3E020114 v_mac_f32_e32 v8, v20, v9 ; 3E101314 v_mul_f32_e32 v0, v3, v3 ; 10000703 v_mac_f32_e32 v0, v2, v2 ; 3E000502 v_mac_f32_e32 v0, v1, v1 ; 3E000301 v_rsq_clamp_f32_e32 v0, v0 ; 7E005900 v_add_f32_e32 v4, v12, v4 ; 0608090C v_add_f32_e32 v5, v38, v5 ; 060A0B26 v_add_f32_e32 v6, v17, v8 ; 060C1111 v_mul_f32_e32 v3, v0, v3 ; 10060700 v_mul_f32_e32 v2, v0, v2 ; 10040500 v_mul_f32_e32 v0, v0, v1 ; 10000300 v_subrev_f32_e32 v1, s28, v4 ; 0A02081C v_subrev_f32_e32 v7, s26, v5 ; 0A0E0A1A v_subrev_f32_e32 v8, s27, v6 ; 0A100C1B v_mul_f32_e32 v9, v1, v1 ; 10120301 v_mac_f32_e32 v9, v7, v7 ; 3E120F07 v_mac_f32_e32 v9, v8, v8 ; 3E121108 v_rsq_clamp_f32_e32 v9, v9 ; 7E125909 v_mul_f32_e32 v10, s25, v3 ; 10140619 v_mac_f32_e32 v10, s24, v2 ; 3E140418 v_mac_f32_e32 v10, s23, v0 ; 3E140017 v_mul_f32_e32 v11, v3, v10 ; 10161503 v_mad_f32 v11, 2.0, v11, -s25 ; D282000B 806616F4 v_mul_f32_e32 v12, v9, v1 ; 10180309 v_mul_f32_e32 v11, v11, v12 ; 1016190B v_mul_f32_e32 v12, v2, v10 ; 10181502 v_mad_f32 v12, 2.0, v12, -s24 ; D282000C 806218F4 v_mul_f32_e32 v13, v9, v7 ; 101A0F09 v_mac_f32_e32 v11, v12, v13 ; 3E161B0C v_mul_f32_e32 v10, v0, v10 ; 10141500 v_mad_f32 v10, 2.0, v10, -s23 ; D282000A 805E14F4 v_mul_f32_e32 v9, v9, v8 ; 10121109 v_mac_f32_e32 v11, v10, v9 ; 3E16130A v_mul_f32_e32 v1, s22, v1 ; 10020216 v_add_f32_e64 v9, 0, v11 clamp ; D2060809 00021680 v_log_f32_e32 v9, v9 ; 7E124F09 v_mac_f32_e32 v1, s9, v7 ; 3E020E09 v_mac_f32_e32 v1, s10, v8 ; 3E02100A exp 15, 34, 0, 0, 0, v4, v5, v6, v1 ; F800022F 01060504 s_waitcnt expcnt(0) ; BF8C070F v_mul_legacy_f32_e32 v1, 0x41800000, v9 ; 0E0212FF 41800000 v_exp_f32_e32 v1, v1 ; 7E024B01 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 exp 15, 35, 0, 0, 0, v3, v2, v0, v1 ; F800023F 01000203 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s14, v5 ; 10000A0E v_mul_f32_e32 v1, s20, v5 ; 10020A14 v_mul_f32_e32 v2, s7, v5 ; 10040A07 v_mul_f32_e32 v3, s8, v5 ; 10060A08 v_mac_f32_e32 v0, s15, v4 ; 3E00080F v_mac_f32_e32 v1, s11, v4 ; 3E02080B v_mac_f32_e32 v2, s12, v4 ; 3E04080C v_mac_f32_e32 v3, s13, v4 ; 3E06080D v_mac_f32_e32 v0, s21, v6 ; 3E000C15 v_mac_f32_e32 v1, s6, v6 ; 3E020C06 v_mac_f32_e32 v2, s5, v6 ; 3E040C05 v_mac_f32_e32 v3, s16, v6 ; 3E060C10 v_add_f32_e32 v0, s4, v0 ; 06000004 v_add_f32_e32 v1, s17, v1 ; 06020211 v_add_f32_e32 v2, s18, v2 ; 06040412 v_add_f32_e32 v3, s19, v3 ; 06060613 v_xor_b32_e32 v2, 0x80000000, v2 ; 3A0404FF 80000000 v_mad_f32 v3, 2.0, v3, -v0 ; D2820003 840206F4 exp 15, 36, 0, 0, 0, v23, v24, v25, v26 ; F800024F 1A191817 exp 15, 37, 0, 0, 0, v33, v33, v33, v33 ; F800025F 21212121 exp 15, 12, 0, 0, 0, v1, v2, v3, v0 ; F80000CF 00030201 exp 15, 13, 0, 1, 0, v33, v33, v33, v33 ; F80008DF 21212121 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 48 Code Size: 1424 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0xB last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL OUT[2], COLOR[2] DCL OUT[3], COLOR[3] DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[1][0..3] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0..6], LOCAL IMM[0] UINT32 {0, 16, 48, 44} IMM[1] FLT32 { 0.5000, 0.0000, 1.0000, 0.0000} IMM[2] UINT32 {4, 0, 0, 0} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MUL TEMP[1].xyz, CONST[1][1].xyzz, TEMP[0].xyzz 3: MUL TEMP[1].xyz, TEMP[1].xyzz, IN[3].xyzz 4: MOV TEMP[2].xy, IN[0].xyyy 5: TEX TEMP[2].xyz, TEMP[2], SAMP[1], 2D 6: MUL TEMP[3].x, CONST[1][3].xxxx, TEMP[2].xxxx 7: MUL TEMP[4].x, CONST[1][2].wwww, TEMP[2].zzzz 8: DP3 TEMP[5].x, IN[2].xyzz, IN[2].xyzz 9: RSQ TEMP[5].x, TEMP[5].xxxx 10: MUL TEMP[5].xyz, IN[2].xyzz, TEMP[5].xxxx 11: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].xxxx 12: MOV TEMP[6].w, IMM[1].yyyy 13: MOV TEMP[6].x, TEMP[5].xxxx 14: MOV TEMP[6].y, TEMP[5].yyyy 15: MOV TEMP[6].z, TEMP[5].zzzz 16: FSLT TEMP[0].x, TEMP[0].wwww, CONST[1][0].yyyy 17: AND TEMP[0].x, TEMP[0].xxxx, IMM[1].zzzz 18: KILL_IF -TEMP[0].xxxx 19: MOV TEMP[0].w, IMM[1].yyyy 20: MOV TEMP[0].x, TEMP[1].xxxx 21: MOV TEMP[0].y, TEMP[1].yyyy 22: MOV TEMP[0].z, TEMP[1].zzzz 23: MOV TEMP[1].w, IMM[1].yyyy 24: MOV TEMP[1].x, TEMP[3].xxxx 25: MOV TEMP[1].y, TEMP[2].yyyy 26: MOV TEMP[1].z, TEMP[4].xxxx 27: MOV OUT[2], IN[1].wwww 28: MOV OUT[0], TEMP[0] 29: MOV OUT[3], TEMP[6] 30: MOV OUT[1], TEMP[1] 31: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %30 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %31 = load <32 x i8>, <32 x i8> addrspace(2)* %30, align 32, !tbaa !0 %32 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0 %34 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %35 = bitcast <8 x i32> addrspace(2)* %34 to <32 x i8> addrspace(2)* %36 = load <32 x i8>, <32 x i8> addrspace(2)* %35, align 32, !tbaa !0 %37 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %38 = bitcast <4 x i32> addrspace(2)* %37 to <16 x i8> addrspace(2)* %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 %40 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %44 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %45 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %46 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %47 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %48 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %49 = bitcast float %40 to i32 %50 = bitcast float %41 to i32 %51 = insertelement <2 x i32> undef, i32 %49, i32 0 %52 = insertelement <2 x i32> %51, i32 %50, i32 1 %53 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %52, <32 x i8> %31, <16 x i8> %33, i32 2) %54 = extractelement <4 x float> %53, i32 0 %55 = extractelement <4 x float> %53, i32 1 %56 = extractelement <4 x float> %53, i32 2 %57 = extractelement <4 x float> %53, i32 3 %58 = fmul float %25, %54 %59 = fmul float %26, %55 %60 = fmul float %27, %56 %61 = fmul float %58, %46 %62 = fmul float %59, %47 %63 = fmul float %60, %48 %64 = bitcast float %40 to i32 %65 = bitcast float %41 to i32 %66 = insertelement <2 x i32> undef, i32 %64, i32 0 %67 = insertelement <2 x i32> %66, i32 %65, i32 1 %68 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %67, <32 x i8> %36, <16 x i8> %39, i32 2) %69 = extractelement <4 x float> %68, i32 0 %70 = extractelement <4 x float> %68, i32 1 %71 = extractelement <4 x float> %68, i32 2 %72 = fmul float %29, %69 %73 = fmul float %28, %71 %74 = fmul float %43, %43 %75 = fmul float %44, %44 %76 = fadd float %75, %74 %77 = fmul float %45, %45 %78 = fadd float %76, %77 %79 = call float @llvm.AMDGPU.rsq.clamped.f32(float %78) %80 = fmul float %43, %79 %81 = fmul float %44, %79 %82 = fmul float %45, %79 %83 = fmul float %80, 5.000000e-01 %84 = fadd float %83, 5.000000e-01 %85 = fmul float %81, 5.000000e-01 %86 = fadd float %85, 5.000000e-01 %87 = fmul float %82, 5.000000e-01 %88 = fadd float %87, 5.000000e-01 %89 = fcmp olt float %57, %24 %90 = select i1 %89, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %90) %91 = call i32 @llvm.SI.packf16(float %61, float %62) %92 = bitcast i32 %91 to float %93 = call i32 @llvm.SI.packf16(float %63, float 0.000000e+00) %94 = bitcast i32 %93 to float %95 = call i32 @llvm.SI.packf16(float %72, float %70) %96 = bitcast i32 %95 to float %97 = call i32 @llvm.SI.packf16(float %73, float 0.000000e+00) %98 = bitcast i32 %97 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %92, float %94, float %92, float %94) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 1, i32 1, float %96, float %98, float %96, float %98) %99 = call i32 @llvm.SI.packf16(float %84, float %86) %100 = bitcast i32 %99 to float %101 = call i32 @llvm.SI.packf16(float %88, float 0.000000e+00) %102 = bitcast i32 %101 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 2, i32 0, float %42, float %42, float %42, float %42) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 3, i32 1, float %100, float %102, float %100, float %102) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 3, 1, [m0] ; C8100700 v_interp_p2_f32 v4, [v4], v1, 3, 1, [m0] ; C8110701 v_interp_p1_f32 v5, v0, 0, 2, [m0] ; C8140800 v_interp_p2_f32 v5, [v5], v1, 0, 2, [m0] ; C8150801 v_interp_p1_f32 v6, v0, 1, 2, [m0] ; C8180900 v_interp_p2_f32 v6, [v6], v1, 1, 2, [m0] ; C8190901 v_interp_p1_f32 v7, v0, 2, 2, [m0] ; C81C0A00 v_interp_p2_f32 v7, [v7], v1, 2, 2, [m0] ; C81D0A01 v_interp_p1_f32 v8, v0, 0, 3, [m0] ; C8200C00 v_interp_p2_f32 v8, [v8], v1, 0, 3, [m0] ; C8210C01 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 v_interp_p1_f32 v9, v0, 1, 3, [m0] ; C8240D00 v_interp_p2_f32 v9, [v9], v1, 1, 3, [m0] ; C8250D01 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_buffer_load_dword s6, s[0:3], 0x6 ; C2030106 s_buffer_load_dword s7, s[0:3], 0x1 ; C2038101 v_interp_p1_f32 v0, v0, 2, 3, [m0] ; C8000E00 v_interp_p2_f32 v0, [v0], v1, 2, 3, [m0] ; C8010E01 image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[8:11] ; F0800F00 00440A02 image_sample v[1:3], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[12:15] ; F0800700 00660102 s_buffer_load_dword s8, s[0:3], 0xb ; C204010B s_buffer_load_dword s0, s[0:3], 0xc ; C200010C s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_mul_f32_e32 v10, s4, v10 ; 10141404 v_mul_f32_e32 v11, s5, v11 ; 10161605 v_mul_f32_e32 v12, s6, v12 ; 10181806 v_cmp_gt_f32_e32 vcc, s7, v13 ; 7C081A07 v_mul_f32_e32 v13, v5, v5 ; 101A0B05 v_mac_f32_e32 v13, v6, v6 ; 3E1A0D06 v_mac_f32_e32 v13, v7, v7 ; 3E1A0F07 v_rsq_clamp_f32_e32 v13, v13 ; 7E1A590D v_mul_f32_e32 v8, v8, v10 ; 10101508 v_mul_f32_e32 v9, v9, v11 ; 10121709 v_mul_f32_e32 v0, v0, v12 ; 10001900 v_mul_f32_e32 v5, v13, v5 ; 100A0B0D v_mul_f32_e32 v6, v13, v6 ; 100C0D0D v_mul_f32_e32 v7, v13, v7 ; 100E0F0D s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, s0, v1 ; 10020200 v_mul_f32_e32 v3, s8, v3 ; 10060608 v_mad_f32 v5, 0.5, v5, 0.5 ; D2820005 03C20AF0 v_mad_f32 v6, 0.5, v6, 0.5 ; D2820006 03C20CF0 v_mad_f32 v7, 0.5, v7, 0.5 ; D2820007 03C20EF0 v_cndmask_b32_e64 v10, 0, -1.0, vcc ; D200000A 01A9E680 v_cmpx_le_f32_e32 vcc, 0, v10 ; 7C261480 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e32 v2, v8, v9 ; 5E041308 v_cvt_pkrtz_f16_f32_e64 v0, v0, 0 ; D25E0000 00010100 exp 15, 0, 1, 0, 0, v2, v0, v2, v0 ; F800040F 00020002 s_waitcnt expcnt(0) ; BF8C070F v_cvt_pkrtz_f16_f32_e64 v0, v3, 0 ; D25E0000 00010103 exp 15, 1, 1, 0, 0, v1, v0, v1, v0 ; F800041F 00010001 s_waitcnt expcnt(0) ; BF8C070F v_cvt_pkrtz_f16_f32_e32 v0, v5, v6 ; 5E000D05 exp 15, 2, 0, 0, 0, v4, v4, v4, v4 ; F800002F 04040404 v_cvt_pkrtz_f16_f32_e64 v1, v7, 0 ; D25E0001 00010107 exp 15, 3, 1, 1, 1, v0, v1, v0, v1 ; F8001C3F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 16 Code Size: 332 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0xB last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[5], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL OUT[2], COLOR[2] DCL OUT[3], COLOR[3] DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL CONST[1][0..3] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0..6], LOCAL IMM[0] UINT32 {0, 16, 48, 44} IMM[1] FLT32 { 2.0000, -1.0000, 1.0000, 0.5000} IMM[2] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: DP3 TEMP[0].x, IN[2].xyzz, IN[2].xyzz 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MUL TEMP[0].xyz, IN[2].xyzz, TEMP[0].xxxx 3: MOV TEMP[1].xy, IN[0].xyyy 4: TEX TEMP[1], TEMP[1], SAMP[0], 2D 5: MOV TEMP[2].w, TEMP[1].wwww 6: MUL TEMP[2].xyz, CONST[1][1].xyzz, TEMP[1].xyzz 7: MUL TEMP[1].xyz, TEMP[2], IN[3] 8: MOV TEMP[2].xy, IN[0].xyyy 9: TEX TEMP[2].xyz, TEMP[2], SAMP[1], 2D 10: MUL TEMP[3].xyz, TEMP[0].zxyy, IN[4].yzxx 11: MAD TEMP[3].xyz, TEMP[0].yzxx, IN[4].zxyy, -TEMP[3].xyzz 12: MOV TEMP[4].xy, IN[0].xyyy 13: TEX TEMP[4].yw, TEMP[4], SAMP[2], 2D 14: MAD TEMP[4].xy, TEMP[4].wyyy, IMM[1].xxxx, IMM[1].yyyy 15: MOV TEMP[5].x, TEMP[4].xxxx 16: MOV TEMP[5].y, -TEMP[4].yyyy 17: MUL TEMP[5].xy, TEMP[5].xyyy, CONST[1][0].xxxx 18: MOV TEMP[6].x, TEMP[5].xxxx 19: MOV TEMP[6].y, TEMP[5].yyyy 20: DP2 TEMP[4].x, TEMP[4].xyyy, TEMP[4].xyyy 21: ADD TEMP[4].x, IMM[1].zzzz, -TEMP[4].xxxx 22: MOV_SAT TEMP[4].x, TEMP[4].xxxx 23: SQRT TEMP[4].x, TEMP[4].xxxx 24: MOV TEMP[6].z, TEMP[4].xxxx 25: DP3 TEMP[4].x, TEMP[6].xyzz, TEMP[6].xyzz 26: RSQ TEMP[4].x, TEMP[4].xxxx 27: MUL TEMP[4].xyz, TEMP[6].xyzz, TEMP[4].xxxx 28: DP3 TEMP[5].x, IN[4].xyzz, IN[4].xyzz 29: RSQ TEMP[5].x, TEMP[5].xxxx 30: MUL TEMP[5].xyz, IN[4].xyzz, TEMP[5].xxxx 31: DP3 TEMP[6].x, TEMP[3].xyzz, TEMP[3].xyzz 32: RSQ TEMP[6].x, TEMP[6].xxxx 33: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[6].xxxx 34: MUL TEMP[3].xyz, IN[4].wwww, TEMP[3].xyzz 35: MUL TEMP[3].xyz, TEMP[4].yyyy, TEMP[3].xyzz 36: MAD TEMP[3].xyz, TEMP[4].xxxx, TEMP[5].xyzz, TEMP[3].xyzz 37: MAD TEMP[0].xyz, TEMP[0].xyzz, TEMP[4].zzzz, TEMP[3].xyzz 38: DP3 TEMP[3].x, TEMP[0].xyzz, TEMP[0].xyzz 39: RSQ TEMP[3].x, TEMP[3].xxxx 40: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[3].xxxx 41: MAD TEMP[0].xyz, TEMP[0].xyzz, IMM[1].wwww, IMM[1].wwww 42: MOV TEMP[3].w, IMM[2].xxxx 43: MOV TEMP[3].x, TEMP[0].xxxx 44: MOV TEMP[3].y, TEMP[0].yyyy 45: MOV TEMP[3].z, TEMP[0].zzzz 46: MOV TEMP[0].w, IMM[2].xxxx 47: MOV TEMP[0].x, TEMP[1].xxxx 48: MOV TEMP[0].y, TEMP[1].yyyy 49: MOV TEMP[0].z, TEMP[1].zzzz 50: MOV TEMP[1].w, IMM[2].xxxx 51: MUL TEMP[1].x, CONST[1][3].xxxx, TEMP[2].xxxx 52: MOV TEMP[1].y, TEMP[2].yyyy 53: MUL TEMP[2].x, CONST[1][2].wwww, TEMP[2].zzzz 54: MOV TEMP[1].z, TEMP[2].xxxx 55: MOV OUT[2], IN[1].wwww 56: MOV OUT[0], TEMP[0] 57: MOV OUT[3], TEMP[3] 58: MOV OUT[1], TEMP[1] 59: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %30 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %31 = load <32 x i8>, <32 x i8> addrspace(2)* %30, align 32, !tbaa !0 %32 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0 %34 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %35 = bitcast <8 x i32> addrspace(2)* %34 to <32 x i8> addrspace(2)* %36 = load <32 x i8>, <32 x i8> addrspace(2)* %35, align 32, !tbaa !0 %37 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %38 = bitcast <4 x i32> addrspace(2)* %37 to <16 x i8> addrspace(2)* %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 %40 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %41 = bitcast <8 x i32> addrspace(2)* %40 to <32 x i8> addrspace(2)* %42 = load <32 x i8>, <32 x i8> addrspace(2)* %41, align 32, !tbaa !0 %43 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %44 = bitcast <4 x i32> addrspace(2)* %43 to <16 x i8> addrspace(2)* %45 = load <16 x i8>, <16 x i8> addrspace(2)* %44, align 16, !tbaa !0 %46 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %47 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %48 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %49 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %50 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %51 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %52 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %53 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %54 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %55 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %56 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %57 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %58 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %59 = fmul float %49, %49 %60 = fmul float %50, %50 %61 = fadd float %60, %59 %62 = fmul float %51, %51 %63 = fadd float %61, %62 %64 = call float @llvm.AMDGPU.rsq.clamped.f32(float %63) %65 = fmul float %49, %64 %66 = fmul float %50, %64 %67 = fmul float %51, %64 %68 = bitcast float %46 to i32 %69 = bitcast float %47 to i32 %70 = insertelement <2 x i32> undef, i32 %68, i32 0 %71 = insertelement <2 x i32> %70, i32 %69, i32 1 %72 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %71, <32 x i8> %31, <16 x i8> %33, i32 2) %73 = extractelement <4 x float> %72, i32 0 %74 = extractelement <4 x float> %72, i32 1 %75 = extractelement <4 x float> %72, i32 2 %76 = fmul float %25, %73 %77 = fmul float %26, %74 %78 = fmul float %27, %75 %79 = fmul float %76, %52 %80 = fmul float %77, %53 %81 = fmul float %78, %54 %82 = bitcast float %46 to i32 %83 = bitcast float %47 to i32 %84 = insertelement <2 x i32> undef, i32 %82, i32 0 %85 = insertelement <2 x i32> %84, i32 %83, i32 1 %86 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %85, <32 x i8> %36, <16 x i8> %39, i32 2) %87 = extractelement <4 x float> %86, i32 0 %88 = extractelement <4 x float> %86, i32 1 %89 = extractelement <4 x float> %86, i32 2 %90 = fmul float %67, %56 %91 = fmul float %65, %57 %92 = fmul float %66, %55 %93 = fmul float %66, %57 %94 = fsub float %93, %90 %95 = fmul float %67, %55 %96 = fsub float %95, %91 %97 = fmul float %65, %56 %98 = fsub float %97, %92 %99 = bitcast float %46 to i32 %100 = bitcast float %47 to i32 %101 = insertelement <2 x i32> undef, i32 %99, i32 0 %102 = insertelement <2 x i32> %101, i32 %100, i32 1 %103 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %102, <32 x i8> %42, <16 x i8> %45, i32 2) %104 = extractelement <4 x float> %103, i32 1 %105 = extractelement <4 x float> %103, i32 3 %106 = fmul float %105, 2.000000e+00 %107 = fadd float %106, -1.000000e+00 %108 = fmul float %104, 2.000000e+00 %109 = fadd float %108, -1.000000e+00 %110 = fmul float %107, %24 %111 = fmul float %109, %24 %112 = fmul float %107, %107 %113 = fmul float %109, %109 %114 = fadd float %112, %113 %115 = fsub float 1.000000e+00, %114 %116 = call float @llvm.AMDIL.clamp.(float %115, float 0.000000e+00, float 1.000000e+00) %117 = call float @llvm.sqrt.f32(float %116) %118 = fmul float %110, %110 %119 = fmul float %111, %111 %120 = fadd float %119, %118 %121 = fmul float %117, %117 %122 = fadd float %120, %121 %123 = call float @llvm.AMDGPU.rsq.clamped.f32(float %122) %124 = fmul float %110, %123 %125 = fmul float %111, %123 %126 = fsub float -0.000000e+00, %125 %127 = fmul float %117, %123 %128 = fmul float %55, %55 %129 = fmul float %56, %56 %130 = fadd float %129, %128 %131 = fmul float %57, %57 %132 = fadd float %130, %131 %133 = call float @llvm.AMDGPU.rsq.clamped.f32(float %132) %134 = fmul float %55, %133 %135 = fmul float %56, %133 %136 = fmul float %57, %133 %137 = fmul float %94, %94 %138 = fmul float %96, %96 %139 = fadd float %138, %137 %140 = fmul float %98, %98 %141 = fadd float %139, %140 %142 = call float @llvm.AMDGPU.rsq.clamped.f32(float %141) %143 = fmul float %94, %142 %144 = fmul float %96, %142 %145 = fmul float %98, %142 %146 = fmul float %58, %143 %147 = fmul float %58, %144 %148 = fmul float %58, %145 %149 = fmul float %146, %126 %150 = fmul float %147, %126 %151 = fmul float %148, %126 %152 = fmul float %124, %134 %153 = fadd float %152, %149 %154 = fmul float %124, %135 %155 = fadd float %154, %150 %156 = fmul float %124, %136 %157 = fadd float %156, %151 %158 = fmul float %65, %127 %159 = fadd float %158, %153 %160 = fmul float %66, %127 %161 = fadd float %160, %155 %162 = fmul float %67, %127 %163 = fadd float %162, %157 %164 = fmul float %159, %159 %165 = fmul float %161, %161 %166 = fadd float %165, %164 %167 = fmul float %163, %163 %168 = fadd float %166, %167 %169 = call float @llvm.AMDGPU.rsq.clamped.f32(float %168) %170 = fmul float %159, %169 %171 = fmul float %161, %169 %172 = fmul float %163, %169 %173 = fmul float %170, 5.000000e-01 %174 = fadd float %173, 5.000000e-01 %175 = fmul float %171, 5.000000e-01 %176 = fadd float %175, 5.000000e-01 %177 = fmul float %172, 5.000000e-01 %178 = fadd float %177, 5.000000e-01 %179 = fmul float %29, %87 %180 = fmul float %28, %89 %181 = call i32 @llvm.SI.packf16(float %79, float %80) %182 = bitcast i32 %181 to float %183 = call i32 @llvm.SI.packf16(float %81, float 0.000000e+00) %184 = bitcast i32 %183 to float %185 = call i32 @llvm.SI.packf16(float %179, float %88) %186 = bitcast i32 %185 to float %187 = call i32 @llvm.SI.packf16(float %180, float 0.000000e+00) %188 = bitcast i32 %187 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %182, float %184, float %182, float %184) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 1, i32 1, float %186, float %188, float %186, float %188) %189 = call i32 @llvm.SI.packf16(float %174, float %176) %190 = bitcast i32 %189 to float %191 = call i32 @llvm.SI.packf16(float %178, float 0.000000e+00) %192 = bitcast i32 %191 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 2, i32 0, float %48, float %48, float %48, float %48) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 3, i32 1, float %190, float %192, float %190, float %192) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 3, 1, [m0] ; C8100700 v_interp_p2_f32 v4, [v4], v1, 3, 1, [m0] ; C8110701 v_interp_p1_f32 v5, v0, 0, 2, [m0] ; C8140800 v_interp_p2_f32 v5, [v5], v1, 0, 2, [m0] ; C8150801 v_interp_p1_f32 v6, v0, 1, 2, [m0] ; C8180900 v_interp_p2_f32 v6, [v6], v1, 1, 2, [m0] ; C8190901 v_interp_p1_f32 v7, v0, 2, 2, [m0] ; C81C0A00 v_interp_p2_f32 v7, [v7], v1, 2, 2, [m0] ; C81D0A01 v_interp_p1_f32 v8, v0, 0, 3, [m0] ; C8200C00 v_interp_p2_f32 v8, [v8], v1, 0, 3, [m0] ; C8210C01 v_interp_p1_f32 v9, v0, 1, 3, [m0] ; C8240D00 v_interp_p2_f32 v9, [v9], v1, 1, 3, [m0] ; C8250D01 v_interp_p1_f32 v10, v0, 2, 3, [m0] ; C8280E00 v_interp_p2_f32 v10, [v10], v1, 2, 3, [m0] ; C8290E01 v_interp_p1_f32 v11, v0, 0, 4, [m0] ; C82C1000 v_interp_p2_f32 v11, [v11], v1, 0, 4, [m0] ; C82D1001 v_interp_p1_f32 v12, v0, 1, 4, [m0] ; C8301100 v_interp_p2_f32 v12, [v12], v1, 1, 4, [m0] ; C8311101 s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx4 s[16:19], s[4:5], 0x8 ; C0880508 s_load_dwordx8 s[20:27], s[6:7], 0x0 ; C0CA0700 s_load_dwordx8 s[28:35], s[6:7], 0x8 ; C0CE0708 s_load_dwordx8 s[36:43], s[6:7], 0x10 ; C0D20710 v_interp_p1_f32 v13, v0, 2, 4, [m0] ; C8341200 v_interp_p2_f32 v13, [v13], v1, 2, 4, [m0] ; C8351201 v_interp_p1_f32 v0, v0, 3, 4, [m0] ; C8001300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_buffer_load_dword s6, s[0:3], 0x6 ; C2030106 v_interp_p2_f32 v0, [v0], v1, 3, 4, [m0] ; C8011301 image_sample v[14:16], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[20:27], s[8:11] ; F0800700 00450E02 image_sample v[17:19], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[28:35], s[12:15] ; F0800700 00671102 image_sample v[1:2], 10, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[36:43], s[16:19] ; F0800A00 00890102 s_buffer_load_dword s7, s[0:3], 0xc ; C203810C s_buffer_load_dword s8, s[0:3], 0xb ; C204010B s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100 s_waitcnt vmcnt(2) lgkmcnt(0) ; BF8C0072 v_mul_f32_e32 v3, s4, v14 ; 10061C04 v_mul_f32_e32 v14, s5, v15 ; 101C1E05 v_mul_f32_e32 v15, s6, v16 ; 101E2006 v_mul_f32_e32 v16, v5, v5 ; 10200B05 v_mac_f32_e32 v16, v6, v6 ; 3E200D06 v_mac_f32_e32 v16, v7, v7 ; 3E200F07 v_rsq_clamp_f32_e32 v16, v16 ; 7E205910 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v17, s7, v17 ; 10222207 v_mul_f32_e32 v19, s8, v19 ; 10262608 v_cvt_pkrtz_f16_f32_e32 v17, v17, v18 ; 5E222511 v_mul_f32_e32 v5, v16, v5 ; 100A0B10 v_mul_f32_e32 v6, v16, v6 ; 100C0D10 v_mul_f32_e32 v7, v16, v7 ; 100E0F10 v_mad_f32 v2, 2.0, v2, -1.0 ; D2820002 03CE04F4 v_mad_f32 v1, 2.0, v1, -1.0 ; D2820001 03CE02F4 v_mul_f32_e32 v16, v12, v7 ; 10200F0C v_mad_f32 v16, v6, v13, -v16 ; D2820010 84421B06 v_mul_f32_e32 v18, v13, v5 ; 10240B0D v_mad_f32 v18, v7, v11, -v18 ; D2820012 844A1707 v_mul_f32_e32 v20, v11, v6 ; 10280D0B v_mad_f32 v20, v5, v12, -v20 ; D2820014 84521905 v_mul_f32_e32 v21, v11, v11 ; 102A170B v_mac_f32_e32 v21, v12, v12 ; 3E2A190C v_mac_f32_e32 v21, v13, v13 ; 3E2A1B0D v_rsq_clamp_f32_e32 v21, v21 ; 7E2A5915 v_mul_f32_e32 v22, v16, v16 ; 102C2110 v_mac_f32_e32 v22, v18, v18 ; 3E2C2512 v_mac_f32_e32 v22, v20, v20 ; 3E2C2914 v_rsq_clamp_f32_e32 v22, v22 ; 7E2C5916 v_mul_f32_e32 v11, v21, v11 ; 10161715 v_mul_f32_e32 v12, v21, v12 ; 10181915 v_mul_f32_e32 v13, v21, v13 ; 101A1B15 v_mul_f32_e32 v16, v22, v16 ; 10202116 v_mul_f32_e32 v18, v22, v18 ; 10242516 v_mul_f32_e32 v20, v22, v20 ; 10282916 v_mad_f32 v21, -v1, v1, 1.0 ; D2820015 23CA0301 v_mad_f32 v21, -v2, v2, v21 ; D2820015 24560502 v_mul_f32_e32 v2, s0, v2 ; 10040400 v_mul_f32_e32 v1, s0, v1 ; 10020200 v_add_f32_e64 v21, 0, v21 clamp ; D2060815 00022A80 v_sqrt_f32_e32 v21, v21 ; 7E2A6715 v_mul_f32_e32 v22, v2, v2 ; 102C0502 v_mac_f32_e32 v22, v1, v1 ; 3E2C0301 v_mac_f32_e32 v22, v21, v21 ; 3E2C2B15 v_rsq_clamp_f32_e32 v22, v22 ; 7E2C5916 v_mul_f32_e32 v16, v16, v0 ; 10200110 v_mul_f32_e32 v18, v18, v0 ; 10240112 v_mul_f32_e32 v0, v20, v0 ; 10000114 v_mul_f32_e32 v1, v22, v1 ; 10020316 v_mul_f32_e32 v16, v1, v16 ; 10202101 v_mul_f32_e32 v18, v1, v18 ; 10242501 v_mul_f32_e32 v0, v1, v0 ; 10000101 v_mul_f32_e32 v1, v22, v2 ; 10020516 v_mad_f32 v2, v1, v11, -v16 ; D2820002 84421701 v_mad_f32 v11, v1, v12, -v18 ; D282000B 844A1901 v_mad_f32 v0, v1, v13, -v0 ; D2820000 84021B01 v_mul_f32_e32 v1, v22, v21 ; 10022B16 v_mac_f32_e32 v2, v1, v5 ; 3E040B01 v_mac_f32_e32 v11, v1, v6 ; 3E160D01 v_mac_f32_e32 v0, v1, v7 ; 3E000F01 v_mul_f32_e32 v1, v8, v3 ; 10020708 v_mul_f32_e32 v3, v9, v14 ; 10061D09 v_mul_f32_e32 v5, v10, v15 ; 100A1F0A v_cvt_pkrtz_f16_f32_e32 v1, v1, v3 ; 5E020701 v_mul_f32_e32 v3, v2, v2 ; 10060502 v_mac_f32_e32 v3, v11, v11 ; 3E06170B v_mac_f32_e32 v3, v0, v0 ; 3E060100 v_rsq_clamp_f32_e32 v3, v3 ; 7E065903 v_cvt_pkrtz_f16_f32_e64 v5, v5, 0 ; D25E0005 00010105 exp 15, 0, 1, 0, 0, v1, v5, v1, v5 ; F800040F 05010501 s_waitcnt expcnt(0) ; BF8C070F v_cvt_pkrtz_f16_f32_e64 v1, v19, 0 ; D25E0001 00010113 exp 15, 1, 1, 0, 0, v17, v1, v17, v1 ; F800041F 01110111 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v1, v3, v2 ; 10020503 v_mul_f32_e32 v2, v3, v11 ; 10041703 v_mul_f32_e32 v0, v3, v0 ; 10000103 v_mad_f32 v1, 0.5, v1, 0.5 ; D2820001 03C202F0 v_mad_f32 v2, 0.5, v2, 0.5 ; D2820002 03C204F0 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 exp 15, 2, 0, 0, 0, v4, v4, v4, v4 ; F800002F 04040404 v_mad_f32 v0, 0.5, v0, 0.5 ; D2820000 03C200F0 v_cvt_pkrtz_f16_f32_e64 v0, v0, 0 ; D25E0000 00010100 exp 15, 3, 1, 1, 1, v1, v0, v1, v0 ; F8001C3F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 24 Code Size: 624 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0xB last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL OUT[2], COLOR[2] DCL OUT[3], COLOR[3] DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[1][0..3] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0..6], LOCAL IMM[0] UINT32 {0, 16, 48, 44} IMM[1] FLT32 { 0.5000, 0.0000, 1.0000, 0.0000} IMM[2] UINT32 {4, 0, 0, 0} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MUL TEMP[1].xyz, CONST[1][1].xyzz, TEMP[0].xyzz 3: MUL TEMP[1].xyz, TEMP[1].xyzz, IN[3].xyzz 4: MOV TEMP[2].xy, IN[0].xyyy 5: TEX TEMP[2].xyz, TEMP[2], SAMP[1], 2D 6: MUL TEMP[3].x, CONST[1][3].xxxx, TEMP[2].xxxx 7: MUL TEMP[4].x, CONST[1][2].wwww, TEMP[2].zzzz 8: DP3 TEMP[5].x, IN[2].xyzz, IN[2].xyzz 9: RSQ TEMP[5].x, TEMP[5].xxxx 10: MUL TEMP[5].xyz, IN[2].xyzz, TEMP[5].xxxx 11: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].xxxx 12: MOV TEMP[6].w, IMM[1].yyyy 13: MOV TEMP[6].x, TEMP[5].xxxx 14: MOV TEMP[6].y, TEMP[5].yyyy 15: MOV TEMP[6].z, TEMP[5].zzzz 16: FSLT TEMP[0].x, TEMP[0].wwww, CONST[1][0].yyyy 17: AND TEMP[0].x, TEMP[0].xxxx, IMM[1].zzzz 18: KILL_IF -TEMP[0].xxxx 19: MOV TEMP[0].w, IMM[1].yyyy 20: MOV TEMP[0].x, TEMP[1].xxxx 21: MOV TEMP[0].y, TEMP[1].yyyy 22: MOV TEMP[0].z, TEMP[1].zzzz 23: MOV TEMP[1].w, IMM[1].yyyy 24: MOV TEMP[1].x, TEMP[3].xxxx 25: MOV TEMP[1].y, TEMP[2].yyyy 26: MOV TEMP[1].z, TEMP[4].xxxx 27: MOV OUT[2], IN[1].wwww 28: MOV OUT[0], TEMP[0] 29: MOV OUT[3], TEMP[6] 30: MOV OUT[1], TEMP[1] 31: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %30 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %31 = load <32 x i8>, <32 x i8> addrspace(2)* %30, align 32, !tbaa !0 %32 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0 %34 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %35 = bitcast <8 x i32> addrspace(2)* %34 to <32 x i8> addrspace(2)* %36 = load <32 x i8>, <32 x i8> addrspace(2)* %35, align 32, !tbaa !0 %37 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %38 = bitcast <4 x i32> addrspace(2)* %37 to <16 x i8> addrspace(2)* %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 %40 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %44 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %45 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %46 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %47 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %48 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %49 = bitcast float %40 to i32 %50 = bitcast float %41 to i32 %51 = insertelement <2 x i32> undef, i32 %49, i32 0 %52 = insertelement <2 x i32> %51, i32 %50, i32 1 %53 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %52, <32 x i8> %31, <16 x i8> %33, i32 2) %54 = extractelement <4 x float> %53, i32 0 %55 = extractelement <4 x float> %53, i32 1 %56 = extractelement <4 x float> %53, i32 2 %57 = extractelement <4 x float> %53, i32 3 %58 = fmul float %25, %54 %59 = fmul float %26, %55 %60 = fmul float %27, %56 %61 = fmul float %58, %46 %62 = fmul float %59, %47 %63 = fmul float %60, %48 %64 = bitcast float %40 to i32 %65 = bitcast float %41 to i32 %66 = insertelement <2 x i32> undef, i32 %64, i32 0 %67 = insertelement <2 x i32> %66, i32 %65, i32 1 %68 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %67, <32 x i8> %36, <16 x i8> %39, i32 2) %69 = extractelement <4 x float> %68, i32 0 %70 = extractelement <4 x float> %68, i32 1 %71 = extractelement <4 x float> %68, i32 2 %72 = fmul float %29, %69 %73 = fmul float %28, %71 %74 = fmul float %43, %43 %75 = fmul float %44, %44 %76 = fadd float %75, %74 %77 = fmul float %45, %45 %78 = fadd float %76, %77 %79 = call float @llvm.AMDGPU.rsq.clamped.f32(float %78) %80 = fmul float %43, %79 %81 = fmul float %44, %79 %82 = fmul float %45, %79 %83 = fmul float %80, 5.000000e-01 %84 = fadd float %83, 5.000000e-01 %85 = fmul float %81, 5.000000e-01 %86 = fadd float %85, 5.000000e-01 %87 = fmul float %82, 5.000000e-01 %88 = fadd float %87, 5.000000e-01 %89 = fcmp olt float %57, %24 %90 = select i1 %89, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %90) %91 = call i32 @llvm.SI.packf16(float %61, float %62) %92 = bitcast i32 %91 to float %93 = call i32 @llvm.SI.packf16(float %63, float 0.000000e+00) %94 = bitcast i32 %93 to float %95 = call i32 @llvm.SI.packf16(float %72, float %70) %96 = bitcast i32 %95 to float %97 = call i32 @llvm.SI.packf16(float %73, float 0.000000e+00) %98 = bitcast i32 %97 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %92, float %94, float %92, float %94) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 1, i32 1, float %96, float %98, float %96, float %98) %99 = call i32 @llvm.SI.packf16(float %84, float %86) %100 = bitcast i32 %99 to float %101 = call i32 @llvm.SI.packf16(float %88, float 0.000000e+00) %102 = bitcast i32 %101 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 2, i32 0, float %42, float %42, float %42, float %42) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 3, i32 1, float %100, float %102, float %100, float %102) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 3, 1, [m0] ; C8100700 v_interp_p2_f32 v4, [v4], v1, 3, 1, [m0] ; C8110701 v_interp_p1_f32 v5, v0, 0, 2, [m0] ; C8140800 v_interp_p2_f32 v5, [v5], v1, 0, 2, [m0] ; C8150801 v_interp_p1_f32 v6, v0, 1, 2, [m0] ; C8180900 v_interp_p2_f32 v6, [v6], v1, 1, 2, [m0] ; C8190901 v_interp_p1_f32 v7, v0, 2, 2, [m0] ; C81C0A00 v_interp_p2_f32 v7, [v7], v1, 2, 2, [m0] ; C81D0A01 v_interp_p1_f32 v8, v0, 0, 3, [m0] ; C8200C00 v_interp_p2_f32 v8, [v8], v1, 0, 3, [m0] ; C8210C01 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 v_interp_p1_f32 v9, v0, 1, 3, [m0] ; C8240D00 v_interp_p2_f32 v9, [v9], v1, 1, 3, [m0] ; C8250D01 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_buffer_load_dword s6, s[0:3], 0x6 ; C2030106 s_buffer_load_dword s7, s[0:3], 0x1 ; C2038101 v_interp_p1_f32 v0, v0, 2, 3, [m0] ; C8000E00 v_interp_p2_f32 v0, [v0], v1, 2, 3, [m0] ; C8010E01 image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[8:11] ; F0800F00 00440A02 image_sample v[1:3], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[12:15] ; F0800700 00660102 s_buffer_load_dword s8, s[0:3], 0xb ; C204010B s_buffer_load_dword s0, s[0:3], 0xc ; C200010C s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_mul_f32_e32 v10, s4, v10 ; 10141404 v_mul_f32_e32 v11, s5, v11 ; 10161605 v_mul_f32_e32 v12, s6, v12 ; 10181806 v_cmp_gt_f32_e32 vcc, s7, v13 ; 7C081A07 v_mul_f32_e32 v13, v5, v5 ; 101A0B05 v_mac_f32_e32 v13, v6, v6 ; 3E1A0D06 v_mac_f32_e32 v13, v7, v7 ; 3E1A0F07 v_rsq_clamp_f32_e32 v13, v13 ; 7E1A590D v_mul_f32_e32 v8, v8, v10 ; 10101508 v_mul_f32_e32 v9, v9, v11 ; 10121709 v_mul_f32_e32 v0, v0, v12 ; 10001900 v_mul_f32_e32 v5, v13, v5 ; 100A0B0D v_mul_f32_e32 v6, v13, v6 ; 100C0D0D v_mul_f32_e32 v7, v13, v7 ; 100E0F0D s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, s0, v1 ; 10020200 v_mul_f32_e32 v3, s8, v3 ; 10060608 v_mad_f32 v5, 0.5, v5, 0.5 ; D2820005 03C20AF0 v_mad_f32 v6, 0.5, v6, 0.5 ; D2820006 03C20CF0 v_mad_f32 v7, 0.5, v7, 0.5 ; D2820007 03C20EF0 v_cndmask_b32_e64 v10, 0, -1.0, vcc ; D200000A 01A9E680 v_cmpx_le_f32_e32 vcc, 0, v10 ; 7C261480 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e32 v2, v8, v9 ; 5E041308 v_cvt_pkrtz_f16_f32_e64 v0, v0, 0 ; D25E0000 00010100 exp 15, 0, 1, 0, 0, v2, v0, v2, v0 ; F800040F 00020002 s_waitcnt expcnt(0) ; BF8C070F v_cvt_pkrtz_f16_f32_e64 v0, v3, 0 ; D25E0000 00010103 exp 15, 1, 1, 0, 0, v1, v0, v1, v0 ; F800041F 00010001 s_waitcnt expcnt(0) ; BF8C070F v_cvt_pkrtz_f16_f32_e32 v0, v5, v6 ; 5E000D05 exp 15, 2, 0, 0, 0, v4, v4, v4, v4 ; F800002F 04040404 v_cvt_pkrtz_f16_f32_e64 v1, v7, 0 ; D25E0001 00010107 exp 15, 3, 1, 1, 1, v0, v1, v0, v1 ; F8001C3F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 16 Code Size: 332 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL OUT[4], GENERIC[2] DCL OUT[5], GENERIC[3] DCL OUT[6], GENERIC[4] DCL OUT[7], GENERIC[5] DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0..2] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL DCL TEMP[2..10], LOCAL IMM[0] FLT32 { 0.0000, 0.5000, 255.0000, -128.0000} IMM[1] UINT32 {3, 400, 304, 320} IMM[2] INT32 {2, 8, 1, 0} IMM[3] FLT32 { 1.0000, -64.0000, 0.0159, 2.0000} IMM[4] UINT32 {4, 0, 12, 28} IMM[5] FLT32 { 16.0000, 0.0000, 0.0000, 0.0000} IMM[6] UINT32 {44, 60, 24, 32} IMM[7] INT32 {3, 0, 0, 0} IMM[8] UINT32 {16, 48, 20, 36} IMM[9] UINT32 {52, 8, 40, 56} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MOV TEMP[0].y, IMM[0].xxxx 4: SHL TEMP[2].x, IN[3].xxxx, IMM[2].xxxx 5: UADD TEMP[2].x, TEMP[2].xxxx, IMM[2].yyyy 6: I2F TEMP[2].x, TEMP[2].xxxx 7: ADD TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy 8: MUL TEMP[0].x, TEMP[2].xxxx, CONST[4][25].zzzz 9: ADD TEMP[0].xy, TEMP[0].xyyy, IN[4].xyyy 10: FLR TEMP[2].x, TEMP[0].xxxx 11: ADD TEMP[3].x, TEMP[0].xxxx, -TEMP[2].xxxx 12: MAD TEMP[0].x, TEMP[2].xxxx, CONST[4][25].wwww, TEMP[0].yyyy 13: MOV TEMP[3].y, TEMP[0].xxxx 14: MOV TEMP[0].xy, TEMP[3].xyyy 15: MOV TEMP[0].w, IMM[0].xxxx 16: TXL TEMP[0], TEMP[0], SAMP[0], 2D 17: MOV TEMP[2].xy, TEMP[3].xyyy 18: MOV TEMP[2].w, IMM[0].xxxx 19: TXL TEMP[2], TEMP[2], SAMP[0], 2D, IMM[2].zwz 20: MOV TEMP[3].xy, TEMP[3].xyyy 21: MOV TEMP[3].w, IMM[0].xxxx 22: TXL TEMP[3], TEMP[3], SAMP[0], 2D, IMM[2].xwx 23: MAD TEMP[4], IN[1], IMM[0].zzzz, IMM[0].wwww 24: FSLT TEMP[5], TEMP[4], IMM[0].xxxx 25: AND TEMP[5], TEMP[5], IMM[3].xxxx 26: ABS TEMP[4], TEMP[4] 27: ADD TEMP[4], TEMP[4], -TEMP[5] 28: ADD TEMP[4], TEMP[4], IMM[3].yyyy 29: FSLT TEMP[6], TEMP[4], IMM[0].xxxx 30: AND TEMP[6], TEMP[6], IMM[3].xxxx 31: ABS TEMP[4], TEMP[4] 32: ADD TEMP[4], TEMP[4], -TEMP[6] 33: MUL TEMP[4], TEMP[4], IMM[3].zzzz 34: MUL TEMP[6], TEMP[6], IMM[3].wwww 35: ADD TEMP[6], IMM[3].xxxx, -TEMP[6] 36: MUL TEMP[5], IMM[3].wwww, TEMP[5] 37: ADD TEMP[5].xzw, IMM[3].xxxx, -TEMP[5] 38: MOV TEMP[7].x, TEMP[4].xxxx 39: MOV TEMP[7].y, TEMP[4].yyyy 40: ADD TEMP[8].x, IMM[3].xxxx, -TEMP[4].xxxx 41: ADD TEMP[8].x, TEMP[8].xxxx, -TEMP[4].yyyy 42: MOV TEMP[7].z, TEMP[8].xxxx 43: DP3 TEMP[8].x, TEMP[7].xyzz, TEMP[7].xyzz 44: RSQ TEMP[8].x, TEMP[8].xxxx 45: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[8].xxxx 46: MUL TEMP[8].xy, TEMP[7].xyyy, TEMP[6].xyyy 47: MOV TEMP[9].x, TEMP[4].zzzz 48: MOV TEMP[9].y, TEMP[4].wwww 49: ADD TEMP[10].x, IMM[3].xxxx, -TEMP[4].zzzz 50: ADD TEMP[4].x, TEMP[10].xxxx, -TEMP[4].wwww 51: MOV TEMP[9].z, TEMP[4].xxxx 52: DP3 TEMP[4].x, TEMP[9].xyzz, TEMP[9].xyzz 53: RSQ TEMP[4].x, TEMP[4].xxxx 54: MUL TEMP[4].xyz, TEMP[9].xyzz, TEMP[4].xxxx 55: MUL TEMP[6].xy, TEMP[4].xyyy, TEMP[6].zwww 56: MOV TEMP[9].w, IMM[0].xxxx 57: MOV TEMP[9].x, TEMP[8].xxxx 58: MOV TEMP[9].y, TEMP[8].yyyy 59: MUL TEMP[7].x, TEMP[7].zzzz, TEMP[5].xxxx 60: MOV TEMP[9].z, TEMP[7].xxxx 61: DP4 TEMP[7].x, TEMP[9], TEMP[0] 62: DP4 TEMP[8].x, TEMP[9], TEMP[2] 63: MOV TEMP[7].y, TEMP[8].xxxx 64: DP4 TEMP[8].x, TEMP[9], TEMP[3] 65: MOV TEMP[7].z, TEMP[8].xxxx 66: DP3 TEMP[8].x, TEMP[7].xyzz, TEMP[7].xyzz 67: RSQ TEMP[8].x, TEMP[8].xxxx 68: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[8].xxxx 69: MOV TEMP[8].w, IMM[0].xxxx 70: MOV TEMP[8].x, TEMP[6].xxxx 71: MOV TEMP[8].y, TEMP[6].yyyy 72: MUL TEMP[4].x, TEMP[4].zzzz, TEMP[5].zzzz 73: MOV TEMP[8].z, TEMP[4].xxxx 74: DP4 TEMP[4].x, TEMP[8], TEMP[0] 75: DP4 TEMP[6].x, TEMP[8], TEMP[2] 76: MOV TEMP[4].y, TEMP[6].xxxx 77: DP4 TEMP[6].x, TEMP[8], TEMP[3] 78: MOV TEMP[4].z, TEMP[6].xxxx 79: DP3 TEMP[6].x, TEMP[4].xyzz, TEMP[7].xyzz 80: MUL TEMP[6].xyz, TEMP[6].xxxx, TEMP[7].xyzz 81: ADD TEMP[4].xyz, TEMP[4].xyzz, -TEMP[6].xyzz 82: DP3 TEMP[6].x, TEMP[4].xyzz, TEMP[4].xyzz 83: RSQ TEMP[6].x, TEMP[6].xxxx 84: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[6].xxxx 85: MOV TEMP[6].x, TEMP[4].xxxx 86: MOV TEMP[6].y, TEMP[4].yyyy 87: MOV TEMP[6].z, TEMP[4].zzzz 88: MOV TEMP[6].w, TEMP[5].wwww 89: MOV TEMP[4].xy, IN[4].xyyy 90: MOV TEMP[4].w, IMM[0].xxxx 91: TXL TEMP[4].z, TEMP[4], SAMP[0], 2D 92: MUL TEMP[4].xyz, IN[0].xyzz, TEMP[4].zzzz 93: MOV TEMP[5].w, IMM[3].xxxx 94: MOV TEMP[5].x, TEMP[4].xxxx 95: MOV TEMP[5].y, TEMP[4].yyyy 96: MOV TEMP[5].z, TEMP[4].zzzz 97: DP4 TEMP[0].x, TEMP[5], TEMP[0] 98: DP4 TEMP[2].x, TEMP[5], TEMP[2] 99: DP4 TEMP[3].x, TEMP[5], TEMP[3] 100: MOV TEMP[4].x, TEMP[0].xxxx 101: MOV TEMP[4].y, TEMP[2].xxxx 102: MOV TEMP[4].z, TEMP[3].xxxx 103: ADD TEMP[4].xyz, TEMP[4].xyzz, -CONST[4][19].xyzz 104: MOV TEMP[5].x, TEMP[0].xxxx 105: MOV TEMP[5].y, TEMP[2].xxxx 106: MOV TEMP[5].z, TEMP[3].xxxx 107: DP3 TEMP[8].x, CONST[4][20].xyzz, TEMP[4].xyzz 108: MOV TEMP[5].w, TEMP[8].xxxx 109: MOV TEMP[8].x, TEMP[7].xxxx 110: MOV TEMP[8].y, TEMP[7].yyyy 111: MOV TEMP[8].z, TEMP[7].zzzz 112: DP3 TEMP[9].x, TEMP[4].xyzz, TEMP[4].xyzz 113: RSQ TEMP[9].x, TEMP[9].xxxx 114: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[9].xxxx 115: DP3 TEMP[9].x, TEMP[7].xyzz, CONST[5][0].xyzz 116: MUL TEMP[7].xyz, TEMP[9].xxxx, TEMP[7].xyzz 117: MUL TEMP[7].xyz, IMM[3].wwww, TEMP[7].xyzz 118: ADD TEMP[7].xyz, CONST[5][0].xyzz, -TEMP[7].xyzz 119: DP3 TEMP[4].x, -TEMP[4].xyzz, TEMP[7].xyzz 120: MOV_SAT TEMP[4].x, TEMP[4].xxxx 121: POW TEMP[4].x, TEMP[4].xxxx, IMM[5].xxxx 122: MOV_SAT TEMP[4].x, TEMP[4].xxxx 123: MOV TEMP[8].w, TEMP[4].xxxx 124: MOV TEMP[4].w, IMM[3].xxxx 125: MOV TEMP[4].x, TEMP[0].xxxx 126: MOV TEMP[4].y, TEMP[2].xxxx 127: MOV TEMP[4].z, TEMP[3].xxxx 128: MOV TEMP[0].x, CONST[4][0].wwww 129: MOV TEMP[0].y, CONST[4][1].wwww 130: MOV TEMP[0].z, CONST[4][2].wwww 131: MOV TEMP[0].w, CONST[4][3].wwww 132: DP4 TEMP[0].x, TEMP[4], TEMP[0] 133: MAD TEMP[2].xy, IN[2].xyyy, CONST[1][1].zwww, CONST[1][2].xyyy 134: MOV TEMP[3].xy, IN[4].xyyy 135: MOV TEMP[3].w, IMM[0].xxxx 136: TXL TEMP[3], TEMP[3], SAMP[0], 2D, IMM[7].xyx 137: MOV TEMP[7].x, CONST[4][0].xxxx 138: MOV TEMP[7].y, CONST[4][1].xxxx 139: MOV TEMP[7].z, CONST[4][2].xxxx 140: MOV TEMP[7].w, CONST[4][3].xxxx 141: DP4 TEMP[7].x, TEMP[4], TEMP[7] 142: MOV TEMP[9].x, CONST[4][0].yyyy 143: MOV TEMP[9].y, CONST[4][1].yyyy 144: MOV TEMP[9].z, CONST[4][2].yyyy 145: MOV TEMP[9].w, CONST[4][3].yyyy 146: DP4 TEMP[9].x, TEMP[4], TEMP[9] 147: MOV TEMP[7].y, -TEMP[9].xxxx 148: MOV TEMP[9].x, CONST[4][0].zzzz 149: MOV TEMP[9].y, CONST[4][1].zzzz 150: MOV TEMP[9].z, CONST[4][2].zzzz 151: MOV TEMP[9].w, CONST[4][3].zzzz 152: DP4 TEMP[4].x, TEMP[4], TEMP[9] 153: MAD TEMP[4].x, TEMP[4].xxxx, IMM[3].wwww, -TEMP[0].xxxx 154: MOV TEMP[7].z, TEMP[4].xxxx 155: MOV TEMP[7].w, TEMP[0].xxxx 156: MOV OUT[1], TEMP[1] 157: MOV OUT[2].xy, TEMP[2].xyxx 158: MOV OUT[4], TEMP[8] 159: MOV OUT[6], IMM[0].xxxx 160: MOV OUT[7], TEMP[6] 161: MOV OUT[5], TEMP[3] 162: MOV OUT[0], TEMP[7] 163: MOV OUT[3], TEMP[5] 164: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %17 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4 %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 %19 = call float @llvm.SI.load.const(<16 x i8> %18, i32 0) %20 = call float @llvm.SI.load.const(<16 x i8> %18, i32 4) %21 = call float @llvm.SI.load.const(<16 x i8> %18, i32 8) %22 = call float @llvm.SI.load.const(<16 x i8> %18, i32 12) %23 = call float @llvm.SI.load.const(<16 x i8> %18, i32 16) %24 = call float @llvm.SI.load.const(<16 x i8> %18, i32 20) %25 = call float @llvm.SI.load.const(<16 x i8> %18, i32 24) %26 = call float @llvm.SI.load.const(<16 x i8> %18, i32 28) %27 = call float @llvm.SI.load.const(<16 x i8> %18, i32 32) %28 = call float @llvm.SI.load.const(<16 x i8> %18, i32 36) %29 = call float @llvm.SI.load.const(<16 x i8> %18, i32 40) %30 = call float @llvm.SI.load.const(<16 x i8> %18, i32 44) %31 = call float @llvm.SI.load.const(<16 x i8> %18, i32 48) %32 = call float @llvm.SI.load.const(<16 x i8> %18, i32 52) %33 = call float @llvm.SI.load.const(<16 x i8> %18, i32 56) %34 = call float @llvm.SI.load.const(<16 x i8> %18, i32 60) %35 = call float @llvm.SI.load.const(<16 x i8> %18, i32 304) %36 = call float @llvm.SI.load.const(<16 x i8> %18, i32 308) %37 = call float @llvm.SI.load.const(<16 x i8> %18, i32 312) %38 = call float @llvm.SI.load.const(<16 x i8> %18, i32 320) %39 = call float @llvm.SI.load.const(<16 x i8> %18, i32 324) %40 = call float @llvm.SI.load.const(<16 x i8> %18, i32 328) %41 = call float @llvm.SI.load.const(<16 x i8> %18, i32 408) %42 = call float @llvm.SI.load.const(<16 x i8> %18, i32 412) %43 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 5 %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = call float @llvm.SI.load.const(<16 x i8> %44, i32 0) %46 = call float @llvm.SI.load.const(<16 x i8> %44, i32 4) %47 = call float @llvm.SI.load.const(<16 x i8> %44, i32 8) %48 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %49 = load <8 x i32>, <8 x i32> addrspace(2)* %48, align 32, !tbaa !0 %50 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %51 = load <4 x i32>, <4 x i32> addrspace(2)* %50, align 16, !tbaa !0 %52 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !tbaa !0 %54 = add i32 %5, %7 %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %54) %56 = extractelement <4 x float> %55, i32 0 %57 = extractelement <4 x float> %55, i32 1 %58 = extractelement <4 x float> %55, i32 2 %59 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !tbaa !0 %61 = add i32 %5, %7 %62 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %60, i32 0, i32 %61) %63 = extractelement <4 x float> %62, i32 0 %64 = extractelement <4 x float> %62, i32 1 %65 = extractelement <4 x float> %62, i32 2 %66 = extractelement <4 x float> %62, i32 3 %67 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %68 = load <16 x i8>, <16 x i8> addrspace(2)* %67, align 16, !tbaa !0 %69 = add i32 %5, %7 %70 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %68, i32 0, i32 %69) %71 = extractelement <4 x float> %70, i32 0 %72 = extractelement <4 x float> %70, i32 1 %73 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %74 = load <16 x i8>, <16 x i8> addrspace(2)* %73, align 16, !tbaa !0 %75 = add i32 %5, %7 %76 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %74, i32 0, i32 %75) %77 = extractelement <4 x float> %76, i32 0 %78 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %79 = load <16 x i8>, <16 x i8> addrspace(2)* %78, align 16, !tbaa !0 %80 = add i32 %10, %6 %81 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %79, i32 0, i32 %80) %82 = extractelement <4 x float> %81, i32 0 %83 = extractelement <4 x float> %81, i32 1 %84 = bitcast float %77 to i32 %85 = shl i32 %84, 2 %86 = add i32 %85, 8 %87 = sitofp i32 %86 to float %88 = fadd float %87, 5.000000e-01 %89 = fmul float %88, %41 %90 = fadd float %89, %82 %91 = fadd float %83, 0.000000e+00 %92 = call float @floor(float %90) %93 = fsub float %90, %92 %94 = fmul float %92, %42 %95 = fadd float %94, %91 %96 = bitcast float %93 to i32 %97 = bitcast float %95 to i32 %98 = insertelement <4 x i32> undef, i32 %96, i32 0 %99 = insertelement <4 x i32> %98, i32 %97, i32 1 %100 = insertelement <4 x i32> %99, i32 0, i32 2 %101 = bitcast <8 x i32> %49 to <32 x i8> %102 = bitcast <4 x i32> %51 to <16 x i8> %103 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %100, <32 x i8> %101, <16 x i8> %102, i32 2) %104 = extractelement <4 x float> %103, i32 0 %105 = extractelement <4 x float> %103, i32 1 %106 = extractelement <4 x float> %103, i32 2 %107 = extractelement <4 x float> %103, i32 3 %108 = bitcast float %93 to i32 %109 = bitcast float %95 to i32 %110 = insertelement <4 x i32> , i32 %108, i32 1 %111 = insertelement <4 x i32> %110, i32 %109, i32 2 %112 = insertelement <4 x i32> %111, i32 0, i32 3 %113 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %112, <8 x i32> %49, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %114 = extractelement <4 x float> %113, i32 0 %115 = extractelement <4 x float> %113, i32 1 %116 = extractelement <4 x float> %113, i32 2 %117 = extractelement <4 x float> %113, i32 3 %118 = bitcast float %93 to i32 %119 = bitcast float %95 to i32 %120 = insertelement <4 x i32> , i32 %118, i32 1 %121 = insertelement <4 x i32> %120, i32 %119, i32 2 %122 = insertelement <4 x i32> %121, i32 0, i32 3 %123 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %122, <8 x i32> %49, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %124 = extractelement <4 x float> %123, i32 0 %125 = extractelement <4 x float> %123, i32 1 %126 = extractelement <4 x float> %123, i32 2 %127 = extractelement <4 x float> %123, i32 3 %128 = fmul float %63, 2.550000e+02 %129 = fadd float %128, -1.280000e+02 %130 = fmul float %64, 2.550000e+02 %131 = fadd float %130, -1.280000e+02 %132 = fmul float %65, 2.550000e+02 %133 = fadd float %132, -1.280000e+02 %134 = fmul float %66, 2.550000e+02 %135 = fadd float %134, -1.280000e+02 %136 = fcmp olt float %129, 0.000000e+00 %137 = fcmp olt float %131, 0.000000e+00 %138 = fcmp olt float %133, 0.000000e+00 %139 = fcmp olt float %135, 0.000000e+00 %140 = select i1 %136, float 1.000000e+00, float 0.000000e+00 %141 = select i1 %138, float 1.000000e+00, float 0.000000e+00 %142 = select i1 %139, float 1.000000e+00, float 0.000000e+00 %143 = call float @fabs(float %129) %144 = call float @fabs(float %131) %145 = call float @fabs(float %133) %146 = call float @fabs(float %135) %147 = fsub float %143, %140 %148 = select i1 %137, float -1.000000e+00, float -0.000000e+00 %149 = fadd float %144, %148 %150 = fsub float %145, %141 %151 = fsub float %146, %142 %152 = fadd float %147, -6.400000e+01 %153 = fadd float %149, -6.400000e+01 %154 = fadd float %150, -6.400000e+01 %155 = fadd float %151, -6.400000e+01 %156 = fcmp olt float %152, 0.000000e+00 %157 = fcmp olt float %153, 0.000000e+00 %158 = fcmp olt float %154, 0.000000e+00 %159 = fcmp olt float %155, 0.000000e+00 %160 = select i1 %156, float 1.000000e+00, float 0.000000e+00 %161 = select i1 %157, float 1.000000e+00, float 0.000000e+00 %162 = select i1 %158, float 1.000000e+00, float 0.000000e+00 %163 = select i1 %159, float 1.000000e+00, float 0.000000e+00 %164 = call float @fabs(float %152) %165 = call float @fabs(float %153) %166 = call float @fabs(float %154) %167 = call float @fabs(float %155) %168 = fsub float %164, %160 %169 = fsub float %165, %161 %170 = fsub float %166, %162 %171 = fsub float %167, %163 %172 = fmul float %168, 0x3F90410420000000 %173 = fmul float %169, 0x3F90410420000000 %174 = fmul float %170, 0x3F90410420000000 %175 = fmul float %171, 0x3F90410420000000 %176 = fmul float %160, 2.000000e+00 %177 = fmul float %161, 2.000000e+00 %178 = fmul float %162, 2.000000e+00 %179 = fmul float %163, 2.000000e+00 %180 = fsub float 1.000000e+00, %176 %181 = fsub float 1.000000e+00, %177 %182 = fsub float 1.000000e+00, %178 %183 = fsub float 1.000000e+00, %179 %184 = fmul float %140, 2.000000e+00 %185 = fmul float %141, 2.000000e+00 %186 = fmul float %142, 2.000000e+00 %187 = fsub float 1.000000e+00, %184 %188 = fsub float 1.000000e+00, %185 %189 = fsub float 1.000000e+00, %186 %190 = fsub float 1.000000e+00, %172 %191 = fsub float %190, %173 %192 = fmul float %172, %172 %193 = fmul float %173, %173 %194 = fadd float %193, %192 %195 = fmul float %191, %191 %196 = fadd float %194, %195 %197 = call float @llvm.AMDGPU.rsq.clamped.f32(float %196) %198 = fmul float %172, %197 %199 = fmul float %173, %197 %200 = fmul float %191, %197 %201 = fmul float %198, %180 %202 = fmul float %199, %181 %203 = fsub float 1.000000e+00, %174 %204 = fsub float %203, %175 %205 = fmul float %174, %174 %206 = fmul float %175, %175 %207 = fadd float %206, %205 %208 = fmul float %204, %204 %209 = fadd float %207, %208 %210 = call float @llvm.AMDGPU.rsq.clamped.f32(float %209) %211 = fmul float %174, %210 %212 = fmul float %175, %210 %213 = fmul float %204, %210 %214 = fmul float %211, %182 %215 = fmul float %212, %183 %216 = fmul float %200, %187 %217 = fmul float %201, %104 %218 = fmul float %202, %105 %219 = fadd float %217, %218 %220 = fmul float %216, %106 %221 = fadd float %219, %220 %222 = fmul float %107, 0.000000e+00 %223 = fadd float %221, %222 %224 = fmul float %201, %114 %225 = fmul float %202, %115 %226 = fadd float %224, %225 %227 = fmul float %216, %116 %228 = fadd float %226, %227 %229 = fmul float %117, 0.000000e+00 %230 = fadd float %228, %229 %231 = fmul float %201, %124 %232 = fmul float %202, %125 %233 = fadd float %231, %232 %234 = fmul float %216, %126 %235 = fadd float %233, %234 %236 = fmul float %127, 0.000000e+00 %237 = fadd float %235, %236 %238 = fmul float %223, %223 %239 = fmul float %230, %230 %240 = fadd float %239, %238 %241 = fmul float %237, %237 %242 = fadd float %240, %241 %243 = call float @llvm.AMDGPU.rsq.clamped.f32(float %242) %244 = fmul float %223, %243 %245 = fmul float %230, %243 %246 = fmul float %237, %243 %247 = fmul float %213, %188 %248 = fmul float %214, %104 %249 = fmul float %215, %105 %250 = fadd float %248, %249 %251 = fmul float %247, %106 %252 = fadd float %250, %251 %253 = fmul float %107, 0.000000e+00 %254 = fadd float %252, %253 %255 = fmul float %214, %114 %256 = fmul float %215, %115 %257 = fadd float %255, %256 %258 = fmul float %247, %116 %259 = fadd float %257, %258 %260 = fmul float %117, 0.000000e+00 %261 = fadd float %259, %260 %262 = fmul float %214, %124 %263 = fmul float %215, %125 %264 = fadd float %262, %263 %265 = fmul float %247, %126 %266 = fadd float %264, %265 %267 = fmul float %127, 0.000000e+00 %268 = fadd float %266, %267 %269 = fmul float %254, %244 %270 = fmul float %261, %245 %271 = fadd float %270, %269 %272 = fmul float %268, %246 %273 = fadd float %271, %272 %274 = fmul float %273, %244 %275 = fmul float %273, %245 %276 = fmul float %273, %246 %277 = fsub float %254, %274 %278 = fsub float %261, %275 %279 = fsub float %268, %276 %280 = fmul float %277, %277 %281 = fmul float %278, %278 %282 = fadd float %281, %280 %283 = fmul float %279, %279 %284 = fadd float %282, %283 %285 = call float @llvm.AMDGPU.rsq.clamped.f32(float %284) %286 = fmul float %277, %285 %287 = fmul float %278, %285 %288 = fmul float %279, %285 %289 = bitcast float %82 to i32 %290 = bitcast float %83 to i32 %291 = insertelement <4 x i32> undef, i32 %289, i32 0 %292 = insertelement <4 x i32> %291, i32 %290, i32 1 %293 = insertelement <4 x i32> %292, i32 0, i32 2 %294 = bitcast <8 x i32> %49 to <32 x i8> %295 = bitcast <4 x i32> %51 to <16 x i8> %296 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %293, <32 x i8> %294, <16 x i8> %295, i32 2) %297 = extractelement <4 x float> %296, i32 2 %298 = fmul float %56, %297 %299 = fmul float %57, %297 %300 = fmul float %58, %297 %301 = fmul float %298, %104 %302 = fmul float %299, %105 %303 = fadd float %301, %302 %304 = fmul float %300, %106 %305 = fadd float %303, %304 %306 = fadd float %305, %107 %307 = fmul float %298, %114 %308 = fmul float %299, %115 %309 = fadd float %307, %308 %310 = fmul float %300, %116 %311 = fadd float %309, %310 %312 = fadd float %311, %117 %313 = fmul float %298, %124 %314 = fmul float %299, %125 %315 = fadd float %313, %314 %316 = fmul float %300, %126 %317 = fadd float %315, %316 %318 = fadd float %317, %127 %319 = fsub float %306, %35 %320 = fsub float %312, %36 %321 = fsub float %318, %37 %322 = fmul float %38, %319 %323 = fmul float %39, %320 %324 = fadd float %323, %322 %325 = fmul float %40, %321 %326 = fadd float %324, %325 %327 = fmul float %319, %319 %328 = fmul float %320, %320 %329 = fadd float %328, %327 %330 = fmul float %321, %321 %331 = fadd float %329, %330 %332 = call float @llvm.AMDGPU.rsq.clamped.f32(float %331) %333 = fmul float %319, %332 %334 = fmul float %320, %332 %335 = fmul float %321, %332 %336 = fmul float %244, %45 %337 = fmul float %245, %46 %338 = fadd float %337, %336 %339 = fmul float %246, %47 %340 = fadd float %338, %339 %341 = fmul float %340, %244 %342 = fmul float %340, %245 %343 = fmul float %340, %246 %344 = fmul float %341, 2.000000e+00 %345 = fmul float %342, 2.000000e+00 %346 = fmul float %343, 2.000000e+00 %347 = fsub float %45, %344 %348 = fsub float %46, %345 %349 = fsub float %47, %346 %350 = fmul float %333, %347 %351 = fsub float -0.000000e+00, %350 %352 = fmul float %334, %348 %353 = fsub float %351, %352 %354 = fmul float %335, %349 %355 = fsub float %353, %354 %356 = call float @llvm.AMDIL.clamp.(float %355, float 0.000000e+00, float 1.000000e+00) %357 = call float @llvm.pow.f32(float %356, float 1.600000e+01) %358 = call float @llvm.AMDIL.clamp.(float %357, float 0.000000e+00, float 1.000000e+00) %359 = fmul float %306, %22 %360 = fmul float %312, %26 %361 = fadd float %359, %360 %362 = fmul float %318, %30 %363 = fadd float %361, %362 %364 = fadd float %363, %34 %365 = fmul float %71, %13 %366 = fadd float %365, %15 %367 = fmul float %72, %14 %368 = fadd float %367, %16 %369 = bitcast float %82 to i32 %370 = bitcast float %83 to i32 %371 = insertelement <4 x i32> , i32 %369, i32 1 %372 = insertelement <4 x i32> %371, i32 %370, i32 2 %373 = insertelement <4 x i32> %372, i32 0, i32 3 %374 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %373, <8 x i32> %49, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %375 = extractelement <4 x float> %374, i32 0 %376 = extractelement <4 x float> %374, i32 1 %377 = extractelement <4 x float> %374, i32 2 %378 = extractelement <4 x float> %374, i32 3 %379 = fmul float %306, %19 %380 = fmul float %312, %23 %381 = fadd float %379, %380 %382 = fmul float %318, %27 %383 = fadd float %381, %382 %384 = fadd float %383, %31 %385 = fmul float %306, %20 %386 = fmul float %312, %24 %387 = fadd float %385, %386 %388 = fmul float %318, %28 %389 = fadd float %387, %388 %390 = fadd float %389, %32 %391 = fsub float -0.000000e+00, %390 %392 = fmul float %306, %21 %393 = fmul float %312, %25 %394 = fadd float %392, %393 %395 = fmul float %318, %29 %396 = fadd float %394, %395 %397 = fadd float %396, %33 %398 = fmul float %397, 2.000000e+00 %399 = fsub float %398, %364 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %366, float %368, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %306, float %312, float %318, float %326) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %244, float %245, float %246, float %358) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %375, float %376, float %377, float %378) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %286, float %287, float %288, float %189) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %384, float %391, float %399, float %364) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @floor(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0xc3000000 ; 7E0202FF C3000000 v_mov_b32_e32 v2, 0x437f0000 ; 7E0402FF 437F0000 v_mov_b32_e32 v4, 0x80000000 ; 7E0802FF 80000000 v_mov_b32_e32 v5, 0xc2800000 ; 7E0A02FF C2800000 v_mov_b32_e32 v6, 0x3c820821 ; 7E0C02FF 3C820821 v_add_i32_e32 v0, s10, v0 ; 4A00000A v_add_i32_e32 v3, s11, v3 ; 4A06060B s_load_dwordx4 s[28:31], s[4:5], 0x0 ; C08E0500 s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908 s_load_dwordx4 s[24:27], s[8:9], 0xc ; C08C090C s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910 s_load_dwordx4 s[40:43], s[2:3], 0x4 ; C0940304 s_load_dwordx4 s[44:47], s[2:3], 0x10 ; C0960310 s_load_dwordx4 s[48:51], s[2:3], 0x14 ; C0980314 s_load_dwordx8 s[32:39], s[6:7], 0x0 ; C0D00700 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[7:10], v0, s[12:15], 0 idxen ; E00C2000 80030700 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[10:13], v0, s[16:19], 0 idxen ; E00C2000 80040A00 buffer_load_format_xyzw v[14:17], v0, s[20:23], 0 idxen ; E00C2000 80050E00 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[16:19], v0, s[24:27], 0 idxen ; E00C2000 80061000 buffer_load_format_xyzw v[21:24], v3, s[8:11], 0 idxen ; E00C2000 80021503 s_buffer_load_dword s25, s[40:43], 0x6 ; C20CA906 s_buffer_load_dword s26, s[40:43], 0x7 ; C20D2907 s_buffer_load_dword s1, s[40:43], 0x8 ; C200A908 s_buffer_load_dword s2, s[40:43], 0x9 ; C2012909 s_buffer_load_dword s11, s[44:47], 0x51 ; C205AD51 s_buffer_load_dword s12, s[44:47], 0x52 ; C2062D52 s_buffer_load_dword s27, s[44:47], 0x66 ; C20DAD66 s_buffer_load_dword s40, s[44:47], 0x67 ; C2142D67 s_buffer_load_dword s0, s[44:47], 0xf ; C2002D0F s_buffer_load_dword s24, s[44:47], 0x4c ; C20C2D4C s_buffer_load_dword s23, s[44:47], 0x4d ; C20BAD4D s_buffer_load_dword s22, s[44:47], 0x4e ; C20B2D4E s_buffer_load_dword s18, s[44:47], 0x50 ; C2092D50 s_buffer_load_dword s21, s[48:51], 0x0 ; C20AB100 s_buffer_load_dword s20, s[48:51], 0x1 ; C20A3101 s_buffer_load_dword s19, s[48:51], 0x2 ; C209B102 s_buffer_load_dword s6, s[44:47], 0x5 ; C2032D05 s_buffer_load_dword s7, s[44:47], 0x6 ; C203AD06 s_buffer_load_dword s13, s[44:47], 0x7 ; C206AD07 s_buffer_load_dword s4, s[44:47], 0x8 ; C2022D08 s_buffer_load_dword s3, s[44:47], 0x9 ; C201AD09 s_buffer_load_dword s8, s[44:47], 0x0 ; C2042D00 s_buffer_load_dword s9, s[44:47], 0x1 ; C204AD01 s_buffer_load_dword s10, s[44:47], 0x2 ; C2052D02 s_buffer_load_dword s14, s[44:47], 0x3 ; C2072D03 s_buffer_load_dword s16, s[44:47], 0x4 ; C2082D04 s_buffer_load_dword s15, s[44:47], 0xa ; C207AD0A s_buffer_load_dword s17, s[44:47], 0xb ; C208AD0B s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v0, s1 ; 7E000201 s_buffer_load_dword s5, s[44:47], 0xc ; C202AD0C v_mov_b32_e32 v3, s2 ; 7E060202 s_buffer_load_dword s2, s[44:47], 0xd ; C2012D0D s_buffer_load_dword s1, s[44:47], 0xe ; C200AD0E v_mad_f32 v10, v2, v10, v1 ; D282000A 04061502 v_lshlrev_b32_e32 v16, 2, v16 ; 34202082 v_add_i32_e32 v16, 8, v16 ; 4A202088 v_cvt_f32_i32_e32 v16, v16 ; 7E200B10 v_add_f32_e32 v16, 0.5, v16 ; 062020F0 v_mad_f32 v16, s27, v16, v21 ; D2820010 0456201B v_floor_f32_e32 v17, v16 ; 7E224910 v_subrev_f32_e32 v28, v17, v16 ; 0A382111 v_mad_f32 v29, s40, v17, v22 ; D282001D 045A2228 v_mov_b32_e32 v30, 0 ; 7E3C0280 image_sample_l v[16:19], 15, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[32:39], s[28:31] ; F0900F00 00E8101C v_mov_b32_e32 v27, 0x10001 ; 7E3602FF 00010001 image_sample_l_o v[31:34], 15, 0, 0, 0, 0, 0, 0, 0, v[27:30], s[32:39], s[28:31] ; F0D00F00 00E81F1B v_mov_b32_e32 v27, 0x20002 ; 7E3602FF 00020002 v_mov_b32_e32 v23, v30 ; 7E2E031E image_sample_l_o v[24:27], 15, 0, 0, 0, 0, 0, 0, 0, v[27:30], s[32:39], s[28:31] ; F0D00F00 00E8181B s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 image_sample_l v28, 4, 0, 0, 0, 0, 0, 0, 0, v[21:24], s[32:39], s[28:31] ; F0900400 00E81C15 v_mov_b32_e32 v20, 0x30003 ; 7E2802FF 00030003 v_mad_f32 v11, v2, v11, v1 ; D282000B 04061702 v_mad_f32 v12, v2, v12, v1 ; D282000C 04061902 v_mac_f32_e32 v1, v2, v13 ; 3E021B02 v_mov_b32_e32 v23, v30 ; 7E2E031E image_sample_l_o v[20:23], 15, 0, 0, 0, 0, 0, 0, 0, v[20:23], s[32:39], s[28:31] ; F0D00F00 00E81414 v_mac_f32_e32 v0, s25, v14 ; 3E001C19 v_mac_f32_e32 v3, s26, v15 ; 3E061E1A s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v2, v28, v7 ; 10040F1C v_mul_f32_e32 v7, v28, v8 ; 100E111C v_mul_f32_e32 v8, v28, v9 ; 1010131C v_cmp_gt_f32_e32 vcc, 0, v11 ; 7C081680 v_cndmask_b32_e64 v4, v4, -1.0, vcc ; D2000004 01A9E704 v_add_f32_e64 v4, |v11|, v4 ; D2060104 0002090B v_cmp_gt_f32_e32 vcc, 0, v10 ; 7C081480 v_cndmask_b32_e64 v9, 0, 1.0, vcc ; D2000009 01A9E480 v_sub_f32_e64 v10, |v10|, v9 ; D208010A 0002130A v_cmp_gt_f32_e32 vcc, 0, v12 ; 7C081880 v_cndmask_b32_e64 v11, 0, 1.0, vcc ; D200000B 01A9E480 v_sub_f32_e64 v12, |v12|, v11 ; D208010C 0002170C v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v13, 0, 1.0, vcc ; D200000D 01A9E480 v_sub_f32_e64 v1, |v1|, v13 ; D2080101 00021B01 v_add_f32_e32 v10, v5, v10 ; 06141505 v_add_f32_e32 v4, v5, v4 ; 06080905 v_add_f32_e32 v12, v5, v12 ; 06181905 v_add_f32_e32 v1, v5, v1 ; 06020305 v_cmp_gt_f32_e32 vcc, 0, v10 ; 7C081480 v_cndmask_b32_e64 v5, 0, 1.0, vcc ; D2000005 01A9E480 v_sub_f32_e64 v14, v5, |v10| ; D208020E 00021505 v_mad_f32 v14, v14, v6, 1.0 ; D282000E 03CA0D0E v_cmp_gt_f32_e32 vcc, 0, v4 ; 7C080880 v_cndmask_b32_e64 v15, 0, 1.0, vcc ; D200000F 01A9E480 v_sub_f32_e64 v28, v15, |v4| ; D208021C 0002090F v_mac_f32_e32 v14, v6, v28 ; 3E1C3906 v_cmp_gt_f32_e32 vcc, 0, v12 ; 7C081880 v_cndmask_b32_e64 v28, 0, 1.0, vcc ; D200001C 01A9E480 v_sub_f32_e64 v29, v28, |v12| ; D208021D 0002191C v_mad_f32 v29, v29, v6, 1.0 ; D282001D 03CA0D1D v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v35, 0, 1.0, vcc ; D2000023 01A9E480 v_sub_f32_e64 v36, v35, |v1| ; D2080224 00020323 v_mac_f32_e32 v29, v6, v36 ; 3E3A4906 v_sub_f32_e64 v10, |v10|, v5 ; D208010A 00020B0A v_sub_f32_e64 v4, |v4|, v15 ; D2080104 00021F04 v_sub_f32_e64 v12, |v12|, v28 ; D208010C 0002390C v_sub_f32_e64 v1, |v1|, v35 ; D2080101 00024701 v_mul_f32_e32 v10, v6, v10 ; 10141506 v_mul_f32_e32 v4, v6, v4 ; 10080906 v_mul_f32_e32 v12, v6, v12 ; 10181906 v_mul_f32_e32 v1, v6, v1 ; 10020306 v_mul_f32_e32 v6, v10, v10 ; 100C150A v_mac_f32_e32 v6, v4, v4 ; 3E0C0904 v_mac_f32_e32 v6, v14, v14 ; 3E0C1D0E v_rsq_clamp_f32_e32 v6, v6 ; 7E0C5906 v_mul_f32_e32 v36, v12, v12 ; 1048190C v_mac_f32_e32 v36, v1, v1 ; 3E480301 v_mac_f32_e32 v36, v29, v29 ; 3E483B1D v_rsq_clamp_f32_e32 v36, v36 ; 7E485924 v_mul_f32_e32 v10, v6, v10 ; 10141506 v_mul_f32_e32 v4, v6, v4 ; 10080906 v_mul_f32_e32 v6, v6, v14 ; 100C1D06 v_mul_f32_e32 v12, v36, v12 ; 10181924 v_mul_f32_e32 v1, v36, v1 ; 10020324 v_mul_f32_e32 v14, v36, v29 ; 101C3B24 v_mad_f32 v5, -2.0, v5, 1.0 ; D2820005 03CA0AF5 v_mul_f32_e32 v5, v5, v10 ; 100A1505 v_mad_f32 v10, -2.0, v15, 1.0 ; D282000A 03CA1EF5 v_mul_f32_e32 v4, v10, v4 ; 1008090A v_mad_f32 v10, -2.0, v28, 1.0 ; D282000A 03CA38F5 v_mul_f32_e32 v10, v10, v12 ; 1014190A v_mad_f32 v12, -2.0, v35, 1.0 ; D282000C 03CA46F5 v_mul_f32_e32 v1, v12, v1 ; 1002030C v_mad_f32 v9, -2.0, v9, 1.0 ; D2820009 03CA12F5 v_mul_f32_e32 v6, v9, v6 ; 100C0D09 v_mad_f32 v9, -2.0, v11, 1.0 ; D2820009 03CA16F5 v_mul_f32_e32 v9, v9, v14 ; 10121D09 v_mul_f32_e32 v11, v17, v7 ; 10160F11 v_mac_f32_e32 v11, v16, v2 ; 3E160510 v_mac_f32_e32 v11, v18, v8 ; 3E161112 v_mul_f32_e32 v12, v17, v4 ; 10180911 v_mac_f32_e32 v12, v16, v5 ; 3E180B10 v_mul_f32_e32 v14, v17, v1 ; 101C0311 v_mac_f32_e32 v14, v16, v10 ; 3E1C1510 v_add_f32_e32 v11, v19, v11 ; 06161713 v_mac_f32_e32 v12, v18, v6 ; 3E180D12 v_mac_f32_e32 v14, v18, v9 ; 3E1C1312 v_mul_f32_e32 v15, v32, v7 ; 101E0F20 v_mac_f32_e32 v15, v31, v2 ; 3E1E051F v_mac_f32_e32 v15, v33, v8 ; 3E1E1121 v_mul_f32_e32 v16, v32, v4 ; 10200920 v_mac_f32_e32 v16, v31, v5 ; 3E200B1F v_mul_f32_e32 v17, v32, v1 ; 10220320 v_mac_f32_e32 v17, v31, v10 ; 3E22151F v_add_f32_e32 v15, v34, v15 ; 061E1F22 v_mac_f32_e32 v16, v33, v6 ; 3E200D21 v_mac_f32_e32 v17, v33, v9 ; 3E221321 v_mul_f32_e32 v7, v25, v7 ; 100E0F19 v_mac_f32_e32 v7, v24, v2 ; 3E0E0518 v_mac_f32_e32 v7, v26, v8 ; 3E0E111A v_mul_f32_e32 v2, v25, v4 ; 10040919 v_mac_f32_e32 v2, v24, v5 ; 3E040B18 v_mul_f32_e32 v1, v25, v1 ; 10020319 v_mac_f32_e32 v1, v24, v10 ; 3E021518 v_mac_f32_e32 v2, v26, v6 ; 3E040D1A v_mac_f32_e32 v1, v26, v9 ; 3E02131A v_mul_f32_e32 v4, v12, v12 ; 1008190C v_mac_f32_e32 v4, v16, v16 ; 3E082110 v_mac_f32_e32 v4, v2, v2 ; 3E080502 v_rsq_clamp_f32_e32 v4, v4 ; 7E085904 v_add_f32_e32 v5, v27, v7 ; 060A0F1B exp 15, 32, 0, 0, 0, v30, v30, v30, v30 ; F800020F 1E1E1E1E exp 15, 33, 0, 0, 0, v0, v3, v30, v30 ; F800021F 1E1E0300 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_mul_f32_e32 v0, v4, v12 ; 10001904 v_mul_f32_e32 v3, v4, v16 ; 10062104 v_mul_f32_e32 v2, v4, v2 ; 10040504 v_subrev_f32_e32 v4, s24, v11 ; 0A081618 v_subrev_f32_e32 v6, s23, v15 ; 0A0C1E17 v_mul_f32_e32 v7, v4, v4 ; 100E0904 v_mac_f32_e32 v7, v6, v6 ; 3E0E0D06 v_subrev_f32_e32 v8, s22, v5 ; 0A100A16 v_mac_f32_e32 v7, v8, v8 ; 3E0E1108 v_rsq_clamp_f32_e32 v7, v7 ; 7E0E5907 v_mul_f32_e32 v9, s21, v0 ; 10120015 v_mac_f32_e32 v9, s20, v3 ; 3E120614 v_mac_f32_e32 v9, s19, v2 ; 3E120413 v_mul_f32_e32 v10, v0, v9 ; 10141300 v_mad_f32 v10, 2.0, v10, -s21 ; D282000A 805614F4 v_mul_f32_e32 v12, v7, v4 ; 10180907 v_mul_f32_e32 v10, v10, v12 ; 1014190A v_mul_f32_e32 v12, v3, v9 ; 10181303 v_mad_f32 v12, 2.0, v12, -s20 ; D282000C 805218F4 v_mul_f32_e32 v16, v7, v6 ; 10200D07 v_mac_f32_e32 v10, v12, v16 ; 3E14210C v_mul_f32_e32 v9, v2, v9 ; 10121302 v_mad_f32 v9, 2.0, v9, -s19 ; D2820009 804E12F4 v_mul_f32_e32 v7, v7, v8 ; 100E1107 v_mac_f32_e32 v10, v9, v7 ; 3E140F09 v_mul_f32_e32 v4, s18, v4 ; 10080812 v_add_f32_e64 v7, 0, v10 clamp ; D2060807 00021480 v_log_f32_e32 v7, v7 ; 7E0E4F07 v_mac_f32_e32 v4, s11, v6 ; 3E080C0B v_mac_f32_e32 v4, s12, v8 ; 3E08100C exp 15, 34, 0, 0, 0, v11, v15, v5, v4 ; F800022F 04050F0B s_waitcnt expcnt(0) ; BF8C070F v_mul_legacy_f32_e32 v4, 0x41800000, v7 ; 0E080EFF 41800000 v_exp_f32_e32 v4, v4 ; 7E084B04 v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880 exp 15, 35, 0, 0, 0, v0, v3, v2, v4 ; F800023F 04020300 exp 15, 36, 0, 0, 0, v20, v21, v22, v23 ; F800024F 17161514 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v4, s13, v15 ; 10081E0D v_mul_f32_e32 v6, s16, v15 ; 100C1E10 v_mul_f32_e32 v7, s6, v15 ; 100E1E06 v_mul_f32_e32 v8, s7, v15 ; 10101E07 v_mac_f32_e32 v4, s14, v11 ; 3E08160E v_mac_f32_e32 v6, s8, v11 ; 3E0C1608 v_mac_f32_e32 v7, s9, v11 ; 3E0E1609 v_mac_f32_e32 v8, s10, v11 ; 3E10160A v_mac_f32_e32 v4, s17, v5 ; 3E080A11 v_mac_f32_e32 v6, s4, v5 ; 3E0C0A04 v_mac_f32_e32 v7, s3, v5 ; 3E0E0A03 v_mac_f32_e32 v8, s15, v5 ; 3E100A0F v_mul_f32_e32 v5, v0, v14 ; 100A1D00 v_mac_f32_e32 v5, v3, v17 ; 3E0A2303 v_mac_f32_e32 v5, v2, v1 ; 3E0A0302 v_mad_f32 v0, -v5, v0, v14 ; D2820000 243A0105 v_mad_f32 v3, -v5, v3, v17 ; D2820003 24460705 v_mad_f32 v1, -v5, v2, v1 ; D2820001 24060505 v_add_f32_e32 v2, s0, v4 ; 06040800 v_add_f32_e32 v4, s5, v6 ; 06080C05 v_mul_f32_e32 v5, v0, v0 ; 100A0100 v_mac_f32_e32 v5, v3, v3 ; 3E0A0703 v_mac_f32_e32 v5, v1, v1 ; 3E0A0301 v_rsq_clamp_f32_e32 v5, v5 ; 7E0A5905 v_add_f32_e32 v6, s2, v7 ; 060C0E02 v_mad_f32 v7, -2.0, v13, 1.0 ; D2820007 03CA1AF5 v_add_f32_e32 v8, s1, v8 ; 06101001 v_mul_f32_e32 v0, v5, v0 ; 10000105 v_mul_f32_e32 v3, v5, v3 ; 10060705 v_mul_f32_e32 v1, v5, v1 ; 10020305 exp 15, 37, 0, 0, 0, v30, v30, v30, v30 ; F800025F 1E1E1E1E exp 15, 38, 0, 0, 0, v0, v3, v1, v7 ; F800026F 07010300 s_waitcnt expcnt(0) ; BF8C070F v_xor_b32_e32 v0, 0x80000000, v6 ; 3A000CFF 80000000 v_mad_f32 v1, 2.0, v8, -v2 ; D2820001 840A10F4 exp 15, 12, 0, 0, 0, v4, v0, v1, v2 ; F80000CF 02010004 exp 15, 13, 0, 1, 0, v30, v30, v30, v30 ; F80008DF 1E1E1E1E s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 40 Code Size: 1360 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL IN[5] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL OUT[4], GENERIC[2] DCL OUT[5], GENERIC[3] DCL OUT[6], GENERIC[4] DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0..2] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL DCL TEMP[2..8], LOCAL IMM[0] FLT32 { 0.0000, 0.5000, 255.0000, -128.0000} IMM[1] UINT32 {3, 400, 320, 304} IMM[2] INT32 {2, 8, 1, 0} IMM[3] FLT32 { 1.0000, -64.0000, 0.0159, 2.0000} IMM[4] UINT32 {12, 28, 44, 60} IMM[5] UINT32 {0, 24, 32, 16} IMM[6] INT32 {3, 0, 0, 0} IMM[7] UINT32 {48, 4, 20, 36} IMM[8] UINT32 {52, 8, 40, 56} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MOV TEMP[0].y, IMM[0].xxxx 4: SHL TEMP[2].x, IN[3].xxxx, IMM[2].xxxx 5: UADD TEMP[2].x, TEMP[2].xxxx, IMM[2].yyyy 6: I2F TEMP[2].x, TEMP[2].xxxx 7: ADD TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy 8: MUL TEMP[0].x, TEMP[2].xxxx, CONST[4][25].zzzz 9: ADD TEMP[0].xy, TEMP[0].xyyy, IN[5].xyyy 10: FLR TEMP[2].x, TEMP[0].xxxx 11: ADD TEMP[3].x, TEMP[0].xxxx, -TEMP[2].xxxx 12: MAD TEMP[0].x, TEMP[2].xxxx, CONST[4][25].wwww, TEMP[0].yyyy 13: MOV TEMP[3].y, TEMP[0].xxxx 14: MOV TEMP[0].y, IMM[0].xxxx 15: SHL TEMP[2].x, IN[3].yyyy, IMM[2].xxxx 16: UADD TEMP[2].x, IMM[2].yyyy, TEMP[2].xxxx 17: I2F TEMP[2].x, TEMP[2].xxxx 18: ADD TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx 19: MUL TEMP[0].x, TEMP[2].xxxx, CONST[4][25].zzzz 20: ADD TEMP[0].xy, TEMP[0].xyyy, IN[5].xyyy 21: FLR TEMP[2].x, TEMP[0].xxxx 22: ADD TEMP[4].x, TEMP[0].xxxx, -TEMP[2].xxxx 23: MAD TEMP[0].x, TEMP[2].xxxx, CONST[4][25].wwww, TEMP[0].yyyy 24: MOV TEMP[4].y, TEMP[0].xxxx 25: MOV TEMP[0].y, IMM[0].xxxx 26: SHL TEMP[2].x, IN[3].zzzz, IMM[2].xxxx 27: UADD TEMP[2].x, IMM[2].yyyy, TEMP[2].xxxx 28: I2F TEMP[2].x, TEMP[2].xxxx 29: ADD TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx 30: MUL TEMP[0].x, TEMP[2].xxxx, CONST[4][25].zzzz 31: ADD TEMP[0].xy, TEMP[0].xyyy, IN[5].xyyy 32: FLR TEMP[2].x, TEMP[0].xxxx 33: ADD TEMP[5].x, TEMP[0].xxxx, -TEMP[2].xxxx 34: MAD TEMP[0].x, TEMP[2].xxxx, CONST[4][25].wwww, TEMP[0].yyyy 35: MOV TEMP[5].y, TEMP[0].xxxx 36: MOV TEMP[0].xy, TEMP[5].xyyy 37: MOV TEMP[0].w, IMM[0].xxxx 38: TXL TEMP[0], TEMP[0], SAMP[0], 2D 39: MOV TEMP[2].xy, TEMP[4].xyyy 40: MOV TEMP[2].w, IMM[0].xxxx 41: TXL TEMP[2], TEMP[2], SAMP[0], 2D 42: MOV TEMP[6].xy, TEMP[3].xyyy 43: MOV TEMP[6].w, IMM[0].xxxx 44: TXL TEMP[6], TEMP[6], SAMP[0], 2D 45: MUL TEMP[6], IN[4].xxxx, TEMP[6] 46: MAD TEMP[2], IN[4].yyyy, TEMP[2], TEMP[6] 47: MAD TEMP[0], IN[4].zzzz, TEMP[0], TEMP[2] 48: MOV TEMP[2].xy, TEMP[5].xyyy 49: MOV TEMP[2].w, IMM[0].xxxx 50: TXL TEMP[2], TEMP[2], SAMP[0], 2D, IMM[2].zwz 51: MOV TEMP[6].xy, TEMP[3].xyyy 52: MOV TEMP[6].w, IMM[0].xxxx 53: TXL TEMP[6], TEMP[6], SAMP[0], 2D, IMM[2].zwz 54: MOV TEMP[7].xy, TEMP[4].xyyy 55: MOV TEMP[7].w, IMM[0].xxxx 56: TXL TEMP[7], TEMP[7], SAMP[0], 2D, IMM[2].zwz 57: MUL TEMP[7], IN[4].yyyy, TEMP[7] 58: MAD TEMP[6], IN[4].xxxx, TEMP[6], TEMP[7] 59: MAD TEMP[2], IN[4].zzzz, TEMP[2], TEMP[6] 60: MOV TEMP[5].xy, TEMP[5].xyyy 61: MOV TEMP[5].w, IMM[0].xxxx 62: TXL TEMP[5], TEMP[5], SAMP[0], 2D, IMM[2].xwx 63: MOV TEMP[4].xy, TEMP[4].xyyy 64: MOV TEMP[4].w, IMM[0].xxxx 65: TXL TEMP[4], TEMP[4], SAMP[0], 2D, IMM[2].xwx 66: MOV TEMP[3].xy, TEMP[3].xyyy 67: MOV TEMP[3].w, IMM[0].xxxx 68: TXL TEMP[3], TEMP[3], SAMP[0], 2D, IMM[2].xwx 69: MUL TEMP[3], IN[4].xxxx, TEMP[3] 70: MAD TEMP[3], IN[4].yyyy, TEMP[4], TEMP[3] 71: MAD TEMP[3], IN[4].zzzz, TEMP[5], TEMP[3] 72: MAD TEMP[4], IN[1], IMM[0].zzzz, IMM[0].wwww 73: FSLT TEMP[5], TEMP[4], IMM[0].xxxx 74: AND TEMP[5], TEMP[5], IMM[3].xxxx 75: ABS TEMP[4], TEMP[4] 76: ADD TEMP[4], TEMP[4], -TEMP[5] 77: ADD TEMP[4], TEMP[4], IMM[3].yyyy 78: FSLT TEMP[6], TEMP[4], IMM[0].xxxx 79: AND TEMP[6], TEMP[6], IMM[3].xxxx 80: ABS TEMP[4], TEMP[4] 81: ADD TEMP[4], TEMP[4], -TEMP[6] 82: MUL TEMP[4].xy, TEMP[4], IMM[3].zzzz 83: MOV TEMP[7].x, TEMP[4].xxxx 84: MOV TEMP[7].y, TEMP[4].yyyy 85: ADD TEMP[8].x, IMM[3].xxxx, -TEMP[4].xxxx 86: ADD TEMP[4].x, TEMP[8].xxxx, -TEMP[4].yyyy 87: MOV TEMP[7].z, TEMP[4].xxxx 88: DP3 TEMP[4].x, TEMP[7].xyzz, TEMP[7].xyzz 89: RSQ TEMP[4].x, TEMP[4].xxxx 90: MUL TEMP[4].xyz, TEMP[7].xyzz, TEMP[4].xxxx 91: MUL TEMP[6], TEMP[6], IMM[3].wwww 92: ADD TEMP[6].xy, IMM[3].xxxx, -TEMP[6] 93: MUL TEMP[6].xy, TEMP[4].xyyy, TEMP[6].xyyy 94: MOV TEMP[7].w, IMM[0].xxxx 95: MOV TEMP[7].x, TEMP[6].xxxx 96: MOV TEMP[7].y, TEMP[6].yyyy 97: MUL TEMP[5].x, TEMP[5].xxxx, IMM[3].wwww 98: ADD TEMP[5].x, IMM[3].xxxx, -TEMP[5].xxxx 99: MUL TEMP[4].x, TEMP[5].xxxx, TEMP[4].zzzz 100: MOV TEMP[7].z, TEMP[4].xxxx 101: DP4 TEMP[4].x, TEMP[7], TEMP[0] 102: DP4 TEMP[5].x, TEMP[7], TEMP[2] 103: MOV TEMP[4].y, TEMP[5].xxxx 104: DP4 TEMP[5].x, TEMP[7], TEMP[3] 105: MOV TEMP[4].z, TEMP[5].xxxx 106: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 107: RSQ TEMP[5].x, TEMP[5].xxxx 108: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx 109: MOV TEMP[5].xy, IN[5].xyyy 110: MOV TEMP[5].w, IMM[0].xxxx 111: TXL TEMP[5].z, TEMP[5], SAMP[0], 2D 112: MUL TEMP[5].xyz, IN[0].xyzz, TEMP[5].zzzz 113: MOV TEMP[6].w, IMM[3].xxxx 114: MOV TEMP[6].x, TEMP[5].xxxx 115: MOV TEMP[6].y, TEMP[5].yyyy 116: MOV TEMP[6].z, TEMP[5].zzzz 117: DP4 TEMP[0].x, TEMP[6], TEMP[0] 118: DP4 TEMP[2].x, TEMP[6], TEMP[2] 119: DP4 TEMP[3].x, TEMP[6], TEMP[3] 120: MOV TEMP[5].x, TEMP[0].xxxx 121: MOV TEMP[5].y, TEMP[2].xxxx 122: MOV TEMP[5].z, TEMP[3].xxxx 123: MOV TEMP[6].x, TEMP[0].xxxx 124: MOV TEMP[6].y, TEMP[2].xxxx 125: MOV TEMP[6].z, TEMP[3].xxxx 126: ADD TEMP[5].xyz, TEMP[5].xyzz, -CONST[4][19].xyzz 127: DP3 TEMP[5].x, CONST[4][20].xyzz, TEMP[5].xyzz 128: MOV TEMP[6].w, TEMP[5].xxxx 129: MOV TEMP[5].w, IMM[0].xxxx 130: MOV TEMP[5].x, TEMP[4].xxxx 131: MOV TEMP[5].y, TEMP[4].yyyy 132: MOV TEMP[5].z, TEMP[4].zzzz 133: MOV TEMP[4].w, IMM[3].xxxx 134: MOV TEMP[4].x, TEMP[0].xxxx 135: MOV TEMP[4].y, TEMP[2].xxxx 136: MOV TEMP[4].z, TEMP[3].xxxx 137: MOV TEMP[0].x, CONST[4][0].wwww 138: MOV TEMP[0].y, CONST[4][1].wwww 139: MOV TEMP[0].z, CONST[4][2].wwww 140: MOV TEMP[0].w, CONST[4][3].wwww 141: DP4 TEMP[0].x, TEMP[4], TEMP[0] 142: MAD TEMP[2].xy, IN[2].xyyy, CONST[1][1].zwww, CONST[1][2].xyyy 143: MOV TEMP[3].xy, IN[5].xyyy 144: MOV TEMP[3].w, IMM[0].xxxx 145: TXL TEMP[3], TEMP[3], SAMP[0], 2D, IMM[6].xyx 146: MOV TEMP[7].x, CONST[4][0].xxxx 147: MOV TEMP[7].y, CONST[4][1].xxxx 148: MOV TEMP[7].z, CONST[4][2].xxxx 149: MOV TEMP[7].w, CONST[4][3].xxxx 150: DP4 TEMP[7].x, TEMP[4], TEMP[7] 151: MOV TEMP[8].x, CONST[4][0].yyyy 152: MOV TEMP[8].y, CONST[4][1].yyyy 153: MOV TEMP[8].z, CONST[4][2].yyyy 154: MOV TEMP[8].w, CONST[4][3].yyyy 155: DP4 TEMP[8].x, TEMP[4], TEMP[8] 156: MOV TEMP[7].y, -TEMP[8].xxxx 157: MOV TEMP[8].x, CONST[4][0].zzzz 158: MOV TEMP[8].y, CONST[4][1].zzzz 159: MOV TEMP[8].z, CONST[4][2].zzzz 160: MOV TEMP[8].w, CONST[4][3].zzzz 161: DP4 TEMP[4].x, TEMP[4], TEMP[8] 162: MAD TEMP[4].x, IMM[3].wwww, TEMP[4].xxxx, -TEMP[0].xxxx 163: MOV TEMP[7].z, TEMP[4].xxxx 164: MOV TEMP[7].w, TEMP[0].xxxx 165: MOV OUT[1], TEMP[1] 166: MOV OUT[2].xy, TEMP[2].xyxx 167: MOV OUT[4], TEMP[5] 168: MOV OUT[6], IMM[0].xxxx 169: MOV OUT[5], TEMP[3] 170: MOV OUT[0], TEMP[7] 171: MOV OUT[3], TEMP[6] 172: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %17 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4 %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 %19 = call float @llvm.SI.load.const(<16 x i8> %18, i32 0) %20 = call float @llvm.SI.load.const(<16 x i8> %18, i32 4) %21 = call float @llvm.SI.load.const(<16 x i8> %18, i32 8) %22 = call float @llvm.SI.load.const(<16 x i8> %18, i32 12) %23 = call float @llvm.SI.load.const(<16 x i8> %18, i32 16) %24 = call float @llvm.SI.load.const(<16 x i8> %18, i32 20) %25 = call float @llvm.SI.load.const(<16 x i8> %18, i32 24) %26 = call float @llvm.SI.load.const(<16 x i8> %18, i32 28) %27 = call float @llvm.SI.load.const(<16 x i8> %18, i32 32) %28 = call float @llvm.SI.load.const(<16 x i8> %18, i32 36) %29 = call float @llvm.SI.load.const(<16 x i8> %18, i32 40) %30 = call float @llvm.SI.load.const(<16 x i8> %18, i32 44) %31 = call float @llvm.SI.load.const(<16 x i8> %18, i32 48) %32 = call float @llvm.SI.load.const(<16 x i8> %18, i32 52) %33 = call float @llvm.SI.load.const(<16 x i8> %18, i32 56) %34 = call float @llvm.SI.load.const(<16 x i8> %18, i32 60) %35 = call float @llvm.SI.load.const(<16 x i8> %18, i32 304) %36 = call float @llvm.SI.load.const(<16 x i8> %18, i32 308) %37 = call float @llvm.SI.load.const(<16 x i8> %18, i32 312) %38 = call float @llvm.SI.load.const(<16 x i8> %18, i32 320) %39 = call float @llvm.SI.load.const(<16 x i8> %18, i32 324) %40 = call float @llvm.SI.load.const(<16 x i8> %18, i32 328) %41 = call float @llvm.SI.load.const(<16 x i8> %18, i32 408) %42 = call float @llvm.SI.load.const(<16 x i8> %18, i32 412) %43 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %44 = load <8 x i32>, <8 x i32> addrspace(2)* %43, align 32, !tbaa !0 %45 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %46 = load <4 x i32>, <4 x i32> addrspace(2)* %45, align 16, !tbaa !0 %47 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %48 = load <16 x i8>, <16 x i8> addrspace(2)* %47, align 16, !tbaa !0 %49 = add i32 %5, %7 %50 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %48, i32 0, i32 %49) %51 = extractelement <4 x float> %50, i32 0 %52 = extractelement <4 x float> %50, i32 1 %53 = extractelement <4 x float> %50, i32 2 %54 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0 %56 = add i32 %5, %7 %57 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %55, i32 0, i32 %56) %58 = extractelement <4 x float> %57, i32 0 %59 = extractelement <4 x float> %57, i32 1 %60 = extractelement <4 x float> %57, i32 2 %61 = extractelement <4 x float> %57, i32 3 %62 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %63 = load <16 x i8>, <16 x i8> addrspace(2)* %62, align 16, !tbaa !0 %64 = add i32 %5, %7 %65 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %63, i32 0, i32 %64) %66 = extractelement <4 x float> %65, i32 0 %67 = extractelement <4 x float> %65, i32 1 %68 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %69 = load <16 x i8>, <16 x i8> addrspace(2)* %68, align 16, !tbaa !0 %70 = add i32 %5, %7 %71 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %69, i32 0, i32 %70) %72 = extractelement <4 x float> %71, i32 0 %73 = extractelement <4 x float> %71, i32 1 %74 = extractelement <4 x float> %71, i32 2 %75 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %76 = load <16 x i8>, <16 x i8> addrspace(2)* %75, align 16, !tbaa !0 %77 = add i32 %5, %7 %78 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %76, i32 0, i32 %77) %79 = extractelement <4 x float> %78, i32 0 %80 = extractelement <4 x float> %78, i32 1 %81 = extractelement <4 x float> %78, i32 2 %82 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 5 %83 = load <16 x i8>, <16 x i8> addrspace(2)* %82, align 16, !tbaa !0 %84 = add i32 %10, %6 %85 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %83, i32 0, i32 %84) %86 = extractelement <4 x float> %85, i32 0 %87 = extractelement <4 x float> %85, i32 1 %88 = bitcast float %72 to i32 %89 = shl i32 %88, 2 %90 = add i32 %89, 8 %91 = sitofp i32 %90 to float %92 = fadd float %91, 5.000000e-01 %93 = fmul float %92, %41 %94 = fadd float %93, %86 %95 = fadd float %87, 0.000000e+00 %96 = call float @floor(float %94) %97 = fsub float %94, %96 %98 = fmul float %96, %42 %99 = fadd float %98, %95 %100 = bitcast float %73 to i32 %101 = shl i32 %100, 2 %102 = add i32 %101, 8 %103 = sitofp i32 %102 to float %104 = fadd float %103, 5.000000e-01 %105 = fmul float %104, %41 %106 = fadd float %105, %86 %107 = fadd float %87, 0.000000e+00 %108 = call float @floor(float %106) %109 = fsub float %106, %108 %110 = fmul float %108, %42 %111 = fadd float %110, %107 %112 = bitcast float %74 to i32 %113 = shl i32 %112, 2 %114 = add i32 %113, 8 %115 = sitofp i32 %114 to float %116 = fadd float %115, 5.000000e-01 %117 = fmul float %116, %41 %118 = fadd float %117, %86 %119 = fadd float %87, 0.000000e+00 %120 = call float @floor(float %118) %121 = fsub float %118, %120 %122 = fmul float %120, %42 %123 = fadd float %122, %119 %124 = bitcast float %121 to i32 %125 = bitcast float %123 to i32 %126 = insertelement <4 x i32> undef, i32 %124, i32 0 %127 = insertelement <4 x i32> %126, i32 %125, i32 1 %128 = insertelement <4 x i32> %127, i32 0, i32 2 %129 = bitcast <8 x i32> %44 to <32 x i8> %130 = bitcast <4 x i32> %46 to <16 x i8> %131 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %128, <32 x i8> %129, <16 x i8> %130, i32 2) %132 = extractelement <4 x float> %131, i32 0 %133 = extractelement <4 x float> %131, i32 1 %134 = extractelement <4 x float> %131, i32 2 %135 = extractelement <4 x float> %131, i32 3 %136 = bitcast float %109 to i32 %137 = bitcast float %111 to i32 %138 = insertelement <4 x i32> undef, i32 %136, i32 0 %139 = insertelement <4 x i32> %138, i32 %137, i32 1 %140 = insertelement <4 x i32> %139, i32 0, i32 2 %141 = bitcast <8 x i32> %44 to <32 x i8> %142 = bitcast <4 x i32> %46 to <16 x i8> %143 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %140, <32 x i8> %141, <16 x i8> %142, i32 2) %144 = extractelement <4 x float> %143, i32 0 %145 = extractelement <4 x float> %143, i32 1 %146 = extractelement <4 x float> %143, i32 2 %147 = extractelement <4 x float> %143, i32 3 %148 = bitcast float %97 to i32 %149 = bitcast float %99 to i32 %150 = insertelement <4 x i32> undef, i32 %148, i32 0 %151 = insertelement <4 x i32> %150, i32 %149, i32 1 %152 = insertelement <4 x i32> %151, i32 0, i32 2 %153 = bitcast <8 x i32> %44 to <32 x i8> %154 = bitcast <4 x i32> %46 to <16 x i8> %155 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %152, <32 x i8> %153, <16 x i8> %154, i32 2) %156 = extractelement <4 x float> %155, i32 0 %157 = extractelement <4 x float> %155, i32 1 %158 = extractelement <4 x float> %155, i32 2 %159 = extractelement <4 x float> %155, i32 3 %160 = fmul float %79, %156 %161 = fmul float %79, %157 %162 = fmul float %79, %158 %163 = fmul float %79, %159 %164 = fmul float %80, %144 %165 = fadd float %164, %160 %166 = fmul float %80, %145 %167 = fadd float %166, %161 %168 = fmul float %80, %146 %169 = fadd float %168, %162 %170 = fmul float %80, %147 %171 = fadd float %170, %163 %172 = fmul float %81, %132 %173 = fadd float %172, %165 %174 = fmul float %81, %133 %175 = fadd float %174, %167 %176 = fmul float %81, %134 %177 = fadd float %176, %169 %178 = fmul float %81, %135 %179 = fadd float %178, %171 %180 = bitcast float %121 to i32 %181 = bitcast float %123 to i32 %182 = insertelement <4 x i32> , i32 %180, i32 1 %183 = insertelement <4 x i32> %182, i32 %181, i32 2 %184 = insertelement <4 x i32> %183, i32 0, i32 3 %185 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %184, <8 x i32> %44, <4 x i32> %46, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %186 = extractelement <4 x float> %185, i32 0 %187 = extractelement <4 x float> %185, i32 1 %188 = extractelement <4 x float> %185, i32 2 %189 = extractelement <4 x float> %185, i32 3 %190 = bitcast float %97 to i32 %191 = bitcast float %99 to i32 %192 = insertelement <4 x i32> , i32 %190, i32 1 %193 = insertelement <4 x i32> %192, i32 %191, i32 2 %194 = insertelement <4 x i32> %193, i32 0, i32 3 %195 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %194, <8 x i32> %44, <4 x i32> %46, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %196 = extractelement <4 x float> %195, i32 0 %197 = extractelement <4 x float> %195, i32 1 %198 = extractelement <4 x float> %195, i32 2 %199 = extractelement <4 x float> %195, i32 3 %200 = bitcast float %109 to i32 %201 = bitcast float %111 to i32 %202 = insertelement <4 x i32> , i32 %200, i32 1 %203 = insertelement <4 x i32> %202, i32 %201, i32 2 %204 = insertelement <4 x i32> %203, i32 0, i32 3 %205 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %204, <8 x i32> %44, <4 x i32> %46, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %206 = extractelement <4 x float> %205, i32 0 %207 = extractelement <4 x float> %205, i32 1 %208 = extractelement <4 x float> %205, i32 2 %209 = extractelement <4 x float> %205, i32 3 %210 = fmul float %80, %206 %211 = fmul float %80, %207 %212 = fmul float %80, %208 %213 = fmul float %80, %209 %214 = fmul float %79, %196 %215 = fadd float %214, %210 %216 = fmul float %79, %197 %217 = fadd float %216, %211 %218 = fmul float %79, %198 %219 = fadd float %218, %212 %220 = fmul float %79, %199 %221 = fadd float %220, %213 %222 = fmul float %81, %186 %223 = fadd float %222, %215 %224 = fmul float %81, %187 %225 = fadd float %224, %217 %226 = fmul float %81, %188 %227 = fadd float %226, %219 %228 = fmul float %81, %189 %229 = fadd float %228, %221 %230 = bitcast float %121 to i32 %231 = bitcast float %123 to i32 %232 = insertelement <4 x i32> , i32 %230, i32 1 %233 = insertelement <4 x i32> %232, i32 %231, i32 2 %234 = insertelement <4 x i32> %233, i32 0, i32 3 %235 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %234, <8 x i32> %44, <4 x i32> %46, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %236 = extractelement <4 x float> %235, i32 0 %237 = extractelement <4 x float> %235, i32 1 %238 = extractelement <4 x float> %235, i32 2 %239 = extractelement <4 x float> %235, i32 3 %240 = bitcast float %109 to i32 %241 = bitcast float %111 to i32 %242 = insertelement <4 x i32> , i32 %240, i32 1 %243 = insertelement <4 x i32> %242, i32 %241, i32 2 %244 = insertelement <4 x i32> %243, i32 0, i32 3 %245 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %244, <8 x i32> %44, <4 x i32> %46, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %246 = extractelement <4 x float> %245, i32 0 %247 = extractelement <4 x float> %245, i32 1 %248 = extractelement <4 x float> %245, i32 2 %249 = extractelement <4 x float> %245, i32 3 %250 = bitcast float %97 to i32 %251 = bitcast float %99 to i32 %252 = insertelement <4 x i32> , i32 %250, i32 1 %253 = insertelement <4 x i32> %252, i32 %251, i32 2 %254 = insertelement <4 x i32> %253, i32 0, i32 3 %255 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %254, <8 x i32> %44, <4 x i32> %46, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %256 = extractelement <4 x float> %255, i32 0 %257 = extractelement <4 x float> %255, i32 1 %258 = extractelement <4 x float> %255, i32 2 %259 = extractelement <4 x float> %255, i32 3 %260 = fmul float %79, %256 %261 = fmul float %79, %257 %262 = fmul float %79, %258 %263 = fmul float %79, %259 %264 = fmul float %80, %246 %265 = fadd float %264, %260 %266 = fmul float %80, %247 %267 = fadd float %266, %261 %268 = fmul float %80, %248 %269 = fadd float %268, %262 %270 = fmul float %80, %249 %271 = fadd float %270, %263 %272 = fmul float %81, %236 %273 = fadd float %272, %265 %274 = fmul float %81, %237 %275 = fadd float %274, %267 %276 = fmul float %81, %238 %277 = fadd float %276, %269 %278 = fmul float %81, %239 %279 = fadd float %278, %271 %280 = fmul float %58, 2.550000e+02 %281 = fadd float %280, -1.280000e+02 %282 = fmul float %59, 2.550000e+02 %283 = fadd float %282, -1.280000e+02 %284 = fmul float %60, 2.550000e+02 %285 = fadd float %284, -1.280000e+02 %286 = fmul float %61, 2.550000e+02 %287 = fadd float %286, -1.280000e+02 %288 = fcmp olt float %281, 0.000000e+00 %289 = fcmp olt float %283, 0.000000e+00 %290 = fcmp olt float %285, 0.000000e+00 %291 = fcmp olt float %287, 0.000000e+00 %292 = select i1 %288, float 1.000000e+00, float 0.000000e+00 %293 = call float @fabs(float %281) %294 = call float @fabs(float %283) %295 = call float @fabs(float %285) %296 = call float @fabs(float %287) %297 = fsub float %293, %292 %298 = select i1 %289, float -1.000000e+00, float -0.000000e+00 %299 = fadd float %294, %298 %300 = select i1 %290, float -1.000000e+00, float -0.000000e+00 %301 = fadd float %295, %300 %302 = select i1 %291, float -1.000000e+00, float -0.000000e+00 %303 = fadd float %296, %302 %304 = fadd float %297, -6.400000e+01 %305 = fadd float %299, -6.400000e+01 %306 = fadd float %301, -6.400000e+01 %307 = fadd float %303, -6.400000e+01 %308 = fcmp olt float %304, 0.000000e+00 %309 = fcmp olt float %305, 0.000000e+00 %310 = select i1 %308, float 1.000000e+00, float 0.000000e+00 %311 = select i1 %309, float 1.000000e+00, float 0.000000e+00 %312 = call float @fabs(float %304) %313 = call float @fabs(float %305) %314 = call float @fabs(float %306) %315 = call float @fabs(float %307) %316 = fsub float %312, %310 %317 = fsub float %313, %311 %318 = fmul float %316, 0x3F90410420000000 %319 = fmul float %317, 0x3F90410420000000 %320 = fsub float 1.000000e+00, %318 %321 = fsub float %320, %319 %322 = fmul float %318, %318 %323 = fmul float %319, %319 %324 = fadd float %323, %322 %325 = fmul float %321, %321 %326 = fadd float %324, %325 %327 = call float @llvm.AMDGPU.rsq.clamped.f32(float %326) %328 = fmul float %318, %327 %329 = fmul float %319, %327 %330 = fmul float %321, %327 %331 = fmul float %310, 2.000000e+00 %332 = fmul float %311, 2.000000e+00 %333 = fsub float 1.000000e+00, %331 %334 = fsub float 1.000000e+00, %332 %335 = fmul float %328, %333 %336 = fmul float %329, %334 %337 = fmul float %292, 2.000000e+00 %338 = fsub float 1.000000e+00, %337 %339 = fmul float %338, %330 %340 = fmul float %335, %173 %341 = fmul float %336, %175 %342 = fadd float %340, %341 %343 = fmul float %339, %177 %344 = fadd float %342, %343 %345 = fmul float %179, 0.000000e+00 %346 = fadd float %344, %345 %347 = fmul float %335, %223 %348 = fmul float %336, %225 %349 = fadd float %347, %348 %350 = fmul float %339, %227 %351 = fadd float %349, %350 %352 = fmul float %229, 0.000000e+00 %353 = fadd float %351, %352 %354 = fmul float %335, %273 %355 = fmul float %336, %275 %356 = fadd float %354, %355 %357 = fmul float %339, %277 %358 = fadd float %356, %357 %359 = fmul float %279, 0.000000e+00 %360 = fadd float %358, %359 %361 = fmul float %346, %346 %362 = fmul float %353, %353 %363 = fadd float %362, %361 %364 = fmul float %360, %360 %365 = fadd float %363, %364 %366 = call float @llvm.AMDGPU.rsq.clamped.f32(float %365) %367 = fmul float %346, %366 %368 = fmul float %353, %366 %369 = fmul float %360, %366 %370 = bitcast float %86 to i32 %371 = bitcast float %87 to i32 %372 = insertelement <4 x i32> undef, i32 %370, i32 0 %373 = insertelement <4 x i32> %372, i32 %371, i32 1 %374 = insertelement <4 x i32> %373, i32 0, i32 2 %375 = bitcast <8 x i32> %44 to <32 x i8> %376 = bitcast <4 x i32> %46 to <16 x i8> %377 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %374, <32 x i8> %375, <16 x i8> %376, i32 2) %378 = extractelement <4 x float> %377, i32 2 %379 = fmul float %51, %378 %380 = fmul float %52, %378 %381 = fmul float %53, %378 %382 = fmul float %379, %173 %383 = fmul float %380, %175 %384 = fadd float %382, %383 %385 = fmul float %381, %177 %386 = fadd float %384, %385 %387 = fadd float %386, %179 %388 = fmul float %379, %223 %389 = fmul float %380, %225 %390 = fadd float %388, %389 %391 = fmul float %381, %227 %392 = fadd float %390, %391 %393 = fadd float %392, %229 %394 = fmul float %379, %273 %395 = fmul float %380, %275 %396 = fadd float %394, %395 %397 = fmul float %381, %277 %398 = fadd float %396, %397 %399 = fadd float %398, %279 %400 = fsub float %387, %35 %401 = fsub float %393, %36 %402 = fsub float %399, %37 %403 = fmul float %38, %400 %404 = fmul float %39, %401 %405 = fadd float %404, %403 %406 = fmul float %40, %402 %407 = fadd float %405, %406 %408 = fmul float %387, %22 %409 = fmul float %393, %26 %410 = fadd float %408, %409 %411 = fmul float %399, %30 %412 = fadd float %410, %411 %413 = fadd float %412, %34 %414 = fmul float %66, %13 %415 = fadd float %414, %15 %416 = fmul float %67, %14 %417 = fadd float %416, %16 %418 = bitcast float %86 to i32 %419 = bitcast float %87 to i32 %420 = insertelement <4 x i32> , i32 %418, i32 1 %421 = insertelement <4 x i32> %420, i32 %419, i32 2 %422 = insertelement <4 x i32> %421, i32 0, i32 3 %423 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %422, <8 x i32> %44, <4 x i32> %46, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %424 = extractelement <4 x float> %423, i32 0 %425 = extractelement <4 x float> %423, i32 1 %426 = extractelement <4 x float> %423, i32 2 %427 = extractelement <4 x float> %423, i32 3 %428 = fmul float %387, %19 %429 = fmul float %393, %23 %430 = fadd float %428, %429 %431 = fmul float %399, %27 %432 = fadd float %430, %431 %433 = fadd float %432, %31 %434 = fmul float %387, %20 %435 = fmul float %393, %24 %436 = fadd float %434, %435 %437 = fmul float %399, %28 %438 = fadd float %436, %437 %439 = fadd float %438, %32 %440 = fsub float -0.000000e+00, %439 %441 = fmul float %387, %21 %442 = fmul float %393, %25 %443 = fadd float %441, %442 %444 = fmul float %399, %29 %445 = fadd float %443, %444 %446 = fadd float %445, %33 %447 = fmul float %446, 2.000000e+00 %448 = fsub float %447, %413 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %415, float %417, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %387, float %393, float %399, float %407) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %367, float %368, float %369, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %424, float %425, float %426, float %427) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %433, float %440, float %448, float %413) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @floor(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0xc3000000 ; 7E0202FF C3000000 v_mov_b32_e32 v2, 0x437f0000 ; 7E0402FF 437F0000 v_mov_b32_e32 v4, 0x80000000 ; 7E0802FF 80000000 v_mov_b32_e32 v5, 0xc2800000 ; 7E0A02FF C2800000 v_mov_b32_e32 v6, 0x3c820821 ; 7E0C02FF 3C820821 v_add_i32_e32 v0, s10, v0 ; 4A00000A v_add_i32_e32 v3, s11, v3 ; 4A06060B s_load_dwordx4 s[24:27], s[4:5], 0x0 ; C08C0500 s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908 s_load_dwordx4 s[36:39], s[8:9], 0xc ; C092090C s_load_dwordx4 s[40:43], s[8:9], 0x10 ; C0940910 s_load_dwordx4 s[8:11], s[8:9], 0x14 ; C0840914 s_load_dwordx4 s[44:47], s[2:3], 0x4 ; C0960304 s_load_dwordx4 s[48:51], s[2:3], 0x10 ; C0980310 s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[7:10], v0, s[12:15], 0 idxen ; E00C2000 80030700 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[10:13], v0, s[16:19], 0 idxen ; E00C2000 80040A00 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[12:15], v0, s[20:23], 0 idxen ; E00C2000 80050C00 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[14:17], v0, s[36:39], 0 idxen ; E00C2000 80090E00 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[17:20], v0, s[40:43], 0 idxen ; E00C2000 800A1100 buffer_load_format_xyzw v[24:27], v3, s[8:11], 0 idxen ; E00C2000 80021803 s_buffer_load_dword s23, s[44:47], 0x6 ; C20BAD06 s_buffer_load_dword s22, s[44:47], 0x7 ; C20B2D07 s_buffer_load_dword s37, s[44:47], 0x8 ; C212AD08 s_buffer_load_dword s36, s[44:47], 0x9 ; C2122D09 s_buffer_load_dword s16, s[48:51], 0x51 ; C2083151 s_buffer_load_dword s8, s[48:51], 0x52 ; C2043152 s_buffer_load_dword s38, s[48:51], 0x66 ; C2133166 s_buffer_load_dword s39, s[48:51], 0x67 ; C213B167 s_buffer_load_dword s0, s[48:51], 0xf ; C200310F s_buffer_load_dword s20, s[48:51], 0x4c ; C20A314C s_buffer_load_dword s19, s[48:51], 0x4d ; C209B14D s_buffer_load_dword s15, s[48:51], 0x4e ; C207B14E s_buffer_load_dword s21, s[48:51], 0x50 ; C20AB150 s_buffer_load_dword s4, s[48:51], 0x5 ; C2023105 s_buffer_load_dword s3, s[48:51], 0x6 ; C201B106 s_buffer_load_dword s9, s[48:51], 0x7 ; C204B107 s_buffer_load_dword s1, s[48:51], 0x8 ; C200B108 s_buffer_load_dword s2, s[48:51], 0x9 ; C2013109 s_buffer_load_dword s5, s[48:51], 0x0 ; C202B100 s_buffer_load_dword s6, s[48:51], 0x1 ; C2033101 s_buffer_load_dword s7, s[48:51], 0x2 ; C203B102 s_buffer_load_dword s10, s[48:51], 0x3 ; C2053103 s_buffer_load_dword s17, s[48:51], 0x4 ; C208B104 s_buffer_load_dword s11, s[48:51], 0xa ; C205B10A s_buffer_load_dword s18, s[48:51], 0xb ; C209310B s_buffer_load_dword s12, s[48:51], 0xc ; C206310C s_buffer_load_dword s13, s[48:51], 0xd ; C206B10D s_buffer_load_dword s14, s[48:51], 0xe ; C207310E s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v0, s37 ; 7E000225 v_mad_f32 v3, v2, v10, v1 ; D2820003 04061502 v_mac_f32_e32 v1, v2, v11 ; 3E021702 v_lshlrev_b32_e32 v2, 2, v14 ; 34041C82 v_add_i32_e32 v2, 8, v2 ; 4A040488 v_cvt_f32_i32_e32 v2, v2 ; 7E040B02 v_lshlrev_b32_e32 v10, 2, v15 ; 34141E82 v_lshlrev_b32_e32 v11, 2, v16 ; 34162082 v_add_i32_e32 v10, 8, v10 ; 4A141488 v_cvt_f32_i32_e32 v10, v10 ; 7E140B0A v_add_f32_e32 v2, 0.5, v2 ; 060404F0 v_mad_f32 v2, s38, v2, v24 ; D2820002 04620426 v_floor_f32_e32 v14, v2 ; 7E1C4902 v_subrev_f32_e32 v20, v14, v2 ; 0A28050E v_mad_f32 v21, s39, v14, v25 ; D2820015 04661C27 v_add_i32_e32 v2, 8, v11 ; 4A041688 v_cvt_f32_i32_e32 v2, v2 ; 7E040B02 v_add_f32_e32 v10, 0.5, v10 ; 061414F0 v_mad_f32 v10, s38, v10, v24 ; D282000A 04621426 v_floor_f32_e32 v11, v10 ; 7E16490A v_subrev_f32_e32 v26, v11, v10 ; 0A34150B v_mad_f32 v27, s39, v11, v25 ; D282001B 04661627 v_add_f32_e32 v2, 0.5, v2 ; 060404F0 v_mad_f32 v2, s38, v2, v24 ; D2820002 04620426 v_floor_f32_e32 v10, v2 ; 7E144902 v_subrev_f32_e32 v33, v10, v2 ; 0A42050A v_mad_f32 v34, s39, v10, v25 ; D2820022 04661427 v_mov_b32_e32 v35, 0 ; 7E460280 v_mov_b32_e32 v28, v35 ; 7E380323 v_mov_b32_e32 v22, v35 ; 7E2C0323 image_sample_l v[36:39], 15, 0, 0, 0, 0, 0, 0, 0, v[33:36], s[28:35], s[24:27] ; F0900F00 00C72421 image_sample_l v[28:31], 15, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[28:35], s[24:27] ; F0900F00 00C71C1A image_sample_l v[40:43], 15, 0, 0, 0, 0, 0, 0, 0, v[20:23], s[28:35], s[24:27] ; F0900F00 00C72814 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, v40, v17 ; 10042328 v_mul_f32_e32 v10, v41, v17 ; 10142329 v_mul_f32_e32 v11, v42, v17 ; 1016232A v_mul_f32_e32 v14, v43, v17 ; 101C232B v_mov_b32_e32 v32, 0x10001 ; 7E4002FF 00010001 v_mac_f32_e32 v2, v28, v18 ; 3E04251C v_mac_f32_e32 v10, v29, v18 ; 3E14251D v_mac_f32_e32 v11, v30, v18 ; 3E16251E v_mov_b32_e32 v40, v32 ; 7E500320 v_mov_b32_e32 v41, v33 ; 7E520321 v_mov_b32_e32 v42, v34 ; 7E540322 v_mov_b32_e32 v43, v35 ; 7E560323 v_mov_b32_e32 v44, v32 ; 7E580320 v_mov_b32_e32 v45, v33 ; 7E5A0321 v_mov_b32_e32 v46, v34 ; 7E5C0322 v_mov_b32_e32 v47, v35 ; 7E5E0323 v_mac_f32_e32 v14, v31, v18 ; 3E1C251F v_mov_b32_e32 v41, v20 ; 7E520314 v_mov_b32_e32 v45, v26 ; 7E5A031A v_mac_f32_e32 v2, v36, v19 ; 3E042724 v_mac_f32_e32 v10, v37, v19 ; 3E142725 v_mac_f32_e32 v11, v38, v19 ; 3E162726 v_mov_b32_e32 v42, v21 ; 7E540315 v_mac_f32_e32 v14, v39, v19 ; 3E1C2727 v_mov_b32_e32 v46, v27 ; 7E5C031B v_mov_b32_e32 v43, v35 ; 7E560323 image_sample_l_o v[28:31], 15, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[28:35], s[24:27] ; F0D00F00 00C71C20 image_sample_l_o v[36:39], 15, 0, 0, 0, 0, 0, 0, 0, v[40:43], s[28:35], s[24:27] ; F0D00F00 00C72428 v_mov_b32_e32 v47, v35 ; 7E5E0323 image_sample_l_o v[40:43], 15, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[28:35], s[24:27] ; F0D00F00 00C7282C s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v15, v40, v18 ; 101E2528 v_mul_f32_e32 v16, v41, v18 ; 10202529 v_mul_f32_e32 v22, v42, v18 ; 102C252A v_mul_f32_e32 v40, v43, v18 ; 1050252B v_mov_b32_e32 v32, 0x20002 ; 7E4002FF 00020002 image_sample_l_o v[41:44], 15, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[28:35], s[24:27] ; F0D00F00 00C72920 v_mov_b32_e32 v33, v26 ; 7E42031A v_mac_f32_e32 v15, v36, v17 ; 3E1E2324 v_mac_f32_e32 v16, v37, v17 ; 3E202325 v_mac_f32_e32 v22, v38, v17 ; 3E2C2326 v_mac_f32_e32 v40, v39, v17 ; 3E502327 v_mov_b32_e32 v34, v27 ; 7E44031B v_mac_f32_e32 v15, v28, v19 ; 3E1E271C v_mac_f32_e32 v16, v29, v19 ; 3E20271D v_mac_f32_e32 v22, v30, v19 ; 3E2C271E image_sample_l_o v[26:29], 15, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[28:35], s[24:27] ; F0D00F00 00C71A20 v_mov_b32_e32 v33, v20 ; 7E420314 v_mac_f32_e32 v40, v31, v19 ; 3E50271F v_mov_b32_e32 v34, v21 ; 7E440315 image_sample_l_o v[30:33], 15, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[28:35], s[24:27] ; F0D00F00 00C71E20 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v20, v30, v17 ; 1028231E v_mul_f32_e32 v21, v31, v17 ; 102A231F v_mul_f32_e32 v30, v32, v17 ; 103C2320 v_mul_f32_e32 v17, v33, v17 ; 10222321 v_mac_f32_e32 v20, v26, v18 ; 3E28251A v_mac_f32_e32 v21, v27, v18 ; 3E2A251B v_mac_f32_e32 v30, v28, v18 ; 3E3C251C v_mac_f32_e32 v17, v29, v18 ; 3E22251D v_mac_f32_e32 v20, v41, v19 ; 3E282729 v_mac_f32_e32 v21, v42, v19 ; 3E2A272A v_mac_f32_e32 v30, v43, v19 ; 3E3C272B v_mac_f32_e32 v17, v44, v19 ; 3E22272C v_mac_f32_e32 v0, s23, v12 ; 3E001817 v_mov_b32_e32 v12, s36 ; 7E180224 v_mac_f32_e32 v12, s22, v13 ; 3E181A16 v_mov_b32_e32 v26, v35 ; 7E340323 image_sample_l v13, 4, 0, 0, 0, 0, 0, 0, 0, v[24:27], s[28:35], s[24:27] ; F0900400 00C70D18 v_mov_b32_e32 v23, 0x30003 ; 7E2E02FF 00030003 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v7, v13, v7 ; 100E0F0D v_mul_f32_e32 v8, v13, v8 ; 1010110D v_mul_f32_e32 v9, v13, v9 ; 1012130D v_mov_b32_e32 v26, v35 ; 7E340323 image_sample_l_o v[23:26], 15, 0, 0, 0, 0, 0, 0, 0, v[23:26], s[28:35], s[24:27] ; F0D00F00 00C71717 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v4, v4, -1.0, vcc ; D2000004 01A9E704 v_add_f32_e64 v1, |v1|, v4 ; D2060101 00020901 v_cmp_gt_f32_e32 vcc, 0, v3 ; 7C080680 v_cndmask_b32_e64 v4, 0, 1.0, vcc ; D2000004 01A9E480 v_sub_f32_e64 v3, |v3|, v4 ; D2080103 00020903 v_add_f32_e32 v3, v5, v3 ; 06060705 v_add_f32_e32 v1, v5, v1 ; 06020305 v_cmp_gt_f32_e32 vcc, 0, v3 ; 7C080680 v_cndmask_b32_e64 v5, 0, 1.0, vcc ; D2000005 01A9E480 v_sub_f32_e64 v13, v5, |v3| ; D208020D 00020705 v_mad_f32 v13, v13, v6, 1.0 ; D282000D 03CA0D0D v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v18, 0, 1.0, vcc ; D2000012 01A9E480 v_sub_f32_e64 v19, v18, |v1| ; D2080213 00020312 v_mac_f32_e32 v13, v6, v19 ; 3E1A2706 v_sub_f32_e64 v3, |v3|, v5 ; D2080103 00020B03 v_sub_f32_e64 v1, |v1|, v18 ; D2080101 00022501 v_mul_f32_e32 v3, v6, v3 ; 10060706 v_mul_f32_e32 v1, v6, v1 ; 10020306 v_mul_f32_e32 v6, v3, v3 ; 100C0703 v_mac_f32_e32 v6, v1, v1 ; 3E0C0301 v_mac_f32_e32 v6, v13, v13 ; 3E0C1B0D v_rsq_clamp_f32_e32 v6, v6 ; 7E0C5906 exp 15, 32, 0, 0, 0, v35, v35, v35, v35 ; F800020F 23232323 exp 15, 33, 0, 0, 0, v0, v12, v35, v35 ; F800021F 23230C00 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_mad_f32 v0, -2.0, v5, 1.0 ; D2820000 03CA0AF5 v_mul_f32_e32 v3, v6, v3 ; 10060706 v_mul_f32_e32 v0, v0, v3 ; 10000700 v_mad_f32 v3, -2.0, v18, 1.0 ; D2820003 03CA24F5 v_mul_f32_e32 v1, v6, v1 ; 10020306 v_mul_f32_e32 v1, v3, v1 ; 10020303 v_mul_f32_e32 v3, v10, v1 ; 1006030A v_mac_f32_e32 v3, v2, v0 ; 3E060102 v_mul_f32_e32 v5, v10, v8 ; 100A110A v_mac_f32_e32 v5, v2, v7 ; 3E0A0F02 v_mul_f32_e32 v2, v16, v1 ; 10040310 v_mac_f32_e32 v2, v15, v0 ; 3E04010F v_mul_f32_e32 v10, v16, v8 ; 10141110 v_mac_f32_e32 v10, v15, v7 ; 3E140F0F v_mul_f32_e32 v1, v21, v1 ; 10020315 v_mul_f32_e32 v8, v21, v8 ; 10101115 v_mac_f32_e32 v1, v20, v0 ; 3E020114 v_mac_f32_e32 v8, v20, v7 ; 3E100F14 v_mul_f32_e32 v0, v6, v13 ; 10001B06 v_mad_f32 v4, -2.0, v4, 1.0 ; D2820004 03CA08F5 v_mul_f32_e32 v0, v0, v4 ; 10000900 v_mac_f32_e32 v3, v11, v0 ; 3E06010B v_mac_f32_e32 v5, v11, v9 ; 3E0A130B v_mac_f32_e32 v2, v22, v0 ; 3E040116 v_mac_f32_e32 v10, v22, v9 ; 3E141316 v_mac_f32_e32 v1, v30, v0 ; 3E02011E v_mac_f32_e32 v8, v30, v9 ; 3E10131E v_add_f32_e32 v0, v14, v5 ; 06000B0E v_add_f32_e32 v4, v40, v10 ; 06081528 v_subrev_f32_e32 v5, s20, v0 ; 0A0A0014 v_mul_f32_e32 v5, s21, v5 ; 100A0A15 v_subrev_f32_e32 v6, s19, v4 ; 0A0C0813 v_mac_f32_e32 v5, s16, v6 ; 3E0A0C10 v_add_f32_e32 v6, v17, v8 ; 060C1111 v_mul_f32_e32 v7, v3, v3 ; 100E0703 v_mac_f32_e32 v7, v2, v2 ; 3E0E0502 v_mac_f32_e32 v7, v1, v1 ; 3E0E0301 v_rsq_clamp_f32_e32 v7, v7 ; 7E0E5907 v_subrev_f32_e32 v8, s15, v6 ; 0A100C0F v_mac_f32_e32 v5, s8, v8 ; 3E0A1008 exp 15, 34, 0, 0, 0, v0, v4, v6, v5 ; F800022F 05060400 v_mul_f32_e32 v3, v7, v3 ; 10060707 v_mul_f32_e32 v2, v7, v2 ; 10040507 v_mul_f32_e32 v1, v7, v1 ; 10020307 exp 15, 35, 0, 0, 0, v3, v2, v1, v35 ; F800023F 23010203 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v1, s9, v4 ; 10020809 v_mul_f32_e32 v2, s17, v4 ; 10040811 v_mul_f32_e32 v3, s4, v4 ; 10060804 v_mul_f32_e32 v4, s3, v4 ; 10080803 v_mac_f32_e32 v1, s10, v0 ; 3E02000A v_mac_f32_e32 v2, s5, v0 ; 3E040005 v_mac_f32_e32 v3, s6, v0 ; 3E060006 v_mac_f32_e32 v4, s7, v0 ; 3E080007 v_mac_f32_e32 v1, s18, v6 ; 3E020C12 v_mac_f32_e32 v2, s1, v6 ; 3E040C01 v_mac_f32_e32 v3, s2, v6 ; 3E060C02 v_mac_f32_e32 v4, s11, v6 ; 3E080C0B v_add_f32_e32 v0, s0, v1 ; 06000200 v_add_f32_e32 v1, s12, v2 ; 0602040C v_add_f32_e32 v2, s13, v3 ; 0604060D v_add_f32_e32 v3, s14, v4 ; 0606080E v_xor_b32_e32 v2, 0x80000000, v2 ; 3A0404FF 80000000 v_mad_f32 v3, 2.0, v3, -v0 ; D2820003 840206F4 exp 15, 36, 0, 0, 0, v23, v24, v25, v26 ; F800024F 1A191817 exp 15, 37, 0, 0, 0, v35, v35, v35, v35 ; F800025F 23232323 exp 15, 12, 0, 0, 0, v1, v2, v3, v0 ; F80000CF 00030201 exp 15, 13, 0, 1, 0, v35, v35, v35, v35 ; F80008DF 23232323 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 48 Code Size: 1276 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0xB last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL OUT[2], COLOR[2] DCL OUT[3], COLOR[3] DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0..3] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0..3], LOCAL IMM[0] UINT32 {0, 16, 4, 0} IMM[1] FLT32 { 0.5000, 0.0000, 1.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MUL TEMP[1].xyz, CONST[1][1].xyzz, TEMP[0].xyzz 3: MUL TEMP[1].xyz, TEMP[1].xyzz, IN[3].xyzz 4: DP3 TEMP[2].x, IN[2].xyzz, IN[2].xyzz 5: RSQ TEMP[2].x, TEMP[2].xxxx 6: MUL TEMP[2].xyz, IN[2].xyzz, TEMP[2].xxxx 7: MAD TEMP[2].xyz, TEMP[2].xyzz, IMM[1].xxxx, IMM[1].xxxx 8: MOV TEMP[3].w, IMM[1].yyyy 9: MOV TEMP[3].x, TEMP[2].xxxx 10: MOV TEMP[3].y, TEMP[2].yyyy 11: MOV TEMP[3].z, TEMP[2].zzzz 12: FSLT TEMP[0].x, TEMP[0].wwww, CONST[1][0].yyyy 13: AND TEMP[0].x, TEMP[0].xxxx, IMM[1].zzzz 14: KILL_IF -TEMP[0].xxxx 15: MOV TEMP[0].w, IMM[1].yyyy 16: MOV TEMP[0].x, TEMP[1].xxxx 17: MOV TEMP[0].y, TEMP[1].yyyy 18: MOV TEMP[0].z, TEMP[1].zzzz 19: MOV OUT[2], IN[1].wwww 20: MOV OUT[0], TEMP[0] 21: MOV OUT[3], TEMP[3] 22: MOV OUT[1], IMM[1].yyyy 23: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %28 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %29 = load <32 x i8>, <32 x i8> addrspace(2)* %28, align 32, !tbaa !0 %30 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 %32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %37 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %38 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %41 = bitcast float %32 to i32 %42 = bitcast float %33 to i32 %43 = insertelement <2 x i32> undef, i32 %41, i32 0 %44 = insertelement <2 x i32> %43, i32 %42, i32 1 %45 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %44, <32 x i8> %29, <16 x i8> %31, i32 2) %46 = extractelement <4 x float> %45, i32 0 %47 = extractelement <4 x float> %45, i32 1 %48 = extractelement <4 x float> %45, i32 2 %49 = extractelement <4 x float> %45, i32 3 %50 = fmul float %25, %46 %51 = fmul float %26, %47 %52 = fmul float %27, %48 %53 = fmul float %50, %38 %54 = fmul float %51, %39 %55 = fmul float %52, %40 %56 = fmul float %35, %35 %57 = fmul float %36, %36 %58 = fadd float %57, %56 %59 = fmul float %37, %37 %60 = fadd float %58, %59 %61 = call float @llvm.AMDGPU.rsq.clamped.f32(float %60) %62 = fmul float %35, %61 %63 = fmul float %36, %61 %64 = fmul float %37, %61 %65 = fmul float %62, 5.000000e-01 %66 = fadd float %65, 5.000000e-01 %67 = fmul float %63, 5.000000e-01 %68 = fadd float %67, 5.000000e-01 %69 = fmul float %64, 5.000000e-01 %70 = fadd float %69, 5.000000e-01 %71 = fcmp olt float %49, %24 %72 = select i1 %71, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %72) %73 = call i32 @llvm.SI.packf16(float %53, float %54) %74 = bitcast i32 %73 to float %75 = call i32 @llvm.SI.packf16(float %55, float 0.000000e+00) %76 = bitcast i32 %75 to float %77 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %78 = bitcast i32 %77 to float %79 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %80 = bitcast i32 %79 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %74, float %76, float %74, float %76) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 1, i32 1, float %78, float %80, float %78, float %80) %81 = call i32 @llvm.SI.packf16(float %66, float %68) %82 = bitcast i32 %81 to float %83 = call i32 @llvm.SI.packf16(float %70, float 0.000000e+00) %84 = bitcast i32 %83 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 2, i32 0, float %34, float %34, float %34, float %34) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 3, i32 1, float %82, float %84, float %82, float %84) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 3, 1, [m0] ; C8100700 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x1 ; C2040101 s_buffer_load_dword s9, s[0:3], 0x4 ; C2048104 s_buffer_load_dword s10, s[0:3], 0x5 ; C2050105 s_buffer_load_dword s0, s[0:3], 0x6 ; C2000106 v_interp_p2_f32 v4, [v4], v1, 3, 1, [m0] ; C8110701 v_interp_p1_f32 v5, v0, 0, 2, [m0] ; C8140800 v_interp_p2_f32 v5, [v5], v1, 0, 2, [m0] ; C8150801 v_interp_p1_f32 v6, v0, 1, 2, [m0] ; C8180900 s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 v_interp_p2_f32 v6, [v6], v1, 1, 2, [m0] ; C8190901 v_interp_p1_f32 v7, v0, 2, 2, [m0] ; C81C0A00 v_interp_p2_f32 v7, [v7], v1, 2, 2, [m0] ; C81D0A01 v_interp_p1_f32 v8, v0, 0, 3, [m0] ; C8200C00 v_interp_p2_f32 v8, [v8], v1, 0, 3, [m0] ; C8210C01 v_interp_p1_f32 v9, v0, 1, 3, [m0] ; C8240D00 v_interp_p2_f32 v9, [v9], v1, 1, 3, [m0] ; C8250D01 v_interp_p1_f32 v0, v0, 2, 3, [m0] ; C8000E00 v_interp_p2_f32 v0, [v0], v1, 2, 3, [m0] ; C8010E01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[12:15] ; F0800F00 00640A02 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, s9, v10 ; 10021409 v_mul_f32_e32 v2, s10, v11 ; 1004160A v_mul_f32_e32 v3, s0, v12 ; 10061800 v_cmp_gt_f32_e32 vcc, s8, v13 ; 7C081A08 v_mul_f32_e32 v10, v5, v5 ; 10140B05 v_mac_f32_e32 v10, v6, v6 ; 3E140D06 v_mac_f32_e32 v10, v7, v7 ; 3E140F07 v_rsq_clamp_f32_e32 v10, v10 ; 7E14590A v_mul_f32_e32 v1, v8, v1 ; 10020308 v_mul_f32_e32 v2, v9, v2 ; 10040509 v_mul_f32_e32 v0, v0, v3 ; 10000700 v_mul_f32_e32 v3, v10, v5 ; 10060B0A v_mul_f32_e32 v5, v10, v6 ; 100A0D0A v_mul_f32_e32 v6, v10, v7 ; 100C0F0A v_mad_f32 v3, 0.5, v3, 0.5 ; D2820003 03C206F0 v_mad_f32 v5, 0.5, v5, 0.5 ; D2820005 03C20AF0 v_mad_f32 v6, 0.5, v6, 0.5 ; D2820006 03C20CF0 v_cndmask_b32_e64 v7, 0, -1.0, vcc ; D2000007 01A9E680 v_cmpx_le_f32_e32 vcc, 0, v7 ; 7C260E80 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e64 v0, v0, 0 ; D25E0000 00010100 exp 15, 0, 1, 0, 0, v1, v0, v1, v0 ; F800040F 00010001 s_waitcnt expcnt(0) ; BF8C070F v_cvt_pkrtz_f16_f32_e64 v0, 0, 0 ; D25E0000 00010080 exp 15, 1, 1, 0, 0, v0, v0, v0, v0 ; F800041F 00000000 s_waitcnt expcnt(0) ; BF8C070F v_cvt_pkrtz_f16_f32_e32 v0, v3, v5 ; 5E000B03 exp 15, 2, 0, 0, 0, v4, v4, v4, v4 ; F800002F 04040404 v_cvt_pkrtz_f16_f32_e64 v1, v6, 0 ; D25E0001 00010106 exp 15, 3, 1, 1, 1, v0, v1, v0, v1 ; F8001C3F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 16 Code Size: 296 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL OUT[4], GENERIC[2] DCL OUT[5], GENERIC[3] DCL OUT[6], GENERIC[4] DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0..2] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL DCL TEMP[2..8], LOCAL IMM[0] FLT32 { 0.0000, 0.5000, 255.0000, -128.0000} IMM[1] UINT32 {3, 400, 304, 320} IMM[2] INT32 {2, 8, 1, 0} IMM[3] FLT32 { 1.0000, -64.0000, 0.0159, 2.0000} IMM[4] UINT32 {4, 0, 12, 28} IMM[5] FLT32 { 16.0000, 0.0000, 0.0000, 0.0000} IMM[6] UINT32 {44, 60, 24, 32} IMM[7] INT32 {3, 0, 0, 0} IMM[8] UINT32 {16, 48, 20, 36} IMM[9] UINT32 {52, 8, 40, 56} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MOV TEMP[0].y, IMM[0].xxxx 4: SHL TEMP[2].x, IN[3].xxxx, IMM[2].xxxx 5: UADD TEMP[2].x, TEMP[2].xxxx, IMM[2].yyyy 6: I2F TEMP[2].x, TEMP[2].xxxx 7: ADD TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy 8: MUL TEMP[0].x, TEMP[2].xxxx, CONST[4][25].zzzz 9: ADD TEMP[0].xy, TEMP[0].xyyy, IN[4].xyyy 10: FLR TEMP[2].x, TEMP[0].xxxx 11: ADD TEMP[3].x, TEMP[0].xxxx, -TEMP[2].xxxx 12: MAD TEMP[0].x, TEMP[2].xxxx, CONST[4][25].wwww, TEMP[0].yyyy 13: MOV TEMP[3].y, TEMP[0].xxxx 14: MOV TEMP[0].xy, TEMP[3].xyyy 15: MOV TEMP[0].w, IMM[0].xxxx 16: TXL TEMP[0], TEMP[0], SAMP[0], 2D 17: MOV TEMP[2].xy, TEMP[3].xyyy 18: MOV TEMP[2].w, IMM[0].xxxx 19: TXL TEMP[2], TEMP[2], SAMP[0], 2D, IMM[2].zwz 20: MOV TEMP[3].xy, TEMP[3].xyyy 21: MOV TEMP[3].w, IMM[0].xxxx 22: TXL TEMP[3], TEMP[3], SAMP[0], 2D, IMM[2].xwx 23: MAD TEMP[4], IN[1], IMM[0].zzzz, IMM[0].wwww 24: FSLT TEMP[5], TEMP[4], IMM[0].xxxx 25: AND TEMP[5], TEMP[5], IMM[3].xxxx 26: ABS TEMP[4], TEMP[4] 27: ADD TEMP[4], TEMP[4], -TEMP[5] 28: ADD TEMP[4], TEMP[4], IMM[3].yyyy 29: FSLT TEMP[6], TEMP[4], IMM[0].xxxx 30: AND TEMP[6], TEMP[6], IMM[3].xxxx 31: ABS TEMP[4], TEMP[4] 32: ADD TEMP[4], TEMP[4], -TEMP[6] 33: MUL TEMP[4].xy, TEMP[4], IMM[3].zzzz 34: MOV TEMP[7].x, TEMP[4].xxxx 35: MOV TEMP[7].y, TEMP[4].yyyy 36: ADD TEMP[8].x, IMM[3].xxxx, -TEMP[4].xxxx 37: ADD TEMP[4].x, TEMP[8].xxxx, -TEMP[4].yyyy 38: MOV TEMP[7].z, TEMP[4].xxxx 39: DP3 TEMP[4].x, TEMP[7].xyzz, TEMP[7].xyzz 40: RSQ TEMP[4].x, TEMP[4].xxxx 41: MUL TEMP[4].xyz, TEMP[7].xyzz, TEMP[4].xxxx 42: MUL TEMP[6], TEMP[6], IMM[3].wwww 43: ADD TEMP[6].xy, IMM[3].xxxx, -TEMP[6] 44: MUL TEMP[6].xy, TEMP[4].xyyy, TEMP[6].xyyy 45: MOV TEMP[7].w, IMM[0].xxxx 46: MOV TEMP[7].x, TEMP[6].xxxx 47: MOV TEMP[7].y, TEMP[6].yyyy 48: MUL TEMP[5].x, TEMP[5].xxxx, IMM[3].wwww 49: ADD TEMP[5].x, IMM[3].xxxx, -TEMP[5].xxxx 50: MUL TEMP[4].x, TEMP[5].xxxx, TEMP[4].zzzz 51: MOV TEMP[7].z, TEMP[4].xxxx 52: DP4 TEMP[4].x, TEMP[7], TEMP[0] 53: DP4 TEMP[5].x, TEMP[7], TEMP[2] 54: MOV TEMP[4].y, TEMP[5].xxxx 55: DP4 TEMP[5].x, TEMP[7], TEMP[3] 56: MOV TEMP[4].z, TEMP[5].xxxx 57: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 58: RSQ TEMP[5].x, TEMP[5].xxxx 59: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx 60: MOV TEMP[5].xy, IN[4].xyyy 61: MOV TEMP[5].w, IMM[0].xxxx 62: TXL TEMP[5].z, TEMP[5], SAMP[0], 2D 63: MUL TEMP[5].xyz, IN[0].xyzz, TEMP[5].zzzz 64: MOV TEMP[6].w, IMM[3].xxxx 65: MOV TEMP[6].x, TEMP[5].xxxx 66: MOV TEMP[6].y, TEMP[5].yyyy 67: MOV TEMP[6].z, TEMP[5].zzzz 68: DP4 TEMP[0].x, TEMP[6], TEMP[0] 69: DP4 TEMP[2].x, TEMP[6], TEMP[2] 70: DP4 TEMP[3].x, TEMP[6], TEMP[3] 71: MOV TEMP[5].x, TEMP[0].xxxx 72: MOV TEMP[5].y, TEMP[2].xxxx 73: MOV TEMP[5].z, TEMP[3].xxxx 74: ADD TEMP[5].xyz, TEMP[5].xyzz, -CONST[4][19].xyzz 75: MOV TEMP[6].x, TEMP[0].xxxx 76: MOV TEMP[6].y, TEMP[2].xxxx 77: MOV TEMP[6].z, TEMP[3].xxxx 78: DP3 TEMP[7].x, CONST[4][20].xyzz, TEMP[5].xyzz 79: MOV TEMP[6].w, TEMP[7].xxxx 80: MOV TEMP[7].x, TEMP[4].xxxx 81: MOV TEMP[7].y, TEMP[4].yyyy 82: MOV TEMP[7].z, TEMP[4].zzzz 83: DP3 TEMP[8].x, TEMP[5].xyzz, TEMP[5].xyzz 84: RSQ TEMP[8].x, TEMP[8].xxxx 85: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[8].xxxx 86: DP3 TEMP[8].x, TEMP[4].xyzz, CONST[5][0].xyzz 87: MUL TEMP[4].xyz, TEMP[8].xxxx, TEMP[4].xyzz 88: MUL TEMP[4].xyz, IMM[3].wwww, TEMP[4].xyzz 89: ADD TEMP[4].xyz, CONST[5][0].xyzz, -TEMP[4].xyzz 90: DP3 TEMP[4].x, -TEMP[5].xyzz, TEMP[4].xyzz 91: MOV_SAT TEMP[4].x, TEMP[4].xxxx 92: POW TEMP[4].x, TEMP[4].xxxx, IMM[5].xxxx 93: MOV_SAT TEMP[4].x, TEMP[4].xxxx 94: MOV TEMP[7].w, TEMP[4].xxxx 95: MOV TEMP[4].w, IMM[3].xxxx 96: MOV TEMP[4].x, TEMP[0].xxxx 97: MOV TEMP[4].y, TEMP[2].xxxx 98: MOV TEMP[4].z, TEMP[3].xxxx 99: MOV TEMP[0].x, CONST[4][0].wwww 100: MOV TEMP[0].y, CONST[4][1].wwww 101: MOV TEMP[0].z, CONST[4][2].wwww 102: MOV TEMP[0].w, CONST[4][3].wwww 103: DP4 TEMP[0].x, TEMP[4], TEMP[0] 104: MAD TEMP[2].xy, IN[2].xyyy, CONST[1][1].zwww, CONST[1][2].xyyy 105: MOV TEMP[3].xy, IN[4].xyyy 106: MOV TEMP[3].w, IMM[0].xxxx 107: TXL TEMP[3], TEMP[3], SAMP[0], 2D, IMM[7].xyx 108: MOV TEMP[5].x, CONST[4][0].xxxx 109: MOV TEMP[5].y, CONST[4][1].xxxx 110: MOV TEMP[5].z, CONST[4][2].xxxx 111: MOV TEMP[5].w, CONST[4][3].xxxx 112: DP4 TEMP[5].x, TEMP[4], TEMP[5] 113: MOV TEMP[8].x, CONST[4][0].yyyy 114: MOV TEMP[8].y, CONST[4][1].yyyy 115: MOV TEMP[8].z, CONST[4][2].yyyy 116: MOV TEMP[8].w, CONST[4][3].yyyy 117: DP4 TEMP[8].x, TEMP[4], TEMP[8] 118: MOV TEMP[5].y, -TEMP[8].xxxx 119: MOV TEMP[8].x, CONST[4][0].zzzz 120: MOV TEMP[8].y, CONST[4][1].zzzz 121: MOV TEMP[8].z, CONST[4][2].zzzz 122: MOV TEMP[8].w, CONST[4][3].zzzz 123: DP4 TEMP[4].x, TEMP[4], TEMP[8] 124: MAD TEMP[4].x, IMM[3].wwww, TEMP[4].xxxx, -TEMP[0].xxxx 125: MOV TEMP[5].z, TEMP[4].xxxx 126: MOV TEMP[5].w, TEMP[0].xxxx 127: MOV OUT[1], TEMP[1] 128: MOV OUT[2].xy, TEMP[2].xyxx 129: MOV OUT[4], TEMP[7] 130: MOV OUT[6], IMM[0].xxxx 131: MOV OUT[5], TEMP[3] 132: MOV OUT[0], TEMP[5] 133: MOV OUT[3], TEMP[6] 134: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %17 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4 %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 %19 = call float @llvm.SI.load.const(<16 x i8> %18, i32 0) %20 = call float @llvm.SI.load.const(<16 x i8> %18, i32 4) %21 = call float @llvm.SI.load.const(<16 x i8> %18, i32 8) %22 = call float @llvm.SI.load.const(<16 x i8> %18, i32 12) %23 = call float @llvm.SI.load.const(<16 x i8> %18, i32 16) %24 = call float @llvm.SI.load.const(<16 x i8> %18, i32 20) %25 = call float @llvm.SI.load.const(<16 x i8> %18, i32 24) %26 = call float @llvm.SI.load.const(<16 x i8> %18, i32 28) %27 = call float @llvm.SI.load.const(<16 x i8> %18, i32 32) %28 = call float @llvm.SI.load.const(<16 x i8> %18, i32 36) %29 = call float @llvm.SI.load.const(<16 x i8> %18, i32 40) %30 = call float @llvm.SI.load.const(<16 x i8> %18, i32 44) %31 = call float @llvm.SI.load.const(<16 x i8> %18, i32 48) %32 = call float @llvm.SI.load.const(<16 x i8> %18, i32 52) %33 = call float @llvm.SI.load.const(<16 x i8> %18, i32 56) %34 = call float @llvm.SI.load.const(<16 x i8> %18, i32 60) %35 = call float @llvm.SI.load.const(<16 x i8> %18, i32 304) %36 = call float @llvm.SI.load.const(<16 x i8> %18, i32 308) %37 = call float @llvm.SI.load.const(<16 x i8> %18, i32 312) %38 = call float @llvm.SI.load.const(<16 x i8> %18, i32 320) %39 = call float @llvm.SI.load.const(<16 x i8> %18, i32 324) %40 = call float @llvm.SI.load.const(<16 x i8> %18, i32 328) %41 = call float @llvm.SI.load.const(<16 x i8> %18, i32 408) %42 = call float @llvm.SI.load.const(<16 x i8> %18, i32 412) %43 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 5 %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = call float @llvm.SI.load.const(<16 x i8> %44, i32 0) %46 = call float @llvm.SI.load.const(<16 x i8> %44, i32 4) %47 = call float @llvm.SI.load.const(<16 x i8> %44, i32 8) %48 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %49 = load <8 x i32>, <8 x i32> addrspace(2)* %48, align 32, !tbaa !0 %50 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %51 = load <4 x i32>, <4 x i32> addrspace(2)* %50, align 16, !tbaa !0 %52 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !tbaa !0 %54 = add i32 %5, %7 %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %54) %56 = extractelement <4 x float> %55, i32 0 %57 = extractelement <4 x float> %55, i32 1 %58 = extractelement <4 x float> %55, i32 2 %59 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !tbaa !0 %61 = add i32 %5, %7 %62 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %60, i32 0, i32 %61) %63 = extractelement <4 x float> %62, i32 0 %64 = extractelement <4 x float> %62, i32 1 %65 = extractelement <4 x float> %62, i32 2 %66 = extractelement <4 x float> %62, i32 3 %67 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %68 = load <16 x i8>, <16 x i8> addrspace(2)* %67, align 16, !tbaa !0 %69 = add i32 %5, %7 %70 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %68, i32 0, i32 %69) %71 = extractelement <4 x float> %70, i32 0 %72 = extractelement <4 x float> %70, i32 1 %73 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %74 = load <16 x i8>, <16 x i8> addrspace(2)* %73, align 16, !tbaa !0 %75 = add i32 %5, %7 %76 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %74, i32 0, i32 %75) %77 = extractelement <4 x float> %76, i32 0 %78 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %79 = load <16 x i8>, <16 x i8> addrspace(2)* %78, align 16, !tbaa !0 %80 = add i32 %10, %6 %81 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %79, i32 0, i32 %80) %82 = extractelement <4 x float> %81, i32 0 %83 = extractelement <4 x float> %81, i32 1 %84 = bitcast float %77 to i32 %85 = shl i32 %84, 2 %86 = add i32 %85, 8 %87 = sitofp i32 %86 to float %88 = fadd float %87, 5.000000e-01 %89 = fmul float %88, %41 %90 = fadd float %89, %82 %91 = fadd float %83, 0.000000e+00 %92 = call float @floor(float %90) %93 = fsub float %90, %92 %94 = fmul float %92, %42 %95 = fadd float %94, %91 %96 = bitcast float %93 to i32 %97 = bitcast float %95 to i32 %98 = insertelement <4 x i32> undef, i32 %96, i32 0 %99 = insertelement <4 x i32> %98, i32 %97, i32 1 %100 = insertelement <4 x i32> %99, i32 0, i32 2 %101 = bitcast <8 x i32> %49 to <32 x i8> %102 = bitcast <4 x i32> %51 to <16 x i8> %103 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %100, <32 x i8> %101, <16 x i8> %102, i32 2) %104 = extractelement <4 x float> %103, i32 0 %105 = extractelement <4 x float> %103, i32 1 %106 = extractelement <4 x float> %103, i32 2 %107 = extractelement <4 x float> %103, i32 3 %108 = bitcast float %93 to i32 %109 = bitcast float %95 to i32 %110 = insertelement <4 x i32> , i32 %108, i32 1 %111 = insertelement <4 x i32> %110, i32 %109, i32 2 %112 = insertelement <4 x i32> %111, i32 0, i32 3 %113 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %112, <8 x i32> %49, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %114 = extractelement <4 x float> %113, i32 0 %115 = extractelement <4 x float> %113, i32 1 %116 = extractelement <4 x float> %113, i32 2 %117 = extractelement <4 x float> %113, i32 3 %118 = bitcast float %93 to i32 %119 = bitcast float %95 to i32 %120 = insertelement <4 x i32> , i32 %118, i32 1 %121 = insertelement <4 x i32> %120, i32 %119, i32 2 %122 = insertelement <4 x i32> %121, i32 0, i32 3 %123 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %122, <8 x i32> %49, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %124 = extractelement <4 x float> %123, i32 0 %125 = extractelement <4 x float> %123, i32 1 %126 = extractelement <4 x float> %123, i32 2 %127 = extractelement <4 x float> %123, i32 3 %128 = fmul float %63, 2.550000e+02 %129 = fadd float %128, -1.280000e+02 %130 = fmul float %64, 2.550000e+02 %131 = fadd float %130, -1.280000e+02 %132 = fmul float %65, 2.550000e+02 %133 = fadd float %132, -1.280000e+02 %134 = fmul float %66, 2.550000e+02 %135 = fadd float %134, -1.280000e+02 %136 = fcmp olt float %129, 0.000000e+00 %137 = fcmp olt float %131, 0.000000e+00 %138 = fcmp olt float %133, 0.000000e+00 %139 = fcmp olt float %135, 0.000000e+00 %140 = select i1 %136, float 1.000000e+00, float 0.000000e+00 %141 = call float @fabs(float %129) %142 = call float @fabs(float %131) %143 = call float @fabs(float %133) %144 = call float @fabs(float %135) %145 = fsub float %141, %140 %146 = select i1 %137, float -1.000000e+00, float -0.000000e+00 %147 = fadd float %142, %146 %148 = select i1 %138, float -1.000000e+00, float -0.000000e+00 %149 = fadd float %143, %148 %150 = select i1 %139, float -1.000000e+00, float -0.000000e+00 %151 = fadd float %144, %150 %152 = fadd float %145, -6.400000e+01 %153 = fadd float %147, -6.400000e+01 %154 = fadd float %149, -6.400000e+01 %155 = fadd float %151, -6.400000e+01 %156 = fcmp olt float %152, 0.000000e+00 %157 = fcmp olt float %153, 0.000000e+00 %158 = select i1 %156, float 1.000000e+00, float 0.000000e+00 %159 = select i1 %157, float 1.000000e+00, float 0.000000e+00 %160 = call float @fabs(float %152) %161 = call float @fabs(float %153) %162 = call float @fabs(float %154) %163 = call float @fabs(float %155) %164 = fsub float %160, %158 %165 = fsub float %161, %159 %166 = fmul float %164, 0x3F90410420000000 %167 = fmul float %165, 0x3F90410420000000 %168 = fsub float 1.000000e+00, %166 %169 = fsub float %168, %167 %170 = fmul float %166, %166 %171 = fmul float %167, %167 %172 = fadd float %171, %170 %173 = fmul float %169, %169 %174 = fadd float %172, %173 %175 = call float @llvm.AMDGPU.rsq.clamped.f32(float %174) %176 = fmul float %166, %175 %177 = fmul float %167, %175 %178 = fmul float %169, %175 %179 = fmul float %158, 2.000000e+00 %180 = fmul float %159, 2.000000e+00 %181 = fsub float 1.000000e+00, %179 %182 = fsub float 1.000000e+00, %180 %183 = fmul float %176, %181 %184 = fmul float %177, %182 %185 = fmul float %140, 2.000000e+00 %186 = fsub float 1.000000e+00, %185 %187 = fmul float %186, %178 %188 = fmul float %183, %104 %189 = fmul float %184, %105 %190 = fadd float %188, %189 %191 = fmul float %187, %106 %192 = fadd float %190, %191 %193 = fmul float %107, 0.000000e+00 %194 = fadd float %192, %193 %195 = fmul float %183, %114 %196 = fmul float %184, %115 %197 = fadd float %195, %196 %198 = fmul float %187, %116 %199 = fadd float %197, %198 %200 = fmul float %117, 0.000000e+00 %201 = fadd float %199, %200 %202 = fmul float %183, %124 %203 = fmul float %184, %125 %204 = fadd float %202, %203 %205 = fmul float %187, %126 %206 = fadd float %204, %205 %207 = fmul float %127, 0.000000e+00 %208 = fadd float %206, %207 %209 = fmul float %194, %194 %210 = fmul float %201, %201 %211 = fadd float %210, %209 %212 = fmul float %208, %208 %213 = fadd float %211, %212 %214 = call float @llvm.AMDGPU.rsq.clamped.f32(float %213) %215 = fmul float %194, %214 %216 = fmul float %201, %214 %217 = fmul float %208, %214 %218 = bitcast float %82 to i32 %219 = bitcast float %83 to i32 %220 = insertelement <4 x i32> undef, i32 %218, i32 0 %221 = insertelement <4 x i32> %220, i32 %219, i32 1 %222 = insertelement <4 x i32> %221, i32 0, i32 2 %223 = bitcast <8 x i32> %49 to <32 x i8> %224 = bitcast <4 x i32> %51 to <16 x i8> %225 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %222, <32 x i8> %223, <16 x i8> %224, i32 2) %226 = extractelement <4 x float> %225, i32 2 %227 = fmul float %56, %226 %228 = fmul float %57, %226 %229 = fmul float %58, %226 %230 = fmul float %227, %104 %231 = fmul float %228, %105 %232 = fadd float %230, %231 %233 = fmul float %229, %106 %234 = fadd float %232, %233 %235 = fadd float %234, %107 %236 = fmul float %227, %114 %237 = fmul float %228, %115 %238 = fadd float %236, %237 %239 = fmul float %229, %116 %240 = fadd float %238, %239 %241 = fadd float %240, %117 %242 = fmul float %227, %124 %243 = fmul float %228, %125 %244 = fadd float %242, %243 %245 = fmul float %229, %126 %246 = fadd float %244, %245 %247 = fadd float %246, %127 %248 = fsub float %235, %35 %249 = fsub float %241, %36 %250 = fsub float %247, %37 %251 = fmul float %38, %248 %252 = fmul float %39, %249 %253 = fadd float %252, %251 %254 = fmul float %40, %250 %255 = fadd float %253, %254 %256 = fmul float %248, %248 %257 = fmul float %249, %249 %258 = fadd float %257, %256 %259 = fmul float %250, %250 %260 = fadd float %258, %259 %261 = call float @llvm.AMDGPU.rsq.clamped.f32(float %260) %262 = fmul float %248, %261 %263 = fmul float %249, %261 %264 = fmul float %250, %261 %265 = fmul float %215, %45 %266 = fmul float %216, %46 %267 = fadd float %266, %265 %268 = fmul float %217, %47 %269 = fadd float %267, %268 %270 = fmul float %269, %215 %271 = fmul float %269, %216 %272 = fmul float %269, %217 %273 = fmul float %270, 2.000000e+00 %274 = fmul float %271, 2.000000e+00 %275 = fmul float %272, 2.000000e+00 %276 = fsub float %45, %273 %277 = fsub float %46, %274 %278 = fsub float %47, %275 %279 = fmul float %262, %276 %280 = fsub float -0.000000e+00, %279 %281 = fmul float %263, %277 %282 = fsub float %280, %281 %283 = fmul float %264, %278 %284 = fsub float %282, %283 %285 = call float @llvm.AMDIL.clamp.(float %284, float 0.000000e+00, float 1.000000e+00) %286 = call float @llvm.pow.f32(float %285, float 1.600000e+01) %287 = call float @llvm.AMDIL.clamp.(float %286, float 0.000000e+00, float 1.000000e+00) %288 = fmul float %235, %22 %289 = fmul float %241, %26 %290 = fadd float %288, %289 %291 = fmul float %247, %30 %292 = fadd float %290, %291 %293 = fadd float %292, %34 %294 = fmul float %71, %13 %295 = fadd float %294, %15 %296 = fmul float %72, %14 %297 = fadd float %296, %16 %298 = bitcast float %82 to i32 %299 = bitcast float %83 to i32 %300 = insertelement <4 x i32> , i32 %298, i32 1 %301 = insertelement <4 x i32> %300, i32 %299, i32 2 %302 = insertelement <4 x i32> %301, i32 0, i32 3 %303 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %302, <8 x i32> %49, <4 x i32> %51, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %304 = extractelement <4 x float> %303, i32 0 %305 = extractelement <4 x float> %303, i32 1 %306 = extractelement <4 x float> %303, i32 2 %307 = extractelement <4 x float> %303, i32 3 %308 = fmul float %235, %19 %309 = fmul float %241, %23 %310 = fadd float %308, %309 %311 = fmul float %247, %27 %312 = fadd float %310, %311 %313 = fadd float %312, %31 %314 = fmul float %235, %20 %315 = fmul float %241, %24 %316 = fadd float %314, %315 %317 = fmul float %247, %28 %318 = fadd float %316, %317 %319 = fadd float %318, %32 %320 = fsub float -0.000000e+00, %319 %321 = fmul float %235, %21 %322 = fmul float %241, %25 %323 = fadd float %321, %322 %324 = fmul float %247, %29 %325 = fadd float %323, %324 %326 = fadd float %325, %33 %327 = fmul float %326, 2.000000e+00 %328 = fsub float %327, %293 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %295, float %297, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %235, float %241, float %247, float %255) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %215, float %216, float %217, float %287) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %304, float %305, float %306, float %307) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %313, float %320, float %328, float %293) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @floor(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0xc3000000 ; 7E0202FF C3000000 v_mov_b32_e32 v2, 0x437f0000 ; 7E0402FF 437F0000 v_mov_b32_e32 v4, 0x80000000 ; 7E0802FF 80000000 v_mov_b32_e32 v5, 0xc2800000 ; 7E0A02FF C2800000 v_mov_b32_e32 v6, 0x3c820821 ; 7E0C02FF 3C820821 v_add_i32_e32 v0, s10, v0 ; 4A00000A v_add_i32_e32 v3, s11, v3 ; 4A06060B s_load_dwordx4 s[28:31], s[4:5], 0x0 ; C08E0500 s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908 s_load_dwordx4 s[24:27], s[8:9], 0xc ; C08C090C s_load_dwordx4 s[40:43], s[8:9], 0x10 ; C0940910 s_load_dwordx4 s[44:47], s[2:3], 0x4 ; C0960304 s_load_dwordx4 s[8:11], s[2:3], 0x10 ; C0840310 s_load_dwordx4 s[48:51], s[2:3], 0x14 ; C0980314 s_load_dwordx8 s[32:39], s[6:7], 0x0 ; C0D00700 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[7:10], v0, s[12:15], 0 idxen ; E00C2000 80030700 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[10:13], v0, s[16:19], 0 idxen ; E00C2000 80040A00 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[12:15], v0, s[20:23], 0 idxen ; E00C2000 80050C00 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[14:17], v0, s[24:27], 0 idxen ; E00C2000 80060E00 buffer_load_format_xyzw v[19:22], v3, s[40:43], 0 idxen ; E00C2000 800A1303 s_buffer_load_dword s41, s[44:47], 0x6 ; C214AD06 s_buffer_load_dword s40, s[44:47], 0x7 ; C2142D07 s_buffer_load_dword s43, s[44:47], 0x8 ; C215AD08 s_buffer_load_dword s42, s[44:47], 0x9 ; C2152D09 s_buffer_load_dword s12, s[8:11], 0x51 ; C2060951 s_buffer_load_dword s13, s[8:11], 0x52 ; C2068952 s_buffer_load_dword s27, s[8:11], 0x66 ; C20D8966 s_buffer_load_dword s26, s[8:11], 0x67 ; C20D0967 s_buffer_load_dword s0, s[8:11], 0xf ; C200090F s_buffer_load_dword s25, s[8:11], 0x4c ; C20C894C s_buffer_load_dword s23, s[8:11], 0x4d ; C20B894D s_buffer_load_dword s24, s[8:11], 0x4e ; C20C094E s_buffer_load_dword s19, s[8:11], 0x50 ; C2098950 s_buffer_load_dword s22, s[48:51], 0x0 ; C20B3100 s_buffer_load_dword s21, s[48:51], 0x1 ; C20AB101 s_buffer_load_dword s20, s[48:51], 0x2 ; C20A3102 s_buffer_load_dword s3, s[8:11], 0x5 ; C2018905 s_buffer_load_dword s4, s[8:11], 0x6 ; C2020906 s_buffer_load_dword s14, s[8:11], 0x7 ; C2070907 s_buffer_load_dword s2, s[8:11], 0x8 ; C2010908 s_buffer_load_dword s1, s[8:11], 0x9 ; C2008909 s_buffer_load_dword s5, s[8:11], 0x0 ; C2028900 s_buffer_load_dword s6, s[8:11], 0x1 ; C2030901 s_buffer_load_dword s7, s[8:11], 0x2 ; C2038902 s_buffer_load_dword s15, s[8:11], 0x3 ; C2078903 s_buffer_load_dword s17, s[8:11], 0x4 ; C2088904 s_buffer_load_dword s16, s[8:11], 0xa ; C208090A s_buffer_load_dword s18, s[8:11], 0xb ; C209090B s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v0, s43 ; 7E00022B v_mad_f32 v3, v2, v10, v1 ; D2820003 04061502 v_mac_f32_e32 v1, v2, v11 ; 3E021702 v_mac_f32_e32 v0, s41, v12 ; 3E001829 v_mov_b32_e32 v2, s42 ; 7E04022A v_mac_f32_e32 v2, s40, v13 ; 3E041A28 v_lshlrev_b32_e32 v10, 2, v14 ; 34141C82 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v4, v4, -1.0, vcc ; D2000004 01A9E704 v_cmp_gt_f32_e32 vcc, 0, v3 ; 7C080680 v_cndmask_b32_e64 v11, 0, 1.0, vcc ; D200000B 01A9E480 v_add_f32_e64 v1, |v1|, v4 ; D2060101 00020901 v_sub_f32_e64 v3, |v3|, v11 ; D2080103 00021703 v_add_f32_e32 v3, v5, v3 ; 06060705 v_add_f32_e32 v1, v5, v1 ; 06020305 v_cmp_gt_f32_e32 vcc, 0, v3 ; 7C080680 v_cndmask_b32_e64 v4, 0, 1.0, vcc ; D2000004 01A9E480 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v5, 0, 1.0, vcc ; D2000005 01A9E480 v_add_i32_e32 v10, 8, v10 ; 4A141488 v_cvt_f32_i32_e32 v10, v10 ; 7E140B0A v_sub_f32_e64 v12, v4, |v3| ; D208020C 00020704 v_mad_f32 v12, v12, v6, 1.0 ; D282000C 03CA0D0C v_sub_f32_e64 v13, v5, |v1| ; D208020D 00020305 v_mac_f32_e32 v12, v6, v13 ; 3E181B06 v_add_f32_e32 v10, 0.5, v10 ; 061414F0 v_mad_f32 v10, s27, v10, v19 ; D282000A 044E141B v_sub_f32_e64 v3, |v3|, v4 ; D2080103 00020903 v_sub_f32_e64 v1, |v1|, v5 ; D2080101 00020B01 v_mul_f32_e32 v3, v6, v3 ; 10060706 v_mul_f32_e32 v1, v6, v1 ; 10020306 v_floor_f32_e32 v6, v10 ; 7E0C490A v_subrev_f32_e32 v26, v6, v10 ; 0A341506 v_mad_f32 v27, s26, v6, v20 ; D282001B 04520C1A v_mul_f32_e32 v6, v3, v3 ; 100C0703 v_mac_f32_e32 v6, v1, v1 ; 3E0C0301 v_mov_b32_e32 v28, 0 ; 7E380280 v_mac_f32_e32 v6, v12, v12 ; 3E0C190C image_sample_l v[13:16], 15, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[32:39], s[28:31] ; F0900F00 00E80D1A v_mov_b32_e32 v25, 0x10001 ; 7E3202FF 00010001 image_sample_l_o v[29:32], 15, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[32:39], s[28:31] ; F0D00F00 00E81D19 v_mov_b32_e32 v25, 0x20002 ; 7E3202FF 00020002 v_rsq_clamp_f32_e32 v6, v6 ; 7E0C5906 v_mov_b32_e32 v21, v28 ; 7E2A031C image_sample_l_o v[22:25], 15, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[32:39], s[28:31] ; F0D00F00 00E81619 s_waitcnt vmcnt(0) ; BF8C0770 image_sample_l v10, 4, 0, 0, 0, 0, 0, 0, 0, v[19:22], s[32:39], s[28:31] ; F0900400 00E80A13 v_mov_b32_e32 v18, 0x30003 ; 7E2402FF 00030003 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v7, v10, v7 ; 100E0F0A v_mul_f32_e32 v8, v10, v8 ; 1010110A v_mul_f32_e32 v9, v10, v9 ; 1012130A v_mov_b32_e32 v21, v28 ; 7E2A031C image_sample_l_o v[17:20], 15, 0, 0, 0, 0, 0, 0, 0, v[18:21], s[32:39], s[28:31] ; F0D00F00 00E81112 v_mul_f32_e32 v3, v6, v3 ; 10060706 v_mul_f32_e32 v1, v6, v1 ; 10020306 v_mul_f32_e32 v6, v6, v12 ; 100C1906 v_mad_f32 v4, -2.0, v4, 1.0 ; D2820004 03CA08F5 v_mul_f32_e32 v3, v4, v3 ; 10060704 v_mad_f32 v4, -2.0, v5, 1.0 ; D2820004 03CA0AF5 v_mul_f32_e32 v1, v4, v1 ; 10020304 v_mad_f32 v4, -2.0, v11, 1.0 ; D2820004 03CA16F5 v_mul_f32_e32 v4, v6, v4 ; 10080906 v_mul_f32_e32 v5, v14, v8 ; 100A110E v_mac_f32_e32 v5, v13, v7 ; 3E0A0F0D v_mac_f32_e32 v5, v15, v9 ; 3E0A130F v_mul_f32_e32 v6, v14, v1 ; 100C030E v_mac_f32_e32 v6, v13, v3 ; 3E0C070D v_add_f32_e32 v5, v16, v5 ; 060A0B10 v_mac_f32_e32 v6, v15, v4 ; 3E0C090F v_mul_f32_e32 v10, v30, v8 ; 1014111E v_mac_f32_e32 v10, v29, v7 ; 3E140F1D v_mac_f32_e32 v10, v31, v9 ; 3E14131F v_mul_f32_e32 v11, v30, v1 ; 1016031E v_mac_f32_e32 v11, v29, v3 ; 3E16071D v_add_f32_e32 v10, v32, v10 ; 06141520 v_mac_f32_e32 v11, v31, v4 ; 3E16091F v_mul_f32_e32 v8, v23, v8 ; 10101117 v_mac_f32_e32 v8, v22, v7 ; 3E100F16 v_mac_f32_e32 v8, v24, v9 ; 3E101318 v_mul_f32_e32 v1, v23, v1 ; 10020317 v_mac_f32_e32 v1, v22, v3 ; 3E020716 v_mac_f32_e32 v1, v24, v4 ; 3E020918 v_mul_f32_e32 v3, v6, v6 ; 10060D06 v_mac_f32_e32 v3, v11, v11 ; 3E06170B v_mac_f32_e32 v3, v1, v1 ; 3E060301 v_rsq_clamp_f32_e32 v3, v3 ; 7E065903 v_add_f32_e32 v4, v25, v8 ; 06081119 exp 15, 32, 0, 0, 0, v28, v28, v28, v28 ; F800020F 1C1C1C1C exp 15, 33, 0, 0, 0, v0, v2, v28, v28 ; F800021F 1C1C0200 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_mul_f32_e32 v0, v3, v6 ; 10000D03 v_mul_f32_e32 v2, v3, v11 ; 10041703 v_mul_f32_e32 v1, v3, v1 ; 10020303 v_subrev_f32_e32 v3, s25, v5 ; 0A060A19 v_subrev_f32_e32 v6, s23, v10 ; 0A0C1417 v_subrev_f32_e32 v7, s24, v4 ; 0A0E0818 v_mul_f32_e32 v8, v3, v3 ; 10100703 v_mac_f32_e32 v8, v6, v6 ; 3E100D06 v_mac_f32_e32 v8, v7, v7 ; 3E100F07 v_rsq_clamp_f32_e32 v8, v8 ; 7E105908 v_mul_f32_e32 v9, s22, v0 ; 10120016 v_mac_f32_e32 v9, s21, v2 ; 3E120415 v_mac_f32_e32 v9, s20, v1 ; 3E120214 v_mul_f32_e32 v11, v0, v9 ; 10161300 v_mad_f32 v11, 2.0, v11, -s22 ; D282000B 805A16F4 v_mul_f32_e32 v12, v8, v3 ; 10180708 v_mul_f32_e32 v11, v11, v12 ; 1016190B v_mul_f32_e32 v12, v2, v9 ; 10181302 v_mad_f32 v12, 2.0, v12, -s21 ; D282000C 805618F4 v_mul_f32_e32 v13, v8, v6 ; 101A0D08 v_mac_f32_e32 v11, v12, v13 ; 3E161B0C v_mul_f32_e32 v9, v1, v9 ; 10121301 v_mad_f32 v9, 2.0, v9, -s20 ; D2820009 805212F4 v_mul_f32_e32 v8, v8, v7 ; 10100F08 v_mac_f32_e32 v11, v9, v8 ; 3E161109 v_mul_f32_e32 v3, s19, v3 ; 10060613 v_add_f32_e64 v8, 0, v11 clamp ; D2060808 00021680 v_log_f32_e32 v8, v8 ; 7E104F08 v_mac_f32_e32 v3, s12, v6 ; 3E060C0C v_mac_f32_e32 v3, s13, v7 ; 3E060E0D exp 15, 34, 0, 0, 0, v5, v10, v4, v3 ; F800022F 03040A05 s_waitcnt expcnt(0) ; BF8C070F v_mul_legacy_f32_e32 v3, 0x41800000, v8 ; 0E0610FF 41800000 v_exp_f32_e32 v3, v3 ; 7E064B03 v_add_f32_e64 v3, 0, v3 clamp ; D2060803 00020680 exp 15, 35, 0, 0, 0, v0, v2, v1, v3 ; F800023F 03010200 s_buffer_load_dword s12, s[8:11], 0xc ; C206090C s_buffer_load_dword s13, s[8:11], 0xd ; C206890D s_buffer_load_dword s8, s[8:11], 0xe ; C204090E s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s14, v10 ; 1000140E v_mul_f32_e32 v1, s17, v10 ; 10021411 v_mul_f32_e32 v2, s3, v10 ; 10041403 v_mul_f32_e32 v3, s4, v10 ; 10061404 v_mac_f32_e32 v0, s15, v5 ; 3E000A0F v_mac_f32_e32 v1, s5, v5 ; 3E020A05 v_mac_f32_e32 v2, s6, v5 ; 3E040A06 v_mac_f32_e32 v3, s7, v5 ; 3E060A07 v_mac_f32_e32 v0, s18, v4 ; 3E000812 v_mac_f32_e32 v1, s2, v4 ; 3E020802 v_mac_f32_e32 v2, s1, v4 ; 3E040801 v_mac_f32_e32 v3, s16, v4 ; 3E060810 v_add_f32_e32 v0, s0, v0 ; 06000000 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v1, s12, v1 ; 0602020C v_add_f32_e32 v2, s13, v2 ; 0604040D v_add_f32_e32 v3, s8, v3 ; 06060608 v_xor_b32_e32 v2, 0x80000000, v2 ; 3A0404FF 80000000 v_mad_f32 v3, 2.0, v3, -v0 ; D2820003 840206F4 exp 15, 36, 0, 0, 0, v17, v18, v19, v20 ; F800024F 14131211 exp 15, 37, 0, 0, 0, v28, v28, v28, v28 ; F800025F 1C1C1C1C exp 15, 12, 0, 0, 0, v1, v2, v3, v0 ; F80000CF 00030201 exp 15, 13, 0, 1, 0, v28, v28, v28, v28 ; F80008DF 1C1C1C1C s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 36 Code Size: 1044 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0xB last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL OUT[2], COLOR[2] DCL OUT[3], COLOR[3] DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[1][0..3] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0..3], LOCAL IMM[0] UINT32 {0, 16, 48, 44} IMM[1] FLT32 { 0.5000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV TEMP[1].w, TEMP[0].wwww 3: MUL TEMP[1].xyz, CONST[1][1].xyzz, TEMP[0].xyzz 4: MUL TEMP[0].xyz, TEMP[1], IN[3] 5: MOV TEMP[1].xy, IN[0].xyyy 6: TEX TEMP[1].xyz, TEMP[1], SAMP[1], 2D 7: DP3 TEMP[2].x, IN[2].xyzz, IN[2].xyzz 8: RSQ TEMP[2].x, TEMP[2].xxxx 9: MUL TEMP[2].xyz, IN[2].xyzz, TEMP[2].xxxx 10: MAD TEMP[2].xyz, TEMP[2].xyzz, IMM[1].xxxx, IMM[1].xxxx 11: MOV TEMP[3].w, IMM[1].yyyy 12: MOV TEMP[3].x, TEMP[2].xxxx 13: MOV TEMP[3].y, TEMP[2].yyyy 14: MOV TEMP[3].z, TEMP[2].zzzz 15: MOV TEMP[2].w, IMM[1].yyyy 16: MOV TEMP[2].x, TEMP[0].xxxx 17: MOV TEMP[2].y, TEMP[0].yyyy 18: MOV TEMP[2].z, TEMP[0].zzzz 19: MOV TEMP[0].w, IMM[1].yyyy 20: MUL TEMP[0].x, CONST[1][3].xxxx, TEMP[1].xxxx 21: MOV TEMP[0].y, TEMP[1].yyyy 22: MUL TEMP[1].x, CONST[1][2].wwww, TEMP[1].zzzz 23: MOV TEMP[0].z, TEMP[1].xxxx 24: MOV OUT[2], IN[1].wwww 25: MOV OUT[0], TEMP[2] 26: MOV OUT[3], TEMP[3] 27: MOV OUT[1], TEMP[0] 28: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %29 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %30 = load <32 x i8>, <32 x i8> addrspace(2)* %29, align 32, !tbaa !0 %31 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %32 = load <16 x i8>, <16 x i8> addrspace(2)* %31, align 16, !tbaa !0 %33 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %34 = bitcast <8 x i32> addrspace(2)* %33 to <32 x i8> addrspace(2)* %35 = load <32 x i8>, <32 x i8> addrspace(2)* %34, align 32, !tbaa !0 %36 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %37 = bitcast <4 x i32> addrspace(2)* %36 to <16 x i8> addrspace(2)* %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %44 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %45 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %46 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %47 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %48 = bitcast float %39 to i32 %49 = bitcast float %40 to i32 %50 = insertelement <2 x i32> undef, i32 %48, i32 0 %51 = insertelement <2 x i32> %50, i32 %49, i32 1 %52 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %51, <32 x i8> %30, <16 x i8> %32, i32 2) %53 = extractelement <4 x float> %52, i32 0 %54 = extractelement <4 x float> %52, i32 1 %55 = extractelement <4 x float> %52, i32 2 %56 = fmul float %24, %53 %57 = fmul float %25, %54 %58 = fmul float %26, %55 %59 = fmul float %56, %45 %60 = fmul float %57, %46 %61 = fmul float %58, %47 %62 = bitcast float %39 to i32 %63 = bitcast float %40 to i32 %64 = insertelement <2 x i32> undef, i32 %62, i32 0 %65 = insertelement <2 x i32> %64, i32 %63, i32 1 %66 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %65, <32 x i8> %35, <16 x i8> %38, i32 2) %67 = extractelement <4 x float> %66, i32 0 %68 = extractelement <4 x float> %66, i32 1 %69 = extractelement <4 x float> %66, i32 2 %70 = fmul float %42, %42 %71 = fmul float %43, %43 %72 = fadd float %71, %70 %73 = fmul float %44, %44 %74 = fadd float %72, %73 %75 = call float @llvm.AMDGPU.rsq.clamped.f32(float %74) %76 = fmul float %42, %75 %77 = fmul float %43, %75 %78 = fmul float %44, %75 %79 = fmul float %76, 5.000000e-01 %80 = fadd float %79, 5.000000e-01 %81 = fmul float %77, 5.000000e-01 %82 = fadd float %81, 5.000000e-01 %83 = fmul float %78, 5.000000e-01 %84 = fadd float %83, 5.000000e-01 %85 = fmul float %28, %67 %86 = fmul float %27, %69 %87 = call i32 @llvm.SI.packf16(float %59, float %60) %88 = bitcast i32 %87 to float %89 = call i32 @llvm.SI.packf16(float %61, float 0.000000e+00) %90 = bitcast i32 %89 to float %91 = call i32 @llvm.SI.packf16(float %85, float %68) %92 = bitcast i32 %91 to float %93 = call i32 @llvm.SI.packf16(float %86, float 0.000000e+00) %94 = bitcast i32 %93 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %88, float %90, float %88, float %90) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 1, i32 1, float %92, float %94, float %92, float %94) %95 = call i32 @llvm.SI.packf16(float %80, float %82) %96 = bitcast i32 %95 to float %97 = call i32 @llvm.SI.packf16(float %84, float 0.000000e+00) %98 = bitcast i32 %97 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 2, i32 0, float %41, float %41, float %41, float %41) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 3, i32 1, float %96, float %98, float %96, float %98) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0xb ; C205810B s_buffer_load_dword s0, s[0:3], 0xc ; C200010C v_interp_p1_f32 v4, v0, 3, 1, [m0] ; C8100700 v_interp_p2_f32 v4, [v4], v1, 3, 1, [m0] ; C8110701 v_interp_p1_f32 v5, v0, 0, 2, [m0] ; C8140800 v_interp_p2_f32 v5, [v5], v1, 0, 2, [m0] ; C8150801 v_interp_p1_f32 v6, v0, 1, 2, [m0] ; C8180900 v_interp_p2_f32 v6, [v6], v1, 1, 2, [m0] ; C8190901 s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500 s_load_dwordx4 s[16:19], s[4:5], 0x4 ; C0880504 s_load_dwordx8 s[20:27], s[6:7], 0x0 ; C0CA0700 s_load_dwordx8 s[28:35], s[6:7], 0x8 ; C0CE0708 v_interp_p1_f32 v7, v0, 2, 2, [m0] ; C81C0A00 v_interp_p2_f32 v7, [v7], v1, 2, 2, [m0] ; C81D0A01 v_interp_p1_f32 v8, v0, 0, 3, [m0] ; C8200C00 v_interp_p2_f32 v8, [v8], v1, 0, 3, [m0] ; C8210C01 v_interp_p1_f32 v9, v0, 1, 3, [m0] ; C8240D00 v_interp_p2_f32 v9, [v9], v1, 1, 3, [m0] ; C8250D01 v_interp_p1_f32 v0, v0, 2, 3, [m0] ; C8000E00 v_interp_p2_f32 v0, [v0], v1, 2, 3, [m0] ; C8010E01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[10:12], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[20:27], s[12:15] ; F0800700 00650A02 image_sample v[1:3], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[28:35], s[16:19] ; F0800700 00870102 s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v10, s8, v10 ; 10141408 v_mul_f32_e32 v11, s9, v11 ; 10161609 v_mul_f32_e32 v12, s10, v12 ; 1018180A s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, s0, v1 ; 10020200 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_mul_f32_e32 v2, s11, v3 ; 1004060B v_mul_f32_e32 v3, v8, v10 ; 10061508 v_mul_f32_e32 v8, v9, v11 ; 10101709 v_mul_f32_e32 v0, v0, v12 ; 10001900 v_cvt_pkrtz_f16_f32_e32 v3, v3, v8 ; 5E061103 v_mul_f32_e32 v8, v5, v5 ; 10100B05 v_mac_f32_e32 v8, v6, v6 ; 3E100D06 v_mac_f32_e32 v8, v7, v7 ; 3E100F07 v_rsq_clamp_f32_e32 v8, v8 ; 7E105908 v_cvt_pkrtz_f16_f32_e64 v0, v0, 0 ; D25E0000 00010100 exp 15, 0, 1, 0, 0, v3, v0, v3, v0 ; F800040F 00030003 s_waitcnt expcnt(0) ; BF8C070F v_cvt_pkrtz_f16_f32_e64 v0, v2, 0 ; D25E0000 00010102 exp 15, 1, 1, 0, 0, v1, v0, v1, v0 ; F800041F 00010001 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, v8, v5 ; 10000B08 v_mul_f32_e32 v1, v8, v6 ; 10020D08 v_mul_f32_e32 v2, v8, v7 ; 10040F08 v_mad_f32 v0, 0.5, v0, 0.5 ; D2820000 03C200F0 v_mad_f32 v1, 0.5, v1, 0.5 ; D2820001 03C202F0 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 exp 15, 2, 0, 0, 0, v4, v4, v4, v4 ; F800002F 04040404 v_mad_f32 v1, 0.5, v2, 0.5 ; D2820001 03C204F0 v_cvt_pkrtz_f16_f32_e64 v1, v1, 0 ; D25E0001 00010101 exp 15, 3, 1, 1, 1, v0, v1, v0, v1 ; F8001C3F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 16 Code Size: 316 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0xB last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL OUT[2], COLOR[2] DCL OUT[3], COLOR[3] DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL CONST[1][0..3] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0..3], LOCAL IMM[0] UINT32 {0, 32, 16, 48} IMM[1] FLT32 { 0.5000, 0.0000, 0.0000, 0.0000} IMM[2] UINT32 {44, 0, 0, 0} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0].xyz, TEMP[0], SAMP[0], 2D 2: MUL TEMP[1].xyz, IN[3].xyzz, CONST[1][1].xyzz 3: MOV TEMP[2].xy, IN[0].xyyy 4: TEX TEMP[2].y, TEMP[2], SAMP[1], 2D 5: LRP TEMP[1].xyz, TEMP[2].yyyy, TEMP[1].xyzz, CONST[1][2].xyzz 6: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xyzz 7: MOV TEMP[1].xy, IN[0].xyyy 8: TEX TEMP[1].xyz, TEMP[1], SAMP[2], 2D 9: DP3 TEMP[2].x, IN[2].xyzz, IN[2].xyzz 10: RSQ TEMP[2].x, TEMP[2].xxxx 11: MUL TEMP[2].xyz, IN[2].xyzz, TEMP[2].xxxx 12: MAD TEMP[2].xyz, TEMP[2].xyzz, IMM[1].xxxx, IMM[1].xxxx 13: MOV TEMP[3].w, IMM[1].yyyy 14: MOV TEMP[3].x, TEMP[2].xxxx 15: MOV TEMP[3].y, TEMP[2].yyyy 16: MOV TEMP[3].z, TEMP[2].zzzz 17: MOV TEMP[2].w, IMM[1].yyyy 18: MOV TEMP[2].x, TEMP[0].xxxx 19: MOV TEMP[2].y, TEMP[0].yyyy 20: MOV TEMP[2].z, TEMP[0].zzzz 21: MOV TEMP[0].w, IMM[1].yyyy 22: MUL TEMP[0].x, CONST[1][3].xxxx, TEMP[1].xxxx 23: MOV TEMP[0].y, TEMP[1].yyyy 24: MUL TEMP[1].x, CONST[1][2].wwww, TEMP[1].zzzz 25: MOV TEMP[0].z, TEMP[1].xxxx 26: MOV OUT[2], IN[1].wwww 27: MOV OUT[0], TEMP[2] 28: MOV OUT[3], TEMP[3] 29: MOV OUT[1], TEMP[0] 30: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %32 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %33 = load <32 x i8>, <32 x i8> addrspace(2)* %32, align 32, !tbaa !0 %34 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 %36 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %37 = bitcast <8 x i32> addrspace(2)* %36 to <32 x i8> addrspace(2)* %38 = load <32 x i8>, <32 x i8> addrspace(2)* %37, align 32, !tbaa !0 %39 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %40 = bitcast <4 x i32> addrspace(2)* %39 to <16 x i8> addrspace(2)* %41 = load <16 x i8>, <16 x i8> addrspace(2)* %40, align 16, !tbaa !0 %42 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %43 = bitcast <8 x i32> addrspace(2)* %42 to <32 x i8> addrspace(2)* %44 = load <32 x i8>, <32 x i8> addrspace(2)* %43, align 32, !tbaa !0 %45 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %46 = bitcast <4 x i32> addrspace(2)* %45 to <16 x i8> addrspace(2)* %47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0 %48 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %49 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %50 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %51 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %52 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %53 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %54 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %55 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %56 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %57 = bitcast float %48 to i32 %58 = bitcast float %49 to i32 %59 = insertelement <2 x i32> undef, i32 %57, i32 0 %60 = insertelement <2 x i32> %59, i32 %58, i32 1 %61 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %60, <32 x i8> %33, <16 x i8> %35, i32 2) %62 = extractelement <4 x float> %61, i32 0 %63 = extractelement <4 x float> %61, i32 1 %64 = extractelement <4 x float> %61, i32 2 %65 = fmul float %54, %24 %66 = fmul float %55, %25 %67 = fmul float %56, %26 %68 = bitcast float %48 to i32 %69 = bitcast float %49 to i32 %70 = insertelement <2 x i32> undef, i32 %68, i32 0 %71 = insertelement <2 x i32> %70, i32 %69, i32 1 %72 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %71, <32 x i8> %38, <16 x i8> %41, i32 2) %73 = extractelement <4 x float> %72, i32 1 %74 = call float @llvm.AMDGPU.lrp(float %73, float %65, float %27) %75 = call float @llvm.AMDGPU.lrp(float %73, float %66, float %28) %76 = call float @llvm.AMDGPU.lrp(float %73, float %67, float %29) %77 = fmul float %62, %74 %78 = fmul float %63, %75 %79 = fmul float %64, %76 %80 = bitcast float %48 to i32 %81 = bitcast float %49 to i32 %82 = insertelement <2 x i32> undef, i32 %80, i32 0 %83 = insertelement <2 x i32> %82, i32 %81, i32 1 %84 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %83, <32 x i8> %44, <16 x i8> %47, i32 2) %85 = extractelement <4 x float> %84, i32 0 %86 = extractelement <4 x float> %84, i32 1 %87 = extractelement <4 x float> %84, i32 2 %88 = fmul float %51, %51 %89 = fmul float %52, %52 %90 = fadd float %89, %88 %91 = fmul float %53, %53 %92 = fadd float %90, %91 %93 = call float @llvm.AMDGPU.rsq.clamped.f32(float %92) %94 = fmul float %51, %93 %95 = fmul float %52, %93 %96 = fmul float %53, %93 %97 = fmul float %94, 5.000000e-01 %98 = fadd float %97, 5.000000e-01 %99 = fmul float %95, 5.000000e-01 %100 = fadd float %99, 5.000000e-01 %101 = fmul float %96, 5.000000e-01 %102 = fadd float %101, 5.000000e-01 %103 = fmul float %31, %85 %104 = fmul float %30, %87 %105 = call i32 @llvm.SI.packf16(float %77, float %78) %106 = bitcast i32 %105 to float %107 = call i32 @llvm.SI.packf16(float %79, float 0.000000e+00) %108 = bitcast i32 %107 to float %109 = call i32 @llvm.SI.packf16(float %103, float %86) %110 = bitcast i32 %109 to float %111 = call i32 @llvm.SI.packf16(float %104, float 0.000000e+00) %112 = bitcast i32 %111 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %106, float %108, float %106, float %108) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 1, i32 1, float %110, float %112, float %110, float %112) %113 = call i32 @llvm.SI.packf16(float %98, float %100) %114 = bitcast i32 %113 to float %115 = call i32 @llvm.SI.packf16(float %102, float 0.000000e+00) %116 = bitcast i32 %115 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 2, i32 0, float %50, float %50, float %50, float %50) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 3, i32 1, float %114, float %116, float %114, float %116) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 3, 1, [m0] ; C8100700 v_interp_p2_f32 v4, [v4], v1, 3, 1, [m0] ; C8110701 v_interp_p1_f32 v5, v0, 0, 2, [m0] ; C8140800 v_interp_p2_f32 v5, [v5], v1, 0, 2, [m0] ; C8150801 v_interp_p1_f32 v6, v0, 1, 2, [m0] ; C8180900 v_interp_p2_f32 v6, [v6], v1, 1, 2, [m0] ; C8190901 v_interp_p1_f32 v7, v0, 2, 2, [m0] ; C81C0A00 v_interp_p2_f32 v7, [v7], v1, 2, 2, [m0] ; C81D0A01 v_interp_p1_f32 v8, v0, 0, 3, [m0] ; C8200C00 v_interp_p2_f32 v8, [v8], v1, 0, 3, [m0] ; C8210C01 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 v_interp_p1_f32 v9, v0, 1, 3, [m0] ; C8240D00 s_load_dwordx4 s[20:23], s[4:5], 0x4 ; C08A0504 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 v_interp_p2_f32 v9, [v9], v1, 1, 3, [m0] ; C8250D01 v_interp_p1_f32 v0, v0, 2, 3, [m0] ; C8000E00 s_load_dwordx4 s[32:35], s[4:5], 0x8 ; C0900508 s_load_dwordx8 s[36:43], s[6:7], 0x10 ; C0D20710 v_interp_p2_f32 v0, [v0], v1, 2, 3, [m0] ; C8010E01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[10:12], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800700 00430A02 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_buffer_load_dword s6, s[0:3], 0x6 ; C2030106 image_sample v1, 2, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[20:23] ; F0800200 00A60102 s_buffer_load_dword s7, s[0:3], 0x8 ; C2038108 s_buffer_load_dword s8, s[0:3], 0x9 ; C2040109 s_buffer_load_dword s9, s[0:3], 0xa ; C204810A image_sample v[13:15], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[36:43], s[32:35] ; F0800700 01090D02 s_buffer_load_dword s10, s[0:3], 0xc ; C205010C s_buffer_load_dword s0, s[0:3], 0xb ; C200010B s_waitcnt vmcnt(2) lgkmcnt(0) ; BF8C0072 v_mul_f32_e32 v2, s4, v8 ; 10041004 v_mul_f32_e32 v3, s5, v9 ; 10061205 v_mul_f32_e32 v0, s6, v0 ; 10000006 s_waitcnt vmcnt(1) ; BF8C0771 v_sub_f32_e32 v8, 1.0, v1 ; 081002F2 v_mul_f32_e32 v9, s7, v8 ; 10121007 v_mul_f32_e32 v16, s8, v8 ; 10201008 v_mul_f32_e32 v8, s9, v8 ; 10101009 v_mac_f32_e32 v9, v2, v1 ; 3E120302 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, s10, v13 ; 10041A0A v_mul_f32_e32 v13, s0, v15 ; 101A1E00 v_mac_f32_e32 v16, v3, v1 ; 3E200303 v_mac_f32_e32 v8, v0, v1 ; 3E100300 v_mul_f32_e32 v0, v9, v10 ; 10001509 v_mul_f32_e32 v1, v16, v11 ; 10021710 v_mul_f32_e32 v3, v8, v12 ; 10061908 v_cvt_pkrtz_f16_f32_e32 v2, v2, v14 ; 5E041D02 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_mul_f32_e32 v1, v5, v5 ; 10020B05 v_mac_f32_e32 v1, v6, v6 ; 3E020D06 v_mac_f32_e32 v1, v7, v7 ; 3E020F07 v_rsq_clamp_f32_e32 v1, v1 ; 7E025901 v_cvt_pkrtz_f16_f32_e64 v3, v3, 0 ; D25E0003 00010103 exp 15, 0, 1, 0, 0, v0, v3, v0, v3 ; F800040F 03000300 s_waitcnt expcnt(0) ; BF8C070F v_cvt_pkrtz_f16_f32_e64 v0, v13, 0 ; D25E0000 0001010D exp 15, 1, 1, 0, 0, v2, v0, v2, v0 ; F800041F 00020002 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, v1, v5 ; 10000B01 v_mul_f32_e32 v2, v1, v6 ; 10040D01 v_mul_f32_e32 v1, v1, v7 ; 10020F01 v_mad_f32 v0, 0.5, v0, 0.5 ; D2820000 03C200F0 v_mad_f32 v2, 0.5, v2, 0.5 ; D2820002 03C204F0 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 exp 15, 2, 0, 0, 0, v4, v4, v4, v4 ; F800002F 04040404 v_mad_f32 v1, 0.5, v1, 0.5 ; D2820001 03C202F0 v_cvt_pkrtz_f16_f32_e64 v1, v1, 0 ; D25E0001 00010101 exp 15, 3, 1, 1, 1, v0, v1, v0, v1 ; F8001C3F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 20 Code Size: 372 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL OUT[4], GENERIC[2] DCL OUT[5], GENERIC[3] DCL OUT[6], GENERIC[4] DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0..2] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL DCL TEMP[2..8], LOCAL IMM[0] FLT32 { 0.0000, 255.0000, -128.0000, 1.0000} IMM[1] INT32 {1, 0, 2, 3} IMM[2] FLT32 { -64.0000, 0.0159, 2.0000, 0.0000} IMM[3] UINT32 {3, 320, 304, 12} IMM[4] UINT32 {28, 44, 60, 0} IMM[5] UINT32 {24, 32, 16, 48} IMM[6] UINT32 {4, 20, 36, 52} IMM[7] UINT32 {8, 40, 56, 0} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MOV TEMP[0].xy, IN[3].xyyy 4: MOV TEMP[0].w, IMM[0].xxxx 5: TXL TEMP[0], TEMP[0], SAMP[0], 2D 6: MOV TEMP[2].xy, IN[3].xyyy 7: MOV TEMP[2].w, IMM[0].xxxx 8: TXL TEMP[2], TEMP[2], SAMP[0], 2D, IMM[1].xyx 9: MOV TEMP[3].xy, IN[3].xyyy 10: MOV TEMP[3].w, IMM[0].xxxx 11: TXL TEMP[3], TEMP[3], SAMP[0], 2D, IMM[1].zyz 12: MAD TEMP[4], IN[1], IMM[0].yyyy, IMM[0].zzzz 13: FSLT TEMP[5], TEMP[4], IMM[0].xxxx 14: AND TEMP[5], TEMP[5], IMM[0].wwww 15: ABS TEMP[4], TEMP[4] 16: ADD TEMP[4], TEMP[4], -TEMP[5] 17: ADD TEMP[4], TEMP[4], IMM[2].xxxx 18: FSLT TEMP[6], TEMP[4], IMM[0].xxxx 19: AND TEMP[6], TEMP[6], IMM[0].wwww 20: ABS TEMP[4], TEMP[4] 21: ADD TEMP[4], TEMP[4], -TEMP[6] 22: MUL TEMP[4].xy, TEMP[4], IMM[2].yyyy 23: MOV TEMP[7].x, TEMP[4].xxxx 24: MOV TEMP[7].y, TEMP[4].yyyy 25: ADD TEMP[8].x, IMM[0].wwww, -TEMP[4].xxxx 26: ADD TEMP[4].x, TEMP[8].xxxx, -TEMP[4].yyyy 27: MOV TEMP[7].z, TEMP[4].xxxx 28: DP3 TEMP[4].x, TEMP[7].xyzz, TEMP[7].xyzz 29: RSQ TEMP[4].x, TEMP[4].xxxx 30: MUL TEMP[4].xyz, TEMP[7].xyzz, TEMP[4].xxxx 31: MUL TEMP[6], TEMP[6], IMM[2].zzzz 32: ADD TEMP[6].xy, IMM[0].wwww, -TEMP[6] 33: MUL TEMP[6].xy, TEMP[4].xyyy, TEMP[6].xyyy 34: MOV TEMP[7].w, IMM[0].xxxx 35: MOV TEMP[7].x, TEMP[6].xxxx 36: MOV TEMP[7].y, TEMP[6].yyyy 37: MUL TEMP[5].x, TEMP[5].xxxx, IMM[2].zzzz 38: ADD TEMP[5].x, IMM[0].wwww, -TEMP[5].xxxx 39: MUL TEMP[4].x, TEMP[5].xxxx, TEMP[4].zzzz 40: MOV TEMP[7].z, TEMP[4].xxxx 41: DP4 TEMP[4].x, TEMP[7], TEMP[0] 42: DP4 TEMP[5].x, TEMP[7], TEMP[2] 43: MOV TEMP[4].y, TEMP[5].xxxx 44: DP4 TEMP[5].x, TEMP[7], TEMP[3] 45: MOV TEMP[4].z, TEMP[5].xxxx 46: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 47: RSQ TEMP[5].x, TEMP[5].xxxx 48: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx 49: MOV TEMP[5].w, IMM[0].wwww 50: MOV TEMP[5].x, IN[0].xxxx 51: MOV TEMP[5].y, IN[0].yyyy 52: MOV TEMP[5].z, IN[0].zzzz 53: DP4 TEMP[0].x, TEMP[5], TEMP[0] 54: DP4 TEMP[2].x, TEMP[5], TEMP[2] 55: DP4 TEMP[3].x, TEMP[5], TEMP[3] 56: MOV TEMP[5].x, TEMP[0].xxxx 57: MOV TEMP[5].y, TEMP[2].xxxx 58: MOV TEMP[5].z, TEMP[3].xxxx 59: MOV TEMP[6].x, TEMP[0].xxxx 60: MOV TEMP[6].y, TEMP[2].xxxx 61: MOV TEMP[6].z, TEMP[3].xxxx 62: ADD TEMP[5].xyz, TEMP[5].xyzz, -CONST[4][19].xyzz 63: DP3 TEMP[5].x, CONST[4][20].xyzz, TEMP[5].xyzz 64: MOV TEMP[6].w, TEMP[5].xxxx 65: MOV TEMP[5].w, IMM[0].xxxx 66: MOV TEMP[5].x, TEMP[4].xxxx 67: MOV TEMP[5].y, TEMP[4].yyyy 68: MOV TEMP[5].z, TEMP[4].zzzz 69: MOV TEMP[4].w, IMM[0].wwww 70: MOV TEMP[4].x, TEMP[0].xxxx 71: MOV TEMP[4].y, TEMP[2].xxxx 72: MOV TEMP[4].z, TEMP[3].xxxx 73: MOV TEMP[0].x, CONST[4][0].wwww 74: MOV TEMP[0].y, CONST[4][1].wwww 75: MOV TEMP[0].z, CONST[4][2].wwww 76: MOV TEMP[0].w, CONST[4][3].wwww 77: DP4 TEMP[0].x, TEMP[4], TEMP[0] 78: MAD TEMP[2].xy, IN[2].xyyy, CONST[1][1].zwww, CONST[1][2].xyyy 79: MOV TEMP[3].xy, IN[3].xyyy 80: MOV TEMP[3].w, IMM[0].xxxx 81: TXL TEMP[3], TEMP[3], SAMP[0], 2D, IMM[1].wyw 82: MOV TEMP[7].x, CONST[4][0].xxxx 83: MOV TEMP[7].y, CONST[4][1].xxxx 84: MOV TEMP[7].z, CONST[4][2].xxxx 85: MOV TEMP[7].w, CONST[4][3].xxxx 86: DP4 TEMP[7].x, TEMP[4], TEMP[7] 87: MOV TEMP[8].x, CONST[4][0].yyyy 88: MOV TEMP[8].y, CONST[4][1].yyyy 89: MOV TEMP[8].z, CONST[4][2].yyyy 90: MOV TEMP[8].w, CONST[4][3].yyyy 91: DP4 TEMP[8].x, TEMP[4], TEMP[8] 92: MOV TEMP[7].y, -TEMP[8].xxxx 93: MOV TEMP[8].x, CONST[4][0].zzzz 94: MOV TEMP[8].y, CONST[4][1].zzzz 95: MOV TEMP[8].z, CONST[4][2].zzzz 96: MOV TEMP[8].w, CONST[4][3].zzzz 97: DP4 TEMP[4].x, TEMP[4], TEMP[8] 98: MAD TEMP[4].x, IMM[2].zzzz, TEMP[4].xxxx, -TEMP[0].xxxx 99: MOV TEMP[7].z, TEMP[4].xxxx 100: MOV TEMP[7].w, TEMP[0].xxxx 101: MOV OUT[1], TEMP[1] 102: MOV OUT[2].xy, TEMP[2].xyxx 103: MOV OUT[4], TEMP[5] 104: MOV OUT[6], IMM[0].xxxx 105: MOV OUT[5], TEMP[3] 106: MOV OUT[0], TEMP[7] 107: MOV OUT[3], TEMP[6] 108: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %17 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4 %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 %19 = call float @llvm.SI.load.const(<16 x i8> %18, i32 0) %20 = call float @llvm.SI.load.const(<16 x i8> %18, i32 4) %21 = call float @llvm.SI.load.const(<16 x i8> %18, i32 8) %22 = call float @llvm.SI.load.const(<16 x i8> %18, i32 12) %23 = call float @llvm.SI.load.const(<16 x i8> %18, i32 16) %24 = call float @llvm.SI.load.const(<16 x i8> %18, i32 20) %25 = call float @llvm.SI.load.const(<16 x i8> %18, i32 24) %26 = call float @llvm.SI.load.const(<16 x i8> %18, i32 28) %27 = call float @llvm.SI.load.const(<16 x i8> %18, i32 32) %28 = call float @llvm.SI.load.const(<16 x i8> %18, i32 36) %29 = call float @llvm.SI.load.const(<16 x i8> %18, i32 40) %30 = call float @llvm.SI.load.const(<16 x i8> %18, i32 44) %31 = call float @llvm.SI.load.const(<16 x i8> %18, i32 48) %32 = call float @llvm.SI.load.const(<16 x i8> %18, i32 52) %33 = call float @llvm.SI.load.const(<16 x i8> %18, i32 56) %34 = call float @llvm.SI.load.const(<16 x i8> %18, i32 60) %35 = call float @llvm.SI.load.const(<16 x i8> %18, i32 304) %36 = call float @llvm.SI.load.const(<16 x i8> %18, i32 308) %37 = call float @llvm.SI.load.const(<16 x i8> %18, i32 312) %38 = call float @llvm.SI.load.const(<16 x i8> %18, i32 320) %39 = call float @llvm.SI.load.const(<16 x i8> %18, i32 324) %40 = call float @llvm.SI.load.const(<16 x i8> %18, i32 328) %41 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %42 = load <8 x i32>, <8 x i32> addrspace(2)* %41, align 32, !tbaa !0 %43 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %44 = load <4 x i32>, <4 x i32> addrspace(2)* %43, align 16, !tbaa !0 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !tbaa !0 %54 = add i32 %5, %7 %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %54) %56 = extractelement <4 x float> %55, i32 0 %57 = extractelement <4 x float> %55, i32 1 %58 = extractelement <4 x float> %55, i32 2 %59 = extractelement <4 x float> %55, i32 3 %60 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %61 = load <16 x i8>, <16 x i8> addrspace(2)* %60, align 16, !tbaa !0 %62 = add i32 %5, %7 %63 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %61, i32 0, i32 %62) %64 = extractelement <4 x float> %63, i32 0 %65 = extractelement <4 x float> %63, i32 1 %66 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %67 = load <16 x i8>, <16 x i8> addrspace(2)* %66, align 16, !tbaa !0 %68 = add i32 %10, %6 %69 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %67, i32 0, i32 %68) %70 = extractelement <4 x float> %69, i32 0 %71 = extractelement <4 x float> %69, i32 1 %72 = bitcast float %70 to i32 %73 = bitcast float %71 to i32 %74 = insertelement <4 x i32> undef, i32 %72, i32 0 %75 = insertelement <4 x i32> %74, i32 %73, i32 1 %76 = insertelement <4 x i32> %75, i32 0, i32 2 %77 = bitcast <8 x i32> %42 to <32 x i8> %78 = bitcast <4 x i32> %44 to <16 x i8> %79 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %76, <32 x i8> %77, <16 x i8> %78, i32 2) %80 = extractelement <4 x float> %79, i32 0 %81 = extractelement <4 x float> %79, i32 1 %82 = extractelement <4 x float> %79, i32 2 %83 = extractelement <4 x float> %79, i32 3 %84 = bitcast float %70 to i32 %85 = bitcast float %71 to i32 %86 = insertelement <4 x i32> , i32 %84, i32 1 %87 = insertelement <4 x i32> %86, i32 %85, i32 2 %88 = insertelement <4 x i32> %87, i32 0, i32 3 %89 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %88, <8 x i32> %42, <4 x i32> %44, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %90 = extractelement <4 x float> %89, i32 0 %91 = extractelement <4 x float> %89, i32 1 %92 = extractelement <4 x float> %89, i32 2 %93 = extractelement <4 x float> %89, i32 3 %94 = bitcast float %70 to i32 %95 = bitcast float %71 to i32 %96 = insertelement <4 x i32> , i32 %94, i32 1 %97 = insertelement <4 x i32> %96, i32 %95, i32 2 %98 = insertelement <4 x i32> %97, i32 0, i32 3 %99 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %98, <8 x i32> %42, <4 x i32> %44, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %100 = extractelement <4 x float> %99, i32 0 %101 = extractelement <4 x float> %99, i32 1 %102 = extractelement <4 x float> %99, i32 2 %103 = extractelement <4 x float> %99, i32 3 %104 = fmul float %56, 2.550000e+02 %105 = fadd float %104, -1.280000e+02 %106 = fmul float %57, 2.550000e+02 %107 = fadd float %106, -1.280000e+02 %108 = fmul float %58, 2.550000e+02 %109 = fadd float %108, -1.280000e+02 %110 = fmul float %59, 2.550000e+02 %111 = fadd float %110, -1.280000e+02 %112 = fcmp olt float %105, 0.000000e+00 %113 = fcmp olt float %107, 0.000000e+00 %114 = fcmp olt float %109, 0.000000e+00 %115 = fcmp olt float %111, 0.000000e+00 %116 = select i1 %112, float 1.000000e+00, float 0.000000e+00 %117 = call float @fabs(float %105) %118 = call float @fabs(float %107) %119 = call float @fabs(float %109) %120 = call float @fabs(float %111) %121 = fsub float %117, %116 %122 = select i1 %113, float -1.000000e+00, float -0.000000e+00 %123 = fadd float %118, %122 %124 = select i1 %114, float -1.000000e+00, float -0.000000e+00 %125 = fadd float %119, %124 %126 = select i1 %115, float -1.000000e+00, float -0.000000e+00 %127 = fadd float %120, %126 %128 = fadd float %121, -6.400000e+01 %129 = fadd float %123, -6.400000e+01 %130 = fadd float %125, -6.400000e+01 %131 = fadd float %127, -6.400000e+01 %132 = fcmp olt float %128, 0.000000e+00 %133 = fcmp olt float %129, 0.000000e+00 %134 = select i1 %132, float 1.000000e+00, float 0.000000e+00 %135 = select i1 %133, float 1.000000e+00, float 0.000000e+00 %136 = call float @fabs(float %128) %137 = call float @fabs(float %129) %138 = call float @fabs(float %130) %139 = call float @fabs(float %131) %140 = fsub float %136, %134 %141 = fsub float %137, %135 %142 = fmul float %140, 0x3F90410420000000 %143 = fmul float %141, 0x3F90410420000000 %144 = fsub float 1.000000e+00, %142 %145 = fsub float %144, %143 %146 = fmul float %142, %142 %147 = fmul float %143, %143 %148 = fadd float %147, %146 %149 = fmul float %145, %145 %150 = fadd float %148, %149 %151 = call float @llvm.AMDGPU.rsq.clamped.f32(float %150) %152 = fmul float %142, %151 %153 = fmul float %143, %151 %154 = fmul float %145, %151 %155 = fmul float %134, 2.000000e+00 %156 = fmul float %135, 2.000000e+00 %157 = fsub float 1.000000e+00, %155 %158 = fsub float 1.000000e+00, %156 %159 = fmul float %152, %157 %160 = fmul float %153, %158 %161 = fmul float %116, 2.000000e+00 %162 = fsub float 1.000000e+00, %161 %163 = fmul float %162, %154 %164 = fmul float %159, %80 %165 = fmul float %160, %81 %166 = fadd float %164, %165 %167 = fmul float %163, %82 %168 = fadd float %166, %167 %169 = fmul float %83, 0.000000e+00 %170 = fadd float %168, %169 %171 = fmul float %159, %90 %172 = fmul float %160, %91 %173 = fadd float %171, %172 %174 = fmul float %163, %92 %175 = fadd float %173, %174 %176 = fmul float %93, 0.000000e+00 %177 = fadd float %175, %176 %178 = fmul float %159, %100 %179 = fmul float %160, %101 %180 = fadd float %178, %179 %181 = fmul float %163, %102 %182 = fadd float %180, %181 %183 = fmul float %103, 0.000000e+00 %184 = fadd float %182, %183 %185 = fmul float %170, %170 %186 = fmul float %177, %177 %187 = fadd float %186, %185 %188 = fmul float %184, %184 %189 = fadd float %187, %188 %190 = call float @llvm.AMDGPU.rsq.clamped.f32(float %189) %191 = fmul float %170, %190 %192 = fmul float %177, %190 %193 = fmul float %184, %190 %194 = fmul float %49, %80 %195 = fmul float %50, %81 %196 = fadd float %194, %195 %197 = fmul float %51, %82 %198 = fadd float %196, %197 %199 = fadd float %198, %83 %200 = fmul float %49, %90 %201 = fmul float %50, %91 %202 = fadd float %200, %201 %203 = fmul float %51, %92 %204 = fadd float %202, %203 %205 = fadd float %204, %93 %206 = fmul float %49, %100 %207 = fmul float %50, %101 %208 = fadd float %206, %207 %209 = fmul float %51, %102 %210 = fadd float %208, %209 %211 = fadd float %210, %103 %212 = fsub float %199, %35 %213 = fsub float %205, %36 %214 = fsub float %211, %37 %215 = fmul float %38, %212 %216 = fmul float %39, %213 %217 = fadd float %216, %215 %218 = fmul float %40, %214 %219 = fadd float %217, %218 %220 = fmul float %199, %22 %221 = fmul float %205, %26 %222 = fadd float %220, %221 %223 = fmul float %211, %30 %224 = fadd float %222, %223 %225 = fadd float %224, %34 %226 = fmul float %64, %13 %227 = fadd float %226, %15 %228 = fmul float %65, %14 %229 = fadd float %228, %16 %230 = bitcast float %70 to i32 %231 = bitcast float %71 to i32 %232 = insertelement <4 x i32> , i32 %230, i32 1 %233 = insertelement <4 x i32> %232, i32 %231, i32 2 %234 = insertelement <4 x i32> %233, i32 0, i32 3 %235 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %234, <8 x i32> %42, <4 x i32> %44, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %236 = extractelement <4 x float> %235, i32 0 %237 = extractelement <4 x float> %235, i32 1 %238 = extractelement <4 x float> %235, i32 2 %239 = extractelement <4 x float> %235, i32 3 %240 = fmul float %199, %19 %241 = fmul float %205, %23 %242 = fadd float %240, %241 %243 = fmul float %211, %27 %244 = fadd float %242, %243 %245 = fadd float %244, %31 %246 = fmul float %199, %20 %247 = fmul float %205, %24 %248 = fadd float %246, %247 %249 = fmul float %211, %28 %250 = fadd float %248, %249 %251 = fadd float %250, %32 %252 = fsub float -0.000000e+00, %251 %253 = fmul float %199, %21 %254 = fmul float %205, %25 %255 = fadd float %253, %254 %256 = fmul float %211, %29 %257 = fadd float %255, %256 %258 = fadd float %257, %33 %259 = fmul float %258, 2.000000e+00 %260 = fsub float %259, %225 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %227, float %229, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %199, float %205, float %211, float %219) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %191, float %192, float %193, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %236, float %237, float %238, float %239) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %245, float %252, float %260, float %225) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0xc3000000 ; 7E0202FF C3000000 v_mov_b32_e32 v2, 0x437f0000 ; 7E0402FF 437F0000 v_mov_b32_e32 v4, 0x80000000 ; 7E0802FF 80000000 v_mov_b32_e32 v5, 0xc2800000 ; 7E0A02FF C2800000 v_mov_b32_e32 v6, 0x3c820821 ; 7E0C02FF 3C820821 v_add_i32_e32 v0, s10, v0 ; 4A00000A v_add_i32_e32 v3, s11, v3 ; 4A06060B s_load_dwordx4 s[24:27], s[4:5], 0x0 ; C08C0500 s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908 s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C s_load_dwordx4 s[36:39], s[2:3], 0x4 ; C0920304 s_load_dwordx4 s[40:43], s[2:3], 0x10 ; C0940310 s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[7:10], v0, s[12:15], 0 idxen ; E00C2000 80030700 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[10:13], v0, s[16:19], 0 idxen ; E00C2000 80040A00 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[12:15], v0, s[20:23], 0 idxen ; E00C2000 80050C00 buffer_load_format_xyzw v[18:21], v3, s[8:11], 0 idxen ; E00C2000 80021203 s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v20, 0 ; 7E280280 s_buffer_load_dword s0, s[40:43], 0xf ; C200290F s_buffer_load_dword s19, s[40:43], 0x4c ; C209A94C s_buffer_load_dword s16, s[40:43], 0x4d ; C208294D s_buffer_load_dword s8, s[40:43], 0x4e ; C204294E s_buffer_load_dword s20, s[40:43], 0x50 ; C20A2950 s_buffer_load_dword s22, s[36:39], 0x6 ; C20B2506 s_buffer_load_dword s23, s[36:39], 0x7 ; C20BA507 s_buffer_load_dword s44, s[36:39], 0x8 ; C2162508 s_buffer_load_dword s36, s[36:39], 0x9 ; C2122509 s_buffer_load_dword s21, s[40:43], 0x51 ; C20AA951 s_buffer_load_dword s15, s[40:43], 0x52 ; C207A952 s_buffer_load_dword s3, s[40:43], 0x5 ; C201A905 s_buffer_load_dword s4, s[40:43], 0x6 ; C2022906 s_buffer_load_dword s9, s[40:43], 0x7 ; C204A907 s_buffer_load_dword s2, s[40:43], 0x8 ; C2012908 s_buffer_load_dword s1, s[40:43], 0x9 ; C200A909 s_buffer_load_dword s5, s[40:43], 0x0 ; C202A900 s_buffer_load_dword s6, s[40:43], 0x1 ; C2032901 s_buffer_load_dword s7, s[40:43], 0x2 ; C203A902 s_buffer_load_dword s11, s[40:43], 0x3 ; C205A903 s_buffer_load_dword s17, s[40:43], 0x4 ; C208A904 s_buffer_load_dword s12, s[40:43], 0xa ; C206290A s_buffer_load_dword s18, s[40:43], 0xb ; C209290B s_buffer_load_dword s13, s[40:43], 0xc ; C206A90C s_buffer_load_dword s10, s[40:43], 0xd ; C205290D s_buffer_load_dword s14, s[40:43], 0xe ; C207290E s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s44 ; 7E00022C v_mov_b32_e32 v3, s36 ; 7E060224 image_sample_l v[21:24], 15, 0, 0, 0, 0, 0, 0, 0, v[18:21], s[28:35], s[24:27] ; F0900F00 00C71512 v_mov_b32_e32 v17, 0x10001 ; 7E2202FF 00010001 image_sample_l_o v[25:28], 15, 0, 0, 0, 0, 0, 0, 0, v[17:20], s[28:35], s[24:27] ; F0D00F00 00C71911 v_mov_b32_e32 v17, 0x20002 ; 7E2202FF 00020002 image_sample_l_o v[29:32], 15, 0, 0, 0, 0, 0, 0, 0, v[17:20], s[28:35], s[24:27] ; F0D00F00 00C71D11 v_mov_b32_e32 v17, 0x30003 ; 7E2202FF 00030003 image_sample_l_o v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[17:20], s[28:35], s[24:27] ; F0D00F00 00C70E11 exp 15, 32, 0, 0, 0, v20, v20, v20, v20 ; F800020F 14141414 s_waitcnt vmcnt(3) ; BF8C0773 v_mul_f32_e32 v18, v22, v8 ; 10241116 v_mad_f32 v10, v2, v10, v1 ; D282000A 04061502 v_mac_f32_e32 v1, v2, v11 ; 3E021702 v_mac_f32_e32 v0, s22, v12 ; 3E001816 v_mac_f32_e32 v3, s23, v13 ; 3E061A17 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v2, v4, -1.0, vcc ; D2000002 01A9E704 v_add_f32_e64 v1, |v1|, v2 ; D2060101 00020501 v_cmp_gt_f32_e32 vcc, 0, v10 ; 7C081480 v_cndmask_b32_e64 v2, 0, 1.0, vcc ; D2000002 01A9E480 v_sub_f32_e64 v4, |v10|, v2 ; D2080104 0002050A v_add_f32_e32 v4, v5, v4 ; 06080905 v_add_f32_e32 v1, v5, v1 ; 06020305 v_cmp_gt_f32_e32 vcc, 0, v4 ; 7C080880 v_cndmask_b32_e64 v5, 0, 1.0, vcc ; D2000005 01A9E480 v_sub_f32_e64 v10, v5, |v4| ; D208020A 00020905 v_mad_f32 v10, v10, v6, 1.0 ; D282000A 03CA0D0A v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v11, 0, 1.0, vcc ; D200000B 01A9E480 v_sub_f32_e64 v12, v11, |v1| ; D208020C 0002030B v_mac_f32_e32 v10, v6, v12 ; 3E141906 v_sub_f32_e64 v4, |v4|, v5 ; D2080104 00020B04 v_sub_f32_e64 v1, |v1|, v11 ; D2080101 00021701 v_mul_f32_e32 v4, v6, v4 ; 10080906 v_mul_f32_e32 v1, v6, v1 ; 10020306 v_mac_f32_e32 v18, v21, v7 ; 3E240F15 s_waitcnt vmcnt(2) ; BF8C0772 v_mul_f32_e32 v6, v26, v8 ; 100C111A v_mac_f32_e32 v6, v25, v7 ; 3E0C0F19 s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v8, v30, v8 ; 1010111E v_mac_f32_e32 v8, v29, v7 ; 3E100F1D v_mul_f32_e32 v7, v4, v4 ; 100E0904 v_mac_f32_e32 v7, v1, v1 ; 3E0E0301 v_mac_f32_e32 v7, v10, v10 ; 3E0E150A v_rsq_clamp_f32_e32 v7, v7 ; 7E0E5907 v_mac_f32_e32 v18, v23, v9 ; 3E241317 v_mac_f32_e32 v6, v27, v9 ; 3E0C131B v_mac_f32_e32 v8, v31, v9 ; 3E10131F v_mul_f32_e32 v4, v7, v4 ; 10080907 v_mul_f32_e32 v1, v7, v1 ; 10020307 v_mul_f32_e32 v7, v7, v10 ; 100E1507 v_mad_f32 v5, -2.0, v5, 1.0 ; D2820005 03CA0AF5 v_mul_f32_e32 v4, v5, v4 ; 10080905 v_mad_f32 v5, -2.0, v11, 1.0 ; D2820005 03CA16F5 v_mul_f32_e32 v1, v5, v1 ; 10020305 v_mad_f32 v2, -2.0, v2, 1.0 ; D2820002 03CA04F5 v_mul_f32_e32 v2, v7, v2 ; 10040507 v_add_f32_e32 v5, v24, v18 ; 060A2518 v_mul_f32_e32 v7, v22, v1 ; 100E0316 v_mac_f32_e32 v7, v21, v4 ; 3E0E0915 v_mac_f32_e32 v7, v23, v2 ; 3E0E0517 v_add_f32_e32 v6, v28, v6 ; 060C0D1C v_mul_f32_e32 v9, v26, v1 ; 1012031A v_mac_f32_e32 v9, v25, v4 ; 3E120919 v_mac_f32_e32 v9, v27, v2 ; 3E12051B v_mul_f32_e32 v1, v30, v1 ; 1002031E v_mac_f32_e32 v1, v29, v4 ; 3E02091D v_mac_f32_e32 v1, v31, v2 ; 3E02051F v_add_f32_e32 v2, v32, v8 ; 06041120 exp 15, 33, 0, 0, 0, v0, v3, v20, v20 ; F800021F 14140300 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_subrev_f32_e32 v0, s19, v5 ; 0A000A13 v_mul_f32_e32 v0, s20, v0 ; 10000014 v_subrev_f32_e32 v3, s16, v6 ; 0A060C10 v_mac_f32_e32 v0, s21, v3 ; 3E000615 v_mul_f32_e32 v3, v7, v7 ; 10060F07 v_mac_f32_e32 v3, v9, v9 ; 3E061309 v_mac_f32_e32 v3, v1, v1 ; 3E060301 v_rsq_clamp_f32_e32 v3, v3 ; 7E065903 v_subrev_f32_e32 v4, s8, v2 ; 0A080408 v_mac_f32_e32 v0, s15, v4 ; 3E00080F exp 15, 34, 0, 0, 0, v5, v6, v2, v0 ; F800022F 00020605 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, v3, v7 ; 10000F03 v_mul_f32_e32 v4, v3, v9 ; 10081303 v_mul_f32_e32 v1, v3, v1 ; 10020303 exp 15, 35, 0, 0, 0, v0, v4, v1, v20 ; F800023F 14010400 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s9, v6 ; 10000C09 v_mul_f32_e32 v1, s17, v6 ; 10020C11 v_mul_f32_e32 v3, s3, v6 ; 10060C03 v_mul_f32_e32 v4, s4, v6 ; 10080C04 v_mac_f32_e32 v0, s11, v5 ; 3E000A0B v_mac_f32_e32 v1, s5, v5 ; 3E020A05 v_mac_f32_e32 v3, s6, v5 ; 3E060A06 v_mac_f32_e32 v4, s7, v5 ; 3E080A07 v_mac_f32_e32 v0, s18, v2 ; 3E000412 v_mac_f32_e32 v1, s2, v2 ; 3E020402 v_mac_f32_e32 v3, s1, v2 ; 3E060401 v_mac_f32_e32 v4, s12, v2 ; 3E08040C v_add_f32_e32 v0, s0, v0 ; 06000000 v_add_f32_e32 v1, s13, v1 ; 0602020D v_add_f32_e32 v2, s10, v3 ; 0604060A v_add_f32_e32 v3, s14, v4 ; 0606080E v_xor_b32_e32 v2, 0x80000000, v2 ; 3A0404FF 80000000 v_mad_f32 v3, 2.0, v3, -v0 ; D2820003 840206F4 exp 15, 36, 0, 0, 0, v14, v15, v16, v17 ; F800024F 11100F0E exp 15, 37, 0, 0, 0, v20, v20, v20, v20 ; F800025F 14141414 exp 15, 12, 0, 0, 0, v1, v2, v3, v0 ; F80000CF 00030201 exp 15, 13, 0, 1, 0, v20, v20, v20, v20 ; F80008DF 14141414 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 36 Code Size: 820 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0xB last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL OUT[2], COLOR[2] DCL OUT[3], COLOR[3] DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0..3] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0..2], LOCAL IMM[0] UINT32 {0, 16, 0, 0} IMM[1] FLT32 { 0.5000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV TEMP[1].w, TEMP[0].wwww 3: MUL TEMP[1].xyz, CONST[1][1].xyzz, TEMP[0].xyzz 4: MUL TEMP[0].xyz, TEMP[1], IN[3] 5: DP3 TEMP[1].x, IN[2].xyzz, IN[2].xyzz 6: RSQ TEMP[1].x, TEMP[1].xxxx 7: MUL TEMP[1].xyz, IN[2].xyzz, TEMP[1].xxxx 8: MAD TEMP[1].xyz, TEMP[1].xyzz, IMM[1].xxxx, IMM[1].xxxx 9: MOV TEMP[2].w, IMM[1].yyyy 10: MOV TEMP[2].x, TEMP[1].xxxx 11: MOV TEMP[2].y, TEMP[1].yyyy 12: MOV TEMP[2].z, TEMP[1].zzzz 13: MOV TEMP[1].w, IMM[1].yyyy 14: MOV TEMP[1].x, TEMP[0].xxxx 15: MOV TEMP[1].y, TEMP[0].yyyy 16: MOV TEMP[1].z, TEMP[0].zzzz 17: MOV OUT[2], IN[1].wwww 18: MOV OUT[0], TEMP[1] 19: MOV OUT[3], TEMP[2] 20: MOV OUT[1], IMM[1].yyyy 21: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %27 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %28 = load <32 x i8>, <32 x i8> addrspace(2)* %27, align 32, !tbaa !0 %29 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %37 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %38 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %40 = bitcast float %31 to i32 %41 = bitcast float %32 to i32 %42 = insertelement <2 x i32> undef, i32 %40, i32 0 %43 = insertelement <2 x i32> %42, i32 %41, i32 1 %44 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %43, <32 x i8> %28, <16 x i8> %30, i32 2) %45 = extractelement <4 x float> %44, i32 0 %46 = extractelement <4 x float> %44, i32 1 %47 = extractelement <4 x float> %44, i32 2 %48 = fmul float %24, %45 %49 = fmul float %25, %46 %50 = fmul float %26, %47 %51 = fmul float %48, %37 %52 = fmul float %49, %38 %53 = fmul float %50, %39 %54 = fmul float %34, %34 %55 = fmul float %35, %35 %56 = fadd float %55, %54 %57 = fmul float %36, %36 %58 = fadd float %56, %57 %59 = call float @llvm.AMDGPU.rsq.clamped.f32(float %58) %60 = fmul float %34, %59 %61 = fmul float %35, %59 %62 = fmul float %36, %59 %63 = fmul float %60, 5.000000e-01 %64 = fadd float %63, 5.000000e-01 %65 = fmul float %61, 5.000000e-01 %66 = fadd float %65, 5.000000e-01 %67 = fmul float %62, 5.000000e-01 %68 = fadd float %67, 5.000000e-01 %69 = call i32 @llvm.SI.packf16(float %51, float %52) %70 = bitcast i32 %69 to float %71 = call i32 @llvm.SI.packf16(float %53, float 0.000000e+00) %72 = bitcast i32 %71 to float %73 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %74 = bitcast i32 %73 to float %75 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %76 = bitcast i32 %75 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %70, float %72, float %70, float %72) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 1, i32 1, float %74, float %76, float %74, float %76) %77 = call i32 @llvm.SI.packf16(float %64, float %66) %78 = bitcast i32 %77 to float %79 = call i32 @llvm.SI.packf16(float %68, float 0.000000e+00) %80 = bitcast i32 %79 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 2, i32 0, float %33, float %33, float %33, float %33) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 3, i32 1, float %78, float %80, float %78, float %80) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_buffer_load_dword s0, s[0:3], 0x6 ; C2000106 v_interp_p1_f32 v4, v0, 3, 1, [m0] ; C8100700 v_interp_p2_f32 v4, [v4], v1, 3, 1, [m0] ; C8110701 v_interp_p1_f32 v5, v0, 0, 2, [m0] ; C8140800 v_interp_p2_f32 v5, [v5], v1, 0, 2, [m0] ; C8150801 v_interp_p1_f32 v6, v0, 1, 2, [m0] ; C8180900 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 v_interp_p2_f32 v6, [v6], v1, 1, 2, [m0] ; C8190901 v_interp_p1_f32 v7, v0, 2, 2, [m0] ; C81C0A00 v_interp_p2_f32 v7, [v7], v1, 2, 2, [m0] ; C81D0A01 v_interp_p1_f32 v8, v0, 0, 3, [m0] ; C8200C00 v_interp_p2_f32 v8, [v8], v1, 0, 3, [m0] ; C8210C01 v_interp_p1_f32 v9, v0, 1, 3, [m0] ; C8240D00 v_interp_p2_f32 v9, [v9], v1, 1, 3, [m0] ; C8250D01 v_interp_p1_f32 v0, v0, 2, 3, [m0] ; C8000E00 v_interp_p2_f32 v0, [v0], v1, 2, 3, [m0] ; C8010E01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[1:3], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800700 00430102 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, s4, v1 ; 10020204 v_mul_f32_e32 v2, s5, v2 ; 10040405 v_mul_f32_e32 v3, s0, v3 ; 10060600 v_mul_f32_e32 v10, v5, v5 ; 10140B05 v_mac_f32_e32 v10, v6, v6 ; 3E140D06 v_mac_f32_e32 v10, v7, v7 ; 3E140F07 v_rsq_clamp_f32_e32 v10, v10 ; 7E14590A v_mul_f32_e32 v1, v8, v1 ; 10020308 v_mul_f32_e32 v2, v9, v2 ; 10040509 v_mul_f32_e32 v0, v0, v3 ; 10000700 v_mul_f32_e32 v3, v10, v5 ; 10060B0A v_mul_f32_e32 v5, v10, v6 ; 100A0D0A v_mul_f32_e32 v6, v10, v7 ; 100C0F0A v_mad_f32 v3, 0.5, v3, 0.5 ; D2820003 03C206F0 v_mad_f32 v5, 0.5, v5, 0.5 ; D2820005 03C20AF0 v_mad_f32 v6, 0.5, v6, 0.5 ; D2820006 03C20CF0 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e64 v0, v0, 0 ; D25E0000 00010100 v_cvt_pkrtz_f16_f32_e64 v2, 0, 0 ; D25E0002 00010080 exp 15, 0, 1, 0, 0, v1, v0, v1, v0 ; F800040F 00010001 exp 15, 1, 1, 0, 0, v2, v2, v2, v2 ; F800041F 02020202 s_waitcnt expcnt(0) ; BF8C070F v_cvt_pkrtz_f16_f32_e32 v0, v3, v5 ; 5E000B03 v_cvt_pkrtz_f16_f32_e64 v1, v6, 0 ; D25E0001 00010106 exp 15, 2, 0, 0, 0, v4, v4, v4, v4 ; F800002F 04040404 exp 15, 3, 1, 1, 1, v0, v1, v0, v1 ; F8001C3F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 272 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL IN[5] DCL IN[6] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL OUT[4], GENERIC[2] DCL OUT[5], GENERIC[3] DCL OUT[6], GENERIC[4] DCL OUT[7], GENERIC[5] DCL OUT[8], GENERIC[6] DCL OUT[9], GENERIC[7] DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0..7] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL DCL TEMP[2..16], LOCAL IMM[0] FLT32 { 0.0000, 255.0000, -128.0000, 1.0000} IMM[1] INT32 {1, 0, 2, 3} IMM[2] FLT32 { -64.0000, 0.0159, 2.0000, 16.0000} IMM[3] UINT32 {3, 304, 320, 4} IMM[4] UINT32 {0, 20, 36, 52} IMM[5] UINT32 {8, 24, 40, 56} IMM[6] UINT32 {12, 28, 44, 60} IMM[7] UINT32 {16, 32, 48, 348} IMM[8] FLT32 { 0.0175, -0.5000, 0.5000, 0.0001} IMM[9] UINT32 {72, 80, 64, 88} IMM[10] UINT32 {100, 104, 96, 112} IMM[11] FLT32 { 0.0774, 0.9479, 0.0521, 2.4000} IMM[12] FLT32 { 0.0404, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MOV TEMP[0].xy, IN[6].xyyy 4: MOV TEMP[0].w, IMM[0].xxxx 5: TXL TEMP[0], TEMP[0], SAMP[0], 2D 6: MOV TEMP[2].xy, IN[6].xyyy 7: MOV TEMP[2].w, IMM[0].xxxx 8: TXL TEMP[2], TEMP[2], SAMP[0], 2D, IMM[1].xyx 9: MOV TEMP[3].xy, IN[6].xyyy 10: MOV TEMP[3].w, IMM[0].xxxx 11: TXL TEMP[3], TEMP[3], SAMP[0], 2D, IMM[1].zyz 12: MAD TEMP[4], IN[1], IMM[0].yyyy, IMM[0].zzzz 13: FSLT TEMP[5], TEMP[4], IMM[0].xxxx 14: AND TEMP[5], TEMP[5], IMM[0].wwww 15: ABS TEMP[4], TEMP[4] 16: ADD TEMP[4], TEMP[4], -TEMP[5] 17: ADD TEMP[4], TEMP[4], IMM[2].xxxx 18: FSLT TEMP[6], TEMP[4], IMM[0].xxxx 19: AND TEMP[6], TEMP[6], IMM[0].wwww 20: ABS TEMP[4], TEMP[4] 21: ADD TEMP[4], TEMP[4], -TEMP[6] 22: MUL TEMP[4], TEMP[4], IMM[2].yyyy 23: MUL TEMP[6], TEMP[6], IMM[2].zzzz 24: ADD TEMP[6], IMM[0].wwww, -TEMP[6] 25: MUL TEMP[5], IMM[2].zzzz, TEMP[5] 26: ADD TEMP[5].xzw, IMM[0].wwww, -TEMP[5] 27: MOV TEMP[7].x, TEMP[4].xxxx 28: MOV TEMP[7].y, TEMP[4].yyyy 29: ADD TEMP[8].x, IMM[0].wwww, -TEMP[4].xxxx 30: ADD TEMP[8].x, TEMP[8].xxxx, -TEMP[4].yyyy 31: MOV TEMP[7].z, TEMP[8].xxxx 32: DP3 TEMP[8].x, TEMP[7].xyzz, TEMP[7].xyzz 33: RSQ TEMP[8].x, TEMP[8].xxxx 34: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[8].xxxx 35: MUL TEMP[8].xy, TEMP[7].xyyy, TEMP[6].xyyy 36: MOV TEMP[9].x, TEMP[4].zzzz 37: MOV TEMP[9].y, TEMP[4].wwww 38: ADD TEMP[10].x, IMM[0].wwww, -TEMP[4].zzzz 39: ADD TEMP[4].x, TEMP[10].xxxx, -TEMP[4].wwww 40: MOV TEMP[9].z, TEMP[4].xxxx 41: DP3 TEMP[4].x, TEMP[9].xyzz, TEMP[9].xyzz 42: RSQ TEMP[4].x, TEMP[4].xxxx 43: MUL TEMP[4].xyz, TEMP[9].xyzz, TEMP[4].xxxx 44: MUL TEMP[6].xy, TEMP[4].xyyy, TEMP[6].zwww 45: MOV TEMP[9].w, IMM[0].xxxx 46: MOV TEMP[9].x, TEMP[8].xxxx 47: MOV TEMP[9].y, TEMP[8].yyyy 48: MUL TEMP[7].x, TEMP[7].zzzz, TEMP[5].xxxx 49: MOV TEMP[9].z, TEMP[7].xxxx 50: DP4 TEMP[7].x, TEMP[9], TEMP[0] 51: DP4 TEMP[8].x, TEMP[9], TEMP[2] 52: MOV TEMP[7].y, TEMP[8].xxxx 53: DP4 TEMP[8].x, TEMP[9], TEMP[3] 54: MOV TEMP[7].z, TEMP[8].xxxx 55: DP3 TEMP[8].x, TEMP[7].xyzz, TEMP[7].xyzz 56: RSQ TEMP[8].x, TEMP[8].xxxx 57: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[8].xxxx 58: MOV TEMP[8].w, IMM[0].xxxx 59: MOV TEMP[8].x, TEMP[6].xxxx 60: MOV TEMP[8].y, TEMP[6].yyyy 61: MUL TEMP[4].x, TEMP[4].zzzz, TEMP[5].zzzz 62: MOV TEMP[8].z, TEMP[4].xxxx 63: DP4 TEMP[4].x, TEMP[8], TEMP[0] 64: DP4 TEMP[6].x, TEMP[8], TEMP[2] 65: MOV TEMP[4].y, TEMP[6].xxxx 66: DP4 TEMP[6].x, TEMP[8], TEMP[3] 67: MOV TEMP[4].z, TEMP[6].xxxx 68: DP3 TEMP[6].x, TEMP[4].xyzz, TEMP[7].xyzz 69: MUL TEMP[6].xyz, TEMP[6].xxxx, TEMP[7].xyzz 70: ADD TEMP[4].xyz, TEMP[4].xyzz, -TEMP[6].xyzz 71: DP3 TEMP[6].x, TEMP[4].xyzz, TEMP[4].xyzz 72: RSQ TEMP[6].x, TEMP[6].xxxx 73: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[6].xxxx 74: MOV TEMP[6].x, TEMP[4].xxxx 75: MOV TEMP[6].y, TEMP[4].yyyy 76: MOV TEMP[6].z, TEMP[4].zzzz 77: MOV TEMP[6].w, TEMP[5].wwww 78: MOV TEMP[4].w, IMM[0].wwww 79: MOV TEMP[4].x, IN[0].xxxx 80: MOV TEMP[4].y, IN[0].yyyy 81: MOV TEMP[4].z, IN[0].zzzz 82: DP4 TEMP[0].x, TEMP[4], TEMP[0] 83: DP4 TEMP[2].x, TEMP[4], TEMP[2] 84: DP4 TEMP[3].x, TEMP[4], TEMP[3] 85: MOV TEMP[4].x, TEMP[0].xxxx 86: MOV TEMP[4].y, TEMP[2].xxxx 87: MOV TEMP[4].z, TEMP[3].xxxx 88: ADD TEMP[4].xyz, TEMP[4].xyzz, -CONST[4][19].xyzz 89: MOV TEMP[5].x, TEMP[0].xxxx 90: MOV TEMP[5].y, TEMP[2].xxxx 91: MOV TEMP[5].z, TEMP[3].xxxx 92: DP3 TEMP[8].x, CONST[4][20].xyzz, TEMP[4].xyzz 93: MOV TEMP[5].w, TEMP[8].xxxx 94: MOV TEMP[8].x, TEMP[7].xxxx 95: MOV TEMP[8].y, TEMP[7].yyyy 96: MOV TEMP[8].z, TEMP[7].zzzz 97: DP3 TEMP[9].x, TEMP[4].xyzz, TEMP[4].xyzz 98: RSQ TEMP[9].x, TEMP[9].xxxx 99: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[9].xxxx 100: DP3 TEMP[9].x, TEMP[7].xyzz, CONST[5][0].xyzz 101: MUL TEMP[7].xyz, TEMP[9].xxxx, TEMP[7].xyzz 102: MUL TEMP[7].xyz, IMM[2].zzzz, TEMP[7].xyzz 103: ADD TEMP[7].xyz, CONST[5][0].xyzz, -TEMP[7].xyzz 104: DP3 TEMP[4].x, -TEMP[4].xyzz, TEMP[7].xyzz 105: MOV_SAT TEMP[4].x, TEMP[4].xxxx 106: POW TEMP[4].x, TEMP[4].xxxx, IMM[2].wwww 107: MOV_SAT TEMP[4].x, TEMP[4].xxxx 108: MOV TEMP[8].w, TEMP[4].xxxx 109: MOV TEMP[4].w, IMM[0].wwww 110: MOV TEMP[4].x, TEMP[0].xxxx 111: MOV TEMP[4].y, TEMP[2].xxxx 112: MOV TEMP[4].z, TEMP[3].xxxx 113: MOV TEMP[0].x, CONST[4][0].yyyy 114: MOV TEMP[0].y, CONST[4][1].yyyy 115: MOV TEMP[0].z, CONST[4][2].yyyy 116: MOV TEMP[0].w, CONST[4][3].yyyy 117: DP4 TEMP[0].x, TEMP[4], TEMP[0] 118: MOV TEMP[2].x, CONST[4][0].zzzz 119: MOV TEMP[2].y, CONST[4][1].zzzz 120: MOV TEMP[2].z, CONST[4][2].zzzz 121: MOV TEMP[2].w, CONST[4][3].zzzz 122: DP4 TEMP[2].x, TEMP[4], TEMP[2] 123: MOV TEMP[3].x, CONST[4][0].wwww 124: MOV TEMP[3].y, CONST[4][1].wwww 125: MOV TEMP[3].z, CONST[4][2].wwww 126: MOV TEMP[3].w, CONST[4][3].wwww 127: DP4 TEMP[3].x, TEMP[4], TEMP[3] 128: MOV TEMP[7].x, CONST[4][0].xxxx 129: MOV TEMP[7].y, CONST[4][1].xxxx 130: MOV TEMP[7].z, CONST[4][2].xxxx 131: MOV TEMP[7].w, CONST[4][3].xxxx 132: DP4 TEMP[4].x, TEMP[4], TEMP[7] 133: MOV TEMP[4].w, TEMP[3].xxxx 134: MUL TEMP[7].x, CONST[1][2].xxxx, IMM[8].xxxx 135: ADD TEMP[9].xy, IN[2].xyyy, IMM[8].yyyy 136: COS TEMP[10].x, TEMP[7].xxxx 137: SIN TEMP[7].x, TEMP[7].xxxx 138: MUL TEMP[11].x, TEMP[7].xxxx, TEMP[9].yyyy 139: MAD TEMP[11].x, TEMP[10].xxxx, TEMP[9].xxxx, -TEMP[11].xxxx 140: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[9].yyyy 141: MAD TEMP[7].x, TEMP[7].xxxx, TEMP[9].xxxx, TEMP[10].xxxx 142: MOV TEMP[11].y, TEMP[7].xxxx 143: MAD TEMP[7].xy, CONST[1][1].yyyy, TEMP[11].xyyy, IMM[8].zzzz 144: MAD TEMP[7].xy, CONST[4][21].wwww, CONST[1][3].xyyy, TEMP[7].xyyy 145: ADD TEMP[7].xy, TEMP[7].xyyy, CONST[1][4].zwww 146: MUL TEMP[10].x, IMM[8].xxxx, CONST[1][2].yyyy 147: COS TEMP[11].x, TEMP[10].xxxx 148: SIN TEMP[10].x, TEMP[10].xxxx 149: MUL TEMP[12].x, TEMP[10].xxxx, TEMP[9].yyyy 150: MAD TEMP[12].x, TEMP[11].xxxx, TEMP[9].xxxx, -TEMP[12].xxxx 151: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[9].xxxx 152: MAD TEMP[10].x, TEMP[11].xxxx, TEMP[9].yyyy, TEMP[10].xxxx 153: MOV TEMP[12].y, TEMP[10].xxxx 154: MAD TEMP[10].xy, TEMP[12].xyyy, CONST[1][1].zzzz, IMM[8].zzzz 155: MAD TEMP[10].xy, CONST[4][21].wwww, CONST[1][3].zwww, TEMP[10].xyyy 156: ADD TEMP[10].xy, TEMP[10].xyyy, CONST[1][5].xyyy 157: MOV TEMP[11].x, TEMP[7].xxxx 158: MOV TEMP[11].y, TEMP[7].yyyy 159: MOV TEMP[11].z, TEMP[10].xxxx 160: MOV TEMP[11].w, TEMP[10].yyyy 161: MUL TEMP[7].x, IMM[8].xxxx, CONST[1][2].zzzz 162: COS TEMP[10].x, TEMP[7].xxxx 163: SIN TEMP[7].x, TEMP[7].xxxx 164: MUL TEMP[12].x, TEMP[7].xxxx, TEMP[9].yyyy 165: MAD TEMP[12].x, TEMP[10].xxxx, TEMP[9].xxxx, -TEMP[12].xxxx 166: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[9].xxxx 167: MAD TEMP[7].x, TEMP[10].xxxx, TEMP[9].yyyy, TEMP[7].xxxx 168: MOV TEMP[12].y, TEMP[7].xxxx 169: MAD TEMP[7].xy, TEMP[12].xyyy, CONST[1][1].wwww, IMM[8].zzzz 170: MAD TEMP[7].xy, CONST[4][21].wwww, CONST[1][4].xyyy, TEMP[7].xyyy 171: ADD TEMP[7].xy, TEMP[7].xyyy, CONST[1][5].zwww 172: MUL TEMP[10].x, IMM[8].xxxx, CONST[1][6].yyyy 173: COS TEMP[12].x, TEMP[10].xxxx 174: SIN TEMP[10].x, TEMP[10].xxxx 175: MUL TEMP[13].x, TEMP[10].xxxx, TEMP[9].yyyy 176: MAD TEMP[13].x, TEMP[12].xxxx, TEMP[9].xxxx, -TEMP[13].xxxx 177: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[9].xxxx 178: MAD TEMP[9].x, TEMP[12].xxxx, TEMP[9].yyyy, TEMP[10].xxxx 179: MOV TEMP[13].y, TEMP[9].xxxx 180: MAD TEMP[9].xy, CONST[1][6].xxxx, TEMP[13].xyyy, IMM[8].zzzz 181: MAD TEMP[9].xy, CONST[4][21].wwww, CONST[1][6].zwww, TEMP[9].xyyy 182: ADD TEMP[9].xy, TEMP[9].xyyy, CONST[1][7].xyyy 183: MOV TEMP[10].x, TEMP[7].xxxx 184: MOV TEMP[10].y, TEMP[7].yyyy 185: MOV TEMP[10].z, TEMP[9].xxxx 186: MOV TEMP[10].w, TEMP[9].yyyy 187: MOV TEMP[7].xy, IN[6].xyyy 188: MOV TEMP[7].w, IMM[0].xxxx 189: TXL TEMP[7], TEMP[7], SAMP[0], 2D, IMM[1].wyw 190: MUL TEMP[9].xyz, IN[4].xyzz, IMM[8].zzzz 191: MAX TEMP[9].xyz, TEMP[9].xyzz, IMM[8].wwww 192: MUL TEMP[12].xyz, IN[5].xyzz, IMM[11].xxxx 193: MAD TEMP[13].xyz, IN[5].xyzz, IMM[11].yyyy, IMM[11].zzzz 194: POW TEMP[14].x, TEMP[13].xxxx, IMM[11].wwww 195: POW TEMP[14].y, TEMP[13].yyyy, IMM[11].wwww 196: POW TEMP[14].z, TEMP[13].zzzz, IMM[11].wwww 197: FSLT TEMP[13].x, IMM[12].xxxx, IN[5].xxxx 198: UIF TEMP[13].xxxx :0 199: MOV TEMP[13].x, TEMP[14].xxxx 200: ELSE :0 201: MOV TEMP[13].x, TEMP[12].xxxx 202: ENDIF 203: FSLT TEMP[15].x, IMM[12].xxxx, IN[5].yyyy 204: UIF TEMP[15].xxxx :0 205: MOV TEMP[15].x, TEMP[14].yyyy 206: ELSE :0 207: MOV TEMP[15].x, TEMP[12].yyyy 208: ENDIF 209: FSLT TEMP[16].x, IMM[12].xxxx, IN[5].zzzz 210: UIF TEMP[16].xxxx :0 211: MOV TEMP[14].x, TEMP[14].zzzz 212: ELSE :0 213: MOV TEMP[14].x, TEMP[12].zzzz 214: ENDIF 215: MOV TEMP[12].x, TEMP[13].xxxx 216: MOV TEMP[12].y, TEMP[15].xxxx 217: MOV TEMP[12].z, TEMP[14].xxxx 218: MOV TEMP[13].w, TEMP[7].wwww 219: MUL TEMP[13].xyz, TEMP[12].xyzz, TEMP[7].xyzz 220: MOV TEMP[7].w, IMM[0].xxxx 221: MOV TEMP[7].x, IN[3].xxxx 222: MOV TEMP[7].y, IN[3].yyyy 223: MOV TEMP[7].z, IN[3].zzzz 224: MOV TEMP[12].w, IMM[0].xxxx 225: MOV TEMP[12].x, TEMP[9].xxxx 226: MOV TEMP[12].y, TEMP[9].yyyy 227: MOV TEMP[12].z, TEMP[9].zzzz 228: MOV TEMP[4].xw, TEMP[4].xxxw 229: MOV TEMP[0].x, -TEMP[0].xxxx 230: MAD TEMP[2].x, TEMP[2].xxxx, IMM[2].zzzz, -TEMP[3].xxxx 231: MOV TEMP[0].y, TEMP[2].xxxx 232: MOV TEMP[4].yz, TEMP[0].yxyy 233: MOV OUT[1], TEMP[1] 234: MOV OUT[3], TEMP[8] 235: MOV OUT[7], TEMP[6] 236: MOV OUT[6], TEMP[12] 237: MOV OUT[8], TEMP[11] 238: MOV OUT[4], TEMP[13] 239: MOV OUT[0], TEMP[4] 240: MOV OUT[9], TEMP[10] 241: MOV OUT[2], TEMP[5] 242: MOV OUT[5], TEMP[7] 243: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %37 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = call float @llvm.SI.load.const(<16 x i8> %38, i32 0) %40 = call float @llvm.SI.load.const(<16 x i8> %38, i32 4) %41 = call float @llvm.SI.load.const(<16 x i8> %38, i32 8) %42 = call float @llvm.SI.load.const(<16 x i8> %38, i32 12) %43 = call float @llvm.SI.load.const(<16 x i8> %38, i32 16) %44 = call float @llvm.SI.load.const(<16 x i8> %38, i32 20) %45 = call float @llvm.SI.load.const(<16 x i8> %38, i32 24) %46 = call float @llvm.SI.load.const(<16 x i8> %38, i32 28) %47 = call float @llvm.SI.load.const(<16 x i8> %38, i32 32) %48 = call float @llvm.SI.load.const(<16 x i8> %38, i32 36) %49 = call float @llvm.SI.load.const(<16 x i8> %38, i32 40) %50 = call float @llvm.SI.load.const(<16 x i8> %38, i32 44) %51 = call float @llvm.SI.load.const(<16 x i8> %38, i32 48) %52 = call float @llvm.SI.load.const(<16 x i8> %38, i32 52) %53 = call float @llvm.SI.load.const(<16 x i8> %38, i32 56) %54 = call float @llvm.SI.load.const(<16 x i8> %38, i32 60) %55 = call float @llvm.SI.load.const(<16 x i8> %38, i32 304) %56 = call float @llvm.SI.load.const(<16 x i8> %38, i32 308) %57 = call float @llvm.SI.load.const(<16 x i8> %38, i32 312) %58 = call float @llvm.SI.load.const(<16 x i8> %38, i32 320) %59 = call float @llvm.SI.load.const(<16 x i8> %38, i32 324) %60 = call float @llvm.SI.load.const(<16 x i8> %38, i32 328) %61 = call float @llvm.SI.load.const(<16 x i8> %38, i32 348) %62 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 5 %63 = load <16 x i8>, <16 x i8> addrspace(2)* %62, align 16, !tbaa !0 %64 = call float @llvm.SI.load.const(<16 x i8> %63, i32 0) %65 = call float @llvm.SI.load.const(<16 x i8> %63, i32 4) %66 = call float @llvm.SI.load.const(<16 x i8> %63, i32 8) %67 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %68 = load <8 x i32>, <8 x i32> addrspace(2)* %67, align 32, !tbaa !0 %69 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %70 = load <4 x i32>, <4 x i32> addrspace(2)* %69, align 16, !tbaa !0 %71 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0 %73 = add i32 %5, %7 %74 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %72, i32 0, i32 %73) %75 = extractelement <4 x float> %74, i32 0 %76 = extractelement <4 x float> %74, i32 1 %77 = extractelement <4 x float> %74, i32 2 %78 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %79 = load <16 x i8>, <16 x i8> addrspace(2)* %78, align 16, !tbaa !0 %80 = add i32 %5, %7 %81 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %79, i32 0, i32 %80) %82 = extractelement <4 x float> %81, i32 0 %83 = extractelement <4 x float> %81, i32 1 %84 = extractelement <4 x float> %81, i32 2 %85 = extractelement <4 x float> %81, i32 3 %86 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %87 = load <16 x i8>, <16 x i8> addrspace(2)* %86, align 16, !tbaa !0 %88 = add i32 %5, %7 %89 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %87, i32 0, i32 %88) %90 = extractelement <4 x float> %89, i32 0 %91 = extractelement <4 x float> %89, i32 1 %92 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %93 = load <16 x i8>, <16 x i8> addrspace(2)* %92, align 16, !tbaa !0 %94 = add i32 %5, %7 %95 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %93, i32 0, i32 %94) %96 = extractelement <4 x float> %95, i32 0 %97 = extractelement <4 x float> %95, i32 1 %98 = extractelement <4 x float> %95, i32 2 %99 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %100 = load <16 x i8>, <16 x i8> addrspace(2)* %99, align 16, !tbaa !0 %101 = add i32 %5, %7 %102 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %100, i32 0, i32 %101) %103 = extractelement <4 x float> %102, i32 0 %104 = extractelement <4 x float> %102, i32 1 %105 = extractelement <4 x float> %102, i32 2 %106 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 5 %107 = load <16 x i8>, <16 x i8> addrspace(2)* %106, align 16, !tbaa !0 %108 = add i32 %5, %7 %109 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %107, i32 0, i32 %108) %110 = extractelement <4 x float> %109, i32 0 %111 = extractelement <4 x float> %109, i32 1 %112 = extractelement <4 x float> %109, i32 2 %113 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 6 %114 = load <16 x i8>, <16 x i8> addrspace(2)* %113, align 16, !tbaa !0 %115 = add i32 %10, %6 %116 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %114, i32 0, i32 %115) %117 = extractelement <4 x float> %116, i32 0 %118 = extractelement <4 x float> %116, i32 1 %119 = bitcast float %117 to i32 %120 = bitcast float %118 to i32 %121 = insertelement <4 x i32> undef, i32 %119, i32 0 %122 = insertelement <4 x i32> %121, i32 %120, i32 1 %123 = insertelement <4 x i32> %122, i32 0, i32 2 %124 = bitcast <8 x i32> %68 to <32 x i8> %125 = bitcast <4 x i32> %70 to <16 x i8> %126 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %123, <32 x i8> %124, <16 x i8> %125, i32 2) %127 = extractelement <4 x float> %126, i32 0 %128 = extractelement <4 x float> %126, i32 1 %129 = extractelement <4 x float> %126, i32 2 %130 = extractelement <4 x float> %126, i32 3 %131 = bitcast float %117 to i32 %132 = bitcast float %118 to i32 %133 = insertelement <4 x i32> , i32 %131, i32 1 %134 = insertelement <4 x i32> %133, i32 %132, i32 2 %135 = insertelement <4 x i32> %134, i32 0, i32 3 %136 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %135, <8 x i32> %68, <4 x i32> %70, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %137 = extractelement <4 x float> %136, i32 0 %138 = extractelement <4 x float> %136, i32 1 %139 = extractelement <4 x float> %136, i32 2 %140 = extractelement <4 x float> %136, i32 3 %141 = bitcast float %117 to i32 %142 = bitcast float %118 to i32 %143 = insertelement <4 x i32> , i32 %141, i32 1 %144 = insertelement <4 x i32> %143, i32 %142, i32 2 %145 = insertelement <4 x i32> %144, i32 0, i32 3 %146 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %145, <8 x i32> %68, <4 x i32> %70, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %147 = extractelement <4 x float> %146, i32 0 %148 = extractelement <4 x float> %146, i32 1 %149 = extractelement <4 x float> %146, i32 2 %150 = extractelement <4 x float> %146, i32 3 %151 = fmul float %82, 2.550000e+02 %152 = fadd float %151, -1.280000e+02 %153 = fmul float %83, 2.550000e+02 %154 = fadd float %153, -1.280000e+02 %155 = fmul float %84, 2.550000e+02 %156 = fadd float %155, -1.280000e+02 %157 = fmul float %85, 2.550000e+02 %158 = fadd float %157, -1.280000e+02 %159 = fcmp olt float %152, 0.000000e+00 %160 = fcmp olt float %154, 0.000000e+00 %161 = fcmp olt float %156, 0.000000e+00 %162 = fcmp olt float %158, 0.000000e+00 %163 = select i1 %159, float 1.000000e+00, float 0.000000e+00 %164 = select i1 %161, float 1.000000e+00, float 0.000000e+00 %165 = select i1 %162, float 1.000000e+00, float 0.000000e+00 %166 = call float @fabs(float %152) %167 = call float @fabs(float %154) %168 = call float @fabs(float %156) %169 = call float @fabs(float %158) %170 = fsub float %166, %163 %171 = select i1 %160, float -1.000000e+00, float -0.000000e+00 %172 = fadd float %167, %171 %173 = fsub float %168, %164 %174 = fsub float %169, %165 %175 = fadd float %170, -6.400000e+01 %176 = fadd float %172, -6.400000e+01 %177 = fadd float %173, -6.400000e+01 %178 = fadd float %174, -6.400000e+01 %179 = fcmp olt float %175, 0.000000e+00 %180 = fcmp olt float %176, 0.000000e+00 %181 = fcmp olt float %177, 0.000000e+00 %182 = fcmp olt float %178, 0.000000e+00 %183 = select i1 %179, float 1.000000e+00, float 0.000000e+00 %184 = select i1 %180, float 1.000000e+00, float 0.000000e+00 %185 = select i1 %181, float 1.000000e+00, float 0.000000e+00 %186 = select i1 %182, float 1.000000e+00, float 0.000000e+00 %187 = call float @fabs(float %175) %188 = call float @fabs(float %176) %189 = call float @fabs(float %177) %190 = call float @fabs(float %178) %191 = fsub float %187, %183 %192 = fsub float %188, %184 %193 = fsub float %189, %185 %194 = fsub float %190, %186 %195 = fmul float %191, 0x3F90410420000000 %196 = fmul float %192, 0x3F90410420000000 %197 = fmul float %193, 0x3F90410420000000 %198 = fmul float %194, 0x3F90410420000000 %199 = fmul float %183, 2.000000e+00 %200 = fmul float %184, 2.000000e+00 %201 = fmul float %185, 2.000000e+00 %202 = fmul float %186, 2.000000e+00 %203 = fsub float 1.000000e+00, %199 %204 = fsub float 1.000000e+00, %200 %205 = fsub float 1.000000e+00, %201 %206 = fsub float 1.000000e+00, %202 %207 = fmul float %163, 2.000000e+00 %208 = fmul float %164, 2.000000e+00 %209 = fmul float %165, 2.000000e+00 %210 = fsub float 1.000000e+00, %207 %211 = fsub float 1.000000e+00, %208 %212 = fsub float 1.000000e+00, %209 %213 = fsub float 1.000000e+00, %195 %214 = fsub float %213, %196 %215 = fmul float %195, %195 %216 = fmul float %196, %196 %217 = fadd float %216, %215 %218 = fmul float %214, %214 %219 = fadd float %217, %218 %220 = call float @llvm.AMDGPU.rsq.clamped.f32(float %219) %221 = fmul float %195, %220 %222 = fmul float %196, %220 %223 = fmul float %214, %220 %224 = fmul float %221, %203 %225 = fmul float %222, %204 %226 = fsub float 1.000000e+00, %197 %227 = fsub float %226, %198 %228 = fmul float %197, %197 %229 = fmul float %198, %198 %230 = fadd float %229, %228 %231 = fmul float %227, %227 %232 = fadd float %230, %231 %233 = call float @llvm.AMDGPU.rsq.clamped.f32(float %232) %234 = fmul float %197, %233 %235 = fmul float %198, %233 %236 = fmul float %227, %233 %237 = fmul float %234, %205 %238 = fmul float %235, %206 %239 = fmul float %223, %210 %240 = fmul float %224, %127 %241 = fmul float %225, %128 %242 = fadd float %240, %241 %243 = fmul float %239, %129 %244 = fadd float %242, %243 %245 = fmul float %130, 0.000000e+00 %246 = fadd float %244, %245 %247 = fmul float %224, %137 %248 = fmul float %225, %138 %249 = fadd float %247, %248 %250 = fmul float %239, %139 %251 = fadd float %249, %250 %252 = fmul float %140, 0.000000e+00 %253 = fadd float %251, %252 %254 = fmul float %224, %147 %255 = fmul float %225, %148 %256 = fadd float %254, %255 %257 = fmul float %239, %149 %258 = fadd float %256, %257 %259 = fmul float %150, 0.000000e+00 %260 = fadd float %258, %259 %261 = fmul float %246, %246 %262 = fmul float %253, %253 %263 = fadd float %262, %261 %264 = fmul float %260, %260 %265 = fadd float %263, %264 %266 = call float @llvm.AMDGPU.rsq.clamped.f32(float %265) %267 = fmul float %246, %266 %268 = fmul float %253, %266 %269 = fmul float %260, %266 %270 = fmul float %236, %211 %271 = fmul float %237, %127 %272 = fmul float %238, %128 %273 = fadd float %271, %272 %274 = fmul float %270, %129 %275 = fadd float %273, %274 %276 = fmul float %130, 0.000000e+00 %277 = fadd float %275, %276 %278 = fmul float %237, %137 %279 = fmul float %238, %138 %280 = fadd float %278, %279 %281 = fmul float %270, %139 %282 = fadd float %280, %281 %283 = fmul float %140, 0.000000e+00 %284 = fadd float %282, %283 %285 = fmul float %237, %147 %286 = fmul float %238, %148 %287 = fadd float %285, %286 %288 = fmul float %270, %149 %289 = fadd float %287, %288 %290 = fmul float %150, 0.000000e+00 %291 = fadd float %289, %290 %292 = fmul float %277, %267 %293 = fmul float %284, %268 %294 = fadd float %293, %292 %295 = fmul float %291, %269 %296 = fadd float %294, %295 %297 = fmul float %296, %267 %298 = fmul float %296, %268 %299 = fmul float %296, %269 %300 = fsub float %277, %297 %301 = fsub float %284, %298 %302 = fsub float %291, %299 %303 = fmul float %300, %300 %304 = fmul float %301, %301 %305 = fadd float %304, %303 %306 = fmul float %302, %302 %307 = fadd float %305, %306 %308 = call float @llvm.AMDGPU.rsq.clamped.f32(float %307) %309 = fmul float %300, %308 %310 = fmul float %301, %308 %311 = fmul float %302, %308 %312 = fmul float %75, %127 %313 = fmul float %76, %128 %314 = fadd float %312, %313 %315 = fmul float %77, %129 %316 = fadd float %314, %315 %317 = fadd float %316, %130 %318 = fmul float %75, %137 %319 = fmul float %76, %138 %320 = fadd float %318, %319 %321 = fmul float %77, %139 %322 = fadd float %320, %321 %323 = fadd float %322, %140 %324 = fmul float %75, %147 %325 = fmul float %76, %148 %326 = fadd float %324, %325 %327 = fmul float %77, %149 %328 = fadd float %326, %327 %329 = fadd float %328, %150 %330 = fsub float %317, %55 %331 = fsub float %323, %56 %332 = fsub float %329, %57 %333 = fmul float %58, %330 %334 = fmul float %59, %331 %335 = fadd float %334, %333 %336 = fmul float %60, %332 %337 = fadd float %335, %336 %338 = fmul float %330, %330 %339 = fmul float %331, %331 %340 = fadd float %339, %338 %341 = fmul float %332, %332 %342 = fadd float %340, %341 %343 = call float @llvm.AMDGPU.rsq.clamped.f32(float %342) %344 = fmul float %330, %343 %345 = fmul float %331, %343 %346 = fmul float %332, %343 %347 = fmul float %267, %64 %348 = fmul float %268, %65 %349 = fadd float %348, %347 %350 = fmul float %269, %66 %351 = fadd float %349, %350 %352 = fmul float %351, %267 %353 = fmul float %351, %268 %354 = fmul float %351, %269 %355 = fmul float %352, 2.000000e+00 %356 = fmul float %353, 2.000000e+00 %357 = fmul float %354, 2.000000e+00 %358 = fsub float %64, %355 %359 = fsub float %65, %356 %360 = fsub float %66, %357 %361 = fmul float %344, %358 %362 = fsub float -0.000000e+00, %361 %363 = fmul float %345, %359 %364 = fsub float %362, %363 %365 = fmul float %346, %360 %366 = fsub float %364, %365 %367 = call float @llvm.AMDIL.clamp.(float %366, float 0.000000e+00, float 1.000000e+00) %368 = call float @llvm.pow.f32(float %367, float 1.600000e+01) %369 = call float @llvm.AMDIL.clamp.(float %368, float 0.000000e+00, float 1.000000e+00) %370 = fmul float %317, %40 %371 = fmul float %323, %44 %372 = fadd float %370, %371 %373 = fmul float %329, %48 %374 = fadd float %372, %373 %375 = fadd float %374, %52 %376 = fmul float %317, %41 %377 = fmul float %323, %45 %378 = fadd float %376, %377 %379 = fmul float %329, %49 %380 = fadd float %378, %379 %381 = fadd float %380, %53 %382 = fmul float %317, %42 %383 = fmul float %323, %46 %384 = fadd float %382, %383 %385 = fmul float %329, %50 %386 = fadd float %384, %385 %387 = fadd float %386, %54 %388 = fmul float %317, %39 %389 = fmul float %323, %43 %390 = fadd float %388, %389 %391 = fmul float %329, %47 %392 = fadd float %390, %391 %393 = fadd float %392, %51 %394 = fmul float %16, 0x3F91DF4720000000 %395 = fadd float %90, -5.000000e-01 %396 = fadd float %91, -5.000000e-01 %397 = call float @llvm.cos.f32(float %394) %398 = call float @llvm.sin.f32(float %394) %399 = fmul float %398, %396 %400 = fmul float %397, %395 %401 = fsub float %400, %399 %402 = fmul float %397, %396 %403 = fmul float %398, %395 %404 = fadd float %403, %402 %405 = fmul float %13, %401 %406 = fadd float %405, 5.000000e-01 %407 = fmul float %13, %404 %408 = fadd float %407, 5.000000e-01 %409 = fmul float %61, %19 %410 = fadd float %409, %406 %411 = fmul float %61, %20 %412 = fadd float %411, %408 %413 = fadd float %410, %25 %414 = fadd float %412, %26 %415 = fmul float %17, 0x3F91DF4720000000 %416 = call float @llvm.cos.f32(float %415) %417 = call float @llvm.sin.f32(float %415) %418 = fmul float %417, %396 %419 = fmul float %416, %395 %420 = fsub float %419, %418 %421 = fmul float %417, %395 %422 = fmul float %416, %396 %423 = fadd float %422, %421 %424 = fmul float %420, %14 %425 = fadd float %424, 5.000000e-01 %426 = fmul float %423, %14 %427 = fadd float %426, 5.000000e-01 %428 = fmul float %61, %21 %429 = fadd float %428, %425 %430 = fmul float %61, %22 %431 = fadd float %430, %427 %432 = fadd float %429, %27 %433 = fadd float %431, %28 %434 = fmul float %18, 0x3F91DF4720000000 %435 = call float @llvm.cos.f32(float %434) %436 = call float @llvm.sin.f32(float %434) %437 = fmul float %436, %396 %438 = fmul float %435, %395 %439 = fsub float %438, %437 %440 = fmul float %436, %395 %441 = fmul float %435, %396 %442 = fadd float %441, %440 %443 = fmul float %439, %15 %444 = fadd float %443, 5.000000e-01 %445 = fmul float %442, %15 %446 = fadd float %445, 5.000000e-01 %447 = fmul float %61, %23 %448 = fadd float %447, %444 %449 = fmul float %61, %24 %450 = fadd float %449, %446 %451 = fadd float %448, %29 %452 = fadd float %450, %30 %453 = fmul float %32, 0x3F91DF4720000000 %454 = call float @llvm.cos.f32(float %453) %455 = call float @llvm.sin.f32(float %453) %456 = fmul float %455, %396 %457 = fmul float %454, %395 %458 = fsub float %457, %456 %459 = fmul float %455, %395 %460 = fmul float %454, %396 %461 = fadd float %460, %459 %462 = fmul float %31, %458 %463 = fadd float %462, 5.000000e-01 %464 = fmul float %31, %461 %465 = fadd float %464, 5.000000e-01 %466 = fmul float %61, %33 %467 = fadd float %466, %463 %468 = fmul float %61, %34 %469 = fadd float %468, %465 %470 = fadd float %467, %35 %471 = fadd float %469, %36 %472 = bitcast float %117 to i32 %473 = bitcast float %118 to i32 %474 = insertelement <4 x i32> , i32 %472, i32 1 %475 = insertelement <4 x i32> %474, i32 %473, i32 2 %476 = insertelement <4 x i32> %475, i32 0, i32 3 %477 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %476, <8 x i32> %68, <4 x i32> %70, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %478 = extractelement <4 x float> %477, i32 0 %479 = extractelement <4 x float> %477, i32 1 %480 = extractelement <4 x float> %477, i32 2 %481 = extractelement <4 x float> %477, i32 3 %482 = fmul float %103, 5.000000e-01 %483 = fmul float %104, 5.000000e-01 %484 = fmul float %105, 5.000000e-01 %485 = call float @llvm.maxnum.f32(float %482, float 0x3F1A36E2E0000000) %486 = call float @llvm.maxnum.f32(float %483, float 0x3F1A36E2E0000000) %487 = call float @llvm.maxnum.f32(float %484, float 0x3F1A36E2E0000000) %488 = fmul float %110, 0x3FB3D07220000000 %489 = fmul float %111, 0x3FB3D07220000000 %490 = fmul float %112, 0x3FB3D07220000000 %491 = fmul float %110, 0x3FEE54EDE0000000 %492 = fadd float %491, 0x3FAAB12320000000 %493 = fmul float %111, 0x3FEE54EDE0000000 %494 = fadd float %493, 0x3FAAB12320000000 %495 = fmul float %112, 0x3FEE54EDE0000000 %496 = fadd float %495, 0x3FAAB12320000000 %497 = call float @llvm.pow.f32(float %492, float 0x4003333340000000) %498 = call float @llvm.pow.f32(float %494, float 0x4003333340000000) %499 = call float @llvm.pow.f32(float %496, float 0x4003333340000000) %500 = fcmp ogt float %110, 0x3FA4B5DCC0000000 %. = select i1 %500, float %497, float %488 %501 = fcmp ogt float %111, 0x3FA4B5DCC0000000 %temp60.0 = select i1 %501, float %498, float %489 %502 = fcmp ogt float %112, 0x3FA4B5DCC0000000 %.74 = select i1 %502, float %499, float %490 %503 = fmul float %., %478 %504 = fmul float %temp60.0, %479 %505 = fmul float %.74, %480 %506 = fsub float -0.000000e+00, %375 %507 = fmul float %381, 2.000000e+00 %508 = fsub float %507, %387 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %317, float %323, float %329, float %337) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %267, float %268, float %269, float %369) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %503, float %504, float %505, float %481) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %96, float %97, float %98, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %485, float %486, float %487, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %309, float %310, float %311, float %212) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 39, i32 0, float %413, float %414, float %432, float %433) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 40, i32 0, float %451, float %452, float %470, float %471) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %393, float %506, float %508, float %387) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.cos.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sin.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v2, 0xc3000000 ; 7E0402FF C3000000 v_mov_b32_e32 v18, 0x437f0000 ; 7E2402FF 437F0000 v_mov_b32_e32 v19, 0x80000000 ; 7E2602FF 80000000 v_mov_b32_e32 v20, 0xc2800000 ; 7E2802FF C2800000 v_mov_b32_e32 v1, 0x3c820821 ; 7E0202FF 3C820821 v_mov_b32_e32 v21, 0x3d558919 ; 7E2A02FF 3D558919 v_add_i32_e32 v4, s10, v0 ; 4A08000A v_add_i32_e32 v3, s11, v3 ; 4A06060B s_load_dwordx4 s[24:27], s[4:5], 0x0 ; C08C0500 v_mov_b32_e32 v22, 0x3f72a76f ; 7E2C02FF 3F72A76F v_mov_b32_e32 v0, 0x3b360b65 ; 7E0002FF 3B360B65 v_mov_b32_e32 v23, 0x4019999a ; 7E2E02FF 4019999A v_mov_b32_e32 v24, 0x3d9e8391 ; 7E3002FF 3D9E8391 v_mov_b32_e32 v25, 0x3d25aee6 ; 7E3202FF 3D25AEE6 s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908 s_load_dwordx4 s[36:39], s[8:9], 0xc ; C092090C s_load_dwordx4 s[40:43], s[8:9], 0x10 ; C0940910 s_load_dwordx4 s[44:47], s[8:9], 0x14 ; C0960914 s_load_dwordx4 s[48:51], s[8:9], 0x18 ; C0980918 s_load_dwordx4 s[8:11], s[2:3], 0x10 ; C0840310 s_load_dwordx4 s[52:55], s[2:3], 0x14 ; C09A0314 s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[26:29], v4, s[12:15], 0 idxen ; E00C2000 80031A04 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[29:32], v4, s[16:19], 0 idxen ; E00C2000 80041D04 buffer_load_format_xyzw v[8:11], v4, s[20:23], 0 idxen ; E00C2000 80050804 buffer_load_format_xyzw v[14:17], v4, s[36:39], 0 idxen ; E00C2000 80090E04 s_waitcnt vmcnt(1) ; BF8C0771 buffer_load_format_xyzw v[10:13], v4, s[40:43], 0 idxen ; E00C2000 800A0A04 buffer_load_format_xyzw v[33:36], v4, s[44:47], 0 idxen ; E00C2000 800B2104 buffer_load_format_xyzw v[4:7], v3, s[48:51], 0 idxen ; E00C2000 800C0403 s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v6, 0 ; 7E0C0280 s_buffer_load_dword s17, s[52:55], 0x0 ; C208B500 s_buffer_load_dword s16, s[52:55], 0x1 ; C2083501 s_buffer_load_dword s15, s[52:55], 0x2 ; C207B502 s_buffer_load_dword s13, s[8:11], 0x51 ; C2068951 s_buffer_load_dword s14, s[8:11], 0x52 ; C2070952 s_buffer_load_dword s12, s[8:11], 0x57 ; C2060957 s_buffer_load_dword s19, s[8:11], 0x4c ; C209894C s_buffer_load_dword s21, s[8:11], 0x4d ; C20A894D s_buffer_load_dword s20, s[8:11], 0x4e ; C20A094E s_buffer_load_dword s18, s[8:11], 0x50 ; C2090950 image_sample_l v[36:39], 15, 0, 0, 0, 0, 0, 0, 0, v[4:7], s[28:35], s[24:27] ; F0900F00 00C72404 v_mov_b32_e32 v3, 0x10001 ; 7E0602FF 00010001 image_sample_l_o v[40:43], 15, 0, 0, 0, 0, 0, 0, 0, v[3:6], s[28:35], s[24:27] ; F0D00F00 00C72803 v_mov_b32_e32 v3, 0x20002 ; 7E0602FF 00020002 image_sample_l_o v[44:47], 15, 0, 0, 0, 0, 0, 0, 0, v[3:6], s[28:35], s[24:27] ; F0D00F00 00C72C03 v_mov_b32_e32 v3, 0x30003 ; 7E0602FF 00030003 image_sample_l_o v[48:51], 15, 0, 0, 0, 0, 0, 0, 0, v[3:6], s[28:35], s[24:27] ; F0D00F00 00C73003 exp 15, 32, 0, 0, 0, v6, v6, v6, v6 ; F800020F 06060606 s_waitcnt vmcnt(3) lgkmcnt(0) ; BF8C0073 v_mul_f32_e32 v3, v37, v27 ; 10063725 v_mad_f32 v4, v18, v29, v2 ; D2820004 040A3B12 v_mad_f32 v5, v18, v30, v2 ; D2820005 040A3D12 v_mad_f32 v7, v18, v31, v2 ; D2820007 040A3F12 v_mac_f32_e32 v2, v18, v32 ; 3E044112 v_mad_f32 v13, v22, v33, v21 ; D282000D 04564316 v_mad_f32 v17, v22, v34, v21 ; D2820011 04564516 v_mac_f32_e32 v21, v22, v35 ; 3E2A4716 v_cmp_gt_f32_e32 vcc, v33, v25 ; 7C083321 v_cmp_gt_f32_e64 s[0:1], v34, v25 ; D0080000 00023322 v_cmp_gt_f32_e64 s[4:5], v35, v25 ; D0080004 00023323 v_cmp_gt_f32_e64 s[6:7], 0, v5 ; D0080006 00020A80 v_cndmask_b32_e64 v18, v19, -1.0, s[6:7] ; D2000012 0019E713 v_add_f32_e64 v5, |v5|, v18 ; D2060105 00022505 v_cmp_gt_f32_e64 s[6:7], 0, v4 ; D0080006 00020880 v_cndmask_b32_e64 v18, 0, 1.0, s[6:7] ; D2000012 0019E480 v_sub_f32_e64 v4, |v4|, v18 ; D2080104 00022504 v_add_f32_e32 v4, v20, v4 ; 06080914 v_add_f32_e32 v5, v20, v5 ; 060A0B14 v_cmp_gt_f32_e64 s[6:7], 0, v4 ; D0080006 00020880 v_cndmask_b32_e64 v19, 0, 1.0, s[6:7] ; D2000013 0019E480 v_sub_f32_e64 v22, v19, |v4| ; D2080216 00020913 v_mad_f32 v22, v22, v1, 1.0 ; D2820016 03CA0316 v_cmp_gt_f32_e64 s[6:7], 0, v5 ; D0080006 00020A80 v_cndmask_b32_e64 v25, 0, 1.0, s[6:7] ; D2000019 0019E480 v_sub_f32_e64 v29, v25, |v5| ; D208021D 00020B19 v_mac_f32_e32 v22, v1, v29 ; 3E2C3B01 v_sub_f32_e64 v4, |v4|, v19 ; D2080104 00022704 v_sub_f32_e64 v5, |v5|, v25 ; D2080105 00023305 v_mul_f32_e32 v4, v1, v4 ; 10080901 v_mul_f32_e32 v5, v1, v5 ; 100A0B01 v_mul_f32_e32 v29, v4, v4 ; 103A0904 v_mac_f32_e32 v29, v5, v5 ; 3E3A0B05 v_mac_f32_e32 v29, v22, v22 ; 3E3A2D16 v_rsq_clamp_f32_e32 v29, v29 ; 7E3A591D v_mul_f32_e32 v30, v24, v33 ; 103C4318 v_mul_f32_e32 v31, v24, v34 ; 103E4518 v_mul_f32_e32 v24, v24, v35 ; 10304718 v_mul_f32_e32 v4, v29, v4 ; 1008091D v_mul_f32_e32 v5, v29, v5 ; 100A0B1D v_mul_f32_e32 v22, v29, v22 ; 102C2D1D v_mad_f32 v19, -2.0, v19, 1.0 ; D2820013 03CA26F5 v_mul_f32_e32 v4, v19, v4 ; 10080913 v_mad_f32 v19, -2.0, v25, 1.0 ; D2820013 03CA32F5 v_mul_f32_e32 v5, v19, v5 ; 100A0B13 v_mad_f32 v18, -2.0, v18, 1.0 ; D2820012 03CA24F5 v_mul_f32_e32 v18, v18, v22 ; 10242D12 v_mul_f32_e32 v19, v37, v5 ; 10260B25 s_waitcnt vmcnt(2) ; BF8C0772 v_mul_f32_e32 v22, v41, v5 ; 102C0B29 s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v5, v45, v5 ; 100A0B2D v_mac_f32_e32 v19, v36, v4 ; 3E260924 v_mac_f32_e32 v22, v40, v4 ; 3E2C0928 v_mac_f32_e32 v5, v44, v4 ; 3E0A092C v_mac_f32_e32 v19, v38, v18 ; 3E262526 v_mac_f32_e32 v22, v42, v18 ; 3E2C252A v_mac_f32_e32 v5, v46, v18 ; 3E0A252E v_mac_f32_e32 v3, v36, v26 ; 3E063524 v_mul_f32_e32 v4, v41, v27 ; 10083729 v_mac_f32_e32 v4, v40, v26 ; 3E083528 v_mul_f32_e32 v18, v45, v27 ; 1024372D v_mac_f32_e32 v18, v44, v26 ; 3E24352C v_mul_f32_e32 v25, v19, v19 ; 10322713 v_mac_f32_e32 v25, v22, v22 ; 3E322D16 v_mac_f32_e32 v25, v5, v5 ; 3E320B05 v_rsq_clamp_f32_e32 v25, v25 ; 7E325919 v_mac_f32_e32 v3, v38, v28 ; 3E063926 v_mac_f32_e32 v4, v42, v28 ; 3E08392A v_mac_f32_e32 v18, v46, v28 ; 3E24392E v_mul_f32_e32 v19, v25, v19 ; 10262719 v_mul_f32_e32 v22, v25, v22 ; 102C2D19 v_mul_f32_e32 v5, v25, v5 ; 100A0B19 v_add_f32_e32 v3, v39, v3 ; 06060727 v_add_f32_e32 v4, v43, v4 ; 0608092B v_add_f32_e32 v18, v47, v18 ; 0624252F v_subrev_f32_e32 v25, s19, v3 ; 0A320613 v_subrev_f32_e32 v26, s21, v4 ; 0A340815 v_mul_f32_e32 v27, v25, v25 ; 10363319 v_mac_f32_e32 v27, v26, v26 ; 3E36351A v_subrev_f32_e32 v28, s20, v18 ; 0A382414 v_mac_f32_e32 v27, v28, v28 ; 3E36391C v_rsq_clamp_f32_e32 v27, v27 ; 7E36591B v_mul_f32_e32 v29, s17, v19 ; 103A2611 v_mac_f32_e32 v29, s16, v22 ; 3E3A2C10 v_mac_f32_e32 v29, s15, v5 ; 3E3A0A0F v_mul_f32_e32 v32, v19, v29 ; 10403B13 v_mad_f32 v32, 2.0, v32, -s17 ; D2820020 804640F4 v_mul_f32_e32 v33, v27, v25 ; 1042331B v_mul_f32_e32 v32, v32, v33 ; 10404320 v_mul_f32_e32 v33, v22, v29 ; 10423B16 v_mad_f32 v33, 2.0, v33, -s16 ; D2820021 804242F4 v_mul_f32_e32 v34, v27, v26 ; 1044351B v_mac_f32_e32 v32, v33, v34 ; 3E404521 v_mul_f32_e32 v29, v5, v29 ; 103A3B05 v_mad_f32 v29, 2.0, v29, -s15 ; D282001D 803E3AF4 v_mul_f32_e32 v27, v27, v28 ; 1036391B v_mac_f32_e32 v32, v29, v27 ; 3E40371D v_log_f32_e32 v13, v13 ; 7E1A4F0D v_log_f32_e32 v17, v17 ; 7E224F11 v_log_f32_e32 v21, v21 ; 7E2A4F15 v_cmp_gt_f32_e64 s[6:7], 0, v7 ; D0080006 00020E80 v_cndmask_b32_e64 v27, 0, 1.0, s[6:7] ; D200001B 0019E480 v_sub_f32_e64 v7, |v7|, v27 ; D2080107 00023707 v_cmp_gt_f32_e64 s[6:7], 0, v2 ; D0080006 00020480 v_cndmask_b32_e64 v29, 0, 1.0, s[6:7] ; D200001D 0019E480 v_sub_f32_e64 v2, |v2|, v29 ; D2080102 00023B02 v_add_f32_e32 v7, v20, v7 ; 060E0F14 v_add_f32_e32 v2, v20, v2 ; 06040514 v_mul_legacy_f32_e32 v13, v23, v13 ; 0E1A1B17 v_mul_legacy_f32_e32 v17, v23, v17 ; 0E222317 v_mul_legacy_f32_e32 v20, v23, v21 ; 0E282B17 v_mul_f32_e32 v21, s18, v25 ; 102A3212 v_mac_f32_e32 v21, s13, v26 ; 3E2A340D v_mac_f32_e32 v21, s14, v28 ; 3E2A380E v_exp_f32_e32 v13, v13 ; 7E1A4B0D v_cndmask_b32_e32 v13, v30, v13 ; 001A1B1E v_exp_f32_e32 v17, v17 ; 7E224B11 v_cndmask_b32_e64 v17, v31, v17, s[0:1] ; D2000011 0002231F v_exp_f32_e32 v20, v20 ; 7E284B14 v_cndmask_b32_e64 v20, v24, v20, s[4:5] ; D2000014 00122918 v_cmp_gt_f32_e32 vcc, 0, v7 ; 7C080E80 v_cndmask_b32_e64 v23, 0, 1.0, vcc ; D2000017 01A9E480 v_sub_f32_e64 v24, v23, |v7| ; D2080218 00020F17 v_mad_f32 v24, v24, v1, 1.0 ; D2820018 03CA0318 v_cmp_gt_f32_e32 vcc, 0, v2 ; 7C080480 v_cndmask_b32_e64 v25, 0, 1.0, vcc ; D2000019 01A9E480 v_sub_f32_e64 v26, v25, |v2| ; D208021A 00020519 v_mac_f32_e32 v24, v1, v26 ; 3E303501 v_sub_f32_e64 v7, |v7|, v23 ; D2080107 00022F07 v_sub_f32_e64 v2, |v2|, v25 ; D2080102 00023302 v_add_f32_e64 v26, 0, v32 clamp ; D206081A 00024080 v_log_f32_e32 v26, v26 ; 7E344F1A v_mul_f32_e32 v7, v1, v7 ; 100E0F01 v_mul_f32_e32 v1, v1, v2 ; 10020501 exp 15, 33, 0, 0, 0, v3, v4, v18, v21 ; F800021F 15120403 v_mul_legacy_f32_e32 v2, 0x41800000, v26 ; 0E0434FF 41800000 v_exp_f32_e32 v2, v2 ; 7E044B02 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 exp 15, 34, 0, 0, 0, v19, v22, v5, v2 ; F800022F 02051613 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_mul_f32_e32 v2, v48, v13 ; 10041B30 v_mul_f32_e32 v13, v7, v7 ; 101A0F07 v_mac_f32_e32 v13, v1, v1 ; 3E1A0301 v_mac_f32_e32 v13, v24, v24 ; 3E1A3118 v_rsq_clamp_f32_e32 v13, v13 ; 7E1A590D v_mul_f32_e32 v17, v49, v17 ; 10222331 v_mul_f32_e32 v20, v50, v20 ; 10282932 exp 15, 35, 0, 0, 0, v2, v17, v20, v51 ; F800023F 33141102 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v2, v13, v7 ; 10040F0D v_mul_f32_e32 v1, v13, v1 ; 1002030D v_mul_f32_e32 v7, v13, v24 ; 100E310D v_mad_f32 v13, -2.0, v23, 1.0 ; D282000D 03CA2EF5 v_mul_f32_e32 v2, v13, v2 ; 1004050D v_mad_f32 v13, -2.0, v25, 1.0 ; D282000D 03CA32F5 v_mul_f32_e32 v1, v13, v1 ; 1002030D v_mad_f32 v13, -2.0, v27, 1.0 ; D282000D 03CA36F5 v_mul_f32_e32 v7, v13, v7 ; 100E0F0D v_mul_f32_e32 v13, v37, v1 ; 101A0325 v_mac_f32_e32 v13, v36, v2 ; 3E1A0524 v_mac_f32_e32 v13, v38, v7 ; 3E1A0F26 v_mul_f32_e32 v17, v41, v1 ; 10220329 v_mac_f32_e32 v17, v40, v2 ; 3E220528 v_mac_f32_e32 v17, v42, v7 ; 3E220F2A v_mul_f32_e32 v1, v45, v1 ; 1002032D v_mac_f32_e32 v1, v44, v2 ; 3E02052C v_mac_f32_e32 v1, v46, v7 ; 3E020F2E exp 15, 36, 0, 0, 0, v14, v15, v16, v6 ; F800024F 06100F0E v_mul_f32_e32 v2, 0.5, v10 ; 100414F0 v_mul_f32_e32 v7, 0.5, v11 ; 100E16F0 v_mul_f32_e32 v10, 0.5, v12 ; 101418F0 v_mul_f32_e32 v11, v19, v13 ; 10161B13 v_mac_f32_e32 v11, v22, v17 ; 3E162316 v_mac_f32_e32 v11, v5, v1 ; 3E160305 v_mad_f32 v12, -v11, v19, v13 ; D282000C 2436270B v_mad_f32 v13, -v11, v22, v17 ; D282000D 24462D0B s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 v_mad_f32 v1, -v11, v5, v1 ; D2820001 24060B0B v_mov_b32_e32 v5, 0x38d1b717 ; 7E0A02FF 38D1B717 v_max_f32_e32 v2, v5, v2 ; 20040505 v_max_f32_e32 v7, v5, v7 ; 200E0F05 v_mul_f32_e32 v11, v12, v12 ; 1016190C v_mac_f32_e32 v11, v13, v13 ; 3E161B0D v_mac_f32_e32 v11, v1, v1 ; 3E160301 v_rsq_clamp_f32_e32 v11, v11 ; 7E16590B v_max_f32_e32 v5, v5, v10 ; 200A1505 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 exp 15, 37, 0, 0, 0, v2, v7, v5, v6 ; F800025F 06050702 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v2, v11, v12 ; 1004190B v_mul_f32_e32 v5, v11, v13 ; 100A1B0B v_mul_f32_e32 v1, v11, v1 ; 1002030B v_mad_f32 v7, -2.0, v29, 1.0 ; D2820007 03CA3AF5 exp 15, 38, 0, 0, 0, v2, v5, v1, v7 ; F800026F 07010502 s_buffer_load_dword s5, s[0:3], 0x9 ; C2028109 s_waitcnt expcnt(0) ; BF8C070F v_add_f32_e32 v1, -0.5, v8 ; 060210F1 v_add_f32_e32 v2, -0.5, v9 ; 060412F1 v_mul_f32_e32 v5, s4, v0 ; 100A0004 v_fract_f32_e32 v5, v5 ; 7E0A4105 v_cos_f32_e32 v7, v5 ; 7E0E6D05 v_sin_f32_e32 v5, v5 ; 7E0A6B05 v_mul_f32_e32 v8, v2, v5 ; 10100B02 v_mad_f32 v8, v7, v1, -v8 ; D2820008 84220307 v_mul_f32_e32 v7, v2, v7 ; 100E0F02 v_mac_f32_e32 v7, v1, v5 ; 3E0E0B01 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s5, v0 ; 100A0005 v_fract_f32_e32 v5, v5 ; 7E0A4105 v_cos_f32_e32 v9, v5 ; 7E126D05 v_sin_f32_e32 v5, v5 ; 7E0A6B05 v_mul_f32_e32 v10, v2, v5 ; 10140B02 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 v_mul_f32_e32 v5, v1, v5 ; 100A0B01 v_mad_f32 v10, v9, v1, -v10 ; D282000A 842A0309 v_mac_f32_e32 v5, v2, v9 ; 3E0A1302 s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106 s_buffer_load_dword s6, s[0:3], 0x7 ; C2030107 s_buffer_load_dword s7, s[0:3], 0xc ; C203810C s_buffer_load_dword s13, s[0:3], 0xd ; C206810D s_buffer_load_dword s14, s[0:3], 0xe ; C207010E s_buffer_load_dword s15, s[0:3], 0xf ; C207810F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v8, s4, v8, 0.5 ; D2820008 03C21004 v_mad_f32 v7, s4, v7, 0.5 ; D2820007 03C20E04 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_buffer_load_dword s16, s[0:3], 0x13 ; C2080113 s_buffer_load_dword s17, s[0:3], 0x14 ; C2088114 v_mad_f32 v9, v10, s5, 0.5 ; D2820009 03C00B0A v_mad_f32 v5, v5, s5, 0.5 ; D2820005 03C00B05 v_mov_b32_e32 v10, s7 ; 7E140207 v_mac_f32_e32 v8, s12, v10 ; 3E10140C v_mov_b32_e32 v10, s13 ; 7E14020D v_mac_f32_e32 v7, s12, v10 ; 3E0E140C v_mov_b32_e32 v10, s14 ; 7E14020E v_mac_f32_e32 v9, s12, v10 ; 3E12140C v_mov_b32_e32 v10, s15 ; 7E14020F s_buffer_load_dword s5, s[0:3], 0x15 ; C2028115 v_mac_f32_e32 v5, s12, v10 ; 3E0A140C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v8, s4, v8 ; 06101004 v_add_f32_e32 v7, s16, v7 ; 060E0E10 v_add_f32_e32 v9, s17, v9 ; 06121211 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_buffer_load_dword s7, s[0:3], 0x16 ; C2038116 s_buffer_load_dword s13, s[0:3], 0x17 ; C2068117 s_buffer_load_dword s14, s[0:3], 0x18 ; C2070118 s_buffer_load_dword s15, s[0:3], 0x19 ; C2078119 v_add_f32_e32 v5, s5, v5 ; 060A0A05 exp 15, 39, 0, 0, 0, v8, v7, v9, v5 ; F800027F 05090708 s_buffer_load_dword s5, s[0:3], 0x10 ; C2028110 s_buffer_load_dword s16, s[0:3], 0x11 ; C2080111 s_buffer_load_dword s17, s[0:3], 0x1a ; C208811A s_buffer_load_dword s18, s[0:3], 0x1b ; C209011B s_buffer_load_dword s19, s[0:3], 0x1c ; C209811C s_buffer_load_dword s0, s[0:3], 0x1d ; C200011D s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v5, s4, v0 ; 100A0004 v_mul_f32_e32 v0, s15, v0 ; 1000000F v_fract_f32_e32 v5, v5 ; 7E0A4105 v_cos_f32_e32 v7, v5 ; 7E0E6D05 v_sin_f32_e32 v5, v5 ; 7E0A6B05 v_mul_f32_e32 v8, v2, v5 ; 10100B02 v_mul_f32_e32 v5, v1, v5 ; 100A0B01 v_mad_f32 v8, v7, v1, -v8 ; D2820008 84220307 v_mac_f32_e32 v5, v2, v7 ; 3E0A0F02 v_fract_f32_e32 v0, v0 ; 7E004100 v_cos_f32_e32 v7, v0 ; 7E0E6D00 v_sin_f32_e32 v0, v0 ; 7E006B00 v_mul_f32_e32 v9, v2, v0 ; 10120102 v_mul_f32_e32 v0, v1, v0 ; 10000101 v_mad_f32 v1, v7, v1, -v9 ; D2820001 84260307 v_mac_f32_e32 v0, v2, v7 ; 3E000F02 v_mad_f32 v2, v8, s6, 0.5 ; D2820002 03C00D08 v_mad_f32 v5, v5, s6, 0.5 ; D2820005 03C00D05 v_mad_f32 v1, s14, v1, 0.5 ; D2820001 03C2020E v_mad_f32 v0, s14, v0, 0.5 ; D2820000 03C2000E v_mov_b32_e32 v7, s5 ; 7E0E0205 v_mac_f32_e32 v2, s12, v7 ; 3E040E0C v_mov_b32_e32 v7, s16 ; 7E0E0210 v_mac_f32_e32 v5, s12, v7 ; 3E0A0E0C v_mov_b32_e32 v7, s17 ; 7E0E0211 v_mac_f32_e32 v1, s12, v7 ; 3E020E0C v_mov_b32_e32 v7, s18 ; 7E0E0212 v_mac_f32_e32 v0, s12, v7 ; 3E000E0C v_add_f32_e32 v2, s7, v2 ; 06040407 v_add_f32_e32 v5, s13, v5 ; 060A0A0D v_add_f32_e32 v1, s19, v1 ; 06020213 v_add_f32_e32 v0, s0, v0 ; 06000000 exp 15, 40, 0, 0, 0, v2, v5, v1, v0 ; F800028F 00010502 s_buffer_load_dword s0, s[8:11], 0xf ; C200090F s_buffer_load_dword s1, s[8:11], 0x0 ; C2008900 s_buffer_load_dword s2, s[8:11], 0x1 ; C2010901 s_buffer_load_dword s3, s[8:11], 0x2 ; C2018902 s_buffer_load_dword s4, s[8:11], 0x3 ; C2020903 s_buffer_load_dword s5, s[8:11], 0x4 ; C2028904 s_buffer_load_dword s6, s[8:11], 0x5 ; C2030905 s_buffer_load_dword s7, s[8:11], 0x6 ; C2038906 s_buffer_load_dword s12, s[8:11], 0x7 ; C2060907 s_buffer_load_dword s13, s[8:11], 0x8 ; C2068908 s_buffer_load_dword s14, s[8:11], 0x9 ; C2070909 s_buffer_load_dword s15, s[8:11], 0xa ; C207890A s_buffer_load_dword s16, s[8:11], 0xb ; C208090B s_buffer_load_dword s17, s[8:11], 0xc ; C208890C s_buffer_load_dword s18, s[8:11], 0xd ; C209090D s_buffer_load_dword s8, s[8:11], 0xe ; C204090E s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v0, s6, v4 ; 10000806 v_mul_f32_e32 v1, s7, v4 ; 10020807 v_mul_f32_e32 v2, s12, v4 ; 1004080C v_mul_f32_e32 v4, s5, v4 ; 10080805 v_mac_f32_e32 v0, s2, v3 ; 3E000602 v_mac_f32_e32 v1, s3, v3 ; 3E020603 v_mac_f32_e32 v2, s4, v3 ; 3E040604 v_mac_f32_e32 v4, s1, v3 ; 3E080601 v_mac_f32_e32 v0, s14, v18 ; 3E00240E v_mac_f32_e32 v1, s15, v18 ; 3E02240F v_mac_f32_e32 v2, s16, v18 ; 3E042410 v_mac_f32_e32 v4, s13, v18 ; 3E08240D v_add_f32_e32 v0, s18, v0 ; 06000012 v_add_f32_e32 v1, s8, v1 ; 06020208 v_add_f32_e32 v2, s0, v2 ; 06040400 v_add_f32_e32 v3, s17, v4 ; 06060811 v_xor_b32_e32 v0, 0x80000000, v0 ; 3A0000FF 80000000 v_mad_f32 v1, 2.0, v1, -v2 ; D2820001 840A02F4 exp 15, 12, 0, 0, 0, v3, v0, v1, v2 ; F80000CF 02010003 exp 15, 13, 0, 1, 0, v6, v6, v6, v6 ; F80008DF 06060606 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 52 Code Size: 1956 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0xB last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[4], PERSPECTIVE DCL IN[5], GENERIC[5], PERSPECTIVE DCL IN[6], GENERIC[6], PERSPECTIVE DCL IN[7], GENERIC[7], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL OUT[2], COLOR[2] DCL OUT[3], COLOR[3] DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL SAMP[8] DCL SAMP[9] DCL SAMP[10] DCL SAMP[11] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL SVIEW[5], 2D, FLOAT DCL SVIEW[6], 2D, FLOAT DCL SVIEW[7], 2D, FLOAT DCL SVIEW[8], 2D, FLOAT DCL SVIEW[9], 2D, FLOAT DCL SVIEW[10], 2D, FLOAT DCL SVIEW[11], 2D, FLOAT DCL CONST[1][0..24] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0..11], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 3.0000, 2.0000} IMM[1] UINT32 {0, 64, 48, 32} IMM[2] UINT32 {16, 224, 228, 0} IMM[3] FLT32 { -1.0000, 0.5000, 0.0000, 0.0000} 0: DP3 TEMP[0].x, IN[1].xyzz, IN[1].xyzz 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MUL TEMP[0].xyz, IN[1].xyzz, TEMP[0].xxxx 3: MOV TEMP[1].xy, IN[6].zwww 4: TEX TEMP[1], TEMP[1], SAMP[1], 2D 5: MOV TEMP[2].xy, IN[7].xyyy 6: TEX TEMP[2], TEMP[2], SAMP[2], 2D 7: MOV TEMP[3].xy, IN[7].zwww 8: TEX TEMP[3], TEMP[3], SAMP[3], 2D 9: ADD TEMP[4].x, TEMP[1].wwww, -IN[4].xxxx 10: MAX TEMP[4].x, IMM[0].xxxx, TEMP[4].xxxx 11: ADD TEMP[5].x, IN[3].xxxx, -TEMP[4].xxxx 12: ADD TEMP[6].x, IN[4].xxxx, TEMP[1].wwww 13: MIN TEMP[6].x, IMM[0].yyyy, TEMP[6].xxxx 14: ADD TEMP[4].x, TEMP[6].xxxx, -TEMP[4].xxxx 15: RCP TEMP[4].x, TEMP[4].xxxx 16: MUL TEMP[4].x, TEMP[5].xxxx, TEMP[4].xxxx 17: MOV_SAT TEMP[4].x, TEMP[4].xxxx 18: MUL TEMP[5].x, IMM[0].wwww, TEMP[4].xxxx 19: ADD TEMP[5].x, IMM[0].zzzz, -TEMP[5].xxxx 20: MUL TEMP[5].x, TEMP[4].xxxx, TEMP[5].xxxx 21: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx 22: ADD TEMP[5].x, IMM[0].yyyy, -TEMP[4].xxxx 23: ADD TEMP[6].x, TEMP[2].wwww, -IN[4].yyyy 24: MAX TEMP[6].x, IMM[0].xxxx, TEMP[6].xxxx 25: ADD TEMP[7].x, IN[3].yyyy, -TEMP[6].xxxx 26: ADD TEMP[8].x, IN[4].yyyy, TEMP[2].wwww 27: MIN TEMP[8].x, IMM[0].yyyy, TEMP[8].xxxx 28: ADD TEMP[6].x, TEMP[8].xxxx, -TEMP[6].xxxx 29: RCP TEMP[6].x, TEMP[6].xxxx 30: MUL TEMP[6].x, TEMP[7].xxxx, TEMP[6].xxxx 31: MOV_SAT TEMP[6].x, TEMP[6].xxxx 32: MUL TEMP[7].x, IMM[0].wwww, TEMP[6].xxxx 33: ADD TEMP[7].x, IMM[0].zzzz, -TEMP[7].xxxx 34: MUL TEMP[7].x, TEMP[6].xxxx, TEMP[7].xxxx 35: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].xxxx 36: MAX TEMP[6].x, TEMP[6].xxxx, IMM[0].xxxx 37: MIN TEMP[6].x, TEMP[6].xxxx, TEMP[5].xxxx 38: ADD TEMP[5].x, TEMP[5].xxxx, -TEMP[6].xxxx 39: ADD TEMP[7].x, TEMP[3].wwww, -IN[4].zzzz 40: MAX TEMP[7].x, IMM[0].xxxx, TEMP[7].xxxx 41: ADD TEMP[8].x, IN[3].zzzz, -TEMP[7].xxxx 42: ADD TEMP[9].x, IN[4].zzzz, TEMP[3].wwww 43: MIN TEMP[9].x, IMM[0].yyyy, TEMP[9].xxxx 44: ADD TEMP[7].x, TEMP[9].xxxx, -TEMP[7].xxxx 45: RCP TEMP[7].x, TEMP[7].xxxx 46: MUL TEMP[7].x, TEMP[8].xxxx, TEMP[7].xxxx 47: MOV_SAT TEMP[7].x, TEMP[7].xxxx 48: MUL TEMP[8].x, IMM[0].wwww, TEMP[7].xxxx 49: ADD TEMP[8].x, IMM[0].zzzz, -TEMP[8].xxxx 50: MUL TEMP[8].x, TEMP[7].xxxx, TEMP[8].xxxx 51: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[8].xxxx 52: MAX TEMP[7].x, TEMP[7].xxxx, IMM[0].xxxx 53: MIN TEMP[7].x, TEMP[7].xxxx, TEMP[5].xxxx 54: ADD TEMP[5].x, TEMP[5].xxxx, -TEMP[7].xxxx 55: MUL TEMP[3].xyz, CONST[1][4].xyzz, TEMP[3].xyzz 56: MUL TEMP[2].xyz, CONST[1][3].xyzz, TEMP[2].xyzz 57: MUL TEMP[1].xyz, CONST[1][2].xyzz, TEMP[1].xyzz 58: MOV TEMP[8].xy, IN[6].xyyy 59: TEX TEMP[8].xyz, TEMP[8], SAMP[0], 2D 60: MUL TEMP[8].xyz, CONST[1][1].xyzz, TEMP[8].xyzz 61: MUL TEMP[8].xyz, TEMP[5].xxxx, TEMP[8].xyzz 62: MAD TEMP[1].xyz, TEMP[4].xxxx, TEMP[1].xyzz, TEMP[8].xyzz 63: MAD TEMP[1].xyz, TEMP[6].xxxx, TEMP[2].xyzz, TEMP[1].xyzz 64: MAD TEMP[1].xyz, TEMP[3].xyzz, TEMP[7].xxxx, TEMP[1].xyzz 65: MUL TEMP[1].xyz, TEMP[1].xyzz, IN[2].xyzz 66: MOV TEMP[2].xy, IN[7].zwww 67: TEX TEMP[2].xyz, TEMP[2], SAMP[7], 2D 68: MOV TEMP[3].xy, IN[7].xyyy 69: TEX TEMP[3].xyz, TEMP[3], SAMP[6], 2D 70: MOV TEMP[8].xy, IN[6].zwww 71: TEX TEMP[8].xyz, TEMP[8], SAMP[5], 2D 72: MOV TEMP[9].xy, IN[6].xyyy 73: TEX TEMP[9].xyz, TEMP[9], SAMP[4], 2D 74: MUL TEMP[9].xyz, TEMP[5].xxxx, TEMP[9].xyzz 75: MAD TEMP[8].xyz, TEMP[4].xxxx, TEMP[8].xyzz, TEMP[9].xyzz 76: MAD TEMP[3].xyz, TEMP[6].xxxx, TEMP[3].xyzz, TEMP[8].xyzz 77: MAD TEMP[2].xyz, TEMP[7].xxxx, TEMP[2].xyzz, TEMP[3].xyzz 78: MUL TEMP[3].xyz, TEMP[0].zxyy, IN[5].yzxx 79: MAD TEMP[3].xyz, TEMP[0].yzxx, IN[5].zxyy, -TEMP[3].xyzz 80: MOV TEMP[8].xy, IN[7].zwww 81: TEX TEMP[8], TEMP[8], SAMP[11], 2D 82: MOV TEMP[9].xy, IN[7].xyyy 83: TEX TEMP[9], TEMP[9], SAMP[10], 2D 84: MOV TEMP[10].xy, IN[6].xyyy 85: TEX TEMP[10], TEMP[10], SAMP[8], 2D 86: MOV TEMP[11].xy, IN[6].zwww 87: TEX TEMP[11], TEMP[11], SAMP[9], 2D 88: MUL TEMP[4], TEMP[4].xxxx, TEMP[11] 89: MAD TEMP[4], TEMP[5].xxxx, TEMP[10], TEMP[4] 90: MAD TEMP[4], TEMP[6].xxxx, TEMP[9], TEMP[4] 91: MAD TEMP[4].yw, TEMP[7].xxxx, TEMP[8], TEMP[4] 92: MAD TEMP[4].xy, TEMP[4].wyyy, IMM[0].wwww, IMM[3].xxxx 93: MOV TEMP[5].x, TEMP[4].xxxx 94: MOV TEMP[5].y, -TEMP[4].yyyy 95: MUL TEMP[5].xy, TEMP[5].xyyy, CONST[1][0].xxxx 96: MOV TEMP[6].x, TEMP[5].xxxx 97: MOV TEMP[6].y, TEMP[5].yyyy 98: DP2 TEMP[4].x, TEMP[4].xyyy, TEMP[4].xyyy 99: ADD TEMP[4].x, IMM[0].yyyy, -TEMP[4].xxxx 100: MOV_SAT TEMP[4].x, TEMP[4].xxxx 101: SQRT TEMP[4].x, TEMP[4].xxxx 102: MOV TEMP[6].z, TEMP[4].xxxx 103: DP3 TEMP[4].x, TEMP[6].xyzz, TEMP[6].xyzz 104: RSQ TEMP[4].x, TEMP[4].xxxx 105: MUL TEMP[4].xyz, TEMP[6].xyzz, TEMP[4].xxxx 106: DP3 TEMP[5].x, IN[5].xyzz, IN[5].xyzz 107: RSQ TEMP[5].x, TEMP[5].xxxx 108: MUL TEMP[5].xyz, IN[5].xyzz, TEMP[5].xxxx 109: DP3 TEMP[6].x, TEMP[3].xyzz, TEMP[3].xyzz 110: RSQ TEMP[6].x, TEMP[6].xxxx 111: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[6].xxxx 112: MUL TEMP[3].xyz, IN[5].wwww, TEMP[3].xyzz 113: MUL TEMP[3].xyz, TEMP[4].yyyy, TEMP[3].xyzz 114: MAD TEMP[3].xyz, TEMP[4].xxxx, TEMP[5].xyzz, TEMP[3].xyzz 115: MAD TEMP[0].xyz, TEMP[0].xyzz, TEMP[4].zzzz, TEMP[3].xyzz 116: DP3 TEMP[3].x, TEMP[0].xyzz, TEMP[0].xyzz 117: RSQ TEMP[3].x, TEMP[3].xxxx 118: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[3].xxxx 119: MAD TEMP[0].xyz, TEMP[0].xyzz, IMM[3].yyyy, IMM[3].yyyy 120: MOV TEMP[3].w, IMM[0].xxxx 121: MOV TEMP[3].x, TEMP[0].xxxx 122: MOV TEMP[3].y, TEMP[0].yyyy 123: MOV TEMP[3].z, TEMP[0].zzzz 124: MOV TEMP[0].w, IMM[0].xxxx 125: MOV TEMP[0].x, TEMP[1].xxxx 126: MOV TEMP[0].y, TEMP[1].yyyy 127: MOV TEMP[0].z, TEMP[1].zzzz 128: MOV TEMP[1].w, IMM[0].xxxx 129: MUL TEMP[1].x, TEMP[2].xxxx, CONST[1][14].xxxx 130: MOV TEMP[1].y, TEMP[2].yyyy 131: MUL TEMP[2].x, TEMP[2].zzzz, CONST[1][14].yyyy 132: MOV TEMP[1].z, TEMP[2].xxxx 133: MOV OUT[2], IN[0].wwww 134: MOV OUT[0], TEMP[0] 135: MOV OUT[3], TEMP[3] 136: MOV OUT[1], TEMP[1] 137: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228) %39 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %40 = load <32 x i8>, <32 x i8> addrspace(2)* %39, align 32, !tbaa !0 %41 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 %43 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %44 = bitcast <8 x i32> addrspace(2)* %43 to <32 x i8> addrspace(2)* %45 = load <32 x i8>, <32 x i8> addrspace(2)* %44, align 32, !tbaa !0 %46 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %47 = bitcast <4 x i32> addrspace(2)* %46 to <16 x i8> addrspace(2)* %48 = load <16 x i8>, <16 x i8> addrspace(2)* %47, align 16, !tbaa !0 %49 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %50 = bitcast <8 x i32> addrspace(2)* %49 to <32 x i8> addrspace(2)* %51 = load <32 x i8>, <32 x i8> addrspace(2)* %50, align 32, !tbaa !0 %52 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %53 = bitcast <4 x i32> addrspace(2)* %52 to <16 x i8> addrspace(2)* %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !tbaa !0 %55 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %56 = bitcast <8 x i32> addrspace(2)* %55 to <32 x i8> addrspace(2)* %57 = load <32 x i8>, <32 x i8> addrspace(2)* %56, align 32, !tbaa !0 %58 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %59 = bitcast <4 x i32> addrspace(2)* %58 to <16 x i8> addrspace(2)* %60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !tbaa !0 %61 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %62 = bitcast <8 x i32> addrspace(2)* %61 to <32 x i8> addrspace(2)* %63 = load <32 x i8>, <32 x i8> addrspace(2)* %62, align 32, !tbaa !0 %64 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %65 = bitcast <4 x i32> addrspace(2)* %64 to <16 x i8> addrspace(2)* %66 = load <16 x i8>, <16 x i8> addrspace(2)* %65, align 16, !tbaa !0 %67 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %68 = bitcast <8 x i32> addrspace(2)* %67 to <32 x i8> addrspace(2)* %69 = load <32 x i8>, <32 x i8> addrspace(2)* %68, align 32, !tbaa !0 %70 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %71 = bitcast <4 x i32> addrspace(2)* %70 to <16 x i8> addrspace(2)* %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0 %73 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 6 %74 = bitcast <8 x i32> addrspace(2)* %73 to <32 x i8> addrspace(2)* %75 = load <32 x i8>, <32 x i8> addrspace(2)* %74, align 32, !tbaa !0 %76 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 6 %77 = bitcast <4 x i32> addrspace(2)* %76 to <16 x i8> addrspace(2)* %78 = load <16 x i8>, <16 x i8> addrspace(2)* %77, align 16, !tbaa !0 %79 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 7 %80 = bitcast <8 x i32> addrspace(2)* %79 to <32 x i8> addrspace(2)* %81 = load <32 x i8>, <32 x i8> addrspace(2)* %80, align 32, !tbaa !0 %82 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 7 %83 = bitcast <4 x i32> addrspace(2)* %82 to <16 x i8> addrspace(2)* %84 = load <16 x i8>, <16 x i8> addrspace(2)* %83, align 16, !tbaa !0 %85 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 8 %86 = bitcast <8 x i32> addrspace(2)* %85 to <32 x i8> addrspace(2)* %87 = load <32 x i8>, <32 x i8> addrspace(2)* %86, align 32, !tbaa !0 %88 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 8 %89 = bitcast <4 x i32> addrspace(2)* %88 to <16 x i8> addrspace(2)* %90 = load <16 x i8>, <16 x i8> addrspace(2)* %89, align 16, !tbaa !0 %91 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 9 %92 = bitcast <8 x i32> addrspace(2)* %91 to <32 x i8> addrspace(2)* %93 = load <32 x i8>, <32 x i8> addrspace(2)* %92, align 32, !tbaa !0 %94 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 9 %95 = bitcast <4 x i32> addrspace(2)* %94 to <16 x i8> addrspace(2)* %96 = load <16 x i8>, <16 x i8> addrspace(2)* %95, align 16, !tbaa !0 %97 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 10 %98 = bitcast <8 x i32> addrspace(2)* %97 to <32 x i8> addrspace(2)* %99 = load <32 x i8>, <32 x i8> addrspace(2)* %98, align 32, !tbaa !0 %100 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 10 %101 = bitcast <4 x i32> addrspace(2)* %100 to <16 x i8> addrspace(2)* %102 = load <16 x i8>, <16 x i8> addrspace(2)* %101, align 16, !tbaa !0 %103 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 11 %104 = bitcast <8 x i32> addrspace(2)* %103 to <32 x i8> addrspace(2)* %105 = load <32 x i8>, <32 x i8> addrspace(2)* %104, align 32, !tbaa !0 %106 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 11 %107 = bitcast <4 x i32> addrspace(2)* %106 to <16 x i8> addrspace(2)* %108 = load <16 x i8>, <16 x i8> addrspace(2)* %107, align 16, !tbaa !0 %109 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %110 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %111 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %112 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %113 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %114 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %115 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %116 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %117 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %118 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %119 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %120 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %121 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %122 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %123 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %124 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %125 = call float @llvm.SI.fs.interp(i32 3, i32 5, i32 %5, <2 x i32> %7) %126 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %5, <2 x i32> %7) %127 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %5, <2 x i32> %7) %128 = call float @llvm.SI.fs.interp(i32 2, i32 6, i32 %5, <2 x i32> %7) %129 = call float @llvm.SI.fs.interp(i32 3, i32 6, i32 %5, <2 x i32> %7) %130 = call float @llvm.SI.fs.interp(i32 0, i32 7, i32 %5, <2 x i32> %7) %131 = call float @llvm.SI.fs.interp(i32 1, i32 7, i32 %5, <2 x i32> %7) %132 = call float @llvm.SI.fs.interp(i32 2, i32 7, i32 %5, <2 x i32> %7) %133 = call float @llvm.SI.fs.interp(i32 3, i32 7, i32 %5, <2 x i32> %7) %134 = fmul float %110, %110 %135 = fmul float %111, %111 %136 = fadd float %135, %134 %137 = fmul float %112, %112 %138 = fadd float %136, %137 %139 = call float @llvm.AMDGPU.rsq.clamped.f32(float %138) %140 = fmul float %110, %139 %141 = fmul float %111, %139 %142 = fmul float %112, %139 %143 = bitcast float %128 to i32 %144 = bitcast float %129 to i32 %145 = insertelement <2 x i32> undef, i32 %143, i32 0 %146 = insertelement <2 x i32> %145, i32 %144, i32 1 %147 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %146, <32 x i8> %45, <16 x i8> %48, i32 2) %148 = extractelement <4 x float> %147, i32 0 %149 = extractelement <4 x float> %147, i32 1 %150 = extractelement <4 x float> %147, i32 2 %151 = extractelement <4 x float> %147, i32 3 %152 = bitcast float %130 to i32 %153 = bitcast float %131 to i32 %154 = insertelement <2 x i32> undef, i32 %152, i32 0 %155 = insertelement <2 x i32> %154, i32 %153, i32 1 %156 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %155, <32 x i8> %51, <16 x i8> %54, i32 2) %157 = extractelement <4 x float> %156, i32 0 %158 = extractelement <4 x float> %156, i32 1 %159 = extractelement <4 x float> %156, i32 2 %160 = extractelement <4 x float> %156, i32 3 %161 = bitcast float %132 to i32 %162 = bitcast float %133 to i32 %163 = insertelement <2 x i32> undef, i32 %161, i32 0 %164 = insertelement <2 x i32> %163, i32 %162, i32 1 %165 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %164, <32 x i8> %57, <16 x i8> %60, i32 2) %166 = extractelement <4 x float> %165, i32 0 %167 = extractelement <4 x float> %165, i32 1 %168 = extractelement <4 x float> %165, i32 2 %169 = extractelement <4 x float> %165, i32 3 %170 = fsub float %151, %119 %171 = call float @llvm.maxnum.f32(float %170, float 0.000000e+00) %172 = fsub float %116, %171 %173 = fadd float %119, %151 %174 = call float @llvm.minnum.f32(float %173, float 1.000000e+00) %175 = fsub float %174, %171 %176 = fdiv float 1.000000e+00, %175 %177 = fmul float %172, %176 %178 = call float @llvm.AMDIL.clamp.(float %177, float 0.000000e+00, float 1.000000e+00) %179 = fmul float %178, 2.000000e+00 %180 = fsub float 3.000000e+00, %179 %181 = fmul float %178, %180 %182 = fmul float %178, %181 %183 = fsub float 1.000000e+00, %182 %184 = fsub float %160, %120 %185 = call float @llvm.maxnum.f32(float %184, float 0.000000e+00) %186 = fsub float %117, %185 %187 = fadd float %120, %160 %188 = call float @llvm.minnum.f32(float %187, float 1.000000e+00) %189 = fsub float %188, %185 %190 = fdiv float 1.000000e+00, %189 %191 = fmul float %186, %190 %192 = call float @llvm.AMDIL.clamp.(float %191, float 0.000000e+00, float 1.000000e+00) %193 = fmul float %192, 2.000000e+00 %194 = fsub float 3.000000e+00, %193 %195 = fmul float %192, %194 %196 = fmul float %192, %195 %197 = call float @llvm.maxnum.f32(float %196, float 0.000000e+00) %198 = call float @llvm.minnum.f32(float %197, float %183) %199 = fsub float %183, %198 %200 = fsub float %169, %121 %201 = call float @llvm.maxnum.f32(float %200, float 0.000000e+00) %202 = fsub float %118, %201 %203 = fadd float %121, %169 %204 = call float @llvm.minnum.f32(float %203, float 1.000000e+00) %205 = fsub float %204, %201 %206 = fdiv float 1.000000e+00, %205 %207 = fmul float %202, %206 %208 = call float @llvm.AMDIL.clamp.(float %207, float 0.000000e+00, float 1.000000e+00) %209 = fmul float %208, 2.000000e+00 %210 = fsub float 3.000000e+00, %209 %211 = fmul float %208, %210 %212 = fmul float %208, %211 %213 = call float @llvm.maxnum.f32(float %212, float 0.000000e+00) %214 = call float @llvm.minnum.f32(float %213, float %199) %215 = fsub float %199, %214 %216 = fmul float %34, %166 %217 = fmul float %35, %167 %218 = fmul float %36, %168 %219 = fmul float %31, %157 %220 = fmul float %32, %158 %221 = fmul float %33, %159 %222 = fmul float %28, %148 %223 = fmul float %29, %149 %224 = fmul float %30, %150 %225 = bitcast float %126 to i32 %226 = bitcast float %127 to i32 %227 = insertelement <2 x i32> undef, i32 %225, i32 0 %228 = insertelement <2 x i32> %227, i32 %226, i32 1 %229 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %228, <32 x i8> %40, <16 x i8> %42, i32 2) %230 = extractelement <4 x float> %229, i32 0 %231 = extractelement <4 x float> %229, i32 1 %232 = extractelement <4 x float> %229, i32 2 %233 = fmul float %25, %230 %234 = fmul float %26, %231 %235 = fmul float %27, %232 %236 = fmul float %215, %233 %237 = fmul float %215, %234 %238 = fmul float %215, %235 %239 = fmul float %182, %222 %240 = fadd float %239, %236 %241 = fmul float %182, %223 %242 = fadd float %241, %237 %243 = fmul float %182, %224 %244 = fadd float %243, %238 %245 = fmul float %198, %219 %246 = fadd float %245, %240 %247 = fmul float %198, %220 %248 = fadd float %247, %242 %249 = fmul float %198, %221 %250 = fadd float %249, %244 %251 = fmul float %216, %214 %252 = fadd float %251, %246 %253 = fmul float %217, %214 %254 = fadd float %253, %248 %255 = fmul float %218, %214 %256 = fadd float %255, %250 %257 = fmul float %252, %113 %258 = fmul float %254, %114 %259 = fmul float %256, %115 %260 = bitcast float %132 to i32 %261 = bitcast float %133 to i32 %262 = insertelement <2 x i32> undef, i32 %260, i32 0 %263 = insertelement <2 x i32> %262, i32 %261, i32 1 %264 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %263, <32 x i8> %81, <16 x i8> %84, i32 2) %265 = extractelement <4 x float> %264, i32 0 %266 = extractelement <4 x float> %264, i32 1 %267 = extractelement <4 x float> %264, i32 2 %268 = bitcast float %130 to i32 %269 = bitcast float %131 to i32 %270 = insertelement <2 x i32> undef, i32 %268, i32 0 %271 = insertelement <2 x i32> %270, i32 %269, i32 1 %272 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %271, <32 x i8> %75, <16 x i8> %78, i32 2) %273 = extractelement <4 x float> %272, i32 0 %274 = extractelement <4 x float> %272, i32 1 %275 = extractelement <4 x float> %272, i32 2 %276 = bitcast float %128 to i32 %277 = bitcast float %129 to i32 %278 = insertelement <2 x i32> undef, i32 %276, i32 0 %279 = insertelement <2 x i32> %278, i32 %277, i32 1 %280 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %279, <32 x i8> %69, <16 x i8> %72, i32 2) %281 = extractelement <4 x float> %280, i32 0 %282 = extractelement <4 x float> %280, i32 1 %283 = extractelement <4 x float> %280, i32 2 %284 = bitcast float %126 to i32 %285 = bitcast float %127 to i32 %286 = insertelement <2 x i32> undef, i32 %284, i32 0 %287 = insertelement <2 x i32> %286, i32 %285, i32 1 %288 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %287, <32 x i8> %63, <16 x i8> %66, i32 2) %289 = extractelement <4 x float> %288, i32 0 %290 = extractelement <4 x float> %288, i32 1 %291 = extractelement <4 x float> %288, i32 2 %292 = fmul float %215, %289 %293 = fmul float %215, %290 %294 = fmul float %215, %291 %295 = fmul float %182, %281 %296 = fadd float %295, %292 %297 = fmul float %182, %282 %298 = fadd float %297, %293 %299 = fmul float %182, %283 %300 = fadd float %299, %294 %301 = fmul float %198, %273 %302 = fadd float %301, %296 %303 = fmul float %198, %274 %304 = fadd float %303, %298 %305 = fmul float %198, %275 %306 = fadd float %305, %300 %307 = fmul float %214, %265 %308 = fadd float %307, %302 %309 = fmul float %214, %266 %310 = fadd float %309, %304 %311 = fmul float %214, %267 %312 = fadd float %311, %306 %313 = fmul float %142, %123 %314 = fmul float %140, %124 %315 = fmul float %141, %122 %316 = fmul float %141, %124 %317 = fsub float %316, %313 %318 = fmul float %142, %122 %319 = fsub float %318, %314 %320 = fmul float %140, %123 %321 = fsub float %320, %315 %322 = bitcast float %132 to i32 %323 = bitcast float %133 to i32 %324 = insertelement <2 x i32> undef, i32 %322, i32 0 %325 = insertelement <2 x i32> %324, i32 %323, i32 1 %326 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %325, <32 x i8> %105, <16 x i8> %108, i32 2) %327 = extractelement <4 x float> %326, i32 1 %328 = extractelement <4 x float> %326, i32 3 %329 = bitcast float %130 to i32 %330 = bitcast float %131 to i32 %331 = insertelement <2 x i32> undef, i32 %329, i32 0 %332 = insertelement <2 x i32> %331, i32 %330, i32 1 %333 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %332, <32 x i8> %99, <16 x i8> %102, i32 2) %334 = extractelement <4 x float> %333, i32 1 %335 = extractelement <4 x float> %333, i32 3 %336 = bitcast float %126 to i32 %337 = bitcast float %127 to i32 %338 = insertelement <2 x i32> undef, i32 %336, i32 0 %339 = insertelement <2 x i32> %338, i32 %337, i32 1 %340 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %339, <32 x i8> %87, <16 x i8> %90, i32 2) %341 = extractelement <4 x float> %340, i32 1 %342 = extractelement <4 x float> %340, i32 3 %343 = bitcast float %128 to i32 %344 = bitcast float %129 to i32 %345 = insertelement <2 x i32> undef, i32 %343, i32 0 %346 = insertelement <2 x i32> %345, i32 %344, i32 1 %347 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %346, <32 x i8> %93, <16 x i8> %96, i32 2) %348 = extractelement <4 x float> %347, i32 1 %349 = extractelement <4 x float> %347, i32 3 %350 = fmul float %182, %348 %351 = fmul float %182, %349 %352 = fmul float %215, %341 %353 = fadd float %352, %350 %354 = fmul float %215, %342 %355 = fadd float %354, %351 %356 = fmul float %198, %334 %357 = fadd float %356, %353 %358 = fmul float %198, %335 %359 = fadd float %358, %355 %360 = fmul float %214, %327 %361 = fadd float %360, %357 %362 = fmul float %214, %328 %363 = fadd float %362, %359 %364 = fmul float %363, 2.000000e+00 %365 = fadd float %364, -1.000000e+00 %366 = fmul float %361, 2.000000e+00 %367 = fadd float %366, -1.000000e+00 %368 = fmul float %365, %24 %369 = fmul float %367, %24 %370 = fmul float %365, %365 %371 = fmul float %367, %367 %372 = fadd float %370, %371 %373 = fsub float 1.000000e+00, %372 %374 = call float @llvm.AMDIL.clamp.(float %373, float 0.000000e+00, float 1.000000e+00) %375 = call float @llvm.sqrt.f32(float %374) %376 = fmul float %368, %368 %377 = fmul float %369, %369 %378 = fadd float %377, %376 %379 = fmul float %375, %375 %380 = fadd float %378, %379 %381 = call float @llvm.AMDGPU.rsq.clamped.f32(float %380) %382 = fmul float %368, %381 %383 = fmul float %369, %381 %384 = fsub float -0.000000e+00, %383 %385 = fmul float %375, %381 %386 = fmul float %122, %122 %387 = fmul float %123, %123 %388 = fadd float %387, %386 %389 = fmul float %124, %124 %390 = fadd float %388, %389 %391 = call float @llvm.AMDGPU.rsq.clamped.f32(float %390) %392 = fmul float %122, %391 %393 = fmul float %123, %391 %394 = fmul float %124, %391 %395 = fmul float %317, %317 %396 = fmul float %319, %319 %397 = fadd float %396, %395 %398 = fmul float %321, %321 %399 = fadd float %397, %398 %400 = call float @llvm.AMDGPU.rsq.clamped.f32(float %399) %401 = fmul float %317, %400 %402 = fmul float %319, %400 %403 = fmul float %321, %400 %404 = fmul float %125, %401 %405 = fmul float %125, %402 %406 = fmul float %125, %403 %407 = fmul float %404, %384 %408 = fmul float %405, %384 %409 = fmul float %406, %384 %410 = fmul float %382, %392 %411 = fadd float %410, %407 %412 = fmul float %382, %393 %413 = fadd float %412, %408 %414 = fmul float %382, %394 %415 = fadd float %414, %409 %416 = fmul float %140, %385 %417 = fadd float %416, %411 %418 = fmul float %141, %385 %419 = fadd float %418, %413 %420 = fmul float %142, %385 %421 = fadd float %420, %415 %422 = fmul float %417, %417 %423 = fmul float %419, %419 %424 = fadd float %423, %422 %425 = fmul float %421, %421 %426 = fadd float %424, %425 %427 = call float @llvm.AMDGPU.rsq.clamped.f32(float %426) %428 = fmul float %417, %427 %429 = fmul float %419, %427 %430 = fmul float %421, %427 %431 = fmul float %428, 5.000000e-01 %432 = fadd float %431, 5.000000e-01 %433 = fmul float %429, 5.000000e-01 %434 = fadd float %433, 5.000000e-01 %435 = fmul float %430, 5.000000e-01 %436 = fadd float %435, 5.000000e-01 %437 = fmul float %308, %37 %438 = fmul float %312, %38 %439 = call i32 @llvm.SI.packf16(float %257, float %258) %440 = bitcast i32 %439 to float %441 = call i32 @llvm.SI.packf16(float %259, float 0.000000e+00) %442 = bitcast i32 %441 to float %443 = call i32 @llvm.SI.packf16(float %437, float %310) %444 = bitcast i32 %443 to float %445 = call i32 @llvm.SI.packf16(float %438, float 0.000000e+00) %446 = bitcast i32 %445 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %440, float %442, float %440, float %442) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 1, i32 1, float %444, float %446, float %444, float %446) %447 = call i32 @llvm.SI.packf16(float %432, float %434) %448 = bitcast i32 %447 to float %449 = call i32 @llvm.SI.packf16(float %436, float 0.000000e+00) %450 = bitcast i32 %449 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 2, i32 0, float %109, float %109, float %109, float %109) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 3, i32 1, float %448, float %450, float %448, float %450) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400 v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 v_interp_p1_f32 v5, v0, 2, 1, [m0] ; C8140600 v_interp_p2_f32 v5, [v5], v1, 2, 1, [m0] ; C8150601 v_interp_p1_f32 v6, v0, 0, 2, [m0] ; C8180800 v_interp_p2_f32 v6, [v6], v1, 0, 2, [m0] ; C8190801 v_interp_p1_f32 v7, v0, 1, 2, [m0] ; C81C0900 v_interp_p2_f32 v7, [v7], v1, 1, 2, [m0] ; C81D0901 v_interp_p1_f32 v8, v0, 2, 2, [m0] ; C8200A00 v_interp_p2_f32 v8, [v8], v1, 2, 2, [m0] ; C8210A01 v_interp_p1_f32 v9, v0, 0, 3, [m0] ; C8240C00 v_interp_p2_f32 v9, [v9], v1, 0, 3, [m0] ; C8250C01 v_interp_p1_f32 v10, v0, 1, 3, [m0] ; C8280D00 v_interp_p2_f32 v10, [v10], v1, 1, 3, [m0] ; C8290D01 v_interp_p1_f32 v11, v0, 2, 3, [m0] ; C82C0E00 v_interp_p2_f32 v11, [v11], v1, 2, 3, [m0] ; C82D0E01 v_interp_p1_f32 v12, v0, 0, 4, [m0] ; C8301000 v_interp_p2_f32 v12, [v12], v1, 0, 4, [m0] ; C8311001 v_interp_p1_f32 v13, v0, 1, 4, [m0] ; C8341100 v_interp_p2_f32 v13, [v13], v1, 1, 4, [m0] ; C8351101 v_interp_p1_f32 v14, v0, 2, 4, [m0] ; C8381200 v_interp_p2_f32 v14, [v14], v1, 2, 4, [m0] ; C8391201 v_interp_p1_f32 v15, v0, 0, 5, [m0] ; C83C1400 v_interp_p2_f32 v15, [v15], v1, 0, 5, [m0] ; C83D1401 v_interp_p1_f32 v16, v0, 1, 5, [m0] ; C8401500 v_interp_p2_f32 v16, [v16], v1, 1, 5, [m0] ; C8411501 v_interp_p1_f32 v17, v0, 2, 5, [m0] ; C8441600 v_interp_p2_f32 v17, [v17], v1, 2, 5, [m0] ; C8451601 v_interp_p1_f32 v18, v0, 3, 5, [m0] ; C8481700 v_interp_p2_f32 v18, [v18], v1, 3, 5, [m0] ; C8491701 v_interp_p1_f32 v19, v0, 0, 6, [m0] ; C84C1800 v_interp_p2_f32 v19, [v19], v1, 0, 6, [m0] ; C84D1801 v_interp_p1_f32 v20, v0, 1, 6, [m0] ; C8501900 v_interp_p2_f32 v20, [v20], v1, 1, 6, [m0] ; C8511901 v_interp_p1_f32 v21, v0, 2, 6, [m0] ; C8541A00 v_interp_p2_f32 v21, [v21], v1, 2, 6, [m0] ; C8551A01 v_interp_p1_f32 v22, v0, 3, 6, [m0] ; C8581B00 v_interp_p2_f32 v22, [v22], v1, 3, 6, [m0] ; C8591B01 v_interp_p1_f32 v23, v0, 0, 7, [m0] ; C85C1C00 v_interp_p2_f32 v23, [v23], v1, 0, 7, [m0] ; C85D1C01 v_interp_p1_f32 v24, v0, 1, 7, [m0] ; C8601D00 v_interp_p2_f32 v24, [v24], v1, 1, 7, [m0] ; C8611D01 s_load_dwordx4 s[32:35], s[4:5], 0x4 ; C0900504 s_load_dwordx4 s[36:39], s[4:5], 0x8 ; C0920508 s_load_dwordx4 s[40:43], s[4:5], 0xc ; C094050C s_load_dwordx8 s[44:51], s[6:7], 0x8 ; C0D60708 s_load_dwordx8 s[52:59], s[6:7], 0x10 ; C0DA0710 v_interp_p1_f32 v25, v0, 2, 7, [m0] ; C8641E00 s_load_dwordx8 s[60:67], s[6:7], 0x18 ; C0DE0718 v_interp_p2_f32 v25, [v25], v1, 2, 7, [m0] ; C8651E01 s_load_dwordx4 s[20:23], s[4:5], 0x0 ; C08A0500 s_load_dwordx8 s[24:31], s[6:7], 0x0 ; C0CC0700 v_interp_p1_f32 v26, v0, 3, 7, [m0] ; C8681F00 v_interp_p2_f32 v26, [v26], v1, 3, 7, [m0] ; C8691F01 s_load_dwordx4 s[8:11], s[4:5], 0x10 ; C0840510 s_load_dwordx8 s[12:19], s[6:7], 0x20 ; C0C60720 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[27:30], 15, 0, 0, 0, 0, 0, 0, 0, v[21:22], s[44:51], s[32:35] ; F0800F00 010B1B15 image_sample v[31:34], 15, 0, 0, 0, 0, 0, 0, 0, v[23:24], s[52:59], s[36:39] ; F0800F00 012D1F17 image_sample v[35:38], 15, 0, 0, 0, 0, 0, 0, 0, v[25:26], s[60:67], s[40:43] ; F0800F00 014F2319 s_load_dwordx4 s[44:47], s[4:5], 0x14 ; C0960514 s_load_dwordx4 s[48:51], s[4:5], 0x1c ; C098051C image_sample v[39:41], 7, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[24:31], s[20:23] ; F0800700 00A62713 s_load_dwordx8 s[64:71], s[6:7], 0x30 ; C0E00730 s_load_dwordx8 s[72:79], s[6:7], 0x38 ; C0E40738 s_load_dwordx4 s[80:83], s[4:5], 0x18 ; C0A80518 s_load_dwordx8 s[84:91], s[6:7], 0x28 ; C0EA0728 s_load_dwordx4 s[52:55], s[4:5], 0x2c ; C09A052C s_load_dwordx8 s[56:63], s[6:7], 0x58 ; C0DC0758 s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 s_load_dwordx4 s[32:35], s[4:5], 0x20 ; C0900520 s_load_dwordx4 s[20:23], s[4:5], 0x24 ; C08A0524 s_load_dwordx8 s[36:43], s[6:7], 0x40 ; C0D20740 s_load_dwordx8 s[24:31], s[6:7], 0x48 ; C0CC0748 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 image_sample v[42:44], 7, 0, 0, 0, 0, 0, 0, 0, v[25:26], s[72:79], s[48:51] ; F0800700 01922A19 image_sample v[45:47], 7, 0, 0, 0, 0, 0, 0, 0, v[23:24], s[64:71], s[80:83] ; F0800700 02902D17 image_sample v[48:50], 7, 0, 0, 0, 0, 0, 0, 0, v[21:22], s[84:91], s[44:47] ; F0800700 01753015 image_sample v[51:53], 7, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[12:19], s[8:11] ; F0800700 00433313 image_sample v[0:1], 10, 0, 0, 0, 0, 0, 0, 0, v[25:26], s[56:63], s[52:55] ; F0800A00 01AE0019 s_buffer_load_dword s8, s[0:3], 0x10 ; C2040110 s_buffer_load_dword s9, s[0:3], 0x11 ; C2048111 s_buffer_load_dword s10, s[0:3], 0x12 ; C2050112 v_subrev_f32_e32 v25, v14, v38 ; 0A324D0E v_add_f32_e32 v14, v38, v14 ; 061C1D26 s_buffer_load_dword s11, s[0:3], 0xc ; C205810C s_buffer_load_dword s12, s[0:3], 0xd ; C206010D s_buffer_load_dword s13, s[0:3], 0xe ; C206810E s_buffer_load_dword s14, s[0:3], 0x38 ; C2070138 s_buffer_load_dword s15, s[0:3], 0x39 ; C2078139 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v26, s8, v35 ; 10344608 v_mul_f32_e32 v35, s9, v36 ; 10464809 v_mul_f32_e32 v36, s10, v37 ; 10484A0A s_buffer_load_dword s8, s[0:3], 0x8 ; C2040108 s_buffer_load_dword s9, s[0:3], 0x9 ; C2048109 s_buffer_load_dword s10, s[0:3], 0xa ; C205010A v_subrev_f32_e32 v37, v13, v34 ; 0A4A450D v_add_f32_e32 v13, v34, v13 ; 061A1B22 v_mul_f32_e32 v31, s11, v31 ; 103E3E0B v_mul_f32_e32 v32, s12, v32 ; 1040400C v_mul_f32_e32 v33, s13, v33 ; 1042420D v_subrev_f32_e32 v34, v12, v30 ; 0A443D0C v_add_f32_e32 v12, v30, v12 ; 0618191E s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v27, s8, v27 ; 10363608 v_mul_f32_e32 v28, s9, v28 ; 10383809 v_mul_f32_e32 v29, s10, v29 ; 103A3A0A v_max_f32_e32 v30, 0, v34 ; 203C4480 v_min_f32_e32 v12, 1.0, v12 ; 1E1818F2 v_subrev_f32_e32 v12, v30, v12 ; 0A18191E v_rcp_f32_e32 v12, v12 ; 7E18550C v_max_f32_e32 v34, 0, v37 ; 20444A80 v_min_f32_e32 v13, 1.0, v13 ; 1E1A1AF2 v_subrev_f32_e32 v13, v34, v13 ; 0A1A1B22 v_rcp_f32_e32 v13, v13 ; 7E1A550D v_subrev_f32_e32 v9, v30, v9 ; 0A12131E v_mul_f32_e32 v9, v12, v9 ; 1012130C v_subrev_f32_e32 v10, v34, v10 ; 0A141522 v_mul_f32_e32 v10, v13, v10 ; 1014150D v_add_f32_e64 v10, 0, v10 clamp ; D206080A 00021480 v_max_f32_e32 v12, 0, v25 ; 20183280 v_min_f32_e32 v13, 1.0, v14 ; 1E1A1CF2 v_subrev_f32_e32 v13, v12, v13 ; 0A1A1B0C v_rcp_f32_e32 v13, v13 ; 7E1A550D v_mov_b32_e32 v14, 0x40400000 ; 7E1C02FF 40400000 v_mad_f32 v25, -2.0, v10, v14 ; D2820019 043A14F5 v_mul_f32_e32 v25, v25, v10 ; 10321519 v_mul_f32_e32 v10, v25, v10 ; 10141519 v_subrev_f32_e32 v11, v12, v11 ; 0A16170C v_mul_f32_e32 v11, v13, v11 ; 1016170D v_add_f32_e64 v9, 0, v9 clamp ; D2060809 00021280 v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680 v_mad_f32 v12, -2.0, v9, v14 ; D282000C 043A12F5 v_mac_f32_e32 v14, -2.0, v11 ; 3E1C16F5 v_mul_f32_e32 v13, v14, v11 ; 101A170E v_mul_f32_e32 v11, v13, v11 ; 1016170D v_mul_f32_e32 v12, v12, v9 ; 1018130C v_max_f32_e32 v10, 0, v10 ; 20141480 v_mad_f32 v13, -v9, v12, 1.0 ; D282000D 23CA1909 v_min_f32_e32 v10, v13, v10 ; 1E14150D v_subrev_f32_e32 v13, v10, v13 ; 0A1A1B0A v_max_f32_e32 v11, 0, v11 ; 20161680 v_min_f32_e32 v11, v13, v11 ; 1E16170D v_subrev_f32_e32 v13, v11, v13 ; 0A1A1B0B s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 v_mul_f32_e32 v14, v51, v13 ; 101C1B33 v_mul_f32_e32 v25, v52, v13 ; 10321B34 v_mul_f32_e32 v30, v53, v13 ; 103C1B35 v_mul_f32_e32 v9, v12, v9 ; 1012130C v_mac_f32_e32 v14, v48, v9 ; 3E1C1330 v_mac_f32_e32 v25, v49, v9 ; 3E321331 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_load_dwordx4 s[16:19], s[4:5], 0x28 ; C0880528 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v12, s8, v39 ; 10184E08 s_load_dwordx8 s[44:51], s[6:7], 0x50 ; C0D60750 v_mac_f32_e32 v30, v50, v9 ; 3E3C1332 v_mac_f32_e32 v14, v45, v10 ; 3E1C152D v_mac_f32_e32 v25, v46, v10 ; 3E32152E v_mac_f32_e32 v30, v47, v10 ; 3E3C152F v_mac_f32_e32 v14, v42, v11 ; 3E1C172A v_mac_f32_e32 v25, v43, v11 ; 3E32172B v_mac_f32_e32 v30, v44, v11 ; 3E3C172C v_mul_f32_e32 v34, s9, v40 ; 10445009 v_mul_f32_e32 v37, s10, v41 ; 104A520A s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[23:24], 10, 0, 0, 0, 0, 0, 0, 0, v[23:24], s[44:51], s[16:19] ; F0800A00 008B1717 image_sample v[19:20], 10, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[36:43], s[32:35] ; F0800A00 01091313 image_sample v[21:22], 10, 0, 0, 0, 0, 0, 0, 0, v[21:22], s[24:31], s[20:23] ; F0800A00 00A61515 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v21, v21, v9 ; 102A1315 v_mul_f32_e32 v22, v22, v9 ; 102C1316 v_mac_f32_e32 v21, v19, v13 ; 3E2A1B13 v_mac_f32_e32 v22, v20, v13 ; 3E2C1B14 v_mul_f32_e32 v12, v12, v13 ; 10181B0C v_mul_f32_e32 v19, v34, v13 ; 10261B22 v_mul_f32_e32 v13, v37, v13 ; 101A1B25 v_mac_f32_e32 v12, v27, v9 ; 3E18131B v_mac_f32_e32 v19, v28, v9 ; 3E26131C v_mac_f32_e32 v13, v29, v9 ; 3E1A131D v_mac_f32_e32 v21, v23, v10 ; 3E2A1517 v_mac_f32_e32 v22, v24, v10 ; 3E2C1518 v_mac_f32_e32 v12, v31, v10 ; 3E18151F v_mac_f32_e32 v19, v32, v10 ; 3E261520 v_mac_f32_e32 v13, v33, v10 ; 3E1A1521 v_mac_f32_e32 v21, v0, v11 ; 3E2A1700 v_mac_f32_e32 v22, v1, v11 ; 3E2C1701 v_mac_f32_e32 v12, v11, v26 ; 3E18350B v_mul_f32_e32 v0, v3, v3 ; 10000703 v_mac_f32_e32 v0, v4, v4 ; 3E000904 v_mac_f32_e32 v0, v5, v5 ; 3E000B05 v_rsq_clamp_f32_e32 v0, v0 ; 7E005900 v_mac_f32_e32 v19, v11, v35 ; 3E26470B s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100 v_mac_f32_e32 v13, v11, v36 ; 3E1A490B v_mul_f32_e32 v1, v0, v3 ; 10020700 v_mul_f32_e32 v3, v0, v4 ; 10060900 v_mul_f32_e32 v0, v0, v5 ; 10000B00 v_mul_f32_e32 v4, v16, v0 ; 10080110 v_mad_f32 v4, v3, v17, -v4 ; D2820004 84122303 v_mul_f32_e32 v5, v17, v1 ; 100A0311 v_mad_f32 v5, v0, v15, -v5 ; D2820005 84161F00 v_mul_f32_e32 v9, v15, v3 ; 1012070F v_mad_f32 v9, v1, v16, -v9 ; D2820009 84262101 v_mul_f32_e32 v10, v15, v15 ; 10141F0F v_mac_f32_e32 v10, v16, v16 ; 3E142110 v_mac_f32_e32 v10, v17, v17 ; 3E142311 v_rsq_clamp_f32_e32 v10, v10 ; 7E14590A v_mul_f32_e32 v11, v4, v4 ; 10160904 v_mac_f32_e32 v11, v5, v5 ; 3E160B05 v_mac_f32_e32 v11, v9, v9 ; 3E161309 v_rsq_clamp_f32_e32 v11, v11 ; 7E16590B v_mul_f32_e32 v15, v10, v15 ; 101E1F0A v_mul_f32_e32 v16, v10, v16 ; 1020210A v_mul_f32_e32 v10, v10, v17 ; 1014230A v_mul_f32_e32 v4, v11, v4 ; 1008090B v_mul_f32_e32 v5, v11, v5 ; 100A0B0B v_mul_f32_e32 v9, v11, v9 ; 1012130B v_mad_f32 v11, 2.0, v22, -1.0 ; D282000B 03CE2CF4 v_mad_f32 v17, 2.0, v21, -1.0 ; D2820011 03CE2AF4 v_mad_f32 v20, -v17, v17, 1.0 ; D2820014 23CA2311 v_mad_f32 v20, -v11, v11, v20 ; D2820014 2452170B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v11, s0, v11 ; 10161600 v_mul_f32_e32 v17, s0, v17 ; 10222200 v_add_f32_e64 v20, 0, v20 clamp ; D2060814 00022880 v_sqrt_f32_e32 v20, v20 ; 7E286714 v_mul_f32_e32 v21, v11, v11 ; 102A170B v_mac_f32_e32 v21, v17, v17 ; 3E2A2311 v_mac_f32_e32 v21, v20, v20 ; 3E2A2914 v_rsq_clamp_f32_e32 v21, v21 ; 7E2A5915 v_mul_f32_e32 v4, v4, v18 ; 10082504 v_mul_f32_e32 v5, v5, v18 ; 100A2505 v_mul_f32_e32 v9, v9, v18 ; 10122509 v_mul_f32_e32 v17, v21, v17 ; 10222315 v_mul_f32_e32 v4, v17, v4 ; 10080911 v_mul_f32_e32 v5, v17, v5 ; 100A0B11 v_mul_f32_e32 v9, v17, v9 ; 10121311 v_mul_f32_e32 v11, v21, v11 ; 10161715 v_mad_f32 v4, v11, v15, -v4 ; D2820004 84121F0B v_mad_f32 v5, v11, v16, -v5 ; D2820005 8416210B v_mad_f32 v9, v11, v10, -v9 ; D2820009 8426150B v_mul_f32_e32 v10, s14, v14 ; 10141C0E v_mul_f32_e32 v11, s15, v30 ; 10163C0F v_mul_f32_e32 v14, v21, v20 ; 101C2915 v_mac_f32_e32 v4, v14, v1 ; 3E08030E v_mac_f32_e32 v5, v14, v3 ; 3E0A070E v_mac_f32_e32 v9, v14, v0 ; 3E12010E v_mul_f32_e32 v0, v6, v12 ; 10001906 v_mul_f32_e32 v1, v7, v19 ; 10022707 v_mul_f32_e32 v3, v8, v13 ; 10061B08 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e64 v1, v3, 0 ; D25E0001 00010103 exp 15, 0, 1, 0, 0, v0, v1, v0, v1 ; F800040F 01000100 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, v4, v4 ; 10000904 v_mac_f32_e32 v0, v5, v5 ; 3E000B05 v_mac_f32_e32 v0, v9, v9 ; 3E001309 v_rsq_clamp_f32_e32 v0, v0 ; 7E005900 v_cvt_pkrtz_f16_f32_e32 v1, v10, v25 ; 5E02330A v_cvt_pkrtz_f16_f32_e64 v3, v11, 0 ; D25E0003 0001010B exp 15, 1, 1, 0, 0, v1, v3, v1, v3 ; F800041F 03010301 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v1, v0, v4 ; 10020900 v_mul_f32_e32 v3, v0, v5 ; 10060B00 v_mul_f32_e32 v0, v0, v9 ; 10001300 v_mad_f32 v1, 0.5, v1, 0.5 ; D2820001 03C202F0 v_mad_f32 v3, 0.5, v3, 0.5 ; D2820003 03C206F0 v_cvt_pkrtz_f16_f32_e32 v1, v1, v3 ; 5E020701 exp 15, 2, 0, 0, 0, v2, v2, v2, v2 ; F800002F 02020202 v_mad_f32 v0, 0.5, v0, 0.5 ; D2820000 03C200F0 v_cvt_pkrtz_f16_f32_e64 v0, v0, 0 ; D25E0000 00010100 exp 15, 3, 1, 1, 1, v1, v0, v1, v0 ; F8001C3F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 96 VGPRS: 56 Code Size: 1288 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL OUT[4], GENERIC[2] DCL OUT[5], GENERIC[3] DCL OUT[6], GENERIC[4] DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0..2] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL DCL TEMP[2..8], LOCAL IMM[0] FLT32 { 0.0000, 255.0000, -128.0000, 1.0000} IMM[1] INT32 {1, 0, 2, 3} IMM[2] FLT32 { -64.0000, 0.0159, 2.0000, 16.0000} IMM[3] UINT32 {3, 304, 320, 4} IMM[4] UINT32 {0, 12, 28, 44} IMM[5] UINT32 {60, 24, 32, 16} IMM[6] UINT32 {48, 20, 36, 52} IMM[7] UINT32 {8, 40, 56, 0} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MOV TEMP[0].xy, IN[3].xyyy 4: MOV TEMP[0].w, IMM[0].xxxx 5: TXL TEMP[0], TEMP[0], SAMP[0], 2D 6: MOV TEMP[2].xy, IN[3].xyyy 7: MOV TEMP[2].w, IMM[0].xxxx 8: TXL TEMP[2], TEMP[2], SAMP[0], 2D, IMM[1].xyx 9: MOV TEMP[3].xy, IN[3].xyyy 10: MOV TEMP[3].w, IMM[0].xxxx 11: TXL TEMP[3], TEMP[3], SAMP[0], 2D, IMM[1].zyz 12: MAD TEMP[4], IN[1], IMM[0].yyyy, IMM[0].zzzz 13: FSLT TEMP[5], TEMP[4], IMM[0].xxxx 14: AND TEMP[5], TEMP[5], IMM[0].wwww 15: ABS TEMP[4], TEMP[4] 16: ADD TEMP[4], TEMP[4], -TEMP[5] 17: ADD TEMP[4], TEMP[4], IMM[2].xxxx 18: FSLT TEMP[6], TEMP[4], IMM[0].xxxx 19: AND TEMP[6], TEMP[6], IMM[0].wwww 20: ABS TEMP[4], TEMP[4] 21: ADD TEMP[4], TEMP[4], -TEMP[6] 22: MUL TEMP[4].xy, TEMP[4], IMM[2].yyyy 23: MOV TEMP[7].x, TEMP[4].xxxx 24: MOV TEMP[7].y, TEMP[4].yyyy 25: ADD TEMP[8].x, IMM[0].wwww, -TEMP[4].xxxx 26: ADD TEMP[4].x, TEMP[8].xxxx, -TEMP[4].yyyy 27: MOV TEMP[7].z, TEMP[4].xxxx 28: DP3 TEMP[4].x, TEMP[7].xyzz, TEMP[7].xyzz 29: RSQ TEMP[4].x, TEMP[4].xxxx 30: MUL TEMP[4].xyz, TEMP[7].xyzz, TEMP[4].xxxx 31: MUL TEMP[6], TEMP[6], IMM[2].zzzz 32: ADD TEMP[6].xy, IMM[0].wwww, -TEMP[6] 33: MUL TEMP[6].xy, TEMP[4].xyyy, TEMP[6].xyyy 34: MOV TEMP[7].w, IMM[0].xxxx 35: MOV TEMP[7].x, TEMP[6].xxxx 36: MOV TEMP[7].y, TEMP[6].yyyy 37: MUL TEMP[5].x, TEMP[5].xxxx, IMM[2].zzzz 38: ADD TEMP[5].x, IMM[0].wwww, -TEMP[5].xxxx 39: MUL TEMP[4].x, TEMP[5].xxxx, TEMP[4].zzzz 40: MOV TEMP[7].z, TEMP[4].xxxx 41: DP4 TEMP[4].x, TEMP[7], TEMP[0] 42: DP4 TEMP[5].x, TEMP[7], TEMP[2] 43: MOV TEMP[4].y, TEMP[5].xxxx 44: DP4 TEMP[5].x, TEMP[7], TEMP[3] 45: MOV TEMP[4].z, TEMP[5].xxxx 46: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 47: RSQ TEMP[5].x, TEMP[5].xxxx 48: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx 49: MOV TEMP[5].w, IMM[0].wwww 50: MOV TEMP[5].x, IN[0].xxxx 51: MOV TEMP[5].y, IN[0].yyyy 52: MOV TEMP[5].z, IN[0].zzzz 53: DP4 TEMP[0].x, TEMP[5], TEMP[0] 54: DP4 TEMP[2].x, TEMP[5], TEMP[2] 55: DP4 TEMP[3].x, TEMP[5], TEMP[3] 56: MOV TEMP[5].x, TEMP[0].xxxx 57: MOV TEMP[5].y, TEMP[2].xxxx 58: MOV TEMP[5].z, TEMP[3].xxxx 59: ADD TEMP[5].xyz, TEMP[5].xyzz, -CONST[4][19].xyzz 60: MOV TEMP[6].x, TEMP[0].xxxx 61: MOV TEMP[6].y, TEMP[2].xxxx 62: MOV TEMP[6].z, TEMP[3].xxxx 63: DP3 TEMP[7].x, CONST[4][20].xyzz, TEMP[5].xyzz 64: MOV TEMP[6].w, TEMP[7].xxxx 65: MOV TEMP[7].x, TEMP[4].xxxx 66: MOV TEMP[7].y, TEMP[4].yyyy 67: MOV TEMP[7].z, TEMP[4].zzzz 68: DP3 TEMP[8].x, TEMP[5].xyzz, TEMP[5].xyzz 69: RSQ TEMP[8].x, TEMP[8].xxxx 70: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[8].xxxx 71: DP3 TEMP[8].x, TEMP[4].xyzz, CONST[5][0].xyzz 72: MUL TEMP[4].xyz, TEMP[8].xxxx, TEMP[4].xyzz 73: MUL TEMP[4].xyz, IMM[2].zzzz, TEMP[4].xyzz 74: ADD TEMP[4].xyz, CONST[5][0].xyzz, -TEMP[4].xyzz 75: DP3 TEMP[4].x, -TEMP[5].xyzz, TEMP[4].xyzz 76: MOV_SAT TEMP[4].x, TEMP[4].xxxx 77: POW TEMP[4].x, TEMP[4].xxxx, IMM[2].wwww 78: MOV_SAT TEMP[4].x, TEMP[4].xxxx 79: MOV TEMP[7].w, TEMP[4].xxxx 80: MOV TEMP[4].w, IMM[0].wwww 81: MOV TEMP[4].x, TEMP[0].xxxx 82: MOV TEMP[4].y, TEMP[2].xxxx 83: MOV TEMP[4].z, TEMP[3].xxxx 84: MOV TEMP[0].x, CONST[4][0].wwww 85: MOV TEMP[0].y, CONST[4][1].wwww 86: MOV TEMP[0].z, CONST[4][2].wwww 87: MOV TEMP[0].w, CONST[4][3].wwww 88: DP4 TEMP[0].x, TEMP[4], TEMP[0] 89: MAD TEMP[2].xy, IN[2].xyyy, CONST[1][1].zwww, CONST[1][2].xyyy 90: MOV TEMP[3].xy, IN[3].xyyy 91: MOV TEMP[3].w, IMM[0].xxxx 92: TXL TEMP[3], TEMP[3], SAMP[0], 2D, IMM[1].wyw 93: MOV TEMP[5].x, CONST[4][0].xxxx 94: MOV TEMP[5].y, CONST[4][1].xxxx 95: MOV TEMP[5].z, CONST[4][2].xxxx 96: MOV TEMP[5].w, CONST[4][3].xxxx 97: DP4 TEMP[5].x, TEMP[4], TEMP[5] 98: MOV TEMP[8].x, CONST[4][0].yyyy 99: MOV TEMP[8].y, CONST[4][1].yyyy 100: MOV TEMP[8].z, CONST[4][2].yyyy 101: MOV TEMP[8].w, CONST[4][3].yyyy 102: DP4 TEMP[8].x, TEMP[4], TEMP[8] 103: MOV TEMP[5].y, -TEMP[8].xxxx 104: MOV TEMP[8].x, CONST[4][0].zzzz 105: MOV TEMP[8].y, CONST[4][1].zzzz 106: MOV TEMP[8].z, CONST[4][2].zzzz 107: MOV TEMP[8].w, CONST[4][3].zzzz 108: DP4 TEMP[4].x, TEMP[4], TEMP[8] 109: MAD TEMP[4].x, IMM[2].zzzz, TEMP[4].xxxx, -TEMP[0].xxxx 110: MOV TEMP[5].z, TEMP[4].xxxx 111: MOV TEMP[5].w, TEMP[0].xxxx 112: MOV OUT[1], TEMP[1] 113: MOV OUT[2].xy, TEMP[2].xyxx 114: MOV OUT[4], TEMP[7] 115: MOV OUT[6], IMM[0].xxxx 116: MOV OUT[5], TEMP[3] 117: MOV OUT[0], TEMP[5] 118: MOV OUT[3], TEMP[6] 119: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %17 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4 %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 %19 = call float @llvm.SI.load.const(<16 x i8> %18, i32 0) %20 = call float @llvm.SI.load.const(<16 x i8> %18, i32 4) %21 = call float @llvm.SI.load.const(<16 x i8> %18, i32 8) %22 = call float @llvm.SI.load.const(<16 x i8> %18, i32 12) %23 = call float @llvm.SI.load.const(<16 x i8> %18, i32 16) %24 = call float @llvm.SI.load.const(<16 x i8> %18, i32 20) %25 = call float @llvm.SI.load.const(<16 x i8> %18, i32 24) %26 = call float @llvm.SI.load.const(<16 x i8> %18, i32 28) %27 = call float @llvm.SI.load.const(<16 x i8> %18, i32 32) %28 = call float @llvm.SI.load.const(<16 x i8> %18, i32 36) %29 = call float @llvm.SI.load.const(<16 x i8> %18, i32 40) %30 = call float @llvm.SI.load.const(<16 x i8> %18, i32 44) %31 = call float @llvm.SI.load.const(<16 x i8> %18, i32 48) %32 = call float @llvm.SI.load.const(<16 x i8> %18, i32 52) %33 = call float @llvm.SI.load.const(<16 x i8> %18, i32 56) %34 = call float @llvm.SI.load.const(<16 x i8> %18, i32 60) %35 = call float @llvm.SI.load.const(<16 x i8> %18, i32 304) %36 = call float @llvm.SI.load.const(<16 x i8> %18, i32 308) %37 = call float @llvm.SI.load.const(<16 x i8> %18, i32 312) %38 = call float @llvm.SI.load.const(<16 x i8> %18, i32 320) %39 = call float @llvm.SI.load.const(<16 x i8> %18, i32 324) %40 = call float @llvm.SI.load.const(<16 x i8> %18, i32 328) %41 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 5 %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 %43 = call float @llvm.SI.load.const(<16 x i8> %42, i32 0) %44 = call float @llvm.SI.load.const(<16 x i8> %42, i32 4) %45 = call float @llvm.SI.load.const(<16 x i8> %42, i32 8) %46 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %47 = load <8 x i32>, <8 x i32> addrspace(2)* %46, align 32, !tbaa !0 %48 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %49 = load <4 x i32>, <4 x i32> addrspace(2)* %48, align 16, !tbaa !0 %50 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %51 = load <16 x i8>, <16 x i8> addrspace(2)* %50, align 16, !tbaa !0 %52 = add i32 %5, %7 %53 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %51, i32 0, i32 %52) %54 = extractelement <4 x float> %53, i32 0 %55 = extractelement <4 x float> %53, i32 1 %56 = extractelement <4 x float> %53, i32 2 %57 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %58 = load <16 x i8>, <16 x i8> addrspace(2)* %57, align 16, !tbaa !0 %59 = add i32 %5, %7 %60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %58, i32 0, i32 %59) %61 = extractelement <4 x float> %60, i32 0 %62 = extractelement <4 x float> %60, i32 1 %63 = extractelement <4 x float> %60, i32 2 %64 = extractelement <4 x float> %60, i32 3 %65 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %66 = load <16 x i8>, <16 x i8> addrspace(2)* %65, align 16, !tbaa !0 %67 = add i32 %5, %7 %68 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %66, i32 0, i32 %67) %69 = extractelement <4 x float> %68, i32 0 %70 = extractelement <4 x float> %68, i32 1 %71 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0 %73 = add i32 %10, %6 %74 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %72, i32 0, i32 %73) %75 = extractelement <4 x float> %74, i32 0 %76 = extractelement <4 x float> %74, i32 1 %77 = bitcast float %75 to i32 %78 = bitcast float %76 to i32 %79 = insertelement <4 x i32> undef, i32 %77, i32 0 %80 = insertelement <4 x i32> %79, i32 %78, i32 1 %81 = insertelement <4 x i32> %80, i32 0, i32 2 %82 = bitcast <8 x i32> %47 to <32 x i8> %83 = bitcast <4 x i32> %49 to <16 x i8> %84 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %81, <32 x i8> %82, <16 x i8> %83, i32 2) %85 = extractelement <4 x float> %84, i32 0 %86 = extractelement <4 x float> %84, i32 1 %87 = extractelement <4 x float> %84, i32 2 %88 = extractelement <4 x float> %84, i32 3 %89 = bitcast float %75 to i32 %90 = bitcast float %76 to i32 %91 = insertelement <4 x i32> , i32 %89, i32 1 %92 = insertelement <4 x i32> %91, i32 %90, i32 2 %93 = insertelement <4 x i32> %92, i32 0, i32 3 %94 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %93, <8 x i32> %47, <4 x i32> %49, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %95 = extractelement <4 x float> %94, i32 0 %96 = extractelement <4 x float> %94, i32 1 %97 = extractelement <4 x float> %94, i32 2 %98 = extractelement <4 x float> %94, i32 3 %99 = bitcast float %75 to i32 %100 = bitcast float %76 to i32 %101 = insertelement <4 x i32> , i32 %99, i32 1 %102 = insertelement <4 x i32> %101, i32 %100, i32 2 %103 = insertelement <4 x i32> %102, i32 0, i32 3 %104 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %103, <8 x i32> %47, <4 x i32> %49, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %105 = extractelement <4 x float> %104, i32 0 %106 = extractelement <4 x float> %104, i32 1 %107 = extractelement <4 x float> %104, i32 2 %108 = extractelement <4 x float> %104, i32 3 %109 = fmul float %61, 2.550000e+02 %110 = fadd float %109, -1.280000e+02 %111 = fmul float %62, 2.550000e+02 %112 = fadd float %111, -1.280000e+02 %113 = fmul float %63, 2.550000e+02 %114 = fadd float %113, -1.280000e+02 %115 = fmul float %64, 2.550000e+02 %116 = fadd float %115, -1.280000e+02 %117 = fcmp olt float %110, 0.000000e+00 %118 = fcmp olt float %112, 0.000000e+00 %119 = fcmp olt float %114, 0.000000e+00 %120 = fcmp olt float %116, 0.000000e+00 %121 = select i1 %117, float 1.000000e+00, float 0.000000e+00 %122 = call float @fabs(float %110) %123 = call float @fabs(float %112) %124 = call float @fabs(float %114) %125 = call float @fabs(float %116) %126 = fsub float %122, %121 %127 = select i1 %118, float -1.000000e+00, float -0.000000e+00 %128 = fadd float %123, %127 %129 = select i1 %119, float -1.000000e+00, float -0.000000e+00 %130 = fadd float %124, %129 %131 = select i1 %120, float -1.000000e+00, float -0.000000e+00 %132 = fadd float %125, %131 %133 = fadd float %126, -6.400000e+01 %134 = fadd float %128, -6.400000e+01 %135 = fadd float %130, -6.400000e+01 %136 = fadd float %132, -6.400000e+01 %137 = fcmp olt float %133, 0.000000e+00 %138 = fcmp olt float %134, 0.000000e+00 %139 = select i1 %137, float 1.000000e+00, float 0.000000e+00 %140 = select i1 %138, float 1.000000e+00, float 0.000000e+00 %141 = call float @fabs(float %133) %142 = call float @fabs(float %134) %143 = call float @fabs(float %135) %144 = call float @fabs(float %136) %145 = fsub float %141, %139 %146 = fsub float %142, %140 %147 = fmul float %145, 0x3F90410420000000 %148 = fmul float %146, 0x3F90410420000000 %149 = fsub float 1.000000e+00, %147 %150 = fsub float %149, %148 %151 = fmul float %147, %147 %152 = fmul float %148, %148 %153 = fadd float %152, %151 %154 = fmul float %150, %150 %155 = fadd float %153, %154 %156 = call float @llvm.AMDGPU.rsq.clamped.f32(float %155) %157 = fmul float %147, %156 %158 = fmul float %148, %156 %159 = fmul float %150, %156 %160 = fmul float %139, 2.000000e+00 %161 = fmul float %140, 2.000000e+00 %162 = fsub float 1.000000e+00, %160 %163 = fsub float 1.000000e+00, %161 %164 = fmul float %157, %162 %165 = fmul float %158, %163 %166 = fmul float %121, 2.000000e+00 %167 = fsub float 1.000000e+00, %166 %168 = fmul float %167, %159 %169 = fmul float %164, %85 %170 = fmul float %165, %86 %171 = fadd float %169, %170 %172 = fmul float %168, %87 %173 = fadd float %171, %172 %174 = fmul float %88, 0.000000e+00 %175 = fadd float %173, %174 %176 = fmul float %164, %95 %177 = fmul float %165, %96 %178 = fadd float %176, %177 %179 = fmul float %168, %97 %180 = fadd float %178, %179 %181 = fmul float %98, 0.000000e+00 %182 = fadd float %180, %181 %183 = fmul float %164, %105 %184 = fmul float %165, %106 %185 = fadd float %183, %184 %186 = fmul float %168, %107 %187 = fadd float %185, %186 %188 = fmul float %108, 0.000000e+00 %189 = fadd float %187, %188 %190 = fmul float %175, %175 %191 = fmul float %182, %182 %192 = fadd float %191, %190 %193 = fmul float %189, %189 %194 = fadd float %192, %193 %195 = call float @llvm.AMDGPU.rsq.clamped.f32(float %194) %196 = fmul float %175, %195 %197 = fmul float %182, %195 %198 = fmul float %189, %195 %199 = fmul float %54, %85 %200 = fmul float %55, %86 %201 = fadd float %199, %200 %202 = fmul float %56, %87 %203 = fadd float %201, %202 %204 = fadd float %203, %88 %205 = fmul float %54, %95 %206 = fmul float %55, %96 %207 = fadd float %205, %206 %208 = fmul float %56, %97 %209 = fadd float %207, %208 %210 = fadd float %209, %98 %211 = fmul float %54, %105 %212 = fmul float %55, %106 %213 = fadd float %211, %212 %214 = fmul float %56, %107 %215 = fadd float %213, %214 %216 = fadd float %215, %108 %217 = fsub float %204, %35 %218 = fsub float %210, %36 %219 = fsub float %216, %37 %220 = fmul float %38, %217 %221 = fmul float %39, %218 %222 = fadd float %221, %220 %223 = fmul float %40, %219 %224 = fadd float %222, %223 %225 = fmul float %217, %217 %226 = fmul float %218, %218 %227 = fadd float %226, %225 %228 = fmul float %219, %219 %229 = fadd float %227, %228 %230 = call float @llvm.AMDGPU.rsq.clamped.f32(float %229) %231 = fmul float %217, %230 %232 = fmul float %218, %230 %233 = fmul float %219, %230 %234 = fmul float %196, %43 %235 = fmul float %197, %44 %236 = fadd float %235, %234 %237 = fmul float %198, %45 %238 = fadd float %236, %237 %239 = fmul float %238, %196 %240 = fmul float %238, %197 %241 = fmul float %238, %198 %242 = fmul float %239, 2.000000e+00 %243 = fmul float %240, 2.000000e+00 %244 = fmul float %241, 2.000000e+00 %245 = fsub float %43, %242 %246 = fsub float %44, %243 %247 = fsub float %45, %244 %248 = fmul float %231, %245 %249 = fsub float -0.000000e+00, %248 %250 = fmul float %232, %246 %251 = fsub float %249, %250 %252 = fmul float %233, %247 %253 = fsub float %251, %252 %254 = call float @llvm.AMDIL.clamp.(float %253, float 0.000000e+00, float 1.000000e+00) %255 = call float @llvm.pow.f32(float %254, float 1.600000e+01) %256 = call float @llvm.AMDIL.clamp.(float %255, float 0.000000e+00, float 1.000000e+00) %257 = fmul float %204, %22 %258 = fmul float %210, %26 %259 = fadd float %257, %258 %260 = fmul float %216, %30 %261 = fadd float %259, %260 %262 = fadd float %261, %34 %263 = fmul float %69, %13 %264 = fadd float %263, %15 %265 = fmul float %70, %14 %266 = fadd float %265, %16 %267 = bitcast float %75 to i32 %268 = bitcast float %76 to i32 %269 = insertelement <4 x i32> , i32 %267, i32 1 %270 = insertelement <4 x i32> %269, i32 %268, i32 2 %271 = insertelement <4 x i32> %270, i32 0, i32 3 %272 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %271, <8 x i32> %47, <4 x i32> %49, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %273 = extractelement <4 x float> %272, i32 0 %274 = extractelement <4 x float> %272, i32 1 %275 = extractelement <4 x float> %272, i32 2 %276 = extractelement <4 x float> %272, i32 3 %277 = fmul float %204, %19 %278 = fmul float %210, %23 %279 = fadd float %277, %278 %280 = fmul float %216, %27 %281 = fadd float %279, %280 %282 = fadd float %281, %31 %283 = fmul float %204, %20 %284 = fmul float %210, %24 %285 = fadd float %283, %284 %286 = fmul float %216, %28 %287 = fadd float %285, %286 %288 = fadd float %287, %32 %289 = fsub float -0.000000e+00, %288 %290 = fmul float %204, %21 %291 = fmul float %210, %25 %292 = fadd float %290, %291 %293 = fmul float %216, %29 %294 = fadd float %292, %293 %295 = fadd float %294, %33 %296 = fmul float %295, 2.000000e+00 %297 = fsub float %296, %262 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %264, float %266, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %204, float %210, float %216, float %224) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %196, float %197, float %198, float %256) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %273, float %274, float %275, float %276) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %282, float %289, float %297, float %262) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v1, 0xc3000000 ; 7E0202FF C3000000 v_mov_b32_e32 v2, 0x437f0000 ; 7E0402FF 437F0000 v_mov_b32_e32 v4, 0x80000000 ; 7E0802FF 80000000 v_mov_b32_e32 v5, 0xc2800000 ; 7E0A02FF C2800000 v_mov_b32_e32 v6, 0x3c820821 ; 7E0C02FF 3C820821 v_add_i32_e32 v0, s10, v0 ; 4A00000A v_add_i32_e32 v3, s11, v3 ; 4A06060B s_load_dwordx4 s[28:31], s[4:5], 0x0 ; C08E0500 s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908 s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C s_load_dwordx4 s[40:43], s[2:3], 0x4 ; C0940304 s_load_dwordx4 s[44:47], s[2:3], 0x10 ; C0960310 s_load_dwordx4 s[48:51], s[2:3], 0x14 ; C0980314 s_load_dwordx8 s[32:39], s[6:7], 0x0 ; C0D00700 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[7:10], v0, s[12:15], 0 idxen ; E00C2000 80030700 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[10:13], v0, s[16:19], 0 idxen ; E00C2000 80040A00 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[12:15], v0, s[20:23], 0 idxen ; E00C2000 80050C00 buffer_load_format_xyzw v[18:21], v3, s[8:11], 0 idxen ; E00C2000 80021203 s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v20, 0 ; 7E280280 s_buffer_load_dword s0, s[44:47], 0xf ; C2002D0F s_buffer_load_dword s24, s[44:47], 0x4c ; C20C2D4C s_buffer_load_dword s22, s[44:47], 0x4d ; C20B2D4D s_buffer_load_dword s23, s[44:47], 0x4e ; C20BAD4E s_buffer_load_dword s9, s[44:47], 0x50 ; C204AD50 s_buffer_load_dword s21, s[48:51], 0x0 ; C20AB100 s_buffer_load_dword s20, s[48:51], 0x1 ; C20A3101 s_buffer_load_dword s19, s[48:51], 0x2 ; C209B102 s_buffer_load_dword s26, s[40:43], 0x6 ; C20D2906 s_buffer_load_dword s25, s[40:43], 0x7 ; C20CA907 s_buffer_load_dword s27, s[40:43], 0x8 ; C20DA908 s_buffer_load_dword s40, s[40:43], 0x9 ; C2142909 s_buffer_load_dword s15, s[44:47], 0x51 ; C207AD51 s_buffer_load_dword s16, s[44:47], 0x52 ; C2082D52 s_buffer_load_dword s3, s[44:47], 0x5 ; C201AD05 s_buffer_load_dword s4, s[44:47], 0x6 ; C2022D06 s_buffer_load_dword s8, s[44:47], 0x7 ; C2042D07 s_buffer_load_dword s2, s[44:47], 0x8 ; C2012D08 s_buffer_load_dword s1, s[44:47], 0x9 ; C200AD09 s_buffer_load_dword s5, s[44:47], 0x0 ; C202AD00 s_buffer_load_dword s6, s[44:47], 0x1 ; C2032D01 s_buffer_load_dword s7, s[44:47], 0x2 ; C203AD02 s_buffer_load_dword s10, s[44:47], 0x3 ; C2052D03 s_buffer_load_dword s17, s[44:47], 0x4 ; C208AD04 s_buffer_load_dword s11, s[44:47], 0xa ; C205AD0A s_buffer_load_dword s18, s[44:47], 0xb ; C2092D0B s_buffer_load_dword s12, s[44:47], 0xc ; C2062D0C s_buffer_load_dword s13, s[44:47], 0xd ; C206AD0D s_buffer_load_dword s14, s[44:47], 0xe ; C2072D0E s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s27 ; 7E00021B v_mov_b32_e32 v3, s40 ; 7E060228 image_sample_l v[21:24], 15, 0, 0, 0, 0, 0, 0, 0, v[18:21], s[32:39], s[28:31] ; F0900F00 00E81512 v_mov_b32_e32 v17, 0x10001 ; 7E2202FF 00010001 image_sample_l_o v[25:28], 15, 0, 0, 0, 0, 0, 0, 0, v[17:20], s[32:39], s[28:31] ; F0D00F00 00E81911 v_mov_b32_e32 v17, 0x20002 ; 7E2202FF 00020002 image_sample_l_o v[29:32], 15, 0, 0, 0, 0, 0, 0, 0, v[17:20], s[32:39], s[28:31] ; F0D00F00 00E81D11 v_mov_b32_e32 v17, 0x30003 ; 7E2202FF 00030003 image_sample_l_o v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[17:20], s[32:39], s[28:31] ; F0D00F00 00E80E11 exp 15, 32, 0, 0, 0, v20, v20, v20, v20 ; F800020F 14141414 s_waitcnt vmcnt(3) ; BF8C0773 v_mul_f32_e32 v18, v22, v8 ; 10241116 v_mad_f32 v10, v2, v10, v1 ; D282000A 04061502 v_mac_f32_e32 v1, v2, v11 ; 3E021702 v_mac_f32_e32 v0, s26, v12 ; 3E00181A v_mac_f32_e32 v3, s25, v13 ; 3E061A19 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v2, v4, -1.0, vcc ; D2000002 01A9E704 v_add_f32_e64 v1, |v1|, v2 ; D2060101 00020501 v_cmp_gt_f32_e32 vcc, 0, v10 ; 7C081480 v_cndmask_b32_e64 v2, 0, 1.0, vcc ; D2000002 01A9E480 v_sub_f32_e64 v4, |v10|, v2 ; D2080104 0002050A v_add_f32_e32 v4, v5, v4 ; 06080905 v_add_f32_e32 v1, v5, v1 ; 06020305 v_cmp_gt_f32_e32 vcc, 0, v4 ; 7C080880 v_cndmask_b32_e64 v5, 0, 1.0, vcc ; D2000005 01A9E480 v_sub_f32_e64 v10, v5, |v4| ; D208020A 00020905 v_mad_f32 v10, v10, v6, 1.0 ; D282000A 03CA0D0A v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v11, 0, 1.0, vcc ; D200000B 01A9E480 v_sub_f32_e64 v12, v11, |v1| ; D208020C 0002030B v_mac_f32_e32 v10, v6, v12 ; 3E141906 v_sub_f32_e64 v4, |v4|, v5 ; D2080104 00020B04 v_sub_f32_e64 v1, |v1|, v11 ; D2080101 00021701 v_mul_f32_e32 v4, v6, v4 ; 10080906 v_mul_f32_e32 v1, v6, v1 ; 10020306 v_mac_f32_e32 v18, v21, v7 ; 3E240F15 s_waitcnt vmcnt(2) ; BF8C0772 v_mul_f32_e32 v6, v26, v8 ; 100C111A v_mac_f32_e32 v6, v25, v7 ; 3E0C0F19 s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v8, v30, v8 ; 1010111E v_mac_f32_e32 v8, v29, v7 ; 3E100F1D v_mul_f32_e32 v7, v4, v4 ; 100E0904 v_mac_f32_e32 v7, v1, v1 ; 3E0E0301 v_mac_f32_e32 v7, v10, v10 ; 3E0E150A v_rsq_clamp_f32_e32 v7, v7 ; 7E0E5907 v_mac_f32_e32 v18, v23, v9 ; 3E241317 v_mac_f32_e32 v6, v27, v9 ; 3E0C131B v_mac_f32_e32 v8, v31, v9 ; 3E10131F v_mul_f32_e32 v4, v7, v4 ; 10080907 v_mul_f32_e32 v1, v7, v1 ; 10020307 v_mul_f32_e32 v7, v7, v10 ; 100E1507 v_mad_f32 v5, -2.0, v5, 1.0 ; D2820005 03CA0AF5 v_mul_f32_e32 v4, v5, v4 ; 10080905 v_mad_f32 v5, -2.0, v11, 1.0 ; D2820005 03CA16F5 v_mul_f32_e32 v1, v5, v1 ; 10020305 v_mad_f32 v2, -2.0, v2, 1.0 ; D2820002 03CA04F5 v_mul_f32_e32 v2, v7, v2 ; 10040507 v_add_f32_e32 v5, v24, v18 ; 060A2518 v_mul_f32_e32 v7, v22, v1 ; 100E0316 v_mac_f32_e32 v7, v21, v4 ; 3E0E0915 v_mac_f32_e32 v7, v23, v2 ; 3E0E0517 v_mul_f32_e32 v9, v26, v1 ; 1012031A v_mul_f32_e32 v1, v30, v1 ; 1002031E v_mac_f32_e32 v9, v25, v4 ; 3E120919 v_mac_f32_e32 v1, v29, v4 ; 3E02091D v_mac_f32_e32 v9, v27, v2 ; 3E12051B v_mac_f32_e32 v1, v31, v2 ; 3E02051F v_mul_f32_e32 v2, v7, v7 ; 10040F07 v_mac_f32_e32 v2, v9, v9 ; 3E041309 v_mac_f32_e32 v2, v1, v1 ; 3E040301 v_rsq_clamp_f32_e32 v2, v2 ; 7E045902 v_add_f32_e32 v4, v28, v6 ; 06080D1C v_add_f32_e32 v6, v32, v8 ; 060C1120 exp 15, 33, 0, 0, 0, v0, v3, v20, v20 ; F800021F 14140300 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_mul_f32_e32 v0, v2, v7 ; 10000F02 v_mul_f32_e32 v3, v2, v9 ; 10061302 v_mul_f32_e32 v1, v2, v1 ; 10020302 v_subrev_f32_e32 v2, s24, v5 ; 0A040A18 v_subrev_f32_e32 v7, s22, v4 ; 0A0E0816 v_subrev_f32_e32 v8, s23, v6 ; 0A100C17 v_mul_f32_e32 v9, v2, v2 ; 10120502 v_mac_f32_e32 v9, v7, v7 ; 3E120F07 v_mac_f32_e32 v9, v8, v8 ; 3E121108 v_rsq_clamp_f32_e32 v9, v9 ; 7E125909 v_mul_f32_e32 v10, s21, v0 ; 10140015 v_mac_f32_e32 v10, s20, v3 ; 3E140614 v_mac_f32_e32 v10, s19, v1 ; 3E140213 v_mul_f32_e32 v11, v0, v10 ; 10161500 v_mad_f32 v11, 2.0, v11, -s21 ; D282000B 805616F4 v_mul_f32_e32 v12, v9, v2 ; 10180509 v_mul_f32_e32 v11, v11, v12 ; 1016190B v_mul_f32_e32 v12, v3, v10 ; 10181503 v_mad_f32 v12, 2.0, v12, -s20 ; D282000C 805218F4 v_mul_f32_e32 v13, v9, v7 ; 101A0F09 v_mac_f32_e32 v11, v12, v13 ; 3E161B0C v_mul_f32_e32 v10, v1, v10 ; 10141501 v_mad_f32 v10, 2.0, v10, -s19 ; D282000A 804E14F4 v_mul_f32_e32 v9, v9, v8 ; 10121109 v_mac_f32_e32 v11, v10, v9 ; 3E16130A v_mul_f32_e32 v2, s9, v2 ; 10040409 v_add_f32_e64 v9, 0, v11 clamp ; D2060809 00021680 v_log_f32_e32 v9, v9 ; 7E124F09 v_mac_f32_e32 v2, s15, v7 ; 3E040E0F v_mac_f32_e32 v2, s16, v8 ; 3E041010 exp 15, 34, 0, 0, 0, v5, v4, v6, v2 ; F800022F 02060405 s_waitcnt expcnt(0) ; BF8C070F v_mul_legacy_f32_e32 v2, 0x41800000, v9 ; 0E0412FF 41800000 v_exp_f32_e32 v2, v2 ; 7E044B02 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 exp 15, 35, 0, 0, 0, v0, v3, v1, v2 ; F800023F 02010300 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s8, v4 ; 10000808 v_mul_f32_e32 v1, s17, v4 ; 10020811 v_mul_f32_e32 v2, s3, v4 ; 10040803 v_mul_f32_e32 v3, s4, v4 ; 10060804 v_mac_f32_e32 v0, s10, v5 ; 3E000A0A v_mac_f32_e32 v1, s5, v5 ; 3E020A05 v_mac_f32_e32 v2, s6, v5 ; 3E040A06 v_mac_f32_e32 v3, s7, v5 ; 3E060A07 v_mac_f32_e32 v0, s18, v6 ; 3E000C12 v_mac_f32_e32 v1, s2, v6 ; 3E020C02 v_mac_f32_e32 v2, s1, v6 ; 3E040C01 v_mac_f32_e32 v3, s11, v6 ; 3E060C0B v_add_f32_e32 v0, s0, v0 ; 06000000 v_add_f32_e32 v1, s12, v1 ; 0602020C v_add_f32_e32 v2, s13, v2 ; 0604040D v_add_f32_e32 v3, s14, v3 ; 0606060E v_xor_b32_e32 v2, 0x80000000, v2 ; 3A0404FF 80000000 v_mad_f32 v3, 2.0, v3, -v0 ; D2820003 840206F4 exp 15, 36, 0, 0, 0, v14, v15, v16, v17 ; F800024F 11100F0E exp 15, 37, 0, 0, 0, v20, v20, v20, v20 ; F800025F 14141414 exp 15, 12, 0, 0, 0, v1, v2, v3, v0 ; F80000CF 00030201 exp 15, 13, 0, 1, 0, v20, v20, v20, v20 ; F80008DF 14141414 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 36 Code Size: 956 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL IN[5] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL OUT[4], GENERIC[2] DCL OUT[5], GENERIC[3] DCL OUT[6], GENERIC[4] DCL OUT[7], GENERIC[5] DCL OUT[8], GENERIC[6] DCL OUT[9], GENERIC[7] DCL OUT[10], GENERIC[8] DCL OUT[11], GENERIC[9] DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0..7] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL DCL TEMP[2..24], LOCAL IMM[0] FLT32 { 0.0000, 255.0000, -128.0000, 1.0000} IMM[1] INT32 {1, 0, 2, 3} IMM[2] FLT32 { -64.0000, 0.0159, 2.0000, 16.0000} IMM[3] UINT32 {3, 304, 320, 4} IMM[4] UINT32 {0, 20, 36, 52} IMM[5] UINT32 {8, 24, 40, 56} IMM[6] UINT32 {12, 28, 44, 60} IMM[7] UINT32 {16, 32, 48, 348} IMM[8] FLT32 { 0.0175, 0.0078, -0.5000, 0.5000} IMM[9] UINT32 {72, 80, 64, 88} IMM[10] FLT32 { 0.0001, 0.0774, 0.9479, 0.0521} IMM[11] FLT32 { 2.4000, 0.0404, 0.0100, 0.0000} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MOV TEMP[0].xy, IN[5].xyyy 4: MOV TEMP[0].w, IMM[0].xxxx 5: TXL TEMP[0], TEMP[0], SAMP[0], 2D 6: MOV TEMP[2].xy, IN[5].xyyy 7: MOV TEMP[2].w, IMM[0].xxxx 8: TXL TEMP[2], TEMP[2], SAMP[0], 2D, IMM[1].xyx 9: MOV TEMP[3].xy, IN[5].xyyy 10: MOV TEMP[3].w, IMM[0].xxxx 11: TXL TEMP[3], TEMP[3], SAMP[0], 2D, IMM[1].zyz 12: MAD TEMP[4], IN[1], IMM[0].yyyy, IMM[0].zzzz 13: FSLT TEMP[5], TEMP[4], IMM[0].xxxx 14: AND TEMP[5], TEMP[5], IMM[0].wwww 15: ABS TEMP[4], TEMP[4] 16: ADD TEMP[4], TEMP[4], -TEMP[5] 17: ADD TEMP[4], TEMP[4], IMM[2].xxxx 18: FSLT TEMP[6], TEMP[4], IMM[0].xxxx 19: AND TEMP[6], TEMP[6], IMM[0].wwww 20: ABS TEMP[4], TEMP[4] 21: ADD TEMP[4], TEMP[4], -TEMP[6] 22: MUL TEMP[4], TEMP[4], IMM[2].yyyy 23: MUL TEMP[6], TEMP[6], IMM[2].zzzz 24: ADD TEMP[6], IMM[0].wwww, -TEMP[6] 25: MUL TEMP[5], IMM[2].zzzz, TEMP[5] 26: ADD TEMP[5].xzw, IMM[0].wwww, -TEMP[5] 27: MOV TEMP[7].x, TEMP[4].xxxx 28: MOV TEMP[7].y, TEMP[4].yyyy 29: ADD TEMP[8].x, IMM[0].wwww, -TEMP[4].xxxx 30: ADD TEMP[8].x, TEMP[8].xxxx, -TEMP[4].yyyy 31: MOV TEMP[7].z, TEMP[8].xxxx 32: DP3 TEMP[8].x, TEMP[7].xyzz, TEMP[7].xyzz 33: RSQ TEMP[8].x, TEMP[8].xxxx 34: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[8].xxxx 35: MUL TEMP[8].xy, TEMP[7].xyyy, TEMP[6].xyyy 36: MOV TEMP[9].x, TEMP[4].zzzz 37: MOV TEMP[9].y, TEMP[4].wwww 38: ADD TEMP[10].x, IMM[0].wwww, -TEMP[4].zzzz 39: ADD TEMP[4].x, TEMP[10].xxxx, -TEMP[4].wwww 40: MOV TEMP[9].z, TEMP[4].xxxx 41: DP3 TEMP[4].x, TEMP[9].xyzz, TEMP[9].xyzz 42: RSQ TEMP[4].x, TEMP[4].xxxx 43: MUL TEMP[4].xyz, TEMP[9].xyzz, TEMP[4].xxxx 44: MUL TEMP[6].xy, TEMP[4].xyyy, TEMP[6].zwww 45: MOV TEMP[9].w, IMM[0].xxxx 46: MOV TEMP[9].x, TEMP[8].xxxx 47: MOV TEMP[9].y, TEMP[8].yyyy 48: MUL TEMP[7].x, TEMP[7].zzzz, TEMP[5].xxxx 49: MOV TEMP[9].z, TEMP[7].xxxx 50: DP4 TEMP[7].x, TEMP[9], TEMP[0] 51: DP4 TEMP[8].x, TEMP[9], TEMP[2] 52: MOV TEMP[7].y, TEMP[8].xxxx 53: DP4 TEMP[8].x, TEMP[9], TEMP[3] 54: MOV TEMP[7].z, TEMP[8].xxxx 55: DP3 TEMP[8].x, TEMP[7].xyzz, TEMP[7].xyzz 56: RSQ TEMP[8].x, TEMP[8].xxxx 57: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[8].xxxx 58: MOV TEMP[8].w, IMM[0].xxxx 59: MOV TEMP[8].x, TEMP[6].xxxx 60: MOV TEMP[8].y, TEMP[6].yyyy 61: MUL TEMP[4].x, TEMP[4].zzzz, TEMP[5].zzzz 62: MOV TEMP[8].z, TEMP[4].xxxx 63: DP4 TEMP[4].x, TEMP[8], TEMP[0] 64: DP4 TEMP[6].x, TEMP[8], TEMP[2] 65: MOV TEMP[4].y, TEMP[6].xxxx 66: DP4 TEMP[6].x, TEMP[8], TEMP[3] 67: MOV TEMP[4].z, TEMP[6].xxxx 68: DP3 TEMP[6].x, TEMP[4].xyzz, TEMP[7].xyzz 69: MUL TEMP[6].xyz, TEMP[6].xxxx, TEMP[7].xyzz 70: ADD TEMP[4].xyz, TEMP[4].xyzz, -TEMP[6].xyzz 71: DP3 TEMP[6].x, TEMP[4].xyzz, TEMP[4].xyzz 72: RSQ TEMP[6].x, TEMP[6].xxxx 73: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[6].xxxx 74: MOV TEMP[6].x, TEMP[4].xxxx 75: MOV TEMP[6].y, TEMP[4].yyyy 76: MOV TEMP[6].z, TEMP[4].zzzz 77: MOV TEMP[6].w, TEMP[5].wwww 78: MOV TEMP[4].w, IMM[0].wwww 79: MOV TEMP[4].x, IN[0].xxxx 80: MOV TEMP[4].y, IN[0].yyyy 81: MOV TEMP[4].z, IN[0].zzzz 82: DP4 TEMP[0].x, TEMP[4], TEMP[0] 83: DP4 TEMP[2].x, TEMP[4], TEMP[2] 84: DP4 TEMP[3].x, TEMP[4], TEMP[3] 85: MOV TEMP[4].x, TEMP[0].xxxx 86: MOV TEMP[4].y, TEMP[2].xxxx 87: MOV TEMP[4].z, TEMP[3].xxxx 88: ADD TEMP[5].xyz, TEMP[4].xyzz, -CONST[4][19].xyzz 89: MOV TEMP[8].x, TEMP[0].xxxx 90: MOV TEMP[8].y, TEMP[2].xxxx 91: MOV TEMP[8].z, TEMP[3].xxxx 92: DP3 TEMP[9].x, CONST[4][20].xyzz, TEMP[5].xyzz 93: MOV TEMP[8].w, TEMP[9].xxxx 94: MOV TEMP[9].x, TEMP[7].xxxx 95: MOV TEMP[9].y, TEMP[7].yyyy 96: MOV TEMP[9].z, TEMP[7].zzzz 97: DP3 TEMP[10].x, TEMP[5].xyzz, TEMP[5].xyzz 98: RSQ TEMP[10].x, TEMP[10].xxxx 99: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[10].xxxx 100: DP3 TEMP[10].x, TEMP[7].xyzz, CONST[5][0].xyzz 101: MUL TEMP[10].xyz, TEMP[10].xxxx, TEMP[7].xyzz 102: MUL TEMP[10].xyz, IMM[2].zzzz, TEMP[10].xyzz 103: ADD TEMP[10].xyz, CONST[5][0].xyzz, -TEMP[10].xyzz 104: DP3 TEMP[5].x, -TEMP[5].xyzz, TEMP[10].xyzz 105: MOV_SAT TEMP[5].x, TEMP[5].xxxx 106: POW TEMP[5].x, TEMP[5].xxxx, IMM[2].wwww 107: MOV_SAT TEMP[5].x, TEMP[5].xxxx 108: MOV TEMP[9].w, TEMP[5].xxxx 109: MOV TEMP[5].w, IMM[0].wwww 110: MOV TEMP[5].x, TEMP[0].xxxx 111: MOV TEMP[5].y, TEMP[2].xxxx 112: MOV TEMP[5].z, TEMP[3].xxxx 113: MOV TEMP[0].x, CONST[4][0].yyyy 114: MOV TEMP[0].y, CONST[4][1].yyyy 115: MOV TEMP[0].z, CONST[4][2].yyyy 116: MOV TEMP[0].w, CONST[4][3].yyyy 117: DP4 TEMP[0].x, TEMP[5], TEMP[0] 118: MOV TEMP[2].x, CONST[4][0].zzzz 119: MOV TEMP[2].y, CONST[4][1].zzzz 120: MOV TEMP[2].z, CONST[4][2].zzzz 121: MOV TEMP[2].w, CONST[4][3].zzzz 122: DP4 TEMP[2].x, TEMP[5], TEMP[2] 123: MOV TEMP[3].x, CONST[4][0].wwww 124: MOV TEMP[3].y, CONST[4][1].wwww 125: MOV TEMP[3].z, CONST[4][2].wwww 126: MOV TEMP[3].w, CONST[4][3].wwww 127: DP4 TEMP[3].x, TEMP[5], TEMP[3] 128: MOV TEMP[10].x, CONST[4][0].xxxx 129: MOV TEMP[10].y, CONST[4][1].xxxx 130: MOV TEMP[10].z, CONST[4][2].xxxx 131: MOV TEMP[10].w, CONST[4][3].xxxx 132: DP4 TEMP[5].x, TEMP[5], TEMP[10] 133: MOV TEMP[5].w, TEMP[3].xxxx 134: MUL TEMP[10].x, CONST[1][2].xxxx, IMM[8].xxxx 135: MUL TEMP[11].x, IMM[8].xxxx, CONST[1][2].yyyy 136: MUL TEMP[12].x, IMM[8].xxxx, CONST[1][2].zzzz 137: MAD TEMP[13].xy, CONST[4][21].wwww, CONST[1][3].xyyy, CONST[1][4].zwww 138: MAD TEMP[14].xy, CONST[4][21].wwww, CONST[1][3].zwww, CONST[1][5].xyyy 139: MAD TEMP[15].xy, CONST[4][21].wwww, CONST[1][4].xyyy, CONST[1][5].zwww 140: MUL TEMP[4].xyz, TEMP[4].xyzz, IMM[8].yyyy 141: MOV TEMP[16].x, -TEMP[4].zzzz 142: MOV TEMP[17].x, TEMP[4].yyyy 143: MOV TEMP[17].y, TEMP[16].xxxx 144: MOV TEMP[18].x, TEMP[4].xxxx 145: MOV TEMP[18].y, TEMP[16].xxxx 146: MOV TEMP[16].x, TEMP[4].xxxx 147: MOV TEMP[16].y, -TEMP[4].yyyy 148: ADD TEMP[4].xy, TEMP[17].xyyy, IMM[8].zzzz 149: COS TEMP[17].x, TEMP[10].xxxx 150: SIN TEMP[10].x, TEMP[10].xxxx 151: MUL TEMP[19].x, TEMP[10].xxxx, TEMP[4].yyyy 152: MAD TEMP[19].x, TEMP[17].xxxx, TEMP[4].xxxx, -TEMP[19].xxxx 153: MUL TEMP[20].x, TEMP[17].xxxx, TEMP[4].yyyy 154: MAD TEMP[20].x, TEMP[10].xxxx, TEMP[4].xxxx, TEMP[20].xxxx 155: MOV TEMP[19].y, TEMP[20].xxxx 156: MAD TEMP[19].xy, CONST[1][1].yyyy, TEMP[19].xyyy, IMM[8].wwww 157: ADD TEMP[19].xy, TEMP[19].xyyy, TEMP[13].xyyy 158: COS TEMP[20].x, TEMP[11].xxxx 159: SIN TEMP[11].x, TEMP[11].xxxx 160: MUL TEMP[21].x, TEMP[11].xxxx, TEMP[4].yyyy 161: MAD TEMP[21].x, TEMP[20].xxxx, TEMP[4].xxxx, -TEMP[21].xxxx 162: MUL TEMP[22].x, TEMP[20].xxxx, TEMP[4].yyyy 163: MAD TEMP[22].x, TEMP[11].xxxx, TEMP[4].xxxx, TEMP[22].xxxx 164: MOV TEMP[21].y, TEMP[22].xxxx 165: MAD TEMP[21].xy, TEMP[21].xyyy, CONST[1][1].zzzz, IMM[8].wwww 166: ADD TEMP[21].xy, TEMP[21].xyyy, TEMP[14].xyyy 167: ADD TEMP[18].xy, IMM[8].zzzz, TEMP[18].xyyy 168: MUL TEMP[22].x, TEMP[10].xxxx, TEMP[18].yyyy 169: MAD TEMP[22].x, TEMP[17].xxxx, TEMP[18].xxxx, -TEMP[22].xxxx 170: MUL TEMP[23].x, TEMP[17].xxxx, TEMP[18].yyyy 171: MAD TEMP[23].x, TEMP[10].xxxx, TEMP[18].xxxx, TEMP[23].xxxx 172: MOV TEMP[22].y, TEMP[23].xxxx 173: MAD TEMP[22].xy, CONST[1][1].yyyy, TEMP[22].xyyy, IMM[8].wwww 174: ADD TEMP[22].xy, TEMP[22].xyyy, TEMP[13].xyyy 175: MOV TEMP[23].x, TEMP[19].xxxx 176: MOV TEMP[23].y, TEMP[19].yyyy 177: MOV TEMP[23].z, TEMP[22].xxxx 178: MOV TEMP[23].w, TEMP[22].yyyy 179: MUL TEMP[19].x, TEMP[11].xxxx, TEMP[18].yyyy 180: MAD TEMP[19].x, TEMP[20].xxxx, TEMP[18].xxxx, -TEMP[19].xxxx 181: MUL TEMP[22].x, TEMP[11].xxxx, TEMP[18].xxxx 182: MAD TEMP[22].x, TEMP[20].xxxx, TEMP[18].yyyy, TEMP[22].xxxx 183: MOV TEMP[19].y, TEMP[22].xxxx 184: MAD TEMP[19].xy, TEMP[19].xyyy, CONST[1][1].zzzz, IMM[8].wwww 185: ADD TEMP[19].xy, TEMP[19].xyyy, TEMP[14].xyyy 186: MOV TEMP[22].x, TEMP[21].xxxx 187: MOV TEMP[22].y, TEMP[21].yyyy 188: MOV TEMP[22].z, TEMP[19].xxxx 189: MOV TEMP[22].w, TEMP[19].yyyy 190: COS TEMP[19].x, TEMP[12].xxxx 191: SIN TEMP[12].x, TEMP[12].xxxx 192: MUL TEMP[21].x, TEMP[12].xxxx, TEMP[4].yyyy 193: MAD TEMP[21].x, TEMP[19].xxxx, TEMP[4].xxxx, -TEMP[21].xxxx 194: MUL TEMP[24].x, TEMP[19].xxxx, TEMP[4].yyyy 195: MAD TEMP[4].x, TEMP[12].xxxx, TEMP[4].xxxx, TEMP[24].xxxx 196: MOV TEMP[21].y, TEMP[4].xxxx 197: MAD TEMP[4].xy, TEMP[21].xyyy, CONST[1][1].wwww, IMM[8].wwww 198: ADD TEMP[4].xy, TEMP[4].xyyy, TEMP[15].xyyy 199: MUL TEMP[21].x, TEMP[12].xxxx, TEMP[18].yyyy 200: MAD TEMP[21].x, TEMP[19].xxxx, TEMP[18].xxxx, -TEMP[21].xxxx 201: MUL TEMP[19].x, TEMP[19].xxxx, TEMP[18].yyyy 202: MAD TEMP[12].x, TEMP[12].xxxx, TEMP[18].xxxx, TEMP[19].xxxx 203: MOV TEMP[21].y, TEMP[12].xxxx 204: MAD TEMP[12].xy, CONST[1][1].wwww, TEMP[21].xyyy, IMM[8].wwww 205: ADD TEMP[12].xy, TEMP[12].xyyy, TEMP[15].xyyy 206: MOV TEMP[15].x, TEMP[4].xxxx 207: MOV TEMP[15].y, TEMP[4].yyyy 208: MOV TEMP[15].z, TEMP[12].xxxx 209: MOV TEMP[15].w, TEMP[12].yyyy 210: ADD TEMP[4].xy, IMM[8].zzzz, TEMP[16].xyyy 211: MUL TEMP[12].x, TEMP[10].xxxx, TEMP[4].yyyy 212: MAD TEMP[12].x, TEMP[17].xxxx, TEMP[4].xxxx, -TEMP[12].xxxx 213: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[4].xxxx 214: MAD TEMP[10].x, TEMP[17].xxxx, TEMP[4].yyyy, TEMP[10].xxxx 215: MOV TEMP[12].y, TEMP[10].xxxx 216: MAD TEMP[10].xy, CONST[1][1].yyyy, TEMP[12].xyyy, IMM[8].wwww 217: ADD TEMP[10].xy, TEMP[10].xyyy, TEMP[13].xyyy 218: MUL TEMP[12].x, TEMP[11].xxxx, TEMP[4].yyyy 219: MAD TEMP[12].x, TEMP[20].xxxx, TEMP[4].xxxx, -TEMP[12].xxxx 220: MUL TEMP[13].x, TEMP[20].xxxx, TEMP[4].yyyy 221: MAD TEMP[4].x, TEMP[11].xxxx, TEMP[4].xxxx, TEMP[13].xxxx 222: MOV TEMP[12].y, TEMP[4].xxxx 223: MAD TEMP[4].xy, TEMP[12].xyyy, CONST[1][1].zzzz, IMM[8].wwww 224: ADD TEMP[4].xy, TEMP[4].xyyy, TEMP[14].xyyy 225: MOV TEMP[11].x, TEMP[10].xxxx 226: MOV TEMP[11].y, TEMP[10].yyyy 227: MOV TEMP[11].z, TEMP[4].xxxx 228: MOV TEMP[11].w, TEMP[4].yyyy 229: MOV TEMP[4].xy, IN[5].xyyy 230: MOV TEMP[4].w, IMM[0].xxxx 231: TXL TEMP[4], TEMP[4], SAMP[0], 2D, IMM[1].wyw 232: MUL TEMP[10].xyz, IN[3].xyzz, IMM[8].wwww 233: MAX TEMP[10].xyz, TEMP[10].xyzz, IMM[10].xxxx 234: MUL TEMP[12].xyz, IN[4].xyzz, IMM[10].yyyy 235: MAD TEMP[13].xyz, IN[4].xyzz, IMM[10].zzzz, IMM[10].wwww 236: POW TEMP[14].x, TEMP[13].xxxx, IMM[11].xxxx 237: POW TEMP[14].y, TEMP[13].yyyy, IMM[11].xxxx 238: POW TEMP[14].z, TEMP[13].zzzz, IMM[11].xxxx 239: FSLT TEMP[13].x, IMM[11].yyyy, IN[4].xxxx 240: UIF TEMP[13].xxxx :0 241: MOV TEMP[13].x, TEMP[14].xxxx 242: ELSE :0 243: MOV TEMP[13].x, TEMP[12].xxxx 244: ENDIF 245: FSLT TEMP[16].x, IMM[11].yyyy, IN[4].yyyy 246: UIF TEMP[16].xxxx :0 247: MOV TEMP[16].x, TEMP[14].yyyy 248: ELSE :0 249: MOV TEMP[16].x, TEMP[12].yyyy 250: ENDIF 251: FSLT TEMP[17].x, IMM[11].yyyy, IN[4].zzzz 252: UIF TEMP[17].xxxx :0 253: MOV TEMP[14].x, TEMP[14].zzzz 254: ELSE :0 255: MOV TEMP[14].x, TEMP[12].zzzz 256: ENDIF 257: MOV TEMP[12].x, TEMP[13].xxxx 258: MOV TEMP[12].y, TEMP[16].xxxx 259: MOV TEMP[12].z, TEMP[14].xxxx 260: MOV TEMP[13].w, TEMP[4].wwww 261: MUL TEMP[13].xyz, TEMP[12].xyzz, TEMP[4].xyzz 262: ABS TEMP[4].xyz, TEMP[7].xyzz 263: MAX TEMP[7].x, TEMP[4].xxxx, TEMP[4].yyyy 264: MAX TEMP[7].x, TEMP[7].xxxx, TEMP[4].zzzz 265: ADD TEMP[4].yz, TEMP[7].xxxx, -TEMP[4].xyzz 266: FSLT TEMP[7].x, TEMP[4].yyyy, IMM[11].zzzz 267: UIF TEMP[7].xxxx :0 268: MOV TEMP[7].x, IMM[0].wwww 269: ELSE :0 270: MOV TEMP[7].x, IMM[0].xxxx 271: ENDIF 272: MOV TEMP[12].x, IN[2].xxxx 273: MOV TEMP[12].y, IN[2].yyyy 274: MOV TEMP[12].z, IN[2].zzzz 275: MOV TEMP[12].w, TEMP[7].xxxx 276: FSLT TEMP[4].x, TEMP[4].zzzz, IMM[11].zzzz 277: UIF TEMP[4].xxxx :0 278: MOV TEMP[4].x, IMM[0].wwww 279: ELSE :0 280: MOV TEMP[4].x, IMM[0].xxxx 281: ENDIF 282: MOV TEMP[7].x, TEMP[10].xxxx 283: MOV TEMP[7].y, TEMP[10].yyyy 284: MOV TEMP[7].z, TEMP[10].zzzz 285: MOV TEMP[7].w, TEMP[4].xxxx 286: MOV TEMP[4].xw, TEMP[5].xxxw 287: MOV TEMP[0].x, -TEMP[0].xxxx 288: MAD TEMP[2].x, TEMP[2].xxxx, IMM[2].zzzz, -TEMP[3].xxxx 289: MOV TEMP[0].y, TEMP[2].xxxx 290: MOV TEMP[4].yz, TEMP[0].yxyy 291: MOV OUT[8], TEMP[23] 292: MOV OUT[1], TEMP[1] 293: MOV OUT[9], TEMP[22] 294: MOV OUT[10], TEMP[15] 295: MOV OUT[11], TEMP[11] 296: MOV OUT[3], TEMP[9] 297: MOV OUT[7], TEMP[6] 298: MOV OUT[6], TEMP[7] 299: MOV OUT[4], TEMP[13] 300: MOV OUT[0], TEMP[4] 301: MOV OUT[2], TEMP[8] 302: MOV OUT[5], TEMP[12] 303: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %31 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4 %32 = load <16 x i8>, <16 x i8> addrspace(2)* %31, align 16, !tbaa !0 %33 = call float @llvm.SI.load.const(<16 x i8> %32, i32 0) %34 = call float @llvm.SI.load.const(<16 x i8> %32, i32 4) %35 = call float @llvm.SI.load.const(<16 x i8> %32, i32 8) %36 = call float @llvm.SI.load.const(<16 x i8> %32, i32 12) %37 = call float @llvm.SI.load.const(<16 x i8> %32, i32 16) %38 = call float @llvm.SI.load.const(<16 x i8> %32, i32 20) %39 = call float @llvm.SI.load.const(<16 x i8> %32, i32 24) %40 = call float @llvm.SI.load.const(<16 x i8> %32, i32 28) %41 = call float @llvm.SI.load.const(<16 x i8> %32, i32 32) %42 = call float @llvm.SI.load.const(<16 x i8> %32, i32 36) %43 = call float @llvm.SI.load.const(<16 x i8> %32, i32 40) %44 = call float @llvm.SI.load.const(<16 x i8> %32, i32 44) %45 = call float @llvm.SI.load.const(<16 x i8> %32, i32 48) %46 = call float @llvm.SI.load.const(<16 x i8> %32, i32 52) %47 = call float @llvm.SI.load.const(<16 x i8> %32, i32 56) %48 = call float @llvm.SI.load.const(<16 x i8> %32, i32 60) %49 = call float @llvm.SI.load.const(<16 x i8> %32, i32 304) %50 = call float @llvm.SI.load.const(<16 x i8> %32, i32 308) %51 = call float @llvm.SI.load.const(<16 x i8> %32, i32 312) %52 = call float @llvm.SI.load.const(<16 x i8> %32, i32 320) %53 = call float @llvm.SI.load.const(<16 x i8> %32, i32 324) %54 = call float @llvm.SI.load.const(<16 x i8> %32, i32 328) %55 = call float @llvm.SI.load.const(<16 x i8> %32, i32 348) %56 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 5 %57 = load <16 x i8>, <16 x i8> addrspace(2)* %56, align 16, !tbaa !0 %58 = call float @llvm.SI.load.const(<16 x i8> %57, i32 0) %59 = call float @llvm.SI.load.const(<16 x i8> %57, i32 4) %60 = call float @llvm.SI.load.const(<16 x i8> %57, i32 8) %61 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %62 = load <8 x i32>, <8 x i32> addrspace(2)* %61, align 32, !tbaa !0 %63 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %64 = load <4 x i32>, <4 x i32> addrspace(2)* %63, align 16, !tbaa !0 %65 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %66 = load <16 x i8>, <16 x i8> addrspace(2)* %65, align 16, !tbaa !0 %67 = add i32 %5, %7 %68 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %66, i32 0, i32 %67) %69 = extractelement <4 x float> %68, i32 0 %70 = extractelement <4 x float> %68, i32 1 %71 = extractelement <4 x float> %68, i32 2 %72 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %73 = load <16 x i8>, <16 x i8> addrspace(2)* %72, align 16, !tbaa !0 %74 = add i32 %5, %7 %75 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %73, i32 0, i32 %74) %76 = extractelement <4 x float> %75, i32 0 %77 = extractelement <4 x float> %75, i32 1 %78 = extractelement <4 x float> %75, i32 2 %79 = extractelement <4 x float> %75, i32 3 %80 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0 %82 = add i32 %5, %7 %83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %82) %84 = extractelement <4 x float> %83, i32 0 %85 = extractelement <4 x float> %83, i32 1 %86 = extractelement <4 x float> %83, i32 2 %87 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %88 = load <16 x i8>, <16 x i8> addrspace(2)* %87, align 16, !tbaa !0 %89 = add i32 %5, %7 %90 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %88, i32 0, i32 %89) %91 = extractelement <4 x float> %90, i32 0 %92 = extractelement <4 x float> %90, i32 1 %93 = extractelement <4 x float> %90, i32 2 %94 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %95 = load <16 x i8>, <16 x i8> addrspace(2)* %94, align 16, !tbaa !0 %96 = add i32 %5, %7 %97 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %95, i32 0, i32 %96) %98 = extractelement <4 x float> %97, i32 0 %99 = extractelement <4 x float> %97, i32 1 %100 = extractelement <4 x float> %97, i32 2 %101 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 5 %102 = load <16 x i8>, <16 x i8> addrspace(2)* %101, align 16, !tbaa !0 %103 = add i32 %10, %6 %104 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %102, i32 0, i32 %103) %105 = extractelement <4 x float> %104, i32 0 %106 = extractelement <4 x float> %104, i32 1 %107 = bitcast float %105 to i32 %108 = bitcast float %106 to i32 %109 = insertelement <4 x i32> undef, i32 %107, i32 0 %110 = insertelement <4 x i32> %109, i32 %108, i32 1 %111 = insertelement <4 x i32> %110, i32 0, i32 2 %112 = bitcast <8 x i32> %62 to <32 x i8> %113 = bitcast <4 x i32> %64 to <16 x i8> %114 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %111, <32 x i8> %112, <16 x i8> %113, i32 2) %115 = extractelement <4 x float> %114, i32 0 %116 = extractelement <4 x float> %114, i32 1 %117 = extractelement <4 x float> %114, i32 2 %118 = extractelement <4 x float> %114, i32 3 %119 = bitcast float %105 to i32 %120 = bitcast float %106 to i32 %121 = insertelement <4 x i32> , i32 %119, i32 1 %122 = insertelement <4 x i32> %121, i32 %120, i32 2 %123 = insertelement <4 x i32> %122, i32 0, i32 3 %124 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %123, <8 x i32> %62, <4 x i32> %64, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %125 = extractelement <4 x float> %124, i32 0 %126 = extractelement <4 x float> %124, i32 1 %127 = extractelement <4 x float> %124, i32 2 %128 = extractelement <4 x float> %124, i32 3 %129 = bitcast float %105 to i32 %130 = bitcast float %106 to i32 %131 = insertelement <4 x i32> , i32 %129, i32 1 %132 = insertelement <4 x i32> %131, i32 %130, i32 2 %133 = insertelement <4 x i32> %132, i32 0, i32 3 %134 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %133, <8 x i32> %62, <4 x i32> %64, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %135 = extractelement <4 x float> %134, i32 0 %136 = extractelement <4 x float> %134, i32 1 %137 = extractelement <4 x float> %134, i32 2 %138 = extractelement <4 x float> %134, i32 3 %139 = fmul float %76, 2.550000e+02 %140 = fadd float %139, -1.280000e+02 %141 = fmul float %77, 2.550000e+02 %142 = fadd float %141, -1.280000e+02 %143 = fmul float %78, 2.550000e+02 %144 = fadd float %143, -1.280000e+02 %145 = fmul float %79, 2.550000e+02 %146 = fadd float %145, -1.280000e+02 %147 = fcmp olt float %140, 0.000000e+00 %148 = fcmp olt float %142, 0.000000e+00 %149 = fcmp olt float %144, 0.000000e+00 %150 = fcmp olt float %146, 0.000000e+00 %151 = select i1 %147, float 1.000000e+00, float 0.000000e+00 %152 = select i1 %149, float 1.000000e+00, float 0.000000e+00 %153 = select i1 %150, float 1.000000e+00, float 0.000000e+00 %154 = call float @fabs(float %140) %155 = call float @fabs(float %142) %156 = call float @fabs(float %144) %157 = call float @fabs(float %146) %158 = fsub float %154, %151 %159 = select i1 %148, float -1.000000e+00, float -0.000000e+00 %160 = fadd float %155, %159 %161 = fsub float %156, %152 %162 = fsub float %157, %153 %163 = fadd float %158, -6.400000e+01 %164 = fadd float %160, -6.400000e+01 %165 = fadd float %161, -6.400000e+01 %166 = fadd float %162, -6.400000e+01 %167 = fcmp olt float %163, 0.000000e+00 %168 = fcmp olt float %164, 0.000000e+00 %169 = fcmp olt float %165, 0.000000e+00 %170 = fcmp olt float %166, 0.000000e+00 %171 = select i1 %167, float 1.000000e+00, float 0.000000e+00 %172 = select i1 %168, float 1.000000e+00, float 0.000000e+00 %173 = select i1 %169, float 1.000000e+00, float 0.000000e+00 %174 = select i1 %170, float 1.000000e+00, float 0.000000e+00 %175 = call float @fabs(float %163) %176 = call float @fabs(float %164) %177 = call float @fabs(float %165) %178 = call float @fabs(float %166) %179 = fsub float %175, %171 %180 = fsub float %176, %172 %181 = fsub float %177, %173 %182 = fsub float %178, %174 %183 = fmul float %179, 0x3F90410420000000 %184 = fmul float %180, 0x3F90410420000000 %185 = fmul float %181, 0x3F90410420000000 %186 = fmul float %182, 0x3F90410420000000 %187 = fmul float %171, 2.000000e+00 %188 = fmul float %172, 2.000000e+00 %189 = fmul float %173, 2.000000e+00 %190 = fmul float %174, 2.000000e+00 %191 = fsub float 1.000000e+00, %187 %192 = fsub float 1.000000e+00, %188 %193 = fsub float 1.000000e+00, %189 %194 = fsub float 1.000000e+00, %190 %195 = fmul float %151, 2.000000e+00 %196 = fmul float %152, 2.000000e+00 %197 = fmul float %153, 2.000000e+00 %198 = fsub float 1.000000e+00, %195 %199 = fsub float 1.000000e+00, %196 %200 = fsub float 1.000000e+00, %197 %201 = fsub float 1.000000e+00, %183 %202 = fsub float %201, %184 %203 = fmul float %183, %183 %204 = fmul float %184, %184 %205 = fadd float %204, %203 %206 = fmul float %202, %202 %207 = fadd float %205, %206 %208 = call float @llvm.AMDGPU.rsq.clamped.f32(float %207) %209 = fmul float %183, %208 %210 = fmul float %184, %208 %211 = fmul float %202, %208 %212 = fmul float %209, %191 %213 = fmul float %210, %192 %214 = fsub float 1.000000e+00, %185 %215 = fsub float %214, %186 %216 = fmul float %185, %185 %217 = fmul float %186, %186 %218 = fadd float %217, %216 %219 = fmul float %215, %215 %220 = fadd float %218, %219 %221 = call float @llvm.AMDGPU.rsq.clamped.f32(float %220) %222 = fmul float %185, %221 %223 = fmul float %186, %221 %224 = fmul float %215, %221 %225 = fmul float %222, %193 %226 = fmul float %223, %194 %227 = fmul float %211, %198 %228 = fmul float %212, %115 %229 = fmul float %213, %116 %230 = fadd float %228, %229 %231 = fmul float %227, %117 %232 = fadd float %230, %231 %233 = fmul float %118, 0.000000e+00 %234 = fadd float %232, %233 %235 = fmul float %212, %125 %236 = fmul float %213, %126 %237 = fadd float %235, %236 %238 = fmul float %227, %127 %239 = fadd float %237, %238 %240 = fmul float %128, 0.000000e+00 %241 = fadd float %239, %240 %242 = fmul float %212, %135 %243 = fmul float %213, %136 %244 = fadd float %242, %243 %245 = fmul float %227, %137 %246 = fadd float %244, %245 %247 = fmul float %138, 0.000000e+00 %248 = fadd float %246, %247 %249 = fmul float %234, %234 %250 = fmul float %241, %241 %251 = fadd float %250, %249 %252 = fmul float %248, %248 %253 = fadd float %251, %252 %254 = call float @llvm.AMDGPU.rsq.clamped.f32(float %253) %255 = fmul float %234, %254 %256 = fmul float %241, %254 %257 = fmul float %248, %254 %258 = fmul float %224, %199 %259 = fmul float %225, %115 %260 = fmul float %226, %116 %261 = fadd float %259, %260 %262 = fmul float %258, %117 %263 = fadd float %261, %262 %264 = fmul float %118, 0.000000e+00 %265 = fadd float %263, %264 %266 = fmul float %225, %125 %267 = fmul float %226, %126 %268 = fadd float %266, %267 %269 = fmul float %258, %127 %270 = fadd float %268, %269 %271 = fmul float %128, 0.000000e+00 %272 = fadd float %270, %271 %273 = fmul float %225, %135 %274 = fmul float %226, %136 %275 = fadd float %273, %274 %276 = fmul float %258, %137 %277 = fadd float %275, %276 %278 = fmul float %138, 0.000000e+00 %279 = fadd float %277, %278 %280 = fmul float %265, %255 %281 = fmul float %272, %256 %282 = fadd float %281, %280 %283 = fmul float %279, %257 %284 = fadd float %282, %283 %285 = fmul float %284, %255 %286 = fmul float %284, %256 %287 = fmul float %284, %257 %288 = fsub float %265, %285 %289 = fsub float %272, %286 %290 = fsub float %279, %287 %291 = fmul float %288, %288 %292 = fmul float %289, %289 %293 = fadd float %292, %291 %294 = fmul float %290, %290 %295 = fadd float %293, %294 %296 = call float @llvm.AMDGPU.rsq.clamped.f32(float %295) %297 = fmul float %288, %296 %298 = fmul float %289, %296 %299 = fmul float %290, %296 %300 = fmul float %69, %115 %301 = fmul float %70, %116 %302 = fadd float %300, %301 %303 = fmul float %71, %117 %304 = fadd float %302, %303 %305 = fadd float %304, %118 %306 = fmul float %69, %125 %307 = fmul float %70, %126 %308 = fadd float %306, %307 %309 = fmul float %71, %127 %310 = fadd float %308, %309 %311 = fadd float %310, %128 %312 = fmul float %69, %135 %313 = fmul float %70, %136 %314 = fadd float %312, %313 %315 = fmul float %71, %137 %316 = fadd float %314, %315 %317 = fadd float %316, %138 %318 = fsub float %305, %49 %319 = fsub float %311, %50 %320 = fsub float %317, %51 %321 = fmul float %52, %318 %322 = fmul float %53, %319 %323 = fadd float %322, %321 %324 = fmul float %54, %320 %325 = fadd float %323, %324 %326 = fmul float %318, %318 %327 = fmul float %319, %319 %328 = fadd float %327, %326 %329 = fmul float %320, %320 %330 = fadd float %328, %329 %331 = call float @llvm.AMDGPU.rsq.clamped.f32(float %330) %332 = fmul float %318, %331 %333 = fmul float %319, %331 %334 = fmul float %320, %331 %335 = fmul float %255, %58 %336 = fmul float %256, %59 %337 = fadd float %336, %335 %338 = fmul float %257, %60 %339 = fadd float %337, %338 %340 = fmul float %339, %255 %341 = fmul float %339, %256 %342 = fmul float %339, %257 %343 = fmul float %340, 2.000000e+00 %344 = fmul float %341, 2.000000e+00 %345 = fmul float %342, 2.000000e+00 %346 = fsub float %58, %343 %347 = fsub float %59, %344 %348 = fsub float %60, %345 %349 = fmul float %332, %346 %350 = fsub float -0.000000e+00, %349 %351 = fmul float %333, %347 %352 = fsub float %350, %351 %353 = fmul float %334, %348 %354 = fsub float %352, %353 %355 = call float @llvm.AMDIL.clamp.(float %354, float 0.000000e+00, float 1.000000e+00) %356 = call float @llvm.pow.f32(float %355, float 1.600000e+01) %357 = call float @llvm.AMDIL.clamp.(float %356, float 0.000000e+00, float 1.000000e+00) %358 = fmul float %305, %34 %359 = fmul float %311, %38 %360 = fadd float %358, %359 %361 = fmul float %317, %42 %362 = fadd float %360, %361 %363 = fadd float %362, %46 %364 = fmul float %305, %35 %365 = fmul float %311, %39 %366 = fadd float %364, %365 %367 = fmul float %317, %43 %368 = fadd float %366, %367 %369 = fadd float %368, %47 %370 = fmul float %305, %36 %371 = fmul float %311, %40 %372 = fadd float %370, %371 %373 = fmul float %317, %44 %374 = fadd float %372, %373 %375 = fadd float %374, %48 %376 = fmul float %305, %33 %377 = fmul float %311, %37 %378 = fadd float %376, %377 %379 = fmul float %317, %41 %380 = fadd float %378, %379 %381 = fadd float %380, %45 %382 = fmul float %16, 0x3F91DF4720000000 %383 = fmul float %17, 0x3F91DF4720000000 %384 = fmul float %18, 0x3F91DF4720000000 %385 = fmul float %55, %19 %386 = fadd float %385, %25 %387 = fmul float %55, %20 %388 = fadd float %387, %26 %389 = fmul float %55, %21 %390 = fadd float %389, %27 %391 = fmul float %55, %22 %392 = fadd float %391, %28 %393 = fmul float %55, %23 %394 = fadd float %393, %29 %395 = fmul float %55, %24 %396 = fadd float %395, %30 %397 = fmul float %305, 7.812500e-03 %398 = fmul float %311, 7.812500e-03 %399 = fmul float %317, 7.812500e-03 %400 = fadd float %398, -5.000000e-01 %401 = fsub float -5.000000e-01, %399 %402 = call float @llvm.cos.f32(float %382) %403 = call float @llvm.sin.f32(float %382) %404 = fmul float %403, %401 %405 = fmul float %402, %400 %406 = fsub float %405, %404 %407 = fmul float %402, %401 %408 = fmul float %403, %400 %409 = fadd float %408, %407 %410 = fmul float %13, %406 %411 = fadd float %410, 5.000000e-01 %412 = fmul float %13, %409 %413 = fadd float %412, 5.000000e-01 %414 = fadd float %411, %386 %415 = fadd float %413, %388 %416 = call float @llvm.cos.f32(float %383) %417 = call float @llvm.sin.f32(float %383) %418 = fmul float %417, %401 %419 = fmul float %416, %400 %420 = fsub float %419, %418 %421 = fmul float %416, %401 %422 = fmul float %417, %400 %423 = fadd float %422, %421 %424 = fmul float %420, %14 %425 = fadd float %424, 5.000000e-01 %426 = fmul float %423, %14 %427 = fadd float %426, 5.000000e-01 %428 = fadd float %425, %390 %429 = fadd float %427, %392 %430 = fadd float %397, -5.000000e-01 %431 = fsub float -5.000000e-01, %399 %432 = fmul float %403, %431 %433 = fmul float %402, %430 %434 = fsub float %433, %432 %435 = fmul float %402, %431 %436 = fmul float %403, %430 %437 = fadd float %436, %435 %438 = fmul float %13, %434 %439 = fadd float %438, 5.000000e-01 %440 = fmul float %13, %437 %441 = fadd float %440, 5.000000e-01 %442 = fadd float %439, %386 %443 = fadd float %441, %388 %444 = fmul float %417, %431 %445 = fmul float %416, %430 %446 = fsub float %445, %444 %447 = fmul float %417, %430 %448 = fmul float %416, %431 %449 = fadd float %448, %447 %450 = fmul float %446, %14 %451 = fadd float %450, 5.000000e-01 %452 = fmul float %449, %14 %453 = fadd float %452, 5.000000e-01 %454 = fadd float %451, %390 %455 = fadd float %453, %392 %456 = call float @llvm.cos.f32(float %384) %457 = call float @llvm.sin.f32(float %384) %458 = fmul float %457, %401 %459 = fmul float %456, %400 %460 = fsub float %459, %458 %461 = fmul float %456, %401 %462 = fmul float %457, %400 %463 = fadd float %462, %461 %464 = fmul float %460, %15 %465 = fadd float %464, 5.000000e-01 %466 = fmul float %463, %15 %467 = fadd float %466, 5.000000e-01 %468 = fadd float %465, %394 %469 = fadd float %467, %396 %470 = fmul float %457, %431 %471 = fmul float %456, %430 %472 = fsub float %471, %470 %473 = fmul float %456, %431 %474 = fmul float %457, %430 %475 = fadd float %474, %473 %476 = fmul float %15, %472 %477 = fadd float %476, 5.000000e-01 %478 = fmul float %15, %475 %479 = fadd float %478, 5.000000e-01 %480 = fadd float %477, %394 %481 = fadd float %479, %396 %482 = fadd float %397, -5.000000e-01 %483 = fsub float -5.000000e-01, %398 %484 = fmul float %403, %483 %485 = fmul float %402, %482 %486 = fsub float %485, %484 %487 = fmul float %403, %482 %488 = fmul float %402, %483 %489 = fadd float %488, %487 %490 = fmul float %13, %486 %491 = fadd float %490, 5.000000e-01 %492 = fmul float %13, %489 %493 = fadd float %492, 5.000000e-01 %494 = fadd float %491, %386 %495 = fadd float %493, %388 %496 = fmul float %417, %483 %497 = fmul float %416, %482 %498 = fsub float %497, %496 %499 = fmul float %416, %483 %500 = fmul float %417, %482 %501 = fadd float %500, %499 %502 = fmul float %498, %14 %503 = fadd float %502, 5.000000e-01 %504 = fmul float %501, %14 %505 = fadd float %504, 5.000000e-01 %506 = fadd float %503, %390 %507 = fadd float %505, %392 %508 = bitcast float %105 to i32 %509 = bitcast float %106 to i32 %510 = insertelement <4 x i32> , i32 %508, i32 1 %511 = insertelement <4 x i32> %510, i32 %509, i32 2 %512 = insertelement <4 x i32> %511, i32 0, i32 3 %513 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %512, <8 x i32> %62, <4 x i32> %64, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %514 = extractelement <4 x float> %513, i32 0 %515 = extractelement <4 x float> %513, i32 1 %516 = extractelement <4 x float> %513, i32 2 %517 = extractelement <4 x float> %513, i32 3 %518 = fmul float %91, 5.000000e-01 %519 = fmul float %92, 5.000000e-01 %520 = fmul float %93, 5.000000e-01 %521 = call float @llvm.maxnum.f32(float %518, float 0x3F1A36E2E0000000) %522 = call float @llvm.maxnum.f32(float %519, float 0x3F1A36E2E0000000) %523 = call float @llvm.maxnum.f32(float %520, float 0x3F1A36E2E0000000) %524 = fmul float %98, 0x3FB3D07220000000 %525 = fmul float %99, 0x3FB3D07220000000 %526 = fmul float %100, 0x3FB3D07220000000 %527 = fmul float %98, 0x3FEE54EDE0000000 %528 = fadd float %527, 0x3FAAB12320000000 %529 = fmul float %99, 0x3FEE54EDE0000000 %530 = fadd float %529, 0x3FAAB12320000000 %531 = fmul float %100, 0x3FEE54EDE0000000 %532 = fadd float %531, 0x3FAAB12320000000 %533 = call float @llvm.pow.f32(float %528, float 0x4003333340000000) %534 = call float @llvm.pow.f32(float %530, float 0x4003333340000000) %535 = call float @llvm.pow.f32(float %532, float 0x4003333340000000) %536 = fcmp ogt float %98, 0x3FA4B5DCC0000000 %. = select i1 %536, float %533, float %524 %537 = fcmp ogt float %99, 0x3FA4B5DCC0000000 %temp64.0 = select i1 %537, float %534, float %525 %538 = fcmp ogt float %100, 0x3FA4B5DCC0000000 %.112 = select i1 %538, float %535, float %526 %539 = fmul float %., %514 %540 = fmul float %temp64.0, %515 %541 = fmul float %.112, %516 %542 = call float @fabs(float %255) %543 = call float @fabs(float %256) %544 = call float @fabs(float %257) %545 = call float @llvm.maxnum.f32(float %542, float %543) %546 = call float @llvm.maxnum.f32(float %545, float %544) %547 = fsub float %546, %543 %548 = fsub float %546, %544 %549 = fcmp olt float %547, 0x3F847AE140000000 %temp28.0 = select i1 %549, float 1.000000e+00, float 0.000000e+00 %550 = fcmp olt float %548, 0x3F847AE140000000 %.113 = select i1 %550, float 1.000000e+00, float 0.000000e+00 %551 = fsub float -0.000000e+00, %363 %552 = fmul float %369, 2.000000e+00 %553 = fsub float %552, %375 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %305, float %311, float %317, float %325) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %255, float %256, float %257, float %357) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %539, float %540, float %541, float %517) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %84, float %85, float %86, float %temp28.0) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %521, float %522, float %523, float %.113) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %297, float %298, float %299, float %200) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 39, i32 0, float %414, float %415, float %442, float %443) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 40, i32 0, float %428, float %429, float %454, float %455) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 41, i32 0, float %468, float %469, float %480, float %481) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 42, i32 0, float %494, float %495, float %506, float %507) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %381, float %551, float %553, float %375) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.cos.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sin.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_mov_b32_e32 v28, 0xc3000000 ; 7E3802FF C3000000 v_mov_b32_e32 v32, 0x437f0000 ; 7E4002FF 437F0000 v_mov_b32_e32 v33, 0x80000000 ; 7E4202FF 80000000 v_mov_b32_e32 v29, 0xc2800000 ; 7E3A02FF C2800000 v_mov_b32_e32 v27, 0x3c820821 ; 7E3602FF 3C820821 v_mov_b32_e32 v31, 0x3d558919 ; 7E3E02FF 3D558919 v_add_i32_e32 v1, s10, v0 ; 4A02000A v_add_i32_e32 v2, s11, v3 ; 4A04060B s_load_dwordx4 s[36:39], s[4:5], 0x0 ; C0920500 v_mov_b32_e32 v34, 0x3f72a76f ; 7E4402FF 3F72A76F v_mov_b32_e32 v30, 0x4019999a ; 7E3C02FF 4019999A v_mov_b32_e32 v3, 0x3b360b65 ; 7E0602FF 3B360B65 v_mov_b32_e32 v35, 0x3d9e8391 ; 7E4602FF 3D9E8391 v_mov_b32_e32 v36, 0x3d25aee6 ; 7E4802FF 3D25AEE6 v_mov_b32_e32 v0, 0x3c000000 ; 7E0002FF 3C000000 v_mov_b32_e32 v16, 0x3c23d70a ; 7E2002FF 3C23D70A v_mov_b32_e32 v13, 0x38d1b717 ; 7E1A02FF 38D1B717 s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908 s_load_dwordx4 s[24:27], s[8:9], 0xc ; C08C090C s_load_dwordx4 s[28:31], s[8:9], 0x10 ; C08E0910 s_load_dwordx4 s[8:11], s[8:9], 0x14 ; C0840914 s_load_dwordx4 s[48:51], s[2:3], 0x4 ; C0980304 s_load_dwordx4 s[52:55], s[2:3], 0x10 ; C09A0310 s_load_dwordx4 s[0:3], s[2:3], 0x14 ; C0800314 s_load_dwordx8 s[40:47], s[6:7], 0x0 ; C0D40700 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[37:40], v1, s[12:15], 0 idxen ; E00C2000 80032501 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[40:43], v1, s[16:19], 0 idxen ; E00C2000 80042801 buffer_load_format_xyzw v[23:26], v1, s[20:23], 0 idxen ; E00C2000 80051701 buffer_load_format_xyzw v[19:22], v1, s[24:27], 0 idxen ; E00C2000 80061301 buffer_load_format_xyzw v[44:47], v1, s[28:31], 0 idxen ; E00C2000 80072C01 buffer_load_format_xyzw v[4:7], v2, s[8:11], 0 idxen ; E00C2000 80020402 s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v6, 0 ; 7E0C0280 s_buffer_load_dword s6, s[52:55], 0xf ; C203350F s_buffer_load_dword s33, s[52:55], 0x4c ; C210B54C s_buffer_load_dword s32, s[52:55], 0x4d ; C210354D s_buffer_load_dword s31, s[52:55], 0x4e ; C20FB54E s_buffer_load_dword s25, s[52:55], 0x50 ; C20CB550 s_buffer_load_dword s30, s[0:3], 0x0 ; C20F0100 s_buffer_load_dword s29, s[0:3], 0x1 ; C20E8101 s_buffer_load_dword s28, s[0:3], 0x2 ; C20E0102 s_buffer_load_dword s26, s[52:55], 0x51 ; C20D3551 s_buffer_load_dword s27, s[52:55], 0x52 ; C20DB552 s_buffer_load_dword s0, s[52:55], 0x57 ; C2003557 s_buffer_load_dword s8, s[48:51], 0x5 ; C2043105 s_buffer_load_dword s7, s[48:51], 0x6 ; C203B106 s_buffer_load_dword s9, s[48:51], 0x7 ; C204B107 s_buffer_load_dword s1, s[48:51], 0x8 ; C200B108 s_buffer_load_dword s2, s[48:51], 0x9 ; C2013109 s_buffer_load_dword s3, s[48:51], 0xa ; C201B10A s_buffer_load_dword s4, s[48:51], 0xc ; C202310C s_buffer_load_dword s5, s[48:51], 0xd ; C202B10D s_buffer_load_dword s16, s[48:51], 0xe ; C208310E s_buffer_load_dword s20, s[48:51], 0xf ; C20A310F s_buffer_load_dword s21, s[48:51], 0x10 ; C20AB110 s_buffer_load_dword s34, s[48:51], 0x11 ; C2113111 s_buffer_load_dword s10, s[48:51], 0x12 ; C2053112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s1, v3 ; 10020601 v_fract_f32_e32 v1, v1 ; 7E024101 s_buffer_load_dword s1, s[48:51], 0x13 ; C200B113 v_mul_f32_e32 v2, s2, v3 ; 10040602 v_mul_f32_e32 v3, s3, v3 ; 10060603 v_fract_f32_e32 v2, v2 ; 7E044102 v_fract_f32_e32 v3, v3 ; 7E064103 s_buffer_load_dword s2, s[48:51], 0x14 ; C2013114 s_buffer_load_dword s3, s[48:51], 0x15 ; C201B115 v_cos_f32_e32 v11, v1 ; 7E166D01 v_sin_f32_e32 v12, v1 ; 7E186B01 v_cos_f32_e32 v7, v2 ; 7E0E6D02 v_sin_f32_e32 v8, v2 ; 7E106B02 v_mov_b32_e32 v1, s10 ; 7E02020A s_buffer_load_dword s23, s[48:51], 0x16 ; C20BB116 s_buffer_load_dword s24, s[48:51], 0x17 ; C20C3117 s_buffer_load_dword s14, s[52:55], 0x5 ; C2073505 s_buffer_load_dword s13, s[52:55], 0x6 ; C206B506 s_buffer_load_dword s12, s[52:55], 0x7 ; C2063507 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v2, s1 ; 7E040201 s_buffer_load_dword s10, s[52:55], 0x8 ; C2053508 v_cos_f32_e32 v15, v3 ; 7E1E6D03 v_sin_f32_e32 v14, v3 ; 7E1C6B03 v_mov_b32_e32 v3, s4 ; 7E060204 v_mac_f32_e32 v1, s0, v3 ; 3E020600 s_buffer_load_dword s11, s[52:55], 0x9 ; C205B509 v_mov_b32_e32 v3, s5 ; 7E060205 v_mac_f32_e32 v2, s0, v3 ; 3E040600 s_buffer_load_dword s15, s[52:55], 0x0 ; C207B500 v_mov_b32_e32 v9, s2 ; 7E120202 s_buffer_load_dword s17, s[52:55], 0x1 ; C208B501 v_mov_b32_e32 v10, s3 ; 7E140203 s_buffer_load_dword s18, s[52:55], 0x2 ; C2093502 v_mov_b32_e32 v3, s16 ; 7E060210 v_mac_f32_e32 v9, s0, v3 ; 3E120600 s_buffer_load_dword s19, s[52:55], 0x3 ; C209B503 v_mov_b32_e32 v3, s20 ; 7E060214 v_mac_f32_e32 v10, s0, v3 ; 3E140600 s_buffer_load_dword s22, s[52:55], 0x4 ; C20B3504 v_mov_b32_e32 v17, s23 ; 7E220217 s_buffer_load_dword s23, s[52:55], 0xa ; C20BB50A v_mov_b32_e32 v18, s24 ; 7E240218 s_buffer_load_dword s24, s[52:55], 0xb ; C20C350B v_mov_b32_e32 v3, s21 ; 7E060215 v_mac_f32_e32 v17, s0, v3 ; 3E220600 s_buffer_load_dword s16, s[52:55], 0xc ; C208350C v_mov_b32_e32 v3, s34 ; 7E060222 v_mac_f32_e32 v18, s0, v3 ; 3E240600 s_buffer_load_dword s20, s[52:55], 0xd ; C20A350D s_buffer_load_dword s21, s[52:55], 0xe ; C20AB50E image_sample_l v[47:50], 15, 0, 0, 0, 0, 0, 0, 0, v[4:7], s[40:47], s[36:39] ; F0900F00 012A2F04 v_mov_b32_e32 v3, 0x10001 ; 7E0602FF 00010001 image_sample_l_o v[51:54], 15, 0, 0, 0, 0, 0, 0, 0, v[3:6], s[40:47], s[36:39] ; F0D00F00 012A3303 v_mov_b32_e32 v3, 0x20002 ; 7E0602FF 00020002 image_sample_l_o v[55:58], 15, 0, 0, 0, 0, 0, 0, 0, v[3:6], s[40:47], s[36:39] ; F0D00F00 012A3703 v_mov_b32_e32 v3, 0x30003 ; 7E0602FF 00030003 image_sample_l_o v[59:62], 15, 0, 0, 0, 0, 0, 0, 0, v[3:6], s[40:47], s[36:39] ; F0D00F00 012A3B03 exp 15, 32, 0, 0, 0, v6, v6, v6, v6 ; F800020F 06060606 s_waitcnt vmcnt(3) lgkmcnt(0) ; BF8C0073 v_mul_f32_e32 v3, v48, v38 ; 10064D30 v_mad_f32 v4, v32, v40, v28 ; D2820004 04725120 v_mad_f32 v5, v32, v41, v28 ; D2820005 04725320 v_mad_f32 v22, v32, v42, v28 ; D2820016 04725520 v_mac_f32_e32 v28, v32, v43 ; 3E385720 v_mad_f32 v26, v34, v44, v31 ; D282001A 047E5922 v_mad_f32 v32, v34, v45, v31 ; D2820020 047E5B22 v_mac_f32_e32 v31, v34, v46 ; 3E3E5D22 v_cmp_gt_f32_e32 vcc, v44, v36 ; 7C08492C v_cmp_gt_f32_e64 s[0:1], v45, v36 ; D0080000 0002492D v_cmp_gt_f32_e64 s[2:3], v46, v36 ; D0080002 0002492E v_cmp_gt_f32_e64 s[4:5], 0, v5 ; D0080004 00020A80 v_cndmask_b32_e64 v33, v33, -1.0, s[4:5] ; D2000021 0011E721 v_add_f32_e64 v5, |v5|, v33 ; D2060105 00024305 v_cmp_gt_f32_e64 s[4:5], 0, v4 ; D0080004 00020880 v_cndmask_b32_e64 v33, 0, 1.0, s[4:5] ; D2000021 0011E480 v_sub_f32_e64 v4, |v4|, v33 ; D2080104 00024304 v_add_f32_e32 v4, v29, v4 ; 0608091D v_add_f32_e32 v5, v29, v5 ; 060A0B1D v_cmp_gt_f32_e64 s[4:5], 0, v4 ; D0080004 00020880 v_cndmask_b32_e64 v34, 0, 1.0, s[4:5] ; D2000022 0011E480 v_sub_f32_e64 v36, v34, |v4| ; D2080224 00020922 v_mad_f32 v36, v36, v27, 1.0 ; D2820024 03CA3724 v_cmp_gt_f32_e64 s[4:5], 0, v5 ; D0080004 00020A80 v_cndmask_b32_e64 v40, 0, 1.0, s[4:5] ; D2000028 0011E480 v_sub_f32_e64 v41, v40, |v5| ; D2080229 00020B28 v_mac_f32_e32 v36, v27, v41 ; 3E48531B v_sub_f32_e64 v4, |v4|, v34 ; D2080104 00024504 v_sub_f32_e64 v5, |v5|, v40 ; D2080105 00025105 v_mul_f32_e32 v4, v27, v4 ; 1008091B v_mul_f32_e32 v5, v27, v5 ; 100A0B1B v_mul_f32_e32 v41, v4, v4 ; 10520904 v_mac_f32_e32 v41, v5, v5 ; 3E520B05 v_mac_f32_e32 v41, v36, v36 ; 3E524924 v_rsq_clamp_f32_e32 v41, v41 ; 7E525929 v_mul_f32_e32 v42, v35, v44 ; 10545923 v_mul_f32_e32 v43, v35, v45 ; 10565B23 v_mul_f32_e32 v35, v35, v46 ; 10465D23 v_mul_f32_e32 v4, v41, v4 ; 10080929 v_mul_f32_e32 v5, v41, v5 ; 100A0B29 v_mul_f32_e32 v36, v41, v36 ; 10484929 v_mad_f32 v34, -2.0, v34, 1.0 ; D2820022 03CA44F5 v_mul_f32_e32 v4, v34, v4 ; 10080922 v_mad_f32 v34, -2.0, v40, 1.0 ; D2820022 03CA50F5 v_mul_f32_e32 v5, v34, v5 ; 100A0B22 v_mad_f32 v33, -2.0, v33, 1.0 ; D2820021 03CA42F5 v_mul_f32_e32 v33, v33, v36 ; 10424921 v_mul_f32_e32 v34, v48, v5 ; 10440B30 s_waitcnt vmcnt(2) ; BF8C0772 v_mul_f32_e32 v36, v52, v5 ; 10480B34 s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v5, v56, v5 ; 100A0B38 v_mac_f32_e32 v34, v47, v4 ; 3E44092F v_mac_f32_e32 v36, v51, v4 ; 3E480933 v_mac_f32_e32 v5, v55, v4 ; 3E0A0937 v_mac_f32_e32 v34, v49, v33 ; 3E444331 v_mac_f32_e32 v36, v53, v33 ; 3E484335 v_mac_f32_e32 v5, v57, v33 ; 3E0A4339 v_mac_f32_e32 v3, v47, v37 ; 3E064B2F v_mul_f32_e32 v4, v52, v38 ; 10084D34 v_mac_f32_e32 v4, v51, v37 ; 3E084B33 v_mul_f32_e32 v33, v56, v38 ; 10424D38 v_mac_f32_e32 v33, v55, v37 ; 3E424B37 v_mul_f32_e32 v37, v34, v34 ; 104A4522 v_mac_f32_e32 v37, v36, v36 ; 3E4A4924 v_mac_f32_e32 v37, v5, v5 ; 3E4A0B05 v_rsq_clamp_f32_e32 v37, v37 ; 7E4A5925 v_mac_f32_e32 v3, v49, v39 ; 3E064F31 v_mac_f32_e32 v4, v53, v39 ; 3E084F35 v_mac_f32_e32 v33, v57, v39 ; 3E424F39 v_mul_f32_e32 v34, v37, v34 ; 10444525 v_mul_f32_e32 v36, v37, v36 ; 10484925 v_mul_f32_e32 v5, v37, v5 ; 100A0B25 v_add_f32_e32 v3, v50, v3 ; 06060732 v_add_f32_e32 v4, v54, v4 ; 06080936 v_add_f32_e32 v33, v58, v33 ; 0642433A v_subrev_f32_e32 v37, s33, v3 ; 0A4A0621 v_subrev_f32_e32 v38, s32, v4 ; 0A4C0820 v_mul_f32_e32 v39, v37, v37 ; 104E4B25 v_mac_f32_e32 v39, v38, v38 ; 3E4E4D26 v_subrev_f32_e32 v40, s31, v33 ; 0A50421F v_mac_f32_e32 v39, v40, v40 ; 3E4E5128 v_rsq_clamp_f32_e32 v39, v39 ; 7E4E5927 v_mul_f32_e32 v41, s30, v34 ; 1052441E v_mac_f32_e32 v41, s29, v36 ; 3E52481D v_mac_f32_e32 v41, s28, v5 ; 3E520A1C v_mul_f32_e32 v44, v34, v41 ; 10585322 v_mad_f32 v44, 2.0, v44, -s30 ; D282002C 807A58F4 v_mul_f32_e32 v45, v39, v37 ; 105A4B27 v_mul_f32_e32 v44, v44, v45 ; 10585B2C v_mul_f32_e32 v45, v36, v41 ; 105A5324 v_mad_f32 v45, 2.0, v45, -s29 ; D282002D 80765AF4 v_mul_f32_e32 v46, v39, v38 ; 105C4D27 v_mac_f32_e32 v44, v45, v46 ; 3E585D2D v_mul_f32_e32 v41, v5, v41 ; 10525305 v_mad_f32 v41, 2.0, v41, -s28 ; D2820029 807252F4 v_mul_f32_e32 v39, v39, v40 ; 104E5127 v_mac_f32_e32 v44, v41, v39 ; 3E584F29 v_log_f32_e32 v26, v26 ; 7E344F1A v_log_f32_e32 v32, v32 ; 7E404F20 v_log_f32_e32 v31, v31 ; 7E3E4F1F v_cmp_gt_f32_e64 s[4:5], 0, v22 ; D0080004 00022C80 v_cndmask_b32_e64 v39, 0, 1.0, s[4:5] ; D2000027 0011E480 v_sub_f32_e64 v22, |v22|, v39 ; D2080116 00024F16 v_cmp_gt_f32_e64 s[4:5], 0, v28 ; D0080004 00023880 v_cndmask_b32_e64 v41, 0, 1.0, s[4:5] ; D2000029 0011E480 v_sub_f32_e64 v28, |v28|, v41 ; D208011C 0002531C v_add_f32_e32 v22, v29, v22 ; 062C2D1D v_add_f32_e32 v28, v29, v28 ; 0638391D v_mul_legacy_f32_e32 v26, v30, v26 ; 0E34351E v_mul_legacy_f32_e32 v29, v30, v32 ; 0E3A411E v_mul_legacy_f32_e32 v30, v30, v31 ; 0E3C3F1E v_mul_f32_e32 v31, s25, v37 ; 103E4A19 v_mac_f32_e32 v31, s26, v38 ; 3E3E4C1A v_mac_f32_e32 v31, s27, v40 ; 3E3E501B v_exp_f32_e32 v26, v26 ; 7E344B1A v_cndmask_b32_e32 v26, v42, v26 ; 0034352A v_exp_f32_e32 v29, v29 ; 7E3A4B1D v_cndmask_b32_e64 v29, v43, v29, s[0:1] ; D200001D 00023B2B v_exp_f32_e32 v30, v30 ; 7E3C4B1E v_cndmask_b32_e64 v30, v35, v30, s[2:3] ; D200001E 000A3D23 v_cmp_gt_f32_e32 vcc, 0, v22 ; 7C082C80 v_cndmask_b32_e64 v32, 0, 1.0, vcc ; D2000020 01A9E480 v_sub_f32_e64 v35, v32, |v22| ; D2080223 00022D20 v_mad_f32 v35, v35, v27, 1.0 ; D2820023 03CA3723 v_cmp_gt_f32_e32 vcc, 0, v28 ; 7C083880 v_cndmask_b32_e64 v37, 0, 1.0, vcc ; D2000025 01A9E480 v_sub_f32_e64 v38, v37, |v28| ; D2080226 00023925 v_mac_f32_e32 v35, v27, v38 ; 3E464D1B v_sub_f32_e64 v22, |v22|, v32 ; D2080116 00024116 v_sub_f32_e64 v28, |v28|, v37 ; D208011C 00024B1C v_add_f32_e64 v38, 0, v44 clamp ; D2060826 00025880 v_log_f32_e32 v38, v38 ; 7E4C4F26 v_mul_f32_e32 v22, v27, v22 ; 102C2D1B v_mul_f32_e32 v27, v27, v28 ; 1036391B exp 15, 33, 0, 0, 0, v3, v4, v33, v31 ; F800021F 1F210403 v_mul_legacy_f32_e32 v28, 0x41800000, v38 ; 0E384CFF 41800000 v_exp_f32_e32 v28, v28 ; 7E384B1C v_add_f32_e64 v28, 0, v28 clamp ; D206081C 00023880 exp 15, 34, 0, 0, 0, v34, v36, v5, v28 ; F800022F 1C052422 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v26, v59, v26 ; 1034353B s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v28, v22, v22 ; 10382D16 v_mac_f32_e32 v28, v27, v27 ; 3E38371B v_mac_f32_e32 v28, v35, v35 ; 3E384723 v_rsq_clamp_f32_e32 v28, v28 ; 7E38591C v_mul_f32_e32 v29, v60, v29 ; 103A3B3C v_mul_f32_e32 v30, v61, v30 ; 103C3D3D exp 15, 35, 0, 0, 0, v26, v29, v30, v62 ; F800023F 3E1E1D1A v_mul_f32_e32 v22, v28, v22 ; 102C2D1C s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v26, v28, v27 ; 1034371C v_mul_f32_e32 v27, v28, v35 ; 1036471C v_mad_f32 v28, -2.0, v32, 1.0 ; D282001C 03CA40F5 v_mul_f32_e32 v22, v28, v22 ; 102C2D1C v_mad_f32 v28, -2.0, v37, 1.0 ; D282001C 03CA4AF5 v_mul_f32_e32 v26, v28, v26 ; 1034351C v_mad_f32 v28, -2.0, v39, 1.0 ; D282001C 03CA4EF5 v_mul_f32_e32 v27, v28, v27 ; 1036371C v_mul_f32_e32 v28, v48, v26 ; 10383530 v_mac_f32_e32 v28, v47, v22 ; 3E382D2F v_mac_f32_e32 v28, v49, v27 ; 3E383731 v_mul_f32_e32 v29, v52, v26 ; 103A3534 v_mac_f32_e32 v29, v51, v22 ; 3E3A2D33 v_mac_f32_e32 v29, v53, v27 ; 3E3A3735 v_mul_f32_e32 v26, v56, v26 ; 10343538 v_mac_f32_e32 v26, v55, v22 ; 3E342D37 v_mac_f32_e32 v26, v57, v27 ; 3E343739 v_max3_f32 v22, |v34|, |v36|, |v5| ; D2A80716 04164922 v_sub_f32_e64 v27, v22, |v36| ; D208021B 00024916 v_cmp_lt_f32_e32 vcc, v27, v16 ; 7C02211B v_cndmask_b32_e64 v27, 0, 1.0, vcc ; D200001B 01A9E480 exp 15, 36, 0, 0, 0, v23, v24, v25, v27 ; F800024F 1B191817 v_mul_f32_e32 v19, 0.5, v19 ; 102626F0 v_mul_f32_e32 v20, 0.5, v20 ; 102828F0 v_mul_f32_e32 v21, 0.5, v21 ; 102A2AF0 v_sub_f32_e64 v22, v22, |v5| ; D2080216 00020B16 v_cmp_lt_f32_e32 vcc, v22, v16 ; 7C022116 v_mul_f32_e32 v16, v34, v28 ; 10203922 v_mac_f32_e32 v16, v36, v29 ; 3E203B24 v_mac_f32_e32 v16, v5, v26 ; 3E203505 v_mad_f32 v22, -v16, v34, v28 ; D2820016 24724510 s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v23, -v16, v36, v29 ; D2820017 24764910 v_mad_f32 v5, -v16, v5, v26 ; D2820005 246A0B10 v_max_f32_e32 v16, v13, v19 ; 2020270D v_max_f32_e32 v19, v13, v20 ; 2026290D v_mul_f32_e32 v20, v22, v22 ; 10282D16 v_mac_f32_e32 v20, v23, v23 ; 3E282F17 v_mac_f32_e32 v20, v5, v5 ; 3E280B05 v_rsq_clamp_f32_e32 v20, v20 ; 7E285914 v_max_f32_e32 v13, v13, v21 ; 201A2B0D v_cndmask_b32_e64 v21, 0, 1.0, vcc ; D2000015 01A9E480 exp 15, 37, 0, 0, 0, v16, v19, v13, v21 ; F800025F 150D1310 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v13, v20, v22 ; 101A2D14 v_mul_f32_e32 v16, v20, v23 ; 10202F14 v_mul_f32_e32 v5, v20, v5 ; 100A0B14 v_mad_f32 v19, -2.0, v41, 1.0 ; D2820013 03CA52F5 exp 15, 38, 0, 0, 0, v13, v16, v5, v19 ; F800026F 1305100D s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v5, v4, v0, -0.5 ; D2820005 03C60104 v_mad_f32 v13, -v33, v0, -0.5 ; D282000D 23C60121 v_mad_f32 v16, v3, v0, -0.5 ; D2820010 03C60103 v_mul_f32_e32 v19, v13, v12 ; 1026190D v_mad_f32 v20, v11, v5, -v19 ; D2820014 844E0B0B v_mad_f32 v19, v11, v16, -v19 ; D2820013 844E210B v_mul_f32_e32 v21, v13, v11 ; 102A170D v_mad_f32 v22, v5, v12, v21 ; D2820016 04561905 v_mac_f32_e32 v21, v16, v12 ; 3E2A1910 v_mad_f32 v20, s8, v20, 0.5 ; D2820014 03C22808 v_mad_f32 v22, s8, v22, 0.5 ; D2820016 03C22C08 v_mad_f32 v19, s8, v19, 0.5 ; D2820013 03C22608 v_mad_f32 v21, s8, v21, 0.5 ; D2820015 03C22A08 v_add_f32_e32 v20, v1, v20 ; 06282901 v_add_f32_e32 v22, v2, v22 ; 062C2D02 v_add_f32_e32 v19, v1, v19 ; 06262701 v_add_f32_e32 v21, v2, v21 ; 062A2B02 exp 15, 39, 0, 0, 0, v20, v22, v19, v21 ; F800027F 15131614 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v19, v13, v7 ; 10260F0D v_mac_f32_e32 v19, v5, v8 ; 3E261105 v_mul_f32_e32 v20, v13, v8 ; 1028110D v_mad_f32 v21, v7, v5, -v20 ; D2820015 84520B07 v_mad_f32 v20, v7, v16, -v20 ; D2820014 84522107 v_mul_f32_e32 v22, v16, v8 ; 102C1110 v_mac_f32_e32 v22, v13, v7 ; 3E2C0F0D v_mad_f32 v21, v21, s7, 0.5 ; D2820015 03C00F15 v_mad_f32 v19, v19, s7, 0.5 ; D2820013 03C00F13 v_mad_f32 v20, v20, s7, 0.5 ; D2820014 03C00F14 v_mad_f32 v22, v22, s7, 0.5 ; D2820016 03C00F16 v_add_f32_e32 v21, v9, v21 ; 062A2B09 v_add_f32_e32 v19, v10, v19 ; 0626270A v_add_f32_e32 v20, v9, v20 ; 06282909 v_add_f32_e32 v22, v10, v22 ; 062C2D0A exp 15, 40, 0, 0, 0, v21, v19, v20, v22 ; F800028F 16141315 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v19, v13, v14 ; 10261D0D v_mul_f32_e32 v13, v13, v15 ; 101A1F0D v_mad_f32 v20, v15, v5, -v19 ; D2820014 844E0B0F v_mad_f32 v15, v15, v16, -v19 ; D282000F 844E210F v_mad_f32 v5, v5, v14, v13 ; D2820005 04361D05 v_mac_f32_e32 v13, v16, v14 ; 3E1A1D10 v_mad_f32 v14, v20, s9, 0.5 ; D282000E 03C01314 v_mad_f32 v15, s9, v15, 0.5 ; D282000F 03C21E09 v_add_f32_e32 v14, v17, v14 ; 061C1D11 v_add_f32_e32 v15, v17, v15 ; 061E1F11 v_mad_f32 v5, v5, s9, 0.5 ; D2820005 03C01305 v_mad_f32 v13, s9, v13, 0.5 ; D282000D 03C21A09 v_add_f32_e32 v5, v18, v5 ; 060A0B12 v_add_f32_e32 v13, v18, v13 ; 061A1B12 exp 15, 41, 0, 0, 0, v14, v5, v15, v13 ; F800029F 0D0F050E s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v5, s14, v4 ; 100A080E v_mad_f32 v0, -v4, v0, -0.5 ; D2820000 23C60104 v_mul_f32_e32 v13, s13, v4 ; 101A080D v_mul_f32_e32 v14, s12, v4 ; 101C080C v_mul_f32_e32 v4, s22, v4 ; 10080816 v_mac_f32_e32 v5, s17, v3 ; 3E0A0611 v_mac_f32_e32 v13, s18, v3 ; 3E1A0612 v_mac_f32_e32 v14, s19, v3 ; 3E1C0613 v_mac_f32_e32 v4, s15, v3 ; 3E08060F v_mac_f32_e32 v5, s11, v33 ; 3E0A420B v_mac_f32_e32 v13, s23, v33 ; 3E1A4217 v_mac_f32_e32 v14, s24, v33 ; 3E1C4218 v_mac_f32_e32 v4, s10, v33 ; 3E08420A v_mul_f32_e32 v3, v16, v12 ; 10061910 v_mul_f32_e32 v12, v0, v12 ; 10181900 v_mad_f32 v12, v11, v16, -v12 ; D282000C 8432210B v_mac_f32_e32 v3, v0, v11 ; 3E061700 v_mul_f32_e32 v11, v0, v8 ; 10161100 v_mul_f32_e32 v0, v0, v7 ; 10000F00 v_mad_f32 v7, v7, v16, -v11 ; D2820007 842E2107 v_mac_f32_e32 v0, v16, v8 ; 3E001110 v_add_f32_e32 v5, s20, v5 ; 060A0A14 v_add_f32_e32 v8, s21, v13 ; 06101A15 v_add_f32_e32 v11, s6, v14 ; 06161C06 v_add_f32_e32 v4, s16, v4 ; 06080810 v_mad_f32 v12, s8, v12, 0.5 ; D282000C 03C21808 v_mad_f32 v3, s8, v3, 0.5 ; D2820003 03C20608 v_mad_f32 v7, v7, s7, 0.5 ; D2820007 03C00F07 v_mad_f32 v0, v0, s7, 0.5 ; D2820000 03C00F00 v_add_f32_e32 v1, v1, v12 ; 06021901 v_add_f32_e32 v2, v2, v3 ; 06040702 v_add_f32_e32 v3, v9, v7 ; 06060F09 v_add_f32_e32 v0, v10, v0 ; 0600010A exp 15, 42, 0, 0, 0, v1, v2, v3, v0 ; F80002AF 00030201 s_waitcnt expcnt(0) ; BF8C070F v_xor_b32_e32 v0, 0x80000000, v5 ; 3A000AFF 80000000 v_mad_f32 v1, 2.0, v8, -v11 ; D2820001 842E10F4 exp 15, 12, 0, 0, 0, v4, v0, v1, v11 ; F80000CF 0B010004 exp 15, 13, 0, 1, 0, v6, v6, v6, v6 ; F80008DF 06060606 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 64 Code Size: 2184 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0xB last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL IN[4], GENERIC[4], PERSPECTIVE DCL IN[5], GENERIC[6], PERSPECTIVE DCL IN[6], GENERIC[7], PERSPECTIVE DCL IN[7], GENERIC[8], PERSPECTIVE DCL IN[8], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL OUT[2], COLOR[2] DCL OUT[3], COLOR[3] DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL SAMP[8] DCL SAMP[9] DCL SAMP[10] DCL SAMP[11] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL SVIEW[5], 2D, FLOAT DCL SVIEW[6], 2D, FLOAT DCL SVIEW[7], 2D, FLOAT DCL SVIEW[8], 2D, FLOAT DCL SVIEW[9], 2D, FLOAT DCL SVIEW[10], 2D, FLOAT DCL SVIEW[11], 2D, FLOAT DCL CONST[1][0..24] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0..18], LOCAL IMM[0] FLT32 { 0.0078, 1.0000, 2.0000, 0.0000} IMM[1] UINT32 {0, 368, 384, 304} IMM[2] UINT32 {320, 64, 48, 32} IMM[3] FLT32 { 3.0000, -1.0000, 0.0000, 0.0000} IMM[4] UINT32 {16, 224, 228, 0} IMM[5] FLT32 { 0.5000, 0.0000, 0.0000, 0.0000} 0: DP3 TEMP[0].x, IN[1].xyzz, IN[1].xyzz 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MUL TEMP[0].xyz, IN[1].xyzz, TEMP[0].xxxx 3: MUL TEMP[1].xyz, IN[0].xyzz, IMM[0].xxxx 4: MOV TEMP[2].x, -TEMP[1].zzzz 5: MOV TEMP[3].z, IMM[0].yyyy 6: MOV TEMP[3].x, TEMP[1].yyyy 7: MOV TEMP[3].y, TEMP[2].xxxx 8: MOV TEMP[4].x, TEMP[1].xxxx 9: MOV TEMP[5].y, IMM[0].yyyy 10: MOV TEMP[5].x, TEMP[2].xxxx 11: MOV TEMP[4].yz, TEMP[5].yxyy 12: MOV TEMP[2].x, TEMP[1].xxxx 13: MOV TEMP[5].y, IMM[0].yyyy 14: MOV TEMP[5].x, -TEMP[1].yyyy 15: MOV TEMP[2].yz, TEMP[5].yxyy 16: DP3 TEMP[1].x, CONST[1][23].xyzz, TEMP[3].xyzz 17: DP3 TEMP[3].x, CONST[1][24].xyzz, TEMP[3].xyzz 18: MOV TEMP[1].y, TEMP[3].xxxx 19: DP3 TEMP[3].x, CONST[1][23].xyzz, TEMP[4].xyzz 20: DP3 TEMP[4].x, CONST[1][24].xyzz, TEMP[4].xyzz 21: MOV TEMP[3].y, TEMP[4].xxxx 22: DP3 TEMP[4].x, CONST[1][23].xyzz, TEMP[2].xyzz 23: DP3 TEMP[5].x, CONST[1][24].xyzz, TEMP[2].xyzz 24: MOV TEMP[4].y, TEMP[5].xxxx 25: DP3 TEMP[5].x, CONST[1][19].xyzz, TEMP[2].xyzz 26: DP3 TEMP[2].x, CONST[1][20].xyzz, TEMP[2].xyzz 27: MOV TEMP[5].y, TEMP[2].xxxx 28: MUL TEMP[2].x, IN[3].wwww, IMM[0].zzzz 29: MOV_SAT TEMP[2].x, TEMP[2].xxxx 30: MUL TEMP[6].x, IMM[0].zzzz, IN[4].wwww 31: MOV_SAT TEMP[6].x, TEMP[6].xxxx 32: MOV TEMP[7].xy, IN[6].xyyy 33: TEX TEMP[7], TEMP[7], SAMP[1], 2D 34: MOV TEMP[8].xy, IN[6].zwww 35: TEX TEMP[8], TEMP[8], SAMP[1], 2D 36: LRP TEMP[7], TEMP[2].xxxx, TEMP[8], TEMP[7] 37: MOV TEMP[8].xy, IN[8].zwww 38: TEX TEMP[8], TEMP[8], SAMP[1], 2D 39: LRP TEMP[7], TEMP[6].xxxx, TEMP[8], TEMP[7] 40: MOV TEMP[8].xy, IN[7].xyyy 41: TEX TEMP[8], TEMP[8], SAMP[2], 2D 42: MOV TEMP[9].xy, IN[7].zwww 43: TEX TEMP[9], TEMP[9], SAMP[2], 2D 44: LRP TEMP[8], TEMP[2].xxxx, TEMP[9], TEMP[8] 45: MOV TEMP[9].xy, TEMP[5].xyyy 46: TEX TEMP[9], TEMP[9], SAMP[2], 2D 47: LRP TEMP[8], TEMP[6].xxxx, TEMP[9], TEMP[8] 48: MOV TEMP[9].xy, TEMP[1].xyyy 49: TEX TEMP[9], TEMP[9], SAMP[3], 2D 50: MOV TEMP[10].xy, TEMP[3].xyyy 51: TEX TEMP[10], TEMP[10], SAMP[3], 2D 52: LRP TEMP[9], TEMP[2].xxxx, TEMP[10], TEMP[9] 53: MOV TEMP[10].xy, TEMP[4].xyyy 54: TEX TEMP[10], TEMP[10], SAMP[3], 2D 55: LRP TEMP[9], TEMP[6].xxxx, TEMP[10], TEMP[9] 56: ADD TEMP[10].x, TEMP[7].wwww, -IN[4].xxxx 57: MAX TEMP[10].x, IMM[0].wwww, TEMP[10].xxxx 58: ADD TEMP[11].x, IN[3].xxxx, -TEMP[10].xxxx 59: ADD TEMP[12].x, IN[4].xxxx, TEMP[7].wwww 60: MIN TEMP[12].x, IMM[0].yyyy, TEMP[12].xxxx 61: ADD TEMP[10].x, TEMP[12].xxxx, -TEMP[10].xxxx 62: RCP TEMP[10].x, TEMP[10].xxxx 63: MUL TEMP[10].x, TEMP[11].xxxx, TEMP[10].xxxx 64: MOV_SAT TEMP[10].x, TEMP[10].xxxx 65: MUL TEMP[11].x, IMM[0].zzzz, TEMP[10].xxxx 66: ADD TEMP[11].x, IMM[3].xxxx, -TEMP[11].xxxx 67: MUL TEMP[11].x, TEMP[10].xxxx, TEMP[11].xxxx 68: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx 69: ADD TEMP[11].x, IMM[0].yyyy, -TEMP[10].xxxx 70: ADD TEMP[12].x, TEMP[8].wwww, -IN[4].yyyy 71: MAX TEMP[12].x, IMM[0].wwww, TEMP[12].xxxx 72: ADD TEMP[13].x, IN[3].yyyy, -TEMP[12].xxxx 73: ADD TEMP[14].x, IN[4].yyyy, TEMP[8].wwww 74: MIN TEMP[14].x, IMM[0].yyyy, TEMP[14].xxxx 75: ADD TEMP[12].x, TEMP[14].xxxx, -TEMP[12].xxxx 76: RCP TEMP[12].x, TEMP[12].xxxx 77: MUL TEMP[12].x, TEMP[13].xxxx, TEMP[12].xxxx 78: MOV_SAT TEMP[12].x, TEMP[12].xxxx 79: MUL TEMP[13].x, IMM[0].zzzz, TEMP[12].xxxx 80: ADD TEMP[13].x, IMM[3].xxxx, -TEMP[13].xxxx 81: MUL TEMP[13].x, TEMP[12].xxxx, TEMP[13].xxxx 82: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[13].xxxx 83: MAX TEMP[12].x, TEMP[12].xxxx, IMM[0].wwww 84: MIN TEMP[12].x, TEMP[12].xxxx, TEMP[11].xxxx 85: ADD TEMP[11].x, TEMP[11].xxxx, -TEMP[12].xxxx 86: ADD TEMP[13].x, TEMP[9].wwww, -IN[4].zzzz 87: MAX TEMP[13].x, IMM[0].wwww, TEMP[13].xxxx 88: ADD TEMP[14].x, IN[3].zzzz, -TEMP[13].xxxx 89: ADD TEMP[15].x, IN[4].zzzz, TEMP[9].wwww 90: MIN TEMP[15].x, IMM[0].yyyy, TEMP[15].xxxx 91: ADD TEMP[13].x, TEMP[15].xxxx, -TEMP[13].xxxx 92: RCP TEMP[13].x, TEMP[13].xxxx 93: MUL TEMP[13].x, TEMP[14].xxxx, TEMP[13].xxxx 94: MOV_SAT TEMP[13].x, TEMP[13].xxxx 95: MUL TEMP[14].x, IMM[0].zzzz, TEMP[13].xxxx 96: ADD TEMP[14].x, IMM[3].xxxx, -TEMP[14].xxxx 97: MUL TEMP[14].x, TEMP[13].xxxx, TEMP[14].xxxx 98: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[14].xxxx 99: MAX TEMP[13].x, TEMP[13].xxxx, IMM[0].wwww 100: MIN TEMP[13].x, TEMP[13].xxxx, TEMP[11].xxxx 101: ADD TEMP[11].x, TEMP[11].xxxx, -TEMP[13].xxxx 102: MUL TEMP[9].xyz, CONST[1][4].xyzz, TEMP[9].xyzz 103: MUL TEMP[8].xyz, CONST[1][3].xyzz, TEMP[8].xyzz 104: MUL TEMP[7].xyz, CONST[1][2].xyzz, TEMP[7].xyzz 105: MOV TEMP[14].xy, IN[5].xyyy 106: TEX TEMP[14], TEMP[14], SAMP[0], 2D 107: MOV TEMP[15].xy, IN[5].zwww 108: TEX TEMP[15], TEMP[15], SAMP[0], 2D 109: LRP TEMP[14], TEMP[2].xxxx, TEMP[15], TEMP[14] 110: MOV TEMP[15].xy, IN[8].xyyy 111: TEX TEMP[15], TEMP[15], SAMP[0], 2D 112: LRP TEMP[14].xyz, TEMP[6].xxxx, TEMP[15], TEMP[14] 113: MUL TEMP[14].xyz, CONST[1][1].xyzz, TEMP[14].xyzz 114: MUL TEMP[14].xyz, TEMP[14].xyzz, TEMP[11].xxxx 115: MAD TEMP[7].xyz, TEMP[7].xyzz, TEMP[10].xxxx, TEMP[14].xyzz 116: MAD TEMP[7].xyz, TEMP[12].xxxx, TEMP[8].xyzz, TEMP[7].xyzz 117: MAD TEMP[7].xyz, TEMP[9].xyzz, TEMP[13].xxxx, TEMP[7].xyzz 118: MUL TEMP[7].xyz, TEMP[7].xyzz, IN[2].xyzz 119: MOV TEMP[8].xy, TEMP[1].xyyy 120: TEX TEMP[8], TEMP[8], SAMP[7], 2D 121: MOV TEMP[9].xy, TEMP[3].xyyy 122: TEX TEMP[9], TEMP[9], SAMP[7], 2D 123: LRP TEMP[8], TEMP[2].xxxx, TEMP[9], TEMP[8] 124: MOV TEMP[9].xy, TEMP[4].xyyy 125: TEX TEMP[9], TEMP[9], SAMP[7], 2D 126: LRP TEMP[8].xyz, TEMP[6].xxxx, TEMP[9], TEMP[8] 127: MOV TEMP[9].xy, IN[7].xyyy 128: TEX TEMP[9], TEMP[9], SAMP[6], 2D 129: MOV TEMP[14].xy, IN[7].zwww 130: TEX TEMP[14], TEMP[14], SAMP[6], 2D 131: LRP TEMP[9], TEMP[2].xxxx, TEMP[14], TEMP[9] 132: MOV TEMP[14].xy, TEMP[5].xyyy 133: TEX TEMP[14], TEMP[14], SAMP[6], 2D 134: LRP TEMP[9].xyz, TEMP[6].xxxx, TEMP[14], TEMP[9] 135: MOV TEMP[14].xy, IN[6].xyyy 136: TEX TEMP[14], TEMP[14], SAMP[5], 2D 137: MOV TEMP[15].xy, IN[6].zwww 138: TEX TEMP[15], TEMP[15], SAMP[5], 2D 139: LRP TEMP[14], TEMP[2].xxxx, TEMP[15], TEMP[14] 140: MOV TEMP[15].xy, IN[8].zwww 141: TEX TEMP[15], TEMP[15], SAMP[5], 2D 142: LRP TEMP[14].xyz, TEMP[6].xxxx, TEMP[15], TEMP[14] 143: MOV TEMP[15].xy, IN[5].xyyy 144: TEX TEMP[15], TEMP[15], SAMP[4], 2D 145: MOV TEMP[16].xy, IN[5].zwww 146: TEX TEMP[16], TEMP[16], SAMP[4], 2D 147: LRP TEMP[15], TEMP[2].xxxx, TEMP[16], TEMP[15] 148: MOV TEMP[16].xy, IN[8].xyyy 149: TEX TEMP[16], TEMP[16], SAMP[4], 2D 150: LRP TEMP[15].xyz, TEMP[6].xxxx, TEMP[16], TEMP[15] 151: MUL TEMP[15].xyz, TEMP[11].xxxx, TEMP[15].xyzz 152: MAD TEMP[14].xyz, TEMP[10].xxxx, TEMP[14].xyzz, TEMP[15].xyzz 153: MAD TEMP[9].xyz, TEMP[12].xxxx, TEMP[9].xyzz, TEMP[14].xyzz 154: MAD TEMP[8].xyz, TEMP[13].xxxx, TEMP[8].xyzz, TEMP[9].xyzz 155: MUL TEMP[9].x, TEMP[8].xxxx, CONST[1][14].xxxx 156: MUL TEMP[14].x, TEMP[8].zzzz, CONST[1][14].yyyy 157: MOV TEMP[1].xy, TEMP[1].xyyy 158: TEX TEMP[1].yw, TEMP[1], SAMP[11], 2D 159: MOV TEMP[15].xy, IN[7].xyyy 160: TEX TEMP[15].yw, TEMP[15], SAMP[10], 2D 161: MOV TEMP[16].xy, IN[5].xyyy 162: TEX TEMP[16].yw, TEMP[16], SAMP[8], 2D 163: MOV TEMP[17].xy, IN[6].xyyy 164: TEX TEMP[17].yw, TEMP[17], SAMP[9], 2D 165: MUL TEMP[17].xy, TEMP[10].xxxx, TEMP[17].wyyy 166: MAD TEMP[16].xy, TEMP[11].xxxx, TEMP[16].wyyy, TEMP[17].xyyy 167: MAD TEMP[15].xy, TEMP[12].xxxx, TEMP[15].wyyy, TEMP[16].xyyy 168: MAD TEMP[1].xy, TEMP[13].xxxx, TEMP[1].wyyy, TEMP[15].xyyy 169: MAD TEMP[1].xy, TEMP[1].xyyy, IMM[0].zzzz, IMM[3].yyyy 170: MOV TEMP[15].x, TEMP[1].xxxx 171: MOV TEMP[15].y, -TEMP[1].yyyy 172: MUL TEMP[15].xy, TEMP[15].xyyy, CONST[1][0].xxxx 173: MOV TEMP[16].x, TEMP[15].xxxx 174: MOV TEMP[16].y, TEMP[15].yyyy 175: DP2 TEMP[1].x, TEMP[1].xyyy, TEMP[1].xyyy 176: ADD TEMP[1].x, IMM[0].yyyy, -TEMP[1].xxxx 177: MOV_SAT TEMP[1].x, TEMP[1].xxxx 178: SQRT TEMP[1].x, TEMP[1].xxxx 179: MOV TEMP[16].z, TEMP[1].xxxx 180: DP3 TEMP[1].x, TEMP[16].xyzz, TEMP[16].xyzz 181: RSQ TEMP[1].x, TEMP[1].xxxx 182: MUL TEMP[1].xyz, TEMP[16].xyzz, TEMP[1].xxxx 183: DP3 TEMP[15].x, TEMP[0].xyzz, IMM[0].wyww 184: MUL TEMP[15].xyz, TEMP[15].xxxx, TEMP[0].xyzz 185: ADD TEMP[15].xyz, IMM[0].wyww, -TEMP[15].xyzz 186: DP3 TEMP[16].x, TEMP[15].xyzz, TEMP[15].xyzz 187: SQRT TEMP[16].x, TEMP[16].xxxx 188: FSLT TEMP[16].x, TEMP[16].xxxx, IMM[3].zzzz 189: UIF TEMP[16].xxxx :0 190: MOV TEMP[16].xyz, IMM[0].wyww 191: ELSE :0 192: MOV TEMP[16].xyz, TEMP[15].xyzx 193: ENDIF 194: DP3 TEMP[15].x, TEMP[0].xyzz, IMM[3].wwyy 195: MUL TEMP[15].xyz, TEMP[15].xxxx, TEMP[0].xyzz 196: ADD TEMP[15].xyz, IMM[3].wwyy, -TEMP[15].xyzz 197: DP3 TEMP[17].x, TEMP[15].xyzz, TEMP[15].xyzz 198: SQRT TEMP[17].x, TEMP[17].xxxx 199: FSLT TEMP[17].x, TEMP[17].xxxx, IMM[3].zzzz 200: UIF TEMP[17].xxxx :0 201: MOV TEMP[17].xyz, IMM[3].wwyw 202: ELSE :0 203: MOV TEMP[17].xyz, TEMP[15].xyzx 204: ENDIF 205: MUL TEMP[15].xyz, TEMP[17].xyzz, TEMP[1].yyyy 206: MAD TEMP[15].xyz, TEMP[16].xyzz, TEMP[1].xxxx, TEMP[15].xyzz 207: MAD TEMP[1].xyz, TEMP[0].xyzz, TEMP[1].zzzz, TEMP[15].xyzz 208: MOV TEMP[3].xy, TEMP[3].xyyy 209: TEX TEMP[3].yw, TEMP[3], SAMP[11], 2D 210: MOV TEMP[15].xy, IN[7].zwww 211: TEX TEMP[15].yw, TEMP[15], SAMP[10], 2D 212: MOV TEMP[16].xy, IN[5].zwww 213: TEX TEMP[16].yw, TEMP[16], SAMP[8], 2D 214: MOV TEMP[18].xy, IN[6].zwww 215: TEX TEMP[18].yw, TEMP[18], SAMP[9], 2D 216: MUL TEMP[18].xy, TEMP[10].xxxx, TEMP[18].wyyy 217: MAD TEMP[16].xy, TEMP[11].xxxx, TEMP[16].wyyy, TEMP[18].xyyy 218: MAD TEMP[15].xy, TEMP[12].xxxx, TEMP[15].wyyy, TEMP[16].xyyy 219: MAD TEMP[3].xy, TEMP[13].xxxx, TEMP[3].wyyy, TEMP[15].xyyy 220: MAD TEMP[3].xy, IMM[0].zzzz, TEMP[3].xyyy, IMM[3].yyyy 221: MOV TEMP[15].x, TEMP[3].xxxx 222: MOV TEMP[15].y, -TEMP[3].yyyy 223: MUL TEMP[15].xy, CONST[1][0].xxxx, TEMP[15].xyyy 224: MOV TEMP[16].x, TEMP[15].xxxx 225: MOV TEMP[16].y, TEMP[15].yyyy 226: DP2 TEMP[3].x, TEMP[3].xyyy, TEMP[3].xyyy 227: ADD TEMP[3].x, IMM[0].yyyy, -TEMP[3].xxxx 228: MOV_SAT TEMP[3].x, TEMP[3].xxxx 229: SQRT TEMP[3].x, TEMP[3].xxxx 230: MOV TEMP[16].z, TEMP[3].xxxx 231: DP3 TEMP[3].x, TEMP[16].xyzz, TEMP[16].xyzz 232: RSQ TEMP[3].x, TEMP[3].xxxx 233: MUL TEMP[3].xyz, TEMP[16].xyzz, TEMP[3].xxxx 234: DP3 TEMP[15].x, TEMP[0].xyzz, IMM[0].ywww 235: MUL TEMP[15].xyz, TEMP[15].xxxx, TEMP[0].xyzz 236: ADD TEMP[15].xyz, IMM[0].ywww, -TEMP[15].xyzz 237: DP3 TEMP[16].x, TEMP[15].xyzz, TEMP[15].xyzz 238: SQRT TEMP[16].x, TEMP[16].xxxx 239: FSLT TEMP[16].x, TEMP[16].xxxx, IMM[3].zzzz 240: UIF TEMP[16].xxxx :0 241: MOV TEMP[16].xyz, IMM[0].ywwy 242: ELSE :0 243: MOV TEMP[16].xyz, TEMP[15].xyzx 244: ENDIF 245: MUL TEMP[15].xyz, TEMP[17].xyzz, TEMP[3].yyyy 246: MAD TEMP[15].xyz, TEMP[16].xyzz, TEMP[3].xxxx, TEMP[15].xyzz 247: MAD TEMP[3].xyz, TEMP[0].xyzz, TEMP[3].zzzz, TEMP[15].xyzz 248: MOV TEMP[4].xy, TEMP[4].xyyy 249: TEX TEMP[4].yw, TEMP[4], SAMP[11], 2D 250: MOV TEMP[5].xy, TEMP[5].xyyy 251: TEX TEMP[5].yw, TEMP[5], SAMP[10], 2D 252: MOV TEMP[15].xy, IN[8].zwww 253: TEX TEMP[15].yw, TEMP[15], SAMP[9], 2D 254: MOV TEMP[17].xy, IN[8].xyyy 255: TEX TEMP[17].yw, TEMP[17], SAMP[8], 2D 256: MUL TEMP[11].xy, TEMP[11].xxxx, TEMP[17].wyyy 257: MAD TEMP[10].xy, TEMP[10].xxxx, TEMP[15].wyyy, TEMP[11].xyyy 258: MAD TEMP[5].xy, TEMP[12].xxxx, TEMP[5].wyyy, TEMP[10].xyyy 259: MAD TEMP[4].xy, TEMP[13].xxxx, TEMP[4].wyyy, TEMP[5].xyyy 260: MAD TEMP[4].xy, IMM[0].zzzz, TEMP[4].xyyy, IMM[3].yyyy 261: MOV TEMP[5].x, TEMP[4].xxxx 262: MOV TEMP[5].y, -TEMP[4].yyyy 263: MUL TEMP[5].xy, TEMP[5].xyyy, CONST[1][0].xxxx 264: MOV TEMP[10].x, TEMP[5].xxxx 265: MOV TEMP[10].y, TEMP[5].yyyy 266: DP2 TEMP[4].x, TEMP[4].xyyy, TEMP[4].xyyy 267: ADD TEMP[4].x, IMM[0].yyyy, -TEMP[4].xxxx 268: MOV_SAT TEMP[4].x, TEMP[4].xxxx 269: SQRT TEMP[4].x, TEMP[4].xxxx 270: MOV TEMP[10].z, TEMP[4].xxxx 271: DP3 TEMP[4].x, TEMP[10].xyzz, TEMP[10].xyzz 272: RSQ TEMP[4].x, TEMP[4].xxxx 273: MUL TEMP[4].xyz, TEMP[10].xyzz, TEMP[4].xxxx 274: DP3 TEMP[5].x, TEMP[0].xyzz, IMM[3].wyww 275: MUL TEMP[5].xyz, TEMP[5].xxxx, TEMP[0].xyzz 276: ADD TEMP[5].xyz, IMM[3].wyww, -TEMP[5].xyzz 277: DP3 TEMP[10].x, TEMP[5].xyzz, TEMP[5].xyzz 278: SQRT TEMP[10].x, TEMP[10].xxxx 279: FSLT TEMP[10].x, TEMP[10].xxxx, IMM[3].zzzz 280: UIF TEMP[10].xxxx :0 281: MOV TEMP[10].xyz, IMM[3].wyww 282: ELSE :0 283: MOV TEMP[10].xyz, TEMP[5].xyzx 284: ENDIF 285: LRP TEMP[1].xyz, TEMP[2].xxxx, TEMP[3].xyzz, TEMP[1].xyzz 286: MUL TEMP[2].xyz, TEMP[10].xyzz, TEMP[4].yyyy 287: MAD TEMP[2].xyz, TEMP[16].xyzz, TEMP[4].xxxx, TEMP[2].xyzz 288: MAD TEMP[0].xyz, TEMP[0].xyzz, TEMP[4].zzzz, TEMP[2].xyzz 289: LRP TEMP[0].xyz, TEMP[6].xxxx, TEMP[0].xyzz, TEMP[1].xyzz 290: MAD TEMP[0].xyz, TEMP[0].xyzz, IMM[5].xxxx, IMM[5].xxxx 291: MOV TEMP[1].w, IMM[0].wwww 292: MOV TEMP[1].x, TEMP[0].xxxx 293: MOV TEMP[1].y, TEMP[0].yyyy 294: MOV TEMP[1].z, TEMP[0].zzzz 295: MOV TEMP[0].w, IMM[0].wwww 296: MOV TEMP[0].x, TEMP[7].xxxx 297: MOV TEMP[0].y, TEMP[7].yyyy 298: MOV TEMP[0].z, TEMP[7].zzzz 299: MOV TEMP[2].w, IMM[0].wwww 300: MOV TEMP[2].x, TEMP[9].xxxx 301: MOV TEMP[2].y, TEMP[8].yyyy 302: MOV TEMP[2].z, TEMP[14].xxxx 303: MOV OUT[2], IN[0].wwww 304: MOV OUT[0], TEMP[0] 305: MOV OUT[3], TEMP[1] 306: MOV OUT[1], TEMP[2] 307: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 308) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 312) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 320) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 324) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 328) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 368) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 372) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 376) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 384) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 388) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 392) %51 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %52 = load <8 x i32>, <8 x i32> addrspace(2)* %51, align 32, !tbaa !0 %53 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %54 = load <4 x i32>, <4 x i32> addrspace(2)* %53, align 16, !tbaa !0 %55 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %56 = load <8 x i32>, <8 x i32> addrspace(2)* %55, align 32, !tbaa !0 %57 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %58 = load <4 x i32>, <4 x i32> addrspace(2)* %57, align 16, !tbaa !0 %59 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %60 = load <8 x i32>, <8 x i32> addrspace(2)* %59, align 32, !tbaa !0 %61 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %62 = load <4 x i32>, <4 x i32> addrspace(2)* %61, align 16, !tbaa !0 %63 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %64 = load <8 x i32>, <8 x i32> addrspace(2)* %63, align 32, !tbaa !0 %65 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %66 = load <4 x i32>, <4 x i32> addrspace(2)* %65, align 16, !tbaa !0 %67 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %68 = load <8 x i32>, <8 x i32> addrspace(2)* %67, align 32, !tbaa !0 %69 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %70 = load <4 x i32>, <4 x i32> addrspace(2)* %69, align 16, !tbaa !0 %71 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %72 = load <8 x i32>, <8 x i32> addrspace(2)* %71, align 32, !tbaa !0 %73 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %74 = load <4 x i32>, <4 x i32> addrspace(2)* %73, align 16, !tbaa !0 %75 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 6 %76 = load <8 x i32>, <8 x i32> addrspace(2)* %75, align 32, !tbaa !0 %77 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 6 %78 = load <4 x i32>, <4 x i32> addrspace(2)* %77, align 16, !tbaa !0 %79 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 7 %80 = load <8 x i32>, <8 x i32> addrspace(2)* %79, align 32, !tbaa !0 %81 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 7 %82 = load <4 x i32>, <4 x i32> addrspace(2)* %81, align 16, !tbaa !0 %83 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 8 %84 = load <8 x i32>, <8 x i32> addrspace(2)* %83, align 32, !tbaa !0 %85 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 8 %86 = load <4 x i32>, <4 x i32> addrspace(2)* %85, align 16, !tbaa !0 %87 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 9 %88 = load <8 x i32>, <8 x i32> addrspace(2)* %87, align 32, !tbaa !0 %89 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 9 %90 = load <4 x i32>, <4 x i32> addrspace(2)* %89, align 16, !tbaa !0 %91 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 10 %92 = load <8 x i32>, <8 x i32> addrspace(2)* %91, align 32, !tbaa !0 %93 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 10 %94 = load <4 x i32>, <4 x i32> addrspace(2)* %93, align 16, !tbaa !0 %95 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 11 %96 = load <8 x i32>, <8 x i32> addrspace(2)* %95, align 32, !tbaa !0 %97 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 11 %98 = load <4 x i32>, <4 x i32> addrspace(2)* %97, align 16, !tbaa !0 %99 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %100 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %101 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %102 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %103 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %104 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %105 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %106 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %107 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %108 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %109 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %110 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %111 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %112 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %113 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %114 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %115 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %116 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7) %117 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %118 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %119 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %120 = call float @llvm.SI.fs.interp(i32 3, i32 5, i32 %5, <2 x i32> %7) %121 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %5, <2 x i32> %7) %122 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %5, <2 x i32> %7) %123 = call float @llvm.SI.fs.interp(i32 2, i32 6, i32 %5, <2 x i32> %7) %124 = call float @llvm.SI.fs.interp(i32 3, i32 6, i32 %5, <2 x i32> %7) %125 = call float @llvm.SI.fs.interp(i32 0, i32 7, i32 %5, <2 x i32> %7) %126 = call float @llvm.SI.fs.interp(i32 1, i32 7, i32 %5, <2 x i32> %7) %127 = call float @llvm.SI.fs.interp(i32 2, i32 7, i32 %5, <2 x i32> %7) %128 = call float @llvm.SI.fs.interp(i32 3, i32 7, i32 %5, <2 x i32> %7) %129 = call float @llvm.SI.fs.interp(i32 0, i32 8, i32 %5, <2 x i32> %7) %130 = call float @llvm.SI.fs.interp(i32 1, i32 8, i32 %5, <2 x i32> %7) %131 = call float @llvm.SI.fs.interp(i32 2, i32 8, i32 %5, <2 x i32> %7) %132 = call float @llvm.SI.fs.interp(i32 3, i32 8, i32 %5, <2 x i32> %7) %133 = fmul float %103, %103 %134 = fmul float %104, %104 %135 = fadd float %134, %133 %136 = fmul float %105, %105 %137 = fadd float %135, %136 %138 = call float @llvm.AMDGPU.rsq.clamped.f32(float %137) %139 = fmul float %103, %138 %140 = fmul float %104, %138 %141 = fmul float %105, %138 %142 = fmul float %99, 7.812500e-03 %143 = fmul float %100, 7.812500e-03 %144 = fmul float %101, 7.812500e-03 %145 = fsub float -0.000000e+00, %144 %146 = fsub float -0.000000e+00, %143 %147 = fmul float %45, %143 %148 = fmul float %46, %145 %149 = fadd float %148, %147 %150 = fadd float %149, %47 %151 = fmul float %48, %143 %152 = fmul float %49, %145 %153 = fadd float %152, %151 %154 = fadd float %153, %50 %155 = fmul float %45, %142 %156 = fmul float %46, %145 %157 = fadd float %156, %155 %158 = fadd float %157, %47 %159 = fmul float %48, %142 %160 = fmul float %49, %145 %161 = fadd float %160, %159 %162 = fadd float %161, %50 %163 = fmul float %45, %142 %164 = fmul float %46, %146 %165 = fadd float %164, %163 %166 = fadd float %165, %47 %167 = fmul float %48, %142 %168 = fmul float %49, %146 %169 = fadd float %168, %167 %170 = fadd float %169, %50 %171 = fmul float %39, %142 %172 = fmul float %40, %146 %173 = fadd float %172, %171 %174 = fadd float %173, %41 %175 = fmul float %42, %142 %176 = fmul float %43, %146 %177 = fadd float %176, %175 %178 = fadd float %177, %44 %179 = fmul float %112, 2.000000e+00 %180 = call float @llvm.AMDIL.clamp.(float %179, float 0.000000e+00, float 1.000000e+00) %181 = fmul float %116, 2.000000e+00 %182 = call float @llvm.AMDIL.clamp.(float %181, float 0.000000e+00, float 1.000000e+00) %183 = bitcast float %121 to i32 %184 = bitcast float %122 to i32 %185 = insertelement <2 x i32> undef, i32 %183, i32 0 %186 = insertelement <2 x i32> %185, i32 %184, i32 1 %187 = bitcast <8 x i32> %56 to <32 x i8> %188 = bitcast <4 x i32> %58 to <16 x i8> %189 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %186, <32 x i8> %187, <16 x i8> %188, i32 2) %190 = extractelement <4 x float> %189, i32 0 %191 = extractelement <4 x float> %189, i32 1 %192 = extractelement <4 x float> %189, i32 2 %193 = extractelement <4 x float> %189, i32 3 %194 = bitcast float %123 to i32 %195 = bitcast float %124 to i32 %196 = insertelement <2 x i32> undef, i32 %194, i32 0 %197 = insertelement <2 x i32> %196, i32 %195, i32 1 %198 = bitcast <8 x i32> %56 to <32 x i8> %199 = bitcast <4 x i32> %58 to <16 x i8> %200 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %197, <32 x i8> %198, <16 x i8> %199, i32 2) %201 = extractelement <4 x float> %200, i32 0 %202 = extractelement <4 x float> %200, i32 1 %203 = extractelement <4 x float> %200, i32 2 %204 = extractelement <4 x float> %200, i32 3 %205 = call float @llvm.AMDGPU.lrp(float %180, float %201, float %190) %206 = call float @llvm.AMDGPU.lrp(float %180, float %202, float %191) %207 = call float @llvm.AMDGPU.lrp(float %180, float %203, float %192) %208 = call float @llvm.AMDGPU.lrp(float %180, float %204, float %193) %209 = bitcast float %131 to i32 %210 = bitcast float %132 to i32 %211 = insertelement <2 x i32> undef, i32 %209, i32 0 %212 = insertelement <2 x i32> %211, i32 %210, i32 1 %213 = bitcast <8 x i32> %56 to <32 x i8> %214 = bitcast <4 x i32> %58 to <16 x i8> %215 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %212, <32 x i8> %213, <16 x i8> %214, i32 2) %216 = extractelement <4 x float> %215, i32 0 %217 = extractelement <4 x float> %215, i32 1 %218 = extractelement <4 x float> %215, i32 2 %219 = extractelement <4 x float> %215, i32 3 %220 = call float @llvm.AMDGPU.lrp(float %182, float %216, float %205) %221 = call float @llvm.AMDGPU.lrp(float %182, float %217, float %206) %222 = call float @llvm.AMDGPU.lrp(float %182, float %218, float %207) %223 = call float @llvm.AMDGPU.lrp(float %182, float %219, float %208) %224 = bitcast float %125 to i32 %225 = bitcast float %126 to i32 %226 = insertelement <2 x i32> undef, i32 %224, i32 0 %227 = insertelement <2 x i32> %226, i32 %225, i32 1 %228 = bitcast <8 x i32> %60 to <32 x i8> %229 = bitcast <4 x i32> %62 to <16 x i8> %230 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %227, <32 x i8> %228, <16 x i8> %229, i32 2) %231 = extractelement <4 x float> %230, i32 0 %232 = extractelement <4 x float> %230, i32 1 %233 = extractelement <4 x float> %230, i32 2 %234 = extractelement <4 x float> %230, i32 3 %235 = bitcast float %127 to i32 %236 = bitcast float %128 to i32 %237 = insertelement <2 x i32> undef, i32 %235, i32 0 %238 = insertelement <2 x i32> %237, i32 %236, i32 1 %239 = bitcast <8 x i32> %60 to <32 x i8> %240 = bitcast <4 x i32> %62 to <16 x i8> %241 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %238, <32 x i8> %239, <16 x i8> %240, i32 2) %242 = extractelement <4 x float> %241, i32 0 %243 = extractelement <4 x float> %241, i32 1 %244 = extractelement <4 x float> %241, i32 2 %245 = extractelement <4 x float> %241, i32 3 %246 = call float @llvm.AMDGPU.lrp(float %180, float %242, float %231) %247 = call float @llvm.AMDGPU.lrp(float %180, float %243, float %232) %248 = call float @llvm.AMDGPU.lrp(float %180, float %244, float %233) %249 = call float @llvm.AMDGPU.lrp(float %180, float %245, float %234) %250 = bitcast float %174 to i32 %251 = bitcast float %178 to i32 %252 = insertelement <2 x i32> undef, i32 %250, i32 0 %253 = insertelement <2 x i32> %252, i32 %251, i32 1 %254 = bitcast <8 x i32> %60 to <32 x i8> %255 = bitcast <4 x i32> %62 to <16 x i8> %256 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %253, <32 x i8> %254, <16 x i8> %255, i32 2) %257 = extractelement <4 x float> %256, i32 0 %258 = extractelement <4 x float> %256, i32 1 %259 = extractelement <4 x float> %256, i32 2 %260 = extractelement <4 x float> %256, i32 3 %261 = call float @llvm.AMDGPU.lrp(float %182, float %257, float %246) %262 = call float @llvm.AMDGPU.lrp(float %182, float %258, float %247) %263 = call float @llvm.AMDGPU.lrp(float %182, float %259, float %248) %264 = call float @llvm.AMDGPU.lrp(float %182, float %260, float %249) %265 = bitcast float %150 to i32 %266 = bitcast float %154 to i32 %267 = insertelement <2 x i32> undef, i32 %265, i32 0 %268 = insertelement <2 x i32> %267, i32 %266, i32 1 %269 = bitcast <8 x i32> %64 to <32 x i8> %270 = bitcast <4 x i32> %66 to <16 x i8> %271 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %268, <32 x i8> %269, <16 x i8> %270, i32 2) %272 = extractelement <4 x float> %271, i32 0 %273 = extractelement <4 x float> %271, i32 1 %274 = extractelement <4 x float> %271, i32 2 %275 = extractelement <4 x float> %271, i32 3 %276 = bitcast float %158 to i32 %277 = bitcast float %162 to i32 %278 = insertelement <2 x i32> undef, i32 %276, i32 0 %279 = insertelement <2 x i32> %278, i32 %277, i32 1 %280 = bitcast <8 x i32> %64 to <32 x i8> %281 = bitcast <4 x i32> %66 to <16 x i8> %282 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %279, <32 x i8> %280, <16 x i8> %281, i32 2) %283 = extractelement <4 x float> %282, i32 0 %284 = extractelement <4 x float> %282, i32 1 %285 = extractelement <4 x float> %282, i32 2 %286 = extractelement <4 x float> %282, i32 3 %287 = call float @llvm.AMDGPU.lrp(float %180, float %283, float %272) %288 = call float @llvm.AMDGPU.lrp(float %180, float %284, float %273) %289 = call float @llvm.AMDGPU.lrp(float %180, float %285, float %274) %290 = call float @llvm.AMDGPU.lrp(float %180, float %286, float %275) %291 = bitcast float %166 to i32 %292 = bitcast float %170 to i32 %293 = insertelement <2 x i32> undef, i32 %291, i32 0 %294 = insertelement <2 x i32> %293, i32 %292, i32 1 %295 = bitcast <8 x i32> %64 to <32 x i8> %296 = bitcast <4 x i32> %66 to <16 x i8> %297 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %294, <32 x i8> %295, <16 x i8> %296, i32 2) %298 = extractelement <4 x float> %297, i32 0 %299 = extractelement <4 x float> %297, i32 1 %300 = extractelement <4 x float> %297, i32 2 %301 = extractelement <4 x float> %297, i32 3 %302 = call float @llvm.AMDGPU.lrp(float %182, float %298, float %287) %303 = call float @llvm.AMDGPU.lrp(float %182, float %299, float %288) %304 = call float @llvm.AMDGPU.lrp(float %182, float %300, float %289) %305 = call float @llvm.AMDGPU.lrp(float %182, float %301, float %290) %306 = fsub float %223, %113 %307 = call float @llvm.maxnum.f32(float %306, float 0.000000e+00) %308 = fsub float %109, %307 %309 = fadd float %113, %223 %310 = call float @llvm.minnum.f32(float %309, float 1.000000e+00) %311 = fsub float %310, %307 %312 = fdiv float 1.000000e+00, %311 %313 = fmul float %308, %312 %314 = call float @llvm.AMDIL.clamp.(float %313, float 0.000000e+00, float 1.000000e+00) %315 = fmul float %314, 2.000000e+00 %316 = fsub float 3.000000e+00, %315 %317 = fmul float %314, %316 %318 = fmul float %314, %317 %319 = fsub float 1.000000e+00, %318 %320 = fsub float %264, %114 %321 = call float @llvm.maxnum.f32(float %320, float 0.000000e+00) %322 = fsub float %110, %321 %323 = fadd float %114, %264 %324 = call float @llvm.minnum.f32(float %323, float 1.000000e+00) %325 = fsub float %324, %321 %326 = fdiv float 1.000000e+00, %325 %327 = fmul float %322, %326 %328 = call float @llvm.AMDIL.clamp.(float %327, float 0.000000e+00, float 1.000000e+00) %329 = fmul float %328, 2.000000e+00 %330 = fsub float 3.000000e+00, %329 %331 = fmul float %328, %330 %332 = fmul float %328, %331 %333 = call float @llvm.maxnum.f32(float %332, float 0.000000e+00) %334 = call float @llvm.minnum.f32(float %333, float %319) %335 = fsub float %319, %334 %336 = fsub float %305, %115 %337 = call float @llvm.maxnum.f32(float %336, float 0.000000e+00) %338 = fsub float %111, %337 %339 = fadd float %115, %305 %340 = call float @llvm.minnum.f32(float %339, float 1.000000e+00) %341 = fsub float %340, %337 %342 = fdiv float 1.000000e+00, %341 %343 = fmul float %338, %342 %344 = call float @llvm.AMDIL.clamp.(float %343, float 0.000000e+00, float 1.000000e+00) %345 = fmul float %344, 2.000000e+00 %346 = fsub float 3.000000e+00, %345 %347 = fmul float %344, %346 %348 = fmul float %344, %347 %349 = call float @llvm.maxnum.f32(float %348, float 0.000000e+00) %350 = call float @llvm.minnum.f32(float %349, float %335) %351 = fsub float %335, %350 %352 = fmul float %34, %302 %353 = fmul float %35, %303 %354 = fmul float %36, %304 %355 = fmul float %31, %261 %356 = fmul float %32, %262 %357 = fmul float %33, %263 %358 = fmul float %28, %220 %359 = fmul float %29, %221 %360 = fmul float %30, %222 %361 = bitcast float %117 to i32 %362 = bitcast float %118 to i32 %363 = insertelement <2 x i32> undef, i32 %361, i32 0 %364 = insertelement <2 x i32> %363, i32 %362, i32 1 %365 = bitcast <8 x i32> %52 to <32 x i8> %366 = bitcast <4 x i32> %54 to <16 x i8> %367 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %364, <32 x i8> %365, <16 x i8> %366, i32 2) %368 = extractelement <4 x float> %367, i32 0 %369 = extractelement <4 x float> %367, i32 1 %370 = extractelement <4 x float> %367, i32 2 %371 = extractelement <4 x float> %367, i32 3 %372 = bitcast float %119 to i32 %373 = bitcast float %120 to i32 %374 = insertelement <2 x i32> undef, i32 %372, i32 0 %375 = insertelement <2 x i32> %374, i32 %373, i32 1 %376 = bitcast <8 x i32> %52 to <32 x i8> %377 = bitcast <4 x i32> %54 to <16 x i8> %378 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %375, <32 x i8> %376, <16 x i8> %377, i32 2) %379 = extractelement <4 x float> %378, i32 0 %380 = extractelement <4 x float> %378, i32 1 %381 = extractelement <4 x float> %378, i32 2 %382 = extractelement <4 x float> %378, i32 3 %383 = call float @llvm.AMDGPU.lrp(float %180, float %379, float %368) %384 = call float @llvm.AMDGPU.lrp(float %180, float %380, float %369) %385 = call float @llvm.AMDGPU.lrp(float %180, float %381, float %370) %386 = call float @llvm.AMDGPU.lrp(float %180, float %382, float %371) %387 = bitcast float %129 to i32 %388 = bitcast float %130 to i32 %389 = insertelement <2 x i32> undef, i32 %387, i32 0 %390 = insertelement <2 x i32> %389, i32 %388, i32 1 %391 = bitcast <8 x i32> %52 to <32 x i8> %392 = bitcast <4 x i32> %54 to <16 x i8> %393 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %390, <32 x i8> %391, <16 x i8> %392, i32 2) %394 = extractelement <4 x float> %393, i32 0 %395 = extractelement <4 x float> %393, i32 1 %396 = extractelement <4 x float> %393, i32 2 %397 = call float @llvm.AMDGPU.lrp(float %182, float %394, float %383) %398 = call float @llvm.AMDGPU.lrp(float %182, float %395, float %384) %399 = call float @llvm.AMDGPU.lrp(float %182, float %396, float %385) %400 = fmul float %25, %397 %401 = fmul float %26, %398 %402 = fmul float %27, %399 %403 = fmul float %400, %351 %404 = fmul float %401, %351 %405 = fmul float %402, %351 %406 = fmul float %358, %318 %407 = fadd float %406, %403 %408 = fmul float %359, %318 %409 = fadd float %408, %404 %410 = fmul float %360, %318 %411 = fadd float %410, %405 %412 = fmul float %334, %355 %413 = fadd float %412, %407 %414 = fmul float %334, %356 %415 = fadd float %414, %409 %416 = fmul float %334, %357 %417 = fadd float %416, %411 %418 = fmul float %352, %350 %419 = fadd float %418, %413 %420 = fmul float %353, %350 %421 = fadd float %420, %415 %422 = fmul float %354, %350 %423 = fadd float %422, %417 %424 = fmul float %419, %106 %425 = fmul float %421, %107 %426 = fmul float %423, %108 %427 = bitcast float %150 to i32 %428 = bitcast float %154 to i32 %429 = insertelement <2 x i32> undef, i32 %427, i32 0 %430 = insertelement <2 x i32> %429, i32 %428, i32 1 %431 = bitcast <8 x i32> %80 to <32 x i8> %432 = bitcast <4 x i32> %82 to <16 x i8> %433 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %430, <32 x i8> %431, <16 x i8> %432, i32 2) %434 = extractelement <4 x float> %433, i32 0 %435 = extractelement <4 x float> %433, i32 1 %436 = extractelement <4 x float> %433, i32 2 %437 = extractelement <4 x float> %433, i32 3 %438 = bitcast float %158 to i32 %439 = bitcast float %162 to i32 %440 = insertelement <2 x i32> undef, i32 %438, i32 0 %441 = insertelement <2 x i32> %440, i32 %439, i32 1 %442 = bitcast <8 x i32> %80 to <32 x i8> %443 = bitcast <4 x i32> %82 to <16 x i8> %444 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %441, <32 x i8> %442, <16 x i8> %443, i32 2) %445 = extractelement <4 x float> %444, i32 0 %446 = extractelement <4 x float> %444, i32 1 %447 = extractelement <4 x float> %444, i32 2 %448 = extractelement <4 x float> %444, i32 3 %449 = call float @llvm.AMDGPU.lrp(float %180, float %445, float %434) %450 = call float @llvm.AMDGPU.lrp(float %180, float %446, float %435) %451 = call float @llvm.AMDGPU.lrp(float %180, float %447, float %436) %452 = call float @llvm.AMDGPU.lrp(float %180, float %448, float %437) %453 = bitcast float %166 to i32 %454 = bitcast float %170 to i32 %455 = insertelement <2 x i32> undef, i32 %453, i32 0 %456 = insertelement <2 x i32> %455, i32 %454, i32 1 %457 = bitcast <8 x i32> %80 to <32 x i8> %458 = bitcast <4 x i32> %82 to <16 x i8> %459 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %456, <32 x i8> %457, <16 x i8> %458, i32 2) %460 = extractelement <4 x float> %459, i32 0 %461 = extractelement <4 x float> %459, i32 1 %462 = extractelement <4 x float> %459, i32 2 %463 = call float @llvm.AMDGPU.lrp(float %182, float %460, float %449) %464 = call float @llvm.AMDGPU.lrp(float %182, float %461, float %450) %465 = call float @llvm.AMDGPU.lrp(float %182, float %462, float %451) %466 = bitcast float %125 to i32 %467 = bitcast float %126 to i32 %468 = insertelement <2 x i32> undef, i32 %466, i32 0 %469 = insertelement <2 x i32> %468, i32 %467, i32 1 %470 = bitcast <8 x i32> %76 to <32 x i8> %471 = bitcast <4 x i32> %78 to <16 x i8> %472 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %469, <32 x i8> %470, <16 x i8> %471, i32 2) %473 = extractelement <4 x float> %472, i32 0 %474 = extractelement <4 x float> %472, i32 1 %475 = extractelement <4 x float> %472, i32 2 %476 = extractelement <4 x float> %472, i32 3 %477 = bitcast float %127 to i32 %478 = bitcast float %128 to i32 %479 = insertelement <2 x i32> undef, i32 %477, i32 0 %480 = insertelement <2 x i32> %479, i32 %478, i32 1 %481 = bitcast <8 x i32> %76 to <32 x i8> %482 = bitcast <4 x i32> %78 to <16 x i8> %483 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %480, <32 x i8> %481, <16 x i8> %482, i32 2) %484 = extractelement <4 x float> %483, i32 0 %485 = extractelement <4 x float> %483, i32 1 %486 = extractelement <4 x float> %483, i32 2 %487 = extractelement <4 x float> %483, i32 3 %488 = call float @llvm.AMDGPU.lrp(float %180, float %484, float %473) %489 = call float @llvm.AMDGPU.lrp(float %180, float %485, float %474) %490 = call float @llvm.AMDGPU.lrp(float %180, float %486, float %475) %491 = call float @llvm.AMDGPU.lrp(float %180, float %487, float %476) %492 = bitcast float %174 to i32 %493 = bitcast float %178 to i32 %494 = insertelement <2 x i32> undef, i32 %492, i32 0 %495 = insertelement <2 x i32> %494, i32 %493, i32 1 %496 = bitcast <8 x i32> %76 to <32 x i8> %497 = bitcast <4 x i32> %78 to <16 x i8> %498 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %495, <32 x i8> %496, <16 x i8> %497, i32 2) %499 = extractelement <4 x float> %498, i32 0 %500 = extractelement <4 x float> %498, i32 1 %501 = extractelement <4 x float> %498, i32 2 %502 = call float @llvm.AMDGPU.lrp(float %182, float %499, float %488) %503 = call float @llvm.AMDGPU.lrp(float %182, float %500, float %489) %504 = call float @llvm.AMDGPU.lrp(float %182, float %501, float %490) %505 = bitcast float %121 to i32 %506 = bitcast float %122 to i32 %507 = insertelement <2 x i32> undef, i32 %505, i32 0 %508 = insertelement <2 x i32> %507, i32 %506, i32 1 %509 = bitcast <8 x i32> %72 to <32 x i8> %510 = bitcast <4 x i32> %74 to <16 x i8> %511 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %508, <32 x i8> %509, <16 x i8> %510, i32 2) %512 = extractelement <4 x float> %511, i32 0 %513 = extractelement <4 x float> %511, i32 1 %514 = extractelement <4 x float> %511, i32 2 %515 = extractelement <4 x float> %511, i32 3 %516 = bitcast float %123 to i32 %517 = bitcast float %124 to i32 %518 = insertelement <2 x i32> undef, i32 %516, i32 0 %519 = insertelement <2 x i32> %518, i32 %517, i32 1 %520 = bitcast <8 x i32> %72 to <32 x i8> %521 = bitcast <4 x i32> %74 to <16 x i8> %522 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %519, <32 x i8> %520, <16 x i8> %521, i32 2) %523 = extractelement <4 x float> %522, i32 0 %524 = extractelement <4 x float> %522, i32 1 %525 = extractelement <4 x float> %522, i32 2 %526 = extractelement <4 x float> %522, i32 3 %527 = call float @llvm.AMDGPU.lrp(float %180, float %523, float %512) %528 = call float @llvm.AMDGPU.lrp(float %180, float %524, float %513) %529 = call float @llvm.AMDGPU.lrp(float %180, float %525, float %514) %530 = call float @llvm.AMDGPU.lrp(float %180, float %526, float %515) %531 = bitcast float %131 to i32 %532 = bitcast float %132 to i32 %533 = insertelement <2 x i32> undef, i32 %531, i32 0 %534 = insertelement <2 x i32> %533, i32 %532, i32 1 %535 = bitcast <8 x i32> %72 to <32 x i8> %536 = bitcast <4 x i32> %74 to <16 x i8> %537 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %534, <32 x i8> %535, <16 x i8> %536, i32 2) %538 = extractelement <4 x float> %537, i32 0 %539 = extractelement <4 x float> %537, i32 1 %540 = extractelement <4 x float> %537, i32 2 %541 = call float @llvm.AMDGPU.lrp(float %182, float %538, float %527) %542 = call float @llvm.AMDGPU.lrp(float %182, float %539, float %528) %543 = call float @llvm.AMDGPU.lrp(float %182, float %540, float %529) %544 = bitcast float %117 to i32 %545 = bitcast float %118 to i32 %546 = insertelement <2 x i32> undef, i32 %544, i32 0 %547 = insertelement <2 x i32> %546, i32 %545, i32 1 %548 = bitcast <8 x i32> %68 to <32 x i8> %549 = bitcast <4 x i32> %70 to <16 x i8> %550 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %547, <32 x i8> %548, <16 x i8> %549, i32 2) %551 = extractelement <4 x float> %550, i32 0 %552 = extractelement <4 x float> %550, i32 1 %553 = extractelement <4 x float> %550, i32 2 %554 = extractelement <4 x float> %550, i32 3 %555 = bitcast float %119 to i32 %556 = bitcast float %120 to i32 %557 = insertelement <2 x i32> undef, i32 %555, i32 0 %558 = insertelement <2 x i32> %557, i32 %556, i32 1 %559 = bitcast <8 x i32> %68 to <32 x i8> %560 = bitcast <4 x i32> %70 to <16 x i8> %561 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %558, <32 x i8> %559, <16 x i8> %560, i32 2) %562 = extractelement <4 x float> %561, i32 0 %563 = extractelement <4 x float> %561, i32 1 %564 = extractelement <4 x float> %561, i32 2 %565 = extractelement <4 x float> %561, i32 3 %566 = call float @llvm.AMDGPU.lrp(float %180, float %562, float %551) %567 = call float @llvm.AMDGPU.lrp(float %180, float %563, float %552) %568 = call float @llvm.AMDGPU.lrp(float %180, float %564, float %553) %569 = call float @llvm.AMDGPU.lrp(float %180, float %565, float %554) %570 = bitcast float %129 to i32 %571 = bitcast float %130 to i32 %572 = insertelement <2 x i32> undef, i32 %570, i32 0 %573 = insertelement <2 x i32> %572, i32 %571, i32 1 %574 = bitcast <8 x i32> %68 to <32 x i8> %575 = bitcast <4 x i32> %70 to <16 x i8> %576 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %573, <32 x i8> %574, <16 x i8> %575, i32 2) %577 = extractelement <4 x float> %576, i32 0 %578 = extractelement <4 x float> %576, i32 1 %579 = extractelement <4 x float> %576, i32 2 %580 = call float @llvm.AMDGPU.lrp(float %182, float %577, float %566) %581 = call float @llvm.AMDGPU.lrp(float %182, float %578, float %567) %582 = call float @llvm.AMDGPU.lrp(float %182, float %579, float %568) %583 = fmul float %351, %580 %584 = fmul float %351, %581 %585 = fmul float %351, %582 %586 = fmul float %318, %541 %587 = fadd float %586, %583 %588 = fmul float %318, %542 %589 = fadd float %588, %584 %590 = fmul float %318, %543 %591 = fadd float %590, %585 %592 = fmul float %334, %502 %593 = fadd float %592, %587 %594 = fmul float %334, %503 %595 = fadd float %594, %589 %596 = fmul float %334, %504 %597 = fadd float %596, %591 %598 = fmul float %350, %463 %599 = fadd float %598, %593 %600 = fmul float %350, %464 %601 = fadd float %600, %595 %602 = fmul float %350, %465 %603 = fadd float %602, %597 %604 = fmul float %599, %37 %605 = fmul float %603, %38 %606 = bitcast float %150 to i32 %607 = bitcast float %154 to i32 %608 = insertelement <2 x i32> undef, i32 %606, i32 0 %609 = insertelement <2 x i32> %608, i32 %607, i32 1 %610 = bitcast <8 x i32> %96 to <32 x i8> %611 = bitcast <4 x i32> %98 to <16 x i8> %612 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %609, <32 x i8> %610, <16 x i8> %611, i32 2) %613 = extractelement <4 x float> %612, i32 1 %614 = extractelement <4 x float> %612, i32 3 %615 = bitcast float %125 to i32 %616 = bitcast float %126 to i32 %617 = insertelement <2 x i32> undef, i32 %615, i32 0 %618 = insertelement <2 x i32> %617, i32 %616, i32 1 %619 = bitcast <8 x i32> %92 to <32 x i8> %620 = bitcast <4 x i32> %94 to <16 x i8> %621 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %618, <32 x i8> %619, <16 x i8> %620, i32 2) %622 = extractelement <4 x float> %621, i32 1 %623 = extractelement <4 x float> %621, i32 3 %624 = bitcast float %117 to i32 %625 = bitcast float %118 to i32 %626 = insertelement <2 x i32> undef, i32 %624, i32 0 %627 = insertelement <2 x i32> %626, i32 %625, i32 1 %628 = bitcast <8 x i32> %84 to <32 x i8> %629 = bitcast <4 x i32> %86 to <16 x i8> %630 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %627, <32 x i8> %628, <16 x i8> %629, i32 2) %631 = extractelement <4 x float> %630, i32 1 %632 = extractelement <4 x float> %630, i32 3 %633 = bitcast float %121 to i32 %634 = bitcast float %122 to i32 %635 = insertelement <2 x i32> undef, i32 %633, i32 0 %636 = insertelement <2 x i32> %635, i32 %634, i32 1 %637 = bitcast <8 x i32> %88 to <32 x i8> %638 = bitcast <4 x i32> %90 to <16 x i8> %639 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %636, <32 x i8> %637, <16 x i8> %638, i32 2) %640 = extractelement <4 x float> %639, i32 1 %641 = extractelement <4 x float> %639, i32 3 %642 = fmul float %318, %641 %643 = fmul float %318, %640 %644 = fmul float %351, %632 %645 = fadd float %644, %642 %646 = fmul float %351, %631 %647 = fadd float %646, %643 %648 = fmul float %334, %623 %649 = fadd float %648, %645 %650 = fmul float %334, %622 %651 = fadd float %650, %647 %652 = fmul float %350, %614 %653 = fadd float %652, %649 %654 = fmul float %350, %613 %655 = fadd float %654, %651 %656 = fmul float %653, 2.000000e+00 %657 = fadd float %656, -1.000000e+00 %658 = fmul float %655, 2.000000e+00 %659 = fadd float %658, -1.000000e+00 %660 = fmul float %657, %24 %661 = fmul float %659, %24 %662 = fmul float %657, %657 %663 = fmul float %659, %659 %664 = fadd float %662, %663 %665 = fsub float 1.000000e+00, %664 %666 = call float @llvm.AMDIL.clamp.(float %665, float 0.000000e+00, float 1.000000e+00) %667 = call float @llvm.sqrt.f32(float %666) %668 = fmul float %660, %660 %669 = fmul float %661, %661 %670 = fadd float %669, %668 %671 = fmul float %667, %667 %672 = fadd float %670, %671 %673 = call float @llvm.AMDGPU.rsq.clamped.f32(float %672) %674 = fmul float %660, %673 %675 = fmul float %661, %673 %676 = fsub float -0.000000e+00, %675 %677 = fmul float %667, %673 %678 = fmul float %139, 0.000000e+00 %679 = fadd float %140, %678 %680 = fmul float %141, 0.000000e+00 %681 = fadd float %679, %680 %682 = fmul float %681, %139 %683 = fmul float %681, %140 %684 = fmul float %681, %141 %685 = fsub float 0.000000e+00, %682 %686 = fsub float 1.000000e+00, %683 %687 = fsub float 0.000000e+00, %684 %688 = fmul float %685, %685 %689 = fmul float %686, %686 %690 = fadd float %689, %688 %691 = fmul float %687, %687 %692 = fadd float %690, %691 %693 = call float @llvm.sqrt.f32(float %692) %694 = fcmp olt float %693, 0x3EE4F8B580000000 %. = select i1 %694, float 0.000000e+00, float %685 %.85 = select i1 %694, float 1.000000e+00, float %686 %.86 = select i1 %694, float 0.000000e+00, float %687 %695 = fmul float %139, 0.000000e+00 %696 = fmul float %140, 0.000000e+00 %697 = fadd float %696, %695 %698 = fsub float %697, %141 %699 = fmul float %698, %139 %700 = fmul float %698, %140 %701 = fmul float %698, %141 %702 = fsub float 0.000000e+00, %699 %703 = fsub float 0.000000e+00, %700 %704 = fsub float -1.000000e+00, %701 %705 = fmul float %702, %702 %706 = fmul float %703, %703 %707 = fadd float %706, %705 %708 = fmul float %704, %704 %709 = fadd float %707, %708 %710 = call float @llvm.sqrt.f32(float %709) %711 = fcmp olt float %710, 0x3EE4F8B580000000 %temp68.0 = select i1 %711, float 0.000000e+00, float %702 %temp69.0 = select i1 %711, float 0.000000e+00, float %703 %temp70.0 = select i1 %711, float -1.000000e+00, float %704 %712 = fmul float %temp68.0, %676 %713 = fmul float %temp69.0, %676 %714 = fmul float %temp70.0, %676 %715 = fmul float %., %674 %716 = fadd float %715, %712 %717 = fmul float %.85, %674 %718 = fadd float %717, %713 %719 = fmul float %.86, %674 %720 = fadd float %719, %714 %721 = fmul float %139, %677 %722 = fadd float %721, %716 %723 = fmul float %140, %677 %724 = fadd float %723, %718 %725 = fmul float %141, %677 %726 = fadd float %725, %720 %727 = bitcast float %158 to i32 %728 = bitcast float %162 to i32 %729 = insertelement <2 x i32> undef, i32 %727, i32 0 %730 = insertelement <2 x i32> %729, i32 %728, i32 1 %731 = bitcast <8 x i32> %96 to <32 x i8> %732 = bitcast <4 x i32> %98 to <16 x i8> %733 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %730, <32 x i8> %731, <16 x i8> %732, i32 2) %734 = extractelement <4 x float> %733, i32 1 %735 = extractelement <4 x float> %733, i32 3 %736 = bitcast float %127 to i32 %737 = bitcast float %128 to i32 %738 = insertelement <2 x i32> undef, i32 %736, i32 0 %739 = insertelement <2 x i32> %738, i32 %737, i32 1 %740 = bitcast <8 x i32> %92 to <32 x i8> %741 = bitcast <4 x i32> %94 to <16 x i8> %742 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %739, <32 x i8> %740, <16 x i8> %741, i32 2) %743 = extractelement <4 x float> %742, i32 1 %744 = extractelement <4 x float> %742, i32 3 %745 = bitcast float %119 to i32 %746 = bitcast float %120 to i32 %747 = insertelement <2 x i32> undef, i32 %745, i32 0 %748 = insertelement <2 x i32> %747, i32 %746, i32 1 %749 = bitcast <8 x i32> %84 to <32 x i8> %750 = bitcast <4 x i32> %86 to <16 x i8> %751 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %748, <32 x i8> %749, <16 x i8> %750, i32 2) %752 = extractelement <4 x float> %751, i32 1 %753 = extractelement <4 x float> %751, i32 3 %754 = bitcast float %123 to i32 %755 = bitcast float %124 to i32 %756 = insertelement <2 x i32> undef, i32 %754, i32 0 %757 = insertelement <2 x i32> %756, i32 %755, i32 1 %758 = bitcast <8 x i32> %88 to <32 x i8> %759 = bitcast <4 x i32> %90 to <16 x i8> %760 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %757, <32 x i8> %758, <16 x i8> %759, i32 2) %761 = extractelement <4 x float> %760, i32 1 %762 = extractelement <4 x float> %760, i32 3 %763 = fmul float %318, %762 %764 = fmul float %318, %761 %765 = fmul float %351, %753 %766 = fadd float %765, %763 %767 = fmul float %351, %752 %768 = fadd float %767, %764 %769 = fmul float %334, %744 %770 = fadd float %769, %766 %771 = fmul float %334, %743 %772 = fadd float %771, %768 %773 = fmul float %350, %735 %774 = fadd float %773, %770 %775 = fmul float %350, %734 %776 = fadd float %775, %772 %777 = fmul float %774, 2.000000e+00 %778 = fadd float %777, -1.000000e+00 %779 = fmul float %776, 2.000000e+00 %780 = fadd float %779, -1.000000e+00 %781 = fmul float %24, %778 %782 = fmul float %780, %24 %783 = fmul float %778, %778 %784 = fmul float %780, %780 %785 = fadd float %783, %784 %786 = fsub float 1.000000e+00, %785 %787 = call float @llvm.AMDIL.clamp.(float %786, float 0.000000e+00, float 1.000000e+00) %788 = call float @llvm.sqrt.f32(float %787) %789 = fmul float %781, %781 %790 = fmul float %782, %782 %791 = fadd float %790, %789 %792 = fmul float %788, %788 %793 = fadd float %791, %792 %794 = call float @llvm.AMDGPU.rsq.clamped.f32(float %793) %795 = fmul float %781, %794 %796 = fmul float %782, %794 %797 = fsub float -0.000000e+00, %796 %798 = fmul float %788, %794 %799 = fmul float %140, 0.000000e+00 %800 = fadd float %799, %139 %801 = fmul float %141, 0.000000e+00 %802 = fadd float %800, %801 %803 = fmul float %802, %139 %804 = fmul float %802, %140 %805 = fmul float %802, %141 %806 = fsub float 1.000000e+00, %803 %807 = fsub float 0.000000e+00, %804 %808 = fsub float 0.000000e+00, %805 %809 = fmul float %806, %806 %810 = fmul float %807, %807 %811 = fadd float %810, %809 %812 = fmul float %808, %808 %813 = fadd float %811, %812 %814 = call float @llvm.sqrt.f32(float %813) %815 = fcmp olt float %814, 0x3EE4F8B580000000 %.87 = select i1 %815, float 1.000000e+00, float %806 %.88 = select i1 %815, float 0.000000e+00, float %807 %.89 = select i1 %815, float 0.000000e+00, float %808 %816 = fmul float %temp68.0, %797 %817 = fmul float %temp69.0, %797 %818 = fmul float %temp70.0, %797 %819 = fmul float %.87, %795 %820 = fadd float %819, %816 %821 = fmul float %.88, %795 %822 = fadd float %821, %817 %823 = fmul float %.89, %795 %824 = fadd float %823, %818 %825 = fmul float %139, %798 %826 = fadd float %825, %820 %827 = fmul float %140, %798 %828 = fadd float %827, %822 %829 = fmul float %141, %798 %830 = fadd float %829, %824 %831 = bitcast float %166 to i32 %832 = bitcast float %170 to i32 %833 = insertelement <2 x i32> undef, i32 %831, i32 0 %834 = insertelement <2 x i32> %833, i32 %832, i32 1 %835 = bitcast <8 x i32> %96 to <32 x i8> %836 = bitcast <4 x i32> %98 to <16 x i8> %837 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %834, <32 x i8> %835, <16 x i8> %836, i32 2) %838 = extractelement <4 x float> %837, i32 1 %839 = extractelement <4 x float> %837, i32 3 %840 = bitcast float %174 to i32 %841 = bitcast float %178 to i32 %842 = insertelement <2 x i32> undef, i32 %840, i32 0 %843 = insertelement <2 x i32> %842, i32 %841, i32 1 %844 = bitcast <8 x i32> %92 to <32 x i8> %845 = bitcast <4 x i32> %94 to <16 x i8> %846 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %843, <32 x i8> %844, <16 x i8> %845, i32 2) %847 = extractelement <4 x float> %846, i32 1 %848 = extractelement <4 x float> %846, i32 3 %849 = bitcast float %131 to i32 %850 = bitcast float %132 to i32 %851 = insertelement <2 x i32> undef, i32 %849, i32 0 %852 = insertelement <2 x i32> %851, i32 %850, i32 1 %853 = bitcast <8 x i32> %88 to <32 x i8> %854 = bitcast <4 x i32> %90 to <16 x i8> %855 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %852, <32 x i8> %853, <16 x i8> %854, i32 2) %856 = extractelement <4 x float> %855, i32 1 %857 = extractelement <4 x float> %855, i32 3 %858 = bitcast float %129 to i32 %859 = bitcast float %130 to i32 %860 = insertelement <2 x i32> undef, i32 %858, i32 0 %861 = insertelement <2 x i32> %860, i32 %859, i32 1 %862 = bitcast <8 x i32> %84 to <32 x i8> %863 = bitcast <4 x i32> %86 to <16 x i8> %864 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %861, <32 x i8> %862, <16 x i8> %863, i32 2) %865 = extractelement <4 x float> %864, i32 1 %866 = extractelement <4 x float> %864, i32 3 %867 = fmul float %351, %866 %868 = fmul float %351, %865 %869 = fmul float %318, %857 %870 = fadd float %869, %867 %871 = fmul float %318, %856 %872 = fadd float %871, %868 %873 = fmul float %334, %848 %874 = fadd float %873, %870 %875 = fmul float %334, %847 %876 = fadd float %875, %872 %877 = fmul float %350, %839 %878 = fadd float %877, %874 %879 = fmul float %350, %838 %880 = fadd float %879, %876 %881 = fmul float %878, 2.000000e+00 %882 = fadd float %881, -1.000000e+00 %883 = fmul float %880, 2.000000e+00 %884 = fadd float %883, -1.000000e+00 %885 = fmul float %882, %24 %886 = fmul float %884, %24 %887 = fmul float %882, %882 %888 = fmul float %884, %884 %889 = fadd float %887, %888 %890 = fsub float 1.000000e+00, %889 %891 = call float @llvm.AMDIL.clamp.(float %890, float 0.000000e+00, float 1.000000e+00) %892 = call float @llvm.sqrt.f32(float %891) %893 = fmul float %885, %885 %894 = fmul float %886, %886 %895 = fadd float %894, %893 %896 = fmul float %892, %892 %897 = fadd float %895, %896 %898 = call float @llvm.AMDGPU.rsq.clamped.f32(float %897) %899 = fmul float %885, %898 %900 = fmul float %886, %898 %901 = fsub float -0.000000e+00, %900 %902 = fmul float %892, %898 %903 = fmul float %139, 0.000000e+00 %904 = fsub float %903, %140 %905 = fmul float %141, 0.000000e+00 %906 = fadd float %904, %905 %907 = fmul float %906, %139 %908 = fmul float %906, %140 %909 = fmul float %906, %141 %910 = fsub float 0.000000e+00, %907 %911 = fsub float -1.000000e+00, %908 %912 = fsub float 0.000000e+00, %909 %913 = fmul float %910, %910 %914 = fmul float %911, %911 %915 = fadd float %914, %913 %916 = fmul float %912, %912 %917 = fadd float %915, %916 %918 = call float @llvm.sqrt.f32(float %917) %919 = fcmp olt float %918, 0x3EE4F8B580000000 %temp40.0 = select i1 %919, float 0.000000e+00, float %910 %temp41.0 = select i1 %919, float -1.000000e+00, float %911 %temp42.0 = select i1 %919, float 0.000000e+00, float %912 %920 = call float @llvm.AMDGPU.lrp(float %180, float %826, float %722) %921 = call float @llvm.AMDGPU.lrp(float %180, float %828, float %724) %922 = call float @llvm.AMDGPU.lrp(float %180, float %830, float %726) %923 = fmul float %temp40.0, %901 %924 = fmul float %temp41.0, %901 %925 = fmul float %temp42.0, %901 %926 = fmul float %.87, %899 %927 = fadd float %926, %923 %928 = fmul float %.88, %899 %929 = fadd float %928, %924 %930 = fmul float %.89, %899 %931 = fadd float %930, %925 %932 = fmul float %139, %902 %933 = fadd float %932, %927 %934 = fmul float %140, %902 %935 = fadd float %934, %929 %936 = fmul float %141, %902 %937 = fadd float %936, %931 %938 = call float @llvm.AMDGPU.lrp(float %182, float %933, float %920) %939 = call float @llvm.AMDGPU.lrp(float %182, float %935, float %921) %940 = call float @llvm.AMDGPU.lrp(float %182, float %937, float %922) %941 = fmul float %938, 5.000000e-01 %942 = fadd float %941, 5.000000e-01 %943 = fmul float %939, 5.000000e-01 %944 = fadd float %943, 5.000000e-01 %945 = fmul float %940, 5.000000e-01 %946 = fadd float %945, 5.000000e-01 %947 = call i32 @llvm.SI.packf16(float %424, float %425) %948 = bitcast i32 %947 to float %949 = call i32 @llvm.SI.packf16(float %426, float 0.000000e+00) %950 = bitcast i32 %949 to float %951 = call i32 @llvm.SI.packf16(float %604, float %601) %952 = bitcast i32 %951 to float %953 = call i32 @llvm.SI.packf16(float %605, float 0.000000e+00) %954 = bitcast i32 %953 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %948, float %950, float %948, float %950) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 1, i32 1, float %952, float %954, float %952, float %954) %955 = call i32 @llvm.SI.packf16(float %942, float %944) %956 = bitcast i32 %955 to float %957 = call i32 @llvm.SI.packf16(float %946, float 0.000000e+00) %958 = bitcast i32 %957 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 2, i32 0, float %102, float %102, float %102, float %102) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 3, i32 1, float %956, float %958, float %956, float %958) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v41, v0, 0, 0, [m0] ; C8A40000 v_interp_p2_f32 v41, [v41], v1, 0, 0, [m0] ; C8A50001 s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 v_interp_p1_f32 v42, v0, 1, 0, [m0] ; C8A80100 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 v_interp_p2_f32 v42, [v42], v1, 1, 0, [m0] ; C8A90101 v_interp_p1_f32 v53, v0, 2, 0, [m0] ; C8D40200 v_interp_p2_f32 v53, [v53], v1, 2, 0, [m0] ; C8D50201 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s18, s[0:3], 0x4c ; C209014C s_buffer_load_dword s25, s[0:3], 0x4d ; C20C814D s_buffer_load_dword s19, s[0:3], 0x4e ; C209814E s_buffer_load_dword s24, s[0:3], 0x50 ; C20C0150 s_buffer_load_dword s26, s[0:3], 0x51 ; C20D0151 s_buffer_load_dword s27, s[0:3], 0x52 ; C20D8152 s_buffer_load_dword s14, s[0:3], 0x5c ; C207015C s_buffer_load_dword s15, s[0:3], 0x5d ; C207815D s_buffer_load_dword s16, s[0:3], 0x5e ; C208015E s_buffer_load_dword s12, s[0:3], 0x60 ; C2060160 s_buffer_load_dword s13, s[0:3], 0x61 ; C2068161 s_buffer_load_dword s17, s[0:3], 0x62 ; C2088162 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v3, v0, 0, 2, [m0] ; C80C0800 v_interp_p2_f32 v3, [v3], v1, 0, 2, [m0] ; C80D0801 v_interp_p1_f32 v4, v0, 1, 2, [m0] ; C8100900 v_interp_p2_f32 v4, [v4], v1, 1, 2, [m0] ; C8110901 v_interp_p1_f32 v5, v0, 2, 2, [m0] ; C8140A00 v_interp_p2_f32 v5, [v5], v1, 2, 2, [m0] ; C8150A01 v_interp_p1_f32 v12, v0, 0, 3, [m0] ; C8300C00 v_interp_p2_f32 v12, [v12], v1, 0, 3, [m0] ; C8310C01 v_interp_p1_f32 v10, v0, 1, 3, [m0] ; C8280D00 v_interp_p2_f32 v10, [v10], v1, 1, 3, [m0] ; C8290D01 v_interp_p1_f32 v9, v0, 2, 3, [m0] ; C8240E00 v_interp_p2_f32 v9, [v9], v1, 2, 3, [m0] ; C8250E01 v_interp_p1_f32 v16, v0, 3, 3, [m0] ; C8400F00 v_interp_p2_f32 v16, [v16], v1, 3, 3, [m0] ; C8410F01 v_interp_p1_f32 v14, v0, 0, 4, [m0] ; C8381000 v_interp_p2_f32 v14, [v14], v1, 0, 4, [m0] ; C8391001 v_interp_p1_f32 v13, v0, 1, 4, [m0] ; C8341100 v_interp_p2_f32 v13, [v13], v1, 1, 4, [m0] ; C8351101 v_interp_p1_f32 v11, v0, 2, 4, [m0] ; C82C1200 v_interp_p2_f32 v11, [v11], v1, 2, 4, [m0] ; C82D1201 v_interp_p1_f32 v15, v0, 3, 4, [m0] ; C83C1300 v_interp_p2_f32 v15, [v15], v1, 3, 4, [m0] ; C83D1301 v_interp_p1_f32 v21, v0, 0, 5, [m0] ; C8541400 v_interp_p2_f32 v21, [v21], v1, 0, 5, [m0] ; C8551401 v_interp_p1_f32 v22, v0, 1, 5, [m0] ; C8581500 v_interp_p2_f32 v22, [v22], v1, 1, 5, [m0] ; C8591501 s_load_dwordx4 s[28:31], s[4:5], 0x4 ; C08E0504 s_load_dwordx4 s[20:23], s[4:5], 0x8 ; C08A0508 v_interp_p1_f32 v23, v0, 2, 5, [m0] ; C85C1600 v_interp_p2_f32 v23, [v23], v1, 2, 5, [m0] ; C85D1601 v_interp_p1_f32 v24, v0, 3, 5, [m0] ; C8601700 v_interp_p2_f32 v24, [v24], v1, 3, 5, [m0] ; C8611701 v_interp_p1_f32 v54, v0, 0, 6, [m0] ; C8D81800 v_interp_p2_f32 v54, [v54], v1, 0, 6, [m0] ; C8D91801 v_interp_p1_f32 v55, v0, 1, 6, [m0] ; C8DC1900 v_interp_p2_f32 v55, [v55], v1, 1, 6, [m0] ; C8DD1901 v_interp_p1_f32 v25, v0, 2, 6, [m0] ; C8641A00 v_interp_p2_f32 v25, [v25], v1, 2, 6, [m0] ; C8651A01 v_interp_p1_f32 v26, v0, 3, 6, [m0] ; C8681B00 v_interp_p2_f32 v26, [v26], v1, 3, 6, [m0] ; C8691B01 v_interp_p1_f32 v56, v0, 0, 7, [m0] ; C8E01C00 v_interp_p2_f32 v56, [v56], v1, 0, 7, [m0] ; C8E11C01 v_interp_p1_f32 v57, v0, 1, 7, [m0] ; C8E41D00 v_interp_p2_f32 v57, [v57], v1, 1, 7, [m0] ; C8E51D01 v_interp_p1_f32 v31, v0, 2, 7, [m0] ; C87C1E00 v_interp_p2_f32 v31, [v31], v1, 2, 7, [m0] ; C87D1E01 v_interp_p1_f32 v32, v0, 3, 7, [m0] ; C8801F00 v_interp_p2_f32 v32, [v32], v1, 3, 7, [m0] ; C8811F01 v_interp_p1_f32 v17, v0, 0, 8, [m0] ; C8442000 v_interp_p2_f32 v17, [v17], v1, 0, 8, [m0] ; C8452001 v_interp_p1_f32 v18, v0, 1, 8, [m0] ; C8482100 v_interp_p2_f32 v18, [v18], v1, 1, 8, [m0] ; C8492101 s_load_dwordx8 s[64:71], s[6:7], 0x8 ; C0E00708 v_interp_p1_f32 v19, v0, 2, 8, [m0] ; C84C2200 v_interp_p2_f32 v19, [v19], v1, 2, 8, [m0] ; C84D2201 v_interp_p1_f32 v20, v0, 3, 8, [m0] ; C8502300 v_interp_p2_f32 v20, [v20], v1, 3, 8, [m0] ; C8512301 s_load_dwordx4 s[44:47], s[4:5], 0xc ; C096050C s_load_dwordx4 s[32:35], s[4:5], 0x10 ; C0900510 s_load_dwordx8 s[56:63], s[6:7], 0x10 ; C0DC0710 s_load_dwordx8 s[48:55], s[6:7], 0x18 ; C0D80718 s_load_dwordx8 s[36:43], s[6:7], 0x20 ; C0D20720 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[27:30], 15, 0, 0, 0, 0, 0, 0, 0, v[54:55], s[64:71], s[28:31] ; F0800F00 00F01B36 image_sample v[33:36], 15, 0, 0, 0, 0, 0, 0, 0, v[25:26], s[64:71], s[28:31] ; F0800F00 00F02119 image_sample v[37:40], 15, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[64:71], s[28:31] ; F0800F00 00F02513 v_mov_b32_e32 v58, 0x3c000000 ; 7E7402FF 3C000000 v_mul_f32_e32 v59, v58, v42 ; 1076553A v_mul_f32_e32 v0, s25, v59 ; 10007619 v_mul_f32_e32 v60, v58, v41 ; 1078533A v_mad_f32 v0, s18, v60, -v0 ; D2820000 84027812 v_add_f32_e32 v0, s19, v0 ; 06000013 v_mul_f32_e32 v1, s26, v59 ; 1002761A v_mad_f32 v1, s24, v60, -v1 ; D2820001 84067818 v_add_f32_e32 v1, s27, v1 ; 0602021B image_sample v[41:44], 15, 0, 0, 0, 0, 0, 0, 0, v[56:57], s[56:63], s[20:23] ; F0800F00 00AE2938 image_sample v[45:48], 15, 0, 0, 0, 0, 0, 0, 0, v[31:32], s[56:63], s[20:23] ; F0800F00 00AE2D1F image_sample v[49:52], 15, 0, 0, 0, 0, 0, 0, 0, v[0:1], s[56:63], s[20:23] ; F0800F00 00AE3100 v_mul_f32_e32 v53, v58, v53 ; 106A6B3A v_mul_f32_e32 v58, s15, v53 ; 10746A0F v_mad_f32 v58, s14, v60, -v58 ; D282003A 84EA780E v_mul_f32_e32 v61, s13, v53 ; 107A6A0D v_mad_f32 v61, s12, v60, -v61 ; D282003D 84F6780C v_add_f32_e32 v62, s16, v58 ; 067C7410 v_add_f32_e32 v63, s17, v61 ; 067E7A11 v_mov_b32_e32 v58, s16 ; 7E740210 v_mad_f32 v58, s14, v59, v58 ; D282003A 04EA760E v_mul_f32_e32 v61, s15, v59 ; 107A760F v_mad_f32 v61, s14, v60, -v61 ; D282003D 84F6780E v_add_f32_e32 v64, s16, v61 ; 06807A10 v_mad_f32 v66, s15, -v53, v58 ; D2820042 44EA6A0F v_mul_f32_e32 v58, s13, v59 ; 1074760D s_load_dwordx8 s[20:27], s[6:7], 0x0 ; C0CA0700 v_mad_f32 v58, s12, v60, -v58 ; D282003A 84EA780C v_add_f32_e32 v65, s17, v58 ; 06827411 v_mov_b32_e32 v58, s17 ; 7E740211 v_mad_f32 v58, s12, v59, v58 ; D282003A 04EA760C v_mad_f32 v67, s13, -v53, v58 ; D2820043 44EA6A0D image_sample v[58:61], 15, 0, 0, 0, 0, 0, 0, 0, v[66:67], s[48:55], s[44:47] ; F0800F00 016C3A42 image_sample v[68:71], 15, 0, 0, 0, 0, 0, 0, 0, v[62:63], s[48:55], s[44:47] ; F0800F00 016C443E image_sample v[72:75], 15, 0, 0, 0, 0, 0, 0, 0, v[64:65], s[48:55], s[44:47] ; F0800F00 016C4840 s_load_dwordx4 s[44:47], s[4:5], 0x1c ; C096051C s_load_dwordx8 s[48:55], s[6:7], 0x38 ; C0D80738 s_waitcnt vmcnt(3) lgkmcnt(0) ; BF8C0073 image_sample v[76:78], 7, 0, 0, 0, 0, 0, 0, 0, v[21:22], s[20:27], s[8:11] ; F0800700 00454C15 image_sample v[79:81], 7, 0, 0, 0, 0, 0, 0, 0, v[23:24], s[20:27], s[8:11] ; F0800700 00454F17 image_sample v[82:84], 7, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[20:27], s[8:11] ; F0800700 00455211 s_load_dwordx4 s[68:71], s[4:5], 0x18 ; C0A20518 s_load_dwordx8 s[72:79], s[6:7], 0x30 ; C0E40730 s_load_dwordx4 s[8:11], s[4:5], 0x20 ; C0840520 s_load_dwordx4 s[20:23], s[4:5], 0x24 ; C08A0524 s_load_dwordx8 s[12:19], s[6:7], 0x40 ; C0C60740 s_load_dwordx8 s[24:31], s[6:7], 0x48 ; C0CC0748 s_waitcnt vmcnt(3) ; BF8C0773 image_sample v[85:87], 7, 0, 0, 0, 0, 0, 0, 0, v[66:67], s[48:55], s[44:47] ; F0800700 016C5542 s_load_dwordx4 s[80:83], s[4:5], 0x14 ; C0A80514 s_load_dwordx8 s[84:91], s[6:7], 0x28 ; C0EA0728 image_sample v[88:90], 7, 0, 0, 0, 0, 0, 0, 0, v[62:63], s[48:55], s[44:47] ; F0800700 016C583E image_sample v[91:93], 7, 0, 0, 0, 0, 0, 0, 0, v[64:65], s[48:55], s[44:47] ; F0800700 016C5B40 s_waitcnt vmcnt(3) lgkmcnt(0) ; BF8C0073 image_sample v[94:96], 7, 0, 0, 0, 0, 0, 0, 0, v[56:57], s[72:79], s[68:71] ; F0800700 02325E38 s_load_dwordx4 s[44:47], s[4:5], 0x28 ; C0960528 s_load_dwordx4 s[56:59], s[4:5], 0x2c ; C09C052C s_load_dwordx8 s[60:67], s[6:7], 0x58 ; C0DE0758 s_load_dwordx8 s[48:55], s[6:7], 0x50 ; C0D80750 image_sample v[97:99], 7, 0, 0, 0, 0, 0, 0, 0, v[31:32], s[72:79], s[68:71] ; F0800700 0232611F image_sample v[100:102], 7, 0, 0, 0, 0, 0, 0, 0, v[0:1], s[72:79], s[68:71] ; F0800700 02326400 s_waitcnt vmcnt(5) ; BF8C0775 image_sample v[103:105], 7, 0, 0, 0, 0, 0, 0, 0, v[54:55], s[84:91], s[80:83] ; F0800700 02956736 image_sample v[106:108], 7, 0, 0, 0, 0, 0, 0, 0, v[25:26], s[84:91], s[80:83] ; F0800700 02956A19 image_sample v[109:111], 7, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[84:91], s[80:83] ; F0800700 02956D13 image_sample v[112:114], 7, 0, 0, 0, 0, 0, 0, 0, v[21:22], s[36:43], s[32:35] ; F0800700 01097015 image_sample v[115:117], 7, 0, 0, 0, 0, 0, 0, 0, v[23:24], s[36:43], s[32:35] ; F0800700 01097317 image_sample v[118:120], 7, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[36:43], s[32:35] ; F0800700 01097611 s_waitcnt vmcnt(8) lgkmcnt(0) ; BF8C0078 image_sample v[66:67], 10, 0, 0, 0, 0, 0, 0, 0, v[66:67], s[60:67], s[56:59] ; F0800A00 01CF4242 image_sample v[56:57], 10, 0, 0, 0, 0, 0, 0, 0, v[56:57], s[48:55], s[44:47] ; F0800A00 016C3838 image_sample v[21:22], 10, 0, 0, 0, 0, 0, 0, 0, v[21:22], s[12:19], s[8:11] ; F0800A00 00431515 image_sample v[53:54], 10, 0, 0, 0, 0, 0, 0, 0, v[54:55], s[24:31], s[20:23] ; F0800A00 00A63536 image_sample v[62:63], 10, 0, 0, 0, 0, 0, 0, 0, v[62:63], s[60:67], s[56:59] ; F0800A00 01CF3E3E image_sample v[31:32], 10, 0, 0, 0, 0, 0, 0, 0, v[31:32], s[48:55], s[44:47] ; F0800A00 016C1F1F image_sample v[23:24], 10, 0, 0, 0, 0, 0, 0, 0, v[23:24], s[12:19], s[8:11] ; F0800A00 00431717 image_sample v[25:26], 10, 0, 0, 0, 0, 0, 0, 0, v[25:26], s[24:31], s[20:23] ; F0800A00 00A61919 image_sample v[64:65], 10, 0, 0, 0, 0, 0, 0, 0, v[64:65], s[60:67], s[56:59] ; F0800A00 01CF4040 v_add_f32_e32 v16, v16, v16 ; 06202110 v_add_f32_e64 v16, 0, v16 clamp ; D2060810 00022080 v_sub_f32_e32 v55, 1.0, v16 ; 086E20F2 v_mul_f32_e32 v27, v27, v55 ; 10366F1B v_mul_f32_e32 v28, v28, v55 ; 10386F1C v_mul_f32_e32 v29, v29, v55 ; 103A6F1D v_mul_f32_e32 v30, v30, v55 ; 103C6F1E v_mac_f32_e32 v27, v33, v16 ; 3E362121 v_mac_f32_e32 v28, v34, v16 ; 3E382122 v_mac_f32_e32 v29, v35, v16 ; 3E3A2123 v_mac_f32_e32 v30, v36, v16 ; 3E3C2124 v_mul_f32_e32 v33, v41, v55 ; 10426F29 v_mul_f32_e32 v34, v42, v55 ; 10446F2A v_mul_f32_e32 v35, v43, v55 ; 10466F2B v_mul_f32_e32 v36, v44, v55 ; 10486F2C v_mac_f32_e32 v33, v45, v16 ; 3E42212D v_mac_f32_e32 v34, v46, v16 ; 3E44212E v_mac_f32_e32 v35, v47, v16 ; 3E46212F v_mac_f32_e32 v36, v48, v16 ; 3E482130 v_mul_f32_e32 v41, v58, v55 ; 10526F3A v_mul_f32_e32 v42, v59, v55 ; 10546F3B v_mul_f32_e32 v43, v60, v55 ; 10566F3C v_mul_f32_e32 v44, v61, v55 ; 10586F3D v_mac_f32_e32 v41, v68, v16 ; 3E522144 v_mac_f32_e32 v42, v69, v16 ; 3E542145 v_mac_f32_e32 v43, v70, v16 ; 3E562146 v_mac_f32_e32 v44, v71, v16 ; 3E582147 v_add_f32_e32 v15, v15, v15 ; 061E1F0F v_add_f32_e64 v15, 0, v15 clamp ; D206080F 00021E80 v_sub_f32_e32 v45, 1.0, v15 ; 085A1EF2 v_mad_f32 v46, v45, v30, -v14 ; D282002E 843A3D2D v_mac_f32_e32 v14, v30, v45 ; 3E1C5B1E v_mul_f32_e32 v27, v27, v45 ; 10365B1B v_mac_f32_e32 v27, v37, v15 ; 3E361F25 v_mul_f32_e32 v28, v28, v45 ; 10385B1C v_mac_f32_e32 v28, v38, v15 ; 3E381F26 v_mul_f32_e32 v29, v29, v45 ; 103A5B1D v_mac_f32_e32 v29, v39, v15 ; 3E3A1F27 v_mac_f32_e32 v46, v40, v15 ; 3E5C1F28 v_mac_f32_e32 v14, v40, v15 ; 3E1C1F28 v_mad_f32 v30, v45, v36, -v13 ; D282001E 8436492D v_mac_f32_e32 v13, v36, v45 ; 3E1A5B24 v_mul_f32_e32 v33, v33, v45 ; 10425B21 v_mac_f32_e32 v33, v49, v15 ; 3E421F31 v_mul_f32_e32 v34, v34, v45 ; 10445B22 v_mac_f32_e32 v34, v50, v15 ; 3E441F32 v_mul_f32_e32 v35, v35, v45 ; 10465B23 v_mac_f32_e32 v35, v51, v15 ; 3E461F33 v_mac_f32_e32 v30, v52, v15 ; 3E3C1F34 v_mac_f32_e32 v13, v52, v15 ; 3E1A1F34 v_mad_f32 v36, v45, v44, -v11 ; D2820024 842E592D v_mac_f32_e32 v11, v44, v45 ; 3E165B2C v_mul_f32_e32 v37, v41, v45 ; 104A5B29 v_mac_f32_e32 v37, v72, v15 ; 3E4A1F48 v_mul_f32_e32 v38, v42, v45 ; 104C5B2A v_mac_f32_e32 v38, v73, v15 ; 3E4C1F49 v_mul_f32_e32 v39, v43, v45 ; 104E5B2B v_mac_f32_e32 v39, v74, v15 ; 3E4E1F4A v_mac_f32_e32 v36, v75, v15 ; 3E481F4B v_mac_f32_e32 v11, v75, v15 ; 3E161F4B v_mul_f32_e32 v40, v76, v55 ; 10506F4C v_mul_f32_e32 v41, v77, v55 ; 10526F4D v_mul_f32_e32 v42, v78, v55 ; 10546F4E v_mac_f32_e32 v40, v79, v16 ; 3E50214F v_mac_f32_e32 v41, v80, v16 ; 3E522150 v_mac_f32_e32 v42, v81, v16 ; 3E542151 v_mul_f32_e32 v40, v40, v45 ; 10505B28 v_mac_f32_e32 v40, v82, v15 ; 3E501F52 v_mul_f32_e32 v41, v41, v45 ; 10525B29 v_mac_f32_e32 v41, v83, v15 ; 3E521F53 v_mul_f32_e32 v42, v42, v45 ; 10545B2A v_mac_f32_e32 v42, v84, v15 ; 3E541F54 v_mul_f32_e32 v43, v85, v55 ; 10566F55 v_mul_f32_e32 v44, v86, v55 ; 10586F56 v_mul_f32_e32 v47, v87, v55 ; 105E6F57 v_mac_f32_e32 v43, v88, v16 ; 3E562158 v_mac_f32_e32 v44, v89, v16 ; 3E582159 v_mac_f32_e32 v47, v90, v16 ; 3E5E215A v_mul_f32_e32 v43, v43, v45 ; 10565B2B v_mac_f32_e32 v43, v91, v15 ; 3E561F5B v_mul_f32_e32 v44, v44, v45 ; 10585B2C v_mac_f32_e32 v44, v92, v15 ; 3E581F5C v_mul_f32_e32 v47, v47, v45 ; 105E5B2F v_mac_f32_e32 v47, v93, v15 ; 3E5E1F5D v_mul_f32_e32 v48, v94, v55 ; 10606F5E v_mul_f32_e32 v49, v95, v55 ; 10626F5F v_mul_f32_e32 v50, v96, v55 ; 10646F60 s_waitcnt ; BF8C077F v_mac_f32_e32 v48, v97, v16 ; 3E602161 v_mac_f32_e32 v49, v98, v16 ; 3E622162 v_mac_f32_e32 v50, v99, v16 ; 3E642163 v_mul_f32_e32 v48, v48, v45 ; 10605B30 v_mac_f32_e32 v48, v100, v15 ; 3E601F64 v_mul_f32_e32 v49, v49, v45 ; 10625B31 v_mac_f32_e32 v49, v101, v15 ; 3E621F65 v_mul_f32_e32 v50, v50, v45 ; 10645B32 v_mac_f32_e32 v50, v102, v15 ; 3E641F66 s_waitcnt vmcnt(14) ; BF8C077E v_mul_f32_e32 v51, v103, v55 ; 10666F67 v_mul_f32_e32 v52, v104, v55 ; 10686F68 v_mul_f32_e32 v58, v105, v55 ; 10746F69 s_waitcnt vmcnt(13) ; BF8C077D v_mac_f32_e32 v51, v106, v16 ; 3E66216A v_mac_f32_e32 v52, v107, v16 ; 3E68216B v_mac_f32_e32 v58, v108, v16 ; 3E74216C v_mul_f32_e32 v51, v51, v45 ; 10665B33 s_waitcnt vmcnt(12) ; BF8C077C v_mac_f32_e32 v51, v109, v15 ; 3E661F6D v_mul_f32_e32 v52, v52, v45 ; 10685B34 v_mac_f32_e32 v52, v110, v15 ; 3E681F6E v_mul_f32_e32 v58, v58, v45 ; 10745B3A v_mac_f32_e32 v58, v111, v15 ; 3E741F6F s_waitcnt vmcnt(11) ; BF8C077B v_mul_f32_e32 v59, v112, v55 ; 10766F70 v_mul_f32_e32 v60, v113, v55 ; 10786F71 v_mul_f32_e32 v61, v114, v55 ; 107A6F72 s_waitcnt vmcnt(10) ; BF8C077A v_mac_f32_e32 v59, v115, v16 ; 3E762173 v_mac_f32_e32 v60, v116, v16 ; 3E782174 v_mac_f32_e32 v61, v117, v16 ; 3E7A2175 v_max_f32_e32 v46, 0, v46 ; 205C5C80 v_min_f32_e32 v14, 1.0, v14 ; 1E1C1CF2 v_subrev_f32_e32 v14, v46, v14 ; 0A1C1D2E v_rcp_f32_e32 v14, v14 ; 7E1C550E v_mul_f32_e32 v59, v59, v45 ; 10765B3B s_waitcnt vmcnt(9) ; BF8C0779 v_mac_f32_e32 v59, v118, v15 ; 3E761F76 v_mul_f32_e32 v60, v60, v45 ; 10785B3C v_mac_f32_e32 v60, v119, v15 ; 3E781F77 v_mul_f32_e32 v61, v61, v45 ; 107A5B3D v_mac_f32_e32 v61, v120, v15 ; 3E7A1F78 v_subrev_f32_e32 v12, v46, v12 ; 0A18192E v_mul_f32_e32 v12, v14, v12 ; 1018190E v_add_f32_e64 v12, 0, v12 clamp ; D206080C 00021880 v_mov_b32_e32 v14, 0x40400000 ; 7E1C02FF 40400000 v_mad_f32 v46, -2.0, v12, v14 ; D282002E 043A18F5 v_mul_f32_e32 v46, v46, v12 ; 105C192E v_max_f32_e32 v30, 0, v30 ; 203C3C80 v_min_f32_e32 v13, 1.0, v13 ; 1E1A1AF2 v_subrev_f32_e32 v13, v30, v13 ; 0A1A1B1E v_rcp_f32_e32 v13, v13 ; 7E1A550D v_mul_f32_e32 v68, v46, v12 ; 1088192E v_mad_f32 v12, -v12, v46, 1.0 ; D282000C 23CA5D0C v_subrev_f32_e32 v10, v30, v10 ; 0A14151E v_mul_f32_e32 v10, v13, v10 ; 1014150D v_add_f32_e64 v10, 0, v10 clamp ; D206080A 00021480 v_max_f32_e32 v13, 0, v36 ; 201A4880 v_min_f32_e32 v11, 1.0, v11 ; 1E1616F2 v_subrev_f32_e32 v11, v13, v11 ; 0A16170D v_rcp_f32_e32 v11, v11 ; 7E16550B v_mad_f32 v30, -2.0, v10, v14 ; D282001E 043A14F5 v_mul_f32_e32 v30, v30, v10 ; 103C151E v_mul_f32_e32 v10, v30, v10 ; 1014151E v_subrev_f32_e32 v9, v13, v9 ; 0A12130D v_mul_f32_e32 v9, v11, v9 ; 1012130B v_add_f32_e64 v9, 0, v9 clamp ; D2060809 00021280 v_mac_f32_e32 v14, -2.0, v9 ; 3E1C12F5 v_mul_f32_e32 v11, v14, v9 ; 1016130E v_mul_f32_e32 v9, v11, v9 ; 1012130B s_waitcnt vmcnt(5) ; BF8C0775 v_mul_f32_e32 v11, v54, v68 ; 10168936 v_mul_f32_e32 v13, v53, v68 ; 101A8935 v_max_f32_e32 v10, 0, v10 ; 20141480 v_min_f32_e32 v10, v12, v10 ; 1E14150C v_subrev_f32_e32 v12, v10, v12 ; 0A18190A v_max_f32_e32 v9, 0, v9 ; 20121280 v_min_f32_e32 v9, v12, v9 ; 1E12130C v_subrev_f32_e32 v12, v9, v12 ; 0A181909 v_mac_f32_e32 v11, v22, v12 ; 3E161916 v_mac_f32_e32 v13, v21, v12 ; 3E1A1915 s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v14, v26, v68 ; 101C891A v_mul_f32_e32 v21, v25, v68 ; 102A8919 v_mac_f32_e32 v14, v24, v12 ; 3E1C1918 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 v_mac_f32_e32 v21, v23, v12 ; 3E2A1917 image_sample v[0:1], 10, 0, 0, 0, 0, 0, 0, 0, v[0:1], s[48:55], s[44:47] ; F0800A00 016C0000 image_sample v[19:20], 10, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[24:31], s[20:23] ; F0800A00 00A61313 s_buffer_load_dword s5, s[0:3], 0x4 ; C2028104 image_sample v[17:18], 10, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[12:19], s[8:11] ; F0800A00 00431111 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v18, v18, v12 ; 10241912 v_mul_f32_e32 v17, v17, v12 ; 10221911 v_mac_f32_e32 v18, v20, v68 ; 3E248914 v_mac_f32_e32 v17, v19, v68 ; 3E228913 v_mul_f32_e32 v19, s4, v27 ; 10263604 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_buffer_load_dword s6, s[0:3], 0x5 ; C2030105 s_buffer_load_dword s7, s[0:3], 0x6 ; C2038106 v_mul_f32_e32 v20, s5, v40 ; 10285005 s_buffer_load_dword s5, s[0:3], 0xa ; C202810A v_mul_f32_e32 v20, v12, v20 ; 1028290C v_mac_f32_e32 v20, v68, v19 ; 3E282744 s_buffer_load_dword s8, s[0:3], 0xc ; C204010C s_buffer_load_dword s9, s[0:3], 0xd ; C204810D s_buffer_load_dword s10, s[0:3], 0xe ; C205010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v19, s4, v28 ; 10263804 v_mul_f32_e32 v22, s6, v41 ; 102C5206 v_mul_f32_e32 v22, v12, v22 ; 102C2D0C v_mac_f32_e32 v22, v68, v19 ; 3E2C2744 v_mul_f32_e32 v19, s5, v29 ; 10263A05 v_mul_f32_e32 v23, s7, v42 ; 102E5407 v_mul_f32_e32 v23, v12, v23 ; 102E2F0C v_mac_f32_e32 v23, v68, v19 ; 3E2E2744 v_mul_f32_e32 v19, v59, v12 ; 1026193B v_mul_f32_e32 v24, v60, v12 ; 1030193C v_mul_f32_e32 v12, v61, v12 ; 1018193D v_mac_f32_e32 v19, v51, v68 ; 3E268933 v_mac_f32_e32 v24, v52, v68 ; 3E308934 v_mac_f32_e32 v12, v58, v68 ; 3E18893A v_mac_f32_e32 v11, v57, v10 ; 3E161539 v_mac_f32_e32 v13, v56, v10 ; 3E1A1538 v_mac_f32_e32 v14, v32, v10 ; 3E1C1520 v_mac_f32_e32 v21, v31, v10 ; 3E2A151F v_mac_f32_e32 v18, v1, v10 ; 3E241501 v_mac_f32_e32 v17, v0, v10 ; 3E221500 v_mul_f32_e32 v0, s8, v33 ; 10004208 v_mac_f32_e32 v20, v0, v10 ; 3E281500 v_mul_f32_e32 v0, s9, v34 ; 10004409 v_mac_f32_e32 v22, v0, v10 ; 3E2C1500 v_mul_f32_e32 v0, s10, v35 ; 1000460A v_mac_f32_e32 v23, v0, v10 ; 3E2E1500 v_mac_f32_e32 v19, v48, v10 ; 3E261530 v_mac_f32_e32 v24, v49, v10 ; 3E301531 v_mac_f32_e32 v12, v50, v10 ; 3E181532 v_mac_f32_e32 v11, v67, v9 ; 3E161343 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 v_mac_f32_e32 v13, v66, v9 ; 3E1A1342 s_buffer_load_dword s5, s[0:3], 0x11 ; C2028111 v_mac_f32_e32 v14, v63, v9 ; 3E1C133F s_buffer_load_dword s6, s[0:3], 0x12 ; C2030112 v_mac_f32_e32 v21, v62, v9 ; 3E2A133E v_mac_f32_e32 v18, v65, v9 ; 3E241341 v_mac_f32_e32 v17, v64, v9 ; 3E221340 s_buffer_load_dword s7, s[0:3], 0x38 ; C2038138 s_buffer_load_dword s8, s[0:3], 0x39 ; C2040139 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v37 ; 10004A04 v_mac_f32_e32 v20, v9, v0 ; 3E280109 v_mul_f32_e32 v0, s5, v38 ; 10004C05 v_mac_f32_e32 v22, v9, v0 ; 3E2C0109 v_mul_f32_e32 v0, s6, v39 ; 10004E06 v_mac_f32_e32 v23, v9, v0 ; 3E2E0109 v_mul_f32_e32 v0, v6, v6 ; 10000D06 v_mac_f32_e32 v0, v7, v7 ; 3E000F07 v_mac_f32_e32 v0, v8, v8 ; 3E001108 v_rsq_clamp_f32_e32 v0, v0 ; 7E005900 v_mac_f32_e32 v19, v43, v9 ; 3E26132B v_mac_f32_e32 v24, v44, v9 ; 3E30132C v_mac_f32_e32 v12, v47, v9 ; 3E18132F v_mul_f32_e32 v1, v0, v6 ; 10020D00 v_mul_f32_e32 v6, v0, v7 ; 100C0F00 v_mul_f32_e32 v0, v0, v8 ; 10001100 s_buffer_load_dword s2, s[0:3], 0x0 ; C2010100 v_mul_f32_e32 v7, v1, v6 ; 100E0D01 v_mad_f32 v8, -v6, v6, 1.0 ; D2820008 23CA0D06 v_mul_f32_e32 v9, v7, v7 ; 10120F07 v_mac_f32_e32 v9, v8, v8 ; 3E121108 v_mul_f32_e32 v10, v0, v6 ; 10140D00 v_mac_f32_e32 v9, v10, v10 ; 3E12150A v_sqrt_f32_e32 v9, v9 ; 7E126709 v_mov_b32_e32 v25, 0x3727c5ac ; 7E3202FF 3727C5AC v_cmp_lt_f32_e32 vcc, v9, v25 ; 7C023309 v_mad_f32 v9, 2.0, v11, -1.0 ; D2820009 03CE16F4 v_mad_f32 v11, 2.0, v13, -1.0 ; D282000B 03CE1AF4 v_mad_f32 v13, -v11, v11, 1.0 ; D282000D 23CA170B v_mad_f32 v13, -v9, v9, v13 ; D282000D 24361309 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v9, s2, v9 ; 10121202 v_mul_f32_e32 v11, s2, v11 ; 10161602 v_add_f32_e64 v13, 0, v13 clamp ; D206080D 00021A80 v_sqrt_f32_e32 v13, v13 ; 7E1A670D v_mul_f32_e32 v26, v9, v9 ; 10341309 v_mac_f32_e32 v26, v11, v11 ; 3E34170B v_mac_f32_e32 v26, v13, v13 ; 3E341B0D v_rsq_clamp_f32_e32 v26, v26 ; 7E34591A v_mul_f32_e64 v27, -v0, v1 ; D210001B 20020300 v_mul_f32_e64 v28, -v0, v6 ; D210001C 20020D00 v_mul_f32_e32 v29, v27, v27 ; 103A371B v_mac_f32_e32 v29, v28, v28 ; 3E3A391C v_mad_f32 v30, v0, v0, -1.0 ; D282001E 03CE0100 v_mac_f32_e32 v29, v30, v30 ; 3E3A3D1E v_sqrt_f32_e32 v29, v29 ; 7E3A671D v_cmp_lt_f32_e64 s[0:1], v29, v25 ; D0020000 0002331D v_mul_f32_e32 v9, v26, v9 ; 1012131A v_mul_f32_e32 v11, v26, v11 ; 1016171A v_mov_b32_e32 v29, 0x80000000 ; 7E3A02FF 80000000 v_xor_b32_e32 v27, v27, v29 ; 3A363B1B v_cndmask_b32_e64 v27, v27, 0, s[0:1] ; D200001B 0001011B v_xor_b32_e32 v31, v7, v29 ; 3A3E3B07 v_cndmask_b32_e64 v32, v31, 0, vcc ; D2000020 01A9011F v_mul_f32_e32 v33, v11, v27 ; 1042370B v_mad_f32 v32, v32, v9, -v33 ; D2820020 84861320 v_cndmask_b32_e64 v8, v8, 1.0, vcc ; D2000008 01A9E508 v_xor_b32_e32 v28, v28, v29 ; 3A383B1C v_cndmask_b32_e64 v28, v28, 0, s[0:1] ; D200001C 0001011C v_mul_f32_e32 v33, v11, v28 ; 1042390B v_mad_f32 v8, v8, v9, -v33 ; D2820008 84861308 v_xor_b32_e32 v10, v10, v29 ; 3A143B0A v_cndmask_b32_e64 v10, v10, 0, vcc ; D200000A 01A9010A v_cndmask_b32_e64 v30, v30, -1.0, s[0:1] ; D200001E 0001E71E v_mul_f32_e32 v11, v11, v30 ; 10163D0B v_mad_f32 v9, v10, v9, -v11 ; D2820009 842E130A v_mad_f32 v10, -v1, v1, 1.0 ; D282000A 23CA0301 v_mul_f32_e32 v11, v10, v10 ; 1016150A v_mac_f32_e32 v11, v7, v7 ; 3E160F07 v_mul_f32_e32 v7, v0, v1 ; 100E0300 v_mac_f32_e32 v11, v7, v7 ; 3E160F07 v_sqrt_f32_e32 v11, v11 ; 7E16670B v_cmp_lt_f32_e32 vcc, v11, v25 ; 7C02330B v_mul_f32_e64 v11, -v6, v1 ; D210000B 20020306 v_mad_f32 v33, v6, v6, -1.0 ; D2820021 03CE0D06 v_mul_f32_e32 v34, v11, v11 ; 1044170B v_mac_f32_e32 v34, v33, v33 ; 3E444321 v_mul_f32_e64 v35, -v6, v0 ; D2100023 20020106 v_mac_f32_e32 v34, v35, v35 ; 3E444723 v_sqrt_f32_e32 v34, v34 ; 7E446722 v_cmp_lt_f32_e64 s[0:1], v34, v25 ; D0020000 00023322 v_mul_f32_e32 v13, v26, v13 ; 101A1B1A v_mad_f32 v14, 2.0, v14, -1.0 ; D282000E 03CE1CF4 v_mad_f32 v21, 2.0, v21, -1.0 ; D2820015 03CE2AF4 v_mad_f32 v25, -v21, v21, 1.0 ; D2820019 23CA2B15 v_mad_f32 v25, -v14, v14, v25 ; D2820019 24661D0E v_mul_f32_e32 v14, s2, v14 ; 101C1C02 v_mul_f32_e32 v21, s2, v21 ; 102A2A02 v_add_f32_e64 v25, 0, v25 clamp ; D2060819 00023280 v_sqrt_f32_e32 v25, v25 ; 7E326719 v_mul_f32_e32 v26, v14, v14 ; 10341D0E v_mac_f32_e32 v26, v21, v21 ; 3E342B15 v_mac_f32_e32 v26, v25, v25 ; 3E343319 v_rsq_clamp_f32_e32 v26, v26 ; 7E34591A v_mac_f32_e32 v32, v13, v1 ; 3E40030D v_mac_f32_e32 v8, v13, v6 ; 3E100D0D v_mac_f32_e32 v9, v13, v0 ; 3E12010D v_mul_f32_e32 v13, v26, v14 ; 101A1D1A v_mul_f32_e32 v14, v26, v21 ; 101C2B1A v_mul_f32_e32 v21, v26, v25 ; 102A331A v_mul_f32_e32 v25, v14, v27 ; 1032370E v_mul_f32_e32 v26, v14, v28 ; 1034390E v_mul_f32_e32 v14, v14, v30 ; 101C3D0E v_xor_b32_e32 v7, v7, v29 ; 3A0E3B07 v_cndmask_b32_e64 v10, v10, 1.0, vcc ; D200000A 01A9E50A v_cndmask_b32_e64 v27, v31, 0, vcc ; D200001B 01A9011F v_cndmask_b32_e64 v7, v7, 0, vcc ; D2000007 01A90107 v_mad_f32 v25, v10, v13, -v25 ; D2820019 84661B0A v_mad_f32 v26, v27, v13, -v26 ; D282001A 846A1B1B v_mad_f32 v13, v7, v13, -v14 ; D282000D 843A1B07 v_mac_f32_e32 v25, v21, v1 ; 3E320315 v_mac_f32_e32 v26, v21, v6 ; 3E340D15 v_mac_f32_e32 v13, v21, v0 ; 3E1A0115 v_mul_f32_e32 v14, v32, v55 ; 101C6F20 v_mac_f32_e32 v14, v25, v16 ; 3E1C2119 v_mul_f32_e32 v8, v8, v55 ; 10106F08 v_mac_f32_e32 v8, v26, v16 ; 3E10211A v_mul_f32_e32 v9, v9, v55 ; 10126F09 v_mac_f32_e32 v9, v13, v16 ; 3E12210D v_xor_b32_e32 v11, v11, v29 ; 3A163B0B v_xor_b32_e32 v13, v35, v29 ; 3A1A3B23 v_mad_f32 v16, 2.0, v18, -1.0 ; D2820010 03CE24F4 v_mad_f32 v17, 2.0, v17, -1.0 ; D2820011 03CE22F4 v_mad_f32 v18, -v17, v17, 1.0 ; D2820012 23CA2311 v_mad_f32 v18, -v16, v16, v18 ; D2820012 244A2110 v_mul_f32_e32 v16, s2, v16 ; 10202002 v_mul_f32_e32 v17, s2, v17 ; 10222202 v_add_f32_e64 v18, 0, v18 clamp ; D2060812 00022480 v_sqrt_f32_e32 v18, v18 ; 7E246712 v_mul_f32_e32 v21, v16, v16 ; 102A2110 v_mac_f32_e32 v21, v17, v17 ; 3E2A2311 v_mac_f32_e32 v21, v18, v18 ; 3E2A2512 v_rsq_clamp_f32_e32 v21, v21 ; 7E2A5915 v_cndmask_b32_e64 v11, v11, 0, s[0:1] ; D200000B 0001010B v_cndmask_b32_e64 v25, v33, -1.0, s[0:1] ; D2000019 0001E721 v_cndmask_b32_e64 v13, v13, 0, s[0:1] ; D200000D 0001010D v_mul_f32_e32 v17, v21, v17 ; 10222315 v_mul_f32_e32 v11, v17, v11 ; 10161711 v_mul_f32_e32 v25, v17, v25 ; 10323311 v_mul_f32_e32 v13, v17, v13 ; 101A1B11 v_mul_f32_e32 v16, v21, v16 ; 10202115 v_mad_f32 v10, v10, v16, -v11 ; D282000A 842E210A v_mad_f32 v11, v27, v16, -v25 ; D282000B 8466211B v_mad_f32 v7, v7, v16, -v13 ; D2820007 84362107 v_mul_f32_e32 v13, v21, v18 ; 101A2515 v_mac_f32_e32 v10, v13, v1 ; 3E14030D v_mac_f32_e32 v11, v13, v6 ; 3E160D0D v_mac_f32_e32 v7, v13, v0 ; 3E0E010D v_mul_f32_e32 v0, v14, v45 ; 10005B0E v_mac_f32_e32 v0, v10, v15 ; 3E001F0A v_mul_f32_e32 v1, v8, v45 ; 10025B08 v_mac_f32_e32 v1, v11, v15 ; 3E021F0B v_mul_f32_e32 v6, v9, v45 ; 100C5B09 v_mac_f32_e32 v6, v7, v15 ; 3E0C1F07 v_mul_f32_e32 v3, v3, v20 ; 10062903 v_mul_f32_e32 v4, v4, v22 ; 10082D04 v_mul_f32_e32 v5, v5, v23 ; 100A2F05 v_cvt_pkrtz_f16_f32_e32 v3, v3, v4 ; 5E060903 v_cvt_pkrtz_f16_f32_e64 v4, v5, 0 ; D25E0004 00010105 exp 15, 0, 1, 0, 0, v3, v4, v3, v4 ; F800040F 04030403 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v3, s7, v19 ; 10062607 v_cvt_pkrtz_f16_f32_e32 v3, v3, v24 ; 5E063103 v_mul_f32_e32 v4, s8, v12 ; 10081808 v_cvt_pkrtz_f16_f32_e64 v4, v4, 0 ; D25E0004 00010104 exp 15, 1, 1, 0, 0, v3, v4, v3, v4 ; F800041F 04030403 v_mad_f32 v0, 0.5, v0, 0.5 ; D2820000 03C200F0 v_mad_f32 v1, 0.5, v1, 0.5 ; D2820001 03C202F0 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 exp 15, 2, 0, 0, 0, v2, v2, v2, v2 ; F800002F 02020202 v_mad_f32 v1, 0.5, v6, 0.5 ; D2820001 03C20CF0 v_cvt_pkrtz_f16_f32_e64 v1, v1, 0 ; D25E0001 00010101 exp 15, 3, 1, 1, 1, v0, v1, v0, v1 ; F8001C3F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 96 VGPRS: 124 Code Size: 2816 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x7 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[2], PERSPECTIVE DCL IN[2], GENERIC[3], PERSPECTIVE DCL IN[3], GENERIC[5], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL OUT[2], COLOR[2] DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL CONST[1][0..3] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0..7], LOCAL IMM[0] UINT32 {0, 16, 4, 48} IMM[1] FLT32 { 2.0000, -1.0000, 1.0000, 0.5000} IMM[2] UINT32 {44, 0, 0, 0} 0: DP3 TEMP[0].x, IN[1].xyzz, IN[1].xyzz 1: RSQ TEMP[0].x, TEMP[0].xxxx 2: MUL TEMP[0].xyz, IN[1].xyzz, TEMP[0].xxxx 3: MOV TEMP[1].xy, IN[0].xyyy 4: TEX TEMP[1], TEMP[1], SAMP[0], 2D 5: MOV TEMP[2].w, TEMP[1].wwww 6: MUL TEMP[2].xyz, CONST[1][1].xyzz, TEMP[1].xyzz 7: MUL TEMP[1], TEMP[2], IN[2] 8: MOV TEMP[2].xy, IN[0].xyyy 9: TEX TEMP[2].xyz, TEMP[2], SAMP[1], 2D 10: MUL TEMP[3].xyz, TEMP[0].zxyy, IN[3].yzxx 11: MAD TEMP[3].xyz, TEMP[0].yzxx, IN[3].zxyy, -TEMP[3].xyzz 12: MOV TEMP[4].xy, IN[0].xyyy 13: TEX TEMP[4].xyw, TEMP[4], SAMP[2], 2D 14: MAD TEMP[5].xy, TEMP[4].wyyy, IMM[1].xxxx, IMM[1].yyyy 15: MOV TEMP[6].x, TEMP[5].xxxx 16: MOV TEMP[6].y, -TEMP[5].yyyy 17: MUL TEMP[6].xy, TEMP[6].xyyy, CONST[1][0].xxxx 18: MOV TEMP[7].x, TEMP[6].xxxx 19: MOV TEMP[7].y, TEMP[6].yyyy 20: DP2 TEMP[5].x, TEMP[5].xyyy, TEMP[5].xyyy 21: ADD TEMP[5].x, IMM[1].zzzz, -TEMP[5].xxxx 22: MOV_SAT TEMP[5].x, TEMP[5].xxxx 23: SQRT TEMP[5].x, TEMP[5].xxxx 24: MOV TEMP[7].z, TEMP[5].xxxx 25: DP3 TEMP[5].x, TEMP[7].xyzz, TEMP[7].xyzz 26: RSQ TEMP[5].x, TEMP[5].xxxx 27: MUL TEMP[5].xyz, TEMP[7].xyzz, TEMP[5].xxxx 28: DP3 TEMP[6].x, IN[3].xyzz, IN[3].xyzz 29: RSQ TEMP[6].x, TEMP[6].xxxx 30: MUL TEMP[6].xyz, IN[3].xyzz, TEMP[6].xxxx 31: DP3 TEMP[7].x, TEMP[3].xyzz, TEMP[3].xyzz 32: RSQ TEMP[7].x, TEMP[7].xxxx 33: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[7].xxxx 34: MUL TEMP[3].xyz, IN[3].wwww, TEMP[3].xyzz 35: MUL TEMP[3].xyz, TEMP[5].yyyy, TEMP[3].xyzz 36: MAD TEMP[3].xyz, TEMP[5].xxxx, TEMP[6].xyzz, TEMP[3].xyzz 37: MAD TEMP[0].xyz, TEMP[0].xyzz, TEMP[5].zzzz, TEMP[3].xyzz 38: DP3 TEMP[3].x, TEMP[0].xyzz, TEMP[0].xyzz 39: RSQ TEMP[3].x, TEMP[3].xxxx 40: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[3].xxxx 41: MAD TEMP[0].xyz, TEMP[0].xyzz, IMM[1].wwww, IMM[1].wwww 42: MUL TEMP[3].x, IN[2].wwww, TEMP[1].wwww 43: MUL TEMP[3].x, TEMP[3].xxxx, CONST[1][0].yyyy 44: MOV TEMP[5].x, TEMP[1].xxxx 45: MOV TEMP[5].y, TEMP[1].yyyy 46: MOV TEMP[5].z, TEMP[1].zzzz 47: MOV TEMP[5].w, TEMP[3].xxxx 48: MUL TEMP[1].x, CONST[1][3].xxxx, TEMP[2].xxxx 49: MOV TEMP[1].y, TEMP[2].yyyy 50: MUL TEMP[2].x, CONST[1][2].wwww, TEMP[2].zzzz 51: MOV TEMP[1].z, TEMP[2].xxxx 52: MOV TEMP[1].w, TEMP[3].xxxx 53: MOV TEMP[2].x, TEMP[0].xxxx 54: MOV TEMP[2].y, TEMP[0].yyyy 55: MOV TEMP[2].z, TEMP[0].zzzz 56: MUL TEMP[0].x, CONST[1][0].yyyy, TEMP[4].xxxx 57: MOV TEMP[2].w, TEMP[0].xxxx 58: MOV OUT[0], TEMP[5] 59: MOV OUT[1], TEMP[2] 60: MOV OUT[2], TEMP[1] 61: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %31 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %32 = load <32 x i8>, <32 x i8> addrspace(2)* %31, align 32, !tbaa !0 %33 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %34 = load <16 x i8>, <16 x i8> addrspace(2)* %33, align 16, !tbaa !0 %35 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %36 = bitcast <8 x i32> addrspace(2)* %35 to <32 x i8> addrspace(2)* %37 = load <32 x i8>, <32 x i8> addrspace(2)* %36, align 32, !tbaa !0 %38 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %39 = bitcast <4 x i32> addrspace(2)* %38 to <16 x i8> addrspace(2)* %40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, !tbaa !0 %41 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %42 = bitcast <8 x i32> addrspace(2)* %41 to <32 x i8> addrspace(2)* %43 = load <32 x i8>, <32 x i8> addrspace(2)* %42, align 32, !tbaa !0 %44 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %45 = bitcast <4 x i32> addrspace(2)* %44 to <16 x i8> addrspace(2)* %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %48 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %49 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %50 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %51 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %52 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %53 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %54 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %55 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %56 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %57 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %58 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %59 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %60 = fmul float %49, %49 %61 = fmul float %50, %50 %62 = fadd float %61, %60 %63 = fmul float %51, %51 %64 = fadd float %62, %63 %65 = call float @llvm.AMDGPU.rsq.clamped.f32(float %64) %66 = fmul float %49, %65 %67 = fmul float %50, %65 %68 = fmul float %51, %65 %69 = bitcast float %47 to i32 %70 = bitcast float %48 to i32 %71 = insertelement <2 x i32> undef, i32 %69, i32 0 %72 = insertelement <2 x i32> %71, i32 %70, i32 1 %73 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %72, <32 x i8> %32, <16 x i8> %34, i32 2) %74 = extractelement <4 x float> %73, i32 0 %75 = extractelement <4 x float> %73, i32 1 %76 = extractelement <4 x float> %73, i32 2 %77 = extractelement <4 x float> %73, i32 3 %78 = fmul float %26, %74 %79 = fmul float %27, %75 %80 = fmul float %28, %76 %81 = fmul float %78, %52 %82 = fmul float %79, %53 %83 = fmul float %80, %54 %84 = fmul float %77, %55 %85 = bitcast float %47 to i32 %86 = bitcast float %48 to i32 %87 = insertelement <2 x i32> undef, i32 %85, i32 0 %88 = insertelement <2 x i32> %87, i32 %86, i32 1 %89 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %88, <32 x i8> %37, <16 x i8> %40, i32 2) %90 = extractelement <4 x float> %89, i32 0 %91 = extractelement <4 x float> %89, i32 1 %92 = extractelement <4 x float> %89, i32 2 %93 = fmul float %68, %57 %94 = fmul float %66, %58 %95 = fmul float %67, %56 %96 = fmul float %67, %58 %97 = fsub float %96, %93 %98 = fmul float %68, %56 %99 = fsub float %98, %94 %100 = fmul float %66, %57 %101 = fsub float %100, %95 %102 = bitcast float %47 to i32 %103 = bitcast float %48 to i32 %104 = insertelement <2 x i32> undef, i32 %102, i32 0 %105 = insertelement <2 x i32> %104, i32 %103, i32 1 %106 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %105, <32 x i8> %43, <16 x i8> %46, i32 2) %107 = extractelement <4 x float> %106, i32 0 %108 = extractelement <4 x float> %106, i32 1 %109 = extractelement <4 x float> %106, i32 3 %110 = fmul float %109, 2.000000e+00 %111 = fadd float %110, -1.000000e+00 %112 = fmul float %108, 2.000000e+00 %113 = fadd float %112, -1.000000e+00 %114 = fmul float %111, %24 %115 = fmul float %113, %24 %116 = fmul float %111, %111 %117 = fmul float %113, %113 %118 = fadd float %116, %117 %119 = fsub float 1.000000e+00, %118 %120 = call float @llvm.AMDIL.clamp.(float %119, float 0.000000e+00, float 1.000000e+00) %121 = call float @llvm.sqrt.f32(float %120) %122 = fmul float %114, %114 %123 = fmul float %115, %115 %124 = fadd float %123, %122 %125 = fmul float %121, %121 %126 = fadd float %124, %125 %127 = call float @llvm.AMDGPU.rsq.clamped.f32(float %126) %128 = fmul float %114, %127 %129 = fmul float %115, %127 %130 = fsub float -0.000000e+00, %129 %131 = fmul float %121, %127 %132 = fmul float %56, %56 %133 = fmul float %57, %57 %134 = fadd float %133, %132 %135 = fmul float %58, %58 %136 = fadd float %134, %135 %137 = call float @llvm.AMDGPU.rsq.clamped.f32(float %136) %138 = fmul float %56, %137 %139 = fmul float %57, %137 %140 = fmul float %58, %137 %141 = fmul float %97, %97 %142 = fmul float %99, %99 %143 = fadd float %142, %141 %144 = fmul float %101, %101 %145 = fadd float %143, %144 %146 = call float @llvm.AMDGPU.rsq.clamped.f32(float %145) %147 = fmul float %97, %146 %148 = fmul float %99, %146 %149 = fmul float %101, %146 %150 = fmul float %59, %147 %151 = fmul float %59, %148 %152 = fmul float %59, %149 %153 = fmul float %150, %130 %154 = fmul float %151, %130 %155 = fmul float %152, %130 %156 = fmul float %128, %138 %157 = fadd float %156, %153 %158 = fmul float %128, %139 %159 = fadd float %158, %154 %160 = fmul float %128, %140 %161 = fadd float %160, %155 %162 = fmul float %66, %131 %163 = fadd float %162, %157 %164 = fmul float %67, %131 %165 = fadd float %164, %159 %166 = fmul float %68, %131 %167 = fadd float %166, %161 %168 = fmul float %163, %163 %169 = fmul float %165, %165 %170 = fadd float %169, %168 %171 = fmul float %167, %167 %172 = fadd float %170, %171 %173 = call float @llvm.AMDGPU.rsq.clamped.f32(float %172) %174 = fmul float %163, %173 %175 = fmul float %165, %173 %176 = fmul float %167, %173 %177 = fmul float %174, 5.000000e-01 %178 = fadd float %177, 5.000000e-01 %179 = fmul float %175, 5.000000e-01 %180 = fadd float %179, 5.000000e-01 %181 = fmul float %176, 5.000000e-01 %182 = fadd float %181, 5.000000e-01 %183 = fmul float %55, %84 %184 = fmul float %183, %25 %185 = fmul float %30, %90 %186 = fmul float %29, %92 %187 = fmul float %25, %107 %188 = call i32 @llvm.SI.packf16(float %81, float %82) %189 = bitcast i32 %188 to float %190 = call i32 @llvm.SI.packf16(float %83, float %184) %191 = bitcast i32 %190 to float %192 = call i32 @llvm.SI.packf16(float %178, float %180) %193 = bitcast i32 %192 to float %194 = call i32 @llvm.SI.packf16(float %182, float %187) %195 = bitcast i32 %194 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %189, float %191, float %189, float %191) %196 = call i32 @llvm.SI.packf16(float %185, float %91) %197 = bitcast i32 %196 to float %198 = call i32 @llvm.SI.packf16(float %186, float %184) %199 = bitcast i32 %198 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 1, i32 1, float %193, float %195, float %193, float %195) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 2, i32 1, float %197, float %199, float %197, float %199) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500 v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501 v_interp_p1_f32 v6, v0, 2, 1, [m0] ; C8180600 v_interp_p2_f32 v6, [v6], v1, 2, 1, [m0] ; C8190601 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 v_interp_p1_f32 v10, v0, 3, 2, [m0] ; C8280B00 s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 v_interp_p2_f32 v10, [v10], v1, 3, 2, [m0] ; C8290B01 v_interp_p1_f32 v11, v0, 0, 3, [m0] ; C82C0C00 v_interp_p2_f32 v11, [v11], v1, 0, 3, [m0] ; C82D0C01 v_interp_p1_f32 v12, v0, 1, 3, [m0] ; C8300D00 v_interp_p2_f32 v12, [v12], v1, 1, 3, [m0] ; C8310D01 v_interp_p1_f32 v13, v0, 2, 3, [m0] ; C8340E00 v_interp_p2_f32 v13, [v13], v1, 2, 3, [m0] ; C8350E01 s_load_dwordx4 s[20:23], s[4:5], 0x0 ; C08A0500 s_load_dwordx8 s[24:31], s[6:7], 0x0 ; C0CC0700 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s32, s[0:3], 0x4 ; C2100104 v_interp_p1_f32 v0, v0, 3, 3, [m0] ; C8000F00 s_buffer_load_dword s33, s[0:3], 0x5 ; C2108105 v_interp_p2_f32 v0, [v0], v1, 3, 3, [m0] ; C8010F01 s_buffer_load_dword s34, s[0:3], 0x6 ; C2110106 s_load_dwordx4 s[36:39], s[4:5], 0x4 ; C0920504 s_load_dwordx8 s[40:47], s[6:7], 0x8 ; C0D40708 s_load_dwordx4 s[8:11], s[4:5], 0x8 ; C0840508 s_load_dwordx8 s[12:19], s[6:7], 0x10 ; C0C60710 image_sample v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[20:23] ; F0800F00 00A60E02 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v1, s32, v14 ; 10021C20 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C v_mul_f32_e32 v14, s33, v15 ; 101C1E21 s_buffer_load_dword s5, s[0:3], 0xb ; C202810B v_mul_f32_e32 v15, s34, v16 ; 101E2022 v_mul_f32_e32 v16, v10, v17 ; 1020230A image_sample v[17:19], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[40:47], s[36:39] ; F0800700 012A1102 v_mul_f32_e32 v20, v4, v4 ; 10280904 v_mac_f32_e32 v20, v5, v5 ; 3E280B05 v_mac_f32_e32 v20, v6, v6 ; 3E280D06 v_rsq_clamp_f32_e32 v20, v20 ; 7E285914 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v17, s4, v17 ; 10222204 v_cvt_pkrtz_f16_f32_e32 v17, v17, v18 ; 5E222511 v_mul_f32_e32 v18, s5, v19 ; 10242605 v_mul_f32_e32 v4, v20, v4 ; 10080914 v_mul_f32_e32 v5, v20, v5 ; 100A0B14 v_mul_f32_e32 v6, v20, v6 ; 100C0D14 v_mul_f32_e32 v19, v12, v6 ; 10260D0C v_mad_f32 v19, v5, v13, -v19 ; D2820013 844E1B05 v_mul_f32_e32 v20, v11, v11 ; 1028170B v_mac_f32_e32 v20, v12, v12 ; 3E28190C v_mac_f32_e32 v20, v13, v13 ; 3E281B0D v_rsq_clamp_f32_e32 v20, v20 ; 7E285914 v_mul_f32_e32 v21, v13, v4 ; 102A090D v_mad_f32 v21, v6, v11, -v21 ; D2820015 84561706 v_mul_f32_e32 v22, v11, v5 ; 102C0B0B v_mad_f32 v22, v4, v12, -v22 ; D2820016 845A1904 v_mul_f32_e32 v11, v20, v11 ; 10161714 v_mul_f32_e32 v23, v19, v19 ; 102E2713 v_mac_f32_e32 v23, v21, v21 ; 3E2E2B15 v_mac_f32_e32 v23, v22, v22 ; 3E2E2D16 v_rsq_clamp_f32_e32 v23, v23 ; 7E2E5917 v_mul_f32_e32 v12, v20, v12 ; 10181914 v_mul_f32_e32 v13, v20, v13 ; 101A1B14 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 v_mul_f32_e32 v19, v23, v19 ; 10262717 v_mul_f32_e32 v20, v23, v21 ; 10282B17 v_mul_f32_e32 v21, v23, v22 ; 102A2D17 s_buffer_load_dword s0, s[0:3], 0x1 ; C2000101 image_sample v[22:24], 11, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800B00 00431602 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mad_f32 v2, 2.0, v24, -1.0 ; D2820002 03CE30F4 v_mad_f32 v3, 2.0, v23, -1.0 ; D2820003 03CE2EF4 v_mad_f32 v23, -v3, v3, 1.0 ; D2820017 23CA0703 v_mad_f32 v23, -v2, v2, v23 ; D2820017 245E0502 v_mul_f32_e32 v2, s4, v2 ; 10040404 v_mul_f32_e32 v3, s4, v3 ; 10060604 v_add_f32_e64 v23, 0, v23 clamp ; D2060817 00022E80 v_sqrt_f32_e32 v23, v23 ; 7E2E6717 v_mul_f32_e32 v24, v2, v2 ; 10300502 v_mac_f32_e32 v24, v3, v3 ; 3E300703 v_mac_f32_e32 v24, v23, v23 ; 3E302F17 v_rsq_clamp_f32_e32 v24, v24 ; 7E305918 v_mul_f32_e32 v19, v19, v0 ; 10260113 v_mul_f32_e32 v20, v20, v0 ; 10280114 v_mul_f32_e32 v0, v21, v0 ; 10000115 v_mul_f32_e32 v3, v24, v3 ; 10060718 v_mul_f32_e32 v19, v3, v19 ; 10262703 v_mul_f32_e32 v20, v3, v20 ; 10282903 v_mul_f32_e32 v0, v3, v0 ; 10000103 v_mul_f32_e32 v2, v24, v2 ; 10040518 v_mad_f32 v3, v2, v11, -v19 ; D2820003 844E1702 v_mad_f32 v11, v2, v12, -v20 ; D282000B 84521902 v_mad_f32 v0, v2, v13, -v0 ; D2820000 84021B02 v_mul_f32_e32 v2, s0, v22 ; 10042C00 v_mul_f32_e32 v10, v16, v10 ; 10141510 v_mul_f32_e32 v10, s0, v10 ; 10141400 v_mul_f32_e32 v12, v24, v23 ; 10182F18 v_mac_f32_e32 v3, v12, v4 ; 3E06090C v_mac_f32_e32 v11, v12, v5 ; 3E160B0C v_mac_f32_e32 v0, v12, v6 ; 3E000D0C v_mul_f32_e32 v1, v7, v1 ; 10020307 v_mul_f32_e32 v4, v8, v14 ; 10081D08 v_mul_f32_e32 v5, v9, v15 ; 100A1F09 v_mul_f32_e32 v6, v3, v3 ; 100C0703 v_mac_f32_e32 v6, v11, v11 ; 3E0C170B v_mac_f32_e32 v6, v0, v0 ; 3E0C0100 v_rsq_clamp_f32_e32 v6, v6 ; 7E0C5906 v_cvt_pkrtz_f16_f32_e32 v1, v1, v4 ; 5E020901 v_cvt_pkrtz_f16_f32_e32 v4, v5, v10 ; 5E081505 exp 15, 0, 1, 0, 0, v1, v4, v1, v4 ; F800040F 04010401 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v1, v6, v3 ; 10020706 v_mul_f32_e32 v3, v6, v11 ; 10061706 v_mul_f32_e32 v0, v6, v0 ; 10000106 v_mad_f32 v1, 0.5, v1, 0.5 ; D2820001 03C202F0 v_mad_f32 v3, 0.5, v3, 0.5 ; D2820003 03C206F0 v_cvt_pkrtz_f16_f32_e32 v1, v1, v3 ; 5E020701 v_mad_f32 v0, 0.5, v0, 0.5 ; D2820000 03C200F0 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 exp 15, 1, 1, 0, 0, v1, v0, v1, v0 ; F800041F 00010001 s_waitcnt expcnt(0) ; BF8C070F v_cvt_pkrtz_f16_f32_e32 v0, v18, v10 ; 5E001512 exp 15, 2, 1, 1, 1, v17, v0, v17, v0 ; F8001C2F 00110011 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 28 Code Size: 628 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x7 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[2], PERSPECTIVE DCL IN[2], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL OUT[2], COLOR[2] DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0..3] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0..3], LOCAL IMM[0] UINT32 {0, 16, 4, 0} IMM[1] FLT32 { 0.5000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV TEMP[1].w, TEMP[0].wwww 3: MUL TEMP[1].xyz, CONST[1][1].xyzz, TEMP[0].xyzz 4: MUL TEMP[0], TEMP[1], IN[2] 5: DP3 TEMP[1].x, IN[1].xyzz, IN[1].xyzz 6: RSQ TEMP[1].x, TEMP[1].xxxx 7: MUL TEMP[1].xyz, IN[1].xyzz, TEMP[1].xxxx 8: MAD TEMP[1].xyz, TEMP[1].xyzz, IMM[1].xxxx, IMM[1].xxxx 9: MUL TEMP[2].x, IN[2].wwww, TEMP[0].wwww 10: MUL TEMP[2].x, TEMP[2].xxxx, CONST[1][0].yyyy 11: MOV TEMP[3].x, TEMP[0].xxxx 12: MOV TEMP[3].y, TEMP[0].yyyy 13: MOV TEMP[3].z, TEMP[0].zzzz 14: MOV TEMP[3].w, TEMP[2].xxxx 15: MOV TEMP[0].xyz, IMM[1].yyyy 16: MOV TEMP[0].w, TEMP[2].xxxx 17: MOV TEMP[2].w, IMM[1].yyyy 18: MOV TEMP[2].x, TEMP[1].xxxx 19: MOV TEMP[2].y, TEMP[1].yyyy 20: MOV TEMP[2].z, TEMP[1].zzzz 21: MOV OUT[0], TEMP[3] 22: MOV OUT[1], TEMP[2] 23: MOV OUT[2], TEMP[0] 24: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %28 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %29 = load <32 x i8>, <32 x i8> addrspace(2)* %28, align 32, !tbaa !0 %30 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 %32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %37 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %38 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %41 = bitcast float %32 to i32 %42 = bitcast float %33 to i32 %43 = insertelement <2 x i32> undef, i32 %41, i32 0 %44 = insertelement <2 x i32> %43, i32 %42, i32 1 %45 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %44, <32 x i8> %29, <16 x i8> %31, i32 2) %46 = extractelement <4 x float> %45, i32 0 %47 = extractelement <4 x float> %45, i32 1 %48 = extractelement <4 x float> %45, i32 2 %49 = extractelement <4 x float> %45, i32 3 %50 = fmul float %25, %46 %51 = fmul float %26, %47 %52 = fmul float %27, %48 %53 = fmul float %50, %37 %54 = fmul float %51, %38 %55 = fmul float %52, %39 %56 = fmul float %49, %40 %57 = fmul float %34, %34 %58 = fmul float %35, %35 %59 = fadd float %58, %57 %60 = fmul float %36, %36 %61 = fadd float %59, %60 %62 = call float @llvm.AMDGPU.rsq.clamped.f32(float %61) %63 = fmul float %34, %62 %64 = fmul float %35, %62 %65 = fmul float %36, %62 %66 = fmul float %63, 5.000000e-01 %67 = fadd float %66, 5.000000e-01 %68 = fmul float %64, 5.000000e-01 %69 = fadd float %68, 5.000000e-01 %70 = fmul float %65, 5.000000e-01 %71 = fadd float %70, 5.000000e-01 %72 = fmul float %40, %56 %73 = fmul float %72, %24 %74 = call i32 @llvm.SI.packf16(float %53, float %54) %75 = bitcast i32 %74 to float %76 = call i32 @llvm.SI.packf16(float %55, float %73) %77 = bitcast i32 %76 to float %78 = call i32 @llvm.SI.packf16(float %67, float %69) %79 = bitcast i32 %78 to float %80 = call i32 @llvm.SI.packf16(float %71, float 0.000000e+00) %81 = bitcast i32 %80 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %75, float %77, float %75, float %77) %82 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %83 = bitcast i32 %82 to float %84 = call i32 @llvm.SI.packf16(float 0.000000e+00, float %73) %85 = bitcast i32 %84 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 1, i32 1, float %79, float %81, float %79, float %81) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 2, i32 1, float %83, float %85, float %83, float %85) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500 v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501 v_interp_p1_f32 v6, v0, 2, 1, [m0] ; C8180600 v_interp_p2_f32 v6, [v6], v1, 2, 1, [m0] ; C8190601 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_buffer_load_dword s6, s[0:3], 0x6 ; C2030106 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 v_interp_p1_f32 v0, v0, 3, 2, [m0] ; C8000B00 v_interp_p2_f32 v0, [v0], v1, 3, 2, [m0] ; C8010B01 image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800F00 00430A02 s_buffer_load_dword s0, s[0:3], 0x1 ; C2000101 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v1, s4, v10 ; 10021404 v_mul_f32_e32 v2, s5, v11 ; 10041605 v_mul_f32_e32 v3, s6, v12 ; 10061806 v_mul_f32_e32 v10, v0, v13 ; 10141B00 v_mul_f32_e32 v11, v4, v4 ; 10160904 v_mac_f32_e32 v11, v5, v5 ; 3E160B05 v_mac_f32_e32 v11, v6, v6 ; 3E160D06 v_rsq_clamp_f32_e32 v11, v11 ; 7E16590B v_mul_f32_e32 v1, v7, v1 ; 10020307 v_mul_f32_e32 v2, v8, v2 ; 10040508 v_mul_f32_e32 v3, v9, v3 ; 10060709 v_mul_f32_e32 v4, v11, v4 ; 1008090B v_mul_f32_e32 v5, v11, v5 ; 100A0B0B v_mul_f32_e32 v6, v11, v6 ; 100C0D0B v_mad_f32 v4, 0.5, v4, 0.5 ; D2820004 03C208F0 v_mad_f32 v5, 0.5, v5, 0.5 ; D2820005 03C20AF0 v_mad_f32 v6, 0.5, v6, 0.5 ; D2820006 03C20CF0 v_mul_f32_e32 v0, v10, v0 ; 1000010A v_mul_f32_e32 v0, s0, v0 ; 10000000 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_cvt_pkrtz_f16_f32_e32 v2, v3, v0 ; 5E040103 v_cvt_pkrtz_f16_f32_e32 v3, v4, v5 ; 5E060B04 v_cvt_pkrtz_f16_f32_e64 v4, v6, 0 ; D25E0004 00010106 exp 15, 0, 1, 0, 0, v1, v2, v1, v2 ; F800040F 02010201 s_waitcnt expcnt(0) ; BF8C070F v_cvt_pkrtz_f16_f32_e64 v1, 0, 0 ; D25E0001 00010080 v_cvt_pkrtz_f16_f32_e32 v0, 0, v0 ; 5E000080 exp 15, 1, 1, 0, 0, v3, v4, v3, v4 ; F800041F 04030403 exp 15, 2, 1, 1, 1, v1, v0, v1, v0 ; F8001C2F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Code Size: 276 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x7 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[2], PERSPECTIVE DCL IN[2], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL OUT[2], COLOR[2] DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[1][0..3] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0..4], LOCAL IMM[0] UINT32 {0, 16, 4, 48} IMM[1] FLT32 { 0.5000, 0.0000, 0.0000, 0.0000} IMM[2] UINT32 {44, 0, 0, 0} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV TEMP[1].w, TEMP[0].wwww 3: MUL TEMP[1].xyz, CONST[1][1].xyzz, TEMP[0].xyzz 4: MUL TEMP[0], TEMP[1], IN[2] 5: MOV TEMP[1].xy, IN[0].xyyy 6: TEX TEMP[1].xyz, TEMP[1], SAMP[1], 2D 7: DP3 TEMP[2].x, IN[1].xyzz, IN[1].xyzz 8: RSQ TEMP[2].x, TEMP[2].xxxx 9: MUL TEMP[2].xyz, IN[1].xyzz, TEMP[2].xxxx 10: MAD TEMP[2].xyz, TEMP[2].xyzz, IMM[1].xxxx, IMM[1].xxxx 11: MUL TEMP[3].x, IN[2].wwww, TEMP[0].wwww 12: MUL TEMP[3].x, TEMP[3].xxxx, CONST[1][0].yyyy 13: MOV TEMP[4].x, TEMP[0].xxxx 14: MOV TEMP[4].y, TEMP[0].yyyy 15: MOV TEMP[4].z, TEMP[0].zzzz 16: MOV TEMP[4].w, TEMP[3].xxxx 17: MUL TEMP[0].x, CONST[1][3].xxxx, TEMP[1].xxxx 18: MOV TEMP[0].y, TEMP[1].yyyy 19: MUL TEMP[1].x, CONST[1][2].wwww, TEMP[1].zzzz 20: MOV TEMP[0].z, TEMP[1].xxxx 21: MOV TEMP[0].w, TEMP[3].xxxx 22: MOV TEMP[1].w, IMM[1].yyyy 23: MOV TEMP[1].x, TEMP[2].xxxx 24: MOV TEMP[1].y, TEMP[2].yyyy 25: MOV TEMP[1].z, TEMP[2].zzzz 26: MOV OUT[0], TEMP[4] 27: MOV OUT[1], TEMP[1] 28: MOV OUT[2], TEMP[0] 29: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %30 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %31 = load <32 x i8>, <32 x i8> addrspace(2)* %30, align 32, !tbaa !0 %32 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0 %34 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %35 = bitcast <8 x i32> addrspace(2)* %34 to <32 x i8> addrspace(2)* %36 = load <32 x i8>, <32 x i8> addrspace(2)* %35, align 32, !tbaa !0 %37 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %38 = bitcast <4 x i32> addrspace(2)* %37 to <16 x i8> addrspace(2)* %39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0 %40 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %44 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %45 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %46 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %47 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %48 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %49 = bitcast float %40 to i32 %50 = bitcast float %41 to i32 %51 = insertelement <2 x i32> undef, i32 %49, i32 0 %52 = insertelement <2 x i32> %51, i32 %50, i32 1 %53 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %52, <32 x i8> %31, <16 x i8> %33, i32 2) %54 = extractelement <4 x float> %53, i32 0 %55 = extractelement <4 x float> %53, i32 1 %56 = extractelement <4 x float> %53, i32 2 %57 = extractelement <4 x float> %53, i32 3 %58 = fmul float %25, %54 %59 = fmul float %26, %55 %60 = fmul float %27, %56 %61 = fmul float %58, %45 %62 = fmul float %59, %46 %63 = fmul float %60, %47 %64 = fmul float %57, %48 %65 = bitcast float %40 to i32 %66 = bitcast float %41 to i32 %67 = insertelement <2 x i32> undef, i32 %65, i32 0 %68 = insertelement <2 x i32> %67, i32 %66, i32 1 %69 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %68, <32 x i8> %36, <16 x i8> %39, i32 2) %70 = extractelement <4 x float> %69, i32 0 %71 = extractelement <4 x float> %69, i32 1 %72 = extractelement <4 x float> %69, i32 2 %73 = fmul float %42, %42 %74 = fmul float %43, %43 %75 = fadd float %74, %73 %76 = fmul float %44, %44 %77 = fadd float %75, %76 %78 = call float @llvm.AMDGPU.rsq.clamped.f32(float %77) %79 = fmul float %42, %78 %80 = fmul float %43, %78 %81 = fmul float %44, %78 %82 = fmul float %79, 5.000000e-01 %83 = fadd float %82, 5.000000e-01 %84 = fmul float %80, 5.000000e-01 %85 = fadd float %84, 5.000000e-01 %86 = fmul float %81, 5.000000e-01 %87 = fadd float %86, 5.000000e-01 %88 = fmul float %48, %64 %89 = fmul float %88, %24 %90 = fmul float %29, %70 %91 = fmul float %28, %72 %92 = call i32 @llvm.SI.packf16(float %61, float %62) %93 = bitcast i32 %92 to float %94 = call i32 @llvm.SI.packf16(float %63, float %89) %95 = bitcast i32 %94 to float %96 = call i32 @llvm.SI.packf16(float %83, float %85) %97 = bitcast i32 %96 to float %98 = call i32 @llvm.SI.packf16(float %87, float 0.000000e+00) %99 = bitcast i32 %98 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %93, float %95, float %93, float %95) %100 = call i32 @llvm.SI.packf16(float %90, float %71) %101 = bitcast i32 %100 to float %102 = call i32 @llvm.SI.packf16(float %91, float %89) %103 = bitcast i32 %102 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 1, i32 1, float %97, float %99, float %97, float %99) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 2, i32 1, float %101, float %103, float %101, float %103) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500 v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501 v_interp_p1_f32 v6, v0, 2, 1, [m0] ; C8180600 v_interp_p2_f32 v6, [v6], v1, 2, 1, [m0] ; C8190601 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_buffer_load_dword s6, s[0:3], 0x6 ; C2030106 v_interp_p1_f32 v0, v0, 3, 2, [m0] ; C8000B00 s_buffer_load_dword s7, s[0:3], 0xc ; C203810C s_buffer_load_dword s32, s[0:3], 0xb ; C210010B v_interp_p2_f32 v0, [v0], v1, 3, 2, [m0] ; C8010B01 image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[8:11] ; F0800F00 00440A02 image_sample v[1:3], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[12:15] ; F0800700 00660102 s_buffer_load_dword s0, s[0:3], 0x1 ; C2000101 s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_mul_f32_e32 v10, s4, v10 ; 10141404 v_mul_f32_e32 v11, s5, v11 ; 10161605 v_mul_f32_e32 v12, s6, v12 ; 10181806 v_mul_f32_e32 v13, v0, v13 ; 101A1B00 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, s7, v1 ; 10020207 v_mul_f32_e32 v3, s32, v3 ; 10060620 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 v_mul_f32_e32 v2, v7, v10 ; 10041507 v_mul_f32_e32 v7, v8, v11 ; 100E1708 v_mul_f32_e32 v8, v9, v12 ; 10101909 v_mul_f32_e32 v0, v13, v0 ; 1000010D v_mul_f32_e32 v0, s0, v0 ; 10000000 v_mul_f32_e32 v9, v4, v4 ; 10120904 v_mac_f32_e32 v9, v5, v5 ; 3E120B05 v_mac_f32_e32 v9, v6, v6 ; 3E120D06 v_rsq_clamp_f32_e32 v9, v9 ; 7E125909 v_cvt_pkrtz_f16_f32_e32 v2, v2, v7 ; 5E040F02 v_cvt_pkrtz_f16_f32_e32 v7, v8, v0 ; 5E0E0108 exp 15, 0, 1, 0, 0, v2, v7, v2, v7 ; F800040F 07020702 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v2, v9, v4 ; 10040909 v_mul_f32_e32 v4, v9, v5 ; 10080B09 v_mul_f32_e32 v5, v9, v6 ; 100A0D09 v_mad_f32 v2, 0.5, v2, 0.5 ; D2820002 03C204F0 v_mad_f32 v4, 0.5, v4, 0.5 ; D2820004 03C208F0 v_cvt_pkrtz_f16_f32_e32 v2, v2, v4 ; 5E040902 v_mad_f32 v4, 0.5, v5, 0.5 ; D2820004 03C20AF0 v_cvt_pkrtz_f16_f32_e64 v4, v4, 0 ; D25E0004 00010104 exp 15, 1, 1, 0, 0, v2, v4, v2, v4 ; F800041F 04020402 v_cvt_pkrtz_f16_f32_e32 v0, v3, v0 ; 5E000103 exp 15, 2, 1, 1, 1, v1, v0, v1, v0 ; F8001C2F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 16 Code Size: 308 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0xB last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], POSITION, LINEAR DCL IN[1], GENERIC[0], PERSPECTIVE DCL IN[2], GENERIC[1], PERSPECTIVE DCL IN[3], GENERIC[2], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL OUT[2], COLOR[2] DCL OUT[3], COLOR[3] DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL CONST[5] DCL CONST[1][0..17] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0] DCL TEMP[1..10], LOCAL IMM[0] FLT32 { 1.0000, 0.5000, 0.0000, 2.0000} IMM[1] UINT32 {0, 156, 176, 160} IMM[2] FLT32 { -1.0000, 0.1000, 0.3110, 1120.0000} IMM[3] UINT32 {164, 168, 152, 3} IMM[4] UINT32 {348, 180, 184, 172} IMM[5] UINT32 {400, 304, 44, 60} IMM[6] UINT32 {272, 208, 236, 448} IMM[7] UINT32 {224, 204, 32, 48} IMM[8] FLT32 { -0.0500, 20.0000, 3.5000, 0.0000} IMM[9] UINT32 {64, 240, 244, 0} 0: MOV TEMP[0], IN[0] 1: MAD TEMP[0].y, IN[0], CONST[5].xxxx, CONST[5].yyyy 2: MOV TEMP[1].w, IMM[0].xxxx 3: MOV TEMP[1].x, IN[2].wwww 4: MOV TEMP[1].y, IN[2].wwww 5: MOV TEMP[1].z, IN[2].wwww 6: DP3 TEMP[2].x, IN[3].xyzz, IN[3].xyzz 7: RSQ TEMP[2].x, TEMP[2].xxxx 8: MUL TEMP[2].xyz, IN[3].xyzz, TEMP[2].xxxx 9: MAD TEMP[2].xyz, TEMP[2].xyzz, IMM[0].yyyy, IMM[0].yyyy 10: MOV TEMP[3].w, IMM[0].zzzz 11: MOV TEMP[3].x, TEMP[2].xxxx 12: MOV TEMP[3].y, TEMP[2].yyyy 13: MOV TEMP[3].z, TEMP[2].zzzz 14: MUL TEMP[2].xy, CONST[1][9].wwww, IN[2].xyzz 15: MOV TEMP[4].x, TEMP[2].xxxx 16: MOV TEMP[4].y, -TEMP[2].yyyy 17: MUL TEMP[2].xy, CONST[1][11].xxxx, TEMP[4].xyyy 18: MUL TEMP[5].xy, CONST[1][10].xxxx, IN[1].xyyy 19: MOV TEMP[5].xy, TEMP[5].xyyy 20: TEX TEMP[5].xy, TEMP[5], SAMP[1], 2D 21: MAD TEMP[5].xy, TEMP[5].xyyy, IMM[0].wwww, IMM[2].xxxx 22: MUL TEMP[4].xy, TEMP[4].xyyy, CONST[1][10].yyyy 23: MOV TEMP[4].xy, TEMP[4].xyyy 24: TEX TEMP[4].y, TEMP[4], SAMP[0], 2D 25: MUL TEMP[6].x, CONST[1][9].zzzz, CONST[4][21].wwww 26: MUL TEMP[7].x, CONST[1][11].yyyy, IMM[0].wwww 27: RCP TEMP[7].x, TEMP[7].xxxx 28: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].xxxx 29: MAD TEMP[4].x, TEMP[4].yyyy, CONST[1][10].zzzz, TEMP[6].xxxx 30: FRC TEMP[6].x, TEMP[4].xxxx 31: ADD TEMP[7].x, TEMP[4].xxxx, IMM[0].yyyy 32: FRC TEMP[8].x, TEMP[7].xxxx 33: MUL TEMP[9].xy, CONST[1][11].zzzz, TEMP[5].xyyy 34: MUL TEMP[10].x, TEMP[5].yyyy, TEMP[5].yyyy 35: MAD TEMP[5].x, TEMP[5].xxxx, TEMP[5].xxxx, TEMP[10].xxxx 36: ADD TEMP[5].x, TEMP[5].xxxx, IMM[2].yyyy 37: MUL TEMP[5].x, TEMP[5].xxxx, CONST[1][0].xxxx 38: MAD TEMP[10].x, IMM[0].wwww, TEMP[6].xxxx, IMM[2].xxxx 39: ABS TEMP[10].x, TEMP[10].xxxx 40: POW TEMP[10].x, TEMP[10].xxxx, CONST[1][10].wwww 41: FLR TEMP[7].x, TEMP[7].xxxx 42: MAD TEMP[7].x, IMM[2].zzzz, TEMP[7].xxxx, IMM[0].yyyy 43: ADD TEMP[7].xy, TEMP[7].xxxx, TEMP[2].xyyy 44: MAD TEMP[7].xy, TEMP[9].xyyy, TEMP[8].xxxx, TEMP[7].xyyy 45: MOV TEMP[7].xy, TEMP[7].xyyy 46: TEX TEMP[7].yw, TEMP[7], SAMP[2], 2D 47: MAD TEMP[8].x, IMM[0].wwww, TEMP[8].xxxx, IMM[2].xxxx 48: ABS TEMP[8].x, TEMP[8].xxxx 49: POW TEMP[8].x, TEMP[8].xxxx, CONST[1][10].wwww 50: FLR TEMP[4].x, TEMP[4].xxxx 51: MUL TEMP[4].x, TEMP[4].xxxx, IMM[2].zzzz 52: ADD TEMP[2].xy, TEMP[4].xxxx, TEMP[2].xyyy 53: MAD TEMP[2].xy, TEMP[9].xyyy, TEMP[6].xxxx, TEMP[2].xyyy 54: MOV TEMP[2].xy, TEMP[2].xyyy 55: TEX TEMP[2].yw, TEMP[2], SAMP[2], 2D 56: MUL TEMP[2].xy, TEMP[8].xxxx, TEMP[2].wyyy 57: MAD TEMP[2].xy, TEMP[10].xxxx, TEMP[7].wyyy, TEMP[2].xyyy 58: MAD TEMP[2].xy, IMM[0].wwww, TEMP[2].xyyy, IMM[2].xxxx 59: MUL TEMP[2].xy, TEMP[5].xxxx, TEMP[2].xyyy 60: MOV TEMP[4].xy, TEMP[2].xyxx 61: DP2 TEMP[5].x, TEMP[2].xyyy, TEMP[2].xyyy 62: RSQ TEMP[5].x, TEMP[5].xxxx 63: MUL TEMP[5].xy, TEMP[2].xyyy, TEMP[5].xxxx 64: DP2 TEMP[2].x, TEMP[2].xyyy, TEMP[2].xyyy 65: FSLT TEMP[2].x, IMM[0].xxxx, TEMP[2].xxxx 66: UIF TEMP[2].xxxx :2 67: MOV TEMP[2].xy, TEMP[5].xyzx 68: ELSE :2 69: MOV TEMP[2].xy, TEMP[4].xyzx 70: ENDIF 71: MOV TEMP[4].xy, TEMP[2].xyxx 72: DP2 TEMP[5].x, TEMP[2].xyyy, TEMP[2].xyyy 73: ADD TEMP[5].x, IMM[0].xxxx, -TEMP[5].xxxx 74: MOV_SAT TEMP[5].x, TEMP[5].xxxx 75: SQRT TEMP[5].x, TEMP[5].xxxx 76: MOV TEMP[4].z, TEMP[5].xxxx 77: ADD TEMP[5].xy, TEMP[0].xyyy, IMM[0].yyyy 78: MUL TEMP[5].xy, TEMP[5].xyyy, CONST[4][25].xyyy 79: MOV TEMP[6].xy, TEMP[5].xyyy 80: TEX TEMP[6].w, TEMP[6], SAMP[3], 2D 81: ADD TEMP[7].xyz, IN[2].xyzz, -CONST[4][19].xyzz 82: MOV TEMP[8].xyz, -TEMP[7].xyzx 83: DP3 TEMP[8].x, TEMP[8].xyzz, TEMP[8].xyzz 84: SQRT TEMP[8].x, TEMP[8].xxxx 85: MUL TEMP[9].x, CONST[1][2].wwww, TEMP[6].wwww 86: MUL TEMP[9].xy, TEMP[9].xxxx, TEMP[2].xyyy 87: MUL TEMP[9].xy, IMM[2].wwww, TEMP[9].xyyy 88: RCP TEMP[10].xy, TEMP[8].xxxx 89: MAD TEMP[9].xy, TEMP[9].xyyy, TEMP[10].xyyy, TEMP[5].xyyy 90: MUL TEMP[10].x, CONST[1][3].wwww, TEMP[6].wwww 91: MUL TEMP[2].xy, TEMP[10].xxxx, TEMP[2].xyyy 92: MUL TEMP[2].xy, TEMP[2].xyyy, IMM[2].wwww 93: RCP TEMP[8].xy, TEMP[8].xxxx 94: MUL TEMP[2].xy, TEMP[2].xyyy, TEMP[8].xyyy 95: ADD TEMP[2].xy, TEMP[5].xyyy, -TEMP[2].xyyy 96: MOV TEMP[5].x, TEMP[9].xxxx 97: MAX TEMP[8].x, TEMP[9].yyyy, CONST[1][17].yyyy 98: MIN TEMP[8].x, TEMP[8].xxxx, CONST[1][17].wwww 99: MOV TEMP[5].y, TEMP[8].xxxx 100: MOV TEMP[8].x, TEMP[2].xxxx 101: MAX TEMP[2].x, TEMP[2].yyyy, CONST[1][17].yyyy 102: MIN TEMP[2].x, TEMP[2].xxxx, CONST[1][17].wwww 103: MOV TEMP[8].y, TEMP[2].xxxx 104: MOV TEMP[2].xy, TEMP[8].xyyy 105: TEX TEMP[2].xyz, TEMP[2], SAMP[3], 2D 106: DP3 TEMP[8].x, TEMP[7].xyzz, TEMP[7].xyzz 107: RSQ TEMP[8].x, TEMP[8].xxxx 108: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[8].xxxx 109: MOV TEMP[7].xyz, -TEMP[7].xyzx 110: DP3 TEMP[8].x, TEMP[4].xyzz, TEMP[7].xyzz 111: MUL TEMP[4].xyz, TEMP[8].xxxx, TEMP[4].xyzz 112: MUL TEMP[4].xyz, IMM[0].wwww, TEMP[4].xyzz 113: ADD TEMP[4].xyz, TEMP[7].xyzz, -TEMP[4].xyzz 114: DP3 TEMP[4].x, CONST[1][13].xyzz, TEMP[4].xyzz 115: MOV_SAT TEMP[4].x, TEMP[4].xxxx 116: POW TEMP[4].x, TEMP[4].xxxx, CONST[1][14].wwww 117: MUL TEMP[7].xyz, CONST[4][28].xyzz, CONST[1][14].xyzz 118: ADD TEMP[8].x, TEMP[6].wwww, IMM[8].xxxx 119: MUL TEMP[8].x, TEMP[8].xxxx, IMM[8].yyyy 120: MOV_SAT TEMP[8].x, TEMP[8].xxxx 121: MUL TEMP[8].x, CONST[1][12].wwww, TEMP[8].xxxx 122: MUL TEMP[9].x, IMM[0].wwww, TEMP[6].wwww 123: MOV_SAT TEMP[9].x, TEMP[9].xxxx 124: MOV TEMP[5].xy, TEMP[5].xyyy 125: TEX TEMP[5].xyz, TEMP[5], SAMP[4], 2D 126: MUL TEMP[10].xyz, CONST[4][28].xyzz, CONST[1][2].xyzz 127: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[10].xyzz 128: MUL TEMP[5].xyz, TEMP[9].xxxx, TEMP[5].xyzz 129: MUL TEMP[9].xyz, CONST[1][3].xyzz, TEMP[2].xyzz 130: MUL TEMP[10].x, TEMP[6].wwww, IMM[8].zzzz 131: MOV_SAT TEMP[10].x, TEMP[10].xxxx 132: LRP TEMP[2].xyz, TEMP[10].xxxx, TEMP[9].xyzz, TEMP[2].xyzz 133: LRP TEMP[2].xyz, TEMP[6].wwww, CONST[1][4].xyzz, TEMP[2].xyzz 134: MAD TEMP[2].xyz, TEMP[8].xxxx, TEMP[5].xyzz, TEMP[2].xyzz 135: MAD TEMP[2].xyz, TEMP[4].xxxx, TEMP[7].xyzz, TEMP[2].xyzz 136: MOV_SAT TEMP[2].xyz, TEMP[2].xyzz 137: MOV TEMP[5].w, IMM[0].zzzz 138: MOV TEMP[5].x, TEMP[2].xxxx 139: MOV TEMP[5].y, TEMP[2].yyyy 140: MOV TEMP[5].z, TEMP[2].zzzz 141: MOV TEMP[2].x, IMM[0].zzzz 142: MOV TEMP[6].z, IMM[0].zzzz 143: MOV TEMP[6].x, CONST[1][15].xxxx 144: POW TEMP[4].x, TEMP[4].xxxx, CONST[1][15].yyyy 145: MOV TEMP[6].y, TEMP[4].xxxx 146: MOV TEMP[2].yzw, TEMP[6].yxyz 147: MOV OUT[2], TEMP[1] 148: MOV OUT[0], TEMP[5] 149: MOV OUT[3], TEMP[3] 150: MOV OUT[1], TEMP[2] 151: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %26 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %27 = load <16 x i8>, <16 x i8> addrspace(2)* %26, align 16, !tbaa !0 %28 = call float @llvm.SI.load.const(<16 x i8> %27, i32 0) %29 = call float @llvm.SI.load.const(<16 x i8> %27, i32 32) %30 = call float @llvm.SI.load.const(<16 x i8> %27, i32 36) %31 = call float @llvm.SI.load.const(<16 x i8> %27, i32 40) %32 = call float @llvm.SI.load.const(<16 x i8> %27, i32 44) %33 = call float @llvm.SI.load.const(<16 x i8> %27, i32 48) %34 = call float @llvm.SI.load.const(<16 x i8> %27, i32 52) %35 = call float @llvm.SI.load.const(<16 x i8> %27, i32 56) %36 = call float @llvm.SI.load.const(<16 x i8> %27, i32 60) %37 = call float @llvm.SI.load.const(<16 x i8> %27, i32 64) %38 = call float @llvm.SI.load.const(<16 x i8> %27, i32 68) %39 = call float @llvm.SI.load.const(<16 x i8> %27, i32 72) %40 = call float @llvm.SI.load.const(<16 x i8> %27, i32 152) %41 = call float @llvm.SI.load.const(<16 x i8> %27, i32 156) %42 = call float @llvm.SI.load.const(<16 x i8> %27, i32 160) %43 = call float @llvm.SI.load.const(<16 x i8> %27, i32 164) %44 = call float @llvm.SI.load.const(<16 x i8> %27, i32 168) %45 = call float @llvm.SI.load.const(<16 x i8> %27, i32 172) %46 = call float @llvm.SI.load.const(<16 x i8> %27, i32 176) %47 = call float @llvm.SI.load.const(<16 x i8> %27, i32 180) %48 = call float @llvm.SI.load.const(<16 x i8> %27, i32 184) %49 = call float @llvm.SI.load.const(<16 x i8> %27, i32 204) %50 = call float @llvm.SI.load.const(<16 x i8> %27, i32 208) %51 = call float @llvm.SI.load.const(<16 x i8> %27, i32 212) %52 = call float @llvm.SI.load.const(<16 x i8> %27, i32 216) %53 = call float @llvm.SI.load.const(<16 x i8> %27, i32 224) %54 = call float @llvm.SI.load.const(<16 x i8> %27, i32 228) %55 = call float @llvm.SI.load.const(<16 x i8> %27, i32 232) %56 = call float @llvm.SI.load.const(<16 x i8> %27, i32 236) %57 = call float @llvm.SI.load.const(<16 x i8> %27, i32 240) %58 = call float @llvm.SI.load.const(<16 x i8> %27, i32 244) %59 = call float @llvm.SI.load.const(<16 x i8> %27, i32 276) %60 = call float @llvm.SI.load.const(<16 x i8> %27, i32 284) %61 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4 %62 = load <16 x i8>, <16 x i8> addrspace(2)* %61, align 16, !tbaa !0 %63 = call float @llvm.SI.load.const(<16 x i8> %62, i32 304) %64 = call float @llvm.SI.load.const(<16 x i8> %62, i32 308) %65 = call float @llvm.SI.load.const(<16 x i8> %62, i32 312) %66 = call float @llvm.SI.load.const(<16 x i8> %62, i32 348) %67 = call float @llvm.SI.load.const(<16 x i8> %62, i32 400) %68 = call float @llvm.SI.load.const(<16 x i8> %62, i32 404) %69 = call float @llvm.SI.load.const(<16 x i8> %62, i32 448) %70 = call float @llvm.SI.load.const(<16 x i8> %62, i32 452) %71 = call float @llvm.SI.load.const(<16 x i8> %62, i32 456) %72 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %73 = load <32 x i8>, <32 x i8> addrspace(2)* %72, align 32, !tbaa !0 %74 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %75 = load <16 x i8>, <16 x i8> addrspace(2)* %74, align 16, !tbaa !0 %76 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %77 = bitcast <8 x i32> addrspace(2)* %76 to <32 x i8> addrspace(2)* %78 = load <32 x i8>, <32 x i8> addrspace(2)* %77, align 32, !tbaa !0 %79 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %80 = bitcast <4 x i32> addrspace(2)* %79 to <16 x i8> addrspace(2)* %81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0 %82 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %83 = load <8 x i32>, <8 x i32> addrspace(2)* %82, align 32, !tbaa !0 %84 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %85 = load <4 x i32>, <4 x i32> addrspace(2)* %84, align 16, !tbaa !0 %86 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %87 = load <8 x i32>, <8 x i32> addrspace(2)* %86, align 32, !tbaa !0 %88 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %89 = load <4 x i32>, <4 x i32> addrspace(2)* %88, align 16, !tbaa !0 %90 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %91 = bitcast <8 x i32> addrspace(2)* %90 to <32 x i8> addrspace(2)* %92 = load <32 x i8>, <32 x i8> addrspace(2)* %91, align 32, !tbaa !0 %93 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %94 = bitcast <4 x i32> addrspace(2)* %93 to <16 x i8> addrspace(2)* %95 = load <16 x i8>, <16 x i8> addrspace(2)* %94, align 16, !tbaa !0 %96 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %97 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %98 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %99 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %100 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %101 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %102 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %103 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %104 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %105 = fmul float %24, %15 %106 = fadd float %105, %25 %107 = fmul float %102, %102 %108 = fmul float %103, %103 %109 = fadd float %108, %107 %110 = fmul float %104, %104 %111 = fadd float %109, %110 %112 = call float @llvm.AMDGPU.rsq.clamped.f32(float %111) %113 = fmul float %102, %112 %114 = fmul float %103, %112 %115 = fmul float %104, %112 %116 = fmul float %113, 5.000000e-01 %117 = fadd float %116, 5.000000e-01 %118 = fmul float %114, 5.000000e-01 %119 = fadd float %118, 5.000000e-01 %120 = fmul float %115, 5.000000e-01 %121 = fadd float %120, 5.000000e-01 %122 = fmul float %41, %98 %123 = fmul float %41, %99 %124 = fsub float -0.000000e+00, %123 %125 = fmul float %46, %122 %126 = fmul float %46, %124 %127 = fmul float %42, %96 %128 = fmul float %42, %97 %129 = bitcast float %127 to i32 %130 = bitcast float %128 to i32 %131 = insertelement <2 x i32> undef, i32 %129, i32 0 %132 = insertelement <2 x i32> %131, i32 %130, i32 1 %133 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %132, <32 x i8> %78, <16 x i8> %81, i32 2) %134 = extractelement <4 x float> %133, i32 0 %135 = extractelement <4 x float> %133, i32 1 %136 = fmul float %134, 2.000000e+00 %137 = fadd float %136, -1.000000e+00 %138 = fmul float %135, 2.000000e+00 %139 = fadd float %138, -1.000000e+00 %140 = fmul float %122, %43 %141 = fmul float %43, %124 %142 = bitcast float %140 to i32 %143 = bitcast float %141 to i32 %144 = insertelement <2 x i32> undef, i32 %142, i32 0 %145 = insertelement <2 x i32> %144, i32 %143, i32 1 %146 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %145, <32 x i8> %73, <16 x i8> %75, i32 2) %147 = extractelement <4 x float> %146, i32 1 %148 = fmul float %40, %66 %149 = fmul float %47, 2.000000e+00 %150 = fdiv float 1.000000e+00, %149 %151 = fmul float %148, %150 %152 = fmul float %147, %44 %153 = fadd float %152, %151 %154 = call float @llvm.AMDIL.fraction.(float %153) %155 = fadd float %153, 5.000000e-01 %156 = call float @llvm.AMDIL.fraction.(float %155) %157 = fmul float %48, %137 %158 = fmul float %48, %139 %159 = fmul float %139, %139 %160 = fmul float %137, %137 %161 = fadd float %160, %159 %162 = fadd float %161, 0x3FB99999A0000000 %163 = fmul float %162, %28 %164 = fmul float %154, 2.000000e+00 %165 = fadd float %164, -1.000000e+00 %166 = call float @fabs(float %165) %167 = call float @llvm.pow.f32(float %166, float %45) %168 = call float @floor(float %155) %169 = fmul float %168, 0x3FD3E76C80000000 %170 = fadd float %169, 5.000000e-01 %171 = fadd float %170, %125 %172 = fadd float %170, %126 %173 = fmul float %157, %156 %174 = fadd float %173, %171 %175 = fmul float %158, %156 %176 = fadd float %175, %172 %177 = bitcast float %174 to i32 %178 = bitcast float %176 to i32 %179 = insertelement <2 x i32> undef, i32 %177, i32 0 %180 = insertelement <2 x i32> %179, i32 %178, i32 1 %181 = bitcast <8 x i32> %83 to <32 x i8> %182 = bitcast <4 x i32> %85 to <16 x i8> %183 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %180, <32 x i8> %181, <16 x i8> %182, i32 2) %184 = extractelement <4 x float> %183, i32 1 %185 = extractelement <4 x float> %183, i32 3 %186 = fmul float %156, 2.000000e+00 %187 = fadd float %186, -1.000000e+00 %188 = call float @fabs(float %187) %189 = call float @llvm.pow.f32(float %188, float %45) %190 = call float @floor(float %153) %191 = fmul float %190, 0x3FD3E76C80000000 %192 = fadd float %191, %125 %193 = fadd float %191, %126 %194 = fmul float %157, %154 %195 = fadd float %194, %192 %196 = fmul float %158, %154 %197 = fadd float %196, %193 %198 = bitcast float %195 to i32 %199 = bitcast float %197 to i32 %200 = insertelement <2 x i32> undef, i32 %198, i32 0 %201 = insertelement <2 x i32> %200, i32 %199, i32 1 %202 = bitcast <8 x i32> %83 to <32 x i8> %203 = bitcast <4 x i32> %85 to <16 x i8> %204 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %201, <32 x i8> %202, <16 x i8> %203, i32 2) %205 = extractelement <4 x float> %204, i32 1 %206 = extractelement <4 x float> %204, i32 3 %207 = fmul float %189, %206 %208 = fmul float %189, %205 %209 = fmul float %167, %185 %210 = fadd float %209, %207 %211 = fmul float %167, %184 %212 = fadd float %211, %208 %213 = fmul float %210, 2.000000e+00 %214 = fadd float %213, -1.000000e+00 %215 = fmul float %212, 2.000000e+00 %216 = fadd float %215, -1.000000e+00 %217 = fmul float %163, %214 %218 = fmul float %163, %216 %219 = fmul float %217, %217 %220 = fmul float %218, %218 %221 = fadd float %219, %220 %222 = call float @llvm.AMDGPU.rsq.clamped.f32(float %221) %223 = fmul float %217, %222 %224 = fmul float %218, %222 %225 = fmul float %217, %217 %226 = fmul float %218, %218 %227 = fadd float %225, %226 %228 = fcmp ogt float %227, 1.000000e+00 %. = select i1 %228, float %223, float %217 %.44 = select i1 %228, float %224, float %218 %229 = fmul float %., %. %230 = fmul float %.44, %.44 %231 = fadd float %229, %230 %232 = fsub float 1.000000e+00, %231 %233 = call float @llvm.AMDIL.clamp.(float %232, float 0.000000e+00, float 1.000000e+00) %234 = call float @llvm.sqrt.f32(float %233) %235 = fadd float %14, 5.000000e-01 %236 = fadd float %106, 5.000000e-01 %237 = fmul float %235, %67 %238 = fmul float %236, %68 %239 = bitcast float %237 to i32 %240 = bitcast float %238 to i32 %241 = insertelement <2 x i32> undef, i32 %239, i32 0 %242 = insertelement <2 x i32> %241, i32 %240, i32 1 %243 = bitcast <8 x i32> %87 to <32 x i8> %244 = bitcast <4 x i32> %89 to <16 x i8> %245 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %242, <32 x i8> %243, <16 x i8> %244, i32 2) %246 = extractelement <4 x float> %245, i32 3 %247 = fsub float %98, %63 %248 = fsub float %99, %64 %249 = fsub float %100, %65 %250 = fmul float %247, %247 %251 = fmul float %248, %248 %252 = fadd float %251, %250 %253 = fmul float %249, %249 %254 = fadd float %252, %253 %255 = call float @llvm.sqrt.f32(float %254) %256 = fmul float %32, %246 %257 = fmul float %256, %. %258 = fmul float %256, %.44 %259 = fmul float %257, 1.120000e+03 %260 = fmul float %258, 1.120000e+03 %261 = fdiv float 1.000000e+00, %255 %262 = fmul float %259, %261 %263 = fadd float %262, %237 %264 = fmul float %260, %261 %265 = fadd float %264, %238 %266 = fmul float %36, %246 %267 = fmul float %266, %. %268 = fmul float %266, %.44 %269 = fmul float %267, 1.120000e+03 %270 = fmul float %268, 1.120000e+03 %271 = fdiv float 1.000000e+00, %255 %272 = fmul float %269, %271 %273 = fmul float %270, %271 %274 = fsub float %237, %272 %275 = fsub float %238, %273 %276 = call float @llvm.maxnum.f32(float %265, float %59) %277 = call float @llvm.minnum.f32(float %276, float %60) %278 = call float @llvm.maxnum.f32(float %275, float %59) %279 = call float @llvm.minnum.f32(float %278, float %60) %280 = bitcast float %274 to i32 %281 = bitcast float %279 to i32 %282 = insertelement <2 x i32> undef, i32 %280, i32 0 %283 = insertelement <2 x i32> %282, i32 %281, i32 1 %284 = bitcast <8 x i32> %87 to <32 x i8> %285 = bitcast <4 x i32> %89 to <16 x i8> %286 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %283, <32 x i8> %284, <16 x i8> %285, i32 2) %287 = extractelement <4 x float> %286, i32 0 %288 = extractelement <4 x float> %286, i32 1 %289 = extractelement <4 x float> %286, i32 2 %290 = fmul float %247, %247 %291 = fmul float %248, %248 %292 = fadd float %291, %290 %293 = fmul float %249, %249 %294 = fadd float %292, %293 %295 = call float @llvm.AMDGPU.rsq.clamped.f32(float %294) %296 = fmul float %247, %295 %297 = fmul float %248, %295 %298 = fmul float %249, %295 %299 = fmul float %296, %. %300 = fsub float -0.000000e+00, %299 %301 = fmul float %297, %.44 %302 = fsub float %300, %301 %303 = fmul float %298, %234 %304 = fsub float %302, %303 %305 = fmul float %304, %. %306 = fmul float %304, %.44 %307 = fmul float %304, %234 %308 = fmul float %305, 2.000000e+00 %309 = fmul float %306, 2.000000e+00 %310 = fmul float %307, 2.000000e+00 %311 = fsub float -0.000000e+00, %308 %312 = fsub float %311, %296 %313 = fsub float -0.000000e+00, %309 %314 = fsub float %313, %297 %315 = fsub float -0.000000e+00, %310 %316 = fsub float %315, %298 %317 = fmul float %50, %312 %318 = fmul float %51, %314 %319 = fadd float %318, %317 %320 = fmul float %52, %316 %321 = fadd float %319, %320 %322 = call float @llvm.AMDIL.clamp.(float %321, float 0.000000e+00, float 1.000000e+00) %323 = call float @llvm.pow.f32(float %322, float %56) %324 = fmul float %69, %53 %325 = fmul float %70, %54 %326 = fmul float %71, %55 %327 = fadd float %246, 0xBFA99999A0000000 %328 = fmul float %327, 2.000000e+01 %329 = call float @llvm.AMDIL.clamp.(float %328, float 0.000000e+00, float 1.000000e+00) %330 = fmul float %49, %329 %331 = fmul float %246, 2.000000e+00 %332 = call float @llvm.AMDIL.clamp.(float %331, float 0.000000e+00, float 1.000000e+00) %333 = bitcast float %263 to i32 %334 = bitcast float %277 to i32 %335 = insertelement <2 x i32> undef, i32 %333, i32 0 %336 = insertelement <2 x i32> %335, i32 %334, i32 1 %337 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %336, <32 x i8> %92, <16 x i8> %95, i32 2) %338 = extractelement <4 x float> %337, i32 0 %339 = extractelement <4 x float> %337, i32 1 %340 = extractelement <4 x float> %337, i32 2 %341 = fmul float %69, %29 %342 = fmul float %70, %30 %343 = fmul float %71, %31 %344 = fmul float %338, %341 %345 = fmul float %339, %342 %346 = fmul float %340, %343 %347 = fmul float %332, %344 %348 = fmul float %332, %345 %349 = fmul float %332, %346 %350 = fmul float %33, %287 %351 = fmul float %34, %288 %352 = fmul float %35, %289 %353 = fmul float %246, 3.500000e+00 %354 = call float @llvm.AMDIL.clamp.(float %353, float 0.000000e+00, float 1.000000e+00) %355 = call float @llvm.AMDGPU.lrp(float %354, float %350, float %287) %356 = call float @llvm.AMDGPU.lrp(float %354, float %351, float %288) %357 = call float @llvm.AMDGPU.lrp(float %354, float %352, float %289) %358 = call float @llvm.AMDGPU.lrp(float %246, float %37, float %355) %359 = call float @llvm.AMDGPU.lrp(float %246, float %38, float %356) %360 = call float @llvm.AMDGPU.lrp(float %246, float %39, float %357) %361 = fmul float %330, %347 %362 = fadd float %361, %358 %363 = fmul float %330, %348 %364 = fadd float %363, %359 %365 = fmul float %330, %349 %366 = fadd float %365, %360 %367 = fmul float %323, %324 %368 = fadd float %367, %362 %369 = fmul float %323, %325 %370 = fadd float %369, %364 %371 = fmul float %323, %326 %372 = fadd float %371, %366 %373 = call float @llvm.AMDIL.clamp.(float %368, float 0.000000e+00, float 1.000000e+00) %374 = call float @llvm.AMDIL.clamp.(float %370, float 0.000000e+00, float 1.000000e+00) %375 = call float @llvm.AMDIL.clamp.(float %372, float 0.000000e+00, float 1.000000e+00) %376 = call float @llvm.pow.f32(float %323, float %58) %377 = call i32 @llvm.SI.packf16(float %373, float %374) %378 = bitcast i32 %377 to float %379 = call i32 @llvm.SI.packf16(float %375, float 0.000000e+00) %380 = bitcast i32 %379 to float %381 = call i32 @llvm.SI.packf16(float 0.000000e+00, float %57) %382 = bitcast i32 %381 to float %383 = call i32 @llvm.SI.packf16(float %376, float 0.000000e+00) %384 = bitcast i32 %383 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %378, float %380, float %378, float %380) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 1, i32 1, float %382, float %384, float %382, float %384) %385 = call i32 @llvm.SI.packf16(float %117, float %119) %386 = bitcast i32 %385 to float %387 = call i32 @llvm.SI.packf16(float %121, float 0.000000e+00) %388 = bitcast i32 %387 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 2, i32 0, float %101, float %101, float %101, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 3, i32 1, float %386, float %388, float %386, float %388) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @floor(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_add_f32_e32 v2, 0.5, v2 ; 060404F0 v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 v_interp_p1_f32 v5, v0, 1, 0, [m0] ; C8140100 v_interp_p2_f32 v5, [v5], v1, 1, 0, [m0] ; C8150101 s_load_dwordx4 s[32:35], s[2:3], 0x0 ; C0900300 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700 v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701 v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800 v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801 v_interp_p1_f32 v11, v0, 1, 2, [m0] ; C82C0900 v_interp_p2_f32 v11, [v11], v1, 1, 2, [m0] ; C82D0901 v_interp_p1_f32 v0, v0, 2, 2, [m0] ; C8000A00 v_interp_p2_f32 v0, [v0], v1, 2, 2, [m0] ; C8010A01 s_load_dwordx4 s[8:11], s[2:3], 0x4 ; C0840304 s_load_dwordx4 s[28:31], s[2:3], 0x10 ; C08E0310 s_load_dwordx4 s[56:59], s[4:5], 0x0 ; C09C0500 s_load_dwordx4 s[68:71], s[4:5], 0x4 ; C0A20504 s_load_dwordx4 s[36:39], s[4:5], 0x8 ; C0920508 s_load_dwordx4 s[16:19], s[4:5], 0xc ; C088050C s_load_dwordx4 s[12:15], s[4:5], 0x10 ; C0860510 s_load_dwordx8 s[60:67], s[6:7], 0x0 ; C0DE0700 s_load_dwordx8 s[72:79], s[6:7], 0x8 ; C0E40708 s_load_dwordx8 s[40:47], s[6:7], 0x10 ; C0D40710 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[8:11], 0x29 ; C2020929 s_buffer_load_dword s5, s[8:11], 0x2a ; C202892A s_buffer_load_dword s80, s[8:11], 0x2b ; C228092B s_buffer_load_dword s81, s[8:11], 0x2c ; C228892C s_buffer_load_dword s82, s[8:11], 0x2d ; C229092D s_buffer_load_dword s83, s[8:11], 0x2e ; C229892E s_buffer_load_dword s0, s[8:11], 0x33 ; C2000933 s_buffer_load_dword s3, s[8:11], 0x34 ; C2018934 s_buffer_load_dword s2, s[8:11], 0x35 ; C2010935 s_buffer_load_dword s1, s[8:11], 0x36 ; C2008936 s_buffer_load_dword s20, s[8:11], 0x28 ; C20A0928 s_buffer_load_dword s84, s[8:11], 0x27 ; C22A0927 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v12, s20, v4 ; 10180814 v_mul_f32_e32 v13, s20, v5 ; 101A0A14 s_buffer_load_dword s85, s[8:11], 0x26 ; C22A8926 s_buffer_load_dword s86, s[28:31], 0x57 ; C22B1D57 s_load_dwordx8 s[48:55], s[6:7], 0x18 ; C0D80718 s_load_dwordx8 s[20:27], s[6:7], 0x20 ; C0CA0720 image_sample v[4:5], 3, 0, 0, 0, 0, 0, 0, 0, v[12:13], s[72:79], s[68:71] ; F0800300 0232040C v_mul_f32_e32 v1, s84, v6 ; 10020C54 v_mul_f32_e32 v12, s84, v7 ; 10180E54 v_mul_f32_e32 v13, s4, v1 ; 101A0204 v_mul_f32_e64 v14, s4, -v12 ; D210000E 40021804 v_add_f32_e64 v15, s82, s82 ; D206000F 0000A452 v_rcp_f32_e32 v15, v15 ; 7E1E550F image_sample v13, 2, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[60:67], s[56:59] ; F0800200 01CF0D0D s_buffer_load_dword s4, s[28:31], 0x64 ; C2021D64 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v14, s86 ; 7E1C0256 v_mul_f32_e32 v14, s85, v14 ; 101C1C55 v_mul_f32_e32 v16, v15, v14 ; 10201D0F s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v16, s5, v13 ; 3E201A05 v_floor_f32_e32 v16, v16 ; 7E204910 v_mad_f32 v17, v14, v15, -v16 ; D2820011 84421F0E v_mad_f32 v14, v14, v15, 0.5 ; D282000E 03C21F0E v_mac_f32_e32 v17, s5, v13 ; 3E221A05 v_mac_f32_e32 v14, s5, v13 ; 3E1C1A05 v_mad_f32 v4, 2.0, v4, -1.0 ; D2820004 03CE08F4 v_mad_f32 v5, 2.0, v5, -1.0 ; D2820005 03CE0AF4 v_mul_f32_e32 v13, s83, v4 ; 101A0853 v_mul_f32_e32 v15, s83, v5 ; 101E0A53 v_mul_f32_e32 v18, s81, v12 ; 10241851 v_floor_f32_e32 v19, v14 ; 7E26490E v_mov_b32_e32 v20, 0x3e9f3b64 ; 7E2802FF 3E9F3B64 v_mad_f32 v21, v19, v20, 0.5 ; D2820015 03C22913 v_mul_f32_e32 v22, s81, v1 ; 102C0251 v_mad_f32 v24, s81, v1, v21 ; D2820018 04560251 v_mad_f32 v25, s81, -v12, v21 ; D2820019 44561851 v_mac_f32_e32 v22, v20, v16 ; 3E2C2114 v_mad_f32 v23, v16, v20, -v18 ; D2820017 844A2910 v_subrev_f32_e32 v1, v19, v14 ; 0A021D13 v_mac_f32_e32 v24, v1, v13 ; 3E301B01 v_mac_f32_e32 v22, v17, v13 ; 3E2C1B11 v_mac_f32_e32 v25, v1, v15 ; 3E321F01 v_mac_f32_e32 v23, v17, v15 ; 3E2E1F11 image_sample v[12:13], 10, 0, 0, 0, 0, 0, 0, 0, v[24:25], s[40:47], s[36:39] ; F0800A00 012A0C18 image_sample v[14:15], 10, 0, 0, 0, 0, 0, 0, 0, v[22:23], s[40:47], s[36:39] ; F0800A00 012A0E16 s_buffer_load_dword s5, s[32:35], 0x14 ; C202A114 s_buffer_load_dword s6, s[32:35], 0x15 ; C2032115 v_mad_f32 v1, 2.0, v1, -1.0 ; D2820001 03CE02F4 v_mov_b32_e32 v16, 0x7fffffff ; 7E2002FF 7FFFFFFF v_and_b32_e32 v1, v1, v16 ; 36022101 v_log_f32_e32 v1, v1 ; 7E024F01 v_mad_f32 v17, 2.0, v17, -1.0 ; D2820011 03CE22F4 v_and_b32_e32 v16, v17, v16 ; 36202111 v_log_f32_e32 v16, v16 ; 7E204F10 v_mul_legacy_f32_e32 v1, s80, v1 ; 0E020250 v_exp_f32_e32 v1, v1 ; 7E024B01 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v15, v15, v1 ; 101E030F v_mul_f32_e32 v1, v14, v1 ; 1002030E v_mul_legacy_f32_e32 v14, s80, v16 ; 0E1C2050 s_buffer_load_dword s7, s[8:11], 0x0 ; C2038900 v_exp_f32_e32 v14, v14 ; 7E1C4B0E v_mac_f32_e32 v15, v13, v14 ; 3E1E1D0D v_mac_f32_e32 v1, v12, v14 ; 3E021D0C v_madak_f32_e32 v5, v5, v5, 0x3dcccccd ; 420A0B05 3DCCCCCD v_mac_f32_e32 v5, v4, v4 ; 3E0A0904 s_buffer_load_dword s32, s[8:11], 0x8 ; C2100908 s_buffer_load_dword s33, s[8:11], 0x9 ; C2108909 s_buffer_load_dword s34, s[8:11], 0xa ; C211090A s_buffer_load_dword s35, s[8:11], 0xb ; C211890B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s7, v5 ; 10080A07 v_mad_f32 v5, 2.0, v15, -1.0 ; D2820005 03CE1EF4 v_mad_f32 v1, 2.0, v1, -1.0 ; D2820001 03CE02F4 v_mul_f32_e32 v5, v5, v4 ; 100A0905 v_mul_f32_e32 v1, v1, v4 ; 10020901 v_mul_f32_e32 v4, v1, v1 ; 10080301 v_mac_f32_e32 v4, v5, v5 ; 3E080B05 v_rsq_clamp_f32_e32 v12, v4 ; 7E185904 s_buffer_load_dword s7, s[28:31], 0x4c ; C2039D4C s_buffer_load_dword s36, s[28:31], 0x4d ; C2121D4D s_buffer_load_dword s37, s[28:31], 0x4e ; C2129D4E s_buffer_load_dword s38, s[28:31], 0x65 ; C2131D65 s_buffer_load_dword s39, s[28:31], 0x70 ; C2139D70 s_buffer_load_dword s40, s[28:31], 0x71 ; C2141D71 s_buffer_load_dword s28, s[28:31], 0x72 ; C20E1D72 v_mul_f32_e32 v13, v12, v5 ; 101A0B0C s_buffer_load_dword s29, s[8:11], 0xf ; C20E890F v_cmp_lt_f32_e32 vcc, 1.0, v4 ; 7C0208F2 v_cndmask_b32_e32 v4, v5, v13 ; 00081B05 v_mul_f32_e32 v5, v12, v1 ; 100A030C v_cndmask_b32_e32 v1, v1, v5 ; 00020B01 v_mov_b32_e32 v5, s6 ; 7E0A0206 v_mac_f32_e32 v5, s5, v3 ; 3E0A0605 v_add_f32_e32 v3, 0.5, v5 ; 06060AF0 v_mul_f32_e32 v12, s4, v2 ; 10180404 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v13, s38, v3 ; 101A0626 image_sample v5, 8, 0, 0, 0, 0, 0, 0, 0, v[12:13], s[48:55], s[16:19] ; F0800800 008C050C s_buffer_load_dword s5, s[8:11], 0x10 ; C2028910 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v14, s29, v5 ; 101C0A1D v_subrev_f32_e32 v15, s7, v6 ; 0A1E0C07 v_subrev_f32_e32 v16, s36, v7 ; 0A200E24 v_mul_f32_e32 v17, v15, v15 ; 10221F0F v_mac_f32_e32 v17, v16, v16 ; 3E222110 v_subrev_f32_e32 v18, s37, v8 ; 0A241025 v_mac_f32_e32 v17, v18, v18 ; 3E222512 v_rsq_f32_e32 v19, v17 ; 7E265D11 v_mov_b32_e32 v20, 0x448c0000 ; 7E2802FF 448C0000 v_mul_f32_e32 v21, v4, v14 ; 102A1D04 v_mul_f32_e32 v21, v20, v21 ; 102A2B14 v_mul_f32_e32 v21, v19, v21 ; 102A2B13 v_mad_f32 v21, v2, s4, -v21 ; D2820015 84540902 v_mul_f32_e32 v2, s35, v5 ; 10040A23 s_buffer_load_dword s4, s[8:11], 0x45 ; C2020945 s_buffer_load_dword s6, s[8:11], 0x47 ; C2030947 v_mul_f32_e32 v14, v1, v14 ; 101C1D01 v_mul_f32_e32 v14, v20, v14 ; 101C1D14 v_mul_f32_e32 v14, v19, v14 ; 101C1D13 v_mad_f32 v3, v3, s38, -v14 ; D2820003 84384D03 v_mul_f32_e32 v14, v4, v2 ; 101C0504 v_mul_f32_e32 v2, v1, v2 ; 10040501 v_mul_f32_e32 v14, v20, v14 ; 101C1D14 v_mul_f32_e32 v2, v20, v2 ; 10040514 s_waitcnt lgkmcnt(0) ; BF8C007F v_max_f32_e32 v3, s4, v3 ; 20060604 v_min_f32_e32 v22, s6, v3 ; 1E2C0606 image_sample v[20:22], 7, 0, 0, 0, 0, 0, 0, 0, v[21:22], s[48:55], s[16:19] ; F0800700 008C1415 v_mac_f32_e32 v12, v19, v14 ; 3E181D13 v_mac_f32_e32 v13, v19, v2 ; 3E1A0513 v_max_f32_e32 v2, s4, v13 ; 20041A04 v_min_f32_e32 v13, s6, v2 ; 1E1A0406 v_mov_b32_e32 v2, s32 ; 7E040220 v_mov_b32_e32 v3, s33 ; 7E060221 v_mov_b32_e32 v14, s34 ; 7E1C0222 v_mul_f32_e32 v2, s39, v2 ; 10040427 v_mul_f32_e32 v3, s40, v3 ; 10060628 v_mul_f32_e32 v14, s28, v14 ; 101C1C1C image_sample v[23:25], 7, 0, 0, 0, 0, 0, 0, 0, v[12:13], s[20:27], s[12:15] ; F0800700 0065170C s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, v2, v23 ; 10042F02 v_mul_f32_e32 v3, v3, v24 ; 10063103 v_mul_f32_e32 v12, v14, v25 ; 1018330E s_buffer_load_dword s4, s[8:11], 0xc ; C202090C s_buffer_load_dword s6, s[8:11], 0xd ; C203090D s_buffer_load_dword s12, s[8:11], 0xe ; C206090E v_rsq_clamp_f32_e32 v13, v17 ; 7E1A5911 v_mul_f32_e32 v14, 0x40600000, v5 ; 101C0AFF 40600000 v_add_f32_e64 v14, 0, v14 clamp ; D206080E 00021C80 v_sub_f32_e32 v17, 1.0, v14 ; 08221CF2 v_mul_f32_e32 v19, v20, v17 ; 10262314 v_mul_f32_e32 v23, v21, v17 ; 102E2315 v_mul_f32_e32 v17, v22, v17 ; 10222316 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v20, s4, v20 ; 10282804 v_mul_f32_e32 v21, s6, v21 ; 102A2A06 v_mul_f32_e32 v22, s12, v22 ; 102C2C0C v_mul_f32_e32 v15, v13, v15 ; 101E1F0D v_mul_f32_e32 v16, v13, v16 ; 1020210D v_mul_f32_e32 v15, v4, v15 ; 101E1F04 v_mad_f32 v15, -v16, v1, -v15 ; D282000F A43E0310 v_mad_f32 v16, -v1, v1, 1.0 ; D2820010 23CA0301 v_mad_f32 v16, -v4, v4, v16 ; D2820010 24420904 v_add_f32_e64 v16, 0, v16 clamp ; D2060810 00022080 v_sqrt_f32_e32 v16, v16 ; 7E206710 v_mul_f32_e32 v18, v13, v18 ; 1024250D v_mad_f32 v15, -v18, v16, v15 ; D282000F 243E2112 v_mul_f32_e32 v18, v4, v15 ; 10241F04 v_mac_f32_e32 v18, v4, v15 ; 3E241F04 v_mul_f32_e32 v4, v1, v15 ; 10081F01 v_mac_f32_e32 v4, v1, v15 ; 3E081F01 v_mul_f32_e32 v1, v16, v15 ; 10021F10 v_mac_f32_e32 v1, v16, v15 ; 3E021F10 v_sub_f32_e32 v6, s7, v6 ; 080C0C07 v_sub_f32_e32 v7, s36, v7 ; 080E0E24 v_sub_f32_e32 v8, s37, v8 ; 08101025 v_mad_f32 v6, v6, v13, -v18 ; D2820006 844A1B06 v_mad_f32 v4, v7, v13, -v4 ; D2820004 84121B07 v_mad_f32 v1, v8, v13, -v1 ; D2820001 84061B08 v_mul_f32_e32 v6, s3, v6 ; 100C0C03 v_mac_f32_e32 v6, s2, v4 ; 3E0C0802 s_buffer_load_dword s2, s[8:11], 0x11 ; C2010911 v_mac_f32_e32 v19, v20, v14 ; 3E261D14 s_buffer_load_dword s3, s[8:11], 0x12 ; C2018912 v_mac_f32_e32 v23, v21, v14 ; 3E2E1D15 v_mac_f32_e32 v17, v22, v14 ; 3E221D16 v_mac_f32_e32 v6, s1, v1 ; 3E0C0201 v_sub_f32_e32 v1, 1.0, v5 ; 08020AF2 v_mul_f32_e32 v4, v19, v1 ; 10080313 v_mac_f32_e32 v4, s5, v5 ; 3E080A05 v_mul_f32_e32 v7, v23, v1 ; 100E0317 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v7, s2, v5 ; 3E0E0A02 v_mul_f32_e32 v1, v17, v1 ; 10020311 v_mac_f32_e32 v1, s3, v5 ; 3E020A03 v_mov_b32_e32 v8, 0xbd4ccccd ; 7E1002FF BD4CCCCD v_add_f32_e32 v8, v5, v8 ; 06101105 v_add_f32_e32 v5, v5, v5 ; 060A0B05 v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80 v_mul_f32_e32 v2, v2, v5 ; 10040B02 v_mul_f32_e32 v3, v3, v5 ; 10060B03 v_mul_f32_e32 v5, v12, v5 ; 100A0B0C v_mul_f32_e32 v8, 0x41a00000, v8 ; 101010FF 41A00000 v_add_f32_e64 v8, 0, v8 clamp ; D2060808 00021080 s_buffer_load_dword s1, s[8:11], 0x3b ; C200893B s_buffer_load_dword s2, s[8:11], 0x38 ; C2010938 s_buffer_load_dword s3, s[8:11], 0x39 ; C2018939 v_mul_f32_e32 v8, s0, v8 ; 10101000 v_mac_f32_e32 v4, v2, v8 ; 3E081102 v_add_f32_e64 v2, 0, v6 clamp ; D2060802 00020C80 s_buffer_load_dword s0, s[8:11], 0x3a ; C200093A v_log_f32_e32 v2, v2 ; 7E044F02 v_mac_f32_e32 v7, v3, v8 ; 3E0E1103 v_mac_f32_e32 v1, v5, v8 ; 3E021105 s_buffer_load_dword s4, s[8:11], 0x3c ; C202093C s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_legacy_f32_e32 v2, s1, v2 ; 0E040401 v_mov_b32_e32 v3, s2 ; 7E060202 v_mul_f32_e32 v3, s39, v3 ; 10060627 v_mov_b32_e32 v5, s3 ; 7E0A0203 v_mul_f32_e32 v5, s40, v5 ; 100A0A28 v_exp_f32_e32 v2, v2 ; 7E044B02 v_mac_f32_e32 v4, v3, v2 ; 3E080503 v_mac_f32_e32 v7, v5, v2 ; 3E0E0505 v_mov_b32_e32 v3, s0 ; 7E060200 v_mul_f32_e32 v3, s28, v3 ; 1006061C v_mac_f32_e32 v1, v3, v2 ; 3E020503 s_buffer_load_dword s0, s[8:11], 0x3d ; C200093D v_add_f32_e64 v3, 0, v4 clamp ; D2060803 00020880 v_add_f32_e64 v4, 0, v7 clamp ; D2060804 00020E80 v_cvt_pkrtz_f16_f32_e32 v3, v3, v4 ; 5E060903 v_mul_f32_e32 v4, v10, v10 ; 1008150A v_mac_f32_e32 v4, v11, v11 ; 3E08170B v_mac_f32_e32 v4, v0, v0 ; 3E080100 v_rsq_clamp_f32_e32 v4, v4 ; 7E085904 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_cvt_pkrtz_f16_f32_e64 v1, v1, 0 ; D25E0001 00010101 exp 15, 0, 1, 0, 0, v3, v1, v3, v1 ; F800040F 01030103 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v1, v4, v10 ; 10021504 v_mul_f32_e32 v3, v4, v11 ; 10061704 v_mul_f32_e32 v0, v4, v0 ; 10000104 v_log_f32_e32 v2, v2 ; 7E044F02 v_mad_f32 v1, 0.5, v1, 0.5 ; D2820001 03C202F0 v_mad_f32 v3, 0.5, v3, 0.5 ; D2820003 03C206F0 v_mad_f32 v0, 0.5, v0, 0.5 ; D2820000 03C200F0 v_mul_legacy_f32_e32 v2, s0, v2 ; 0E040400 v_exp_f32_e32 v2, v2 ; 7E044B02 v_cvt_pkrtz_f16_f32_e64 v4, 0, s4 ; D25E0004 00000880 v_cvt_pkrtz_f16_f32_e64 v2, v2, 0 ; D25E0002 00010102 exp 15, 1, 1, 0, 0, v4, v2, v4, v2 ; F800041F 02040204 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v2, 1.0 ; 7E0402F2 v_cvt_pkrtz_f16_f32_e32 v1, v1, v3 ; 5E020701 v_cvt_pkrtz_f16_f32_e64 v0, v0, 0 ; D25E0000 00010100 exp 15, 2, 0, 0, 0, v9, v9, v9, v2 ; F800002F 02090909 exp 15, 3, 1, 1, 1, v1, v0, v1, v0 ; F8001C3F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 96 VGPRS: 28 Code Size: 1448 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL CONST[1][0..41] DCL CONST[2][0..13] DCL CONST[3][0] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL DCL TEMP[2..3], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, -1.0000, 0.0000} IMM[1] UINT32 {0, 64, 80, 96} IMM[2] UINT32 {112, 68, 84, 100} IMM[3] UINT32 {116, 72, 88, 104} IMM[4] UINT32 {120, 76, 92, 108} IMM[5] UINT32 {124, 304, 320, 0} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MOV TEMP[0].zw, IMM[0].yyyy 4: MOV TEMP[0].x, IN[0].xxxx 5: MOV TEMP[0].y, IN[0].yyyy 6: MOV TEMP[2].x, CONST[1][4].xxxx 7: MOV TEMP[2].y, CONST[1][5].xxxx 8: MOV TEMP[2].z, CONST[1][6].xxxx 9: MOV TEMP[2].w, CONST[1][7].xxxx 10: DP4 TEMP[2].x, TEMP[0], TEMP[2] 11: MOV TEMP[3].x, CONST[1][4].yyyy 12: MOV TEMP[3].y, CONST[1][5].yyyy 13: MOV TEMP[3].z, CONST[1][6].yyyy 14: MOV TEMP[3].w, CONST[1][7].yyyy 15: DP4 TEMP[3].x, TEMP[0], TEMP[3] 16: MOV TEMP[2].y, TEMP[3].xxxx 17: MOV TEMP[3].x, CONST[1][4].zzzz 18: MOV TEMP[3].y, CONST[1][5].zzzz 19: MOV TEMP[3].z, CONST[1][6].zzzz 20: MOV TEMP[3].w, CONST[1][7].zzzz 21: DP4 TEMP[3].x, TEMP[0], TEMP[3] 22: MOV TEMP[2].z, TEMP[3].xxxx 23: MOV TEMP[3].x, CONST[1][4].wwww 24: MOV TEMP[3].y, CONST[1][5].wwww 25: MOV TEMP[3].z, CONST[1][6].wwww 26: MOV TEMP[3].w, CONST[1][7].wwww 27: DP4 TEMP[0].x, TEMP[0], TEMP[3] 28: RCP TEMP[0].xyz, TEMP[0].xxxx 29: MAD TEMP[0].xyz, TEMP[2].xyzz, TEMP[0].xyzz, -CONST[1][19].xyzz 30: DP3 TEMP[2].x, TEMP[0].xyzz, TEMP[0].xyzz 31: RSQ TEMP[2].x, TEMP[2].xxxx 32: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xxxx 33: DP3 TEMP[2].x, CONST[1][20].xyzz, TEMP[0].xyzz 34: RCP TEMP[2].x, TEMP[2].xxxx 35: MUL TEMP[0].xyz, TEMP[2].xxxx, TEMP[0].xyzz 36: MOV TEMP[2].zw, IMM[0].yyzy 37: MOV TEMP[2].x, IN[0].xxxx 38: MOV TEMP[2].y, -IN[0].yyyy 39: MOV OUT[1], TEMP[1] 40: MOV OUT[2].xy, IN[1].xyxx 41: MOV OUT[0], TEMP[2] 42: MOV OUT[3].xyz, TEMP[0].xyzx 43: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328) %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 %37 = add i32 %5, %7 %38 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %37) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 %43 = add i32 %5, %7 %44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %43) %45 = extractelement <4 x float> %44, i32 0 %46 = extractelement <4 x float> %44, i32 1 %47 = fmul float %39, %13 %48 = fmul float %40, %17 %49 = fadd float %47, %48 %50 = fadd float %49, %21 %51 = fadd float %50, %25 %52 = fmul float %39, %14 %53 = fmul float %40, %18 %54 = fadd float %52, %53 %55 = fadd float %54, %22 %56 = fadd float %55, %26 %57 = fmul float %39, %15 %58 = fmul float %40, %19 %59 = fadd float %57, %58 %60 = fadd float %59, %23 %61 = fadd float %60, %27 %62 = fmul float %39, %16 %63 = fmul float %40, %20 %64 = fadd float %62, %63 %65 = fadd float %64, %24 %66 = fadd float %65, %28 %67 = fdiv float 1.000000e+00, %66 %68 = fmul float %51, %67 %69 = fsub float %68, %29 %70 = fmul float %56, %67 %71 = fsub float %70, %30 %72 = fmul float %61, %67 %73 = fsub float %72, %31 %74 = fmul float %69, %69 %75 = fmul float %71, %71 %76 = fadd float %75, %74 %77 = fmul float %73, %73 %78 = fadd float %76, %77 %79 = call float @llvm.AMDGPU.rsq.clamped.f32(float %78) %80 = fmul float %69, %79 %81 = fmul float %71, %79 %82 = fmul float %73, %79 %83 = fmul float %32, %80 %84 = fmul float %33, %81 %85 = fadd float %84, %83 %86 = fmul float %34, %82 %87 = fadd float %85, %86 %88 = fdiv float 1.000000e+00, %87 %89 = fmul float %88, %80 %90 = fmul float %88, %81 %91 = fmul float %88, %82 %92 = fsub float -0.000000e+00, %40 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %45, float %46, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %89, float %90, float %91, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %39, float %92, float -1.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0x1a ; C206011A buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[3:6], v0, s[8:11], 0 idxen ; E00C2000 80020300 s_buffer_load_dword s5, s[0:3], 0x1c ; C202811C s_buffer_load_dword s6, s[0:3], 0x1d ; C203011D s_buffer_load_dword s7, s[0:3], 0x1e ; C203811E s_buffer_load_dword s8, s[0:3], 0x15 ; C2040115 s_buffer_load_dword s9, s[0:3], 0x16 ; C2048116 s_buffer_load_dword s10, s[0:3], 0x17 ; C2050117 v_mov_b32_e32 v0, s12 ; 7E00020C s_buffer_load_dword s11, s[0:3], 0x18 ; C2058118 s_buffer_load_dword s12, s[0:3], 0x19 ; C2060119 s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v5, s4 ; 7E0A0204 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_buffer_load_dword s13, s[0:3], 0x11 ; C2068111 s_buffer_load_dword s14, s[0:3], 0x12 ; C2070112 s_buffer_load_dword s15, s[0:3], 0x13 ; C2078113 s_buffer_load_dword s16, s[0:3], 0x14 ; C2080114 s_buffer_load_dword s17, s[0:3], 0x1f ; C208811F s_buffer_load_dword s18, s[0:3], 0x4c ; C209014C s_buffer_load_dword s19, s[0:3], 0x4d ; C209814D s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s11 ; 7E0C020B s_buffer_load_dword s11, s[0:3], 0x4e ; C205814E v_mov_b32_e32 v7, s12 ; 7E0E020C s_buffer_load_dword s12, s[0:3], 0x50 ; C2060150 s_buffer_load_dword s20, s[0:3], 0x51 ; C20A0151 s_buffer_load_dword s0, s[0:3], 0x52 ; C2000152 v_mac_f32_e32 v6, s16, v2 ; 3E0C0410 v_mac_f32_e32 v7, s8, v2 ; 3E0E0408 v_mac_f32_e32 v0, s9, v2 ; 3E000409 v_mac_f32_e32 v5, s10, v2 ; 3E0A040A v_mac_f32_e32 v6, s4, v1 ; 3E0C0204 v_mac_f32_e32 v7, s13, v1 ; 3E0E020D v_mac_f32_e32 v0, s14, v1 ; 3E00020E v_mac_f32_e32 v5, s15, v1 ; 3E0A020F v_add_f32_e32 v5, s17, v5 ; 060A0A11 v_rcp_f32_e32 v5, v5 ; 7E0A5505 v_add_f32_e32 v6, s5, v6 ; 060C0C05 v_add_f32_e32 v7, s6, v7 ; 060E0E06 v_add_f32_e32 v0, s7, v0 ; 06000007 v_mad_f32 v6, v6, v5, -s18 ; D2820006 804A0B06 v_mad_f32 v7, v7, v5, -s19 ; D2820007 804E0B07 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, v5, -s11 ; D2820000 802E0B00 v_mul_f32_e32 v5, v6, v6 ; 100A0D06 v_mac_f32_e32 v5, v7, v7 ; 3E0A0F07 v_mac_f32_e32 v5, v0, v0 ; 3E0A0100 v_rsq_clamp_f32_e32 v5, v5 ; 7E0A5905 v_mul_f32_e32 v6, v5, v6 ; 100C0D05 v_mul_f32_e32 v7, v5, v7 ; 100E0F05 v_mul_f32_e32 v0, v5, v0 ; 10000105 v_mul_f32_e32 v5, s12, v6 ; 100A0C0C v_mac_f32_e32 v5, s20, v7 ; 3E0A0E14 v_mac_f32_e32 v5, s0, v0 ; 3E0A0000 v_rcp_f32_e32 v5, v5 ; 7E0A5505 v_mov_b32_e32 v8, 0 ; 7E100280 exp 15, 32, 0, 0, 0, v8, v8, v8, v8 ; F800020F 08080808 exp 15, 33, 0, 0, 0, v3, v4, v8, v8 ; F800021F 08080403 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v3, v6, v5 ; 10060B06 v_mul_f32_e32 v4, v7, v5 ; 10080B07 v_mul_f32_e32 v0, v0, v5 ; 10000B00 exp 15, 34, 0, 0, 0, v3, v4, v0, v8 ; F800022F 08000403 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, 1.0 ; 7E0002F2 v_xor_b32_e32 v2, 0x80000000, v2 ; 3A0404FF 80000000 v_mov_b32_e32 v3, -1.0 ; 7E0602F3 exp 15, 12, 0, 0, 0, v1, v2, v3, v0 ; F80000CF 00030201 exp 15, 13, 0, 1, 0, v8, v8, v8, v8 ; F80008DF 08080808 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 360 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[1][0..4] DCL CONST[2][0..41] DCL CONST[3][0..13] DCL CONST[4][0] DCL TEMP[0..6], LOCAL IMM[0] UINT32 {0, 32, 16, 48} IMM[1] FLT32 { 0.5000, 0.0000, 65535.0000, 0.0039} IMM[2] UINT32 {64, 1, 368, 0} IMM[3] FLT32 { 0.0039, 256.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0].xy, TEMP[0], SAMP[0], 2D 2: MAD TEMP[1].xy, CONST[1][2].ywww, IMM[1].xxxx, IN[0].xyyy 3: MOV TEMP[2].x, IMM[1].yyyy 4: MOV TEMP[2].y, CONST[1][1].wwww 5: ADD TEMP[2].xy, TEMP[2].xyyy, TEMP[1].xyyy 6: MOV TEMP[2].xy, TEMP[2].xyyy 7: TEX TEMP[2].x, TEMP[2], SAMP[1], 2D 8: MOV TEMP[2].x, TEMP[2].xxxx 9: ADD TEMP[3].xy, CONST[1][1].xwww, TEMP[1].xyyy 10: MOV TEMP[3].xy, TEMP[3].xyyy 11: TEX TEMP[3].x, TEMP[3], SAMP[1], 2D 12: MOV TEMP[2].y, TEMP[3].xxxx 13: ADD TEMP[3].xy, CONST[1][1].ywww, TEMP[1].xyyy 14: MOV TEMP[3].xy, TEMP[3].xyyy 15: TEX TEMP[3].x, TEMP[3], SAMP[1], 2D 16: MOV TEMP[2].z, TEMP[3].xxxx 17: ADD TEMP[3].xy, CONST[1][1].zwww, TEMP[1].xyyy 18: MOV TEMP[3].xy, TEMP[3].xyyy 19: TEX TEMP[3].x, TEMP[3], SAMP[1], 2D 20: MOV TEMP[2].w, TEMP[3].xxxx 21: MOV TEMP[3].x, IMM[1].yyyy 22: MOV TEMP[3].y, CONST[1][2].wwww 23: ADD TEMP[3].xy, TEMP[3].xyyy, TEMP[1].xyyy 24: MOV TEMP[3].xy, TEMP[3].xyyy 25: TEX TEMP[3].x, TEMP[3], SAMP[1], 2D 26: MOV TEMP[3].x, TEMP[3].xxxx 27: ADD TEMP[4].xy, CONST[1][2].xwww, TEMP[1].xyyy 28: MOV TEMP[4].xy, TEMP[4].xyyy 29: TEX TEMP[4].x, TEMP[4], SAMP[1], 2D 30: MOV TEMP[3].y, TEMP[4].xxxx 31: ADD TEMP[4].xy, CONST[1][2].ywww, TEMP[1].xyyy 32: MOV TEMP[4].xy, TEMP[4].xyyy 33: TEX TEMP[4].x, TEMP[4], SAMP[1], 2D 34: MOV TEMP[3].z, TEMP[4].xxxx 35: ADD TEMP[4].xy, CONST[1][2].zwww, TEMP[1].xyyy 36: MOV TEMP[4].xy, TEMP[4].xyyy 37: TEX TEMP[4].x, TEMP[4], SAMP[1], 2D 38: MOV TEMP[3].w, TEMP[4].xxxx 39: MOV TEMP[4].xy, TEMP[1].xyyy 40: TEX TEMP[4].x, TEMP[4], SAMP[1], 2D 41: MOV TEMP[4].x, TEMP[4].xxxx 42: ADD TEMP[5].xy, CONST[1][3].xwww, TEMP[1].xyyy 43: MOV TEMP[5].xy, TEMP[5].xyyy 44: TEX TEMP[5].x, TEMP[5], SAMP[1], 2D 45: MOV TEMP[4].y, TEMP[5].xxxx 46: ADD TEMP[5].xy, CONST[1][3].ywww, TEMP[1].xyyy 47: MOV TEMP[5].xy, TEMP[5].xyyy 48: TEX TEMP[5].x, TEMP[5], SAMP[1], 2D 49: MOV TEMP[4].z, TEMP[5].xxxx 50: ADD TEMP[5].xy, CONST[1][3].zwww, TEMP[1].xyyy 51: MOV TEMP[5].xy, TEMP[5].xyyy 52: TEX TEMP[5].x, TEMP[5], SAMP[1], 2D 53: MOV TEMP[4].w, TEMP[5].xxxx 54: MOV TEMP[5].x, IMM[1].yyyy 55: MOV TEMP[5].y, CONST[1][4].wwww 56: ADD TEMP[5].xy, TEMP[5].xyyy, TEMP[1].xyyy 57: MOV TEMP[5].xy, TEMP[5].xyyy 58: TEX TEMP[5].x, TEMP[5], SAMP[1], 2D 59: MOV TEMP[5].x, TEMP[5].xxxx 60: ADD TEMP[6].xy, CONST[1][4].xwww, TEMP[1].xyyy 61: MOV TEMP[6].xy, TEMP[6].xyyy 62: TEX TEMP[6].x, TEMP[6], SAMP[1], 2D 63: MOV TEMP[5].y, TEMP[6].xxxx 64: ADD TEMP[6].xy, CONST[1][4].ywww, TEMP[1].xyyy 65: MOV TEMP[6].xy, TEMP[6].xyyy 66: TEX TEMP[6].x, TEMP[6], SAMP[1], 2D 67: MOV TEMP[5].z, TEMP[6].xxxx 68: ADD TEMP[1].xy, CONST[1][4].zwww, TEMP[1].xyyy 69: MOV TEMP[1].xy, TEMP[1].xyyy 70: TEX TEMP[1].x, TEMP[1], SAMP[1], 2D 71: MOV TEMP[5].w, TEMP[1].xxxx 72: MIN TEMP[1], TEMP[2], TEMP[3] 73: MIN TEMP[2], TEMP[4], TEMP[5] 74: MIN TEMP[1], TEMP[1], TEMP[2] 75: MIN TEMP[1].xy, TEMP[1].xyyy, TEMP[1].zwww 76: MIN TEMP[1].x, TEMP[1].xxxx, TEMP[1].yyyy 77: RCP TEMP[2].x, CONST[2][23].xxxx 78: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx 79: MUL TEMP[1].x, TEMP[1].xxxx, IMM[1].zzzz 80: FLR TEMP[1].x, TEMP[1].xxxx 81: MUL TEMP[2].x, TEMP[1].xxxx, IMM[1].wwww 82: FLR TEMP[2].x, TEMP[2].xxxx 83: MUL TEMP[3].x, TEMP[2].xxxx, IMM[3].xxxx 84: MUL TEMP[2].x, TEMP[2].xxxx, IMM[3].yyyy 85: ADD TEMP[1].x, TEMP[1].xxxx, -TEMP[2].xxxx 86: MUL TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx 87: MOV TEMP[3].y, TEMP[1].xxxx 88: MOV TEMP[3].z, TEMP[0].xxxx 89: MOV TEMP[3].w, TEMP[0].yyyy 90: MOV OUT[0], TEMP[3] 91: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %40 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 %41 = load <16 x i8>, <16 x i8> addrspace(2)* %40, align 16, !tbaa !0 %42 = call float @llvm.SI.load.const(<16 x i8> %41, i32 368) %43 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %44 = load <32 x i8>, <32 x i8> addrspace(2)* %43, align 32, !tbaa !0 %45 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %48 = load <8 x i32>, <8 x i32> addrspace(2)* %47, align 32, !tbaa !0 %49 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %50 = load <4 x i32>, <4 x i32> addrspace(2)* %49, align 16, !tbaa !0 %51 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %52 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %53 = bitcast float %51 to i32 %54 = bitcast float %52 to i32 %55 = insertelement <2 x i32> undef, i32 %53, i32 0 %56 = insertelement <2 x i32> %55, i32 %54, i32 1 %57 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %56, <32 x i8> %44, <16 x i8> %46, i32 2) %58 = extractelement <4 x float> %57, i32 0 %59 = extractelement <4 x float> %57, i32 1 %60 = fmul float %29, 5.000000e-01 %61 = fadd float %60, %51 %62 = fmul float %31, 5.000000e-01 %63 = fadd float %62, %52 %64 = fadd float %61, 0.000000e+00 %65 = fadd float %27, %63 %66 = bitcast float %64 to i32 %67 = bitcast float %65 to i32 %68 = insertelement <2 x i32> undef, i32 %66, i32 0 %69 = insertelement <2 x i32> %68, i32 %67, i32 1 %70 = bitcast <8 x i32> %48 to <32 x i8> %71 = bitcast <4 x i32> %50 to <16 x i8> %72 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %69, <32 x i8> %70, <16 x i8> %71, i32 2) %73 = extractelement <4 x float> %72, i32 0 %74 = fadd float %24, %61 %75 = fadd float %27, %63 %76 = bitcast float %74 to i32 %77 = bitcast float %75 to i32 %78 = insertelement <2 x i32> undef, i32 %76, i32 0 %79 = insertelement <2 x i32> %78, i32 %77, i32 1 %80 = bitcast <8 x i32> %48 to <32 x i8> %81 = bitcast <4 x i32> %50 to <16 x i8> %82 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %79, <32 x i8> %80, <16 x i8> %81, i32 2) %83 = extractelement <4 x float> %82, i32 0 %84 = fadd float %25, %61 %85 = fadd float %27, %63 %86 = bitcast float %84 to i32 %87 = bitcast float %85 to i32 %88 = insertelement <2 x i32> undef, i32 %86, i32 0 %89 = insertelement <2 x i32> %88, i32 %87, i32 1 %90 = bitcast <8 x i32> %48 to <32 x i8> %91 = bitcast <4 x i32> %50 to <16 x i8> %92 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %89, <32 x i8> %90, <16 x i8> %91, i32 2) %93 = extractelement <4 x float> %92, i32 0 %94 = fadd float %26, %61 %95 = fadd float %27, %63 %96 = bitcast float %94 to i32 %97 = bitcast float %95 to i32 %98 = insertelement <2 x i32> undef, i32 %96, i32 0 %99 = insertelement <2 x i32> %98, i32 %97, i32 1 %100 = bitcast <8 x i32> %48 to <32 x i8> %101 = bitcast <4 x i32> %50 to <16 x i8> %102 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %99, <32 x i8> %100, <16 x i8> %101, i32 2) %103 = extractelement <4 x float> %102, i32 0 %104 = fadd float %61, 0.000000e+00 %105 = fadd float %31, %63 %106 = bitcast float %104 to i32 %107 = bitcast float %105 to i32 %108 = insertelement <2 x i32> undef, i32 %106, i32 0 %109 = insertelement <2 x i32> %108, i32 %107, i32 1 %110 = bitcast <8 x i32> %48 to <32 x i8> %111 = bitcast <4 x i32> %50 to <16 x i8> %112 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %109, <32 x i8> %110, <16 x i8> %111, i32 2) %113 = extractelement <4 x float> %112, i32 0 %114 = fadd float %28, %61 %115 = fadd float %31, %63 %116 = bitcast float %114 to i32 %117 = bitcast float %115 to i32 %118 = insertelement <2 x i32> undef, i32 %116, i32 0 %119 = insertelement <2 x i32> %118, i32 %117, i32 1 %120 = bitcast <8 x i32> %48 to <32 x i8> %121 = bitcast <4 x i32> %50 to <16 x i8> %122 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %119, <32 x i8> %120, <16 x i8> %121, i32 2) %123 = extractelement <4 x float> %122, i32 0 %124 = fadd float %29, %61 %125 = fadd float %31, %63 %126 = bitcast float %124 to i32 %127 = bitcast float %125 to i32 %128 = insertelement <2 x i32> undef, i32 %126, i32 0 %129 = insertelement <2 x i32> %128, i32 %127, i32 1 %130 = bitcast <8 x i32> %48 to <32 x i8> %131 = bitcast <4 x i32> %50 to <16 x i8> %132 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %129, <32 x i8> %130, <16 x i8> %131, i32 2) %133 = extractelement <4 x float> %132, i32 0 %134 = fadd float %30, %61 %135 = fadd float %31, %63 %136 = bitcast float %134 to i32 %137 = bitcast float %135 to i32 %138 = insertelement <2 x i32> undef, i32 %136, i32 0 %139 = insertelement <2 x i32> %138, i32 %137, i32 1 %140 = bitcast <8 x i32> %48 to <32 x i8> %141 = bitcast <4 x i32> %50 to <16 x i8> %142 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %139, <32 x i8> %140, <16 x i8> %141, i32 2) %143 = extractelement <4 x float> %142, i32 0 %144 = bitcast float %61 to i32 %145 = bitcast float %63 to i32 %146 = insertelement <2 x i32> undef, i32 %144, i32 0 %147 = insertelement <2 x i32> %146, i32 %145, i32 1 %148 = bitcast <8 x i32> %48 to <32 x i8> %149 = bitcast <4 x i32> %50 to <16 x i8> %150 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %147, <32 x i8> %148, <16 x i8> %149, i32 2) %151 = extractelement <4 x float> %150, i32 0 %152 = fadd float %32, %61 %153 = fadd float %35, %63 %154 = bitcast float %152 to i32 %155 = bitcast float %153 to i32 %156 = insertelement <2 x i32> undef, i32 %154, i32 0 %157 = insertelement <2 x i32> %156, i32 %155, i32 1 %158 = bitcast <8 x i32> %48 to <32 x i8> %159 = bitcast <4 x i32> %50 to <16 x i8> %160 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %157, <32 x i8> %158, <16 x i8> %159, i32 2) %161 = extractelement <4 x float> %160, i32 0 %162 = fadd float %33, %61 %163 = fadd float %35, %63 %164 = bitcast float %162 to i32 %165 = bitcast float %163 to i32 %166 = insertelement <2 x i32> undef, i32 %164, i32 0 %167 = insertelement <2 x i32> %166, i32 %165, i32 1 %168 = bitcast <8 x i32> %48 to <32 x i8> %169 = bitcast <4 x i32> %50 to <16 x i8> %170 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %167, <32 x i8> %168, <16 x i8> %169, i32 2) %171 = extractelement <4 x float> %170, i32 0 %172 = fadd float %34, %61 %173 = fadd float %35, %63 %174 = bitcast float %172 to i32 %175 = bitcast float %173 to i32 %176 = insertelement <2 x i32> undef, i32 %174, i32 0 %177 = insertelement <2 x i32> %176, i32 %175, i32 1 %178 = bitcast <8 x i32> %48 to <32 x i8> %179 = bitcast <4 x i32> %50 to <16 x i8> %180 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %177, <32 x i8> %178, <16 x i8> %179, i32 2) %181 = extractelement <4 x float> %180, i32 0 %182 = fadd float %61, 0.000000e+00 %183 = fadd float %39, %63 %184 = bitcast float %182 to i32 %185 = bitcast float %183 to i32 %186 = insertelement <2 x i32> undef, i32 %184, i32 0 %187 = insertelement <2 x i32> %186, i32 %185, i32 1 %188 = bitcast <8 x i32> %48 to <32 x i8> %189 = bitcast <4 x i32> %50 to <16 x i8> %190 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %187, <32 x i8> %188, <16 x i8> %189, i32 2) %191 = extractelement <4 x float> %190, i32 0 %192 = fadd float %36, %61 %193 = fadd float %39, %63 %194 = bitcast float %192 to i32 %195 = bitcast float %193 to i32 %196 = insertelement <2 x i32> undef, i32 %194, i32 0 %197 = insertelement <2 x i32> %196, i32 %195, i32 1 %198 = bitcast <8 x i32> %48 to <32 x i8> %199 = bitcast <4 x i32> %50 to <16 x i8> %200 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %197, <32 x i8> %198, <16 x i8> %199, i32 2) %201 = extractelement <4 x float> %200, i32 0 %202 = fadd float %37, %61 %203 = fadd float %39, %63 %204 = bitcast float %202 to i32 %205 = bitcast float %203 to i32 %206 = insertelement <2 x i32> undef, i32 %204, i32 0 %207 = insertelement <2 x i32> %206, i32 %205, i32 1 %208 = bitcast <8 x i32> %48 to <32 x i8> %209 = bitcast <4 x i32> %50 to <16 x i8> %210 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %207, <32 x i8> %208, <16 x i8> %209, i32 2) %211 = extractelement <4 x float> %210, i32 0 %212 = fadd float %38, %61 %213 = fadd float %39, %63 %214 = bitcast float %212 to i32 %215 = bitcast float %213 to i32 %216 = insertelement <2 x i32> undef, i32 %214, i32 0 %217 = insertelement <2 x i32> %216, i32 %215, i32 1 %218 = bitcast <8 x i32> %48 to <32 x i8> %219 = bitcast <4 x i32> %50 to <16 x i8> %220 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %217, <32 x i8> %218, <16 x i8> %219, i32 2) %221 = extractelement <4 x float> %220, i32 0 %222 = call float @llvm.minnum.f32(float %73, float %113) %223 = call float @llvm.minnum.f32(float %83, float %123) %224 = call float @llvm.minnum.f32(float %93, float %133) %225 = call float @llvm.minnum.f32(float %103, float %143) %226 = call float @llvm.minnum.f32(float %151, float %191) %227 = call float @llvm.minnum.f32(float %161, float %201) %228 = call float @llvm.minnum.f32(float %171, float %211) %229 = call float @llvm.minnum.f32(float %181, float %221) %230 = call float @llvm.minnum.f32(float %222, float %226) %231 = call float @llvm.minnum.f32(float %223, float %227) %232 = call float @llvm.minnum.f32(float %224, float %228) %233 = call float @llvm.minnum.f32(float %225, float %229) %234 = call float @llvm.minnum.f32(float %230, float %232) %235 = call float @llvm.minnum.f32(float %231, float %233) %236 = call float @llvm.minnum.f32(float %234, float %235) %237 = fdiv float 1.000000e+00, %42 %238 = fmul float %236, %237 %239 = fmul float %238, 6.553500e+04 %240 = call float @floor(float %239) %241 = fmul float %240, 3.906250e-03 %242 = call float @floor(float %241) %243 = fmul float %242, 0x3F70101020000000 %244 = fmul float %242, 2.560000e+02 %245 = fsub float %240, %244 %246 = fmul float %245, 0x3F70101020000000 %247 = call i32 @llvm.SI.packf16(float %243, float %246) %248 = bitcast i32 %247 to float %249 = call i32 @llvm.SI.packf16(float %58, float %59) %250 = bitcast i32 %249 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %248, float %250, float %248, float %250) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: readnone declare float @floor(float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[20:27], s[6:7], 0x0 ; C0CA0700 s_load_dwordx4 s[28:31], s[4:5], 0x0 ; C08E0500 s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[32:35], s[2:3], 0x4 ; C0900304 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_load_dwordx4 s[8:11], s[4:5], 0x4 ; C0840504 s_load_dwordx8 s[12:19], s[6:7], 0x8 ; C0C60708 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:1], 3, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[20:27], s[28:31] ; F0800300 00E50002 s_load_dwordx4 s[0:3], s[2:3], 0x8 ; C0800308 s_buffer_load_dword s4, s[32:35], 0x4 ; C2022104 s_buffer_load_dword s5, s[32:35], 0x5 ; C202A105 s_buffer_load_dword s6, s[32:35], 0x6 ; C2032106 s_buffer_load_dword s7, s[32:35], 0x7 ; C203A107 s_buffer_load_dword s20, s[32:35], 0x8 ; C20A2108 s_buffer_load_dword s21, s[32:35], 0x9 ; C20AA109 s_buffer_load_dword s22, s[32:35], 0xa ; C20B210A s_buffer_load_dword s23, s[32:35], 0xb ; C20BA10B s_buffer_load_dword s24, s[32:35], 0xc ; C20C210C s_buffer_load_dword s25, s[32:35], 0xd ; C20CA10D s_buffer_load_dword s26, s[32:35], 0xe ; C20D210E s_buffer_load_dword s27, s[32:35], 0xf ; C20DA10F s_buffer_load_dword s28, s[32:35], 0x10 ; C20E2110 s_buffer_load_dword s29, s[32:35], 0x11 ; C20EA111 s_buffer_load_dword s30, s[32:35], 0x12 ; C20F2112 s_buffer_load_dword s31, s[32:35], 0x13 ; C20FA113 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mac_f32_e64 v2, 0.5, s21 ; D23E0002 00002AF0 v_mac_f32_e64 v3, 0.5, s23 ; D23E0003 00002EF0 v_add_f32_e32 v5, s7, v3 ; 060A0607 v_mov_b32_e32 v6, v2 ; 7E0C0302 v_mov_b32_e32 v7, v3 ; 7E0E0303 s_buffer_load_dword s0, s[0:3], 0x5c ; C200015C v_mov_b32_e32 v7, v5 ; 7E0E0305 image_sample v6, 1, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[12:19], s[8:11] ; F0800100 00430606 v_add_f32_e32 v4, s4, v2 ; 06080404 image_sample v7, 1, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[12:19], s[8:11] ; F0800100 00430704 v_add_f32_e32 v4, s5, v2 ; 06080405 image_sample v8, 1, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[12:19], s[8:11] ; F0800100 00430804 v_add_f32_e32 v4, s6, v2 ; 06080406 v_add_f32_e32 v10, s23, v3 ; 06140617 v_mov_b32_e32 v11, v2 ; 7E160302 v_mov_b32_e32 v12, v3 ; 7E180303 image_sample v4, 1, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[12:19], s[8:11] ; F0800100 00430404 v_mov_b32_e32 v12, v10 ; 7E18030A image_sample v5, 1, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[12:19], s[8:11] ; F0800100 0043050B v_add_f32_e32 v9, s20, v2 ; 06120414 image_sample v11, 1, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[12:19], s[8:11] ; F0800100 00430B09 v_add_f32_e32 v9, s21, v2 ; 06120415 image_sample v12, 1, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[12:19], s[8:11] ; F0800100 00430C09 v_add_f32_e32 v9, s22, v2 ; 06120416 image_sample v9, 1, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[12:19], s[8:11] ; F0800100 00430909 v_add_f32_e32 v13, s24, v2 ; 061A0418 v_add_f32_e32 v14, s27, v3 ; 061C061B image_sample v10, 1, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800100 00430A02 image_sample v15, 1, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[12:19], s[8:11] ; F0800100 00430F0D v_add_f32_e32 v13, s25, v2 ; 061A0419 image_sample v16, 1, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[12:19], s[8:11] ; F0800100 0043100D v_add_f32_e32 v13, s26, v2 ; 061A041A v_add_f32_e32 v3, s31, v3 ; 0606061F v_add_f32_e32 v17, s28, v2 ; 0622041C image_sample v13, 1, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[12:19], s[8:11] ; F0800100 00430D0D v_mov_b32_e32 v18, v3 ; 7E240303 image_sample v14, 1, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800100 00430E02 v_add_f32_e32 v19, s29, v2 ; 0626041D image_sample v17, 1, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[12:19], s[8:11] ; F0800100 00431111 v_mov_b32_e32 v20, v3 ; 7E280303 image_sample v18, 1, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[12:19], s[8:11] ; F0800100 00431213 v_add_f32_e32 v2, s30, v2 ; 0604041E image_sample v2, 1, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800100 00430202 s_waitcnt vmcnt(3) lgkmcnt(0) ; BF8C0073 v_min_f32_e32 v3, v14, v10 ; 1E06150E v_min3_f32 v3, v6, v5, v3 ; D2A20003 040E0B06 s_waitcnt vmcnt(1) ; BF8C0771 v_min_f32_e32 v5, v18, v16 ; 1E0A2112 v_min3_f32 v5, v8, v12, v5 ; D2A20005 04161908 s_waitcnt vmcnt(0) ; BF8C0770 v_min_f32_e32 v2, v2, v13 ; 1E041B02 v_min3_f32 v2, v4, v9, v2 ; D2A20002 040A1304 v_min_f32_e32 v4, v11, v7 ; 1E080F0B v_rcp_f32_e32 v6, s0 ; 7E0C5400 v_min_f32_e32 v7, v17, v15 ; 1E0E1F11 v_min3_f32 v2, v4, v7, v2 ; D2A20002 040A0F04 v_min3_f32 v2, v3, v5, v2 ; D2A20002 040A0B03 v_mul_f32_e32 v2, v6, v2 ; 10040506 v_mul_f32_e32 v2, 0x477fff00, v2 ; 100404FF 477FFF00 v_floor_f32_e32 v2, v2 ; 7E044902 v_mul_f32_e32 v3, 0x3b800000, v2 ; 100604FF 3B800000 v_floor_f32_e32 v3, v3 ; 7E064903 v_mov_b32_e32 v4, 0x3b808081 ; 7E0802FF 3B808081 v_mul_f32_e32 v5, v4, v3 ; 100A0704 v_madmk_f32_e32 v2, v3, v2, 0xc3800000 ; 40040503 C3800000 v_mul_f32_e32 v2, v4, v2 ; 10040504 v_cvt_pkrtz_f16_f32_e32 v2, v5, v2 ; 5E040505 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 exp 15, 0, 1, 1, 1, v2, v0, v2, v0 ; F8001C0F 00020002 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 24 Code Size: 520 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], POSITION, LINEAR DCL IN[1], GENERIC[0], PERSPECTIVE DCL IN[2], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL CONST[4] DCL CONST[1][0..3] DCL CONST[2][0..41] DCL CONST[3][0..13] DCL CONST[4][0] DCL TEMP[0] DCL TEMP[1..17], LOCAL IMM[0] FLT32 { 0.0000, -0.5000, 2.0000, 0.2500} IMM[1] FLT32 { -1.0000, 0.6152, 0.0992, 0.4686} IMM[2] UINT32 {1, 304, 0, 16} IMM[3] UINT32 {320, 368, 20, 12} IMM[4] UINT32 {28, 44, 60, 32} IMM[5] UINT32 {4, 36, 48, 52} IMM[6] FLT32 { 1.0000, 0.9961, 0.0039, -0.0015} IMM[7] FLT32 { -0.2467, 0.4956, 0.6367, 0.0625} IMM[8] FLT32 { -0.3027, 0.1974, 0.9290, 65535.0000} IMM[9] FLT32 { 0.3298, -0.2439, 0.4536, 0.0039} IMM[10] FLT32 { 0.2288, 0.2897, 0.1331, 7.0000} IMM[11] FLT32 { -0.2213, -0.1484, 0.1318, 8.0000} IMM[12] FLT32 { -0.4576, -0.6662, 0.2295, 0.0039} IMM[13] FLT32 { -0.0739, 0.0550, -0.0097, 256.0000} IMM[14] FLT32 { -0.0383, -0.0400, 0.0098, 0.0039} IMM[15] FLT32 { 0.7452, 0.3237, -0.4291, 0.0627} IMM[16] FLT32 { -0.0154, 0.2674, -0.0660, 0.2934} IMM[17] FLT32 { -0.0962, -0.5229, -0.2508, 0.2934} IMM[18] FLT32 { 0.4897, -0.4644, -0.1492, 0.2934} IMM[19] FLT32 { -0.1312, 0.1391, -0.7009, 0.2934} IMM[20] FLT32 { -0.3623, -0.1005, -0.3062, 0.2934} IMM[21] FLT32 { 0.2934, -0.1706, -0.6618, 0.0000} 0: MOV TEMP[0], IN[0] 1: MAD TEMP[0].y, IN[0], CONST[4].xxxx, CONST[4].yyyy 2: MOV TEMP[1].xy, IN[1].xyyy 3: MOV TEMP[1].w, IMM[0].xxxx 4: TXL TEMP[1].xyz, TEMP[1], SAMP[1], 2D 5: ADD TEMP[1].xyz, TEMP[1].xyzz, IMM[0].yyyy 6: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[0].zzzz 7: MOV TEMP[2].xy, IN[1].xyyy 8: MOV TEMP[2].w, IMM[0].xxxx 9: TXL TEMP[2].x, TEMP[2], SAMP[2], 2D 10: MUL TEMP[3].xy, TEMP[0].xyyy, IMM[0].wwww 11: MOV TEMP[3].xy, TEMP[3].xyyy 12: TEX TEMP[3], TEMP[3], SAMP[3], 2D 13: MAD TEMP[3], TEMP[3], IMM[0].zzzz, IMM[1].xxxx 14: DP4 TEMP[4].x, TEMP[3], TEMP[3] 15: RSQ TEMP[4].x, TEMP[4].xxxx 16: MUL TEMP[3].xyz, TEMP[3], TEMP[4].xxxx 17: MAD TEMP[4].xyz, IN[2].xyzz, TEMP[2].xxxx, CONST[2][19].xyzz 18: MAD TEMP[4].xyz, CONST[1][1].xxxx, TEMP[1].xyzz, TEMP[4].xyzz 19: RCP TEMP[5].xyz, CONST[2][23].xxxx 20: MUL TEMP[5].xyz, CONST[2][20].xyzz, TEMP[5].xyzz 21: MUL TEMP[6].xyz, CONST[1][1].yyyy, TEMP[5].xyzz 22: MOV TEMP[7].x, TEMP[6].xxxx 23: MOV TEMP[7].y, TEMP[6].yyyy 24: MOV TEMP[7].z, TEMP[6].zzzz 25: DP3 TEMP[6].x, TEMP[5].xyzz, TEMP[4].xyzz 26: DP3 TEMP[5].x, TEMP[5].xyzz, CONST[2][19].xyzz 27: ADD TEMP[5].x, TEMP[6].xxxx, -TEMP[5].xxxx 28: MOV TEMP[7].w, TEMP[5].xxxx 29: MUL TEMP[5].x, CONST[2][0].xxxx, CONST[1][2].xxxx 30: MAD TEMP[5].x, CONST[2][0].wwww, CONST[1][2].zzzz, TEMP[5].xxxx 31: MUL TEMP[6].x, CONST[2][1].xxxx, CONST[1][2].xxxx 32: MAD TEMP[6].x, CONST[2][1].wwww, CONST[1][2].zzzz, TEMP[6].xxxx 33: MOV TEMP[5].y, TEMP[6].xxxx 34: MUL TEMP[6].x, CONST[2][2].xxxx, CONST[1][2].xxxx 35: MAD TEMP[6].x, CONST[2][2].wwww, CONST[1][2].zzzz, TEMP[6].xxxx 36: MOV TEMP[5].z, TEMP[6].xxxx 37: MUL TEMP[6].x, CONST[2][0].yyyy, CONST[1][2].yyyy 38: MAD TEMP[6].x, CONST[2][0].wwww, CONST[1][2].wwww, TEMP[6].xxxx 39: MUL TEMP[8].x, CONST[2][1].yyyy, CONST[1][2].yyyy 40: MAD TEMP[8].x, CONST[2][1].wwww, CONST[1][2].wwww, TEMP[8].xxxx 41: MOV TEMP[6].y, TEMP[8].xxxx 42: MUL TEMP[8].x, CONST[2][2].yyyy, CONST[1][2].yyyy 43: MAD TEMP[8].x, CONST[2][2].wwww, CONST[1][2].wwww, TEMP[8].xxxx 44: MOV TEMP[6].z, TEMP[8].xxxx 45: MUL TEMP[8].xyz, TEMP[5].xyzz, CONST[1][1].yyyy 46: MOV TEMP[9].x, TEMP[8].xxxx 47: MOV TEMP[9].y, TEMP[8].yyyy 48: MOV TEMP[9].z, TEMP[8].zzzz 49: MUL TEMP[8].x, CONST[2][3].xxxx, CONST[1][2].xxxx 50: MAD TEMP[8].x, CONST[2][3].wwww, CONST[1][2].zzzz, TEMP[8].xxxx 51: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[4].xyzz 52: ADD TEMP[5].x, TEMP[8].xxxx, TEMP[5].xxxx 53: MOV TEMP[9].w, TEMP[5].xxxx 54: MUL TEMP[5].xyz, CONST[1][1].yyyy, TEMP[6].xyzz 55: MOV TEMP[8].x, TEMP[5].xxxx 56: MOV TEMP[8].y, TEMP[5].yyyy 57: MOV TEMP[8].z, TEMP[5].zzzz 58: MUL TEMP[5].x, CONST[2][3].yyyy, CONST[1][2].yyyy 59: MAD TEMP[5].x, CONST[2][3].wwww, CONST[1][2].wwww, TEMP[5].xxxx 60: DP3 TEMP[6].x, TEMP[6].xyzz, TEMP[4].xyzz 61: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx 62: MOV TEMP[8].w, TEMP[5].xxxx 63: MOV TEMP[5].x, CONST[2][0].wwww 64: MOV TEMP[5].y, CONST[2][1].wwww 65: MOV TEMP[5].z, CONST[2][2].wwww 66: MUL TEMP[5].xyz, CONST[1][1].yyyy, TEMP[5].xyzz 67: MOV TEMP[6].x, TEMP[5].xxxx 68: MOV TEMP[6].y, TEMP[5].yyyy 69: MOV TEMP[6].z, TEMP[5].zzzz 70: MOV TEMP[5].x, CONST[2][0].wwww 71: MOV TEMP[5].y, CONST[2][1].wwww 72: MOV TEMP[5].z, CONST[2][2].wwww 73: DP3 TEMP[4].x, TEMP[5].xyzz, TEMP[4].xyzz 74: ADD TEMP[4].x, CONST[2][3].wwww, TEMP[4].xxxx 75: MOV TEMP[6].w, TEMP[4].xxxx 76: DP3 TEMP[4].x, TEMP[3].xyzz, IMM[1].yzww 77: MUL TEMP[4].xyz, TEMP[4].xxxx, TEMP[3].xyzz 78: MUL TEMP[4].xyz, IMM[0].zzzz, TEMP[4].xyzz 79: ADD TEMP[4].xyz, IMM[1].yzww, -TEMP[4].xyzz 80: DP3 TEMP[5].x, TEMP[1].xyzz, TEMP[4].xyzz 81: MUL TEMP[5].xyz, TEMP[5].xxxx, TEMP[1].xyzz 82: MUL TEMP[5].xyz, IMM[0].zzzz, TEMP[5].xyzz 83: ADD TEMP[5].xyz, TEMP[4].xyzz, -TEMP[5].xyzz 84: DP3 TEMP[10].x, TEMP[4].xyzz, TEMP[1].xyzz 85: FSLT TEMP[10].x, TEMP[10].xxxx, IMM[0].xxxx 86: UIF TEMP[10].xxxx :2 87: MOV TEMP[5].xyz, TEMP[5].xyzx 88: ELSE :2 89: MOV TEMP[5].xyz, TEMP[4].xyzx 90: ENDIF 91: MOV TEMP[4].w, IMM[6].xxxx 92: MOV TEMP[4].x, TEMP[5].xxxx 93: MOV TEMP[4].y, TEMP[5].yyyy 94: MOV TEMP[4].z, TEMP[5].zzzz 95: DP4 TEMP[5].x, TEMP[7], TEMP[4] 96: DP4 TEMP[10].x, TEMP[4], TEMP[9] 97: DP4 TEMP[11].x, TEMP[4], TEMP[8] 98: MOV TEMP[10].y, TEMP[11].xxxx 99: DP4 TEMP[4].x, TEMP[4], TEMP[6] 100: RCP TEMP[4].xy, TEMP[4].xxxx 101: MUL TEMP[4].xy, TEMP[10].xyyy, TEMP[4].xyyy 102: MOV TEMP[4].xy, TEMP[4].xyyy 103: TEX TEMP[4].xy, TEMP[4], SAMP[0], 2D 104: DP2 TEMP[4].x, TEMP[4].xyyy, IMM[6].yzzz 105: DP3 TEMP[10].x, TEMP[3].xyzz, IMM[7].xyzz 106: MUL TEMP[10].xyz, TEMP[10].xxxx, TEMP[3].xyzz 107: MUL TEMP[10].xyz, IMM[0].zzzz, TEMP[10].xyzz 108: ADD TEMP[10].xyz, IMM[7].xyzz, -TEMP[10].xyzz 109: DP3 TEMP[11].x, TEMP[1].xyzz, TEMP[10].xyzz 110: MUL TEMP[11].xyz, TEMP[11].xxxx, TEMP[1].xyzz 111: MUL TEMP[11].xyz, IMM[0].zzzz, TEMP[11].xyzz 112: ADD TEMP[11].xyz, TEMP[10].xyzz, -TEMP[11].xyzz 113: DP3 TEMP[12].x, TEMP[10].xyzz, TEMP[1].xyzz 114: FSLT TEMP[12].x, TEMP[12].xxxx, IMM[0].xxxx 115: UIF TEMP[12].xxxx :2 116: MOV TEMP[11].xyz, TEMP[11].xyzx 117: ELSE :2 118: MOV TEMP[11].xyz, TEMP[10].xyzx 119: ENDIF 120: MOV TEMP[10].w, IMM[6].xxxx 121: MOV TEMP[10].x, TEMP[11].xxxx 122: MOV TEMP[10].y, TEMP[11].yyyy 123: MOV TEMP[10].z, TEMP[11].zzzz 124: DP4 TEMP[11].x, TEMP[7], TEMP[10] 125: DP4 TEMP[12].x, TEMP[10], TEMP[9] 126: DP4 TEMP[13].x, TEMP[10], TEMP[8] 127: MOV TEMP[12].y, TEMP[13].xxxx 128: DP4 TEMP[10].x, TEMP[10], TEMP[6] 129: RCP TEMP[10].xy, TEMP[10].xxxx 130: MUL TEMP[10].xy, TEMP[12].xyyy, TEMP[10].xyyy 131: MOV TEMP[10].xy, TEMP[10].xyyy 132: TEX TEMP[10].xy, TEMP[10], SAMP[0], 2D 133: DP2 TEMP[10].x, TEMP[10].xyyy, IMM[6].yzzz 134: DP3 TEMP[12].x, TEMP[3].xyzz, IMM[8].xyzz 135: MUL TEMP[12].xyz, TEMP[12].xxxx, TEMP[3].xyzz 136: MUL TEMP[12].xyz, IMM[0].zzzz, TEMP[12].xyzz 137: ADD TEMP[12].xyz, IMM[8].xyzz, -TEMP[12].xyzz 138: DP3 TEMP[13].x, TEMP[1].xyzz, TEMP[12].xyzz 139: MUL TEMP[13].xyz, TEMP[13].xxxx, TEMP[1].xyzz 140: MUL TEMP[13].xyz, IMM[0].zzzz, TEMP[13].xyzz 141: ADD TEMP[13].xyz, TEMP[12].xyzz, -TEMP[13].xyzz 142: DP3 TEMP[14].x, TEMP[12].xyzz, TEMP[1].xyzz 143: FSLT TEMP[14].x, TEMP[14].xxxx, IMM[0].xxxx 144: UIF TEMP[14].xxxx :2 145: MOV TEMP[13].xyz, TEMP[13].xyzx 146: ELSE :2 147: MOV TEMP[13].xyz, TEMP[12].xyzx 148: ENDIF 149: MOV TEMP[12].w, IMM[6].xxxx 150: MOV TEMP[12].x, TEMP[13].xxxx 151: MOV TEMP[12].y, TEMP[13].yyyy 152: MOV TEMP[12].z, TEMP[13].zzzz 153: DP4 TEMP[13].x, TEMP[7], TEMP[12] 154: DP4 TEMP[14].x, TEMP[12], TEMP[9] 155: DP4 TEMP[15].x, TEMP[12], TEMP[8] 156: MOV TEMP[14].y, TEMP[15].xxxx 157: DP4 TEMP[12].x, TEMP[12], TEMP[6] 158: RCP TEMP[12].xy, TEMP[12].xxxx 159: MUL TEMP[12].xy, TEMP[14].xyyy, TEMP[12].xyyy 160: MOV TEMP[12].xy, TEMP[12].xyyy 161: TEX TEMP[12].xy, TEMP[12], SAMP[0], 2D 162: DP2 TEMP[12].x, TEMP[12].xyyy, IMM[6].yzzz 163: DP3 TEMP[14].x, TEMP[3].xyzz, IMM[9].xyzz 164: MUL TEMP[14].xyz, TEMP[14].xxxx, TEMP[3].xyzz 165: MUL TEMP[14].xyz, IMM[0].zzzz, TEMP[14].xyzz 166: ADD TEMP[14].xyz, IMM[9].xyzz, -TEMP[14].xyzz 167: DP3 TEMP[15].x, TEMP[1].xyzz, TEMP[14].xyzz 168: MUL TEMP[15].xyz, TEMP[15].xxxx, TEMP[1].xyzz 169: MUL TEMP[15].xyz, IMM[0].zzzz, TEMP[15].xyzz 170: ADD TEMP[15].xyz, TEMP[14].xyzz, -TEMP[15].xyzz 171: DP3 TEMP[16].x, TEMP[14].xyzz, TEMP[1].xyzz 172: FSLT TEMP[16].x, TEMP[16].xxxx, IMM[0].xxxx 173: UIF TEMP[16].xxxx :2 174: MOV TEMP[15].xyz, TEMP[15].xyzx 175: ELSE :2 176: MOV TEMP[15].xyz, TEMP[14].xyzx 177: ENDIF 178: MOV TEMP[14].w, IMM[6].xxxx 179: MOV TEMP[14].x, TEMP[15].xxxx 180: MOV TEMP[14].y, TEMP[15].yyyy 181: MOV TEMP[14].z, TEMP[15].zzzz 182: MOV TEMP[5].x, TEMP[5].xxxx 183: MOV TEMP[5].y, TEMP[11].xxxx 184: MOV TEMP[5].z, TEMP[13].xxxx 185: DP4 TEMP[11].x, TEMP[7], TEMP[14] 186: MOV TEMP[5].w, TEMP[11].xxxx 187: DP4 TEMP[11].x, TEMP[14], TEMP[9] 188: DP4 TEMP[13].x, TEMP[14], TEMP[8] 189: MOV TEMP[11].y, TEMP[13].xxxx 190: MOV TEMP[4].x, TEMP[4].xxxx 191: MOV TEMP[4].y, TEMP[10].xxxx 192: MOV TEMP[4].z, TEMP[12].xxxx 193: DP4 TEMP[10].x, TEMP[14], TEMP[6] 194: RCP TEMP[10].xy, TEMP[10].xxxx 195: MUL TEMP[10].xy, TEMP[11].xyyy, TEMP[10].xyyy 196: MOV TEMP[10].xy, TEMP[10].xyyy 197: TEX TEMP[10].xy, TEMP[10], SAMP[0], 2D 198: DP2 TEMP[10].x, TEMP[10].xyyy, IMM[6].yzzz 199: MOV TEMP[4].w, TEMP[10].xxxx 200: ADD TEMP[4], TEMP[5], -TEMP[4] 201: ADD TEMP[4], TEMP[4], IMM[6].wwww 202: MUL TEMP[5].x, CONST[1][3].xxxx, TEMP[4].xxxx 203: ADD TEMP[5].x, IMM[6].xxxx, -TEMP[5].xxxx 204: MOV_SAT TEMP[5].x, TEMP[5].xxxx 205: FSLT TEMP[10].x, TEMP[4].xxxx, IMM[0].xxxx 206: UIF TEMP[10].xxxx :2 207: MOV TEMP[10].x, IMM[0].xxxx 208: ELSE :2 209: MOV TEMP[10].x, TEMP[5].xxxx 210: ENDIF 211: MUL TEMP[5].x, CONST[1][3].xxxx, TEMP[4].yyyy 212: ADD TEMP[5].x, IMM[6].xxxx, -TEMP[5].xxxx 213: MOV_SAT TEMP[5].x, TEMP[5].xxxx 214: FSLT TEMP[11].x, TEMP[4].yyyy, IMM[0].xxxx 215: UIF TEMP[11].xxxx :2 216: MOV TEMP[11].x, IMM[0].xxxx 217: ELSE :2 218: MOV TEMP[11].x, TEMP[5].xxxx 219: ENDIF 220: MUL TEMP[5].x, CONST[1][3].xxxx, TEMP[4].zzzz 221: ADD TEMP[5].x, IMM[6].xxxx, -TEMP[5].xxxx 222: MOV_SAT TEMP[5].x, TEMP[5].xxxx 223: FSLT TEMP[12].x, TEMP[4].zzzz, IMM[0].xxxx 224: UIF TEMP[12].xxxx :2 225: MOV TEMP[12].x, IMM[0].xxxx 226: ELSE :2 227: MOV TEMP[12].x, TEMP[5].xxxx 228: ENDIF 229: MUL TEMP[5].x, CONST[1][3].xxxx, TEMP[4].wwww 230: ADD TEMP[5].x, IMM[6].xxxx, -TEMP[5].xxxx 231: MOV_SAT TEMP[5].x, TEMP[5].xxxx 232: FSLT TEMP[4].x, TEMP[4].wwww, IMM[0].xxxx 233: UIF TEMP[4].xxxx :2 234: MOV TEMP[4].x, IMM[0].xxxx 235: ELSE :2 236: MOV TEMP[4].x, TEMP[5].xxxx 237: ENDIF 238: MOV TEMP[5].x, TEMP[10].xxxx 239: MOV TEMP[5].y, TEMP[11].xxxx 240: MOV TEMP[5].z, TEMP[12].xxxx 241: MOV TEMP[5].w, TEMP[4].xxxx 242: DP4 TEMP[4].x, TEMP[5], IMM[7].wwww 243: DP3 TEMP[5].x, TEMP[3].xyzz, IMM[10].xyzz 244: MUL TEMP[5].xyz, TEMP[5].xxxx, TEMP[3].xyzz 245: MUL TEMP[5].xyz, IMM[0].zzzz, TEMP[5].xyzz 246: ADD TEMP[5].xyz, IMM[10].xyzz, -TEMP[5].xyzz 247: DP3 TEMP[10].x, TEMP[1].xyzz, TEMP[5].xyzz 248: MUL TEMP[10].xyz, TEMP[10].xxxx, TEMP[1].xyzz 249: MUL TEMP[10].xyz, IMM[0].zzzz, TEMP[10].xyzz 250: ADD TEMP[10].xyz, TEMP[5].xyzz, -TEMP[10].xyzz 251: DP3 TEMP[11].x, TEMP[5].xyzz, TEMP[1].xyzz 252: FSLT TEMP[11].x, TEMP[11].xxxx, IMM[0].xxxx 253: UIF TEMP[11].xxxx :2 254: MOV TEMP[10].xyz, TEMP[10].xyzx 255: ELSE :2 256: MOV TEMP[10].xyz, TEMP[5].xyzx 257: ENDIF 258: MOV TEMP[5].w, IMM[6].xxxx 259: MOV TEMP[5].x, TEMP[10].xxxx 260: MOV TEMP[5].y, TEMP[10].yyyy 261: MOV TEMP[5].z, TEMP[10].zzzz 262: DP4 TEMP[10].x, TEMP[7], TEMP[5] 263: DP4 TEMP[11].x, TEMP[5], TEMP[9] 264: DP4 TEMP[12].x, TEMP[5], TEMP[8] 265: MOV TEMP[11].y, TEMP[12].xxxx 266: DP4 TEMP[5].x, TEMP[5], TEMP[6] 267: RCP TEMP[5].xy, TEMP[5].xxxx 268: MUL TEMP[5].xy, TEMP[11].xyyy, TEMP[5].xyyy 269: MOV TEMP[5].xy, TEMP[5].xyyy 270: TEX TEMP[5].xy, TEMP[5], SAMP[0], 2D 271: DP2 TEMP[5].x, TEMP[5].xyyy, IMM[6].yzzz 272: DP3 TEMP[11].x, TEMP[3].xyzz, IMM[11].xyzz 273: MUL TEMP[11].xyz, TEMP[11].xxxx, TEMP[3].xyzz 274: MUL TEMP[11].xyz, IMM[0].zzzz, TEMP[11].xyzz 275: ADD TEMP[11].xyz, IMM[11].xyzz, -TEMP[11].xyzz 276: DP3 TEMP[12].x, TEMP[1].xyzz, TEMP[11].xyzz 277: MUL TEMP[12].xyz, TEMP[12].xxxx, TEMP[1].xyzz 278: MUL TEMP[12].xyz, IMM[0].zzzz, TEMP[12].xyzz 279: ADD TEMP[12].xyz, TEMP[11].xyzz, -TEMP[12].xyzz 280: DP3 TEMP[13].x, TEMP[11].xyzz, TEMP[1].xyzz 281: FSLT TEMP[13].x, TEMP[13].xxxx, IMM[0].xxxx 282: UIF TEMP[13].xxxx :2 283: MOV TEMP[12].xyz, TEMP[12].xyzx 284: ELSE :2 285: MOV TEMP[12].xyz, TEMP[11].xyzx 286: ENDIF 287: MOV TEMP[11].w, IMM[6].xxxx 288: MOV TEMP[11].x, TEMP[12].xxxx 289: MOV TEMP[11].y, TEMP[12].yyyy 290: MOV TEMP[11].z, TEMP[12].zzzz 291: DP4 TEMP[12].x, TEMP[7], TEMP[11] 292: DP4 TEMP[13].x, TEMP[11], TEMP[9] 293: DP4 TEMP[14].x, TEMP[11], TEMP[8] 294: MOV TEMP[13].y, TEMP[14].xxxx 295: DP4 TEMP[11].x, TEMP[11], TEMP[6] 296: RCP TEMP[11].xy, TEMP[11].xxxx 297: MUL TEMP[11].xy, TEMP[13].xyyy, TEMP[11].xyyy 298: MOV TEMP[11].xy, TEMP[11].xyyy 299: TEX TEMP[11].xy, TEMP[11], SAMP[0], 2D 300: DP2 TEMP[11].x, TEMP[11].xyyy, IMM[6].yzzz 301: DP3 TEMP[13].x, TEMP[3].xyzz, IMM[12].xyzz 302: MUL TEMP[13].xyz, TEMP[13].xxxx, TEMP[3].xyzz 303: MUL TEMP[13].xyz, IMM[0].zzzz, TEMP[13].xyzz 304: ADD TEMP[13].xyz, IMM[12].xyzz, -TEMP[13].xyzz 305: DP3 TEMP[14].x, TEMP[1].xyzz, TEMP[13].xyzz 306: MUL TEMP[14].xyz, TEMP[14].xxxx, TEMP[1].xyzz 307: MUL TEMP[14].xyz, IMM[0].zzzz, TEMP[14].xyzz 308: ADD TEMP[14].xyz, TEMP[13].xyzz, -TEMP[14].xyzz 309: DP3 TEMP[15].x, TEMP[13].xyzz, TEMP[1].xyzz 310: FSLT TEMP[15].x, TEMP[15].xxxx, IMM[0].xxxx 311: UIF TEMP[15].xxxx :2 312: MOV TEMP[14].xyz, TEMP[14].xyzx 313: ELSE :2 314: MOV TEMP[14].xyz, TEMP[13].xyzx 315: ENDIF 316: MOV TEMP[13].w, IMM[6].xxxx 317: MOV TEMP[13].x, TEMP[14].xxxx 318: MOV TEMP[13].y, TEMP[14].yyyy 319: MOV TEMP[13].z, TEMP[14].zzzz 320: DP4 TEMP[14].x, TEMP[7], TEMP[13] 321: DP4 TEMP[15].x, TEMP[13], TEMP[9] 322: DP4 TEMP[16].x, TEMP[13], TEMP[8] 323: MOV TEMP[15].y, TEMP[16].xxxx 324: DP4 TEMP[13].x, TEMP[13], TEMP[6] 325: RCP TEMP[13].xy, TEMP[13].xxxx 326: MUL TEMP[13].xy, TEMP[15].xyyy, TEMP[13].xyyy 327: MOV TEMP[13].xy, TEMP[13].xyyy 328: TEX TEMP[13].xy, TEMP[13], SAMP[0], 2D 329: DP2 TEMP[13].x, TEMP[13].xyyy, IMM[6].yzzz 330: DP3 TEMP[15].x, TEMP[3].xyzz, IMM[13].xyzz 331: MUL TEMP[15].xyz, TEMP[15].xxxx, TEMP[3].xyzz 332: MUL TEMP[15].xyz, IMM[0].zzzz, TEMP[15].xyzz 333: ADD TEMP[15].xyz, IMM[13].xyzz, -TEMP[15].xyzz 334: DP3 TEMP[16].x, TEMP[1].xyzz, TEMP[15].xyzz 335: MUL TEMP[16].xyz, TEMP[16].xxxx, TEMP[1].xyzz 336: MUL TEMP[16].xyz, IMM[0].zzzz, TEMP[16].xyzz 337: ADD TEMP[16].xyz, TEMP[15].xyzz, -TEMP[16].xyzz 338: DP3 TEMP[17].x, TEMP[15].xyzz, TEMP[1].xyzz 339: FSLT TEMP[17].x, TEMP[17].xxxx, IMM[0].xxxx 340: UIF TEMP[17].xxxx :2 341: MOV TEMP[16].xyz, TEMP[16].xyzx 342: ELSE :2 343: MOV TEMP[16].xyz, TEMP[15].xyzx 344: ENDIF 345: MOV TEMP[15].w, IMM[6].xxxx 346: MOV TEMP[15].x, TEMP[16].xxxx 347: MOV TEMP[15].y, TEMP[16].yyyy 348: MOV TEMP[15].z, TEMP[16].zzzz 349: MOV TEMP[10].x, TEMP[10].xxxx 350: MOV TEMP[10].y, TEMP[12].xxxx 351: MOV TEMP[10].z, TEMP[14].xxxx 352: DP4 TEMP[12].x, TEMP[7], TEMP[15] 353: MOV TEMP[10].w, TEMP[12].xxxx 354: DP4 TEMP[12].x, TEMP[15], TEMP[9] 355: DP4 TEMP[14].x, TEMP[15], TEMP[8] 356: MOV TEMP[12].y, TEMP[14].xxxx 357: MOV TEMP[5].x, TEMP[5].xxxx 358: MOV TEMP[5].y, TEMP[11].xxxx 359: MOV TEMP[5].z, TEMP[13].xxxx 360: DP4 TEMP[11].x, TEMP[15], TEMP[6] 361: RCP TEMP[11].xy, TEMP[11].xxxx 362: MUL TEMP[11].xy, TEMP[12].xyyy, TEMP[11].xyyy 363: MOV TEMP[11].xy, TEMP[11].xyyy 364: TEX TEMP[11].xy, TEMP[11], SAMP[0], 2D 365: DP2 TEMP[11].x, TEMP[11].xyyy, IMM[6].yzzz 366: MOV TEMP[5].w, TEMP[11].xxxx 367: ADD TEMP[5], TEMP[10], -TEMP[5] 368: ADD TEMP[5], IMM[6].wwww, TEMP[5] 369: MUL TEMP[10].x, CONST[1][3].xxxx, TEMP[5].xxxx 370: ADD TEMP[10].x, IMM[6].xxxx, -TEMP[10].xxxx 371: MOV_SAT TEMP[10].x, TEMP[10].xxxx 372: FSLT TEMP[11].x, TEMP[5].xxxx, IMM[0].xxxx 373: UIF TEMP[11].xxxx :2 374: MOV TEMP[11].x, IMM[0].xxxx 375: ELSE :2 376: MOV TEMP[11].x, TEMP[10].xxxx 377: ENDIF 378: MUL TEMP[10].x, CONST[1][3].xxxx, TEMP[5].yyyy 379: ADD TEMP[10].x, IMM[6].xxxx, -TEMP[10].xxxx 380: MOV_SAT TEMP[10].x, TEMP[10].xxxx 381: FSLT TEMP[12].x, TEMP[5].yyyy, IMM[0].xxxx 382: UIF TEMP[12].xxxx :2 383: MOV TEMP[12].x, IMM[0].xxxx 384: ELSE :2 385: MOV TEMP[12].x, TEMP[10].xxxx 386: ENDIF 387: MUL TEMP[10].x, CONST[1][3].xxxx, TEMP[5].zzzz 388: ADD TEMP[10].x, IMM[6].xxxx, -TEMP[10].xxxx 389: MOV_SAT TEMP[10].x, TEMP[10].xxxx 390: FSLT TEMP[13].x, TEMP[5].zzzz, IMM[0].xxxx 391: UIF TEMP[13].xxxx :2 392: MOV TEMP[13].x, IMM[0].xxxx 393: ELSE :2 394: MOV TEMP[13].x, TEMP[10].xxxx 395: ENDIF 396: MUL TEMP[10].x, CONST[1][3].xxxx, TEMP[5].wwww 397: ADD TEMP[10].x, IMM[6].xxxx, -TEMP[10].xxxx 398: MOV_SAT TEMP[10].x, TEMP[10].xxxx 399: FSLT TEMP[5].x, TEMP[5].wwww, IMM[0].xxxx 400: UIF TEMP[5].xxxx :2 401: MOV TEMP[5].x, IMM[0].xxxx 402: ELSE :2 403: MOV TEMP[5].x, TEMP[10].xxxx 404: ENDIF 405: MOV TEMP[10].x, TEMP[11].xxxx 406: MOV TEMP[10].y, TEMP[12].xxxx 407: MOV TEMP[10].z, TEMP[13].xxxx 408: MOV TEMP[10].w, TEMP[5].xxxx 409: DP4 TEMP[5].x, TEMP[10], IMM[7].wwww 410: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx 411: DP3 TEMP[5].x, TEMP[3].xyzz, IMM[14].xyzz 412: MUL TEMP[5].xyz, TEMP[5].xxxx, TEMP[3].xyzz 413: MUL TEMP[5].xyz, IMM[0].zzzz, TEMP[5].xyzz 414: ADD TEMP[5].xyz, IMM[14].xyzz, -TEMP[5].xyzz 415: DP3 TEMP[10].x, TEMP[1].xyzz, TEMP[5].xyzz 416: MUL TEMP[10].xyz, TEMP[10].xxxx, TEMP[1].xyzz 417: MUL TEMP[10].xyz, IMM[0].zzzz, TEMP[10].xyzz 418: ADD TEMP[10].xyz, TEMP[5].xyzz, -TEMP[10].xyzz 419: DP3 TEMP[11].x, TEMP[5].xyzz, TEMP[1].xyzz 420: FSLT TEMP[11].x, TEMP[11].xxxx, IMM[0].xxxx 421: UIF TEMP[11].xxxx :2 422: MOV TEMP[10].xyz, TEMP[10].xyzx 423: ELSE :2 424: MOV TEMP[10].xyz, TEMP[5].xyzx 425: ENDIF 426: MOV TEMP[5].w, IMM[6].xxxx 427: MOV TEMP[5].x, TEMP[10].xxxx 428: MOV TEMP[5].y, TEMP[10].yyyy 429: MOV TEMP[5].z, TEMP[10].zzzz 430: DP4 TEMP[10].x, TEMP[7], TEMP[5] 431: DP4 TEMP[11].x, TEMP[5], TEMP[9] 432: DP4 TEMP[12].x, TEMP[5], TEMP[8] 433: MOV TEMP[11].y, TEMP[12].xxxx 434: DP4 TEMP[5].x, TEMP[5], TEMP[6] 435: RCP TEMP[5].xy, TEMP[5].xxxx 436: MUL TEMP[5].xy, TEMP[11].xyyy, TEMP[5].xyyy 437: MOV TEMP[5].xy, TEMP[5].xyyy 438: TEX TEMP[5].xy, TEMP[5], SAMP[0], 2D 439: DP2 TEMP[5].x, TEMP[5].xyyy, IMM[6].yzzz 440: DP3 TEMP[11].x, TEMP[3].xyzz, IMM[15].xyzz 441: MUL TEMP[11].xyz, TEMP[11].xxxx, TEMP[3].xyzz 442: MUL TEMP[11].xyz, IMM[0].zzzz, TEMP[11].xyzz 443: ADD TEMP[11].xyz, IMM[15].xyzz, -TEMP[11].xyzz 444: DP3 TEMP[12].x, TEMP[1].xyzz, TEMP[11].xyzz 445: MUL TEMP[12].xyz, TEMP[12].xxxx, TEMP[1].xyzz 446: MUL TEMP[12].xyz, IMM[0].zzzz, TEMP[12].xyzz 447: ADD TEMP[12].xyz, TEMP[11].xyzz, -TEMP[12].xyzz 448: DP3 TEMP[13].x, TEMP[11].xyzz, TEMP[1].xyzz 449: FSLT TEMP[13].x, TEMP[13].xxxx, IMM[0].xxxx 450: UIF TEMP[13].xxxx :2 451: MOV TEMP[12].xyz, TEMP[12].xyzx 452: ELSE :2 453: MOV TEMP[12].xyz, TEMP[11].xyzx 454: ENDIF 455: MOV TEMP[11].w, IMM[6].xxxx 456: MOV TEMP[11].x, TEMP[12].xxxx 457: MOV TEMP[11].y, TEMP[12].yyyy 458: MOV TEMP[11].z, TEMP[12].zzzz 459: DP4 TEMP[12].x, TEMP[7], TEMP[11] 460: DP4 TEMP[13].x, TEMP[11], TEMP[9] 461: DP4 TEMP[14].x, TEMP[11], TEMP[8] 462: MOV TEMP[13].y, TEMP[14].xxxx 463: DP4 TEMP[11].x, TEMP[11], TEMP[6] 464: RCP TEMP[11].xy, TEMP[11].xxxx 465: MUL TEMP[11].xy, TEMP[13].xyyy, TEMP[11].xyyy 466: MOV TEMP[11].xy, TEMP[11].xyyy 467: TEX TEMP[11].xy, TEMP[11], SAMP[0], 2D 468: DP2 TEMP[11].x, TEMP[11].xyyy, IMM[6].yzzz 469: DP3 TEMP[13].x, TEMP[3].xyzz, IMM[16].xyzz 470: MUL TEMP[13].xyz, TEMP[13].xxxx, TEMP[3].xyzz 471: MUL TEMP[13].xyz, IMM[0].zzzz, TEMP[13].xyzz 472: ADD TEMP[13].xyz, IMM[16].xyzz, -TEMP[13].xyzz 473: DP3 TEMP[14].x, TEMP[1].xyzz, TEMP[13].xyzz 474: MUL TEMP[14].xyz, TEMP[14].xxxx, TEMP[1].xyzz 475: MUL TEMP[14].xyz, IMM[0].zzzz, TEMP[14].xyzz 476: ADD TEMP[14].xyz, TEMP[13].xyzz, -TEMP[14].xyzz 477: DP3 TEMP[15].x, TEMP[13].xyzz, TEMP[1].xyzz 478: FSLT TEMP[15].x, TEMP[15].xxxx, IMM[0].xxxx 479: UIF TEMP[15].xxxx :2 480: MOV TEMP[14].xyz, TEMP[14].xyzx 481: ELSE :2 482: MOV TEMP[14].xyz, TEMP[13].xyzx 483: ENDIF 484: MOV TEMP[13].w, IMM[6].xxxx 485: MOV TEMP[13].x, TEMP[14].xxxx 486: MOV TEMP[13].y, TEMP[14].yyyy 487: MOV TEMP[13].z, TEMP[14].zzzz 488: DP4 TEMP[14].x, TEMP[7], TEMP[13] 489: DP4 TEMP[15].x, TEMP[13], TEMP[9] 490: DP4 TEMP[16].x, TEMP[13], TEMP[8] 491: MOV TEMP[15].y, TEMP[16].xxxx 492: DP4 TEMP[13].x, TEMP[13], TEMP[6] 493: RCP TEMP[13].xy, TEMP[13].xxxx 494: MUL TEMP[13].xy, TEMP[15].xyyy, TEMP[13].xyyy 495: MOV TEMP[13].xy, TEMP[13].xyyy 496: TEX TEMP[13].xy, TEMP[13], SAMP[0], 2D 497: DP2 TEMP[13].x, TEMP[13].xyyy, IMM[6].yzzz 498: DP3 TEMP[15].x, TEMP[3].xyzz, IMM[17].xyzz 499: MUL TEMP[15].xyz, TEMP[15].xxxx, TEMP[3].xyzz 500: MUL TEMP[15].xyz, IMM[0].zzzz, TEMP[15].xyzz 501: ADD TEMP[15].xyz, IMM[17].xyzz, -TEMP[15].xyzz 502: DP3 TEMP[16].x, TEMP[1].xyzz, TEMP[15].xyzz 503: MUL TEMP[16].xyz, TEMP[16].xxxx, TEMP[1].xyzz 504: MUL TEMP[16].xyz, IMM[0].zzzz, TEMP[16].xyzz 505: ADD TEMP[16].xyz, TEMP[15].xyzz, -TEMP[16].xyzz 506: DP3 TEMP[17].x, TEMP[15].xyzz, TEMP[1].xyzz 507: FSLT TEMP[17].x, TEMP[17].xxxx, IMM[0].xxxx 508: UIF TEMP[17].xxxx :2 509: MOV TEMP[16].xyz, TEMP[16].xyzx 510: ELSE :2 511: MOV TEMP[16].xyz, TEMP[15].xyzx 512: ENDIF 513: MOV TEMP[15].w, IMM[6].xxxx 514: MOV TEMP[15].x, TEMP[16].xxxx 515: MOV TEMP[15].y, TEMP[16].yyyy 516: MOV TEMP[15].z, TEMP[16].zzzz 517: MOV TEMP[10].x, TEMP[10].xxxx 518: MOV TEMP[10].y, TEMP[12].xxxx 519: MOV TEMP[10].z, TEMP[14].xxxx 520: DP4 TEMP[12].x, TEMP[7], TEMP[15] 521: MOV TEMP[10].w, TEMP[12].xxxx 522: DP4 TEMP[12].x, TEMP[15], TEMP[9] 523: DP4 TEMP[14].x, TEMP[15], TEMP[8] 524: MOV TEMP[12].y, TEMP[14].xxxx 525: MOV TEMP[5].x, TEMP[5].xxxx 526: MOV TEMP[5].y, TEMP[11].xxxx 527: MOV TEMP[5].z, TEMP[13].xxxx 528: DP4 TEMP[11].x, TEMP[15], TEMP[6] 529: RCP TEMP[11].xy, TEMP[11].xxxx 530: MUL TEMP[11].xy, TEMP[12].xyyy, TEMP[11].xyyy 531: MOV TEMP[11].xy, TEMP[11].xyyy 532: TEX TEMP[11].xy, TEMP[11], SAMP[0], 2D 533: DP2 TEMP[11].x, TEMP[11].xyyy, IMM[6].yzzz 534: MOV TEMP[5].w, TEMP[11].xxxx 535: ADD TEMP[5], TEMP[10], -TEMP[5] 536: ADD TEMP[5], IMM[6].wwww, TEMP[5] 537: MUL TEMP[10].x, CONST[1][3].xxxx, TEMP[5].xxxx 538: ADD TEMP[10].x, IMM[6].xxxx, -TEMP[10].xxxx 539: MOV_SAT TEMP[10].x, TEMP[10].xxxx 540: FSLT TEMP[11].x, TEMP[5].xxxx, IMM[0].xxxx 541: UIF TEMP[11].xxxx :2 542: MOV TEMP[11].x, IMM[0].xxxx 543: ELSE :2 544: MOV TEMP[11].x, TEMP[10].xxxx 545: ENDIF 546: MUL TEMP[10].x, CONST[1][3].xxxx, TEMP[5].yyyy 547: ADD TEMP[10].x, IMM[6].xxxx, -TEMP[10].xxxx 548: MOV_SAT TEMP[10].x, TEMP[10].xxxx 549: FSLT TEMP[12].x, TEMP[5].yyyy, IMM[0].xxxx 550: UIF TEMP[12].xxxx :2 551: MOV TEMP[12].x, IMM[0].xxxx 552: ELSE :2 553: MOV TEMP[12].x, TEMP[10].xxxx 554: ENDIF 555: MUL TEMP[10].x, CONST[1][3].xxxx, TEMP[5].zzzz 556: ADD TEMP[10].x, IMM[6].xxxx, -TEMP[10].xxxx 557: MOV_SAT TEMP[10].x, TEMP[10].xxxx 558: FSLT TEMP[13].x, TEMP[5].zzzz, IMM[0].xxxx 559: UIF TEMP[13].xxxx :2 560: MOV TEMP[13].x, IMM[0].xxxx 561: ELSE :2 562: MOV TEMP[13].x, TEMP[10].xxxx 563: ENDIF 564: MUL TEMP[10].x, CONST[1][3].xxxx, TEMP[5].wwww 565: ADD TEMP[10].x, IMM[6].xxxx, -TEMP[10].xxxx 566: MOV_SAT TEMP[10].x, TEMP[10].xxxx 567: FSLT TEMP[5].x, TEMP[5].wwww, IMM[0].xxxx 568: UIF TEMP[5].xxxx :2 569: MOV TEMP[5].x, IMM[0].xxxx 570: ELSE :2 571: MOV TEMP[5].x, TEMP[10].xxxx 572: ENDIF 573: MOV TEMP[10].x, TEMP[11].xxxx 574: MOV TEMP[10].y, TEMP[12].xxxx 575: MOV TEMP[10].z, TEMP[13].xxxx 576: MOV TEMP[10].w, TEMP[5].xxxx 577: DP4 TEMP[5].x, TEMP[10], IMM[7].wwww 578: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx 579: DP3 TEMP[5].x, TEMP[3].xyzz, IMM[18].xyzz 580: MUL TEMP[5].xyz, TEMP[5].xxxx, TEMP[3].xyzz 581: MUL TEMP[5].xyz, IMM[0].zzzz, TEMP[5].xyzz 582: ADD TEMP[5].xyz, IMM[18].xyzz, -TEMP[5].xyzz 583: DP3 TEMP[10].x, TEMP[1].xyzz, TEMP[5].xyzz 584: MUL TEMP[10].xyz, TEMP[10].xxxx, TEMP[1].xyzz 585: MUL TEMP[10].xyz, IMM[0].zzzz, TEMP[10].xyzz 586: ADD TEMP[10].xyz, TEMP[5].xyzz, -TEMP[10].xyzz 587: DP3 TEMP[11].x, TEMP[5].xyzz, TEMP[1].xyzz 588: FSLT TEMP[11].x, TEMP[11].xxxx, IMM[0].xxxx 589: UIF TEMP[11].xxxx :2 590: MOV TEMP[10].xyz, TEMP[10].xyzx 591: ELSE :2 592: MOV TEMP[10].xyz, TEMP[5].xyzx 593: ENDIF 594: MOV TEMP[5].w, IMM[6].xxxx 595: MOV TEMP[5].x, TEMP[10].xxxx 596: MOV TEMP[5].y, TEMP[10].yyyy 597: MOV TEMP[5].z, TEMP[10].zzzz 598: DP4 TEMP[10].x, TEMP[7], TEMP[5] 599: DP4 TEMP[11].x, TEMP[5], TEMP[9] 600: DP4 TEMP[12].x, TEMP[5], TEMP[8] 601: MOV TEMP[11].y, TEMP[12].xxxx 602: DP4 TEMP[5].x, TEMP[5], TEMP[6] 603: RCP TEMP[5].xy, TEMP[5].xxxx 604: MUL TEMP[5].xy, TEMP[11].xyyy, TEMP[5].xyyy 605: MOV TEMP[5].xy, TEMP[5].xyyy 606: TEX TEMP[5].xy, TEMP[5], SAMP[0], 2D 607: DP2 TEMP[5].x, TEMP[5].xyyy, IMM[6].yzzz 608: DP3 TEMP[11].x, TEMP[3].xyzz, IMM[19].xyzz 609: MUL TEMP[11].xyz, TEMP[11].xxxx, TEMP[3].xyzz 610: MUL TEMP[11].xyz, IMM[0].zzzz, TEMP[11].xyzz 611: ADD TEMP[11].xyz, IMM[19].xyzz, -TEMP[11].xyzz 612: DP3 TEMP[12].x, TEMP[1].xyzz, TEMP[11].xyzz 613: MUL TEMP[12].xyz, TEMP[12].xxxx, TEMP[1].xyzz 614: MUL TEMP[12].xyz, IMM[0].zzzz, TEMP[12].xyzz 615: ADD TEMP[12].xyz, TEMP[11].xyzz, -TEMP[12].xyzz 616: DP3 TEMP[13].x, TEMP[11].xyzz, TEMP[1].xyzz 617: FSLT TEMP[13].x, TEMP[13].xxxx, IMM[0].xxxx 618: UIF TEMP[13].xxxx :2 619: MOV TEMP[12].xyz, TEMP[12].xyzx 620: ELSE :2 621: MOV TEMP[12].xyz, TEMP[11].xyzx 622: ENDIF 623: MOV TEMP[11].w, IMM[6].xxxx 624: MOV TEMP[11].x, TEMP[12].xxxx 625: MOV TEMP[11].y, TEMP[12].yyyy 626: MOV TEMP[11].z, TEMP[12].zzzz 627: DP4 TEMP[12].x, TEMP[7], TEMP[11] 628: DP4 TEMP[13].x, TEMP[11], TEMP[9] 629: DP4 TEMP[14].x, TEMP[11], TEMP[8] 630: MOV TEMP[13].y, TEMP[14].xxxx 631: DP4 TEMP[11].x, TEMP[11], TEMP[6] 632: RCP TEMP[11].xy, TEMP[11].xxxx 633: MUL TEMP[11].xy, TEMP[13].xyyy, TEMP[11].xyyy 634: MOV TEMP[11].xy, TEMP[11].xyyy 635: TEX TEMP[11].xy, TEMP[11], SAMP[0], 2D 636: DP2 TEMP[11].x, TEMP[11].xyyy, IMM[6].yzzz 637: DP3 TEMP[13].x, TEMP[3].xyzz, IMM[20].xyzz 638: MUL TEMP[13].xyz, TEMP[13].xxxx, TEMP[3].xyzz 639: MUL TEMP[13].xyz, IMM[0].zzzz, TEMP[13].xyzz 640: ADD TEMP[13].xyz, IMM[20].xyzz, -TEMP[13].xyzz 641: DP3 TEMP[14].x, TEMP[1].xyzz, TEMP[13].xyzz 642: MUL TEMP[14].xyz, TEMP[14].xxxx, TEMP[1].xyzz 643: MUL TEMP[14].xyz, IMM[0].zzzz, TEMP[14].xyzz 644: ADD TEMP[14].xyz, TEMP[13].xyzz, -TEMP[14].xyzz 645: DP3 TEMP[15].x, TEMP[13].xyzz, TEMP[1].xyzz 646: FSLT TEMP[15].x, TEMP[15].xxxx, IMM[0].xxxx 647: UIF TEMP[15].xxxx :2 648: MOV TEMP[14].xyz, TEMP[14].xyzx 649: ELSE :2 650: MOV TEMP[14].xyz, TEMP[13].xyzx 651: ENDIF 652: MOV TEMP[13].w, IMM[6].xxxx 653: MOV TEMP[13].x, TEMP[14].xxxx 654: MOV TEMP[13].y, TEMP[14].yyyy 655: MOV TEMP[13].z, TEMP[14].zzzz 656: DP4 TEMP[14].x, TEMP[7], TEMP[13] 657: DP4 TEMP[15].x, TEMP[13], TEMP[9] 658: DP4 TEMP[16].x, TEMP[13], TEMP[8] 659: MOV TEMP[15].y, TEMP[16].xxxx 660: DP4 TEMP[13].x, TEMP[13], TEMP[6] 661: RCP TEMP[13].xy, TEMP[13].xxxx 662: MUL TEMP[13].xy, TEMP[15].xyyy, TEMP[13].xyyy 663: MOV TEMP[13].xy, TEMP[13].xyyy 664: TEX TEMP[13].xy, TEMP[13], SAMP[0], 2D 665: DP2 TEMP[13].x, TEMP[13].xyyy, IMM[6].yzzz 666: DP3 TEMP[15].x, TEMP[3].xyzz, IMM[21].xyzz 667: MUL TEMP[3].xyz, TEMP[15].xxxx, TEMP[3].xyzz 668: MUL TEMP[3].xyz, IMM[0].zzzz, TEMP[3].xyzz 669: ADD TEMP[3].xyz, IMM[21].xyzz, -TEMP[3].xyzz 670: DP3 TEMP[15].x, TEMP[1].xyzz, TEMP[3].xyzz 671: MUL TEMP[15].xyz, TEMP[15].xxxx, TEMP[1].xyzz 672: MUL TEMP[15].xyz, IMM[0].zzzz, TEMP[15].xyzz 673: ADD TEMP[15].xyz, TEMP[3].xyzz, -TEMP[15].xyzz 674: DP3 TEMP[16].x, TEMP[3].xyzz, TEMP[1].xyzz 675: FSLT TEMP[16].x, TEMP[16].xxxx, IMM[0].xxxx 676: UIF TEMP[16].xxxx :2 677: MOV TEMP[15].xyz, TEMP[15].xyzx 678: ELSE :2 679: MOV TEMP[15].xyz, TEMP[3].xyzx 680: ENDIF 681: MOV TEMP[3].w, IMM[6].xxxx 682: MOV TEMP[3].x, TEMP[15].xxxx 683: MOV TEMP[3].y, TEMP[15].yyyy 684: MOV TEMP[3].z, TEMP[15].zzzz 685: MOV TEMP[10].x, TEMP[10].xxxx 686: MOV TEMP[10].y, TEMP[12].xxxx 687: MOV TEMP[10].z, TEMP[14].xxxx 688: DP4 TEMP[7].x, TEMP[7], TEMP[3] 689: MOV TEMP[10].w, TEMP[7].xxxx 690: DP4 TEMP[7].x, TEMP[3], TEMP[9] 691: DP4 TEMP[8].x, TEMP[3], TEMP[8] 692: MOV TEMP[7].y, TEMP[8].xxxx 693: MOV TEMP[5].x, TEMP[5].xxxx 694: MOV TEMP[5].y, TEMP[11].xxxx 695: MOV TEMP[5].z, TEMP[13].xxxx 696: DP4 TEMP[3].x, TEMP[3], TEMP[6] 697: RCP TEMP[3].xy, TEMP[3].xxxx 698: MUL TEMP[3].xy, TEMP[7].xyyy, TEMP[3].xyyy 699: MOV TEMP[3].xy, TEMP[3].xyyy 700: TEX TEMP[3].xy, TEMP[3], SAMP[0], 2D 701: DP2 TEMP[3].x, TEMP[3].xyyy, IMM[6].yzzz 702: MOV TEMP[5].w, TEMP[3].xxxx 703: ADD TEMP[3], TEMP[10], -TEMP[5] 704: ADD TEMP[3], IMM[6].wwww, TEMP[3] 705: MUL TEMP[5].x, CONST[1][3].xxxx, TEMP[3].xxxx 706: ADD TEMP[5].x, IMM[6].xxxx, -TEMP[5].xxxx 707: MOV_SAT TEMP[5].x, TEMP[5].xxxx 708: FSLT TEMP[6].x, TEMP[3].xxxx, IMM[0].xxxx 709: UIF TEMP[6].xxxx :2 710: MOV TEMP[6].x, IMM[0].xxxx 711: ELSE :2 712: MOV TEMP[6].x, TEMP[5].xxxx 713: ENDIF 714: MUL TEMP[5].x, CONST[1][3].xxxx, TEMP[3].yyyy 715: ADD TEMP[5].x, IMM[6].xxxx, -TEMP[5].xxxx 716: MOV_SAT TEMP[5].x, TEMP[5].xxxx 717: FSLT TEMP[7].x, TEMP[3].yyyy, IMM[0].xxxx 718: UIF TEMP[7].xxxx :2 719: MOV TEMP[7].x, IMM[0].xxxx 720: ELSE :2 721: MOV TEMP[7].x, TEMP[5].xxxx 722: ENDIF 723: MUL TEMP[5].x, CONST[1][3].xxxx, TEMP[3].zzzz 724: ADD TEMP[5].x, IMM[6].xxxx, -TEMP[5].xxxx 725: MOV_SAT TEMP[5].x, TEMP[5].xxxx 726: FSLT TEMP[8].x, TEMP[3].zzzz, IMM[0].xxxx 727: UIF TEMP[8].xxxx :2 728: MOV TEMP[8].x, IMM[0].xxxx 729: ELSE :2 730: MOV TEMP[8].x, TEMP[5].xxxx 731: ENDIF 732: MUL TEMP[5].x, CONST[1][3].xxxx, TEMP[3].wwww 733: ADD TEMP[5].x, IMM[6].xxxx, -TEMP[5].xxxx 734: MOV_SAT TEMP[5].x, TEMP[5].xxxx 735: FSLT TEMP[3].x, TEMP[3].wwww, IMM[0].xxxx 736: UIF TEMP[3].xxxx :2 737: MOV TEMP[3].x, IMM[0].xxxx 738: ELSE :2 739: MOV TEMP[3].x, TEMP[5].xxxx 740: ENDIF 741: MOV TEMP[5].x, TEMP[6].xxxx 742: MOV TEMP[5].y, TEMP[7].xxxx 743: MOV TEMP[5].z, TEMP[8].xxxx 744: MOV TEMP[5].w, TEMP[3].xxxx 745: RCP TEMP[3].x, CONST[2][23].xxxx 746: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[3].xxxx 747: MUL TEMP[2].x, TEMP[2].xxxx, IMM[8].wwww 748: FLR TEMP[2].x, TEMP[2].xxxx 749: MUL TEMP[3].x, TEMP[2].xxxx, IMM[9].wwww 750: FLR TEMP[3].x, TEMP[3].xxxx 751: MAD TEMP[1].xy, TEMP[1].xyyy, IMM[10].wwww, IMM[11].wwww 752: FLR TEMP[1].xy, TEMP[1].xyyy 753: DP4 TEMP[5].x, TEMP[5], IMM[7].wwww 754: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx 755: MUL TEMP[5].x, TEMP[3].xxxx, IMM[12].wwww 756: MOV TEMP[4].y, TEMP[5].xxxx 757: MUL TEMP[3].x, TEMP[3].xxxx, IMM[13].wwww 758: ADD TEMP[2].x, TEMP[2].xxxx, -TEMP[3].xxxx 759: MUL TEMP[2].x, TEMP[2].xxxx, IMM[12].wwww 760: MOV TEMP[4].z, TEMP[2].xxxx 761: MUL TEMP[2].x, TEMP[1].xxxx, IMM[15].wwww 762: MAD TEMP[1].x, TEMP[1].yyyy, IMM[14].wwww, TEMP[2].xxxx 763: MOV TEMP[4].w, TEMP[1].xxxx 764: MOV OUT[0], TEMP[4] 765: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %26 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %27 = load <16 x i8>, <16 x i8> addrspace(2)* %26, align 16, !tbaa !0 %28 = call float @llvm.SI.load.const(<16 x i8> %27, i32 16) %29 = call float @llvm.SI.load.const(<16 x i8> %27, i32 20) %30 = call float @llvm.SI.load.const(<16 x i8> %27, i32 32) %31 = call float @llvm.SI.load.const(<16 x i8> %27, i32 36) %32 = call float @llvm.SI.load.const(<16 x i8> %27, i32 40) %33 = call float @llvm.SI.load.const(<16 x i8> %27, i32 44) %34 = call float @llvm.SI.load.const(<16 x i8> %27, i32 48) %35 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 %37 = call float @llvm.SI.load.const(<16 x i8> %36, i32 0) %38 = call float @llvm.SI.load.const(<16 x i8> %36, i32 4) %39 = call float @llvm.SI.load.const(<16 x i8> %36, i32 12) %40 = call float @llvm.SI.load.const(<16 x i8> %36, i32 16) %41 = call float @llvm.SI.load.const(<16 x i8> %36, i32 20) %42 = call float @llvm.SI.load.const(<16 x i8> %36, i32 28) %43 = call float @llvm.SI.load.const(<16 x i8> %36, i32 32) %44 = call float @llvm.SI.load.const(<16 x i8> %36, i32 36) %45 = call float @llvm.SI.load.const(<16 x i8> %36, i32 44) %46 = call float @llvm.SI.load.const(<16 x i8> %36, i32 48) %47 = call float @llvm.SI.load.const(<16 x i8> %36, i32 52) %48 = call float @llvm.SI.load.const(<16 x i8> %36, i32 60) %49 = call float @llvm.SI.load.const(<16 x i8> %36, i32 304) %50 = call float @llvm.SI.load.const(<16 x i8> %36, i32 308) %51 = call float @llvm.SI.load.const(<16 x i8> %36, i32 312) %52 = call float @llvm.SI.load.const(<16 x i8> %36, i32 320) %53 = call float @llvm.SI.load.const(<16 x i8> %36, i32 324) %54 = call float @llvm.SI.load.const(<16 x i8> %36, i32 328) %55 = call float @llvm.SI.load.const(<16 x i8> %36, i32 368) %56 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %57 = load <8 x i32>, <8 x i32> addrspace(2)* %56, align 32, !tbaa !0 %58 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %59 = load <4 x i32>, <4 x i32> addrspace(2)* %58, align 16, !tbaa !0 %60 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %61 = bitcast <8 x i32> addrspace(2)* %60 to <32 x i8> addrspace(2)* %62 = load <32 x i8>, <32 x i8> addrspace(2)* %61, align 32, !tbaa !0 %63 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %64 = bitcast <4 x i32> addrspace(2)* %63 to <16 x i8> addrspace(2)* %65 = load <16 x i8>, <16 x i8> addrspace(2)* %64, align 16, !tbaa !0 %66 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %67 = bitcast <8 x i32> addrspace(2)* %66 to <32 x i8> addrspace(2)* %68 = load <32 x i8>, <32 x i8> addrspace(2)* %67, align 32, !tbaa !0 %69 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %70 = bitcast <4 x i32> addrspace(2)* %69 to <16 x i8> addrspace(2)* %71 = load <16 x i8>, <16 x i8> addrspace(2)* %70, align 16, !tbaa !0 %72 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %73 = bitcast <8 x i32> addrspace(2)* %72 to <32 x i8> addrspace(2)* %74 = load <32 x i8>, <32 x i8> addrspace(2)* %73, align 32, !tbaa !0 %75 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %76 = bitcast <4 x i32> addrspace(2)* %75 to <16 x i8> addrspace(2)* %77 = load <16 x i8>, <16 x i8> addrspace(2)* %76, align 16, !tbaa !0 %78 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %79 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %81 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %82 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %83 = fmul float %24, %15 %84 = fadd float %83, %25 %85 = bitcast float %78 to i32 %86 = bitcast float %79 to i32 %87 = insertelement <4 x i32> undef, i32 %85, i32 0 %88 = insertelement <4 x i32> %87, i32 %86, i32 1 %89 = insertelement <4 x i32> %88, i32 0, i32 2 %90 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %89, <32 x i8> %62, <16 x i8> %65, i32 2) %91 = extractelement <4 x float> %90, i32 0 %92 = extractelement <4 x float> %90, i32 1 %93 = extractelement <4 x float> %90, i32 2 %94 = fadd float %91, -5.000000e-01 %95 = fadd float %92, -5.000000e-01 %96 = fadd float %93, -5.000000e-01 %97 = fmul float %94, 2.000000e+00 %98 = fmul float %95, 2.000000e+00 %99 = fmul float %96, 2.000000e+00 %100 = bitcast float %78 to i32 %101 = bitcast float %79 to i32 %102 = insertelement <4 x i32> undef, i32 %100, i32 0 %103 = insertelement <4 x i32> %102, i32 %101, i32 1 %104 = insertelement <4 x i32> %103, i32 0, i32 2 %105 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %104, <32 x i8> %68, <16 x i8> %71, i32 2) %106 = extractelement <4 x float> %105, i32 0 %107 = fmul float %14, 2.500000e-01 %108 = fmul float %84, 2.500000e-01 %109 = bitcast float %107 to i32 %110 = bitcast float %108 to i32 %111 = insertelement <2 x i32> undef, i32 %109, i32 0 %112 = insertelement <2 x i32> %111, i32 %110, i32 1 %113 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %112, <32 x i8> %74, <16 x i8> %77, i32 2) %114 = extractelement <4 x float> %113, i32 0 %115 = extractelement <4 x float> %113, i32 1 %116 = extractelement <4 x float> %113, i32 2 %117 = extractelement <4 x float> %113, i32 3 %118 = fmul float %114, 2.000000e+00 %119 = fadd float %118, -1.000000e+00 %120 = fmul float %115, 2.000000e+00 %121 = fadd float %120, -1.000000e+00 %122 = fmul float %116, 2.000000e+00 %123 = fadd float %122, -1.000000e+00 %124 = fmul float %117, 2.000000e+00 %125 = fadd float %124, -1.000000e+00 %126 = fmul float %119, %119 %127 = fmul float %121, %121 %128 = fadd float %126, %127 %129 = fmul float %123, %123 %130 = fadd float %128, %129 %131 = fmul float %125, %125 %132 = fadd float %130, %131 %133 = call float @llvm.AMDGPU.rsq.clamped.f32(float %132) %134 = fmul float %119, %133 %135 = fmul float %121, %133 %136 = fmul float %123, %133 %137 = fmul float %80, %106 %138 = fadd float %137, %49 %139 = fmul float %81, %106 %140 = fadd float %139, %50 %141 = fmul float %82, %106 %142 = fadd float %141, %51 %143 = fmul float %28, %97 %144 = fadd float %143, %138 %145 = fmul float %28, %98 %146 = fadd float %145, %140 %147 = fmul float %28, %99 %148 = fadd float %147, %142 %149 = fdiv float 1.000000e+00, %55 %150 = fmul float %52, %149 %151 = fmul float %53, %149 %152 = fmul float %54, %149 %153 = fmul float %29, %150 %154 = fmul float %29, %151 %155 = fmul float %29, %152 %156 = fmul float %150, %144 %157 = fmul float %151, %146 %158 = fadd float %157, %156 %159 = fmul float %152, %148 %160 = fadd float %158, %159 %161 = fmul float %150, %49 %162 = fmul float %151, %50 %163 = fadd float %162, %161 %164 = fmul float %152, %51 %165 = fadd float %163, %164 %166 = fsub float %160, %165 %167 = fmul float %37, %30 %168 = fmul float %39, %32 %169 = fadd float %168, %167 %170 = fmul float %40, %30 %171 = fmul float %42, %32 %172 = fadd float %171, %170 %173 = fmul float %43, %30 %174 = fmul float %45, %32 %175 = fadd float %174, %173 %176 = fmul float %38, %31 %177 = fmul float %39, %33 %178 = fadd float %177, %176 %179 = fmul float %41, %31 %180 = fmul float %42, %33 %181 = fadd float %180, %179 %182 = fmul float %44, %31 %183 = fmul float %45, %33 %184 = fadd float %183, %182 %185 = fmul float %169, %29 %186 = fmul float %172, %29 %187 = fmul float %175, %29 %188 = fmul float %46, %30 %189 = fmul float %48, %32 %190 = fadd float %189, %188 %191 = fmul float %169, %144 %192 = fmul float %172, %146 %193 = fadd float %192, %191 %194 = fmul float %175, %148 %195 = fadd float %193, %194 %196 = fadd float %190, %195 %197 = fmul float %29, %178 %198 = fmul float %29, %181 %199 = fmul float %29, %184 %200 = fmul float %47, %31 %201 = fmul float %48, %33 %202 = fadd float %201, %200 %203 = fmul float %178, %144 %204 = fmul float %181, %146 %205 = fadd float %204, %203 %206 = fmul float %184, %148 %207 = fadd float %205, %206 %208 = fadd float %202, %207 %209 = fmul float %29, %39 %210 = fmul float %29, %42 %211 = fmul float %29, %45 %212 = fmul float %39, %144 %213 = fmul float %42, %146 %214 = fadd float %213, %212 %215 = fmul float %45, %148 %216 = fadd float %214, %215 %217 = fadd float %48, %216 %218 = fmul float %134, 0x3FE3AFAD60000000 %219 = fmul float %135, 0x3FB9643020000000 %220 = fadd float %219, %218 %221 = fmul float %136, 0x3FDDFD1DC0000000 %222 = fadd float %220, %221 %223 = fmul float %222, %134 %224 = fmul float %222, %135 %225 = fmul float %222, %136 %226 = fmul float %223, 2.000000e+00 %227 = fmul float %224, 2.000000e+00 %228 = fmul float %225, 2.000000e+00 %229 = fsub float 0x3FE3AFAD60000000, %226 %230 = fsub float 0x3FB9643020000000, %227 %231 = fsub float 0x3FDDFD1DC0000000, %228 %232 = fmul float %97, %229 %233 = fmul float %98, %230 %234 = fadd float %233, %232 %235 = fmul float %99, %231 %236 = fadd float %234, %235 %237 = fmul float %236, %97 %238 = fmul float %236, %98 %239 = fmul float %236, %99 %240 = fmul float %237, 2.000000e+00 %241 = fmul float %238, 2.000000e+00 %242 = fmul float %239, 2.000000e+00 %243 = fsub float %229, %240 %244 = fsub float %230, %241 %245 = fsub float %231, %242 %246 = fmul float %229, %97 %247 = fmul float %230, %98 %248 = fadd float %247, %246 %249 = fmul float %231, %99 %250 = fadd float %248, %249 %251 = fcmp olt float %250, 0.000000e+00 %. = select i1 %251, float %243, float %229 %.165 = select i1 %251, float %244, float %230 %.166 = select i1 %251, float %245, float %231 %252 = fmul float %153, %. %253 = fmul float %154, %.165 %254 = fadd float %252, %253 %255 = fmul float %155, %.166 %256 = fadd float %254, %255 %257 = fadd float %256, %166 %258 = fmul float %., %185 %259 = fmul float %.165, %186 %260 = fadd float %258, %259 %261 = fmul float %.166, %187 %262 = fadd float %260, %261 %263 = fadd float %262, %196 %264 = fmul float %., %197 %265 = fmul float %.165, %198 %266 = fadd float %264, %265 %267 = fmul float %.166, %199 %268 = fadd float %266, %267 %269 = fadd float %268, %208 %270 = fmul float %., %209 %271 = fmul float %.165, %210 %272 = fadd float %270, %271 %273 = fmul float %.166, %211 %274 = fadd float %272, %273 %275 = fadd float %274, %217 %276 = fdiv float 1.000000e+00, %275 %277 = fmul float %263, %276 %278 = fmul float %269, %276 %279 = bitcast float %277 to i32 %280 = bitcast float %278 to i32 %281 = insertelement <2 x i32> undef, i32 %279, i32 0 %282 = insertelement <2 x i32> %281, i32 %280, i32 1 %283 = bitcast <8 x i32> %57 to <32 x i8> %284 = bitcast <4 x i32> %59 to <16 x i8> %285 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %282, <32 x i8> %283, <16 x i8> %284, i32 2) %286 = extractelement <4 x float> %285, i32 0 %287 = extractelement <4 x float> %285, i32 1 %288 = fmul float %286, 0x3FEFE02000000000 %289 = fmul float %287, 0x3F6FE01F80000000 %290 = fadd float %288, %289 %291 = fmul float %134, 0xBFCF92AFA0000000 %292 = fmul float %135, 0x3FDFB71B80000000 %293 = fadd float %292, %291 %294 = fmul float %136, 0x3FE45F8900000000 %295 = fadd float %293, %294 %296 = fmul float %295, %134 %297 = fmul float %295, %135 %298 = fmul float %295, %136 %299 = fmul float %296, 2.000000e+00 %300 = fmul float %297, 2.000000e+00 %301 = fmul float %298, 2.000000e+00 %302 = fsub float 0xBFCF92AFA0000000, %299 %303 = fsub float 0x3FDFB71B80000000, %300 %304 = fsub float 0x3FE45F8900000000, %301 %305 = fmul float %97, %302 %306 = fmul float %98, %303 %307 = fadd float %306, %305 %308 = fmul float %99, %304 %309 = fadd float %307, %308 %310 = fmul float %309, %97 %311 = fmul float %309, %98 %312 = fmul float %309, %99 %313 = fmul float %310, 2.000000e+00 %314 = fmul float %311, 2.000000e+00 %315 = fmul float %312, 2.000000e+00 %316 = fsub float %302, %313 %317 = fsub float %303, %314 %318 = fsub float %304, %315 %319 = fmul float %302, %97 %320 = fmul float %303, %98 %321 = fadd float %320, %319 %322 = fmul float %304, %99 %323 = fadd float %321, %322 %324 = fcmp olt float %323, 0.000000e+00 %temp44.0 = select i1 %324, float %316, float %302 %temp45.0 = select i1 %324, float %317, float %303 %temp46.0 = select i1 %324, float %318, float %304 %325 = fmul float %153, %temp44.0 %326 = fmul float %154, %temp45.0 %327 = fadd float %325, %326 %328 = fmul float %155, %temp46.0 %329 = fadd float %327, %328 %330 = fadd float %329, %166 %331 = fmul float %temp44.0, %185 %332 = fmul float %temp45.0, %186 %333 = fadd float %331, %332 %334 = fmul float %temp46.0, %187 %335 = fadd float %333, %334 %336 = fadd float %335, %196 %337 = fmul float %temp44.0, %197 %338 = fmul float %temp45.0, %198 %339 = fadd float %337, %338 %340 = fmul float %temp46.0, %199 %341 = fadd float %339, %340 %342 = fadd float %341, %208 %343 = fmul float %temp44.0, %209 %344 = fmul float %temp45.0, %210 %345 = fadd float %343, %344 %346 = fmul float %temp46.0, %211 %347 = fadd float %345, %346 %348 = fadd float %347, %217 %349 = fdiv float 1.000000e+00, %348 %350 = fmul float %336, %349 %351 = fmul float %342, %349 %352 = bitcast float %350 to i32 %353 = bitcast float %351 to i32 %354 = insertelement <2 x i32> undef, i32 %352, i32 0 %355 = insertelement <2 x i32> %354, i32 %353, i32 1 %356 = bitcast <8 x i32> %57 to <32 x i8> %357 = bitcast <4 x i32> %59 to <16 x i8> %358 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %355, <32 x i8> %356, <16 x i8> %357, i32 2) %359 = extractelement <4 x float> %358, i32 0 %360 = extractelement <4 x float> %358, i32 1 %361 = fmul float %359, 0x3FEFE02000000000 %362 = fmul float %360, 0x3F6FE01F80000000 %363 = fadd float %361, %362 %364 = fmul float %134, 0xBFD35F6BA0000000 %365 = fmul float %135, 0x3FC942F620000000 %366 = fadd float %365, %364 %367 = fmul float %136, 0x3FEDBA4D60000000 %368 = fadd float %366, %367 %369 = fmul float %368, %134 %370 = fmul float %368, %135 %371 = fmul float %368, %136 %372 = fmul float %369, 2.000000e+00 %373 = fmul float %370, 2.000000e+00 %374 = fmul float %371, 2.000000e+00 %375 = fsub float 0xBFD35F6BA0000000, %372 %376 = fsub float 0x3FC942F620000000, %373 %377 = fsub float 0x3FEDBA4D60000000, %374 %378 = fmul float %97, %375 %379 = fmul float %98, %376 %380 = fadd float %379, %378 %381 = fmul float %99, %377 %382 = fadd float %380, %381 %383 = fmul float %382, %97 %384 = fmul float %382, %98 %385 = fmul float %382, %99 %386 = fmul float %383, 2.000000e+00 %387 = fmul float %384, 2.000000e+00 %388 = fmul float %385, 2.000000e+00 %389 = fsub float %375, %386 %390 = fsub float %376, %387 %391 = fsub float %377, %388 %392 = fmul float %375, %97 %393 = fmul float %376, %98 %394 = fadd float %393, %392 %395 = fmul float %377, %99 %396 = fadd float %394, %395 %397 = fcmp olt float %396, 0.000000e+00 %.167 = select i1 %397, float %389, float %375 %.168 = select i1 %397, float %390, float %376 %.169 = select i1 %397, float %391, float %377 %398 = fmul float %153, %.167 %399 = fmul float %154, %.168 %400 = fadd float %398, %399 %401 = fmul float %155, %.169 %402 = fadd float %400, %401 %403 = fadd float %402, %166 %404 = fmul float %.167, %185 %405 = fmul float %.168, %186 %406 = fadd float %404, %405 %407 = fmul float %.169, %187 %408 = fadd float %406, %407 %409 = fadd float %408, %196 %410 = fmul float %.167, %197 %411 = fmul float %.168, %198 %412 = fadd float %410, %411 %413 = fmul float %.169, %199 %414 = fadd float %412, %413 %415 = fadd float %414, %208 %416 = fmul float %.167, %209 %417 = fmul float %.168, %210 %418 = fadd float %416, %417 %419 = fmul float %.169, %211 %420 = fadd float %418, %419 %421 = fadd float %420, %217 %422 = fdiv float 1.000000e+00, %421 %423 = fmul float %409, %422 %424 = fmul float %415, %422 %425 = bitcast float %423 to i32 %426 = bitcast float %424 to i32 %427 = insertelement <2 x i32> undef, i32 %425, i32 0 %428 = insertelement <2 x i32> %427, i32 %426, i32 1 %429 = bitcast <8 x i32> %57 to <32 x i8> %430 = bitcast <4 x i32> %59 to <16 x i8> %431 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %428, <32 x i8> %429, <16 x i8> %430, i32 2) %432 = extractelement <4 x float> %431, i32 0 %433 = extractelement <4 x float> %431, i32 1 %434 = fmul float %432, 0x3FEFE02000000000 %435 = fmul float %433, 0x3F6FE01F80000000 %436 = fadd float %434, %435 %437 = fmul float %134, 0x3FD51ACDE0000000 %438 = fmul float %135, 0xBFCF378680000000 %439 = fadd float %438, %437 %440 = fmul float %136, 0x3FDD070760000000 %441 = fadd float %439, %440 %442 = fmul float %441, %134 %443 = fmul float %441, %135 %444 = fmul float %441, %136 %445 = fmul float %442, 2.000000e+00 %446 = fmul float %443, 2.000000e+00 %447 = fmul float %444, 2.000000e+00 %448 = fsub float 0x3FD51ACDE0000000, %445 %449 = fsub float 0xBFCF378680000000, %446 %450 = fsub float 0x3FDD070760000000, %447 %451 = fmul float %97, %448 %452 = fmul float %98, %449 %453 = fadd float %452, %451 %454 = fmul float %99, %450 %455 = fadd float %453, %454 %456 = fmul float %455, %97 %457 = fmul float %455, %98 %458 = fmul float %455, %99 %459 = fmul float %456, 2.000000e+00 %460 = fmul float %457, 2.000000e+00 %461 = fmul float %458, 2.000000e+00 %462 = fsub float %448, %459 %463 = fsub float %449, %460 %464 = fsub float %450, %461 %465 = fmul float %448, %97 %466 = fmul float %449, %98 %467 = fadd float %466, %465 %468 = fmul float %450, %99 %469 = fadd float %467, %468 %470 = fcmp olt float %469, 0.000000e+00 %temp60.0 = select i1 %470, float %462, float %448 %temp61.0 = select i1 %470, float %463, float %449 %temp62.0 = select i1 %470, float %464, float %450 %471 = fmul float %153, %temp60.0 %472 = fmul float %154, %temp61.0 %473 = fadd float %471, %472 %474 = fmul float %155, %temp62.0 %475 = fadd float %473, %474 %476 = fadd float %475, %166 %477 = fmul float %temp60.0, %185 %478 = fmul float %temp61.0, %186 %479 = fadd float %477, %478 %480 = fmul float %temp62.0, %187 %481 = fadd float %479, %480 %482 = fadd float %481, %196 %483 = fmul float %temp60.0, %197 %484 = fmul float %temp61.0, %198 %485 = fadd float %483, %484 %486 = fmul float %temp62.0, %199 %487 = fadd float %485, %486 %488 = fadd float %487, %208 %489 = fmul float %temp60.0, %209 %490 = fmul float %temp61.0, %210 %491 = fadd float %489, %490 %492 = fmul float %temp62.0, %211 %493 = fadd float %491, %492 %494 = fadd float %493, %217 %495 = fdiv float 1.000000e+00, %494 %496 = fmul float %482, %495 %497 = fmul float %488, %495 %498 = bitcast float %496 to i32 %499 = bitcast float %497 to i32 %500 = insertelement <2 x i32> undef, i32 %498, i32 0 %501 = insertelement <2 x i32> %500, i32 %499, i32 1 %502 = bitcast <8 x i32> %57 to <32 x i8> %503 = bitcast <4 x i32> %59 to <16 x i8> %504 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %501, <32 x i8> %502, <16 x i8> %503, i32 2) %505 = extractelement <4 x float> %504, i32 0 %506 = extractelement <4 x float> %504, i32 1 %507 = fmul float %505, 0x3FEFE02000000000 %508 = fmul float %506, 0x3F6FE01F80000000 %509 = fadd float %507, %508 %510 = fsub float %257, %290 %511 = fsub float %330, %363 %512 = fsub float %403, %436 %513 = fsub float %476, %509 %514 = fadd float %510, 0xBF59000120000000 %515 = fadd float %511, 0xBF59000120000000 %516 = fadd float %512, 0xBF59000120000000 %517 = fadd float %513, 0xBF59000120000000 %518 = fmul float %34, %514 %519 = fsub float 1.000000e+00, %518 %520 = call float @llvm.AMDIL.clamp.(float %519, float 0.000000e+00, float 1.000000e+00) %521 = fcmp olt float %514, 0.000000e+00 %522 = fmul float %34, %515 %523 = fsub float 1.000000e+00, %522 %524 = call float @llvm.AMDIL.clamp.(float %523, float 0.000000e+00, float 1.000000e+00) %525 = fcmp olt float %515, 0.000000e+00 %526 = fmul float %34, %516 %527 = fsub float 1.000000e+00, %526 %528 = call float @llvm.AMDIL.clamp.(float %527, float 0.000000e+00, float 1.000000e+00) %529 = fcmp olt float %516, 0.000000e+00 %530 = fmul float %34, %517 %531 = fsub float 1.000000e+00, %530 %532 = call float @llvm.AMDIL.clamp.(float %531, float 0.000000e+00, float 1.000000e+00) %533 = fcmp olt float %517, 0.000000e+00 %.op = fmul float %520, 6.250000e-02 %534 = select i1 %521, float 0.000000e+00, float %.op %.op196 = fmul float %524, 6.250000e-02 %535 = select i1 %525, float 0.000000e+00, float %.op196 %536 = fadd float %534, %535 %.op197 = fmul float %528, 6.250000e-02 %537 = select i1 %529, float 0.000000e+00, float %.op197 %538 = fadd float %536, %537 %.op198 = fmul float %532, 6.250000e-02 %539 = select i1 %533, float 0.000000e+00, float %.op198 %540 = fadd float %538, %539 %541 = fmul float %134, 0x3FCD4A98A0000000 %542 = fmul float %135, 0x3FD28A04E0000000 %543 = fadd float %542, %541 %544 = fmul float %136, 0x3FC10B0F20000000 %545 = fadd float %543, %544 %546 = fmul float %545, %134 %547 = fmul float %545, %135 %548 = fmul float %545, %136 %549 = fmul float %546, 2.000000e+00 %550 = fmul float %547, 2.000000e+00 %551 = fmul float %548, 2.000000e+00 %552 = fsub float 0x3FCD4A98A0000000, %549 %553 = fsub float 0x3FD28A04E0000000, %550 %554 = fsub float 0x3FC10B0F20000000, %551 %555 = fmul float %97, %552 %556 = fmul float %98, %553 %557 = fadd float %556, %555 %558 = fmul float %99, %554 %559 = fadd float %557, %558 %560 = fmul float %559, %97 %561 = fmul float %559, %98 %562 = fmul float %559, %99 %563 = fmul float %560, 2.000000e+00 %564 = fmul float %561, 2.000000e+00 %565 = fmul float %562, 2.000000e+00 %566 = fsub float %552, %563 %567 = fsub float %553, %564 %568 = fsub float %554, %565 %569 = fmul float %552, %97 %570 = fmul float %553, %98 %571 = fadd float %570, %569 %572 = fmul float %554, %99 %573 = fadd float %571, %572 %574 = fcmp olt float %573, 0.000000e+00 %.172 = select i1 %574, float %566, float %552 %.173 = select i1 %574, float %567, float %553 %.174 = select i1 %574, float %568, float %554 %575 = fmul float %153, %.172 %576 = fmul float %154, %.173 %577 = fadd float %575, %576 %578 = fmul float %155, %.174 %579 = fadd float %577, %578 %580 = fadd float %579, %166 %581 = fmul float %.172, %185 %582 = fmul float %.173, %186 %583 = fadd float %581, %582 %584 = fmul float %.174, %187 %585 = fadd float %583, %584 %586 = fadd float %585, %196 %587 = fmul float %.172, %197 %588 = fmul float %.173, %198 %589 = fadd float %587, %588 %590 = fmul float %.174, %199 %591 = fadd float %589, %590 %592 = fadd float %591, %208 %593 = fmul float %.172, %209 %594 = fmul float %.173, %210 %595 = fadd float %593, %594 %596 = fmul float %.174, %211 %597 = fadd float %595, %596 %598 = fadd float %597, %217 %599 = fdiv float 1.000000e+00, %598 %600 = fmul float %586, %599 %601 = fmul float %592, %599 %602 = bitcast float %600 to i32 %603 = bitcast float %601 to i32 %604 = insertelement <2 x i32> undef, i32 %602, i32 0 %605 = insertelement <2 x i32> %604, i32 %603, i32 1 %606 = bitcast <8 x i32> %57 to <32 x i8> %607 = bitcast <4 x i32> %59 to <16 x i8> %608 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %605, <32 x i8> %606, <16 x i8> %607, i32 2) %609 = extractelement <4 x float> %608, i32 0 %610 = extractelement <4 x float> %608, i32 1 %611 = fmul float %609, 0x3FEFE02000000000 %612 = fmul float %610, 0x3F6FE01F80000000 %613 = fadd float %611, %612 %614 = fmul float %134, 0xBFCC54D620000000 %615 = fmul float %135, 0xBFC2FF10E0000000 %616 = fadd float %615, %614 %617 = fmul float %136, 0x3FC0DD6160000000 %618 = fadd float %616, %617 %619 = fmul float %618, %134 %620 = fmul float %618, %135 %621 = fmul float %618, %136 %622 = fmul float %619, 2.000000e+00 %623 = fmul float %620, 2.000000e+00 %624 = fmul float %621, 2.000000e+00 %625 = fsub float 0xBFCC54D620000000, %622 %626 = fsub float 0xBFC2FF10E0000000, %623 %627 = fsub float 0x3FC0DD6160000000, %624 %628 = fmul float %97, %625 %629 = fmul float %98, %626 %630 = fadd float %629, %628 %631 = fmul float %99, %627 %632 = fadd float %630, %631 %633 = fmul float %632, %97 %634 = fmul float %632, %98 %635 = fmul float %632, %99 %636 = fmul float %633, 2.000000e+00 %637 = fmul float %634, 2.000000e+00 %638 = fmul float %635, 2.000000e+00 %639 = fsub float %625, %636 %640 = fsub float %626, %637 %641 = fsub float %627, %638 %642 = fmul float %625, %97 %643 = fmul float %626, %98 %644 = fadd float %643, %642 %645 = fmul float %627, %99 %646 = fadd float %644, %645 %647 = fcmp olt float %646, 0.000000e+00 %temp48.1 = select i1 %647, float %639, float %625 %temp49.0 = select i1 %647, float %640, float %626 %temp50.0 = select i1 %647, float %641, float %627 %648 = fmul float %153, %temp48.1 %649 = fmul float %154, %temp49.0 %650 = fadd float %648, %649 %651 = fmul float %155, %temp50.0 %652 = fadd float %650, %651 %653 = fadd float %652, %166 %654 = fmul float %temp48.1, %185 %655 = fmul float %temp49.0, %186 %656 = fadd float %654, %655 %657 = fmul float %temp50.0, %187 %658 = fadd float %656, %657 %659 = fadd float %658, %196 %660 = fmul float %temp48.1, %197 %661 = fmul float %temp49.0, %198 %662 = fadd float %660, %661 %663 = fmul float %temp50.0, %199 %664 = fadd float %662, %663 %665 = fadd float %664, %208 %666 = fmul float %temp48.1, %209 %667 = fmul float %temp49.0, %210 %668 = fadd float %666, %667 %669 = fmul float %temp50.0, %211 %670 = fadd float %668, %669 %671 = fadd float %670, %217 %672 = fdiv float 1.000000e+00, %671 %673 = fmul float %659, %672 %674 = fmul float %665, %672 %675 = bitcast float %673 to i32 %676 = bitcast float %674 to i32 %677 = insertelement <2 x i32> undef, i32 %675, i32 0 %678 = insertelement <2 x i32> %677, i32 %676, i32 1 %679 = bitcast <8 x i32> %57 to <32 x i8> %680 = bitcast <4 x i32> %59 to <16 x i8> %681 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %678, <32 x i8> %679, <16 x i8> %680, i32 2) %682 = extractelement <4 x float> %681, i32 0 %683 = extractelement <4 x float> %681, i32 1 %684 = fmul float %682, 0x3FEFE02000000000 %685 = fmul float %683, 0x3F6FE01F80000000 %686 = fadd float %684, %685 %687 = fmul float %134, 0xBFDD48CF80000000 %688 = fmul float %135, 0xBFE5517C60000000 %689 = fadd float %688, %687 %690 = fmul float %136, 0x3FCD5F1380000000 %691 = fadd float %689, %690 %692 = fmul float %691, %134 %693 = fmul float %691, %135 %694 = fmul float %691, %136 %695 = fmul float %692, 2.000000e+00 %696 = fmul float %693, 2.000000e+00 %697 = fmul float %694, 2.000000e+00 %698 = fsub float 0xBFDD48CF80000000, %695 %699 = fsub float 0xBFE5517C60000000, %696 %700 = fsub float 0x3FCD5F1380000000, %697 %701 = fmul float %97, %698 %702 = fmul float %98, %699 %703 = fadd float %702, %701 %704 = fmul float %99, %700 %705 = fadd float %703, %704 %706 = fmul float %705, %97 %707 = fmul float %705, %98 %708 = fmul float %705, %99 %709 = fmul float %706, 2.000000e+00 %710 = fmul float %707, 2.000000e+00 %711 = fmul float %708, 2.000000e+00 %712 = fsub float %698, %709 %713 = fsub float %699, %710 %714 = fsub float %700, %711 %715 = fmul float %698, %97 %716 = fmul float %699, %98 %717 = fadd float %716, %715 %718 = fmul float %700, %99 %719 = fadd float %717, %718 %720 = fcmp olt float %719, 0.000000e+00 %.175 = select i1 %720, float %712, float %698 %.176 = select i1 %720, float %713, float %699 %.177 = select i1 %720, float %714, float %700 %721 = fmul float %153, %.175 %722 = fmul float %154, %.176 %723 = fadd float %721, %722 %724 = fmul float %155, %.177 %725 = fadd float %723, %724 %726 = fadd float %725, %166 %727 = fmul float %.175, %185 %728 = fmul float %.176, %186 %729 = fadd float %727, %728 %730 = fmul float %.177, %187 %731 = fadd float %729, %730 %732 = fadd float %731, %196 %733 = fmul float %.175, %197 %734 = fmul float %.176, %198 %735 = fadd float %733, %734 %736 = fmul float %.177, %199 %737 = fadd float %735, %736 %738 = fadd float %737, %208 %739 = fmul float %.175, %209 %740 = fmul float %.176, %210 %741 = fadd float %739, %740 %742 = fmul float %.177, %211 %743 = fadd float %741, %742 %744 = fadd float %743, %217 %745 = fdiv float 1.000000e+00, %744 %746 = fmul float %732, %745 %747 = fmul float %738, %745 %748 = bitcast float %746 to i32 %749 = bitcast float %747 to i32 %750 = insertelement <2 x i32> undef, i32 %748, i32 0 %751 = insertelement <2 x i32> %750, i32 %749, i32 1 %752 = bitcast <8 x i32> %57 to <32 x i8> %753 = bitcast <4 x i32> %59 to <16 x i8> %754 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %751, <32 x i8> %752, <16 x i8> %753, i32 2) %755 = extractelement <4 x float> %754, i32 0 %756 = extractelement <4 x float> %754, i32 1 %757 = fmul float %755, 0x3FEFE02000000000 %758 = fmul float %756, 0x3F6FE01F80000000 %759 = fadd float %757, %758 %760 = fmul float %134, 0xBFB2E9EE40000000 %761 = fmul float %135, 0x3FAC2ACB80000000 %762 = fadd float %761, %760 %763 = fmul float %136, 0xBF83DE1E20000000 %764 = fadd float %762, %763 %765 = fmul float %764, %134 %766 = fmul float %764, %135 %767 = fmul float %764, %136 %768 = fmul float %765, 2.000000e+00 %769 = fmul float %766, 2.000000e+00 %770 = fmul float %767, 2.000000e+00 %771 = fsub float 0xBFB2E9EE40000000, %768 %772 = fsub float 0x3FAC2ACB80000000, %769 %773 = fsub float 0xBF83DE1E20000000, %770 %774 = fmul float %97, %771 %775 = fmul float %98, %772 %776 = fadd float %775, %774 %777 = fmul float %99, %773 %778 = fadd float %776, %777 %779 = fmul float %778, %97 %780 = fmul float %778, %98 %781 = fmul float %778, %99 %782 = fmul float %779, 2.000000e+00 %783 = fmul float %780, 2.000000e+00 %784 = fmul float %781, 2.000000e+00 %785 = fsub float %771, %782 %786 = fsub float %772, %783 %787 = fsub float %773, %784 %788 = fmul float %771, %97 %789 = fmul float %772, %98 %790 = fadd float %789, %788 %791 = fmul float %773, %99 %792 = fadd float %790, %791 %793 = fcmp olt float %792, 0.000000e+00 %temp64.0 = select i1 %793, float %785, float %771 %temp65.0 = select i1 %793, float %786, float %772 %temp66.0 = select i1 %793, float %787, float %773 %794 = fmul float %153, %temp64.0 %795 = fmul float %154, %temp65.0 %796 = fadd float %794, %795 %797 = fmul float %155, %temp66.0 %798 = fadd float %796, %797 %799 = fadd float %798, %166 %800 = fmul float %temp64.0, %185 %801 = fmul float %temp65.0, %186 %802 = fadd float %800, %801 %803 = fmul float %temp66.0, %187 %804 = fadd float %802, %803 %805 = fadd float %804, %196 %806 = fmul float %temp64.0, %197 %807 = fmul float %temp65.0, %198 %808 = fadd float %806, %807 %809 = fmul float %temp66.0, %199 %810 = fadd float %808, %809 %811 = fadd float %810, %208 %812 = fmul float %temp64.0, %209 %813 = fmul float %temp65.0, %210 %814 = fadd float %812, %813 %815 = fmul float %temp66.0, %211 %816 = fadd float %814, %815 %817 = fadd float %816, %217 %818 = fdiv float 1.000000e+00, %817 %819 = fmul float %805, %818 %820 = fmul float %811, %818 %821 = bitcast float %819 to i32 %822 = bitcast float %820 to i32 %823 = insertelement <2 x i32> undef, i32 %821, i32 0 %824 = insertelement <2 x i32> %823, i32 %822, i32 1 %825 = bitcast <8 x i32> %57 to <32 x i8> %826 = bitcast <4 x i32> %59 to <16 x i8> %827 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %824, <32 x i8> %825, <16 x i8> %826, i32 2) %828 = extractelement <4 x float> %827, i32 0 %829 = extractelement <4 x float> %827, i32 1 %830 = fmul float %828, 0x3FEFE02000000000 %831 = fmul float %829, 0x3F6FE01F80000000 %832 = fadd float %830, %831 %833 = fsub float %580, %613 %834 = fsub float %653, %686 %835 = fsub float %726, %759 %836 = fsub float %799, %832 %837 = fadd float %833, 0xBF59000120000000 %838 = fadd float %834, 0xBF59000120000000 %839 = fadd float %835, 0xBF59000120000000 %840 = fadd float %836, 0xBF59000120000000 %841 = fmul float %34, %837 %842 = fsub float 1.000000e+00, %841 %843 = call float @llvm.AMDIL.clamp.(float %842, float 0.000000e+00, float 1.000000e+00) %844 = fcmp olt float %837, 0.000000e+00 %845 = fmul float %34, %838 %846 = fsub float 1.000000e+00, %845 %847 = call float @llvm.AMDIL.clamp.(float %846, float 0.000000e+00, float 1.000000e+00) %848 = fcmp olt float %838, 0.000000e+00 %849 = fmul float %34, %839 %850 = fsub float 1.000000e+00, %849 %851 = call float @llvm.AMDIL.clamp.(float %850, float 0.000000e+00, float 1.000000e+00) %852 = fcmp olt float %839, 0.000000e+00 %853 = fmul float %34, %840 %854 = fsub float 1.000000e+00, %853 %855 = call float @llvm.AMDIL.clamp.(float %854, float 0.000000e+00, float 1.000000e+00) %856 = fcmp olt float %840, 0.000000e+00 %.op199 = fmul float %843, 6.250000e-02 %857 = select i1 %844, float 0.000000e+00, float %.op199 %.op200 = fmul float %847, 6.250000e-02 %858 = select i1 %848, float 0.000000e+00, float %.op200 %859 = fadd float %857, %858 %.op201 = fmul float %851, 6.250000e-02 %860 = select i1 %852, float 0.000000e+00, float %.op201 %861 = fadd float %859, %860 %.op202 = fmul float %855, 6.250000e-02 %862 = select i1 %856, float 0.000000e+00, float %.op202 %863 = fadd float %861, %862 %864 = fadd float %540, %863 %865 = fmul float %134, 0xBFA398A660000000 %866 = fmul float %135, 0xBFA4790B80000000 %867 = fadd float %866, %865 %868 = fmul float %136, 0x3F841B75A0000000 %869 = fadd float %867, %868 %870 = fmul float %869, %134 %871 = fmul float %869, %135 %872 = fmul float %869, %136 %873 = fmul float %870, 2.000000e+00 %874 = fmul float %871, 2.000000e+00 %875 = fmul float %872, 2.000000e+00 %876 = fsub float 0xBFA398A660000000, %873 %877 = fsub float 0xBFA4790B80000000, %874 %878 = fsub float 0x3F841B75A0000000, %875 %879 = fmul float %97, %876 %880 = fmul float %98, %877 %881 = fadd float %880, %879 %882 = fmul float %99, %878 %883 = fadd float %881, %882 %884 = fmul float %883, %97 %885 = fmul float %883, %98 %886 = fmul float %883, %99 %887 = fmul float %884, 2.000000e+00 %888 = fmul float %885, 2.000000e+00 %889 = fmul float %886, 2.000000e+00 %890 = fsub float %876, %887 %891 = fsub float %877, %888 %892 = fsub float %878, %889 %893 = fmul float %876, %97 %894 = fmul float %877, %98 %895 = fadd float %894, %893 %896 = fmul float %878, %99 %897 = fadd float %895, %896 %898 = fcmp olt float %897, 0.000000e+00 %.180 = select i1 %898, float %890, float %876 %.181 = select i1 %898, float %891, float %877 %.182 = select i1 %898, float %892, float %878 %899 = fmul float %153, %.180 %900 = fmul float %154, %.181 %901 = fadd float %899, %900 %902 = fmul float %155, %.182 %903 = fadd float %901, %902 %904 = fadd float %903, %166 %905 = fmul float %.180, %185 %906 = fmul float %.181, %186 %907 = fadd float %905, %906 %908 = fmul float %.182, %187 %909 = fadd float %907, %908 %910 = fadd float %909, %196 %911 = fmul float %.180, %197 %912 = fmul float %.181, %198 %913 = fadd float %911, %912 %914 = fmul float %.182, %199 %915 = fadd float %913, %914 %916 = fadd float %915, %208 %917 = fmul float %.180, %209 %918 = fmul float %.181, %210 %919 = fadd float %917, %918 %920 = fmul float %.182, %211 %921 = fadd float %919, %920 %922 = fadd float %921, %217 %923 = fdiv float 1.000000e+00, %922 %924 = fmul float %910, %923 %925 = fmul float %916, %923 %926 = bitcast float %924 to i32 %927 = bitcast float %925 to i32 %928 = insertelement <2 x i32> undef, i32 %926, i32 0 %929 = insertelement <2 x i32> %928, i32 %927, i32 1 %930 = bitcast <8 x i32> %57 to <32 x i8> %931 = bitcast <4 x i32> %59 to <16 x i8> %932 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %929, <32 x i8> %930, <16 x i8> %931, i32 2) %933 = extractelement <4 x float> %932, i32 0 %934 = extractelement <4 x float> %932, i32 1 %935 = fmul float %933, 0x3FEFE02000000000 %936 = fmul float %934, 0x3F6FE01F80000000 %937 = fadd float %935, %936 %938 = fmul float %134, 0x3FE7D86220000000 %939 = fmul float %135, 0x3FD4B7CFE0000000 %940 = fadd float %939, %938 %941 = fmul float %136, 0xBFDB761880000000 %942 = fadd float %940, %941 %943 = fmul float %942, %134 %944 = fmul float %942, %135 %945 = fmul float %942, %136 %946 = fmul float %943, 2.000000e+00 %947 = fmul float %944, 2.000000e+00 %948 = fmul float %945, 2.000000e+00 %949 = fsub float 0x3FE7D86220000000, %946 %950 = fsub float 0x3FD4B7CFE0000000, %947 %951 = fsub float 0xBFDB761880000000, %948 %952 = fmul float %97, %949 %953 = fmul float %98, %950 %954 = fadd float %953, %952 %955 = fmul float %99, %951 %956 = fadd float %954, %955 %957 = fmul float %956, %97 %958 = fmul float %956, %98 %959 = fmul float %956, %99 %960 = fmul float %957, 2.000000e+00 %961 = fmul float %958, 2.000000e+00 %962 = fmul float %959, 2.000000e+00 %963 = fsub float %949, %960 %964 = fsub float %950, %961 %965 = fsub float %951, %962 %966 = fmul float %949, %97 %967 = fmul float %950, %98 %968 = fadd float %967, %966 %969 = fmul float %951, %99 %970 = fadd float %968, %969 %971 = fcmp olt float %970, 0.000000e+00 %temp48.3 = select i1 %971, float %963, float %949 %temp49.1 = select i1 %971, float %964, float %950 %temp50.1 = select i1 %971, float %965, float %951 %972 = fmul float %153, %temp48.3 %973 = fmul float %154, %temp49.1 %974 = fadd float %972, %973 %975 = fmul float %155, %temp50.1 %976 = fadd float %974, %975 %977 = fadd float %976, %166 %978 = fmul float %temp48.3, %185 %979 = fmul float %temp49.1, %186 %980 = fadd float %978, %979 %981 = fmul float %temp50.1, %187 %982 = fadd float %980, %981 %983 = fadd float %982, %196 %984 = fmul float %temp48.3, %197 %985 = fmul float %temp49.1, %198 %986 = fadd float %984, %985 %987 = fmul float %temp50.1, %199 %988 = fadd float %986, %987 %989 = fadd float %988, %208 %990 = fmul float %temp48.3, %209 %991 = fmul float %temp49.1, %210 %992 = fadd float %990, %991 %993 = fmul float %temp50.1, %211 %994 = fadd float %992, %993 %995 = fadd float %994, %217 %996 = fdiv float 1.000000e+00, %995 %997 = fmul float %983, %996 %998 = fmul float %989, %996 %999 = bitcast float %997 to i32 %1000 = bitcast float %998 to i32 %1001 = insertelement <2 x i32> undef, i32 %999, i32 0 %1002 = insertelement <2 x i32> %1001, i32 %1000, i32 1 %1003 = bitcast <8 x i32> %57 to <32 x i8> %1004 = bitcast <4 x i32> %59 to <16 x i8> %1005 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1002, <32 x i8> %1003, <16 x i8> %1004, i32 2) %1006 = extractelement <4 x float> %1005, i32 0 %1007 = extractelement <4 x float> %1005, i32 1 %1008 = fmul float %1006, 0x3FEFE02000000000 %1009 = fmul float %1007, 0x3F6FE01F80000000 %1010 = fadd float %1008, %1009 %1011 = fmul float %134, 0xBF8F82B200000000 %1012 = fmul float %135, 0x3FD11D7DC0000000 %1013 = fadd float %1012, %1011 %1014 = fmul float %136, 0xBFB0E7BC40000000 %1015 = fadd float %1013, %1014 %1016 = fmul float %1015, %134 %1017 = fmul float %1015, %135 %1018 = fmul float %1015, %136 %1019 = fmul float %1016, 2.000000e+00 %1020 = fmul float %1017, 2.000000e+00 %1021 = fmul float %1018, 2.000000e+00 %1022 = fsub float 0xBF8F82B200000000, %1019 %1023 = fsub float 0x3FD11D7DC0000000, %1020 %1024 = fsub float 0xBFB0E7BC40000000, %1021 %1025 = fmul float %97, %1022 %1026 = fmul float %98, %1023 %1027 = fadd float %1026, %1025 %1028 = fmul float %99, %1024 %1029 = fadd float %1027, %1028 %1030 = fmul float %1029, %97 %1031 = fmul float %1029, %98 %1032 = fmul float %1029, %99 %1033 = fmul float %1030, 2.000000e+00 %1034 = fmul float %1031, 2.000000e+00 %1035 = fmul float %1032, 2.000000e+00 %1036 = fsub float %1022, %1033 %1037 = fsub float %1023, %1034 %1038 = fsub float %1024, %1035 %1039 = fmul float %1022, %97 %1040 = fmul float %1023, %98 %1041 = fadd float %1040, %1039 %1042 = fmul float %1024, %99 %1043 = fadd float %1041, %1042 %1044 = fcmp olt float %1043, 0.000000e+00 %.183 = select i1 %1044, float %1036, float %1022 %.184 = select i1 %1044, float %1037, float %1023 %.185 = select i1 %1044, float %1038, float %1024 %1045 = fmul float %153, %.183 %1046 = fmul float %154, %.184 %1047 = fadd float %1045, %1046 %1048 = fmul float %155, %.185 %1049 = fadd float %1047, %1048 %1050 = fadd float %1049, %166 %1051 = fmul float %.183, %185 %1052 = fmul float %.184, %186 %1053 = fadd float %1051, %1052 %1054 = fmul float %.185, %187 %1055 = fadd float %1053, %1054 %1056 = fadd float %1055, %196 %1057 = fmul float %.183, %197 %1058 = fmul float %.184, %198 %1059 = fadd float %1057, %1058 %1060 = fmul float %.185, %199 %1061 = fadd float %1059, %1060 %1062 = fadd float %1061, %208 %1063 = fmul float %.183, %209 %1064 = fmul float %.184, %210 %1065 = fadd float %1063, %1064 %1066 = fmul float %.185, %211 %1067 = fadd float %1065, %1066 %1068 = fadd float %1067, %217 %1069 = fdiv float 1.000000e+00, %1068 %1070 = fmul float %1056, %1069 %1071 = fmul float %1062, %1069 %1072 = bitcast float %1070 to i32 %1073 = bitcast float %1071 to i32 %1074 = insertelement <2 x i32> undef, i32 %1072, i32 0 %1075 = insertelement <2 x i32> %1074, i32 %1073, i32 1 %1076 = bitcast <8 x i32> %57 to <32 x i8> %1077 = bitcast <4 x i32> %59 to <16 x i8> %1078 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1075, <32 x i8> %1076, <16 x i8> %1077, i32 2) %1079 = extractelement <4 x float> %1078, i32 0 %1080 = extractelement <4 x float> %1078, i32 1 %1081 = fmul float %1079, 0x3FEFE02000000000 %1082 = fmul float %1080, 0x3F6FE01F80000000 %1083 = fadd float %1081, %1082 %1084 = fmul float %134, 0xBFB89EFD80000000 %1085 = fmul float %135, 0xBFE0BBE660000000 %1086 = fadd float %1085, %1084 %1087 = fmul float %136, 0xBFD00DD820000000 %1088 = fadd float %1086, %1087 %1089 = fmul float %1088, %134 %1090 = fmul float %1088, %135 %1091 = fmul float %1088, %136 %1092 = fmul float %1089, 2.000000e+00 %1093 = fmul float %1090, 2.000000e+00 %1094 = fmul float %1091, 2.000000e+00 %1095 = fsub float 0xBFB89EFD80000000, %1092 %1096 = fsub float 0xBFE0BBE660000000, %1093 %1097 = fsub float 0xBFD00DD820000000, %1094 %1098 = fmul float %97, %1095 %1099 = fmul float %98, %1096 %1100 = fadd float %1099, %1098 %1101 = fmul float %99, %1097 %1102 = fadd float %1100, %1101 %1103 = fmul float %1102, %97 %1104 = fmul float %1102, %98 %1105 = fmul float %1102, %99 %1106 = fmul float %1103, 2.000000e+00 %1107 = fmul float %1104, 2.000000e+00 %1108 = fmul float %1105, 2.000000e+00 %1109 = fsub float %1095, %1106 %1110 = fsub float %1096, %1107 %1111 = fsub float %1097, %1108 %1112 = fmul float %1095, %97 %1113 = fmul float %1096, %98 %1114 = fadd float %1113, %1112 %1115 = fmul float %1097, %99 %1116 = fadd float %1114, %1115 %1117 = fcmp olt float %1116, 0.000000e+00 %temp64.1 = select i1 %1117, float %1109, float %1095 %temp65.1 = select i1 %1117, float %1110, float %1096 %temp66.1 = select i1 %1117, float %1111, float %1097 %1118 = fmul float %153, %temp64.1 %1119 = fmul float %154, %temp65.1 %1120 = fadd float %1118, %1119 %1121 = fmul float %155, %temp66.1 %1122 = fadd float %1120, %1121 %1123 = fadd float %1122, %166 %1124 = fmul float %temp64.1, %185 %1125 = fmul float %temp65.1, %186 %1126 = fadd float %1124, %1125 %1127 = fmul float %temp66.1, %187 %1128 = fadd float %1126, %1127 %1129 = fadd float %1128, %196 %1130 = fmul float %temp64.1, %197 %1131 = fmul float %temp65.1, %198 %1132 = fadd float %1130, %1131 %1133 = fmul float %temp66.1, %199 %1134 = fadd float %1132, %1133 %1135 = fadd float %1134, %208 %1136 = fmul float %temp64.1, %209 %1137 = fmul float %temp65.1, %210 %1138 = fadd float %1136, %1137 %1139 = fmul float %temp66.1, %211 %1140 = fadd float %1138, %1139 %1141 = fadd float %1140, %217 %1142 = fdiv float 1.000000e+00, %1141 %1143 = fmul float %1129, %1142 %1144 = fmul float %1135, %1142 %1145 = bitcast float %1143 to i32 %1146 = bitcast float %1144 to i32 %1147 = insertelement <2 x i32> undef, i32 %1145, i32 0 %1148 = insertelement <2 x i32> %1147, i32 %1146, i32 1 %1149 = bitcast <8 x i32> %57 to <32 x i8> %1150 = bitcast <4 x i32> %59 to <16 x i8> %1151 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1148, <32 x i8> %1149, <16 x i8> %1150, i32 2) %1152 = extractelement <4 x float> %1151, i32 0 %1153 = extractelement <4 x float> %1151, i32 1 %1154 = fmul float %1152, 0x3FEFE02000000000 %1155 = fmul float %1153, 0x3F6FE01F80000000 %1156 = fadd float %1154, %1155 %1157 = fsub float %904, %937 %1158 = fsub float %977, %1010 %1159 = fsub float %1050, %1083 %1160 = fsub float %1123, %1156 %1161 = fadd float %1157, 0xBF59000120000000 %1162 = fadd float %1158, 0xBF59000120000000 %1163 = fadd float %1159, 0xBF59000120000000 %1164 = fadd float %1160, 0xBF59000120000000 %1165 = fmul float %34, %1161 %1166 = fsub float 1.000000e+00, %1165 %1167 = call float @llvm.AMDIL.clamp.(float %1166, float 0.000000e+00, float 1.000000e+00) %1168 = fcmp olt float %1161, 0.000000e+00 %1169 = fmul float %34, %1162 %1170 = fsub float 1.000000e+00, %1169 %1171 = call float @llvm.AMDIL.clamp.(float %1170, float 0.000000e+00, float 1.000000e+00) %1172 = fcmp olt float %1162, 0.000000e+00 %1173 = fmul float %34, %1163 %1174 = fsub float 1.000000e+00, %1173 %1175 = call float @llvm.AMDIL.clamp.(float %1174, float 0.000000e+00, float 1.000000e+00) %1176 = fcmp olt float %1163, 0.000000e+00 %1177 = fmul float %34, %1164 %1178 = fsub float 1.000000e+00, %1177 %1179 = call float @llvm.AMDIL.clamp.(float %1178, float 0.000000e+00, float 1.000000e+00) %1180 = fcmp olt float %1164, 0.000000e+00 %.op203 = fmul float %1167, 6.250000e-02 %1181 = select i1 %1168, float 0.000000e+00, float %.op203 %.op204 = fmul float %1171, 6.250000e-02 %1182 = select i1 %1172, float 0.000000e+00, float %.op204 %1183 = fadd float %1181, %1182 %.op205 = fmul float %1175, 6.250000e-02 %1184 = select i1 %1176, float 0.000000e+00, float %.op205 %1185 = fadd float %1183, %1184 %.op206 = fmul float %1179, 6.250000e-02 %1186 = select i1 %1180, float 0.000000e+00, float %.op206 %1187 = fadd float %1185, %1186 %1188 = fadd float %864, %1187 %1189 = fmul float %134, 0x3FDF56FFC0000000 %1190 = fmul float %135, 0xBFDDB8C760000000 %1191 = fadd float %1190, %1189 %1192 = fmul float %136, 0xBFC31A4BE0000000 %1193 = fadd float %1191, %1192 %1194 = fmul float %1193, %134 %1195 = fmul float %1193, %135 %1196 = fmul float %1193, %136 %1197 = fmul float %1194, 2.000000e+00 %1198 = fmul float %1195, 2.000000e+00 %1199 = fmul float %1196, 2.000000e+00 %1200 = fsub float 0x3FDF56FFC0000000, %1197 %1201 = fsub float 0xBFDDB8C760000000, %1198 %1202 = fsub float 0xBFC31A4BE0000000, %1199 %1203 = fmul float %97, %1200 %1204 = fmul float %98, %1201 %1205 = fadd float %1204, %1203 %1206 = fmul float %99, %1202 %1207 = fadd float %1205, %1206 %1208 = fmul float %1207, %97 %1209 = fmul float %1207, %98 %1210 = fmul float %1207, %99 %1211 = fmul float %1208, 2.000000e+00 %1212 = fmul float %1209, 2.000000e+00 %1213 = fmul float %1210, 2.000000e+00 %1214 = fsub float %1200, %1211 %1215 = fsub float %1201, %1212 %1216 = fsub float %1202, %1213 %1217 = fmul float %1200, %97 %1218 = fmul float %1201, %98 %1219 = fadd float %1218, %1217 %1220 = fmul float %1202, %99 %1221 = fadd float %1219, %1220 %1222 = fcmp olt float %1221, 0.000000e+00 %.188 = select i1 %1222, float %1214, float %1200 %.189 = select i1 %1222, float %1215, float %1201 %.190 = select i1 %1222, float %1216, float %1202 %1223 = fmul float %153, %.188 %1224 = fmul float %154, %.189 %1225 = fadd float %1223, %1224 %1226 = fmul float %155, %.190 %1227 = fadd float %1225, %1226 %1228 = fadd float %1227, %166 %1229 = fmul float %.188, %185 %1230 = fmul float %.189, %186 %1231 = fadd float %1229, %1230 %1232 = fmul float %.190, %187 %1233 = fadd float %1231, %1232 %1234 = fadd float %1233, %196 %1235 = fmul float %.188, %197 %1236 = fmul float %.189, %198 %1237 = fadd float %1235, %1236 %1238 = fmul float %.190, %199 %1239 = fadd float %1237, %1238 %1240 = fadd float %1239, %208 %1241 = fmul float %.188, %209 %1242 = fmul float %.189, %210 %1243 = fadd float %1241, %1242 %1244 = fmul float %.190, %211 %1245 = fadd float %1243, %1244 %1246 = fadd float %1245, %217 %1247 = fdiv float 1.000000e+00, %1246 %1248 = fmul float %1234, %1247 %1249 = fmul float %1240, %1247 %1250 = bitcast float %1248 to i32 %1251 = bitcast float %1249 to i32 %1252 = insertelement <2 x i32> undef, i32 %1250, i32 0 %1253 = insertelement <2 x i32> %1252, i32 %1251, i32 1 %1254 = bitcast <8 x i32> %57 to <32 x i8> %1255 = bitcast <4 x i32> %59 to <16 x i8> %1256 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1253, <32 x i8> %1254, <16 x i8> %1255, i32 2) %1257 = extractelement <4 x float> %1256, i32 0 %1258 = extractelement <4 x float> %1256, i32 1 %1259 = fmul float %1257, 0x3FEFE02000000000 %1260 = fmul float %1258, 0x3F6FE01F80000000 %1261 = fadd float %1259, %1260 %1262 = fmul float %134, 0xBFC0C98E60000000 %1263 = fmul float %135, 0x3FC1CF3DC0000000 %1264 = fadd float %1263, %1262 %1265 = fmul float %136, 0xBFE66D9BE0000000 %1266 = fadd float %1264, %1265 %1267 = fmul float %1266, %134 %1268 = fmul float %1266, %135 %1269 = fmul float %1266, %136 %1270 = fmul float %1267, 2.000000e+00 %1271 = fmul float %1268, 2.000000e+00 %1272 = fmul float %1269, 2.000000e+00 %1273 = fsub float 0xBFC0C98E60000000, %1270 %1274 = fsub float 0x3FC1CF3DC0000000, %1271 %1275 = fsub float 0xBFE66D9BE0000000, %1272 %1276 = fmul float %97, %1273 %1277 = fmul float %98, %1274 %1278 = fadd float %1277, %1276 %1279 = fmul float %99, %1275 %1280 = fadd float %1278, %1279 %1281 = fmul float %1280, %97 %1282 = fmul float %1280, %98 %1283 = fmul float %1280, %99 %1284 = fmul float %1281, 2.000000e+00 %1285 = fmul float %1282, 2.000000e+00 %1286 = fmul float %1283, 2.000000e+00 %1287 = fsub float %1273, %1284 %1288 = fsub float %1274, %1285 %1289 = fsub float %1275, %1286 %1290 = fmul float %1273, %97 %1291 = fmul float %1274, %98 %1292 = fadd float %1291, %1290 %1293 = fmul float %1275, %99 %1294 = fadd float %1292, %1293 %1295 = fcmp olt float %1294, 0.000000e+00 %temp48.5 = select i1 %1295, float %1287, float %1273 %temp49.2 = select i1 %1295, float %1288, float %1274 %temp50.2 = select i1 %1295, float %1289, float %1275 %1296 = fmul float %153, %temp48.5 %1297 = fmul float %154, %temp49.2 %1298 = fadd float %1296, %1297 %1299 = fmul float %155, %temp50.2 %1300 = fadd float %1298, %1299 %1301 = fadd float %1300, %166 %1302 = fmul float %temp48.5, %185 %1303 = fmul float %temp49.2, %186 %1304 = fadd float %1302, %1303 %1305 = fmul float %temp50.2, %187 %1306 = fadd float %1304, %1305 %1307 = fadd float %1306, %196 %1308 = fmul float %temp48.5, %197 %1309 = fmul float %temp49.2, %198 %1310 = fadd float %1308, %1309 %1311 = fmul float %temp50.2, %199 %1312 = fadd float %1310, %1311 %1313 = fadd float %1312, %208 %1314 = fmul float %temp48.5, %209 %1315 = fmul float %temp49.2, %210 %1316 = fadd float %1314, %1315 %1317 = fmul float %temp50.2, %211 %1318 = fadd float %1316, %1317 %1319 = fadd float %1318, %217 %1320 = fdiv float 1.000000e+00, %1319 %1321 = fmul float %1307, %1320 %1322 = fmul float %1313, %1320 %1323 = bitcast float %1321 to i32 %1324 = bitcast float %1322 to i32 %1325 = insertelement <2 x i32> undef, i32 %1323, i32 0 %1326 = insertelement <2 x i32> %1325, i32 %1324, i32 1 %1327 = bitcast <8 x i32> %57 to <32 x i8> %1328 = bitcast <4 x i32> %59 to <16 x i8> %1329 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1326, <32 x i8> %1327, <16 x i8> %1328, i32 2) %1330 = extractelement <4 x float> %1329, i32 0 %1331 = extractelement <4 x float> %1329, i32 1 %1332 = fmul float %1330, 0x3FEFE02000000000 %1333 = fmul float %1331, 0x3F6FE01F80000000 %1334 = fadd float %1332, %1333 %1335 = fmul float %134, 0xBFD7307280000000 %1336 = fmul float %135, 0xBFB9BA7FC0000000 %1337 = fadd float %1336, %1335 %1338 = fmul float %136, 0xBFD3996320000000 %1339 = fadd float %1337, %1338 %1340 = fmul float %1339, %134 %1341 = fmul float %1339, %135 %1342 = fmul float %1339, %136 %1343 = fmul float %1340, 2.000000e+00 %1344 = fmul float %1341, 2.000000e+00 %1345 = fmul float %1342, 2.000000e+00 %1346 = fsub float 0xBFD7307280000000, %1343 %1347 = fsub float 0xBFB9BA7FC0000000, %1344 %1348 = fsub float 0xBFD3996320000000, %1345 %1349 = fmul float %97, %1346 %1350 = fmul float %98, %1347 %1351 = fadd float %1350, %1349 %1352 = fmul float %99, %1348 %1353 = fadd float %1351, %1352 %1354 = fmul float %1353, %97 %1355 = fmul float %1353, %98 %1356 = fmul float %1353, %99 %1357 = fmul float %1354, 2.000000e+00 %1358 = fmul float %1355, 2.000000e+00 %1359 = fmul float %1356, 2.000000e+00 %1360 = fsub float %1346, %1357 %1361 = fsub float %1347, %1358 %1362 = fsub float %1348, %1359 %1363 = fmul float %1346, %97 %1364 = fmul float %1347, %98 %1365 = fadd float %1364, %1363 %1366 = fmul float %1348, %99 %1367 = fadd float %1365, %1366 %1368 = fcmp olt float %1367, 0.000000e+00 %.191 = select i1 %1368, float %1360, float %1346 %.192 = select i1 %1368, float %1361, float %1347 %.193 = select i1 %1368, float %1362, float %1348 %1369 = fmul float %153, %.191 %1370 = fmul float %154, %.192 %1371 = fadd float %1369, %1370 %1372 = fmul float %155, %.193 %1373 = fadd float %1371, %1372 %1374 = fadd float %1373, %166 %1375 = fmul float %.191, %185 %1376 = fmul float %.192, %186 %1377 = fadd float %1375, %1376 %1378 = fmul float %.193, %187 %1379 = fadd float %1377, %1378 %1380 = fadd float %1379, %196 %1381 = fmul float %.191, %197 %1382 = fmul float %.192, %198 %1383 = fadd float %1381, %1382 %1384 = fmul float %.193, %199 %1385 = fadd float %1383, %1384 %1386 = fadd float %1385, %208 %1387 = fmul float %.191, %209 %1388 = fmul float %.192, %210 %1389 = fadd float %1387, %1388 %1390 = fmul float %.193, %211 %1391 = fadd float %1389, %1390 %1392 = fadd float %1391, %217 %1393 = fdiv float 1.000000e+00, %1392 %1394 = fmul float %1380, %1393 %1395 = fmul float %1386, %1393 %1396 = bitcast float %1394 to i32 %1397 = bitcast float %1395 to i32 %1398 = insertelement <2 x i32> undef, i32 %1396, i32 0 %1399 = insertelement <2 x i32> %1398, i32 %1397, i32 1 %1400 = bitcast <8 x i32> %57 to <32 x i8> %1401 = bitcast <4 x i32> %59 to <16 x i8> %1402 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1399, <32 x i8> %1400, <16 x i8> %1401, i32 2) %1403 = extractelement <4 x float> %1402, i32 0 %1404 = extractelement <4 x float> %1402, i32 1 %1405 = fmul float %1403, 0x3FEFE02000000000 %1406 = fmul float %1404, 0x3F6FE01F80000000 %1407 = fadd float %1405, %1406 %1408 = fmul float %134, 0x3FD2C69B60000000 %1409 = fmul float %135, 0xBFC5D74D60000000 %1410 = fadd float %1409, %1408 %1411 = fmul float %136, 0xBFE52D59E0000000 %1412 = fadd float %1410, %1411 %1413 = fmul float %1412, %134 %1414 = fmul float %1412, %135 %1415 = fmul float %1412, %136 %1416 = fmul float %1413, 2.000000e+00 %1417 = fmul float %1414, 2.000000e+00 %1418 = fmul float %1415, 2.000000e+00 %1419 = fsub float 0x3FD2C69B60000000, %1416 %1420 = fsub float 0xBFC5D74D60000000, %1417 %1421 = fsub float 0xBFE52D59E0000000, %1418 %1422 = fmul float %97, %1419 %1423 = fmul float %98, %1420 %1424 = fadd float %1423, %1422 %1425 = fmul float %99, %1421 %1426 = fadd float %1424, %1425 %1427 = fmul float %1426, %97 %1428 = fmul float %1426, %98 %1429 = fmul float %1426, %99 %1430 = fmul float %1427, 2.000000e+00 %1431 = fmul float %1428, 2.000000e+00 %1432 = fmul float %1429, 2.000000e+00 %1433 = fsub float %1419, %1430 %1434 = fsub float %1420, %1431 %1435 = fsub float %1421, %1432 %1436 = fmul float %1419, %97 %1437 = fmul float %1420, %98 %1438 = fadd float %1437, %1436 %1439 = fmul float %1421, %99 %1440 = fadd float %1438, %1439 %1441 = fcmp olt float %1440, 0.000000e+00 %temp60.1 = select i1 %1441, float %1433, float %1419 %temp61.1 = select i1 %1441, float %1434, float %1420 %temp62.1 = select i1 %1441, float %1435, float %1421 %1442 = fmul float %153, %temp60.1 %1443 = fmul float %154, %temp61.1 %1444 = fadd float %1442, %1443 %1445 = fmul float %155, %temp62.1 %1446 = fadd float %1444, %1445 %1447 = fadd float %1446, %166 %1448 = fmul float %temp60.1, %185 %1449 = fmul float %temp61.1, %186 %1450 = fadd float %1448, %1449 %1451 = fmul float %temp62.1, %187 %1452 = fadd float %1450, %1451 %1453 = fadd float %1452, %196 %1454 = fmul float %temp60.1, %197 %1455 = fmul float %temp61.1, %198 %1456 = fadd float %1454, %1455 %1457 = fmul float %temp62.1, %199 %1458 = fadd float %1456, %1457 %1459 = fadd float %1458, %208 %1460 = fmul float %temp60.1, %209 %1461 = fmul float %temp61.1, %210 %1462 = fadd float %1460, %1461 %1463 = fmul float %temp62.1, %211 %1464 = fadd float %1462, %1463 %1465 = fadd float %1464, %217 %1466 = fdiv float 1.000000e+00, %1465 %1467 = fmul float %1453, %1466 %1468 = fmul float %1459, %1466 %1469 = bitcast float %1467 to i32 %1470 = bitcast float %1468 to i32 %1471 = insertelement <2 x i32> undef, i32 %1469, i32 0 %1472 = insertelement <2 x i32> %1471, i32 %1470, i32 1 %1473 = bitcast <8 x i32> %57 to <32 x i8> %1474 = bitcast <4 x i32> %59 to <16 x i8> %1475 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1472, <32 x i8> %1473, <16 x i8> %1474, i32 2) %1476 = extractelement <4 x float> %1475, i32 0 %1477 = extractelement <4 x float> %1475, i32 1 %1478 = fmul float %1476, 0x3FEFE02000000000 %1479 = fmul float %1477, 0x3F6FE01F80000000 %1480 = fadd float %1478, %1479 %1481 = fsub float %1228, %1261 %1482 = fsub float %1301, %1334 %1483 = fsub float %1374, %1407 %1484 = fsub float %1447, %1480 %1485 = fadd float %1481, 0xBF59000120000000 %1486 = fadd float %1482, 0xBF59000120000000 %1487 = fadd float %1483, 0xBF59000120000000 %1488 = fadd float %1484, 0xBF59000120000000 %1489 = fmul float %34, %1485 %1490 = fsub float 1.000000e+00, %1489 %1491 = call float @llvm.AMDIL.clamp.(float %1490, float 0.000000e+00, float 1.000000e+00) %1492 = fcmp olt float %1485, 0.000000e+00 %1493 = fmul float %34, %1486 %1494 = fsub float 1.000000e+00, %1493 %1495 = call float @llvm.AMDIL.clamp.(float %1494, float 0.000000e+00, float 1.000000e+00) %1496 = fcmp olt float %1486, 0.000000e+00 %1497 = fmul float %34, %1487 %1498 = fsub float 1.000000e+00, %1497 %1499 = call float @llvm.AMDIL.clamp.(float %1498, float 0.000000e+00, float 1.000000e+00) %1500 = fcmp olt float %1487, 0.000000e+00 %1501 = fmul float %34, %1488 %1502 = fsub float 1.000000e+00, %1501 %1503 = call float @llvm.AMDIL.clamp.(float %1502, float 0.000000e+00, float 1.000000e+00) %1504 = fcmp olt float %1488, 0.000000e+00 %1505 = fdiv float 1.000000e+00, %55 %1506 = fmul float %106, %1505 %1507 = fmul float %1506, 6.553500e+04 %1508 = call float @floor(float %1507) %1509 = fmul float %1508, 3.906250e-03 %1510 = call float @floor(float %1509) %1511 = fmul float %97, 7.000000e+00 %1512 = fadd float %1511, 8.000000e+00 %1513 = fmul float %98, 7.000000e+00 %1514 = fadd float %1513, 8.000000e+00 %1515 = call float @floor(float %1512) %1516 = call float @floor(float %1514) %.op207 = fmul float %1491, 6.250000e-02 %1517 = select i1 %1492, float 0.000000e+00, float %.op207 %.op208 = fmul float %1495, 6.250000e-02 %1518 = select i1 %1496, float 0.000000e+00, float %.op208 %1519 = fadd float %1517, %1518 %.op209 = fmul float %1499, 6.250000e-02 %1520 = select i1 %1500, float 0.000000e+00, float %.op209 %1521 = fadd float %1519, %1520 %.op210 = fmul float %1503, 6.250000e-02 %1522 = select i1 %1504, float 0.000000e+00, float %.op210 %1523 = fadd float %1521, %1522 %1524 = fadd float %1188, %1523 %1525 = fmul float %1510, 0x3F70101020000000 %1526 = fmul float %1510, 2.560000e+02 %1527 = fsub float %1508, %1526 %1528 = fmul float %1527, 0x3F70101020000000 %1529 = fmul float %1515, 0x3FB0101020000000 %1530 = fmul float %1516, 0x3F70101060000000 %1531 = fadd float %1530, %1529 %1532 = call i32 @llvm.SI.packf16(float %1524, float %1525) %1533 = bitcast i32 %1532 to float %1534 = call i32 @llvm.SI.packf16(float %1528, float %1531) %1535 = bitcast i32 %1534 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %1533, float %1535, float %1533, float %1535) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @floor(float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_mov_b32_e32 v4, 0x3e800000 ; 7E0802FF 3E800000 v_mul_f32_e32 v5, v4, v2 ; 100A0504 v_mov_b32_e32 v11, 0x3f1d7d6b ; 7E1602FF 3F1D7D6B v_mov_b32_e32 v12, 0x3dcb2181 ; 7E1802FF 3DCB2181 v_mov_b32_e32 v13, 0x3eefe8ee ; 7E1A02FF 3EEFE8EE v_mov_b32_e32 v21, 0xbe7c957d ; 7E2A02FF BE7C957D v_mov_b32_e32 v22, 0x3efdb8dc ; 7E2C02FF 3EFDB8DC v_mov_b32_e32 v23, 0x3f22fc48 ; 7E2E02FF 3F22FC48 v_mov_b32_e32 v24, 0xbe9afb5d ; 7E3002FF BE9AFB5D v_mov_b32_e32 v25, 0x3e4a17b1 ; 7E3202FF 3E4A17B1 v_mov_b32_e32 v26, 0x3f6dd26b ; 7E3402FF 3F6DD26B v_mov_b32_e32 v27, 0x3ea8d66f ; 7E3602FF 3EA8D66F v_mov_b32_e32 v28, 0xbe79bc34 ; 7E3802FF BE79BC34 v_mov_b32_e32 v29, 0x3ee8383b ; 7E3A02FF 3EE8383B s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300 v_interp_p1_f32 v6, v0, 0, 0, [m0] ; C8180000 v_interp_p2_f32 v6, [v6], v1, 0, 0, [m0] ; C8190001 v_interp_p1_f32 v7, v0, 1, 0, [m0] ; C81C0100 v_interp_p2_f32 v7, [v7], v1, 1, 0, [m0] ; C81D0101 v_mov_b32_e32 v8, 0 ; 7E100280 s_load_dwordx4 s[12:15], s[2:3], 0x4 ; C0860304 s_load_dwordx4 s[0:3], s[2:3], 0x8 ; C0800308 s_load_dwordx4 s[20:23], s[4:5], 0x4 ; C08A0504 s_load_dwordx4 s[16:19], s[4:5], 0x8 ; C0880508 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s24, s[8:11], 0x10 ; C20C0910 s_buffer_load_dword s25, s[8:11], 0x11 ; C20C8911 s_load_dwordx4 s[8:11], s[4:5], 0xc ; C084050C s_load_dwordx8 s[28:35], s[6:7], 0x8 ; C0CE0708 s_load_dwordx8 s[36:43], s[6:7], 0x10 ; C0D20710 v_interp_p1_f32 v9, v0, 0, 1, [m0] ; C8240400 s_load_dwordx8 s[44:51], s[6:7], 0x18 ; C0D60718 v_interp_p2_f32 v9, [v9], v1, 0, 1, [m0] ; C8250401 v_interp_p1_f32 v10, v0, 1, 1, [m0] ; C8280500 v_interp_p2_f32 v10, [v10], v1, 1, 1, [m0] ; C8290501 v_interp_p1_f32 v14, v0, 2, 1, [m0] ; C8380600 v_interp_p2_f32 v14, [v14], v1, 2, 1, [m0] ; C8390601 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[15:17], 7, 0, 0, 0, 0, 0, 0, 0, v[6:9], s[28:35], s[20:23] ; F0900700 00A70F06 image_sample_l v0, 1, 0, 0, 0, 0, 0, 0, 0, v[6:9], s[36:43], s[16:19] ; F0900100 00890006 v_mov_b32_e32 v1, s25 ; 7E020219 v_mac_f32_e32 v1, s24, v3 ; 3E020618 v_mul_f32_e32 v6, v4, v1 ; 100C0304 image_sample v[1:4], 15, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[44:51], s[8:11] ; F0800F00 004B0105 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v5, 2.0, v1, -1.0 ; D2820005 03CE02F4 v_mad_f32 v6, 2.0, v2, -1.0 ; D2820006 03CE04F4 v_mad_f32 v3, 2.0, v3, -1.0 ; D2820003 03CE06F4 v_mad_f32 v1, 2.0, v4, -1.0 ; D2820001 03CE08F4 v_mul_f32_e32 v2, v6, v6 ; 10040D06 v_mac_f32_e32 v2, v5, v5 ; 3E040B05 v_mac_f32_e32 v2, v3, v3 ; 3E040703 v_mac_f32_e32 v2, v1, v1 ; 3E040301 v_rsq_clamp_f32_e32 v7, v2 ; 7E0E5902 v_add_f32_e32 v2, -0.5, v15 ; 06041EF1 v_add_f32_e32 v1, -0.5, v16 ; 060220F1 v_add_f32_e32 v8, -0.5, v17 ; 061022F1 v_mul_f32_e32 v5, v7, v5 ; 100A0B07 v_mul_f32_e32 v4, v7, v6 ; 10080D07 v_mul_f32_e32 v3, v7, v3 ; 10060707 s_buffer_load_dword s16, s[12:15], 0x8 ; C2080D08 s_buffer_load_dword s17, s[12:15], 0xa ; C2088D0A s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100 s_buffer_load_dword s18, s[0:3], 0x3 ; C2090103 s_buffer_load_dword s9, s[0:3], 0x4 ; C2048104 s_buffer_load_dword s19, s[12:15], 0x9 ; C2098D09 s_buffer_load_dword s10, s[0:3], 0x1 ; C2050101 s_buffer_load_dword s21, s[0:3], 0x7 ; C20A8107 s_buffer_load_dword s11, s[0:3], 0x8 ; C2058108 s_buffer_load_dword s22, s[0:3], 0xb ; C20B010B s_buffer_load_dword s23, s[12:15], 0xb ; C20B8D0B s_buffer_load_dword s20, s[0:3], 0x5 ; C20A0105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s16 ; 7E0C0210 v_mul_f32_e32 v15, s8, v6 ; 101E0C08 v_mov_b32_e32 v6, s17 ; 7E0C0211 v_mac_f32_e32 v15, s18, v6 ; 3E1E0C12 v_mov_b32_e32 v6, s16 ; 7E0C0210 v_mul_f32_e32 v16, s9, v6 ; 10200C09 s_buffer_load_dword s24, s[0:3], 0x9 ; C20C0109 s_buffer_load_dword s25, s[0:3], 0xc ; C20C810C v_mov_b32_e32 v6, s17 ; 7E0C0211 v_mac_f32_e32 v16, s21, v6 ; 3E200C15 v_mov_b32_e32 v6, s16 ; 7E0C0210 v_mul_f32_e32 v17, s11, v6 ; 10220C0B v_mov_b32_e32 v6, s17 ; 7E0C0211 v_mac_f32_e32 v17, s22, v6 ; 3E220C16 v_mov_b32_e32 v6, s19 ; 7E0C0213 v_mul_f32_e32 v18, s10, v6 ; 10240C0A s_buffer_load_dword s8, s[12:15], 0xc ; C2040D0C v_mov_b32_e32 v6, s23 ; 7E0C0217 v_mac_f32_e32 v18, s18, v6 ; 3E240C12 s_buffer_load_dword s10, s[0:3], 0x4c ; C205014C s_buffer_load_dword s11, s[0:3], 0x4d ; C205814D s_buffer_load_dword s9, s[0:3], 0x4e ; C204814E v_mov_b32_e32 v6, s19 ; 7E0C0213 v_mul_f32_e32 v30, s20, v6 ; 103C0C14 v_mov_b32_e32 v6, s23 ; 7E0C0217 v_mac_f32_e32 v30, s21, v6 ; 3E3C0C15 s_buffer_load_dword s26, s[12:15], 0x4 ; C20D0D04 v_mov_b32_e32 v6, s19 ; 7E0C0213 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v31, s24, v6 ; 103E0C18 v_mov_b32_e32 v6, s23 ; 7E0C0217 v_mac_f32_e32 v31, s22, v6 ; 3E3E0C16 s_buffer_load_dword s20, s[12:15], 0x5 ; C20A0D05 v_add_f32_e32 v6, v8, v8 ; 060C1108 v_mad_f32 v32, v9, v0, s10 ; D2820020 002A0109 v_mad_f32 v33, v10, v0, s11 ; D2820021 002E010A v_mad_f32 v34, v14, v0, s9 ; D2820022 0026010E s_buffer_load_dword s12, s[0:3], 0xf ; C206010F v_add_f32_e32 v10, v2, v2 ; 06140502 v_mac_f32_e32 v32, s26, v10 ; 3E40141A v_add_f32_e32 v9, v1, v1 ; 06120301 v_mac_f32_e32 v33, s26, v9 ; 3E42121A s_buffer_load_dword s13, s[0:3], 0xd ; C206810D v_mac_f32_e32 v34, s26, v6 ; 3E440C1A v_mul_f32_e32 v7, v32, v15 ; 100E1F20 v_mac_f32_e32 v7, v33, v16 ; 3E0E2121 v_mac_f32_e32 v7, v34, v17 ; 3E0E2322 v_mov_b32_e32 v8, s16 ; 7E100210 v_mac_f32_e32 v7, s25, v8 ; 3E0E1019 v_mov_b32_e32 v8, s17 ; 7E100211 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v7, s12, v8 ; 3E0E100C v_mul_f32_e32 v8, v32, v18 ; 10102520 v_mac_f32_e32 v8, v33, v30 ; 3E103D21 v_mac_f32_e32 v8, v34, v31 ; 3E103F22 v_mov_b32_e32 v14, s19 ; 7E1C0213 v_mac_f32_e32 v8, s13, v14 ; 3E101C0D v_mov_b32_e32 v14, s23 ; 7E1C0217 v_mac_f32_e32 v8, s12, v14 ; 3E101C0C v_mul_f32_e32 v14, v11, v5 ; 101C0B0B v_mac_f32_e32 v14, v12, v4 ; 3E1C090C v_mac_f32_e32 v14, v13, v3 ; 3E1C070D v_mul_f32_e32 v19, v5, v14 ; 10261D05 v_mac_f32_e32 v11, -2.0, v19 ; 3E1626F5 v_mul_f32_e32 v19, v4, v14 ; 10261D04 v_mac_f32_e32 v12, -2.0, v19 ; 3E1826F5 v_mul_f32_e32 v14, v3, v14 ; 101C1D03 v_mac_f32_e32 v13, -2.0, v14 ; 3E1A1CF5 v_mul_f32_e32 v14, v11, v10 ; 101C150B v_mac_f32_e32 v14, v12, v9 ; 3E1C130C v_mac_f32_e32 v14, v13, v6 ; 3E1C0D0D v_mul_f32_e32 v19, v10, v14 ; 10261D0A v_mad_f32 v19, -2.0, v19, v11 ; D2820013 042E26F5 v_cmp_gt_f32_e32 vcc, 0, v14 ; 7C081C80 v_cndmask_b32_e32 v35, v11, v19 ; 0046270B v_mul_f32_e32 v11, v9, v14 ; 10161D09 v_mad_f32 v11, -2.0, v11, v12 ; D282000B 043216F5 v_cndmask_b32_e32 v36, v12, v11 ; 0048170C v_mul_f32_e32 v11, v6, v14 ; 10161D06 v_mad_f32 v11, -2.0, v11, v13 ; D282000B 043616F5 v_cndmask_b32_e32 v37, v13, v11 ; 004A170D v_mul_f32_e32 v11, s18, v32 ; 10164012 v_mac_f32_e32 v11, s21, v33 ; 3E164215 v_mac_f32_e32 v11, s22, v34 ; 3E164416 v_add_f32_e32 v11, s12, v11 ; 0616160C v_mul_f32_e32 v19, s20, v15 ; 10261E14 v_mul_f32_e32 v20, s20, v16 ; 10282014 v_mul_f32_e32 v13, s20, v17 ; 101A2214 v_mul_f32_e32 v17, s20, v18 ; 10222414 v_mul_f32_e32 v18, s20, v30 ; 10243C14 v_mul_f32_e32 v12, s20, v31 ; 10183E14 v_mov_b32_e32 v14, s18 ; 7E1C0212 v_mul_f32_e32 v15, s20, v14 ; 101E1C14 v_mov_b32_e32 v14, s21 ; 7E1C0215 v_mul_f32_e32 v16, s20, v14 ; 10201C14 v_mov_b32_e32 v14, s22 ; 7E1C0216 v_mul_f32_e32 v14, s20, v14 ; 101C1C14 v_mul_f32_e32 v30, v20, v36 ; 103C4914 v_mac_f32_e32 v30, v19, v35 ; 3E3C4713 v_mac_f32_e32 v30, v13, v37 ; 3E3C4B0D v_add_f32_e32 v30, v7, v30 ; 063C3D07 v_mul_f32_e32 v31, v16, v36 ; 103E4910 v_mac_f32_e32 v31, v15, v35 ; 3E3E470F v_mac_f32_e32 v31, v14, v37 ; 3E3E4B0E v_add_f32_e32 v31, v11, v31 ; 063E3F0B v_rcp_f32_e32 v31, v31 ; 7E3E551F v_mul_f32_e32 v38, v18, v36 ; 104C4912 v_mac_f32_e32 v38, v17, v35 ; 3E4C4711 v_mac_f32_e32 v38, v12, v37 ; 3E4C4B0C v_add_f32_e32 v38, v8, v38 ; 064C4D08 v_mul_f32_e32 v39, v31, v30 ; 104E3D1F v_mul_f32_e32 v40, v31, v38 ; 10504D1F v_mul_f32_e32 v30, v21, v5 ; 103C0B15 v_mac_f32_e32 v30, v22, v4 ; 3E3C0916 v_mac_f32_e32 v30, v23, v3 ; 3E3C0717 v_mul_f32_e32 v31, v5, v30 ; 103E3D05 v_mac_f32_e32 v21, -2.0, v31 ; 3E2A3EF5 v_mul_f32_e32 v31, v4, v30 ; 103E3D04 v_mac_f32_e32 v22, -2.0, v31 ; 3E2C3EF5 v_mul_f32_e32 v30, v3, v30 ; 103C3D03 v_mac_f32_e32 v23, -2.0, v30 ; 3E2E3CF5 v_mul_f32_e32 v30, v21, v10 ; 103C1515 v_mac_f32_e32 v30, v22, v9 ; 3E3C1316 v_mac_f32_e32 v30, v23, v6 ; 3E3C0D17 v_mul_f32_e32 v31, v10, v30 ; 103E3D0A v_mad_f32 v31, -2.0, v31, v21 ; D282001F 04563EF5 v_cmp_gt_f32_e32 vcc, 0, v30 ; 7C083C80 v_cndmask_b32_e32 v38, v21, v31 ; 004C3F15 v_mul_f32_e32 v21, v9, v30 ; 102A3D09 v_mad_f32 v21, -2.0, v21, v22 ; D2820015 045A2AF5 v_cndmask_b32_e32 v41, v22, v21 ; 00522B16 v_mul_f32_e32 v21, v6, v30 ; 102A3D06 v_mad_f32 v21, -2.0, v21, v23 ; D2820015 045E2AF5 v_cndmask_b32_e32 v42, v23, v21 ; 00542B17 v_mul_f32_e32 v21, v20, v41 ; 102A5314 v_mac_f32_e32 v21, v19, v38 ; 3E2A4D13 v_mac_f32_e32 v21, v13, v42 ; 3E2A550D v_add_f32_e32 v21, v7, v21 ; 062A2B07 v_mul_f32_e32 v22, v16, v41 ; 102C5310 v_mac_f32_e32 v22, v15, v38 ; 3E2C4D0F v_mac_f32_e32 v22, v14, v42 ; 3E2C550E v_add_f32_e32 v22, v11, v22 ; 062C2D0B v_rcp_f32_e32 v22, v22 ; 7E2C5516 v_mul_f32_e32 v23, v18, v41 ; 102E5312 v_mac_f32_e32 v23, v17, v38 ; 3E2E4D11 v_mac_f32_e32 v23, v12, v42 ; 3E2E550C v_add_f32_e32 v23, v8, v23 ; 062E2F08 v_mul_f32_e32 v43, v22, v21 ; 10562B16 v_mul_f32_e32 v44, v22, v23 ; 10582F16 v_mul_f32_e32 v21, v24, v5 ; 102A0B18 v_mac_f32_e32 v21, v25, v4 ; 3E2A0919 v_mac_f32_e32 v21, v26, v3 ; 3E2A071A v_mul_f32_e32 v22, v5, v21 ; 102C2B05 v_mac_f32_e32 v24, -2.0, v22 ; 3E302CF5 v_mul_f32_e32 v22, v4, v21 ; 102C2B04 v_mac_f32_e32 v25, -2.0, v22 ; 3E322CF5 v_mul_f32_e32 v21, v3, v21 ; 102A2B03 v_mac_f32_e32 v26, -2.0, v21 ; 3E342AF5 v_mul_f32_e32 v21, v24, v10 ; 102A1518 v_mac_f32_e32 v21, v25, v9 ; 3E2A1319 v_mac_f32_e32 v21, v26, v6 ; 3E2A0D1A v_mul_f32_e32 v22, v10, v21 ; 102C2B0A v_mad_f32 v22, -2.0, v22, v24 ; D2820016 04622CF5 v_cmp_gt_f32_e32 vcc, 0, v21 ; 7C082A80 v_cndmask_b32_e32 v45, v24, v22 ; 005A2D18 v_mul_f32_e32 v22, v9, v21 ; 102C2B09 v_mad_f32 v22, -2.0, v22, v25 ; D2820016 04662CF5 v_cndmask_b32_e32 v46, v25, v22 ; 005C2D19 v_mul_f32_e32 v21, v6, v21 ; 102A2B06 v_mad_f32 v21, -2.0, v21, v26 ; D2820015 046A2AF5 v_cndmask_b32_e32 v47, v26, v21 ; 005E2B1A v_mul_f32_e32 v21, v20, v46 ; 102A5D14 v_mac_f32_e32 v21, v19, v45 ; 3E2A5B13 v_mac_f32_e32 v21, v13, v47 ; 3E2A5F0D v_add_f32_e32 v21, v7, v21 ; 062A2B07 v_mul_f32_e32 v22, v16, v46 ; 102C5D10 v_mac_f32_e32 v22, v15, v45 ; 3E2C5B0F v_mac_f32_e32 v22, v14, v47 ; 3E2C5F0E v_add_f32_e32 v22, v11, v22 ; 062C2D0B v_rcp_f32_e32 v22, v22 ; 7E2C5516 v_mul_f32_e32 v23, v18, v46 ; 102E5D12 v_mac_f32_e32 v23, v17, v45 ; 3E2E5B11 v_mac_f32_e32 v23, v12, v47 ; 3E2E5F0C v_add_f32_e32 v23, v8, v23 ; 062E2F08 v_mul_f32_e32 v48, v22, v21 ; 10602B16 v_mul_f32_e32 v49, v22, v23 ; 10622F16 v_mul_f32_e32 v21, v27, v5 ; 102A0B1B v_mac_f32_e32 v21, v28, v4 ; 3E2A091C v_mac_f32_e32 v21, v29, v3 ; 3E2A071D v_mul_f32_e32 v22, v5, v21 ; 102C2B05 v_mac_f32_e32 v27, -2.0, v22 ; 3E362CF5 v_mul_f32_e32 v22, v4, v21 ; 102C2B04 v_mac_f32_e32 v28, -2.0, v22 ; 3E382CF5 v_mul_f32_e32 v21, v3, v21 ; 102A2B03 v_mac_f32_e32 v29, -2.0, v21 ; 3E3A2AF5 v_mul_f32_e32 v21, v27, v10 ; 102A151B v_mac_f32_e32 v21, v28, v9 ; 3E2A131C v_mac_f32_e32 v21, v29, v6 ; 3E2A0D1D v_mul_f32_e32 v22, v10, v21 ; 102C2B0A v_mad_f32 v22, -2.0, v22, v27 ; D2820016 046E2CF5 v_cmp_gt_f32_e32 vcc, 0, v21 ; 7C082A80 v_cndmask_b32_e32 v50, v27, v22 ; 00642D1B v_mul_f32_e32 v22, v9, v21 ; 102C2B09 v_mad_f32 v22, -2.0, v22, v28 ; D2820016 04722CF5 v_cndmask_b32_e32 v28, v28, v22 ; 00382D1C v_mul_f32_e32 v21, v6, v21 ; 102A2B06 v_mad_f32 v21, -2.0, v21, v29 ; D2820015 04762AF5 v_cndmask_b32_e32 v51, v29, v21 ; 00662B1D s_buffer_load_dword s12, s[0:3], 0x50 ; C2060150 s_buffer_load_dword s13, s[0:3], 0x51 ; C2068151 s_buffer_load_dword s14, s[0:3], 0x52 ; C2070152 s_buffer_load_dword s0, s[0:3], 0x5c ; C200015C v_mul_f32_e32 v21, v20, v28 ; 102A3914 v_mac_f32_e32 v21, v19, v50 ; 3E2A6513 v_mac_f32_e32 v21, v13, v51 ; 3E2A670D v_add_f32_e32 v22, v7, v21 ; 062C2B07 v_mul_f32_e32 v23, v18, v28 ; 102E3912 v_mac_f32_e32 v23, v17, v50 ; 3E2E6511 v_mul_f32_e32 v21, v16, v28 ; 102A3910 v_mac_f32_e32 v21, v15, v50 ; 3E2A650F v_mac_f32_e32 v21, v14, v51 ; 3E2A670E v_add_f32_e32 v21, v11, v21 ; 062A2B0B v_rcp_f32_e32 v24, v21 ; 7E305515 v_mac_f32_e32 v23, v12, v51 ; 3E2E670C s_waitcnt lgkmcnt(0) ; BF8C007F v_rcp_f32_e32 v21, s0 ; 7E2A5400 v_add_f32_e32 v23, v8, v23 ; 062E2F08 v_mul_f32_e32 v52, v24, v22 ; 10682D18 v_mul_f32_e32 v53, v24, v23 ; 106A2F18 v_mul_f32_e32 v23, s12, v21 ; 102E2A0C v_mul_f32_e32 v22, v32, v23 ; 102C2F20 v_mul_f32_e32 v24, s13, v21 ; 10302A0D v_mac_f32_e32 v22, v33, v24 ; 3E2C3121 v_mul_f32_e32 v25, s14, v21 ; 10322A0E v_mac_f32_e32 v22, v34, v25 ; 3E2C3322 v_mul_f32_e32 v26, s10, v23 ; 10342E0A v_mac_f32_e32 v26, s11, v24 ; 3E34300B s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 v_mac_f32_e32 v26, s9, v25 ; 3E343209 v_subrev_f32_e32 v22, v26, v22 ; 0A2C2D1A v_mul_f32_e32 v30, s20, v23 ; 103C2E14 v_mul_f32_e32 v31, s20, v24 ; 103E3014 v_mul_f32_e32 v23, v36, v31 ; 102E3F24 v_mac_f32_e32 v23, v35, v30 ; 3E2E3D23 v_mul_f32_e32 v29, s20, v25 ; 103A3214 v_mac_f32_e32 v23, v37, v29 ; 3E2E3B25 v_add_f32_e32 v23, v22, v23 ; 062E2F16 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[32:33], 3, 0, 0, 0, 0, 0, 0, 0, v[39:40], s[12:19], s[0:3] ; F0800300 00032027 v_mov_b32_e32 v25, 0x3b7f00fc ; 7E3202FF 3B7F00FC s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v23, -v33, v25, v23 ; D2820017 245E3321 v_mov_b32_e32 v26, 0x3f7f0100 ; 7E3402FF 3F7F0100 v_mad_f32 v23, -v32, v26, v23 ; D2820017 245E3520 v_mul_f32_e32 v24, v41, v31 ; 10303F29 v_mac_f32_e32 v24, v38, v30 ; 3E303D26 v_mac_f32_e32 v24, v42, v29 ; 3E303B2A v_add_f32_e32 v24, v22, v24 ; 06303116 image_sample v[32:33], 3, 0, 0, 0, 0, 0, 0, 0, v[43:44], s[12:19], s[0:3] ; F0800300 0003202B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v24, -v33, v25, v24 ; D2820018 24623321 v_mad_f32 v24, -v32, v26, v24 ; D2820018 24623520 v_mul_f32_e32 v27, v46, v31 ; 10363F2E v_mac_f32_e32 v27, v45, v30 ; 3E363D2D v_mac_f32_e32 v27, v47, v29 ; 3E363B2F v_add_f32_e32 v27, v22, v27 ; 06363716 image_sample v[32:33], 3, 0, 0, 0, 0, 0, 0, 0, v[48:49], s[12:19], s[0:3] ; F0800300 00032030 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v27, -v33, v25, v27 ; D282001B 246E3321 v_mad_f32 v27, -v32, v26, v27 ; D282001B 246E3520 v_mul_f32_e32 v28, v28, v31 ; 10383F1C v_mac_f32_e32 v28, v50, v30 ; 3E383D32 v_mac_f32_e32 v28, v51, v29 ; 3E383B33 v_add_f32_e32 v28, v22, v28 ; 06383916 image_sample v[32:33], 3, 0, 0, 0, 0, 0, 0, 0, v[52:53], s[12:19], s[0:3] ; F0800300 00032034 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v28, -v33, v25, v28 ; D282001C 24723321 v_mad_f32 v28, -v32, v26, v28 ; D282001C 24723520 v_mov_b32_e32 v32, 0x3e6a54c5 ; 7E4002FF 3E6A54C5 v_mul_f32_e32 v33, v32, v5 ; 10420B20 v_mov_b32_e32 v34, 0x3e945027 ; 7E4402FF 3E945027 v_mac_f32_e32 v33, v34, v4 ; 3E420922 v_mov_b32_e32 v35, 0x3e085879 ; 7E4602FF 3E085879 v_mac_f32_e32 v33, v35, v3 ; 3E420723 v_mul_f32_e32 v36, v5, v33 ; 10484305 v_mac_f32_e32 v32, -2.0, v36 ; 3E4048F5 v_mul_f32_e32 v36, v4, v33 ; 10484304 v_mac_f32_e32 v34, -2.0, v36 ; 3E4448F5 v_mul_f32_e32 v33, v3, v33 ; 10424303 v_mac_f32_e32 v35, -2.0, v33 ; 3E4642F5 v_mul_f32_e32 v33, v32, v10 ; 10421520 v_mac_f32_e32 v33, v34, v9 ; 3E421322 v_mac_f32_e32 v33, v35, v6 ; 3E420D23 v_mul_f32_e32 v36, v10, v33 ; 1048430A v_mad_f32 v36, -2.0, v36, v32 ; D2820024 048248F5 v_cmp_gt_f32_e32 vcc, 0, v33 ; 7C084280 v_cndmask_b32_e32 v32, v32, v36 ; 00404920 v_mul_f32_e32 v36, v9, v33 ; 10484309 v_mad_f32 v36, -2.0, v36, v34 ; D2820024 048A48F5 v_cndmask_b32_e32 v34, v34, v36 ; 00444922 v_mul_f32_e32 v33, v6, v33 ; 10424306 v_mad_f32 v33, -2.0, v33, v35 ; D2820021 048E42F5 v_cndmask_b32_e32 v33, v35, v33 ; 00424323 v_mul_f32_e32 v35, v20, v34 ; 10464514 v_mac_f32_e32 v35, v19, v32 ; 3E464113 v_mac_f32_e32 v35, v13, v33 ; 3E46430D v_add_f32_e32 v35, v7, v35 ; 06464707 v_mul_f32_e32 v36, v16, v34 ; 10484510 v_mac_f32_e32 v36, v15, v32 ; 3E48410F v_mac_f32_e32 v36, v14, v33 ; 3E48430E v_add_f32_e32 v36, v11, v36 ; 0648490B v_rcp_f32_e32 v36, v36 ; 7E485524 v_mul_f32_e32 v37, v18, v34 ; 104A4512 v_mac_f32_e32 v37, v17, v32 ; 3E4A4111 v_mac_f32_e32 v37, v12, v33 ; 3E4A430C v_add_f32_e32 v37, v8, v37 ; 064A4B08 v_mul_f32_e32 v38, v36, v35 ; 104C4724 v_mul_f32_e32 v39, v36, v37 ; 104E4B24 v_mov_b32_e32 v35, 0xbe62a6b1 ; 7E4602FF BE62A6B1 v_mul_f32_e32 v36, v35, v5 ; 10480B23 v_mov_b32_e32 v37, 0xbe17f887 ; 7E4A02FF BE17F887 v_mac_f32_e32 v36, v37, v4 ; 3E480925 v_mov_b32_e32 v40, 0x3e06eb0b ; 7E5002FF 3E06EB0B v_mac_f32_e32 v36, v40, v3 ; 3E480728 v_mul_f32_e32 v41, v5, v36 ; 10524905 v_mac_f32_e32 v35, -2.0, v41 ; 3E4652F5 v_mul_f32_e32 v41, v4, v36 ; 10524904 v_mac_f32_e32 v37, -2.0, v41 ; 3E4A52F5 v_mul_f32_e32 v36, v3, v36 ; 10484903 v_mac_f32_e32 v40, -2.0, v36 ; 3E5048F5 v_mul_f32_e32 v36, v35, v10 ; 10481523 v_mac_f32_e32 v36, v37, v9 ; 3E481325 v_mac_f32_e32 v36, v40, v6 ; 3E480D28 v_mul_f32_e32 v41, v10, v36 ; 1052490A v_mad_f32 v41, -2.0, v41, v35 ; D2820029 048E52F5 v_cmp_gt_f32_e32 vcc, 0, v36 ; 7C084880 v_cndmask_b32_e32 v35, v35, v41 ; 00465323 v_mul_f32_e32 v41, v9, v36 ; 10524909 v_mad_f32 v41, -2.0, v41, v37 ; D2820029 049652F5 v_cndmask_b32_e32 v37, v37, v41 ; 004A5325 v_mul_f32_e32 v36, v6, v36 ; 10484906 v_mad_f32 v36, -2.0, v36, v40 ; D2820024 04A248F5 v_cndmask_b32_e32 v36, v40, v36 ; 00484928 v_mul_f32_e32 v40, v20, v37 ; 10504B14 v_mac_f32_e32 v40, v19, v35 ; 3E504713 v_mac_f32_e32 v40, v13, v36 ; 3E50490D v_add_f32_e32 v40, v7, v40 ; 06505107 v_mul_f32_e32 v41, v16, v37 ; 10524B10 v_mac_f32_e32 v41, v15, v35 ; 3E52470F v_mac_f32_e32 v41, v14, v36 ; 3E52490E v_add_f32_e32 v41, v11, v41 ; 0652530B v_rcp_f32_e32 v41, v41 ; 7E525529 v_mul_f32_e32 v42, v18, v37 ; 10544B12 v_mac_f32_e32 v42, v17, v35 ; 3E544711 v_mac_f32_e32 v42, v12, v36 ; 3E54490C v_add_f32_e32 v42, v8, v42 ; 06545508 v_mul_f32_e32 v43, v41, v40 ; 10565129 v_mul_f32_e32 v44, v41, v42 ; 10585529 v_mov_b32_e32 v40, 0xbeea467c ; 7E5002FF BEEA467C v_mul_f32_e32 v41, v40, v5 ; 10520B28 v_mov_b32_e32 v42, 0xbf2a8be3 ; 7E5402FF BF2A8BE3 v_mac_f32_e32 v41, v42, v4 ; 3E52092A v_mov_b32_e32 v45, 0x3e6af89c ; 7E5A02FF 3E6AF89C v_mac_f32_e32 v41, v45, v3 ; 3E52072D v_mul_f32_e32 v46, v5, v41 ; 105C5305 v_mac_f32_e32 v40, -2.0, v46 ; 3E505CF5 v_mul_f32_e32 v46, v4, v41 ; 105C5304 v_mac_f32_e32 v42, -2.0, v46 ; 3E545CF5 v_mul_f32_e32 v41, v3, v41 ; 10525303 v_mac_f32_e32 v45, -2.0, v41 ; 3E5A52F5 v_mul_f32_e32 v41, v40, v10 ; 10521528 v_mac_f32_e32 v41, v42, v9 ; 3E52132A v_mac_f32_e32 v41, v45, v6 ; 3E520D2D v_mul_f32_e32 v46, v10, v41 ; 105C530A v_mad_f32 v46, -2.0, v46, v40 ; D282002E 04A25CF5 v_cmp_gt_f32_e32 vcc, 0, v41 ; 7C085280 v_cndmask_b32_e32 v40, v40, v46 ; 00505D28 v_mul_f32_e32 v46, v9, v41 ; 105C5309 v_mad_f32 v46, -2.0, v46, v42 ; D282002E 04AA5CF5 v_cndmask_b32_e32 v42, v42, v46 ; 00545D2A v_mul_f32_e32 v41, v6, v41 ; 10525306 v_mad_f32 v41, -2.0, v41, v45 ; D2820029 04B652F5 v_cndmask_b32_e32 v41, v45, v41 ; 0052532D v_mul_f32_e32 v45, v20, v42 ; 105A5514 v_mac_f32_e32 v45, v19, v40 ; 3E5A5113 v_mac_f32_e32 v45, v13, v41 ; 3E5A530D v_add_f32_e32 v45, v7, v45 ; 065A5B07 v_mul_f32_e32 v46, v16, v42 ; 105C5510 v_mac_f32_e32 v46, v15, v40 ; 3E5C510F v_mac_f32_e32 v46, v14, v41 ; 3E5C530E v_add_f32_e32 v46, v11, v46 ; 065C5D0B v_rcp_f32_e32 v46, v46 ; 7E5C552E v_mul_f32_e32 v47, v18, v42 ; 105E5512 v_mac_f32_e32 v47, v17, v40 ; 3E5E5111 v_mac_f32_e32 v47, v12, v41 ; 3E5E530C v_add_f32_e32 v47, v8, v47 ; 065E5F08 v_mul_f32_e32 v48, v46, v45 ; 10605B2E v_mul_f32_e32 v49, v46, v47 ; 10625F2E v_mov_b32_e32 v45, 0xbd974f72 ; 7E5A02FF BD974F72 v_mul_f32_e32 v46, v45, v5 ; 105C0B2D v_mov_b32_e32 v47, 0x3d61565c ; 7E5E02FF 3D61565C v_mac_f32_e32 v46, v47, v4 ; 3E5C092F v_mov_b32_e32 v50, 0xbc1ef0f1 ; 7E6402FF BC1EF0F1 v_mac_f32_e32 v46, v50, v3 ; 3E5C0732 v_mul_f32_e32 v51, v5, v46 ; 10665D05 v_mac_f32_e32 v45, -2.0, v51 ; 3E5A66F5 v_mul_f32_e32 v51, v4, v46 ; 10665D04 v_mac_f32_e32 v47, -2.0, v51 ; 3E5E66F5 v_mul_f32_e32 v46, v3, v46 ; 105C5D03 v_mac_f32_e32 v50, -2.0, v46 ; 3E645CF5 v_mul_f32_e32 v46, v45, v10 ; 105C152D v_mac_f32_e32 v46, v47, v9 ; 3E5C132F v_mac_f32_e32 v46, v50, v6 ; 3E5C0D32 v_mul_f32_e32 v51, v10, v46 ; 10665D0A v_mad_f32 v51, -2.0, v51, v45 ; D2820033 04B666F5 v_cmp_gt_f32_e32 vcc, 0, v46 ; 7C085C80 v_cndmask_b32_e32 v45, v45, v51 ; 005A672D v_mul_f32_e32 v51, v9, v46 ; 10665D09 v_mad_f32 v51, -2.0, v51, v47 ; D2820033 04BE66F5 v_cndmask_b32_e32 v47, v47, v51 ; 005E672F v_mul_f32_e32 v46, v6, v46 ; 105C5D06 v_mad_f32 v46, -2.0, v46, v50 ; D282002E 04CA5CF5 v_cndmask_b32_e32 v46, v50, v46 ; 005C5D32 v_mul_f32_e32 v50, v20, v47 ; 10645F14 v_mac_f32_e32 v50, v19, v45 ; 3E645B13 v_mac_f32_e32 v50, v13, v46 ; 3E645D0D v_add_f32_e32 v50, v7, v50 ; 06646507 v_mul_f32_e32 v51, v16, v47 ; 10665F10 v_mac_f32_e32 v51, v15, v45 ; 3E665B0F v_mac_f32_e32 v51, v14, v46 ; 3E665D0E v_add_f32_e32 v51, v11, v51 ; 0666670B v_rcp_f32_e32 v51, v51 ; 7E665533 v_mul_f32_e32 v52, v18, v47 ; 10685F12 v_mac_f32_e32 v52, v17, v45 ; 3E685B11 v_mac_f32_e32 v52, v12, v46 ; 3E685D0C v_add_f32_e32 v52, v8, v52 ; 06686908 v_mul_f32_e32 v53, v51, v50 ; 106A6533 v_mul_f32_e32 v54, v51, v52 ; 106C6933 v_mul_f32_e32 v34, v34, v31 ; 10443F22 v_mac_f32_e32 v34, v32, v30 ; 3E443D20 v_mac_f32_e32 v34, v33, v29 ; 3E443B21 v_add_f32_e32 v32, v22, v34 ; 06404516 image_sample v[33:34], 3, 0, 0, 0, 0, 0, 0, 0, v[38:39], s[12:19], s[0:3] ; F0800300 00032126 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v32, -v34, v25, v32 ; D2820020 24823322 v_mad_f32 v32, -v33, v26, v32 ; D2820020 24823521 v_mul_f32_e32 v33, v37, v31 ; 10423F25 v_mac_f32_e32 v33, v35, v30 ; 3E423D23 v_mac_f32_e32 v33, v36, v29 ; 3E423B24 v_add_f32_e32 v33, v22, v33 ; 06424316 image_sample v[34:35], 3, 0, 0, 0, 0, 0, 0, 0, v[43:44], s[12:19], s[0:3] ; F0800300 0003222B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v33, -v35, v25, v33 ; D2820021 24863323 v_mad_f32 v33, -v34, v26, v33 ; D2820021 24863522 v_mul_f32_e32 v34, v42, v31 ; 10443F2A v_mac_f32_e32 v34, v40, v30 ; 3E443D28 v_mac_f32_e32 v34, v41, v29 ; 3E443B29 v_add_f32_e32 v34, v22, v34 ; 06444516 image_sample v[35:36], 3, 0, 0, 0, 0, 0, 0, 0, v[48:49], s[12:19], s[0:3] ; F0800300 00032330 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v34, -v36, v25, v34 ; D2820022 248A3324 v_mad_f32 v34, -v35, v26, v34 ; D2820022 248A3523 v_mul_f32_e32 v35, v47, v31 ; 10463F2F v_mac_f32_e32 v35, v45, v30 ; 3E463D2D v_mac_f32_e32 v35, v46, v29 ; 3E463B2E v_add_f32_e32 v35, v22, v35 ; 06464716 image_sample v[36:37], 3, 0, 0, 0, 0, 0, 0, 0, v[53:54], s[12:19], s[0:3] ; F0800300 00032435 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v35, -v37, v25, v35 ; D2820023 248E3325 v_mad_f32 v35, -v36, v26, v35 ; D2820023 248E3524 v_mov_b32_e32 v36, 0xbd1cc533 ; 7E4802FF BD1CC533 v_mul_f32_e32 v37, v36, v5 ; 104A0B24 v_mov_b32_e32 v38, 0xbd23c85c ; 7E4C02FF BD23C85C v_mac_f32_e32 v37, v38, v4 ; 3E4A0926 v_mov_b32_e32 v39, 0x3c20dbad ; 7E4E02FF 3C20DBAD v_mac_f32_e32 v37, v39, v3 ; 3E4A0727 v_mul_f32_e32 v40, v5, v37 ; 10504B05 v_mac_f32_e32 v36, -2.0, v40 ; 3E4850F5 v_mul_f32_e32 v40, v4, v37 ; 10504B04 v_mac_f32_e32 v38, -2.0, v40 ; 3E4C50F5 v_mul_f32_e32 v37, v3, v37 ; 104A4B03 v_mac_f32_e32 v39, -2.0, v37 ; 3E4E4AF5 v_mul_f32_e32 v37, v36, v10 ; 104A1524 v_mac_f32_e32 v37, v38, v9 ; 3E4A1326 v_mac_f32_e32 v37, v39, v6 ; 3E4A0D27 v_mul_f32_e32 v40, v10, v37 ; 10504B0A v_mad_f32 v40, -2.0, v40, v36 ; D2820028 049250F5 v_cmp_gt_f32_e32 vcc, 0, v37 ; 7C084A80 v_cndmask_b32_e32 v36, v36, v40 ; 00485124 v_mul_f32_e32 v40, v9, v37 ; 10504B09 v_mad_f32 v40, -2.0, v40, v38 ; D2820028 049A50F5 v_cndmask_b32_e32 v38, v38, v40 ; 004C5126 v_mul_f32_e32 v37, v6, v37 ; 104A4B06 v_mad_f32 v37, -2.0, v37, v39 ; D2820025 049E4AF5 v_cndmask_b32_e32 v37, v39, v37 ; 004A4B27 v_mul_f32_e32 v39, v20, v38 ; 104E4D14 v_mac_f32_e32 v39, v19, v36 ; 3E4E4913 v_mac_f32_e32 v39, v13, v37 ; 3E4E4B0D v_add_f32_e32 v39, v7, v39 ; 064E4F07 v_mul_f32_e32 v40, v16, v38 ; 10504D10 v_mac_f32_e32 v40, v15, v36 ; 3E50490F v_mac_f32_e32 v40, v14, v37 ; 3E504B0E v_add_f32_e32 v40, v11, v40 ; 0650510B v_rcp_f32_e32 v40, v40 ; 7E505528 v_mul_f32_e32 v41, v18, v38 ; 10524D12 v_mac_f32_e32 v41, v17, v36 ; 3E524911 v_mac_f32_e32 v41, v12, v37 ; 3E524B0C v_add_f32_e32 v41, v8, v41 ; 06525308 v_mul_f32_e32 v42, v40, v39 ; 10544F28 v_mul_f32_e32 v43, v40, v41 ; 10565328 v_mov_b32_e32 v39, 0x3f3ec311 ; 7E4E02FF 3F3EC311 v_mul_f32_e32 v40, v39, v5 ; 10500B27 v_mov_b32_e32 v41, 0x3ea5be7f ; 7E5202FF 3EA5BE7F v_mac_f32_e32 v40, v41, v4 ; 3E500929 v_mov_b32_e32 v44, 0xbedbb0c4 ; 7E5802FF BEDBB0C4 v_mac_f32_e32 v40, v44, v3 ; 3E50072C v_mul_f32_e32 v45, v5, v40 ; 105A5105 v_mac_f32_e32 v39, -2.0, v45 ; 3E4E5AF5 v_mul_f32_e32 v45, v4, v40 ; 105A5104 v_mac_f32_e32 v41, -2.0, v45 ; 3E525AF5 v_mul_f32_e32 v40, v3, v40 ; 10505103 v_mac_f32_e32 v44, -2.0, v40 ; 3E5850F5 v_mul_f32_e32 v40, v39, v10 ; 10501527 v_mac_f32_e32 v40, v41, v9 ; 3E501329 v_mac_f32_e32 v40, v44, v6 ; 3E500D2C v_mul_f32_e32 v45, v10, v40 ; 105A510A v_mad_f32 v45, -2.0, v45, v39 ; D282002D 049E5AF5 v_cmp_gt_f32_e32 vcc, 0, v40 ; 7C085080 v_cndmask_b32_e32 v39, v39, v45 ; 004E5B27 v_mul_f32_e32 v45, v9, v40 ; 105A5109 v_mad_f32 v45, -2.0, v45, v41 ; D282002D 04A65AF5 v_cndmask_b32_e32 v41, v41, v45 ; 00525B29 v_mul_f32_e32 v40, v6, v40 ; 10505106 v_mad_f32 v40, -2.0, v40, v44 ; D2820028 04B250F5 v_cndmask_b32_e32 v40, v44, v40 ; 0050512C v_mul_f32_e32 v44, v20, v41 ; 10585314 v_mac_f32_e32 v44, v19, v39 ; 3E584F13 v_mac_f32_e32 v44, v13, v40 ; 3E58510D v_add_f32_e32 v44, v7, v44 ; 06585907 v_mul_f32_e32 v45, v16, v41 ; 105A5310 v_mac_f32_e32 v45, v15, v39 ; 3E5A4F0F v_mac_f32_e32 v45, v14, v40 ; 3E5A510E v_add_f32_e32 v45, v11, v45 ; 065A5B0B v_rcp_f32_e32 v45, v45 ; 7E5A552D v_mul_f32_e32 v46, v18, v41 ; 105C5312 v_mac_f32_e32 v46, v17, v39 ; 3E5C4F11 v_mac_f32_e32 v46, v12, v40 ; 3E5C510C v_add_f32_e32 v46, v8, v46 ; 065C5D08 v_mul_f32_e32 v47, v45, v44 ; 105E592D v_mul_f32_e32 v48, v45, v46 ; 10605D2D v_mov_b32_e32 v44, 0xbc7c1590 ; 7E5802FF BC7C1590 v_mul_f32_e32 v45, v44, v5 ; 105A0B2C v_mov_b32_e32 v46, 0x3e88ebee ; 7E5C02FF 3E88EBEE v_mac_f32_e32 v45, v46, v4 ; 3E5A092E v_mov_b32_e32 v49, 0xbd873de2 ; 7E6202FF BD873DE2 v_mac_f32_e32 v45, v49, v3 ; 3E5A0731 v_mul_f32_e32 v50, v5, v45 ; 10645B05 v_mac_f32_e32 v44, -2.0, v50 ; 3E5864F5 v_mul_f32_e32 v50, v4, v45 ; 10645B04 v_mac_f32_e32 v46, -2.0, v50 ; 3E5C64F5 v_mul_f32_e32 v45, v3, v45 ; 105A5B03 v_mac_f32_e32 v49, -2.0, v45 ; 3E625AF5 v_mul_f32_e32 v45, v44, v10 ; 105A152C v_mac_f32_e32 v45, v46, v9 ; 3E5A132E v_mac_f32_e32 v45, v49, v6 ; 3E5A0D31 v_mul_f32_e32 v50, v10, v45 ; 10645B0A v_mad_f32 v50, -2.0, v50, v44 ; D2820032 04B264F5 v_cmp_gt_f32_e32 vcc, 0, v45 ; 7C085A80 v_cndmask_b32_e32 v44, v44, v50 ; 0058652C v_mul_f32_e32 v50, v9, v45 ; 10645B09 v_mad_f32 v50, -2.0, v50, v46 ; D2820032 04BA64F5 v_cndmask_b32_e32 v46, v46, v50 ; 005C652E v_mul_f32_e32 v45, v6, v45 ; 105A5B06 v_mad_f32 v45, -2.0, v45, v49 ; D282002D 04C65AF5 v_cndmask_b32_e32 v45, v49, v45 ; 005A5B31 v_mul_f32_e32 v49, v20, v46 ; 10625D14 v_mac_f32_e32 v49, v19, v44 ; 3E625913 v_mac_f32_e32 v49, v13, v45 ; 3E625B0D v_add_f32_e32 v49, v7, v49 ; 06626307 v_mul_f32_e32 v50, v16, v46 ; 10645D10 v_mac_f32_e32 v50, v15, v44 ; 3E64590F v_mac_f32_e32 v50, v14, v45 ; 3E645B0E v_add_f32_e32 v50, v11, v50 ; 0664650B v_rcp_f32_e32 v50, v50 ; 7E645532 v_mul_f32_e32 v51, v18, v46 ; 10665D12 v_mac_f32_e32 v51, v17, v44 ; 3E665911 v_mac_f32_e32 v51, v12, v45 ; 3E665B0C v_add_f32_e32 v51, v8, v51 ; 06666708 v_mul_f32_e32 v52, v50, v49 ; 10686332 v_mul_f32_e32 v53, v50, v51 ; 106A6732 v_mov_b32_e32 v49, 0xbdc4f7ec ; 7E6202FF BDC4F7EC v_mul_f32_e32 v50, v49, v5 ; 10640B31 v_mov_b32_e32 v51, 0xbf05df33 ; 7E6602FF BF05DF33 v_mac_f32_e32 v50, v51, v4 ; 3E640933 v_mov_b32_e32 v54, 0xbe806ec1 ; 7E6C02FF BE806EC1 v_mac_f32_e32 v50, v54, v3 ; 3E640736 v_mul_f32_e32 v55, v5, v50 ; 106E6505 v_mac_f32_e32 v49, -2.0, v55 ; 3E626EF5 v_mul_f32_e32 v55, v4, v50 ; 106E6504 v_mac_f32_e32 v51, -2.0, v55 ; 3E666EF5 v_mul_f32_e32 v50, v3, v50 ; 10646503 v_mac_f32_e32 v54, -2.0, v50 ; 3E6C64F5 v_mul_f32_e32 v50, v49, v10 ; 10641531 v_mac_f32_e32 v50, v51, v9 ; 3E641333 v_mac_f32_e32 v50, v54, v6 ; 3E640D36 v_mul_f32_e32 v55, v10, v50 ; 106E650A v_mad_f32 v55, -2.0, v55, v49 ; D2820037 04C66EF5 v_cmp_gt_f32_e32 vcc, 0, v50 ; 7C086480 v_cndmask_b32_e32 v49, v49, v55 ; 00626F31 v_mul_f32_e32 v55, v9, v50 ; 106E6509 v_mad_f32 v55, -2.0, v55, v51 ; D2820037 04CE6EF5 v_cndmask_b32_e32 v51, v51, v55 ; 00666F33 v_mul_f32_e32 v50, v6, v50 ; 10646506 v_mad_f32 v50, -2.0, v50, v54 ; D2820032 04DA64F5 v_cndmask_b32_e32 v50, v54, v50 ; 00646536 v_mul_f32_e32 v54, v20, v51 ; 106C6714 v_mac_f32_e32 v54, v19, v49 ; 3E6C6313 v_mac_f32_e32 v54, v13, v50 ; 3E6C650D v_add_f32_e32 v54, v7, v54 ; 066C6D07 v_mul_f32_e32 v55, v16, v51 ; 106E6710 v_mac_f32_e32 v55, v15, v49 ; 3E6E630F v_mac_f32_e32 v55, v14, v50 ; 3E6E650E v_add_f32_e32 v55, v11, v55 ; 066E6F0B v_rcp_f32_e32 v55, v55 ; 7E6E5537 v_mul_f32_e32 v56, v18, v51 ; 10706712 v_mac_f32_e32 v56, v17, v49 ; 3E706311 v_mac_f32_e32 v56, v12, v50 ; 3E70650C v_add_f32_e32 v56, v8, v56 ; 06707108 v_mul_f32_e32 v57, v55, v54 ; 10726D37 v_mul_f32_e32 v58, v55, v56 ; 10747137 v_mul_f32_e32 v38, v38, v31 ; 104C3F26 v_mac_f32_e32 v38, v36, v30 ; 3E4C3D24 v_mac_f32_e32 v38, v37, v29 ; 3E4C3B25 v_add_f32_e32 v36, v22, v38 ; 06484D16 image_sample v[37:38], 3, 0, 0, 0, 0, 0, 0, 0, v[42:43], s[12:19], s[0:3] ; F0800300 0003252A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v36, -v38, v25, v36 ; D2820024 24923326 v_mad_f32 v36, -v37, v26, v36 ; D2820024 24923525 v_mul_f32_e32 v37, v41, v31 ; 104A3F29 v_mac_f32_e32 v37, v39, v30 ; 3E4A3D27 v_mac_f32_e32 v37, v40, v29 ; 3E4A3B28 v_add_f32_e32 v37, v22, v37 ; 064A4B16 image_sample v[38:39], 3, 0, 0, 0, 0, 0, 0, 0, v[47:48], s[12:19], s[0:3] ; F0800300 0003262F s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v37, -v39, v25, v37 ; D2820025 24963327 v_mad_f32 v37, -v38, v26, v37 ; D2820025 24963526 v_mul_f32_e32 v38, v46, v31 ; 104C3F2E v_mac_f32_e32 v38, v44, v30 ; 3E4C3D2C v_mac_f32_e32 v38, v45, v29 ; 3E4C3B2D v_add_f32_e32 v38, v22, v38 ; 064C4D16 image_sample v[39:40], 3, 0, 0, 0, 0, 0, 0, 0, v[52:53], s[12:19], s[0:3] ; F0800300 00032734 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v38, -v40, v25, v38 ; D2820026 249A3328 v_mad_f32 v38, -v39, v26, v38 ; D2820026 249A3527 v_mul_f32_e32 v39, v51, v31 ; 104E3F33 v_mac_f32_e32 v39, v49, v30 ; 3E4E3D31 v_mac_f32_e32 v39, v50, v29 ; 3E4E3B32 v_add_f32_e32 v39, v22, v39 ; 064E4F16 image_sample v[40:41], 3, 0, 0, 0, 0, 0, 0, 0, v[57:58], s[12:19], s[0:3] ; F0800300 00032839 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v39, -v41, v25, v39 ; D2820027 249E3329 v_mad_f32 v39, -v40, v26, v39 ; D2820027 249E3528 v_mov_b32_e32 v40, 0x3efab7fe ; 7E5002FF 3EFAB7FE v_mul_f32_e32 v41, v40, v5 ; 10520B28 v_mov_b32_e32 v42, 0xbeedc63b ; 7E5402FF BEEDC63B v_mac_f32_e32 v41, v42, v4 ; 3E52092A v_mov_b32_e32 v43, 0xbe18d25f ; 7E5602FF BE18D25F v_mac_f32_e32 v41, v43, v3 ; 3E52072B v_mul_f32_e32 v44, v5, v41 ; 10585305 v_mac_f32_e32 v40, -2.0, v44 ; 3E5058F5 v_mul_f32_e32 v44, v4, v41 ; 10585304 v_mac_f32_e32 v42, -2.0, v44 ; 3E5458F5 v_mul_f32_e32 v41, v3, v41 ; 10525303 v_mac_f32_e32 v43, -2.0, v41 ; 3E5652F5 v_mul_f32_e32 v41, v40, v10 ; 10521528 v_mac_f32_e32 v41, v42, v9 ; 3E52132A v_mac_f32_e32 v41, v43, v6 ; 3E520D2B v_mul_f32_e32 v44, v10, v41 ; 1058530A v_mad_f32 v44, -2.0, v44, v40 ; D282002C 04A258F5 v_cmp_gt_f32_e32 vcc, 0, v41 ; 7C085280 v_cndmask_b32_e32 v40, v40, v44 ; 00505928 v_mul_f32_e32 v44, v9, v41 ; 10585309 v_mad_f32 v44, -2.0, v44, v42 ; D282002C 04AA58F5 v_cndmask_b32_e32 v42, v42, v44 ; 0054592A v_mul_f32_e32 v41, v6, v41 ; 10525306 v_mad_f32 v41, -2.0, v41, v43 ; D2820029 04AE52F5 v_cndmask_b32_e32 v41, v43, v41 ; 0052532B v_mul_f32_e32 v43, v20, v42 ; 10565514 v_mac_f32_e32 v43, v19, v40 ; 3E565113 v_mac_f32_e32 v43, v13, v41 ; 3E56530D v_add_f32_e32 v43, v7, v43 ; 06565707 v_mul_f32_e32 v44, v16, v42 ; 10585510 v_mac_f32_e32 v44, v15, v40 ; 3E58510F v_mac_f32_e32 v44, v14, v41 ; 3E58530E v_add_f32_e32 v44, v11, v44 ; 0658590B v_rcp_f32_e32 v44, v44 ; 7E58552C v_mul_f32_e32 v45, v18, v42 ; 105A5512 v_mac_f32_e32 v45, v17, v40 ; 3E5A5111 v_mac_f32_e32 v45, v12, v41 ; 3E5A530C v_add_f32_e32 v45, v8, v45 ; 065A5B08 v_mul_f32_e32 v46, v44, v43 ; 105C572C v_mul_f32_e32 v47, v44, v45 ; 105E5B2C v_mov_b32_e32 v43, 0xbe064c73 ; 7E5602FF BE064C73 v_mul_f32_e32 v44, v43, v5 ; 10580B2B v_mov_b32_e32 v45, 0x3e0e79ee ; 7E5A02FF 3E0E79EE v_mac_f32_e32 v44, v45, v4 ; 3E58092D v_mov_b32_e32 v48, 0xbf336cdf ; 7E6002FF BF336CDF v_mac_f32_e32 v44, v48, v3 ; 3E580730 v_mul_f32_e32 v49, v5, v44 ; 10625905 v_mac_f32_e32 v43, -2.0, v49 ; 3E5662F5 v_mul_f32_e32 v49, v4, v44 ; 10625904 v_mac_f32_e32 v45, -2.0, v49 ; 3E5A62F5 v_mul_f32_e32 v44, v3, v44 ; 10585903 v_mac_f32_e32 v48, -2.0, v44 ; 3E6058F5 v_mul_f32_e32 v44, v43, v10 ; 1058152B v_mac_f32_e32 v44, v45, v9 ; 3E58132D v_mac_f32_e32 v44, v48, v6 ; 3E580D30 v_mul_f32_e32 v49, v10, v44 ; 1062590A v_mad_f32 v49, -2.0, v49, v43 ; D2820031 04AE62F5 v_cmp_gt_f32_e32 vcc, 0, v44 ; 7C085880 v_cndmask_b32_e32 v43, v43, v49 ; 0056632B v_mul_f32_e32 v49, v9, v44 ; 10625909 v_mad_f32 v49, -2.0, v49, v45 ; D2820031 04B662F5 v_cndmask_b32_e32 v45, v45, v49 ; 005A632D v_mul_f32_e32 v44, v6, v44 ; 10585906 v_mad_f32 v44, -2.0, v44, v48 ; D282002C 04C258F5 v_cndmask_b32_e32 v44, v48, v44 ; 00585930 v_mul_f32_e32 v48, v20, v45 ; 10605B14 v_mac_f32_e32 v48, v19, v43 ; 3E605713 v_mac_f32_e32 v48, v13, v44 ; 3E60590D v_add_f32_e32 v48, v7, v48 ; 06606107 v_mul_f32_e32 v49, v16, v45 ; 10625B10 v_mac_f32_e32 v49, v15, v43 ; 3E62570F v_mac_f32_e32 v49, v14, v44 ; 3E62590E v_add_f32_e32 v49, v11, v49 ; 0662630B v_rcp_f32_e32 v49, v49 ; 7E625531 v_mul_f32_e32 v50, v18, v45 ; 10645B12 v_mac_f32_e32 v50, v17, v43 ; 3E645711 v_mac_f32_e32 v50, v12, v44 ; 3E64590C v_add_f32_e32 v50, v8, v50 ; 06646508 v_mul_f32_e32 v51, v49, v48 ; 10666131 v_mul_f32_e32 v52, v49, v50 ; 10686531 v_mov_b32_e32 v48, 0xbeb98394 ; 7E6002FF BEB98394 v_mul_f32_e32 v49, v48, v5 ; 10620B30 v_mov_b32_e32 v50, 0xbdcdd3fe ; 7E6402FF BDCDD3FE v_mac_f32_e32 v49, v50, v4 ; 3E620932 v_mov_b32_e32 v53, 0xbe9ccb19 ; 7E6A02FF BE9CCB19 v_mac_f32_e32 v49, v53, v3 ; 3E620735 v_mul_f32_e32 v54, v5, v49 ; 106C6305 v_mac_f32_e32 v48, -2.0, v54 ; 3E606CF5 v_mul_f32_e32 v54, v4, v49 ; 106C6304 v_mac_f32_e32 v50, -2.0, v54 ; 3E646CF5 v_mul_f32_e32 v49, v3, v49 ; 10626303 v_mac_f32_e32 v53, -2.0, v49 ; 3E6A62F5 v_mul_f32_e32 v49, v48, v10 ; 10621530 v_mac_f32_e32 v49, v50, v9 ; 3E621332 v_mac_f32_e32 v49, v53, v6 ; 3E620D35 v_mul_f32_e32 v54, v10, v49 ; 106C630A v_mad_f32 v54, -2.0, v54, v48 ; D2820036 04C26CF5 v_cmp_gt_f32_e32 vcc, 0, v49 ; 7C086280 v_cndmask_b32_e32 v48, v48, v54 ; 00606D30 v_mul_f32_e32 v54, v9, v49 ; 106C6309 v_mad_f32 v54, -2.0, v54, v50 ; D2820036 04CA6CF5 v_cndmask_b32_e32 v50, v50, v54 ; 00646D32 v_mul_f32_e32 v49, v6, v49 ; 10626306 v_mad_f32 v49, -2.0, v49, v53 ; D2820031 04D662F5 v_cndmask_b32_e32 v49, v53, v49 ; 00626335 v_mul_f32_e32 v53, v20, v50 ; 106A6514 v_mac_f32_e32 v53, v19, v48 ; 3E6A6113 v_mac_f32_e32 v53, v13, v49 ; 3E6A630D v_add_f32_e32 v53, v7, v53 ; 066A6B07 v_mul_f32_e32 v54, v16, v50 ; 106C6510 v_mac_f32_e32 v54, v15, v48 ; 3E6C610F v_mac_f32_e32 v54, v14, v49 ; 3E6C630E v_add_f32_e32 v54, v11, v54 ; 066C6D0B v_rcp_f32_e32 v54, v54 ; 7E6C5536 v_mul_f32_e32 v55, v18, v50 ; 106E6512 v_mac_f32_e32 v55, v17, v48 ; 3E6E6111 v_mac_f32_e32 v55, v12, v49 ; 3E6E630C v_add_f32_e32 v55, v8, v55 ; 066E6F08 v_mul_f32_e32 v56, v54, v53 ; 10706B36 v_mul_f32_e32 v57, v54, v55 ; 10726F36 v_mul_f32_e32 v42, v42, v31 ; 10543F2A v_mac_f32_e32 v42, v40, v30 ; 3E543D28 v_mul_f32_e32 v40, v45, v31 ; 10503F2D v_mac_f32_e32 v40, v43, v30 ; 3E503D2B v_mul_f32_e32 v43, v50, v31 ; 10563F32 v_mac_f32_e32 v43, v48, v30 ; 3E563D30 v_mov_b32_e32 v45, 0x3e9634db ; 7E5A02FF 3E9634DB v_mul_f32_e32 v48, v45, v5 ; 10600B2D v_mov_b32_e32 v50, 0xbe2eba6b ; 7E6402FF BE2EBA6B v_mac_f32_e32 v48, v50, v4 ; 3E600932 v_mov_b32_e32 v53, 0xbf296acf ; 7E6A02FF BF296ACF v_mac_f32_e32 v48, v53, v3 ; 3E600735 v_mul_f32_e32 v5, v5, v48 ; 100A6105 v_mul_f32_e32 v4, v4, v48 ; 10086104 v_mul_f32_e32 v3, v3, v48 ; 10066103 v_mac_f32_e32 v45, -2.0, v5 ; 3E5A0AF5 v_mac_f32_e32 v50, -2.0, v4 ; 3E6408F5 v_mac_f32_e32 v53, -2.0, v3 ; 3E6A06F5 v_mul_f32_e32 v3, v45, v10 ; 1006152D v_mac_f32_e32 v3, v50, v9 ; 3E061332 v_mac_f32_e32 v3, v53, v6 ; 3E060D35 v_mul_f32_e32 v4, v10, v3 ; 1008070A v_mad_f32 v4, -2.0, v4, v45 ; D2820004 04B608F5 v_cmp_gt_f32_e32 vcc, 0, v3 ; 7C080680 v_cndmask_b32_e32 v4, v45, v4 ; 0008092D v_mul_f32_e32 v5, v9, v3 ; 100A0709 v_mad_f32 v5, -2.0, v5, v50 ; D2820005 04CA0AF5 v_cndmask_b32_e32 v5, v50, v5 ; 000A0B32 v_mul_f32_e32 v9, v5, v31 ; 10123F05 v_mac_f32_e32 v9, v4, v30 ; 3E123D04 v_mul_f32_e32 v10, v20, v5 ; 10140B14 v_mac_f32_e32 v10, v19, v4 ; 3E140913 v_mul_f32_e32 v18, v18, v5 ; 10240B12 v_mac_f32_e32 v18, v17, v4 ; 3E240911 v_mul_f32_e32 v5, v16, v5 ; 100A0B10 v_mac_f32_e32 v5, v15, v4 ; 3E0A090F v_mul_f32_e32 v3, v6, v3 ; 10060706 v_mad_f32 v3, -2.0, v3, v53 ; D2820003 04D606F5 v_cndmask_b32_e32 v3, v53, v3 ; 00060735 v_mac_f32_e32 v42, v41, v29 ; 3E543B29 v_mac_f32_e32 v40, v44, v29 ; 3E503B2C v_mac_f32_e32 v43, v49, v29 ; 3E563B31 v_mac_f32_e32 v9, v3, v29 ; 3E123B03 v_mac_f32_e32 v10, v13, v3 ; 3E14070D v_mac_f32_e32 v5, v14, v3 ; 3E0A070E v_add_f32_e32 v4, v11, v5 ; 06080B0B v_rcp_f32_e32 v4, v4 ; 7E085504 v_mac_f32_e32 v18, v12, v3 ; 3E24070C v_add_f32_e32 v3, v7, v10 ; 06061507 v_add_f32_e32 v5, v8, v18 ; 060A2508 v_mul_f32_e32 v6, v4, v3 ; 100C0704 v_mul_f32_e32 v7, v4, v5 ; 100E0B04 v_add_f32_e32 v3, v22, v42 ; 06065516 image_sample v[4:5], 3, 0, 0, 0, 0, 0, 0, 0, v[46:47], s[12:19], s[0:3] ; F0800300 0003042E s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v3, -v5, v25, v3 ; D2820003 240E3305 v_mad_f32 v3, -v4, v26, v3 ; D2820003 240E3504 v_add_f32_e32 v4, v22, v40 ; 06085116 image_sample v[10:11], 3, 0, 0, 0, 0, 0, 0, 0, v[51:52], s[12:19], s[0:3] ; F0800300 00030A33 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v4, -v11, v25, v4 ; D2820004 2412330B v_mad_f32 v4, -v10, v26, v4 ; D2820004 2412350A v_add_f32_e32 v5, v22, v43 ; 060A5716 image_sample v[10:11], 3, 0, 0, 0, 0, 0, 0, 0, v[56:57], s[12:19], s[0:3] ; F0800300 00030A38 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v5, -v11, v25, v5 ; D2820005 2416330B v_mad_f32 v5, -v10, v26, v5 ; D2820005 2416350A v_add_f32_e32 v8, v22, v9 ; 06101316 image_sample v[6:7], 3, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[12:19], s[0:3] ; F0800300 00030606 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v7, -v7, v25, v8 ; D2820007 24223307 v_mad_f32 v6, -v6, v26, v7 ; D2820006 241E3506 v_mov_b32_e32 v7, 0xbac80009 ; 7E0E02FF BAC80009 v_add_f32_e32 v8, v7, v23 ; 06102F07 v_mad_f32 v9, -s8, v8, 1.0 ; D2820009 23CA1008 v_add_f32_e64 v9, 0, v9 clamp ; D2060809 00021280 v_cmp_gt_f32_e32 vcc, 0, v8 ; 7C081080 v_mov_b32_e32 v8, 0x3d800000 ; 7E1002FF 3D800000 v_mul_f32_e32 v9, v8, v9 ; 10121308 v_cndmask_b32_e64 v9, v9, 0, vcc ; D2000009 01A90109 v_add_f32_e32 v10, v7, v24 ; 06143107 v_cmp_gt_f32_e32 vcc, 0, v10 ; 7C081480 v_mad_f32 v10, -s8, v10, 1.0 ; D282000A 23CA1408 v_add_f32_e64 v10, 0, v10 clamp ; D206080A 00021480 v_mul_f32_e32 v10, v8, v10 ; 10141508 v_cndmask_b32_e64 v10, v10, 0, vcc ; D200000A 01A9010A v_add_f32_e32 v11, v7, v27 ; 06163707 v_cmp_gt_f32_e32 vcc, 0, v11 ; 7C081680 v_mad_f32 v11, -s8, v11, 1.0 ; D282000B 23CA1608 v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680 v_mul_f32_e32 v11, v8, v11 ; 10161708 v_cndmask_b32_e64 v11, v11, 0, vcc ; D200000B 01A9010B v_add_f32_e32 v12, v7, v28 ; 06183907 v_cmp_gt_f32_e32 vcc, 0, v12 ; 7C081880 v_mad_f32 v12, -s8, v12, 1.0 ; D282000C 23CA1808 v_add_f32_e64 v12, 0, v12 clamp ; D206080C 00021880 v_mul_f32_e32 v12, v8, v12 ; 10181908 v_cndmask_b32_e64 v12, v12, 0, vcc ; D200000C 01A9010C v_add_f32_e32 v13, v7, v32 ; 061A4107 v_cmp_gt_f32_e32 vcc, 0, v13 ; 7C081A80 v_mad_f32 v13, -s8, v13, 1.0 ; D282000D 23CA1A08 v_add_f32_e64 v13, 0, v13 clamp ; D206080D 00021A80 v_mul_f32_e32 v13, v8, v13 ; 101A1B08 v_cndmask_b32_e64 v13, v13, 0, vcc ; D200000D 01A9010D v_add_f32_e32 v14, v7, v33 ; 061C4307 v_cmp_gt_f32_e32 vcc, 0, v14 ; 7C081C80 v_mad_f32 v14, -s8, v14, 1.0 ; D282000E 23CA1C08 v_add_f32_e64 v14, 0, v14 clamp ; D206080E 00021C80 v_mul_f32_e32 v14, v8, v14 ; 101C1D08 v_cndmask_b32_e64 v14, v14, 0, vcc ; D200000E 01A9010E v_add_f32_e32 v15, v7, v34 ; 061E4507 v_cmp_gt_f32_e32 vcc, 0, v15 ; 7C081E80 v_mad_f32 v15, -s8, v15, 1.0 ; D282000F 23CA1E08 v_add_f32_e64 v15, 0, v15 clamp ; D206080F 00021E80 v_mul_f32_e32 v15, v8, v15 ; 101E1F08 v_cndmask_b32_e64 v15, v15, 0, vcc ; D200000F 01A9010F v_add_f32_e32 v16, v7, v35 ; 06204707 v_cmp_gt_f32_e32 vcc, 0, v16 ; 7C082080 v_mad_f32 v16, -s8, v16, 1.0 ; D2820010 23CA2008 v_add_f32_e64 v16, 0, v16 clamp ; D2060810 00022080 v_mul_f32_e32 v16, v8, v16 ; 10202108 v_cndmask_b32_e64 v16, v16, 0, vcc ; D2000010 01A90110 v_add_f32_e32 v9, v10, v9 ; 0612130A v_add_f32_e32 v9, v11, v9 ; 0612130B v_add_f32_e32 v9, v12, v9 ; 0612130C v_add_f32_e32 v10, v14, v13 ; 06141B0E v_add_f32_e32 v10, v15, v10 ; 0614150F v_add_f32_e32 v10, v16, v10 ; 06141510 v_add_f32_e32 v9, v10, v9 ; 0612130A v_add_f32_e32 v10, v7, v36 ; 06144907 v_add_f32_e32 v11, v7, v37 ; 06164B07 v_cmp_gt_f32_e32 vcc, 0, v10 ; 7C081480 v_mad_f32 v10, -s8, v10, 1.0 ; D282000A 23CA1408 v_add_f32_e64 v10, 0, v10 clamp ; D206080A 00021480 v_mul_f32_e32 v10, v8, v10 ; 10141508 v_cndmask_b32_e64 v10, v10, 0, vcc ; D200000A 01A9010A v_cmp_gt_f32_e32 vcc, 0, v11 ; 7C081680 v_mad_f32 v11, -s8, v11, 1.0 ; D282000B 23CA1608 v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680 v_mul_f32_e32 v11, v8, v11 ; 10161708 v_cndmask_b32_e64 v11, v11, 0, vcc ; D200000B 01A9010B v_add_f32_e32 v10, v11, v10 ; 0614150B v_add_f32_e32 v11, v7, v38 ; 06164D07 v_cmp_gt_f32_e32 vcc, 0, v11 ; 7C081680 v_mad_f32 v11, -s8, v11, 1.0 ; D282000B 23CA1608 v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680 v_mul_f32_e32 v11, v8, v11 ; 10161708 v_cndmask_b32_e64 v11, v11, 0, vcc ; D200000B 01A9010B v_add_f32_e32 v10, v11, v10 ; 0614150B v_add_f32_e32 v11, v7, v39 ; 06164F07 v_cmp_gt_f32_e32 vcc, 0, v11 ; 7C081680 v_mad_f32 v11, -s8, v11, 1.0 ; D282000B 23CA1608 v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680 v_mul_f32_e32 v11, v8, v11 ; 10161708 v_cndmask_b32_e64 v11, v11, 0, vcc ; D200000B 01A9010B v_add_f32_e32 v10, v11, v10 ; 0614150B v_add_f32_e32 v9, v10, v9 ; 0612130A v_add_f32_e32 v3, v7, v3 ; 06060707 v_add_f32_e32 v4, v7, v4 ; 06080907 v_add_f32_e32 v5, v7, v5 ; 060A0B07 v_add_f32_e32 v6, v7, v6 ; 060C0D07 v_mad_f32 v7, -s8, v3, 1.0 ; D2820007 23CA0608 v_add_f32_e64 v7, 0, v7 clamp ; D2060807 00020E80 v_cmp_gt_f32_e32 vcc, 0, v3 ; 7C080680 v_mad_f32 v3, -s8, v4, 1.0 ; D2820003 23CA0808 v_add_f32_e64 v3, 0, v3 clamp ; D2060803 00020680 v_cmp_gt_f32_e64 s[0:1], 0, v4 ; D0080000 00020880 v_mad_f32 v4, -s8, v5, 1.0 ; D2820004 23CA0A08 v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880 v_mov_b32_e32 v10, 0x41000000 ; 7E1402FF 41000000 v_mov_b32_e32 v11, 0x41600000 ; 7E1602FF 41600000 v_mad_f32 v2, v11, v2, v10 ; D2820002 042A050B v_mac_f32_e32 v10, v11, v1 ; 3E14030B v_cmp_gt_f32_e64 s[2:3], 0, v5 ; D0080002 00020A80 v_mad_f32 v1, -s8, v6, 1.0 ; D2820001 23CA0C08 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_cmp_gt_f32_e64 s[4:5], 0, v6 ; D0080004 00020C80 v_mul_f32_e32 v0, v21, v0 ; 10000115 v_mul_f32_e32 v0, 0x477fff00, v0 ; 100000FF 477FFF00 v_floor_f32_e32 v0, v0 ; 7E004900 v_mul_f32_e32 v5, 0x3b800000, v0 ; 100A00FF 3B800000 v_floor_f32_e32 v5, v5 ; 7E0A4905 v_mul_f32_e32 v6, v8, v7 ; 100C0F08 v_cndmask_b32_e64 v6, v6, 0, vcc ; D2000006 01A90106 v_mul_f32_e32 v3, v8, v3 ; 10060708 v_cndmask_b32_e64 v3, v3, 0, s[0:1] ; D2000003 00010103 v_add_f32_e32 v3, v3, v6 ; 06060D03 v_mul_f32_e32 v4, v8, v4 ; 10080908 v_cndmask_b32_e64 v4, v4, 0, s[2:3] ; D2000004 00090104 v_add_f32_e32 v3, v4, v3 ; 06060704 v_mul_f32_e32 v1, v8, v1 ; 10020308 v_cndmask_b32_e64 v1, v1, 0, s[4:5] ; D2000001 00110101 v_add_f32_e32 v1, v1, v3 ; 06020701 v_add_f32_e32 v1, v1, v9 ; 06021301 v_mov_b32_e32 v3, 0x3b808081 ; 7E0602FF 3B808081 v_madmk_f32_e32 v0, v5, v0, 0xc3800000 ; 40000105 C3800000 v_mul_f32_e32 v4, v3, v5 ; 10080B03 v_mul_f32_e32 v0, v3, v0 ; 10000103 v_floor_f32_e32 v2, v2 ; 7E044902 v_floor_f32_e32 v3, v10 ; 7E06490A v_mul_f32_e32 v2, 0x3d808081, v2 ; 100404FF 3D808081 v_madmk_f32_e32 v2, v3, v2, 0x3b808083 ; 40040503 3B808083 v_cvt_pkrtz_f16_f32_e32 v1, v1, v4 ; 5E020901 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 60 Code Size: 5136 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0] DCL CONST[2][0..41] DCL CONST[3][0..13] DCL CONST[4][0] DCL TEMP[0..11], LOCAL IMM[0] FLT32 { 0.9961, 0.0039, 255.0000, 0.0625} IMM[1] FLT32 { 16.0000, -8.0000, 0.1429, 0.0000} IMM[2] FLT32 { 1.0000, -2.0000, 0.0350, -0.7000} IMM[3] UINT32 {0, 0, 0, 0} IMM[4] FLT32 { 3.3333, 2.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0].yzw, TEMP[0], SAMP[0], 2D 2: DP2 TEMP[1].x, TEMP[0].yzzz, IMM[0].xyyy 3: MUL TEMP[2].x, TEMP[0].wwww, IMM[0].zzzz 4: MUL TEMP[3].x, TEMP[2].xxxx, IMM[0].wwww 5: FLR TEMP[3].x, TEMP[3].xxxx 6: MOV TEMP[4].x, TEMP[3].xxxx 7: MUL TEMP[3].x, TEMP[3].xxxx, IMM[1].xxxx 8: ADD TEMP[2].x, TEMP[2].xxxx, -TEMP[3].xxxx 9: MOV TEMP[4].y, TEMP[2].xxxx 10: ADD TEMP[2].xy, TEMP[4].xyyy, IMM[1].yyyy 11: MUL TEMP[2].xy, TEMP[2].xyyy, IMM[1].zzzz 12: MOV TEMP[3].x, TEMP[2].xxxx 13: MOV TEMP[3].y, TEMP[2].yyyy 14: MUL TEMP[4].x, TEMP[2].xxxx, TEMP[2].xxxx 15: ADD TEMP[4].x, IMM[2].xxxx, -TEMP[4].xxxx 16: MUL TEMP[2].x, TEMP[2].yyyy, TEMP[2].yyyy 17: ADD TEMP[2].x, TEMP[4].xxxx, -TEMP[2].xxxx 18: MAX TEMP[2].x, IMM[1].wwww, TEMP[2].xxxx 19: SQRT TEMP[2].x, TEMP[2].xxxx 20: MOV TEMP[3].z, TEMP[2].xxxx 21: MOV TEMP[2].y, IMM[1].wwww 22: MOV TEMP[2].x, CONST[1][0].xxxx 23: MAD TEMP[4].xy, IMM[2].yyyy, TEMP[2].xyyy, IN[0].xyyy 24: MOV TEMP[4].xy, TEMP[4].xyyy 25: TEX TEMP[4], TEMP[4], SAMP[0], 2D 26: DP2 TEMP[5].x, TEMP[4].yzzz, IMM[0].xyyy 27: ADD TEMP[6].x, TEMP[5].xxxx, -TEMP[1].xxxx 28: MUL TEMP[7].x, IMM[0].zzzz, TEMP[4].wwww 29: MUL TEMP[8].x, IMM[0].wwww, TEMP[7].xxxx 30: FLR TEMP[8].x, TEMP[8].xxxx 31: MOV TEMP[9].x, TEMP[8].xxxx 32: MUL TEMP[8].x, IMM[1].xxxx, TEMP[8].xxxx 33: ADD TEMP[7].x, TEMP[7].xxxx, -TEMP[8].xxxx 34: MOV TEMP[9].y, TEMP[7].xxxx 35: ADD TEMP[7].xy, IMM[1].yyyy, TEMP[9].xyyy 36: MUL TEMP[7].xy, TEMP[7].xyyy, IMM[1].zzzz 37: MOV TEMP[8].x, TEMP[7].xxxx 38: MOV TEMP[8].y, TEMP[7].yyyy 39: MUL TEMP[9].x, TEMP[7].xxxx, TEMP[7].xxxx 40: ADD TEMP[9].x, IMM[2].xxxx, -TEMP[9].xxxx 41: MUL TEMP[7].x, TEMP[7].yyyy, TEMP[7].yyyy 42: ADD TEMP[7].x, TEMP[9].xxxx, -TEMP[7].xxxx 43: MAX TEMP[7].x, IMM[1].wwww, TEMP[7].xxxx 44: SQRT TEMP[7].x, TEMP[7].xxxx 45: MOV TEMP[8].z, TEMP[7].xxxx 46: MOV TEMP[7].x, -TEMP[6].xxxx 47: FSLT TEMP[9].x, TEMP[6].xxxx, IMM[1].wwww 48: UIF TEMP[9].xxxx :0 49: MOV TEMP[7].x, TEMP[7].xxxx 50: ELSE :0 51: MOV TEMP[7].x, TEMP[6].xxxx 52: ENDIF 53: MUL TEMP[5].x, TEMP[5].xxxx, IMM[2].zzzz 54: RCP TEMP[5].x, TEMP[5].xxxx 55: MUL TEMP[5].x, TEMP[7].xxxx, TEMP[5].xxxx 56: MOV_SAT TEMP[5].x, TEMP[5].xxxx 57: ADD TEMP[5].x, IMM[2].xxxx, -TEMP[5].xxxx 58: DP3 TEMP[6].x, TEMP[8].xyzz, TEMP[3].xyzz 59: ADD TEMP[6].x, TEMP[6].xxxx, IMM[2].wwww 60: MUL TEMP[6].x, TEMP[6].xxxx, IMM[4].xxxx 61: MOV_SAT TEMP[6].x, TEMP[6].xxxx 62: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx 63: MUL TEMP[4].x, TEMP[5].xxxx, TEMP[4].xxxx 64: ADD TEMP[6].xy, -TEMP[2].xyyy, IN[0].xyyy 65: MOV TEMP[6].xy, TEMP[6].xyyy 66: TEX TEMP[6], TEMP[6], SAMP[0], 2D 67: DP2 TEMP[7].x, TEMP[6].yzzz, IMM[0].xyyy 68: ADD TEMP[8].x, TEMP[7].xxxx, -TEMP[1].xxxx 69: MUL TEMP[9].x, IMM[0].zzzz, TEMP[6].wwww 70: MUL TEMP[10].x, IMM[0].wwww, TEMP[9].xxxx 71: FLR TEMP[10].x, TEMP[10].xxxx 72: MOV TEMP[11].x, TEMP[10].xxxx 73: MUL TEMP[10].x, IMM[1].xxxx, TEMP[10].xxxx 74: ADD TEMP[9].x, TEMP[9].xxxx, -TEMP[10].xxxx 75: MOV TEMP[11].y, TEMP[9].xxxx 76: ADD TEMP[9].xy, IMM[1].yyyy, TEMP[11].xyyy 77: MUL TEMP[9].xy, TEMP[9].xyyy, IMM[1].zzzz 78: MOV TEMP[10].x, TEMP[9].xxxx 79: MOV TEMP[10].y, TEMP[9].yyyy 80: MUL TEMP[11].x, TEMP[9].xxxx, TEMP[9].xxxx 81: ADD TEMP[11].x, IMM[2].xxxx, -TEMP[11].xxxx 82: MUL TEMP[9].x, TEMP[9].yyyy, TEMP[9].yyyy 83: ADD TEMP[9].x, TEMP[11].xxxx, -TEMP[9].xxxx 84: MAX TEMP[9].x, IMM[1].wwww, TEMP[9].xxxx 85: SQRT TEMP[9].x, TEMP[9].xxxx 86: MOV TEMP[10].z, TEMP[9].xxxx 87: MOV TEMP[9].x, -TEMP[8].xxxx 88: FSLT TEMP[11].x, TEMP[8].xxxx, IMM[1].wwww 89: UIF TEMP[11].xxxx :0 90: MOV TEMP[9].x, TEMP[9].xxxx 91: ELSE :0 92: MOV TEMP[9].x, TEMP[8].xxxx 93: ENDIF 94: MUL TEMP[7].x, IMM[2].zzzz, TEMP[7].xxxx 95: RCP TEMP[7].x, TEMP[7].xxxx 96: MUL TEMP[7].x, TEMP[9].xxxx, TEMP[7].xxxx 97: MOV_SAT TEMP[7].x, TEMP[7].xxxx 98: ADD TEMP[7].x, IMM[2].xxxx, -TEMP[7].xxxx 99: DP3 TEMP[8].x, TEMP[10].xyzz, TEMP[3].xyzz 100: ADD TEMP[8].x, IMM[2].wwww, TEMP[8].xxxx 101: MUL TEMP[8].x, TEMP[8].xxxx, IMM[4].xxxx 102: MOV_SAT TEMP[8].x, TEMP[8].xxxx 103: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[8].xxxx 104: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[7].xxxx 105: ADD TEMP[5].x, IMM[2].xxxx, TEMP[5].xxxx 106: MAD TEMP[4].x, TEMP[7].xxxx, TEMP[6].xxxx, TEMP[4].xxxx 107: MOV TEMP[6].xy, IN[0].xyyy 108: TEX TEMP[6].x, TEMP[6], SAMP[0], 2D 109: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[6].xxxx 110: ADD TEMP[6].xy, TEMP[2].xyyy, IN[0].xyyy 111: MOV TEMP[6].xy, TEMP[6].xyyy 112: TEX TEMP[6], TEMP[6], SAMP[0], 2D 113: DP2 TEMP[7].x, TEMP[6].yzzz, IMM[0].xyyy 114: ADD TEMP[8].x, TEMP[7].xxxx, -TEMP[1].xxxx 115: MUL TEMP[9].x, IMM[0].zzzz, TEMP[6].wwww 116: MUL TEMP[10].x, IMM[0].wwww, TEMP[9].xxxx 117: FLR TEMP[10].x, TEMP[10].xxxx 118: MOV TEMP[11].x, TEMP[10].xxxx 119: MUL TEMP[10].x, IMM[1].xxxx, TEMP[10].xxxx 120: ADD TEMP[9].x, TEMP[9].xxxx, -TEMP[10].xxxx 121: MOV TEMP[11].y, TEMP[9].xxxx 122: ADD TEMP[9].xy, IMM[1].yyyy, TEMP[11].xyyy 123: MUL TEMP[9].xy, TEMP[9].xyyy, IMM[1].zzzz 124: MOV TEMP[10].x, TEMP[9].xxxx 125: MOV TEMP[10].y, TEMP[9].yyyy 126: MUL TEMP[11].x, TEMP[9].xxxx, TEMP[9].xxxx 127: ADD TEMP[11].x, IMM[2].xxxx, -TEMP[11].xxxx 128: MUL TEMP[9].x, TEMP[9].yyyy, TEMP[9].yyyy 129: ADD TEMP[9].x, TEMP[11].xxxx, -TEMP[9].xxxx 130: MAX TEMP[9].x, IMM[1].wwww, TEMP[9].xxxx 131: SQRT TEMP[9].x, TEMP[9].xxxx 132: MOV TEMP[10].z, TEMP[9].xxxx 133: MOV TEMP[9].x, -TEMP[8].xxxx 134: FSLT TEMP[11].x, TEMP[8].xxxx, IMM[1].wwww 135: UIF TEMP[11].xxxx :0 136: MOV TEMP[9].x, TEMP[9].xxxx 137: ELSE :0 138: MOV TEMP[9].x, TEMP[8].xxxx 139: ENDIF 140: MUL TEMP[7].x, IMM[2].zzzz, TEMP[7].xxxx 141: RCP TEMP[7].x, TEMP[7].xxxx 142: MUL TEMP[7].x, TEMP[9].xxxx, TEMP[7].xxxx 143: MOV_SAT TEMP[7].x, TEMP[7].xxxx 144: ADD TEMP[7].x, IMM[2].xxxx, -TEMP[7].xxxx 145: DP3 TEMP[8].x, TEMP[10].xyzz, TEMP[3].xyzz 146: ADD TEMP[8].x, IMM[2].wwww, TEMP[8].xxxx 147: MUL TEMP[8].x, TEMP[8].xxxx, IMM[4].xxxx 148: MOV_SAT TEMP[8].x, TEMP[8].xxxx 149: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[8].xxxx 150: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[7].xxxx 151: MAD TEMP[4].x, TEMP[7].xxxx, TEMP[6].xxxx, TEMP[4].xxxx 152: MAD TEMP[2].xy, TEMP[2].xyyy, IMM[4].yyyy, IN[0].xyyy 153: MOV TEMP[2].xy, TEMP[2].xyyy 154: TEX TEMP[2], TEMP[2], SAMP[0], 2D 155: DP2 TEMP[6].x, TEMP[2].yzzz, IMM[0].xyyy 156: ADD TEMP[1].x, TEMP[6].xxxx, -TEMP[1].xxxx 157: MUL TEMP[7].x, IMM[0].zzzz, TEMP[2].wwww 158: MUL TEMP[8].x, IMM[0].wwww, TEMP[7].xxxx 159: FLR TEMP[8].x, TEMP[8].xxxx 160: MOV TEMP[9].x, TEMP[8].xxxx 161: MUL TEMP[8].x, IMM[1].xxxx, TEMP[8].xxxx 162: ADD TEMP[7].x, TEMP[7].xxxx, -TEMP[8].xxxx 163: MOV TEMP[9].y, TEMP[7].xxxx 164: ADD TEMP[7].xy, IMM[1].yyyy, TEMP[9].xyyy 165: MUL TEMP[7].xy, TEMP[7].xyyy, IMM[1].zzzz 166: MOV TEMP[8].x, TEMP[7].xxxx 167: MOV TEMP[8].y, TEMP[7].yyyy 168: MUL TEMP[9].x, TEMP[7].xxxx, TEMP[7].xxxx 169: ADD TEMP[9].x, IMM[2].xxxx, -TEMP[9].xxxx 170: MUL TEMP[7].x, TEMP[7].yyyy, TEMP[7].yyyy 171: ADD TEMP[7].x, TEMP[9].xxxx, -TEMP[7].xxxx 172: MAX TEMP[7].x, IMM[1].wwww, TEMP[7].xxxx 173: SQRT TEMP[7].x, TEMP[7].xxxx 174: MOV TEMP[8].z, TEMP[7].xxxx 175: MOV TEMP[7].x, -TEMP[1].xxxx 176: FSLT TEMP[9].x, TEMP[1].xxxx, IMM[1].wwww 177: UIF TEMP[9].xxxx :0 178: MOV TEMP[7].x, TEMP[7].xxxx 179: ELSE :0 180: MOV TEMP[7].x, TEMP[1].xxxx 181: ENDIF 182: MUL TEMP[1].x, IMM[2].zzzz, TEMP[6].xxxx 183: RCP TEMP[1].x, TEMP[1].xxxx 184: MUL TEMP[1].x, TEMP[7].xxxx, TEMP[1].xxxx 185: MOV_SAT TEMP[1].x, TEMP[1].xxxx 186: ADD TEMP[1].x, IMM[2].xxxx, -TEMP[1].xxxx 187: DP3 TEMP[3].x, TEMP[8].xyzz, TEMP[3].xyzz 188: ADD TEMP[3].x, IMM[2].wwww, TEMP[3].xxxx 189: MUL TEMP[3].x, TEMP[3].xxxx, IMM[4].xxxx 190: MOV_SAT TEMP[3].x, TEMP[3].xxxx 191: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[3].xxxx 192: MAD TEMP[2].x, TEMP[1].xxxx, TEMP[2].xxxx, TEMP[4].xxxx 193: ADD TEMP[1].x, TEMP[5].xxxx, TEMP[1].xxxx 194: RCP TEMP[1].x, TEMP[1].xxxx 195: MUL TEMP[1].x, TEMP[2].xxxx, TEMP[1].xxxx 196: MOV TEMP[1].y, TEMP[0].yyyy 197: MOV TEMP[1].z, TEMP[0].zzzz 198: MOV TEMP[1].w, TEMP[0].wwww 199: MOV OUT[0], TEMP[1] 200: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %26 = load <8 x i32>, <8 x i32> addrspace(2)* %25, align 32, !tbaa !0 %27 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %28 = load <4 x i32>, <4 x i32> addrspace(2)* %27, align 16, !tbaa !0 %29 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %31 = bitcast float %29 to i32 %32 = bitcast float %30 to i32 %33 = insertelement <2 x i32> undef, i32 %31, i32 0 %34 = insertelement <2 x i32> %33, i32 %32, i32 1 %35 = bitcast <8 x i32> %26 to <32 x i8> %36 = bitcast <4 x i32> %28 to <16 x i8> %37 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %34, <32 x i8> %35, <16 x i8> %36, i32 2) %38 = extractelement <4 x float> %37, i32 1 %39 = extractelement <4 x float> %37, i32 2 %40 = extractelement <4 x float> %37, i32 3 %41 = fmul float %38, 0x3FEFE02000000000 %42 = fmul float %39, 0x3F6FE01F80000000 %43 = fadd float %41, %42 %44 = fmul float %40, 2.550000e+02 %45 = fmul float %44, 6.250000e-02 %46 = call float @floor(float %45) %47 = fmul float %46, 1.600000e+01 %48 = fsub float %44, %47 %49 = fadd float %46, -8.000000e+00 %50 = fadd float %48, -8.000000e+00 %51 = fmul float %49, 0x3FC24924A0000000 %52 = fmul float %50, 0x3FC24924A0000000 %53 = fmul float %51, %51 %54 = fsub float 1.000000e+00, %53 %55 = fmul float %52, %52 %56 = fsub float %54, %55 %57 = call float @llvm.maxnum.f32(float %56, float 0.000000e+00) %58 = call float @llvm.sqrt.f32(float %57) %59 = fmul float %24, -2.000000e+00 %60 = fadd float %59, %29 %61 = bitcast float %60 to i32 %62 = bitcast float %30 to i32 %63 = insertelement <2 x i32> undef, i32 %61, i32 0 %64 = insertelement <2 x i32> %63, i32 %62, i32 1 %65 = bitcast <8 x i32> %26 to <32 x i8> %66 = bitcast <4 x i32> %28 to <16 x i8> %67 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %64, <32 x i8> %65, <16 x i8> %66, i32 2) %68 = extractelement <4 x float> %67, i32 0 %69 = extractelement <4 x float> %67, i32 1 %70 = extractelement <4 x float> %67, i32 2 %71 = extractelement <4 x float> %67, i32 3 %72 = fmul float %69, 0x3FEFE02000000000 %73 = fmul float %70, 0x3F6FE01F80000000 %74 = fadd float %72, %73 %75 = fsub float %74, %43 %76 = fmul float %71, 2.550000e+02 %77 = fmul float %76, 6.250000e-02 %78 = call float @floor(float %77) %79 = fmul float %78, 1.600000e+01 %80 = fsub float %76, %79 %81 = fadd float %78, -8.000000e+00 %82 = fadd float %80, -8.000000e+00 %83 = fmul float %81, 0x3FC24924A0000000 %84 = fmul float %82, 0x3FC24924A0000000 %85 = fmul float %83, %83 %86 = fsub float 1.000000e+00, %85 %87 = fmul float %84, %84 %88 = fsub float %86, %87 %89 = call float @llvm.maxnum.f32(float %88, float 0.000000e+00) %90 = call float @llvm.sqrt.f32(float %89) %91 = fsub float -0.000000e+00, %75 %92 = fcmp olt float %75, 0.000000e+00 %. = select i1 %92, float %91, float %75 %93 = fmul float %74, 0x3FA1EB8520000000 %94 = fdiv float 1.000000e+00, %93 %95 = fmul float %., %94 %96 = call float @llvm.AMDIL.clamp.(float %95, float 0.000000e+00, float 1.000000e+00) %97 = fsub float 1.000000e+00, %96 %98 = fmul float %83, %51 %99 = fmul float %84, %52 %100 = fadd float %99, %98 %101 = fmul float %90, %58 %102 = fadd float %100, %101 %103 = fadd float %102, 0xBFE6666660000000 %104 = fmul float %103, 0x400AAAAAA0000000 %105 = call float @llvm.AMDIL.clamp.(float %104, float 0.000000e+00, float 1.000000e+00) %106 = fmul float %97, %105 %107 = fmul float %106, %68 %108 = fsub float %29, %24 %109 = bitcast float %108 to i32 %110 = bitcast float %30 to i32 %111 = insertelement <2 x i32> undef, i32 %109, i32 0 %112 = insertelement <2 x i32> %111, i32 %110, i32 1 %113 = bitcast <8 x i32> %26 to <32 x i8> %114 = bitcast <4 x i32> %28 to <16 x i8> %115 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %112, <32 x i8> %113, <16 x i8> %114, i32 2) %116 = extractelement <4 x float> %115, i32 0 %117 = extractelement <4 x float> %115, i32 1 %118 = extractelement <4 x float> %115, i32 2 %119 = extractelement <4 x float> %115, i32 3 %120 = fmul float %117, 0x3FEFE02000000000 %121 = fmul float %118, 0x3F6FE01F80000000 %122 = fadd float %120, %121 %123 = fsub float %122, %43 %124 = fmul float %119, 2.550000e+02 %125 = fmul float %124, 6.250000e-02 %126 = call float @floor(float %125) %127 = fmul float %126, 1.600000e+01 %128 = fsub float %124, %127 %129 = fadd float %126, -8.000000e+00 %130 = fadd float %128, -8.000000e+00 %131 = fmul float %129, 0x3FC24924A0000000 %132 = fmul float %130, 0x3FC24924A0000000 %133 = fmul float %131, %131 %134 = fsub float 1.000000e+00, %133 %135 = fmul float %132, %132 %136 = fsub float %134, %135 %137 = call float @llvm.maxnum.f32(float %136, float 0.000000e+00) %138 = call float @llvm.sqrt.f32(float %137) %139 = fsub float -0.000000e+00, %123 %140 = fcmp olt float %123, 0.000000e+00 %temp36.0 = select i1 %140, float %139, float %123 %141 = fmul float %122, 0x3FA1EB8520000000 %142 = fdiv float 1.000000e+00, %141 %143 = fmul float %temp36.0, %142 %144 = call float @llvm.AMDIL.clamp.(float %143, float 0.000000e+00, float 1.000000e+00) %145 = fsub float 1.000000e+00, %144 %146 = fmul float %131, %51 %147 = fmul float %132, %52 %148 = fadd float %147, %146 %149 = fmul float %138, %58 %150 = fadd float %148, %149 %151 = fadd float %150, 0xBFE6666660000000 %152 = fmul float %151, 0x400AAAAAA0000000 %153 = call float @llvm.AMDIL.clamp.(float %152, float 0.000000e+00, float 1.000000e+00) %154 = fmul float %145, %153 %155 = fadd float %106, %154 %156 = fadd float %155, 1.000000e+00 %157 = fmul float %154, %116 %158 = fadd float %157, %107 %159 = bitcast float %29 to i32 %160 = bitcast float %30 to i32 %161 = insertelement <2 x i32> undef, i32 %159, i32 0 %162 = insertelement <2 x i32> %161, i32 %160, i32 1 %163 = bitcast <8 x i32> %26 to <32 x i8> %164 = bitcast <4 x i32> %28 to <16 x i8> %165 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %162, <32 x i8> %163, <16 x i8> %164, i32 2) %166 = extractelement <4 x float> %165, i32 0 %167 = fadd float %158, %166 %168 = fadd float %24, %29 %169 = fadd float %30, 0.000000e+00 %170 = bitcast float %168 to i32 %171 = bitcast float %169 to i32 %172 = insertelement <2 x i32> undef, i32 %170, i32 0 %173 = insertelement <2 x i32> %172, i32 %171, i32 1 %174 = bitcast <8 x i32> %26 to <32 x i8> %175 = bitcast <4 x i32> %28 to <16 x i8> %176 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %173, <32 x i8> %174, <16 x i8> %175, i32 2) %177 = extractelement <4 x float> %176, i32 0 %178 = extractelement <4 x float> %176, i32 1 %179 = extractelement <4 x float> %176, i32 2 %180 = extractelement <4 x float> %176, i32 3 %181 = fmul float %178, 0x3FEFE02000000000 %182 = fmul float %179, 0x3F6FE01F80000000 %183 = fadd float %181, %182 %184 = fsub float %183, %43 %185 = fmul float %180, 2.550000e+02 %186 = fmul float %185, 6.250000e-02 %187 = call float @floor(float %186) %188 = fmul float %187, 1.600000e+01 %189 = fsub float %185, %188 %190 = fadd float %187, -8.000000e+00 %191 = fadd float %189, -8.000000e+00 %192 = fmul float %190, 0x3FC24924A0000000 %193 = fmul float %191, 0x3FC24924A0000000 %194 = fmul float %192, %192 %195 = fsub float 1.000000e+00, %194 %196 = fmul float %193, %193 %197 = fsub float %195, %196 %198 = call float @llvm.maxnum.f32(float %197, float 0.000000e+00) %199 = call float @llvm.sqrt.f32(float %198) %200 = fsub float -0.000000e+00, %184 %201 = fcmp olt float %184, 0.000000e+00 %.57 = select i1 %201, float %200, float %184 %202 = fmul float %183, 0x3FA1EB8520000000 %203 = fdiv float 1.000000e+00, %202 %204 = fmul float %.57, %203 %205 = call float @llvm.AMDIL.clamp.(float %204, float 0.000000e+00, float 1.000000e+00) %206 = fsub float 1.000000e+00, %205 %207 = fmul float %192, %51 %208 = fmul float %193, %52 %209 = fadd float %208, %207 %210 = fmul float %199, %58 %211 = fadd float %209, %210 %212 = fadd float %211, 0xBFE6666660000000 %213 = fmul float %212, 0x400AAAAAA0000000 %214 = call float @llvm.AMDIL.clamp.(float %213, float 0.000000e+00, float 1.000000e+00) %215 = fmul float %206, %214 %216 = fadd float %156, %215 %217 = fmul float %215, %177 %218 = fadd float %217, %167 %219 = fmul float %24, 2.000000e+00 %220 = fadd float %219, %29 %221 = fadd float %30, 0.000000e+00 %222 = bitcast float %220 to i32 %223 = bitcast float %221 to i32 %224 = insertelement <2 x i32> undef, i32 %222, i32 0 %225 = insertelement <2 x i32> %224, i32 %223, i32 1 %226 = bitcast <8 x i32> %26 to <32 x i8> %227 = bitcast <4 x i32> %28 to <16 x i8> %228 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %225, <32 x i8> %226, <16 x i8> %227, i32 2) %229 = extractelement <4 x float> %228, i32 0 %230 = extractelement <4 x float> %228, i32 1 %231 = extractelement <4 x float> %228, i32 2 %232 = extractelement <4 x float> %228, i32 3 %233 = fmul float %230, 0x3FEFE02000000000 %234 = fmul float %231, 0x3F6FE01F80000000 %235 = fadd float %233, %234 %236 = fsub float %235, %43 %237 = fmul float %232, 2.550000e+02 %238 = fmul float %237, 6.250000e-02 %239 = call float @floor(float %238) %240 = fmul float %239, 1.600000e+01 %241 = fsub float %237, %240 %242 = fadd float %239, -8.000000e+00 %243 = fadd float %241, -8.000000e+00 %244 = fmul float %242, 0x3FC24924A0000000 %245 = fmul float %243, 0x3FC24924A0000000 %246 = fmul float %244, %244 %247 = fsub float 1.000000e+00, %246 %248 = fmul float %245, %245 %249 = fsub float %247, %248 %250 = call float @llvm.maxnum.f32(float %249, float 0.000000e+00) %251 = call float @llvm.sqrt.f32(float %250) %252 = fsub float -0.000000e+00, %236 %253 = fcmp olt float %236, 0.000000e+00 %temp28.1 = select i1 %253, float %252, float %236 %254 = fmul float %235, 0x3FA1EB8520000000 %255 = fdiv float 1.000000e+00, %254 %256 = fmul float %temp28.1, %255 %257 = call float @llvm.AMDIL.clamp.(float %256, float 0.000000e+00, float 1.000000e+00) %258 = fsub float 1.000000e+00, %257 %259 = fmul float %244, %51 %260 = fmul float %245, %52 %261 = fadd float %260, %259 %262 = fmul float %251, %58 %263 = fadd float %261, %262 %264 = fadd float %263, 0xBFE6666660000000 %265 = fmul float %264, 0x400AAAAAA0000000 %266 = call float @llvm.AMDIL.clamp.(float %265, float 0.000000e+00, float 1.000000e+00) %267 = fmul float %258, %266 %268 = fmul float %267, %229 %269 = fadd float %268, %218 %270 = fadd float %216, %267 %271 = fdiv float 1.000000e+00, %270 %272 = fmul float %269, %271 %273 = call i32 @llvm.SI.packf16(float %272, float %38) %274 = bitcast i32 %273 to float %275 = call i32 @llvm.SI.packf16(float %39, float %40) %276 = bitcast i32 %275 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %274, float %276, float %274, float %276) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @floor(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[20:23], s[2:3], 0x4 ; C08A0304 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[20:23], 0x0 ; C2021500 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_mov_b32_e32 v0, 0x3b7f00fc ; 7E0002FF 3B7F00FC v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_mov_b32_e32 v1, 0x3f7f0100 ; 7E0202FF 3F7F0100 image_sample v[4:7], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00030402 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v8, v0, v6 ; 10100D00 v_mad_f32 v9, -2.0, s4, v2 ; D2820009 040808F5 v_mac_f32_e32 v8, v1, v5 ; 3E100B01 v_mov_b32_e32 v10, v3 ; 7E140303 image_sample v[9:12], 15, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[12:19], s[0:3] ; F0800F00 00030909 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v13, v0, v11 ; 101A1700 v_mac_f32_e32 v13, v1, v10 ; 3E1A1501 v_mad_f32 v14, v11, v0, -v8 ; D282000E 8422010B v_mac_f32_e32 v14, v1, v10 ; 3E1C1501 v_mov_b32_e32 v15, 0x3d0f5c29 ; 7E1E02FF 3D0F5C29 v_mul_f32_e32 v13, v15, v13 ; 101A1B0F v_rcp_f32_e32 v13, v13 ; 7E1A550D v_mad_f32 v11, -v11, v0, v8 ; D282000B 2422010B v_mad_f32 v10, -v10, v1, v11 ; D282000A 242E030A v_cmp_gt_f32_e32 vcc, 0, v14 ; 7C081C80 v_cndmask_b32_e32 v10, v14, v10 ; 0014150E v_mul_f32_e32 v10, v13, v10 ; 1014150D v_mov_b32_e32 v11, 0x417f0000 ; 7E1602FF 417F0000 v_mul_f32_e32 v13, v11, v7 ; 101A0F0B v_floor_f32_e32 v13, v13 ; 7E1A490D v_mov_b32_e32 v14, 0xc1800000 ; 7E1C02FF C1800000 v_mul_f32_e32 v16, v14, v13 ; 10201B0E v_mov_b32_e32 v17, 0x437f0000 ; 7E2202FF 437F0000 v_mac_f32_e32 v16, v17, v7 ; 3E200F11 v_mov_b32_e32 v18, 0xc1000000 ; 7E2402FF C1000000 v_add_f32_e32 v13, v18, v13 ; 061A1B12 v_add_f32_e32 v16, v18, v16 ; 06202112 v_mov_b32_e32 v19, 0x3e124925 ; 7E2602FF 3E124925 v_mul_f32_e32 v13, v19, v13 ; 101A1B13 v_mul_f32_e32 v16, v19, v16 ; 10202113 v_mul_f32_e32 v20, v11, v12 ; 1028190B v_floor_f32_e32 v20, v20 ; 7E284914 v_mul_f32_e32 v21, v14, v20 ; 102A290E v_mac_f32_e32 v21, v17, v12 ; 3E2A1911 v_add_f32_e32 v12, v18, v20 ; 06182912 v_add_f32_e32 v20, v18, v21 ; 06282B12 v_mul_f32_e32 v12, v19, v12 ; 10181913 v_mul_f32_e32 v20, v19, v20 ; 10282913 v_mad_f32 v21, -v12, v12, 1.0 ; D2820015 23CA190C v_mad_f32 v21, -v20, v20, v21 ; D2820015 24562914 v_mul_f32_e32 v12, v13, v12 ; 1018190D v_mac_f32_e32 v12, v16, v20 ; 3E182910 v_mad_f32 v20, -v13, v13, 1.0 ; D2820014 23CA1B0D v_mad_f32 v20, -v16, v16, v20 ; D2820014 24522110 v_max_f32_e32 v20, 0, v20 ; 20282880 v_sqrt_f32_e32 v20, v20 ; 7E286714 v_max_f32_e32 v21, 0, v21 ; 202A2A80 v_sqrt_f32_e32 v21, v21 ; 7E2A6715 v_mac_f32_e32 v12, v20, v21 ; 3E182B14 v_add_f32_e64 v10, 0, v10 clamp ; D206080A 00021480 v_sub_f32_e32 v10, 1.0, v10 ; 081414F2 v_mov_b32_e32 v21, 0xbf333333 ; 7E2A02FF BF333333 v_add_f32_e32 v12, v21, v12 ; 06181915 v_mov_b32_e32 v22, 0x40555555 ; 7E2C02FF 40555555 v_mul_f32_e32 v12, v22, v12 ; 10181916 v_add_f32_e64 v12, 0, v12 clamp ; D206080C 00021880 v_mul_f32_e32 v23, v12, v10 ; 102E150C v_subrev_f32_e32 v24, s4, v2 ; 0A300404 v_mac_f32_e32 v4, v9, v23 ; 3E082F09 v_mov_b32_e32 v25, v3 ; 7E320303 image_sample v[23:26], 15, 0, 0, 0, 0, 0, 0, 0, v[24:25], s[12:19], s[0:3] ; F0800F00 00031718 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v9, v0, v25 ; 10123300 v_mac_f32_e32 v9, v1, v24 ; 3E123101 v_mad_f32 v27, v25, v0, -v8 ; D282001B 84220119 v_mac_f32_e32 v27, v1, v24 ; 3E363101 v_mul_f32_e32 v9, v15, v9 ; 1012130F v_rcp_f32_e32 v9, v9 ; 7E125509 v_mad_f32 v25, -v25, v0, v8 ; D2820019 24220119 v_mad_f32 v24, -v24, v1, v25 ; D2820018 24660318 v_cmp_gt_f32_e32 vcc, 0, v27 ; 7C083680 v_cndmask_b32_e32 v24, v27, v24 ; 0030311B v_mul_f32_e32 v9, v9, v24 ; 10123109 v_mul_f32_e32 v24, v11, v26 ; 1030350B v_floor_f32_e32 v24, v24 ; 7E304918 v_mul_f32_e32 v25, v14, v24 ; 1032310E v_mac_f32_e32 v25, v17, v26 ; 3E323511 v_add_f32_e32 v24, v18, v24 ; 06303112 v_add_f32_e32 v25, v18, v25 ; 06323312 v_mul_f32_e32 v24, v19, v24 ; 10303113 v_mul_f32_e32 v25, v19, v25 ; 10323313 v_mad_f32 v26, -v24, v24, 1.0 ; D282001A 23CA3118 v_mad_f32 v26, -v25, v25, v26 ; D282001A 246A3319 v_mul_f32_e32 v24, v13, v24 ; 1030310D v_mac_f32_e32 v24, v16, v25 ; 3E303310 v_max_f32_e32 v25, 0, v26 ; 20323480 v_sqrt_f32_e32 v25, v25 ; 7E326719 v_mac_f32_e32 v24, v20, v25 ; 3E303314 v_add_f32_e64 v9, 0, v9 clamp ; D2060809 00021280 v_sub_f32_e32 v9, 1.0, v9 ; 081212F2 v_add_f32_e32 v24, v21, v24 ; 06303115 v_mul_f32_e32 v24, v22, v24 ; 10303116 v_add_f32_e64 v24, 0, v24 clamp ; D2060818 00023080 v_mul_f32_e32 v25, v24, v9 ; 10321318 v_mac_f32_e32 v4, v23, v25 ; 3E083317 v_mad_f32 v9, v9, v24, 1.0 ; D2820009 03CA3109 v_add_f32_e32 v23, s4, v2 ; 062E0404 v_mac_f32_e32 v9, v12, v10 ; 3E12150C v_mov_b32_e32 v24, v3 ; 7E300303 image_sample v[23:26], 15, 0, 0, 0, 0, 0, 0, 0, v[23:24], s[12:19], s[0:3] ; F0800F00 00031717 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v10, v11, v26 ; 1014350B v_floor_f32_e32 v10, v10 ; 7E14490A v_mul_f32_e32 v12, v14, v10 ; 1018150E v_mac_f32_e32 v12, v17, v26 ; 3E183511 v_add_f32_e32 v10, v18, v10 ; 06141512 v_add_f32_e32 v12, v18, v12 ; 06181912 v_mul_f32_e32 v10, v19, v10 ; 10141513 v_mul_f32_e32 v12, v19, v12 ; 10181913 v_mad_f32 v26, -v10, v10, 1.0 ; D282001A 23CA150A v_mad_f32 v26, -v12, v12, v26 ; D282001A 246A190C v_mul_f32_e32 v10, v13, v10 ; 1014150D v_mac_f32_e32 v10, v16, v12 ; 3E141910 v_max_f32_e32 v12, 0, v26 ; 20183480 v_sqrt_f32_e32 v12, v12 ; 7E18670C v_mac_f32_e32 v10, v20, v12 ; 3E141914 v_mac_f32_e64 v2, 2.0, s4 ; D23E0002 000008F4 image_sample v[26:29], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00031A02 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, v11, v29 ; 10043B0B v_floor_f32_e32 v2, v2 ; 7E044902 v_mul_f32_e32 v3, v14, v2 ; 1006050E v_mac_f32_e32 v3, v17, v29 ; 3E063B11 v_add_f32_e32 v2, v18, v2 ; 06040512 v_add_f32_e32 v3, v18, v3 ; 06060712 v_mul_f32_e32 v2, v19, v2 ; 10040513 v_mul_f32_e32 v3, v19, v3 ; 10060713 v_mul_f32_e32 v11, v13, v2 ; 1016050D v_mac_f32_e32 v11, v16, v3 ; 3E160710 v_mad_f32 v2, -v2, v2, 1.0 ; D2820002 23CA0502 v_mad_f32 v2, -v3, v3, v2 ; D2820002 240A0703 v_max_f32_e32 v2, 0, v2 ; 20040480 v_sqrt_f32_e32 v2, v2 ; 7E046702 v_mac_f32_e32 v11, v20, v2 ; 3E160514 v_mul_f32_e32 v2, v0, v25 ; 10043300 v_mac_f32_e32 v2, v1, v24 ; 3E043101 v_mad_f32 v3, v25, v0, -v8 ; D2820003 84220119 v_mac_f32_e32 v3, v1, v24 ; 3E063101 v_mul_f32_e32 v2, v15, v2 ; 1004050F v_rcp_f32_e32 v2, v2 ; 7E045502 v_mad_f32 v12, -v25, v0, v8 ; D282000C 24220119 v_mad_f32 v12, -v24, v1, v12 ; D282000C 24320318 v_cmp_gt_f32_e32 vcc, 0, v3 ; 7C080680 v_cndmask_b32_e32 v3, v3, v12 ; 00061903 v_mul_f32_e32 v2, v2, v3 ; 10040702 v_mad_f32 v3, v28, v0, -v8 ; D2820003 8422011C v_mac_f32_e32 v3, v1, v27 ; 3E063701 v_mad_f32 v8, -v28, v0, v8 ; D2820008 2422011C v_mad_f32 v8, -v27, v1, v8 ; D2820008 2422031B v_cmp_gt_f32_e32 vcc, 0, v3 ; 7C080680 v_cndmask_b32_e32 v3, v3, v8 ; 00061103 v_mul_f32_e32 v0, v0, v28 ; 10003900 v_mac_f32_e32 v0, v1, v27 ; 3E003701 v_mul_f32_e32 v0, v15, v0 ; 1000010F v_add_f32_e64 v1, 0, v2 clamp ; D2060801 00020480 v_sub_f32_e32 v1, 1.0, v1 ; 080202F2 v_add_f32_e32 v2, v21, v10 ; 06041515 v_mul_f32_e32 v2, v22, v2 ; 10040516 v_rcp_f32_e32 v0, v0 ; 7E005500 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_mac_f32_e32 v9, v2, v1 ; 3E120302 v_mul_f32_e32 v1, v2, v1 ; 10020302 v_mul_f32_e32 v0, v0, v3 ; 10000700 v_add_f32_e32 v2, v21, v11 ; 06041715 v_mul_f32_e32 v2, v22, v2 ; 10040516 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_sub_f32_e32 v0, 1.0, v0 ; 080000F2 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_mac_f32_e32 v9, v2, v0 ; 3E120102 v_rcp_f32_e32 v3, v9 ; 7E065509 v_mac_f32_e32 v4, v23, v1 ; 3E080317 v_mul_f32_e32 v0, v2, v0 ; 10000102 v_mac_f32_e32 v4, v26, v0 ; 3E08011A v_mul_f32_e32 v0, v3, v4 ; 10000903 v_cvt_pkrtz_f16_f32_e32 v0, v0, v5 ; 5E000B00 v_cvt_pkrtz_f16_f32_e32 v1, v6, v7 ; 5E020F06 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 32 Code Size: 972 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0] DCL CONST[2][0..41] DCL CONST[3][0..13] DCL CONST[4][0] DCL TEMP[0..11], LOCAL IMM[0] FLT32 { 0.9961, 0.0039, 255.0000, 0.0625} IMM[1] FLT32 { 16.0000, -8.0000, 0.1429, 0.0000} IMM[2] FLT32 { 1.0000, -2.0000, 0.0350, -0.7000} IMM[3] UINT32 {0, 0, 0, 0} IMM[4] FLT32 { 3.3333, 2.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0].yzw, TEMP[0], SAMP[0], 2D 2: DP2 TEMP[1].x, TEMP[0].yzzz, IMM[0].xyyy 3: MUL TEMP[2].x, TEMP[0].wwww, IMM[0].zzzz 4: MUL TEMP[3].x, TEMP[2].xxxx, IMM[0].wwww 5: FLR TEMP[3].x, TEMP[3].xxxx 6: MOV TEMP[4].x, TEMP[3].xxxx 7: MUL TEMP[3].x, TEMP[3].xxxx, IMM[1].xxxx 8: ADD TEMP[2].x, TEMP[2].xxxx, -TEMP[3].xxxx 9: MOV TEMP[4].y, TEMP[2].xxxx 10: ADD TEMP[2].xy, TEMP[4].xyyy, IMM[1].yyyy 11: MUL TEMP[2].xy, TEMP[2].xyyy, IMM[1].zzzz 12: MOV TEMP[3].x, TEMP[2].xxxx 13: MOV TEMP[3].y, TEMP[2].yyyy 14: MUL TEMP[4].x, TEMP[2].xxxx, TEMP[2].xxxx 15: ADD TEMP[4].x, IMM[2].xxxx, -TEMP[4].xxxx 16: MUL TEMP[2].x, TEMP[2].yyyy, TEMP[2].yyyy 17: ADD TEMP[2].x, TEMP[4].xxxx, -TEMP[2].xxxx 18: MAX TEMP[2].x, IMM[1].wwww, TEMP[2].xxxx 19: SQRT TEMP[2].x, TEMP[2].xxxx 20: MOV TEMP[3].z, TEMP[2].xxxx 21: MOV TEMP[2].x, IMM[1].wwww 22: MOV TEMP[2].y, CONST[1][0].yyyy 23: MAD TEMP[4].xy, IMM[2].yyyy, TEMP[2].xyyy, IN[0].xyyy 24: MOV TEMP[4].xy, TEMP[4].xyyy 25: TEX TEMP[4], TEMP[4], SAMP[0], 2D 26: DP2 TEMP[5].x, TEMP[4].yzzz, IMM[0].xyyy 27: ADD TEMP[6].x, TEMP[5].xxxx, -TEMP[1].xxxx 28: MUL TEMP[7].x, IMM[0].zzzz, TEMP[4].wwww 29: MUL TEMP[8].x, IMM[0].wwww, TEMP[7].xxxx 30: FLR TEMP[8].x, TEMP[8].xxxx 31: MOV TEMP[9].x, TEMP[8].xxxx 32: MUL TEMP[8].x, IMM[1].xxxx, TEMP[8].xxxx 33: ADD TEMP[7].x, TEMP[7].xxxx, -TEMP[8].xxxx 34: MOV TEMP[9].y, TEMP[7].xxxx 35: ADD TEMP[7].xy, IMM[1].yyyy, TEMP[9].xyyy 36: MUL TEMP[7].xy, TEMP[7].xyyy, IMM[1].zzzz 37: MOV TEMP[8].x, TEMP[7].xxxx 38: MOV TEMP[8].y, TEMP[7].yyyy 39: MUL TEMP[9].x, TEMP[7].xxxx, TEMP[7].xxxx 40: ADD TEMP[9].x, IMM[2].xxxx, -TEMP[9].xxxx 41: MUL TEMP[7].x, TEMP[7].yyyy, TEMP[7].yyyy 42: ADD TEMP[7].x, TEMP[9].xxxx, -TEMP[7].xxxx 43: MAX TEMP[7].x, IMM[1].wwww, TEMP[7].xxxx 44: SQRT TEMP[7].x, TEMP[7].xxxx 45: MOV TEMP[8].z, TEMP[7].xxxx 46: MOV TEMP[7].x, -TEMP[6].xxxx 47: FSLT TEMP[9].x, TEMP[6].xxxx, IMM[1].wwww 48: UIF TEMP[9].xxxx :0 49: MOV TEMP[7].x, TEMP[7].xxxx 50: ELSE :0 51: MOV TEMP[7].x, TEMP[6].xxxx 52: ENDIF 53: MUL TEMP[5].x, TEMP[5].xxxx, IMM[2].zzzz 54: RCP TEMP[5].x, TEMP[5].xxxx 55: MUL TEMP[5].x, TEMP[7].xxxx, TEMP[5].xxxx 56: MOV_SAT TEMP[5].x, TEMP[5].xxxx 57: ADD TEMP[5].x, IMM[2].xxxx, -TEMP[5].xxxx 58: DP3 TEMP[6].x, TEMP[8].xyzz, TEMP[3].xyzz 59: ADD TEMP[6].x, TEMP[6].xxxx, IMM[2].wwww 60: MUL TEMP[6].x, TEMP[6].xxxx, IMM[4].xxxx 61: MOV_SAT TEMP[6].x, TEMP[6].xxxx 62: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx 63: MUL TEMP[4].x, TEMP[5].xxxx, TEMP[4].xxxx 64: ADD TEMP[6].xy, -TEMP[2].xyyy, IN[0].xyyy 65: MOV TEMP[6].xy, TEMP[6].xyyy 66: TEX TEMP[6], TEMP[6], SAMP[0], 2D 67: DP2 TEMP[7].x, TEMP[6].yzzz, IMM[0].xyyy 68: ADD TEMP[8].x, TEMP[7].xxxx, -TEMP[1].xxxx 69: MUL TEMP[9].x, IMM[0].zzzz, TEMP[6].wwww 70: MUL TEMP[10].x, IMM[0].wwww, TEMP[9].xxxx 71: FLR TEMP[10].x, TEMP[10].xxxx 72: MOV TEMP[11].x, TEMP[10].xxxx 73: MUL TEMP[10].x, IMM[1].xxxx, TEMP[10].xxxx 74: ADD TEMP[9].x, TEMP[9].xxxx, -TEMP[10].xxxx 75: MOV TEMP[11].y, TEMP[9].xxxx 76: ADD TEMP[9].xy, IMM[1].yyyy, TEMP[11].xyyy 77: MUL TEMP[9].xy, TEMP[9].xyyy, IMM[1].zzzz 78: MOV TEMP[10].x, TEMP[9].xxxx 79: MOV TEMP[10].y, TEMP[9].yyyy 80: MUL TEMP[11].x, TEMP[9].xxxx, TEMP[9].xxxx 81: ADD TEMP[11].x, IMM[2].xxxx, -TEMP[11].xxxx 82: MUL TEMP[9].x, TEMP[9].yyyy, TEMP[9].yyyy 83: ADD TEMP[9].x, TEMP[11].xxxx, -TEMP[9].xxxx 84: MAX TEMP[9].x, IMM[1].wwww, TEMP[9].xxxx 85: SQRT TEMP[9].x, TEMP[9].xxxx 86: MOV TEMP[10].z, TEMP[9].xxxx 87: MOV TEMP[9].x, -TEMP[8].xxxx 88: FSLT TEMP[11].x, TEMP[8].xxxx, IMM[1].wwww 89: UIF TEMP[11].xxxx :0 90: MOV TEMP[9].x, TEMP[9].xxxx 91: ELSE :0 92: MOV TEMP[9].x, TEMP[8].xxxx 93: ENDIF 94: MUL TEMP[7].x, IMM[2].zzzz, TEMP[7].xxxx 95: RCP TEMP[7].x, TEMP[7].xxxx 96: MUL TEMP[7].x, TEMP[9].xxxx, TEMP[7].xxxx 97: MOV_SAT TEMP[7].x, TEMP[7].xxxx 98: ADD TEMP[7].x, IMM[2].xxxx, -TEMP[7].xxxx 99: DP3 TEMP[8].x, TEMP[10].xyzz, TEMP[3].xyzz 100: ADD TEMP[8].x, IMM[2].wwww, TEMP[8].xxxx 101: MUL TEMP[8].x, TEMP[8].xxxx, IMM[4].xxxx 102: MOV_SAT TEMP[8].x, TEMP[8].xxxx 103: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[8].xxxx 104: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[7].xxxx 105: ADD TEMP[5].x, IMM[2].xxxx, TEMP[5].xxxx 106: MAD TEMP[4].x, TEMP[7].xxxx, TEMP[6].xxxx, TEMP[4].xxxx 107: MOV TEMP[6].xy, IN[0].xyyy 108: TEX TEMP[6].x, TEMP[6], SAMP[0], 2D 109: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[6].xxxx 110: ADD TEMP[6].xy, TEMP[2].xyyy, IN[0].xyyy 111: MOV TEMP[6].xy, TEMP[6].xyyy 112: TEX TEMP[6], TEMP[6], SAMP[0], 2D 113: DP2 TEMP[7].x, TEMP[6].yzzz, IMM[0].xyyy 114: ADD TEMP[8].x, TEMP[7].xxxx, -TEMP[1].xxxx 115: MUL TEMP[9].x, IMM[0].zzzz, TEMP[6].wwww 116: MUL TEMP[10].x, IMM[0].wwww, TEMP[9].xxxx 117: FLR TEMP[10].x, TEMP[10].xxxx 118: MOV TEMP[11].x, TEMP[10].xxxx 119: MUL TEMP[10].x, IMM[1].xxxx, TEMP[10].xxxx 120: ADD TEMP[9].x, TEMP[9].xxxx, -TEMP[10].xxxx 121: MOV TEMP[11].y, TEMP[9].xxxx 122: ADD TEMP[9].xy, IMM[1].yyyy, TEMP[11].xyyy 123: MUL TEMP[9].xy, TEMP[9].xyyy, IMM[1].zzzz 124: MOV TEMP[10].x, TEMP[9].xxxx 125: MOV TEMP[10].y, TEMP[9].yyyy 126: MUL TEMP[11].x, TEMP[9].xxxx, TEMP[9].xxxx 127: ADD TEMP[11].x, IMM[2].xxxx, -TEMP[11].xxxx 128: MUL TEMP[9].x, TEMP[9].yyyy, TEMP[9].yyyy 129: ADD TEMP[9].x, TEMP[11].xxxx, -TEMP[9].xxxx 130: MAX TEMP[9].x, IMM[1].wwww, TEMP[9].xxxx 131: SQRT TEMP[9].x, TEMP[9].xxxx 132: MOV TEMP[10].z, TEMP[9].xxxx 133: MOV TEMP[9].x, -TEMP[8].xxxx 134: FSLT TEMP[11].x, TEMP[8].xxxx, IMM[1].wwww 135: UIF TEMP[11].xxxx :0 136: MOV TEMP[9].x, TEMP[9].xxxx 137: ELSE :0 138: MOV TEMP[9].x, TEMP[8].xxxx 139: ENDIF 140: MUL TEMP[7].x, IMM[2].zzzz, TEMP[7].xxxx 141: RCP TEMP[7].x, TEMP[7].xxxx 142: MUL TEMP[7].x, TEMP[9].xxxx, TEMP[7].xxxx 143: MOV_SAT TEMP[7].x, TEMP[7].xxxx 144: ADD TEMP[7].x, IMM[2].xxxx, -TEMP[7].xxxx 145: DP3 TEMP[8].x, TEMP[10].xyzz, TEMP[3].xyzz 146: ADD TEMP[8].x, IMM[2].wwww, TEMP[8].xxxx 147: MUL TEMP[8].x, TEMP[8].xxxx, IMM[4].xxxx 148: MOV_SAT TEMP[8].x, TEMP[8].xxxx 149: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[8].xxxx 150: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[7].xxxx 151: MAD TEMP[4].x, TEMP[7].xxxx, TEMP[6].xxxx, TEMP[4].xxxx 152: MAD TEMP[2].xy, TEMP[2].xyyy, IMM[4].yyyy, IN[0].xyyy 153: MOV TEMP[2].xy, TEMP[2].xyyy 154: TEX TEMP[2], TEMP[2], SAMP[0], 2D 155: DP2 TEMP[6].x, TEMP[2].yzzz, IMM[0].xyyy 156: ADD TEMP[1].x, TEMP[6].xxxx, -TEMP[1].xxxx 157: MUL TEMP[7].x, IMM[0].zzzz, TEMP[2].wwww 158: MUL TEMP[8].x, IMM[0].wwww, TEMP[7].xxxx 159: FLR TEMP[8].x, TEMP[8].xxxx 160: MOV TEMP[9].x, TEMP[8].xxxx 161: MUL TEMP[8].x, IMM[1].xxxx, TEMP[8].xxxx 162: ADD TEMP[7].x, TEMP[7].xxxx, -TEMP[8].xxxx 163: MOV TEMP[9].y, TEMP[7].xxxx 164: ADD TEMP[7].xy, IMM[1].yyyy, TEMP[9].xyyy 165: MUL TEMP[7].xy, TEMP[7].xyyy, IMM[1].zzzz 166: MOV TEMP[8].x, TEMP[7].xxxx 167: MOV TEMP[8].y, TEMP[7].yyyy 168: MUL TEMP[9].x, TEMP[7].xxxx, TEMP[7].xxxx 169: ADD TEMP[9].x, IMM[2].xxxx, -TEMP[9].xxxx 170: MUL TEMP[7].x, TEMP[7].yyyy, TEMP[7].yyyy 171: ADD TEMP[7].x, TEMP[9].xxxx, -TEMP[7].xxxx 172: MAX TEMP[7].x, IMM[1].wwww, TEMP[7].xxxx 173: SQRT TEMP[7].x, TEMP[7].xxxx 174: MOV TEMP[8].z, TEMP[7].xxxx 175: MOV TEMP[7].x, -TEMP[1].xxxx 176: FSLT TEMP[9].x, TEMP[1].xxxx, IMM[1].wwww 177: UIF TEMP[9].xxxx :0 178: MOV TEMP[7].x, TEMP[7].xxxx 179: ELSE :0 180: MOV TEMP[7].x, TEMP[1].xxxx 181: ENDIF 182: MUL TEMP[1].x, IMM[2].zzzz, TEMP[6].xxxx 183: RCP TEMP[1].x, TEMP[1].xxxx 184: MUL TEMP[1].x, TEMP[7].xxxx, TEMP[1].xxxx 185: MOV_SAT TEMP[1].x, TEMP[1].xxxx 186: ADD TEMP[1].x, IMM[2].xxxx, -TEMP[1].xxxx 187: DP3 TEMP[3].x, TEMP[8].xyzz, TEMP[3].xyzz 188: ADD TEMP[3].x, IMM[2].wwww, TEMP[3].xxxx 189: MUL TEMP[3].x, TEMP[3].xxxx, IMM[4].xxxx 190: MOV_SAT TEMP[3].x, TEMP[3].xxxx 191: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[3].xxxx 192: MAD TEMP[2].x, TEMP[1].xxxx, TEMP[2].xxxx, TEMP[4].xxxx 193: ADD TEMP[1].x, TEMP[5].xxxx, TEMP[1].xxxx 194: RCP TEMP[1].x, TEMP[1].xxxx 195: MUL TEMP[1].x, TEMP[2].xxxx, TEMP[1].xxxx 196: MOV TEMP[1].y, TEMP[0].yyyy 197: MOV TEMP[1].z, TEMP[0].zzzz 198: MOV TEMP[1].w, TEMP[0].wwww 199: MOV OUT[0], TEMP[1] 200: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %25 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %26 = load <8 x i32>, <8 x i32> addrspace(2)* %25, align 32, !tbaa !0 %27 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %28 = load <4 x i32>, <4 x i32> addrspace(2)* %27, align 16, !tbaa !0 %29 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %31 = bitcast float %29 to i32 %32 = bitcast float %30 to i32 %33 = insertelement <2 x i32> undef, i32 %31, i32 0 %34 = insertelement <2 x i32> %33, i32 %32, i32 1 %35 = bitcast <8 x i32> %26 to <32 x i8> %36 = bitcast <4 x i32> %28 to <16 x i8> %37 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %34, <32 x i8> %35, <16 x i8> %36, i32 2) %38 = extractelement <4 x float> %37, i32 1 %39 = extractelement <4 x float> %37, i32 2 %40 = extractelement <4 x float> %37, i32 3 %41 = fmul float %38, 0x3FEFE02000000000 %42 = fmul float %39, 0x3F6FE01F80000000 %43 = fadd float %41, %42 %44 = fmul float %40, 2.550000e+02 %45 = fmul float %44, 6.250000e-02 %46 = call float @floor(float %45) %47 = fmul float %46, 1.600000e+01 %48 = fsub float %44, %47 %49 = fadd float %46, -8.000000e+00 %50 = fadd float %48, -8.000000e+00 %51 = fmul float %49, 0x3FC24924A0000000 %52 = fmul float %50, 0x3FC24924A0000000 %53 = fmul float %51, %51 %54 = fsub float 1.000000e+00, %53 %55 = fmul float %52, %52 %56 = fsub float %54, %55 %57 = call float @llvm.maxnum.f32(float %56, float 0.000000e+00) %58 = call float @llvm.sqrt.f32(float %57) %59 = fmul float %24, -2.000000e+00 %60 = fadd float %59, %30 %61 = bitcast float %29 to i32 %62 = bitcast float %60 to i32 %63 = insertelement <2 x i32> undef, i32 %61, i32 0 %64 = insertelement <2 x i32> %63, i32 %62, i32 1 %65 = bitcast <8 x i32> %26 to <32 x i8> %66 = bitcast <4 x i32> %28 to <16 x i8> %67 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %64, <32 x i8> %65, <16 x i8> %66, i32 2) %68 = extractelement <4 x float> %67, i32 0 %69 = extractelement <4 x float> %67, i32 1 %70 = extractelement <4 x float> %67, i32 2 %71 = extractelement <4 x float> %67, i32 3 %72 = fmul float %69, 0x3FEFE02000000000 %73 = fmul float %70, 0x3F6FE01F80000000 %74 = fadd float %72, %73 %75 = fsub float %74, %43 %76 = fmul float %71, 2.550000e+02 %77 = fmul float %76, 6.250000e-02 %78 = call float @floor(float %77) %79 = fmul float %78, 1.600000e+01 %80 = fsub float %76, %79 %81 = fadd float %78, -8.000000e+00 %82 = fadd float %80, -8.000000e+00 %83 = fmul float %81, 0x3FC24924A0000000 %84 = fmul float %82, 0x3FC24924A0000000 %85 = fmul float %83, %83 %86 = fsub float 1.000000e+00, %85 %87 = fmul float %84, %84 %88 = fsub float %86, %87 %89 = call float @llvm.maxnum.f32(float %88, float 0.000000e+00) %90 = call float @llvm.sqrt.f32(float %89) %91 = fsub float -0.000000e+00, %75 %92 = fcmp olt float %75, 0.000000e+00 %. = select i1 %92, float %91, float %75 %93 = fmul float %74, 0x3FA1EB8520000000 %94 = fdiv float 1.000000e+00, %93 %95 = fmul float %., %94 %96 = call float @llvm.AMDIL.clamp.(float %95, float 0.000000e+00, float 1.000000e+00) %97 = fsub float 1.000000e+00, %96 %98 = fmul float %83, %51 %99 = fmul float %84, %52 %100 = fadd float %99, %98 %101 = fmul float %90, %58 %102 = fadd float %100, %101 %103 = fadd float %102, 0xBFE6666660000000 %104 = fmul float %103, 0x400AAAAAA0000000 %105 = call float @llvm.AMDIL.clamp.(float %104, float 0.000000e+00, float 1.000000e+00) %106 = fmul float %97, %105 %107 = fmul float %106, %68 %108 = fsub float %30, %24 %109 = bitcast float %29 to i32 %110 = bitcast float %108 to i32 %111 = insertelement <2 x i32> undef, i32 %109, i32 0 %112 = insertelement <2 x i32> %111, i32 %110, i32 1 %113 = bitcast <8 x i32> %26 to <32 x i8> %114 = bitcast <4 x i32> %28 to <16 x i8> %115 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %112, <32 x i8> %113, <16 x i8> %114, i32 2) %116 = extractelement <4 x float> %115, i32 0 %117 = extractelement <4 x float> %115, i32 1 %118 = extractelement <4 x float> %115, i32 2 %119 = extractelement <4 x float> %115, i32 3 %120 = fmul float %117, 0x3FEFE02000000000 %121 = fmul float %118, 0x3F6FE01F80000000 %122 = fadd float %120, %121 %123 = fsub float %122, %43 %124 = fmul float %119, 2.550000e+02 %125 = fmul float %124, 6.250000e-02 %126 = call float @floor(float %125) %127 = fmul float %126, 1.600000e+01 %128 = fsub float %124, %127 %129 = fadd float %126, -8.000000e+00 %130 = fadd float %128, -8.000000e+00 %131 = fmul float %129, 0x3FC24924A0000000 %132 = fmul float %130, 0x3FC24924A0000000 %133 = fmul float %131, %131 %134 = fsub float 1.000000e+00, %133 %135 = fmul float %132, %132 %136 = fsub float %134, %135 %137 = call float @llvm.maxnum.f32(float %136, float 0.000000e+00) %138 = call float @llvm.sqrt.f32(float %137) %139 = fsub float -0.000000e+00, %123 %140 = fcmp olt float %123, 0.000000e+00 %temp36.0 = select i1 %140, float %139, float %123 %141 = fmul float %122, 0x3FA1EB8520000000 %142 = fdiv float 1.000000e+00, %141 %143 = fmul float %temp36.0, %142 %144 = call float @llvm.AMDIL.clamp.(float %143, float 0.000000e+00, float 1.000000e+00) %145 = fsub float 1.000000e+00, %144 %146 = fmul float %131, %51 %147 = fmul float %132, %52 %148 = fadd float %147, %146 %149 = fmul float %138, %58 %150 = fadd float %148, %149 %151 = fadd float %150, 0xBFE6666660000000 %152 = fmul float %151, 0x400AAAAAA0000000 %153 = call float @llvm.AMDIL.clamp.(float %152, float 0.000000e+00, float 1.000000e+00) %154 = fmul float %145, %153 %155 = fadd float %106, %154 %156 = fadd float %155, 1.000000e+00 %157 = fmul float %154, %116 %158 = fadd float %157, %107 %159 = bitcast float %29 to i32 %160 = bitcast float %30 to i32 %161 = insertelement <2 x i32> undef, i32 %159, i32 0 %162 = insertelement <2 x i32> %161, i32 %160, i32 1 %163 = bitcast <8 x i32> %26 to <32 x i8> %164 = bitcast <4 x i32> %28 to <16 x i8> %165 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %162, <32 x i8> %163, <16 x i8> %164, i32 2) %166 = extractelement <4 x float> %165, i32 0 %167 = fadd float %158, %166 %168 = fadd float %29, 0.000000e+00 %169 = fadd float %24, %30 %170 = bitcast float %168 to i32 %171 = bitcast float %169 to i32 %172 = insertelement <2 x i32> undef, i32 %170, i32 0 %173 = insertelement <2 x i32> %172, i32 %171, i32 1 %174 = bitcast <8 x i32> %26 to <32 x i8> %175 = bitcast <4 x i32> %28 to <16 x i8> %176 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %173, <32 x i8> %174, <16 x i8> %175, i32 2) %177 = extractelement <4 x float> %176, i32 0 %178 = extractelement <4 x float> %176, i32 1 %179 = extractelement <4 x float> %176, i32 2 %180 = extractelement <4 x float> %176, i32 3 %181 = fmul float %178, 0x3FEFE02000000000 %182 = fmul float %179, 0x3F6FE01F80000000 %183 = fadd float %181, %182 %184 = fsub float %183, %43 %185 = fmul float %180, 2.550000e+02 %186 = fmul float %185, 6.250000e-02 %187 = call float @floor(float %186) %188 = fmul float %187, 1.600000e+01 %189 = fsub float %185, %188 %190 = fadd float %187, -8.000000e+00 %191 = fadd float %189, -8.000000e+00 %192 = fmul float %190, 0x3FC24924A0000000 %193 = fmul float %191, 0x3FC24924A0000000 %194 = fmul float %192, %192 %195 = fsub float 1.000000e+00, %194 %196 = fmul float %193, %193 %197 = fsub float %195, %196 %198 = call float @llvm.maxnum.f32(float %197, float 0.000000e+00) %199 = call float @llvm.sqrt.f32(float %198) %200 = fsub float -0.000000e+00, %184 %201 = fcmp olt float %184, 0.000000e+00 %.57 = select i1 %201, float %200, float %184 %202 = fmul float %183, 0x3FA1EB8520000000 %203 = fdiv float 1.000000e+00, %202 %204 = fmul float %.57, %203 %205 = call float @llvm.AMDIL.clamp.(float %204, float 0.000000e+00, float 1.000000e+00) %206 = fsub float 1.000000e+00, %205 %207 = fmul float %192, %51 %208 = fmul float %193, %52 %209 = fadd float %208, %207 %210 = fmul float %199, %58 %211 = fadd float %209, %210 %212 = fadd float %211, 0xBFE6666660000000 %213 = fmul float %212, 0x400AAAAAA0000000 %214 = call float @llvm.AMDIL.clamp.(float %213, float 0.000000e+00, float 1.000000e+00) %215 = fmul float %206, %214 %216 = fadd float %156, %215 %217 = fmul float %215, %177 %218 = fadd float %217, %167 %219 = fadd float %29, 0.000000e+00 %220 = fmul float %24, 2.000000e+00 %221 = fadd float %220, %30 %222 = bitcast float %219 to i32 %223 = bitcast float %221 to i32 %224 = insertelement <2 x i32> undef, i32 %222, i32 0 %225 = insertelement <2 x i32> %224, i32 %223, i32 1 %226 = bitcast <8 x i32> %26 to <32 x i8> %227 = bitcast <4 x i32> %28 to <16 x i8> %228 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %225, <32 x i8> %226, <16 x i8> %227, i32 2) %229 = extractelement <4 x float> %228, i32 0 %230 = extractelement <4 x float> %228, i32 1 %231 = extractelement <4 x float> %228, i32 2 %232 = extractelement <4 x float> %228, i32 3 %233 = fmul float %230, 0x3FEFE02000000000 %234 = fmul float %231, 0x3F6FE01F80000000 %235 = fadd float %233, %234 %236 = fsub float %235, %43 %237 = fmul float %232, 2.550000e+02 %238 = fmul float %237, 6.250000e-02 %239 = call float @floor(float %238) %240 = fmul float %239, 1.600000e+01 %241 = fsub float %237, %240 %242 = fadd float %239, -8.000000e+00 %243 = fadd float %241, -8.000000e+00 %244 = fmul float %242, 0x3FC24924A0000000 %245 = fmul float %243, 0x3FC24924A0000000 %246 = fmul float %244, %244 %247 = fsub float 1.000000e+00, %246 %248 = fmul float %245, %245 %249 = fsub float %247, %248 %250 = call float @llvm.maxnum.f32(float %249, float 0.000000e+00) %251 = call float @llvm.sqrt.f32(float %250) %252 = fsub float -0.000000e+00, %236 %253 = fcmp olt float %236, 0.000000e+00 %temp28.1 = select i1 %253, float %252, float %236 %254 = fmul float %235, 0x3FA1EB8520000000 %255 = fdiv float 1.000000e+00, %254 %256 = fmul float %temp28.1, %255 %257 = call float @llvm.AMDIL.clamp.(float %256, float 0.000000e+00, float 1.000000e+00) %258 = fsub float 1.000000e+00, %257 %259 = fmul float %244, %51 %260 = fmul float %245, %52 %261 = fadd float %260, %259 %262 = fmul float %251, %58 %263 = fadd float %261, %262 %264 = fadd float %263, 0xBFE6666660000000 %265 = fmul float %264, 0x400AAAAAA0000000 %266 = call float @llvm.AMDIL.clamp.(float %265, float 0.000000e+00, float 1.000000e+00) %267 = fmul float %258, %266 %268 = fmul float %267, %229 %269 = fadd float %268, %218 %270 = fadd float %216, %267 %271 = fdiv float 1.000000e+00, %270 %272 = fmul float %269, %271 %273 = call i32 @llvm.SI.packf16(float %272, float %38) %274 = bitcast i32 %273 to float %275 = call i32 @llvm.SI.packf16(float %39, float %40) %276 = bitcast i32 %275 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %274, float %276, float %274, float %276) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @floor(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x1 ; C2040101 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_mov_b32_e32 v0, v2 ; 7E000302 v_mov_b32_e32 v1, v3 ; 7E020303 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, -2.0, s8, v3 ; D2820001 040C10F5 v_mov_b32_e32 v4, 0x3b7f00fc ; 7E0802FF 3B7F00FC v_mov_b32_e32 v5, 0x3f7f0100 ; 7E0A02FF 3F7F0100 image_sample v[6:9], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00030602 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v10, v4, v8 ; 10141104 v_mac_f32_e32 v10, v5, v7 ; 3E140F05 image_sample v[11:14], 15, 0, 0, 0, 0, 0, 0, 0, v[0:1], s[12:19], s[0:3] ; F0800F00 00030B00 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v4, v13 ; 10001B04 v_mac_f32_e32 v0, v5, v12 ; 3E001905 v_mad_f32 v1, v13, v4, -v10 ; D2820001 842A090D v_mac_f32_e32 v1, v5, v12 ; 3E021905 v_mov_b32_e32 v15, 0x3d0f5c29 ; 7E1E02FF 3D0F5C29 v_mul_f32_e32 v0, v15, v0 ; 1000010F v_rcp_f32_e32 v0, v0 ; 7E005500 v_mad_f32 v13, -v13, v4, v10 ; D282000D 242A090D v_mad_f32 v12, -v12, v5, v13 ; D282000C 24360B0C v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e32 v1, v1, v12 ; 00021901 v_mul_f32_e32 v0, v0, v1 ; 10000300 v_mov_b32_e32 v1, 0x417f0000 ; 7E0202FF 417F0000 v_mul_f32_e32 v12, v1, v9 ; 10181301 v_floor_f32_e32 v12, v12 ; 7E18490C v_mov_b32_e32 v13, 0xc1800000 ; 7E1A02FF C1800000 v_mul_f32_e32 v16, v13, v12 ; 1020190D v_mov_b32_e32 v17, 0x437f0000 ; 7E2202FF 437F0000 v_mac_f32_e32 v16, v17, v9 ; 3E201311 v_mov_b32_e32 v18, 0xc1000000 ; 7E2402FF C1000000 v_add_f32_e32 v12, v18, v12 ; 06181912 v_add_f32_e32 v16, v18, v16 ; 06202112 v_mov_b32_e32 v19, 0x3e124925 ; 7E2602FF 3E124925 v_mul_f32_e32 v12, v19, v12 ; 10181913 v_mul_f32_e32 v16, v19, v16 ; 10202113 v_mul_f32_e32 v20, v1, v14 ; 10281D01 v_floor_f32_e32 v20, v20 ; 7E284914 v_mul_f32_e32 v21, v13, v20 ; 102A290D v_mac_f32_e32 v21, v17, v14 ; 3E2A1D11 v_add_f32_e32 v14, v18, v20 ; 061C2912 v_add_f32_e32 v20, v18, v21 ; 06282B12 v_mul_f32_e32 v14, v19, v14 ; 101C1D13 v_mul_f32_e32 v20, v19, v20 ; 10282913 v_mad_f32 v21, -v14, v14, 1.0 ; D2820015 23CA1D0E v_mad_f32 v21, -v20, v20, v21 ; D2820015 24562914 v_mul_f32_e32 v14, v12, v14 ; 101C1D0C v_mac_f32_e32 v14, v16, v20 ; 3E1C2910 v_mad_f32 v20, -v12, v12, 1.0 ; D2820014 23CA190C v_mad_f32 v20, -v16, v16, v20 ; D2820014 24522110 v_max_f32_e32 v20, 0, v20 ; 20282880 v_sqrt_f32_e32 v20, v20 ; 7E286714 v_max_f32_e32 v21, 0, v21 ; 202A2A80 v_sqrt_f32_e32 v21, v21 ; 7E2A6715 v_mac_f32_e32 v14, v20, v21 ; 3E1C2B14 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_sub_f32_e32 v0, 1.0, v0 ; 080000F2 v_mov_b32_e32 v21, 0xbf333333 ; 7E2A02FF BF333333 v_add_f32_e32 v14, v21, v14 ; 061C1D15 v_mov_b32_e32 v22, 0x40555555 ; 7E2C02FF 40555555 v_mul_f32_e32 v14, v22, v14 ; 101C1D16 v_add_f32_e64 v14, 0, v14 clamp ; D206080E 00021C80 v_mul_f32_e32 v23, v14, v0 ; 102E010E v_subrev_f32_e32 v24, s8, v3 ; 0A300608 v_mov_b32_e32 v25, v2 ; 7E320302 v_mov_b32_e32 v26, v3 ; 7E340303 v_mac_f32_e32 v6, v11, v23 ; 3E0C2F0B v_mov_b32_e32 v26, v24 ; 7E340318 image_sample v[23:26], 15, 0, 0, 0, 0, 0, 0, 0, v[25:26], s[12:19], s[0:3] ; F0800F00 00031719 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v11, v4, v25 ; 10163304 v_mac_f32_e32 v11, v5, v24 ; 3E163105 v_mad_f32 v27, v25, v4, -v10 ; D282001B 842A0919 v_mac_f32_e32 v27, v5, v24 ; 3E363105 v_mul_f32_e32 v11, v15, v11 ; 1016170F v_rcp_f32_e32 v11, v11 ; 7E16550B v_mad_f32 v25, -v25, v4, v10 ; D2820019 242A0919 v_mad_f32 v24, -v24, v5, v25 ; D2820018 24660B18 v_cmp_gt_f32_e32 vcc, 0, v27 ; 7C083680 v_cndmask_b32_e32 v24, v27, v24 ; 0030311B v_mul_f32_e32 v11, v11, v24 ; 1016310B v_mul_f32_e32 v24, v1, v26 ; 10303501 v_floor_f32_e32 v24, v24 ; 7E304918 v_mul_f32_e32 v25, v13, v24 ; 1032310D v_mac_f32_e32 v25, v17, v26 ; 3E323511 v_add_f32_e32 v24, v18, v24 ; 06303112 v_add_f32_e32 v25, v18, v25 ; 06323312 v_mul_f32_e32 v24, v19, v24 ; 10303113 v_mul_f32_e32 v25, v19, v25 ; 10323313 v_mad_f32 v26, -v24, v24, 1.0 ; D282001A 23CA3118 v_mad_f32 v26, -v25, v25, v26 ; D282001A 246A3319 v_mul_f32_e32 v24, v12, v24 ; 1030310C v_mac_f32_e32 v24, v16, v25 ; 3E303310 v_max_f32_e32 v25, 0, v26 ; 20323480 v_sqrt_f32_e32 v25, v25 ; 7E326719 v_mac_f32_e32 v24, v20, v25 ; 3E303314 v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680 v_sub_f32_e32 v11, 1.0, v11 ; 081616F2 v_add_f32_e32 v24, v21, v24 ; 06303115 v_mul_f32_e32 v24, v22, v24 ; 10303116 v_add_f32_e64 v24, 0, v24 clamp ; D2060818 00023080 v_mul_f32_e32 v25, v24, v11 ; 10321718 v_mac_f32_e32 v6, v23, v25 ; 3E0C3317 v_mad_f32 v11, v11, v24, 1.0 ; D282000B 03CA310B v_add_f32_e32 v23, s8, v3 ; 062E0608 v_mov_b32_e32 v24, v2 ; 7E300302 v_mov_b32_e32 v25, v3 ; 7E320303 v_mac_f32_e32 v11, v14, v0 ; 3E16010E v_mov_b32_e32 v25, v23 ; 7E320317 image_sample v[23:26], 15, 0, 0, 0, 0, 0, 0, 0, v[24:25], s[12:19], s[0:3] ; F0800F00 00031718 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v1, v26 ; 10003501 v_floor_f32_e32 v0, v0 ; 7E004900 v_mul_f32_e32 v14, v13, v0 ; 101C010D v_mac_f32_e32 v14, v17, v26 ; 3E1C3511 v_add_f32_e32 v0, v18, v0 ; 06000112 v_add_f32_e32 v14, v18, v14 ; 061C1D12 v_mul_f32_e32 v0, v19, v0 ; 10000113 v_mul_f32_e32 v14, v19, v14 ; 101C1D13 v_mad_f32 v26, -v0, v0, 1.0 ; D282001A 23CA0100 v_mad_f32 v26, -v14, v14, v26 ; D282001A 246A1D0E v_mul_f32_e32 v0, v12, v0 ; 1000010C v_mac_f32_e32 v0, v16, v14 ; 3E001D10 v_max_f32_e32 v14, 0, v26 ; 201C3480 v_sqrt_f32_e32 v14, v14 ; 7E1C670E v_mac_f32_e32 v0, v20, v14 ; 3E001D14 v_mac_f32_e64 v3, 2.0, s8 ; D23E0003 000010F4 image_sample v[26:29], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00031A02 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, v1, v29 ; 10023B01 v_floor_f32_e32 v1, v1 ; 7E024901 v_mul_f32_e32 v2, v13, v1 ; 1004030D v_mac_f32_e32 v2, v17, v29 ; 3E043B11 v_add_f32_e32 v1, v18, v1 ; 06020312 v_add_f32_e32 v2, v18, v2 ; 06040512 v_mul_f32_e32 v1, v19, v1 ; 10020313 v_mul_f32_e32 v2, v19, v2 ; 10040513 v_mul_f32_e32 v3, v12, v1 ; 1006030C v_mac_f32_e32 v3, v16, v2 ; 3E060510 v_mad_f32 v1, -v1, v1, 1.0 ; D2820001 23CA0301 v_mad_f32 v1, -v2, v2, v1 ; D2820001 24060502 v_max_f32_e32 v1, 0, v1 ; 20020280 v_sqrt_f32_e32 v1, v1 ; 7E026701 v_mac_f32_e32 v3, v20, v1 ; 3E060314 v_mul_f32_e32 v1, v4, v25 ; 10023304 v_mac_f32_e32 v1, v5, v24 ; 3E023105 v_mad_f32 v2, v25, v4, -v10 ; D2820002 842A0919 v_mac_f32_e32 v2, v5, v24 ; 3E043105 v_mul_f32_e32 v1, v15, v1 ; 1002030F v_rcp_f32_e32 v1, v1 ; 7E025501 v_mad_f32 v12, -v25, v4, v10 ; D282000C 242A0919 v_mad_f32 v12, -v24, v5, v12 ; D282000C 24320B18 v_cmp_gt_f32_e32 vcc, 0, v2 ; 7C080480 v_cndmask_b32_e32 v2, v2, v12 ; 00041902 v_mul_f32_e32 v1, v1, v2 ; 10020501 v_mad_f32 v2, v28, v4, -v10 ; D2820002 842A091C v_mac_f32_e32 v2, v5, v27 ; 3E043705 v_mad_f32 v10, -v28, v4, v10 ; D282000A 242A091C v_mad_f32 v10, -v27, v5, v10 ; D282000A 242A0B1B v_cmp_gt_f32_e32 vcc, 0, v2 ; 7C080480 v_cndmask_b32_e32 v2, v2, v10 ; 00041502 v_mul_f32_e32 v4, v4, v28 ; 10083904 v_mac_f32_e32 v4, v5, v27 ; 3E083705 v_mul_f32_e32 v4, v15, v4 ; 1008090F v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_sub_f32_e32 v1, 1.0, v1 ; 080202F2 v_add_f32_e32 v0, v21, v0 ; 06000115 v_mul_f32_e32 v0, v22, v0 ; 10000116 v_rcp_f32_e32 v4, v4 ; 7E085504 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_mac_f32_e32 v11, v0, v1 ; 3E160300 v_mul_f32_e32 v0, v0, v1 ; 10000300 v_mul_f32_e32 v1, v4, v2 ; 10020504 v_add_f32_e32 v2, v21, v3 ; 06040715 v_mul_f32_e32 v2, v22, v2 ; 10040516 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_sub_f32_e32 v1, 1.0, v1 ; 080202F2 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_mac_f32_e32 v11, v2, v1 ; 3E160302 v_rcp_f32_e32 v3, v11 ; 7E06550B v_mac_f32_e32 v6, v23, v0 ; 3E0C0117 v_mul_f32_e32 v0, v2, v1 ; 10000302 v_mac_f32_e32 v6, v26, v0 ; 3E0C011A v_mul_f32_e32 v0, v3, v6 ; 10000D03 v_cvt_pkrtz_f16_f32_e32 v0, v0, v7 ; 5E000F00 v_cvt_pkrtz_f16_f32_e32 v1, v8, v9 ; 5E021308 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 32 Code Size: 996 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL CONST[1][0..41] DCL CONST[2][0..13] DCL CONST[3][0] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL DCL TEMP[2..3], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, -1.0000, 0.0000} IMM[1] UINT32 {0, 64, 80, 96} IMM[2] UINT32 {112, 68, 84, 100} IMM[3] UINT32 {116, 72, 88, 104} IMM[4] UINT32 {120, 76, 92, 108} IMM[5] UINT32 {124, 304, 320, 0} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MOV TEMP[0].zw, IMM[0].yyyy 4: MOV TEMP[0].x, IN[0].xxxx 5: MOV TEMP[0].y, IN[0].yyyy 6: MOV TEMP[2].x, CONST[1][4].xxxx 7: MOV TEMP[2].y, CONST[1][5].xxxx 8: MOV TEMP[2].z, CONST[1][6].xxxx 9: MOV TEMP[2].w, CONST[1][7].xxxx 10: DP4 TEMP[2].x, TEMP[0], TEMP[2] 11: MOV TEMP[3].x, CONST[1][4].yyyy 12: MOV TEMP[3].y, CONST[1][5].yyyy 13: MOV TEMP[3].z, CONST[1][6].yyyy 14: MOV TEMP[3].w, CONST[1][7].yyyy 15: DP4 TEMP[3].x, TEMP[0], TEMP[3] 16: MOV TEMP[2].y, TEMP[3].xxxx 17: MOV TEMP[3].x, CONST[1][4].zzzz 18: MOV TEMP[3].y, CONST[1][5].zzzz 19: MOV TEMP[3].z, CONST[1][6].zzzz 20: MOV TEMP[3].w, CONST[1][7].zzzz 21: DP4 TEMP[3].x, TEMP[0], TEMP[3] 22: MOV TEMP[2].z, TEMP[3].xxxx 23: MOV TEMP[3].x, CONST[1][4].wwww 24: MOV TEMP[3].y, CONST[1][5].wwww 25: MOV TEMP[3].z, CONST[1][6].wwww 26: MOV TEMP[3].w, CONST[1][7].wwww 27: DP4 TEMP[0].x, TEMP[0], TEMP[3] 28: RCP TEMP[0].xyz, TEMP[0].xxxx 29: MAD TEMP[0].xyz, TEMP[2].xyzz, TEMP[0].xyzz, -CONST[1][19].xyzz 30: DP3 TEMP[2].x, TEMP[0].xyzz, TEMP[0].xyzz 31: RSQ TEMP[2].x, TEMP[2].xxxx 32: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xxxx 33: DP3 TEMP[2].x, CONST[1][20].xyzz, TEMP[0].xyzz 34: RCP TEMP[2].x, TEMP[2].xxxx 35: MUL TEMP[0].xyz, TEMP[2].xxxx, TEMP[0].xyzz 36: MOV TEMP[2].zw, IMM[0].yyzy 37: MOV TEMP[2].x, IN[0].xxxx 38: MOV TEMP[2].y, -IN[0].yyyy 39: MOV OUT[1], TEMP[1] 40: MOV OUT[2].xy, IN[1].xyxx 41: MOV OUT[0], TEMP[2] 42: MOV OUT[3].xyz, TEMP[0].xyzx 43: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328) %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 %37 = add i32 %5, %7 %38 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %37) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 %43 = add i32 %5, %7 %44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %43) %45 = extractelement <4 x float> %44, i32 0 %46 = extractelement <4 x float> %44, i32 1 %47 = fmul float %39, %13 %48 = fmul float %40, %17 %49 = fadd float %47, %48 %50 = fadd float %49, %21 %51 = fadd float %50, %25 %52 = fmul float %39, %14 %53 = fmul float %40, %18 %54 = fadd float %52, %53 %55 = fadd float %54, %22 %56 = fadd float %55, %26 %57 = fmul float %39, %15 %58 = fmul float %40, %19 %59 = fadd float %57, %58 %60 = fadd float %59, %23 %61 = fadd float %60, %27 %62 = fmul float %39, %16 %63 = fmul float %40, %20 %64 = fadd float %62, %63 %65 = fadd float %64, %24 %66 = fadd float %65, %28 %67 = fdiv float 1.000000e+00, %66 %68 = fmul float %51, %67 %69 = fsub float %68, %29 %70 = fmul float %56, %67 %71 = fsub float %70, %30 %72 = fmul float %61, %67 %73 = fsub float %72, %31 %74 = fmul float %69, %69 %75 = fmul float %71, %71 %76 = fadd float %75, %74 %77 = fmul float %73, %73 %78 = fadd float %76, %77 %79 = call float @llvm.AMDGPU.rsq.clamped.f32(float %78) %80 = fmul float %69, %79 %81 = fmul float %71, %79 %82 = fmul float %73, %79 %83 = fmul float %32, %80 %84 = fmul float %33, %81 %85 = fadd float %84, %83 %86 = fmul float %34, %82 %87 = fadd float %85, %86 %88 = fdiv float 1.000000e+00, %87 %89 = fmul float %88, %80 %90 = fmul float %88, %81 %91 = fmul float %88, %82 %92 = fsub float -0.000000e+00, %40 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %45, float %46, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %89, float %90, float %91, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %39, float %92, float -1.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0x1a ; C206011A buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[3:6], v0, s[8:11], 0 idxen ; E00C2000 80020300 s_buffer_load_dword s5, s[0:3], 0x1c ; C202811C s_buffer_load_dword s6, s[0:3], 0x1d ; C203011D s_buffer_load_dword s7, s[0:3], 0x1e ; C203811E s_buffer_load_dword s8, s[0:3], 0x15 ; C2040115 s_buffer_load_dword s9, s[0:3], 0x16 ; C2048116 s_buffer_load_dword s10, s[0:3], 0x17 ; C2050117 v_mov_b32_e32 v0, s12 ; 7E00020C s_buffer_load_dword s11, s[0:3], 0x18 ; C2058118 s_buffer_load_dword s12, s[0:3], 0x19 ; C2060119 s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v5, s4 ; 7E0A0204 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_buffer_load_dword s13, s[0:3], 0x11 ; C2068111 s_buffer_load_dword s14, s[0:3], 0x12 ; C2070112 s_buffer_load_dword s15, s[0:3], 0x13 ; C2078113 s_buffer_load_dword s16, s[0:3], 0x14 ; C2080114 s_buffer_load_dword s17, s[0:3], 0x1f ; C208811F s_buffer_load_dword s18, s[0:3], 0x4c ; C209014C s_buffer_load_dword s19, s[0:3], 0x4d ; C209814D s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s11 ; 7E0C020B s_buffer_load_dword s11, s[0:3], 0x4e ; C205814E v_mov_b32_e32 v7, s12 ; 7E0E020C s_buffer_load_dword s12, s[0:3], 0x50 ; C2060150 s_buffer_load_dword s20, s[0:3], 0x51 ; C20A0151 s_buffer_load_dword s0, s[0:3], 0x52 ; C2000152 v_mac_f32_e32 v6, s16, v2 ; 3E0C0410 v_mac_f32_e32 v7, s8, v2 ; 3E0E0408 v_mac_f32_e32 v0, s9, v2 ; 3E000409 v_mac_f32_e32 v5, s10, v2 ; 3E0A040A v_mac_f32_e32 v6, s4, v1 ; 3E0C0204 v_mac_f32_e32 v7, s13, v1 ; 3E0E020D v_mac_f32_e32 v0, s14, v1 ; 3E00020E v_mac_f32_e32 v5, s15, v1 ; 3E0A020F v_add_f32_e32 v5, s17, v5 ; 060A0A11 v_rcp_f32_e32 v5, v5 ; 7E0A5505 v_add_f32_e32 v6, s5, v6 ; 060C0C05 v_add_f32_e32 v7, s6, v7 ; 060E0E06 v_add_f32_e32 v0, s7, v0 ; 06000007 v_mad_f32 v6, v6, v5, -s18 ; D2820006 804A0B06 v_mad_f32 v7, v7, v5, -s19 ; D2820007 804E0B07 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, v5, -s11 ; D2820000 802E0B00 v_mul_f32_e32 v5, v6, v6 ; 100A0D06 v_mac_f32_e32 v5, v7, v7 ; 3E0A0F07 v_mac_f32_e32 v5, v0, v0 ; 3E0A0100 v_rsq_clamp_f32_e32 v5, v5 ; 7E0A5905 v_mul_f32_e32 v6, v5, v6 ; 100C0D05 v_mul_f32_e32 v7, v5, v7 ; 100E0F05 v_mul_f32_e32 v0, v5, v0 ; 10000105 v_mul_f32_e32 v5, s12, v6 ; 100A0C0C v_mac_f32_e32 v5, s20, v7 ; 3E0A0E14 v_mac_f32_e32 v5, s0, v0 ; 3E0A0000 v_rcp_f32_e32 v5, v5 ; 7E0A5505 v_mov_b32_e32 v8, 0 ; 7E100280 exp 15, 32, 0, 0, 0, v8, v8, v8, v8 ; F800020F 08080808 exp 15, 33, 0, 0, 0, v3, v4, v8, v8 ; F800021F 08080403 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v3, v6, v5 ; 10060B06 v_mul_f32_e32 v4, v7, v5 ; 10080B07 v_mul_f32_e32 v0, v0, v5 ; 10000B00 exp 15, 34, 0, 0, 0, v3, v4, v0, v8 ; F800022F 08000403 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, 1.0 ; 7E0002F2 v_xor_b32_e32 v2, 0x80000000, v2 ; 3A0404FF 80000000 v_mov_b32_e32 v3, -1.0 ; 7E0602F3 exp 15, 12, 0, 0, 0, v1, v2, v3, v0 ; F80000CF 00030201 exp 15, 13, 0, 1, 0, v8, v8, v8, v8 ; F80008DF 08080808 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 360 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0..1] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: MOV TEMP[0].w, IMM[0].xxxx 2: TXL TEMP[0], TEMP[0], SAMP[0], 2D 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = bitcast float %26 to i32 %29 = bitcast float %27 to i32 %30 = insertelement <4 x i32> undef, i32 %28, i32 0 %31 = insertelement <4 x i32> %30, i32 %29, i32 1 %32 = insertelement <4 x i32> %31, i32 0, i32 2 %33 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %32, <32 x i8> %23, <16 x i8> %25, i32 2) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = extractelement <4 x float> %33, i32 2 %37 = extractelement <4 x float> %33, i32 3 %38 = call i32 @llvm.SI.packf16(float %34, float %35) %39 = bitcast i32 %38 to float %40 = call i32 @llvm.SI.packf16(float %36, float %37) %41 = bitcast i32 %40 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %39, float %41, float %39, float %41) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_mov_b32_e32 v4, 0 ; 7E080280 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:5], s[12:19], s[0:3] ; F0900F00 00030002 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 68 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL CONST[1][0..41] DCL CONST[2][0..13] DCL CONST[3][0] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL DCL TEMP[2..3], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, -1.0000, 0.0000} IMM[1] UINT32 {0, 64, 80, 96} IMM[2] UINT32 {112, 68, 84, 100} IMM[3] UINT32 {116, 72, 88, 104} IMM[4] UINT32 {120, 76, 92, 108} IMM[5] UINT32 {124, 304, 320, 0} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MOV TEMP[0].zw, IMM[0].yyyy 4: MOV TEMP[0].x, IN[0].xxxx 5: MOV TEMP[0].y, IN[0].yyyy 6: MOV TEMP[2].x, CONST[1][4].xxxx 7: MOV TEMP[2].y, CONST[1][5].xxxx 8: MOV TEMP[2].z, CONST[1][6].xxxx 9: MOV TEMP[2].w, CONST[1][7].xxxx 10: DP4 TEMP[2].x, TEMP[0], TEMP[2] 11: MOV TEMP[3].x, CONST[1][4].yyyy 12: MOV TEMP[3].y, CONST[1][5].yyyy 13: MOV TEMP[3].z, CONST[1][6].yyyy 14: MOV TEMP[3].w, CONST[1][7].yyyy 15: DP4 TEMP[3].x, TEMP[0], TEMP[3] 16: MOV TEMP[2].y, TEMP[3].xxxx 17: MOV TEMP[3].x, CONST[1][4].zzzz 18: MOV TEMP[3].y, CONST[1][5].zzzz 19: MOV TEMP[3].z, CONST[1][6].zzzz 20: MOV TEMP[3].w, CONST[1][7].zzzz 21: DP4 TEMP[3].x, TEMP[0], TEMP[3] 22: MOV TEMP[2].z, TEMP[3].xxxx 23: MOV TEMP[3].x, CONST[1][4].wwww 24: MOV TEMP[3].y, CONST[1][5].wwww 25: MOV TEMP[3].z, CONST[1][6].wwww 26: MOV TEMP[3].w, CONST[1][7].wwww 27: DP4 TEMP[0].x, TEMP[0], TEMP[3] 28: RCP TEMP[0].xyz, TEMP[0].xxxx 29: MAD TEMP[0].xyz, TEMP[2].xyzz, TEMP[0].xyzz, -CONST[1][19].xyzz 30: DP3 TEMP[2].x, TEMP[0].xyzz, TEMP[0].xyzz 31: RSQ TEMP[2].x, TEMP[2].xxxx 32: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xxxx 33: DP3 TEMP[2].x, CONST[1][20].xyzz, TEMP[0].xyzz 34: RCP TEMP[2].x, TEMP[2].xxxx 35: MUL TEMP[0].xyz, TEMP[2].xxxx, TEMP[0].xyzz 36: MOV TEMP[2].zw, IMM[0].yyzy 37: MOV TEMP[2].x, IN[0].xxxx 38: MOV TEMP[2].y, -IN[0].yyyy 39: MOV OUT[1], TEMP[1] 40: MOV OUT[2].xy, IN[1].xyxx 41: MOV OUT[0], TEMP[2] 42: MOV OUT[3].xyz, TEMP[0].xyzx 43: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328) %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 %37 = add i32 %5, %7 %38 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %37) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 %43 = add i32 %5, %7 %44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %43) %45 = extractelement <4 x float> %44, i32 0 %46 = extractelement <4 x float> %44, i32 1 %47 = fmul float %39, %13 %48 = fmul float %40, %17 %49 = fadd float %47, %48 %50 = fadd float %49, %21 %51 = fadd float %50, %25 %52 = fmul float %39, %14 %53 = fmul float %40, %18 %54 = fadd float %52, %53 %55 = fadd float %54, %22 %56 = fadd float %55, %26 %57 = fmul float %39, %15 %58 = fmul float %40, %19 %59 = fadd float %57, %58 %60 = fadd float %59, %23 %61 = fadd float %60, %27 %62 = fmul float %39, %16 %63 = fmul float %40, %20 %64 = fadd float %62, %63 %65 = fadd float %64, %24 %66 = fadd float %65, %28 %67 = fdiv float 1.000000e+00, %66 %68 = fmul float %51, %67 %69 = fsub float %68, %29 %70 = fmul float %56, %67 %71 = fsub float %70, %30 %72 = fmul float %61, %67 %73 = fsub float %72, %31 %74 = fmul float %69, %69 %75 = fmul float %71, %71 %76 = fadd float %75, %74 %77 = fmul float %73, %73 %78 = fadd float %76, %77 %79 = call float @llvm.AMDGPU.rsq.clamped.f32(float %78) %80 = fmul float %69, %79 %81 = fmul float %71, %79 %82 = fmul float %73, %79 %83 = fmul float %32, %80 %84 = fmul float %33, %81 %85 = fadd float %84, %83 %86 = fmul float %34, %82 %87 = fadd float %85, %86 %88 = fdiv float 1.000000e+00, %87 %89 = fmul float %88, %80 %90 = fmul float %88, %81 %91 = fmul float %88, %82 %92 = fsub float -0.000000e+00, %40 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %45, float %46, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %89, float %90, float %91, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %39, float %92, float -1.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0x1a ; C206011A buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[3:6], v0, s[8:11], 0 idxen ; E00C2000 80020300 s_buffer_load_dword s5, s[0:3], 0x1c ; C202811C s_buffer_load_dword s6, s[0:3], 0x1d ; C203011D s_buffer_load_dword s7, s[0:3], 0x1e ; C203811E s_buffer_load_dword s8, s[0:3], 0x15 ; C2040115 s_buffer_load_dword s9, s[0:3], 0x16 ; C2048116 s_buffer_load_dword s10, s[0:3], 0x17 ; C2050117 v_mov_b32_e32 v0, s12 ; 7E00020C s_buffer_load_dword s11, s[0:3], 0x18 ; C2058118 s_buffer_load_dword s12, s[0:3], 0x19 ; C2060119 s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v5, s4 ; 7E0A0204 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_buffer_load_dword s13, s[0:3], 0x11 ; C2068111 s_buffer_load_dword s14, s[0:3], 0x12 ; C2070112 s_buffer_load_dword s15, s[0:3], 0x13 ; C2078113 s_buffer_load_dword s16, s[0:3], 0x14 ; C2080114 s_buffer_load_dword s17, s[0:3], 0x1f ; C208811F s_buffer_load_dword s18, s[0:3], 0x4c ; C209014C s_buffer_load_dword s19, s[0:3], 0x4d ; C209814D s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s11 ; 7E0C020B s_buffer_load_dword s11, s[0:3], 0x4e ; C205814E v_mov_b32_e32 v7, s12 ; 7E0E020C s_buffer_load_dword s12, s[0:3], 0x50 ; C2060150 s_buffer_load_dword s20, s[0:3], 0x51 ; C20A0151 s_buffer_load_dword s0, s[0:3], 0x52 ; C2000152 v_mac_f32_e32 v6, s16, v2 ; 3E0C0410 v_mac_f32_e32 v7, s8, v2 ; 3E0E0408 v_mac_f32_e32 v0, s9, v2 ; 3E000409 v_mac_f32_e32 v5, s10, v2 ; 3E0A040A v_mac_f32_e32 v6, s4, v1 ; 3E0C0204 v_mac_f32_e32 v7, s13, v1 ; 3E0E020D v_mac_f32_e32 v0, s14, v1 ; 3E00020E v_mac_f32_e32 v5, s15, v1 ; 3E0A020F v_add_f32_e32 v5, s17, v5 ; 060A0A11 v_rcp_f32_e32 v5, v5 ; 7E0A5505 v_add_f32_e32 v6, s5, v6 ; 060C0C05 v_add_f32_e32 v7, s6, v7 ; 060E0E06 v_add_f32_e32 v0, s7, v0 ; 06000007 v_mad_f32 v6, v6, v5, -s18 ; D2820006 804A0B06 v_mad_f32 v7, v7, v5, -s19 ; D2820007 804E0B07 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, v5, -s11 ; D2820000 802E0B00 v_mul_f32_e32 v5, v6, v6 ; 100A0D06 v_mac_f32_e32 v5, v7, v7 ; 3E0A0F07 v_mac_f32_e32 v5, v0, v0 ; 3E0A0100 v_rsq_clamp_f32_e32 v5, v5 ; 7E0A5905 v_mul_f32_e32 v6, v5, v6 ; 100C0D05 v_mul_f32_e32 v7, v5, v7 ; 100E0F05 v_mul_f32_e32 v0, v5, v0 ; 10000105 v_mul_f32_e32 v5, s12, v6 ; 100A0C0C v_mac_f32_e32 v5, s20, v7 ; 3E0A0E14 v_mac_f32_e32 v5, s0, v0 ; 3E0A0000 v_rcp_f32_e32 v5, v5 ; 7E0A5505 v_mov_b32_e32 v8, 0 ; 7E100280 exp 15, 32, 0, 0, 0, v8, v8, v8, v8 ; F800020F 08080808 exp 15, 33, 0, 0, 0, v3, v4, v8, v8 ; F800021F 08080403 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v3, v6, v5 ; 10060B06 v_mul_f32_e32 v4, v7, v5 ; 10080B07 v_mul_f32_e32 v0, v0, v5 ; 10000B00 exp 15, 34, 0, 0, 0, v3, v4, v0, v8 ; F800022F 08000403 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, 1.0 ; 7E0002F2 v_xor_b32_e32 v2, 0x80000000, v2 ; 3A0404FF 80000000 v_mov_b32_e32 v3, -1.0 ; 7E0602F3 exp 15, 12, 0, 0, 0, v1, v2, v3, v0 ; F80000CF 00030201 exp 15, 13, 0, 1, 0, v8, v8, v8, v8 ; F80008DF 08080808 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 360 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SVIEW[0], SHADOW2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL SVIEW[5], 2D, FLOAT DCL CONST[1][0..1] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0..16], LOCAL IMM[0] FLT32 { 0.0000, -0.5000, 2.0000, 0.5000} IMM[1] FLT32 { 1.0000, 255.0000, 0.0625, 16.0000} IMM[2] UINT32 {0, 16, 3, 304} IMM[3] FLT32 { -8.0000, 0.1429, 0.9961, 0.0039} IMM[4] FLT32 { 0.0010, 120.0000, -0.3000, 1.4286} IMM[5] FLT32 { 0.0000, 2.1000, 0.2060, 0.0749} IMM[6] UINT32 {4, 32, 44, 48} IMM[7] UINT32 {200, 196, 192, 204} IMM[8] UINT32 {216, 212, 208, 220} IMM[9] UINT32 {232, 228, 224, 236} IMM[10] UINT32 {248, 244, 240, 252} IMM[11] UINT32 {256, 76, 80, 92} IMM[12] UINT32 {96, 64, 0, 0} IMM[13] FLT32 { 0.1236, 0.2125, 0.7154, 0.0721} IMM[14] FLT32 { 0.4500, 1.8500, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: MOV TEMP[0].w, IMM[0].xxxx 2: TXL TEMP[0].xyz, TEMP[0], SAMP[2], 2D 3: ADD TEMP[0].xyz, TEMP[0].xyzz, IMM[0].yyyy 4: MUL TEMP[0].xyz, TEMP[0].xyzz, IMM[0].zzzz 5: MOV TEMP[1].xy, IN[0].xyyy 6: MOV TEMP[1].w, IMM[0].xxxx 7: TXL TEMP[1].xyz, TEMP[1], SAMP[3], 2D 8: MOV TEMP[2].xy, IN[0].xyyy 9: MOV TEMP[2].w, IMM[0].xxxx 10: TXL TEMP[2].x, TEMP[2], SAMP[4], 2D 11: MUL TEMP[3].xy, IN[0].xyyy, IMM[0].wwww 12: MUL TEMP[4].xy, CONST[1][0].xyyy, TEMP[3].xyyy 13: FRC TEMP[4].xy, TEMP[4].xyyy 14: ADD TEMP[4].xy, IMM[1].xxxx, -TEMP[4].xyyy 15: MUL TEMP[5].xy, CONST[1][1].xyyy, TEMP[4].xyyy 16: ADD TEMP[3].xy, TEMP[3].xyyy, -TEMP[5].xyyy 17: MOV TEMP[5].xy, TEMP[3].xyyy 18: TEX TEMP[5], TEMP[5], SAMP[1], 2D 19: MOV TEMP[6].y, IMM[0].xxxx 20: MOV TEMP[6].x, CONST[1][1].xxxx 21: ADD TEMP[6].xy, TEMP[6].xyyy, TEMP[3].xyyy 22: MOV TEMP[6].xy, TEMP[6].xyyy 23: TEX TEMP[6], TEMP[6], SAMP[1], 2D 24: MOV TEMP[7].x, IMM[0].xxxx 25: MOV TEMP[7].y, CONST[1][1].yyyy 26: ADD TEMP[7].xy, TEMP[7].xyyy, TEMP[3].xyyy 27: MOV TEMP[7].xy, TEMP[7].xyyy 28: TEX TEMP[7], TEMP[7], SAMP[1], 2D 29: ADD TEMP[3].xy, CONST[1][1].xyyy, TEMP[3].xyyy 30: MOV TEMP[3].xy, TEMP[3].xyyy 31: TEX TEMP[3], TEMP[3], SAMP[1], 2D 32: ADD TEMP[8].x, IMM[1].xxxx, -TEMP[4].xxxx 33: ADD TEMP[9].x, IMM[1].xxxx, -TEMP[4].yyyy 34: MUL TEMP[10].x, TEMP[9].xxxx, TEMP[8].xxxx 35: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[4].xxxx 36: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[4].yyyy 37: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[4].yyyy 38: MOV TEMP[11].x, TEMP[10].xxxx 39: MOV TEMP[11].y, TEMP[9].xxxx 40: MOV TEMP[11].z, TEMP[8].xxxx 41: MOV TEMP[11].w, TEMP[4].xxxx 42: MUL TEMP[12].x, TEMP[5].wwww, IMM[1].yyyy 43: MUL TEMP[13].x, TEMP[12].xxxx, IMM[1].zzzz 44: FLR TEMP[13].x, TEMP[13].xxxx 45: MOV TEMP[14].x, TEMP[13].xxxx 46: MUL TEMP[13].x, TEMP[13].xxxx, IMM[1].wwww 47: ADD TEMP[12].x, TEMP[12].xxxx, -TEMP[13].xxxx 48: MOV TEMP[14].y, TEMP[12].xxxx 49: ADD TEMP[12].xy, TEMP[14].xyyy, IMM[3].xxxx 50: MUL TEMP[12].xy, TEMP[12].xyyy, IMM[3].yyyy 51: MOV TEMP[13].x, TEMP[12].xxxx 52: MOV TEMP[13].y, TEMP[12].yyyy 53: MUL TEMP[14].x, TEMP[12].xxxx, TEMP[12].xxxx 54: ADD TEMP[14].x, IMM[1].xxxx, -TEMP[14].xxxx 55: MUL TEMP[12].x, TEMP[12].yyyy, TEMP[12].yyyy 56: ADD TEMP[12].x, TEMP[14].xxxx, -TEMP[12].xxxx 57: MAX TEMP[12].x, IMM[0].xxxx, TEMP[12].xxxx 58: SQRT TEMP[12].x, TEMP[12].xxxx 59: MOV TEMP[13].z, TEMP[12].xxxx 60: DP2 TEMP[12].x, TEMP[5].yzzz, IMM[3].zwww 61: ADD TEMP[12].x, TEMP[12].xxxx, -TEMP[2].xxxx 62: ADD TEMP[12].x, TEMP[12].xxxx, IMM[4].xxxx 63: MOV TEMP[14].x, -TEMP[12].xxxx 64: FSLT TEMP[15].x, TEMP[12].xxxx, IMM[0].xxxx 65: UIF TEMP[15].xxxx :0 66: MOV TEMP[14].x, TEMP[14].xxxx 67: ELSE :0 68: MOV TEMP[14].x, TEMP[12].xxxx 69: ENDIF 70: MUL TEMP[12].x, TEMP[14].xxxx, IMM[4].yyyy 71: MOV_SAT TEMP[12].x, TEMP[12].xxxx 72: ADD TEMP[12].x, IMM[1].xxxx, -TEMP[12].xxxx 73: DP3 TEMP[13].x, TEMP[13].xyzz, TEMP[0].xyzz 74: ADD TEMP[13].x, TEMP[13].xxxx, IMM[4].zzzz 75: MUL TEMP[13].x, TEMP[13].xxxx, IMM[4].wwww 76: MOV_SAT TEMP[13].x, TEMP[13].xxxx 77: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[13].xxxx 78: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[12].xxxx 79: MUL TEMP[12].x, TEMP[10].xxxx, TEMP[5].xxxx 80: MUL TEMP[13].x, IMM[1].yyyy, TEMP[6].wwww 81: MUL TEMP[14].x, IMM[1].zzzz, TEMP[13].xxxx 82: FLR TEMP[14].x, TEMP[14].xxxx 83: MOV TEMP[15].x, TEMP[14].xxxx 84: MUL TEMP[14].x, IMM[1].wwww, TEMP[14].xxxx 85: ADD TEMP[13].x, TEMP[13].xxxx, -TEMP[14].xxxx 86: MOV TEMP[15].y, TEMP[13].xxxx 87: ADD TEMP[13].xy, IMM[3].xxxx, TEMP[15].xyyy 88: MUL TEMP[13].xy, TEMP[13].xyyy, IMM[3].yyyy 89: MOV TEMP[14].x, TEMP[13].xxxx 90: MOV TEMP[14].y, TEMP[13].yyyy 91: MUL TEMP[15].x, TEMP[13].xxxx, TEMP[13].xxxx 92: ADD TEMP[15].x, IMM[1].xxxx, -TEMP[15].xxxx 93: MUL TEMP[13].x, TEMP[13].yyyy, TEMP[13].yyyy 94: ADD TEMP[13].x, TEMP[15].xxxx, -TEMP[13].xxxx 95: MAX TEMP[13].x, IMM[0].xxxx, TEMP[13].xxxx 96: SQRT TEMP[13].x, TEMP[13].xxxx 97: MOV TEMP[14].z, TEMP[13].xxxx 98: DP2 TEMP[13].x, TEMP[6].yzzz, IMM[3].zwww 99: ADD TEMP[13].x, TEMP[13].xxxx, -TEMP[2].xxxx 100: ADD TEMP[13].x, IMM[4].xxxx, TEMP[13].xxxx 101: MOV TEMP[15].x, -TEMP[13].xxxx 102: FSLT TEMP[16].x, TEMP[13].xxxx, IMM[0].xxxx 103: UIF TEMP[16].xxxx :0 104: MOV TEMP[15].x, TEMP[15].xxxx 105: ELSE :0 106: MOV TEMP[15].x, TEMP[13].xxxx 107: ENDIF 108: MUL TEMP[13].x, IMM[4].yyyy, TEMP[15].xxxx 109: MOV_SAT TEMP[13].x, TEMP[13].xxxx 110: ADD TEMP[13].x, IMM[1].xxxx, -TEMP[13].xxxx 111: DP3 TEMP[14].x, TEMP[14].xyzz, TEMP[0].xyzz 112: ADD TEMP[14].x, IMM[4].zzzz, TEMP[14].xxxx 113: MUL TEMP[14].x, TEMP[14].xxxx, IMM[4].wwww 114: MOV_SAT TEMP[14].x, TEMP[14].xxxx 115: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[14].xxxx 116: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[13].xxxx 117: ADD TEMP[10].x, TEMP[10].xxxx, TEMP[9].xxxx 118: MAD TEMP[9].x, TEMP[9].xxxx, TEMP[6].xxxx, TEMP[12].xxxx 119: MUL TEMP[12].x, IMM[1].yyyy, TEMP[7].wwww 120: MUL TEMP[13].x, IMM[1].zzzz, TEMP[12].xxxx 121: FLR TEMP[13].x, TEMP[13].xxxx 122: MOV TEMP[14].x, TEMP[13].xxxx 123: MUL TEMP[13].x, IMM[1].wwww, TEMP[13].xxxx 124: ADD TEMP[12].x, TEMP[12].xxxx, -TEMP[13].xxxx 125: MOV TEMP[14].y, TEMP[12].xxxx 126: ADD TEMP[12].xy, IMM[3].xxxx, TEMP[14].xyyy 127: MUL TEMP[12].xy, TEMP[12].xyyy, IMM[3].yyyy 128: MOV TEMP[13].x, TEMP[12].xxxx 129: MOV TEMP[13].y, TEMP[12].yyyy 130: MUL TEMP[14].x, TEMP[12].xxxx, TEMP[12].xxxx 131: ADD TEMP[14].x, IMM[1].xxxx, -TEMP[14].xxxx 132: MUL TEMP[12].x, TEMP[12].yyyy, TEMP[12].yyyy 133: ADD TEMP[12].x, TEMP[14].xxxx, -TEMP[12].xxxx 134: MAX TEMP[12].x, IMM[0].xxxx, TEMP[12].xxxx 135: SQRT TEMP[12].x, TEMP[12].xxxx 136: MOV TEMP[13].z, TEMP[12].xxxx 137: DP2 TEMP[12].x, TEMP[7].yzzz, IMM[3].zwww 138: ADD TEMP[12].x, TEMP[12].xxxx, -TEMP[2].xxxx 139: ADD TEMP[12].x, IMM[4].xxxx, TEMP[12].xxxx 140: MOV TEMP[14].x, -TEMP[12].xxxx 141: FSLT TEMP[15].x, TEMP[12].xxxx, IMM[0].xxxx 142: UIF TEMP[15].xxxx :0 143: MOV TEMP[14].x, TEMP[14].xxxx 144: ELSE :0 145: MOV TEMP[14].x, TEMP[12].xxxx 146: ENDIF 147: MUL TEMP[12].x, IMM[4].yyyy, TEMP[14].xxxx 148: MOV_SAT TEMP[12].x, TEMP[12].xxxx 149: ADD TEMP[12].x, IMM[1].xxxx, -TEMP[12].xxxx 150: DP3 TEMP[13].x, TEMP[13].xyzz, TEMP[0].xyzz 151: ADD TEMP[13].x, IMM[4].zzzz, TEMP[13].xxxx 152: MUL TEMP[13].x, TEMP[13].xxxx, IMM[4].wwww 153: MOV_SAT TEMP[13].x, TEMP[13].xxxx 154: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[13].xxxx 155: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[12].xxxx 156: ADD TEMP[10].x, TEMP[10].xxxx, TEMP[8].xxxx 157: MAD TEMP[8].x, TEMP[8].xxxx, TEMP[7].xxxx, TEMP[9].xxxx 158: MUL TEMP[9].x, IMM[1].yyyy, TEMP[3].wwww 159: MUL TEMP[12].x, IMM[1].zzzz, TEMP[9].xxxx 160: FLR TEMP[12].x, TEMP[12].xxxx 161: MOV TEMP[13].x, TEMP[12].xxxx 162: MUL TEMP[12].x, IMM[1].wwww, TEMP[12].xxxx 163: ADD TEMP[9].x, TEMP[9].xxxx, -TEMP[12].xxxx 164: MOV TEMP[13].y, TEMP[9].xxxx 165: ADD TEMP[9].xy, IMM[3].xxxx, TEMP[13].xyyy 166: MUL TEMP[9].xy, TEMP[9].xyyy, IMM[3].yyyy 167: MOV TEMP[12].x, TEMP[9].xxxx 168: MOV TEMP[12].y, TEMP[9].yyyy 169: MUL TEMP[13].x, TEMP[9].xxxx, TEMP[9].xxxx 170: ADD TEMP[13].x, IMM[1].xxxx, -TEMP[13].xxxx 171: MUL TEMP[9].x, TEMP[9].yyyy, TEMP[9].yyyy 172: ADD TEMP[9].x, TEMP[13].xxxx, -TEMP[9].xxxx 173: MAX TEMP[9].x, IMM[0].xxxx, TEMP[9].xxxx 174: SQRT TEMP[9].x, TEMP[9].xxxx 175: MOV TEMP[12].z, TEMP[9].xxxx 176: DP2 TEMP[9].x, TEMP[3].yzzz, IMM[3].zwww 177: ADD TEMP[9].x, TEMP[9].xxxx, -TEMP[2].xxxx 178: ADD TEMP[9].x, IMM[4].xxxx, TEMP[9].xxxx 179: MOV TEMP[13].x, -TEMP[9].xxxx 180: FSLT TEMP[14].x, TEMP[9].xxxx, IMM[0].xxxx 181: UIF TEMP[14].xxxx :0 182: MOV TEMP[13].x, TEMP[13].xxxx 183: ELSE :0 184: MOV TEMP[13].x, TEMP[9].xxxx 185: ENDIF 186: MUL TEMP[9].x, IMM[4].yyyy, TEMP[13].xxxx 187: MOV_SAT TEMP[9].x, TEMP[9].xxxx 188: ADD TEMP[9].x, IMM[1].xxxx, -TEMP[9].xxxx 189: DP3 TEMP[12].x, TEMP[12].xyzz, TEMP[0].xyzz 190: ADD TEMP[12].x, IMM[4].zzzz, TEMP[12].xxxx 191: MUL TEMP[12].x, TEMP[12].xxxx, IMM[4].wwww 192: MOV_SAT TEMP[12].x, TEMP[12].xxxx 193: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[12].xxxx 194: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[9].xxxx 195: ADD TEMP[9].x, TEMP[10].xxxx, TEMP[4].xxxx 196: MOV TEMP[5].x, TEMP[5].xxxx 197: MOV TEMP[5].y, TEMP[6].xxxx 198: MOV TEMP[5].z, TEMP[7].xxxx 199: MOV TEMP[5].w, TEMP[3].xxxx 200: DP4 TEMP[5].x, TEMP[11], TEMP[5] 201: MAD TEMP[3].x, TEMP[4].xxxx, TEMP[3].xxxx, TEMP[8].xxxx 202: RCP TEMP[4].x, TEMP[9].xxxx 203: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[4].xxxx 204: FSLT TEMP[4].x, TEMP[9].xxxx, IMM[5].xxxx 205: UIF TEMP[4].xxxx :0 206: MOV TEMP[4].x, TEMP[5].xxxx 207: ELSE :0 208: MOV TEMP[4].x, TEMP[3].xxxx 209: ENDIF 210: MAD TEMP[2].xyz, IN[1].xyzz, TEMP[2].xxxx, CONST[4][19].xyzz 211: MOV TEMP[3].xy, IN[0].xyyy 212: MOV TEMP[3].w, IMM[0].xxxx 213: TXL TEMP[3].xyz, TEMP[3], SAMP[5], 2D 214: ADD TEMP[5].xyz, TEMP[2].xyzz, -CONST[4][19].xyzz 215: DP3 TEMP[6].x, TEMP[5].xyzz, TEMP[5].xyzz 216: RSQ TEMP[6].x, TEMP[6].xxxx 217: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[6].xxxx 218: MOV TEMP[5].xyz, -TEMP[5].xyzx 219: DP3 TEMP[6].x, TEMP[0].xyzz, TEMP[5].xyzz 220: MUL TEMP[6].xyz, TEMP[6].xxxx, TEMP[0].xyzz 221: MUL TEMP[6].xyz, IMM[0].zzzz, TEMP[6].xyzz 222: ADD TEMP[5].xyz, TEMP[5].xyzz, -TEMP[6].xyzz 223: DP3 TEMP[5].x, CONST[5][2].xyzz, TEMP[5].xyzz 224: MOV_SAT TEMP[5].x, TEMP[5].xxxx 225: POW TEMP[5].x, TEMP[5].xxxx, CONST[5][2].wwww 226: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[3].xxxx 227: MAD TEMP[1].xyz, TEMP[5].xxxx, CONST[5][3].xyzz, TEMP[1].xyzz 228: MUL TEMP[6].x, CONST[5][12].xxxx, TEMP[2].xxxx 229: MAD TEMP[6].x, CONST[5][12].yyyy, TEMP[2].yyyy, TEMP[6].xxxx 230: MAD TEMP[6].x, CONST[5][12].zzzz, TEMP[2].zzzz, TEMP[6].xxxx 231: ADD TEMP[6].x, TEMP[6].xxxx, CONST[5][12].wwww 232: MUL TEMP[7].x, CONST[5][13].xxxx, TEMP[2].xxxx 233: MAD TEMP[7].x, CONST[5][13].yyyy, TEMP[2].yyyy, TEMP[7].xxxx 234: MAD TEMP[7].x, CONST[5][13].zzzz, TEMP[2].zzzz, TEMP[7].xxxx 235: ADD TEMP[7].x, TEMP[7].xxxx, CONST[5][13].wwww 236: MOV TEMP[6].y, TEMP[7].xxxx 237: MUL TEMP[7].x, CONST[5][14].xxxx, TEMP[2].xxxx 238: MAD TEMP[7].x, CONST[5][14].yyyy, TEMP[2].yyyy, TEMP[7].xxxx 239: MAD TEMP[7].x, CONST[5][14].zzzz, TEMP[2].zzzz, TEMP[7].xxxx 240: ADD TEMP[7].x, TEMP[7].xxxx, CONST[5][14].wwww 241: MOV TEMP[6].z, TEMP[7].xxxx 242: MUL TEMP[7].x, CONST[5][15].xxxx, TEMP[2].xxxx 243: MAD TEMP[7].x, CONST[5][15].yyyy, TEMP[2].yyyy, TEMP[7].xxxx 244: MAD TEMP[2].x, CONST[5][15].zzzz, TEMP[2].zzzz, TEMP[7].xxxx 245: ADD TEMP[2].x, TEMP[2].xxxx, CONST[5][15].wwww 246: RCP TEMP[2].xyz, TEMP[2].xxxx 247: MUL TEMP[2].xyz, TEMP[6].xyzz, TEMP[2].xyzz 248: MOV_SAT TEMP[6].x, -TEMP[2].zzzz 249: MOV TEMP[7].x, -CONST[5][16].xxxx 250: MOV TEMP[8].x, TEMP[7].xxxx 251: MOV TEMP[8].y, CONST[5][16].xxxx 252: MOV TEMP[9].x, CONST[5][16].xxxx 253: MOV TEMP[9].y, TEMP[7].xxxx 254: ADD TEMP[10].xy, TEMP[2].xyyy, CONST[5][16].xxxx 255: MOV TEMP[10].xy, TEMP[10].xyyy 256: MOV TEMP[10].z, TEMP[6].xxxx 257: MOV TEMP[10].w, IMM[0].xxxx 258: TXL TEMP[10].x, TEMP[10], SAMP[0], SHADOW2D 259: MOV TEMP[10].x, TEMP[10].xxxx 260: ADD TEMP[8].xy, TEMP[8].xyyy, TEMP[2].xyyy 261: MOV TEMP[8].xy, TEMP[8].xyyy 262: MOV TEMP[8].z, TEMP[6].xxxx 263: MOV TEMP[8].w, IMM[0].xxxx 264: TXL TEMP[8].x, TEMP[8], SAMP[0], SHADOW2D 265: MOV TEMP[10].y, TEMP[8].xxxx 266: ADD TEMP[8].xy, TEMP[2].xyyy, TEMP[9].xyyy 267: MOV TEMP[8].xy, TEMP[8].xyyy 268: MOV TEMP[8].z, TEMP[6].xxxx 269: MOV TEMP[8].w, IMM[0].xxxx 270: TXL TEMP[8].x, TEMP[8], SAMP[0], SHADOW2D 271: MOV TEMP[10].z, TEMP[8].xxxx 272: ADD TEMP[8].xy, TEMP[2].xyyy, TEMP[7].xxxx 273: MOV TEMP[8].xy, TEMP[8].xyyy 274: MOV TEMP[8].z, TEMP[6].xxxx 275: MOV TEMP[8].w, IMM[0].xxxx 276: TXL TEMP[8].x, TEMP[8], SAMP[0], SHADOW2D 277: MOV TEMP[10].w, TEMP[8].xxxx 278: MOV TEMP[8].y, IMM[0].xxxx 279: MOV TEMP[8].x, CONST[5][16].xxxx 280: MOV TEMP[9].y, IMM[0].xxxx 281: MOV TEMP[9].x, TEMP[7].xxxx 282: MOV TEMP[11].x, IMM[0].xxxx 283: MOV TEMP[11].y, TEMP[7].xxxx 284: MOV TEMP[7].x, IMM[0].xxxx 285: MOV TEMP[7].y, CONST[5][16].xxxx 286: ADD TEMP[8].xy, TEMP[8].xyyy, TEMP[2].xyyy 287: MOV TEMP[8].xy, TEMP[8].xyyy 288: MOV TEMP[8].z, TEMP[6].xxxx 289: MOV TEMP[8].w, IMM[0].xxxx 290: TXL TEMP[8].x, TEMP[8], SAMP[0], SHADOW2D 291: MOV TEMP[8].x, TEMP[8].xxxx 292: ADD TEMP[9].xy, TEMP[9].xyyy, TEMP[2].xyyy 293: MOV TEMP[9].xy, TEMP[9].xyyy 294: MOV TEMP[9].z, TEMP[6].xxxx 295: MOV TEMP[9].w, IMM[0].xxxx 296: TXL TEMP[9].x, TEMP[9], SAMP[0], SHADOW2D 297: MOV TEMP[8].y, TEMP[9].xxxx 298: ADD TEMP[9].xy, TEMP[11].xyyy, TEMP[2].xyyy 299: MOV TEMP[9].xy, TEMP[9].xyyy 300: MOV TEMP[9].z, TEMP[6].xxxx 301: MOV TEMP[9].w, IMM[0].xxxx 302: TXL TEMP[9].x, TEMP[9], SAMP[0], SHADOW2D 303: MOV TEMP[8].z, TEMP[9].xxxx 304: ADD TEMP[7].xy, TEMP[7].xyyy, TEMP[2].xyyy 305: MOV TEMP[7].xy, TEMP[7].xyyy 306: MOV TEMP[7].z, TEMP[6].xxxx 307: MOV TEMP[7].w, IMM[0].xxxx 308: TXL TEMP[7].x, TEMP[7], SAMP[0], SHADOW2D 309: MOV TEMP[8].w, TEMP[7].xxxx 310: ADD TEMP[4].x, IMM[1].xxxx, -TEMP[4].xxxx 311: POW TEMP[7].x, TEMP[4].xxxx, IMM[5].yyyy 312: MUL TEMP[9].xyz, CONST[5][4].wwww, CONST[5][5].xyzz 313: MUL TEMP[11].xyz, CONST[5][5].wwww, CONST[5][6].xyzz 314: MOV TEMP[2].xy, TEMP[2].xyyy 315: MOV TEMP[2].z, TEMP[6].xxxx 316: MOV TEMP[2].w, IMM[0].xxxx 317: TXL TEMP[2].x, TEMP[2], SAMP[0], SHADOW2D 318: DP4 TEMP[6].x, TEMP[10], IMM[5].wwww 319: DP4 TEMP[8].x, TEMP[8], IMM[13].xxxx 320: ADD TEMP[6].x, TEMP[6].xxxx, TEMP[8].xxxx 321: MAD TEMP[2].x, TEMP[2].xxxx, IMM[5].zzzz, TEMP[6].xxxx 322: DP3 TEMP[6].x, -CONST[5][0].xyzz, TEMP[0].xyzz 323: MOV_SAT TEMP[6].x, TEMP[6].xxxx 324: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[6].xxxx 325: ADD TEMP[6].x, IMM[1].xxxx, -TEMP[2].xxxx 326: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].xxxx 327: DP3 TEMP[8].x, TEMP[11].xyzz, IMM[13].yzww 328: LRP TEMP[8].xyz, TEMP[3].zzzz, TEMP[8].xxxx, TEMP[11].xyzz 329: DP3 TEMP[0].x, CONST[5][4].xyzz, TEMP[0].xyzz 330: MOV_SAT TEMP[0].x, TEMP[0].xxxx 331: MUL TEMP[0].x, TEMP[7].xxxx, TEMP[0].xxxx 332: DP3 TEMP[7].x, TEMP[9].xyzz, IMM[13].yzww 333: LRP TEMP[7].xyz, TEMP[3].zzzz, TEMP[7].xxxx, TEMP[9].xyzz 334: POW TEMP[4].x, TEMP[4].xxxx, IMM[14].yyyy 335: LRP TEMP[4].x, TEMP[4].xxxx, IMM[1].xxxx, IMM[14].xxxx 336: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[4].xxxx 337: MUL TEMP[2].xyz, TEMP[2].xxxx, CONST[5][1].xyzz 338: MAD TEMP[0].xyz, TEMP[0].xxxx, TEMP[7].xyzz, TEMP[2].xyzz 339: MAD TEMP[0].xyz, TEMP[6].xxxx, TEMP[8].xyzz, TEMP[0].xyzz 340: MUL TEMP[0].xyz, TEMP[1].xyzz, TEMP[0].xyzz 341: LRP TEMP[0].xyz, TEMP[3].yyyy, TEMP[1].xyzz, TEMP[0].xyzz 342: MOV TEMP[1].x, TEMP[0].xxxx 343: MOV TEMP[1].y, TEMP[0].yyyy 344: MOV TEMP[1].z, TEMP[0].zzzz 345: MOV_SAT TEMP[0].x, TEMP[5].xxxx 346: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[3].zzzz 347: MOV TEMP[1].w, TEMP[0].xxxx 348: MOV OUT[0], TEMP[1] 349: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %28 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4 %29 = load <16 x i8>, <16 x i8> addrspace(2)* %28, align 16, !tbaa !0 %30 = call float @llvm.SI.load.const(<16 x i8> %29, i32 304) %31 = call float @llvm.SI.load.const(<16 x i8> %29, i32 308) %32 = call float @llvm.SI.load.const(<16 x i8> %29, i32 312) %33 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 5 %34 = load <16 x i8>, <16 x i8> addrspace(2)* %33, align 16, !tbaa !0 %35 = call float @llvm.SI.load.const(<16 x i8> %34, i32 0) %36 = call float @llvm.SI.load.const(<16 x i8> %34, i32 4) %37 = call float @llvm.SI.load.const(<16 x i8> %34, i32 8) %38 = call float @llvm.SI.load.const(<16 x i8> %34, i32 16) %39 = call float @llvm.SI.load.const(<16 x i8> %34, i32 20) %40 = call float @llvm.SI.load.const(<16 x i8> %34, i32 24) %41 = call float @llvm.SI.load.const(<16 x i8> %34, i32 32) %42 = call float @llvm.SI.load.const(<16 x i8> %34, i32 36) %43 = call float @llvm.SI.load.const(<16 x i8> %34, i32 40) %44 = call float @llvm.SI.load.const(<16 x i8> %34, i32 44) %45 = call float @llvm.SI.load.const(<16 x i8> %34, i32 48) %46 = call float @llvm.SI.load.const(<16 x i8> %34, i32 52) %47 = call float @llvm.SI.load.const(<16 x i8> %34, i32 56) %48 = call float @llvm.SI.load.const(<16 x i8> %34, i32 64) %49 = call float @llvm.SI.load.const(<16 x i8> %34, i32 68) %50 = call float @llvm.SI.load.const(<16 x i8> %34, i32 72) %51 = call float @llvm.SI.load.const(<16 x i8> %34, i32 76) %52 = call float @llvm.SI.load.const(<16 x i8> %34, i32 80) %53 = call float @llvm.SI.load.const(<16 x i8> %34, i32 84) %54 = call float @llvm.SI.load.const(<16 x i8> %34, i32 88) %55 = call float @llvm.SI.load.const(<16 x i8> %34, i32 92) %56 = call float @llvm.SI.load.const(<16 x i8> %34, i32 96) %57 = call float @llvm.SI.load.const(<16 x i8> %34, i32 100) %58 = call float @llvm.SI.load.const(<16 x i8> %34, i32 104) %59 = call float @llvm.SI.load.const(<16 x i8> %34, i32 192) %60 = call float @llvm.SI.load.const(<16 x i8> %34, i32 196) %61 = call float @llvm.SI.load.const(<16 x i8> %34, i32 200) %62 = call float @llvm.SI.load.const(<16 x i8> %34, i32 204) %63 = call float @llvm.SI.load.const(<16 x i8> %34, i32 208) %64 = call float @llvm.SI.load.const(<16 x i8> %34, i32 212) %65 = call float @llvm.SI.load.const(<16 x i8> %34, i32 216) %66 = call float @llvm.SI.load.const(<16 x i8> %34, i32 220) %67 = call float @llvm.SI.load.const(<16 x i8> %34, i32 224) %68 = call float @llvm.SI.load.const(<16 x i8> %34, i32 228) %69 = call float @llvm.SI.load.const(<16 x i8> %34, i32 232) %70 = call float @llvm.SI.load.const(<16 x i8> %34, i32 236) %71 = call float @llvm.SI.load.const(<16 x i8> %34, i32 240) %72 = call float @llvm.SI.load.const(<16 x i8> %34, i32 244) %73 = call float @llvm.SI.load.const(<16 x i8> %34, i32 248) %74 = call float @llvm.SI.load.const(<16 x i8> %34, i32 252) %75 = call float @llvm.SI.load.const(<16 x i8> %34, i32 256) %76 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %77 = load <8 x i32>, <8 x i32> addrspace(2)* %76, align 32, !tbaa !0 %78 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %79 = load <4 x i32>, <4 x i32> addrspace(2)* %78, align 16, !tbaa !0 %80 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %81 = load <8 x i32>, <8 x i32> addrspace(2)* %80, align 32, !tbaa !0 %82 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %83 = load <4 x i32>, <4 x i32> addrspace(2)* %82, align 16, !tbaa !0 %84 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %85 = bitcast <8 x i32> addrspace(2)* %84 to <32 x i8> addrspace(2)* %86 = load <32 x i8>, <32 x i8> addrspace(2)* %85, align 32, !tbaa !0 %87 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %88 = bitcast <4 x i32> addrspace(2)* %87 to <16 x i8> addrspace(2)* %89 = load <16 x i8>, <16 x i8> addrspace(2)* %88, align 16, !tbaa !0 %90 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %91 = bitcast <8 x i32> addrspace(2)* %90 to <32 x i8> addrspace(2)* %92 = load <32 x i8>, <32 x i8> addrspace(2)* %91, align 32, !tbaa !0 %93 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %94 = bitcast <4 x i32> addrspace(2)* %93 to <16 x i8> addrspace(2)* %95 = load <16 x i8>, <16 x i8> addrspace(2)* %94, align 16, !tbaa !0 %96 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4 %97 = bitcast <8 x i32> addrspace(2)* %96 to <32 x i8> addrspace(2)* %98 = load <32 x i8>, <32 x i8> addrspace(2)* %97, align 32, !tbaa !0 %99 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4 %100 = bitcast <4 x i32> addrspace(2)* %99 to <16 x i8> addrspace(2)* %101 = load <16 x i8>, <16 x i8> addrspace(2)* %100, align 16, !tbaa !0 %102 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5 %103 = bitcast <8 x i32> addrspace(2)* %102 to <32 x i8> addrspace(2)* %104 = load <32 x i8>, <32 x i8> addrspace(2)* %103, align 32, !tbaa !0 %105 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5 %106 = bitcast <4 x i32> addrspace(2)* %105 to <16 x i8> addrspace(2)* %107 = load <16 x i8>, <16 x i8> addrspace(2)* %106, align 16, !tbaa !0 %108 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %109 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %110 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %111 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %112 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %113 = bitcast float %108 to i32 %114 = bitcast float %109 to i32 %115 = insertelement <4 x i32> undef, i32 %113, i32 0 %116 = insertelement <4 x i32> %115, i32 %114, i32 1 %117 = insertelement <4 x i32> %116, i32 0, i32 2 %118 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %117, <32 x i8> %86, <16 x i8> %89, i32 2) %119 = extractelement <4 x float> %118, i32 0 %120 = extractelement <4 x float> %118, i32 1 %121 = extractelement <4 x float> %118, i32 2 %122 = fadd float %119, -5.000000e-01 %123 = fadd float %120, -5.000000e-01 %124 = fadd float %121, -5.000000e-01 %125 = fmul float %122, 2.000000e+00 %126 = fmul float %123, 2.000000e+00 %127 = fmul float %124, 2.000000e+00 %128 = bitcast float %108 to i32 %129 = bitcast float %109 to i32 %130 = insertelement <4 x i32> undef, i32 %128, i32 0 %131 = insertelement <4 x i32> %130, i32 %129, i32 1 %132 = insertelement <4 x i32> %131, i32 0, i32 2 %133 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %132, <32 x i8> %92, <16 x i8> %95, i32 2) %134 = extractelement <4 x float> %133, i32 0 %135 = extractelement <4 x float> %133, i32 1 %136 = extractelement <4 x float> %133, i32 2 %137 = bitcast float %108 to i32 %138 = bitcast float %109 to i32 %139 = insertelement <4 x i32> undef, i32 %137, i32 0 %140 = insertelement <4 x i32> %139, i32 %138, i32 1 %141 = insertelement <4 x i32> %140, i32 0, i32 2 %142 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %141, <32 x i8> %98, <16 x i8> %101, i32 2) %143 = extractelement <4 x float> %142, i32 0 %144 = fmul float %108, 5.000000e-01 %145 = fmul float %109, 5.000000e-01 %146 = fmul float %24, %144 %147 = fmul float %25, %145 %148 = call float @llvm.AMDIL.fraction.(float %146) %149 = call float @llvm.AMDIL.fraction.(float %147) %150 = fsub float 1.000000e+00, %148 %151 = fsub float 1.000000e+00, %149 %152 = fmul float %26, %150 %153 = fmul float %27, %151 %154 = fsub float %144, %152 %155 = fsub float %145, %153 %156 = bitcast float %154 to i32 %157 = bitcast float %155 to i32 %158 = insertelement <2 x i32> undef, i32 %156, i32 0 %159 = insertelement <2 x i32> %158, i32 %157, i32 1 %160 = bitcast <8 x i32> %81 to <32 x i8> %161 = bitcast <4 x i32> %83 to <16 x i8> %162 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %159, <32 x i8> %160, <16 x i8> %161, i32 2) %163 = extractelement <4 x float> %162, i32 0 %164 = extractelement <4 x float> %162, i32 1 %165 = extractelement <4 x float> %162, i32 2 %166 = extractelement <4 x float> %162, i32 3 %167 = fadd float %26, %154 %168 = fadd float %155, 0.000000e+00 %169 = bitcast float %167 to i32 %170 = bitcast float %168 to i32 %171 = insertelement <2 x i32> undef, i32 %169, i32 0 %172 = insertelement <2 x i32> %171, i32 %170, i32 1 %173 = bitcast <8 x i32> %81 to <32 x i8> %174 = bitcast <4 x i32> %83 to <16 x i8> %175 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %172, <32 x i8> %173, <16 x i8> %174, i32 2) %176 = extractelement <4 x float> %175, i32 0 %177 = extractelement <4 x float> %175, i32 1 %178 = extractelement <4 x float> %175, i32 2 %179 = extractelement <4 x float> %175, i32 3 %180 = fadd float %154, 0.000000e+00 %181 = fadd float %27, %155 %182 = bitcast float %180 to i32 %183 = bitcast float %181 to i32 %184 = insertelement <2 x i32> undef, i32 %182, i32 0 %185 = insertelement <2 x i32> %184, i32 %183, i32 1 %186 = bitcast <8 x i32> %81 to <32 x i8> %187 = bitcast <4 x i32> %83 to <16 x i8> %188 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %185, <32 x i8> %186, <16 x i8> %187, i32 2) %189 = extractelement <4 x float> %188, i32 0 %190 = extractelement <4 x float> %188, i32 1 %191 = extractelement <4 x float> %188, i32 2 %192 = extractelement <4 x float> %188, i32 3 %193 = fadd float %26, %154 %194 = fadd float %27, %155 %195 = bitcast float %193 to i32 %196 = bitcast float %194 to i32 %197 = insertelement <2 x i32> undef, i32 %195, i32 0 %198 = insertelement <2 x i32> %197, i32 %196, i32 1 %199 = bitcast <8 x i32> %81 to <32 x i8> %200 = bitcast <4 x i32> %83 to <16 x i8> %201 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %198, <32 x i8> %199, <16 x i8> %200, i32 2) %202 = extractelement <4 x float> %201, i32 0 %203 = extractelement <4 x float> %201, i32 1 %204 = extractelement <4 x float> %201, i32 2 %205 = extractelement <4 x float> %201, i32 3 %206 = fsub float 1.000000e+00, %150 %207 = fsub float 1.000000e+00, %151 %208 = fmul float %207, %206 %209 = fmul float %207, %150 %210 = fmul float %206, %151 %211 = fmul float %150, %151 %212 = fmul float %166, 2.550000e+02 %213 = fmul float %212, 6.250000e-02 %214 = call float @floor(float %213) %215 = fmul float %214, 1.600000e+01 %216 = fsub float %212, %215 %217 = fadd float %214, -8.000000e+00 %218 = fadd float %216, -8.000000e+00 %219 = fmul float %217, 0x3FC24924A0000000 %220 = fmul float %218, 0x3FC24924A0000000 %221 = fmul float %219, %219 %222 = fsub float 1.000000e+00, %221 %223 = fmul float %220, %220 %224 = fsub float %222, %223 %225 = call float @llvm.maxnum.f32(float %224, float 0.000000e+00) %226 = call float @llvm.sqrt.f32(float %225) %227 = fmul float %164, 0x3FEFE02000000000 %228 = fmul float %165, 0x3F6FE01F80000000 %229 = fadd float %227, %228 %230 = fsub float %229, %143 %231 = fadd float %230, 0x3F50000FA0000000 %232 = fsub float -0.000000e+00, %231 %233 = fcmp olt float %231, 0.000000e+00 %. = select i1 %233, float %232, float %231 %234 = fmul float %., 1.200000e+02 %235 = call float @llvm.AMDIL.clamp.(float %234, float 0.000000e+00, float 1.000000e+00) %236 = fsub float 1.000000e+00, %235 %237 = fmul float %219, %125 %238 = fmul float %220, %126 %239 = fadd float %238, %237 %240 = fmul float %226, %127 %241 = fadd float %239, %240 %242 = fadd float %241, 0xBFD3333340000000 %243 = fmul float %242, 0x3FF6DB6DC0000000 %244 = call float @llvm.AMDIL.clamp.(float %243, float 0.000000e+00, float 1.000000e+00) %245 = fmul float %236, %244 %246 = fmul float %208, %245 %247 = fmul float %246, %163 %248 = fmul float %179, 2.550000e+02 %249 = fmul float %248, 6.250000e-02 %250 = call float @floor(float %249) %251 = fmul float %250, 1.600000e+01 %252 = fsub float %248, %251 %253 = fadd float %250, -8.000000e+00 %254 = fadd float %252, -8.000000e+00 %255 = fmul float %253, 0x3FC24924A0000000 %256 = fmul float %254, 0x3FC24924A0000000 %257 = fmul float %255, %255 %258 = fsub float 1.000000e+00, %257 %259 = fmul float %256, %256 %260 = fsub float %258, %259 %261 = call float @llvm.maxnum.f32(float %260, float 0.000000e+00) %262 = call float @llvm.sqrt.f32(float %261) %263 = fmul float %177, 0x3FEFE02000000000 %264 = fmul float %178, 0x3F6FE01F80000000 %265 = fadd float %263, %264 %266 = fsub float %265, %143 %267 = fadd float %266, 0x3F50000FA0000000 %268 = fsub float -0.000000e+00, %267 %269 = fcmp olt float %267, 0.000000e+00 %temp60.0 = select i1 %269, float %268, float %267 %270 = fmul float %temp60.0, 1.200000e+02 %271 = call float @llvm.AMDIL.clamp.(float %270, float 0.000000e+00, float 1.000000e+00) %272 = fsub float 1.000000e+00, %271 %273 = fmul float %255, %125 %274 = fmul float %256, %126 %275 = fadd float %274, %273 %276 = fmul float %262, %127 %277 = fadd float %275, %276 %278 = fadd float %277, 0xBFD3333340000000 %279 = fmul float %278, 0x3FF6DB6DC0000000 %280 = call float @llvm.AMDIL.clamp.(float %279, float 0.000000e+00, float 1.000000e+00) %281 = fmul float %272, %280 %282 = fmul float %209, %281 %283 = fadd float %246, %282 %284 = fmul float %282, %176 %285 = fadd float %284, %247 %286 = fmul float %192, 2.550000e+02 %287 = fmul float %286, 6.250000e-02 %288 = call float @floor(float %287) %289 = fmul float %288, 1.600000e+01 %290 = fsub float %286, %289 %291 = fadd float %288, -8.000000e+00 %292 = fadd float %290, -8.000000e+00 %293 = fmul float %291, 0x3FC24924A0000000 %294 = fmul float %292, 0x3FC24924A0000000 %295 = fmul float %293, %293 %296 = fsub float 1.000000e+00, %295 %297 = fmul float %294, %294 %298 = fsub float %296, %297 %299 = call float @llvm.maxnum.f32(float %298, float 0.000000e+00) %300 = call float @llvm.sqrt.f32(float %299) %301 = fmul float %190, 0x3FEFE02000000000 %302 = fmul float %191, 0x3F6FE01F80000000 %303 = fadd float %301, %302 %304 = fsub float %303, %143 %305 = fadd float %304, 0x3F50000FA0000000 %306 = fsub float -0.000000e+00, %305 %307 = fcmp olt float %305, 0.000000e+00 %.80 = select i1 %307, float %306, float %305 %308 = fmul float %.80, 1.200000e+02 %309 = call float @llvm.AMDIL.clamp.(float %308, float 0.000000e+00, float 1.000000e+00) %310 = fsub float 1.000000e+00, %309 %311 = fmul float %293, %125 %312 = fmul float %294, %126 %313 = fadd float %312, %311 %314 = fmul float %300, %127 %315 = fadd float %313, %314 %316 = fadd float %315, 0xBFD3333340000000 %317 = fmul float %316, 0x3FF6DB6DC0000000 %318 = call float @llvm.AMDIL.clamp.(float %317, float 0.000000e+00, float 1.000000e+00) %319 = fmul float %310, %318 %320 = fmul float %210, %319 %321 = fadd float %283, %320 %322 = fmul float %320, %189 %323 = fadd float %322, %285 %324 = fmul float %205, 2.550000e+02 %325 = fmul float %324, 6.250000e-02 %326 = call float @floor(float %325) %327 = fmul float %326, 1.600000e+01 %328 = fsub float %324, %327 %329 = fadd float %326, -8.000000e+00 %330 = fadd float %328, -8.000000e+00 %331 = fmul float %329, 0x3FC24924A0000000 %332 = fmul float %330, 0x3FC24924A0000000 %333 = fmul float %331, %331 %334 = fsub float 1.000000e+00, %333 %335 = fmul float %332, %332 %336 = fsub float %334, %335 %337 = call float @llvm.maxnum.f32(float %336, float 0.000000e+00) %338 = call float @llvm.sqrt.f32(float %337) %339 = fmul float %203, 0x3FEFE02000000000 %340 = fmul float %204, 0x3F6FE01F80000000 %341 = fadd float %339, %340 %342 = fsub float %341, %143 %343 = fadd float %342, 0x3F50000FA0000000 %344 = fsub float -0.000000e+00, %343 %345 = fcmp olt float %343, 0.000000e+00 %temp52.0 = select i1 %345, float %344, float %343 %346 = fmul float %temp52.0, 1.200000e+02 %347 = call float @llvm.AMDIL.clamp.(float %346, float 0.000000e+00, float 1.000000e+00) %348 = fsub float 1.000000e+00, %347 %349 = fmul float %331, %125 %350 = fmul float %332, %126 %351 = fadd float %350, %349 %352 = fmul float %338, %127 %353 = fadd float %351, %352 %354 = fadd float %353, 0xBFD3333340000000 %355 = fmul float %354, 0x3FF6DB6DC0000000 %356 = call float @llvm.AMDIL.clamp.(float %355, float 0.000000e+00, float 1.000000e+00) %357 = fmul float %348, %356 %358 = fmul float %211, %357 %359 = fadd float %321, %358 %360 = fmul float %208, %163 %361 = fmul float %209, %176 %362 = fadd float %360, %361 %363 = fmul float %210, %189 %364 = fadd float %362, %363 %365 = fmul float %211, %202 %366 = fadd float %364, %365 %367 = fmul float %358, %202 %368 = fadd float %367, %323 %369 = fdiv float 1.000000e+00, %359 %370 = fmul float %368, %369 %371 = fcmp olt float %359, 0x3E80C6F7A0000000 %.81 = select i1 %371, float %366, float %370 %372 = fmul float %110, %143 %373 = fadd float %372, %30 %374 = fmul float %111, %143 %375 = fadd float %374, %31 %376 = fmul float %112, %143 %377 = fadd float %376, %32 %378 = bitcast float %108 to i32 %379 = bitcast float %109 to i32 %380 = insertelement <4 x i32> undef, i32 %378, i32 0 %381 = insertelement <4 x i32> %380, i32 %379, i32 1 %382 = insertelement <4 x i32> %381, i32 0, i32 2 %383 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %382, <32 x i8> %104, <16 x i8> %107, i32 2) %384 = extractelement <4 x float> %383, i32 0 %385 = extractelement <4 x float> %383, i32 1 %386 = extractelement <4 x float> %383, i32 2 %387 = fsub float %373, %30 %388 = fsub float %375, %31 %389 = fsub float %377, %32 %390 = fmul float %387, %387 %391 = fmul float %388, %388 %392 = fadd float %391, %390 %393 = fmul float %389, %389 %394 = fadd float %392, %393 %395 = call float @llvm.AMDGPU.rsq.clamped.f32(float %394) %396 = fmul float %387, %395 %397 = fmul float %388, %395 %398 = fmul float %389, %395 %399 = fmul float %396, %125 %400 = fsub float -0.000000e+00, %399 %401 = fmul float %397, %126 %402 = fsub float %400, %401 %403 = fmul float %398, %127 %404 = fsub float %402, %403 %405 = fmul float %404, %125 %406 = fmul float %404, %126 %407 = fmul float %404, %127 %408 = fmul float %405, 2.000000e+00 %409 = fmul float %406, 2.000000e+00 %410 = fmul float %407, 2.000000e+00 %411 = fsub float -0.000000e+00, %408 %412 = fsub float %411, %396 %413 = fsub float -0.000000e+00, %409 %414 = fsub float %413, %397 %415 = fsub float -0.000000e+00, %410 %416 = fsub float %415, %398 %417 = fmul float %41, %412 %418 = fmul float %42, %414 %419 = fadd float %418, %417 %420 = fmul float %43, %416 %421 = fadd float %419, %420 %422 = call float @llvm.AMDIL.clamp.(float %421, float 0.000000e+00, float 1.000000e+00) %423 = call float @llvm.pow.f32(float %422, float %44) %424 = fmul float %423, %384 %425 = fmul float %424, %45 %426 = fadd float %425, %134 %427 = fmul float %424, %46 %428 = fadd float %427, %135 %429 = fmul float %424, %47 %430 = fadd float %429, %136 %431 = fmul float %59, %373 %432 = fmul float %60, %375 %433 = fadd float %432, %431 %434 = fmul float %61, %377 %435 = fadd float %434, %433 %436 = fadd float %435, %62 %437 = fmul float %63, %373 %438 = fmul float %64, %375 %439 = fadd float %438, %437 %440 = fmul float %65, %377 %441 = fadd float %440, %439 %442 = fadd float %441, %66 %443 = fmul float %67, %373 %444 = fmul float %68, %375 %445 = fadd float %444, %443 %446 = fmul float %69, %377 %447 = fadd float %446, %445 %448 = fadd float %447, %70 %449 = fmul float %71, %373 %450 = fmul float %72, %375 %451 = fadd float %450, %449 %452 = fmul float %73, %377 %453 = fadd float %452, %451 %454 = fadd float %453, %74 %455 = fdiv float 1.000000e+00, %454 %456 = fmul float %436, %455 %457 = fmul float %442, %455 %458 = fmul float %448, %455 %459 = fsub float -0.000000e+00, %458 %460 = call float @llvm.AMDIL.clamp.(float %459, float 0.000000e+00, float 1.000000e+00) %461 = fadd float %456, %75 %462 = fadd float %457, %75 %463 = bitcast float %460 to i32 %464 = bitcast float %461 to i32 %465 = bitcast float %462 to i32 %466 = insertelement <4 x i32> undef, i32 %463, i32 0 %467 = insertelement <4 x i32> %466, i32 %464, i32 1 %468 = insertelement <4 x i32> %467, i32 %465, i32 2 %469 = insertelement <4 x i32> %468, i32 0, i32 3 %470 = bitcast <8 x i32> %77 to <32 x i8> %471 = bitcast <4 x i32> %79 to <16 x i8> %472 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %469, <32 x i8> %470, <16 x i8> %471, i32 7) %473 = extractelement <4 x float> %472, i32 0 %474 = fsub float %456, %75 %475 = fadd float %75, %457 %476 = bitcast float %460 to i32 %477 = bitcast float %474 to i32 %478 = bitcast float %475 to i32 %479 = insertelement <4 x i32> undef, i32 %476, i32 0 %480 = insertelement <4 x i32> %479, i32 %477, i32 1 %481 = insertelement <4 x i32> %480, i32 %478, i32 2 %482 = insertelement <4 x i32> %481, i32 0, i32 3 %483 = bitcast <8 x i32> %77 to <32 x i8> %484 = bitcast <4 x i32> %79 to <16 x i8> %485 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %482, <32 x i8> %483, <16 x i8> %484, i32 7) %486 = extractelement <4 x float> %485, i32 0 %487 = fadd float %456, %75 %488 = fsub float %457, %75 %489 = bitcast float %460 to i32 %490 = bitcast float %487 to i32 %491 = bitcast float %488 to i32 %492 = insertelement <4 x i32> undef, i32 %489, i32 0 %493 = insertelement <4 x i32> %492, i32 %490, i32 1 %494 = insertelement <4 x i32> %493, i32 %491, i32 2 %495 = insertelement <4 x i32> %494, i32 0, i32 3 %496 = bitcast <8 x i32> %77 to <32 x i8> %497 = bitcast <4 x i32> %79 to <16 x i8> %498 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %495, <32 x i8> %496, <16 x i8> %497, i32 7) %499 = extractelement <4 x float> %498, i32 0 %500 = fsub float %456, %75 %501 = fsub float %457, %75 %502 = bitcast float %460 to i32 %503 = bitcast float %500 to i32 %504 = bitcast float %501 to i32 %505 = insertelement <4 x i32> undef, i32 %502, i32 0 %506 = insertelement <4 x i32> %505, i32 %503, i32 1 %507 = insertelement <4 x i32> %506, i32 %504, i32 2 %508 = insertelement <4 x i32> %507, i32 0, i32 3 %509 = bitcast <8 x i32> %77 to <32 x i8> %510 = bitcast <4 x i32> %79 to <16 x i8> %511 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %508, <32 x i8> %509, <16 x i8> %510, i32 7) %512 = extractelement <4 x float> %511, i32 0 %513 = fadd float %75, %456 %514 = fadd float %457, 0.000000e+00 %515 = bitcast float %460 to i32 %516 = bitcast float %513 to i32 %517 = bitcast float %514 to i32 %518 = insertelement <4 x i32> undef, i32 %515, i32 0 %519 = insertelement <4 x i32> %518, i32 %516, i32 1 %520 = insertelement <4 x i32> %519, i32 %517, i32 2 %521 = insertelement <4 x i32> %520, i32 0, i32 3 %522 = bitcast <8 x i32> %77 to <32 x i8> %523 = bitcast <4 x i32> %79 to <16 x i8> %524 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %521, <32 x i8> %522, <16 x i8> %523, i32 7) %525 = extractelement <4 x float> %524, i32 0 %526 = fsub float %456, %75 %527 = fadd float %457, 0.000000e+00 %528 = bitcast float %460 to i32 %529 = bitcast float %526 to i32 %530 = bitcast float %527 to i32 %531 = insertelement <4 x i32> undef, i32 %528, i32 0 %532 = insertelement <4 x i32> %531, i32 %529, i32 1 %533 = insertelement <4 x i32> %532, i32 %530, i32 2 %534 = insertelement <4 x i32> %533, i32 0, i32 3 %535 = bitcast <8 x i32> %77 to <32 x i8> %536 = bitcast <4 x i32> %79 to <16 x i8> %537 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %534, <32 x i8> %535, <16 x i8> %536, i32 7) %538 = extractelement <4 x float> %537, i32 0 %539 = fadd float %456, 0.000000e+00 %540 = fsub float %457, %75 %541 = bitcast float %460 to i32 %542 = bitcast float %539 to i32 %543 = bitcast float %540 to i32 %544 = insertelement <4 x i32> undef, i32 %541, i32 0 %545 = insertelement <4 x i32> %544, i32 %542, i32 1 %546 = insertelement <4 x i32> %545, i32 %543, i32 2 %547 = insertelement <4 x i32> %546, i32 0, i32 3 %548 = bitcast <8 x i32> %77 to <32 x i8> %549 = bitcast <4 x i32> %79 to <16 x i8> %550 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %547, <32 x i8> %548, <16 x i8> %549, i32 7) %551 = extractelement <4 x float> %550, i32 0 %552 = fadd float %456, 0.000000e+00 %553 = fadd float %75, %457 %554 = bitcast float %460 to i32 %555 = bitcast float %552 to i32 %556 = bitcast float %553 to i32 %557 = insertelement <4 x i32> undef, i32 %554, i32 0 %558 = insertelement <4 x i32> %557, i32 %555, i32 1 %559 = insertelement <4 x i32> %558, i32 %556, i32 2 %560 = insertelement <4 x i32> %559, i32 0, i32 3 %561 = bitcast <8 x i32> %77 to <32 x i8> %562 = bitcast <4 x i32> %79 to <16 x i8> %563 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %560, <32 x i8> %561, <16 x i8> %562, i32 7) %564 = extractelement <4 x float> %563, i32 0 %565 = fsub float 1.000000e+00, %.81 %566 = call float @llvm.pow.f32(float %565, float 0x4000CCCCC0000000) %567 = fmul float %51, %52 %568 = fmul float %51, %53 %569 = fmul float %51, %54 %570 = fmul float %55, %56 %571 = fmul float %55, %57 %572 = fmul float %55, %58 %573 = bitcast float %460 to i32 %574 = bitcast float %456 to i32 %575 = bitcast float %457 to i32 %576 = insertelement <4 x i32> undef, i32 %573, i32 0 %577 = insertelement <4 x i32> %576, i32 %574, i32 1 %578 = insertelement <4 x i32> %577, i32 %575, i32 2 %579 = insertelement <4 x i32> %578, i32 0, i32 3 %580 = bitcast <8 x i32> %77 to <32 x i8> %581 = bitcast <4 x i32> %79 to <16 x i8> %582 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %579, <32 x i8> %580, <16 x i8> %581, i32 7) %583 = extractelement <4 x float> %582, i32 0 %584 = fmul float %473, 0x3FB32D10E0000000 %585 = fmul float %486, 0x3FB32D10E0000000 %586 = fadd float %584, %585 %587 = fmul float %499, 0x3FB32D10E0000000 %588 = fadd float %586, %587 %589 = fmul float %512, 0x3FB32D10E0000000 %590 = fadd float %588, %589 %591 = fmul float %525, 0x3FBFA3FCC0000000 %592 = fmul float %538, 0x3FBFA3FCC0000000 %593 = fadd float %591, %592 %594 = fmul float %551, 0x3FBFA3FCC0000000 %595 = fadd float %593, %594 %596 = fmul float %564, 0x3FBFA3FCC0000000 %597 = fadd float %595, %596 %598 = fadd float %590, %597 %599 = fmul float %583, 0x3FCA5DFA80000000 %600 = fadd float %599, %598 %601 = fmul float %35, %125 %602 = fsub float -0.000000e+00, %601 %603 = fmul float %36, %126 %604 = fsub float %602, %603 %605 = fmul float %37, %127 %606 = fsub float %604, %605 %607 = call float @llvm.AMDIL.clamp.(float %606, float 0.000000e+00, float 1.000000e+00) %608 = fmul float %600, %607 %609 = fsub float 1.000000e+00, %608 %610 = fmul float %609, %566 %611 = fmul float %570, 0x3FCB333340000000 %612 = fmul float %571, 0x3FE6E48E80000000 %613 = fadd float %612, %611 %614 = fmul float %572, 0x3FB2752540000000 %615 = fadd float %613, %614 %616 = call float @llvm.AMDGPU.lrp(float %386, float %615, float %570) %617 = call float @llvm.AMDGPU.lrp(float %386, float %615, float %571) %618 = call float @llvm.AMDGPU.lrp(float %386, float %615, float %572) %619 = fmul float %48, %125 %620 = fmul float %49, %126 %621 = fadd float %620, %619 %622 = fmul float %50, %127 %623 = fadd float %621, %622 %624 = call float @llvm.AMDIL.clamp.(float %623, float 0.000000e+00, float 1.000000e+00) %625 = fmul float %566, %624 %626 = fmul float %567, 0x3FCB333340000000 %627 = fmul float %568, 0x3FE6E48E80000000 %628 = fadd float %627, %626 %629 = fmul float %569, 0x3FB2752540000000 %630 = fadd float %628, %629 %631 = call float @llvm.AMDGPU.lrp(float %386, float %630, float %567) %632 = call float @llvm.AMDGPU.lrp(float %386, float %630, float %568) %633 = call float @llvm.AMDGPU.lrp(float %386, float %630, float %569) %634 = call float @llvm.pow.f32(float %565, float 0x3FFD9999A0000000) %635 = call float @llvm.AMDGPU.lrp(float %634, float 1.000000e+00, float 0x3FDCCCCCC0000000) %636 = fmul float %608, %635 %637 = fmul float %636, %38 %638 = fmul float %636, %39 %639 = fmul float %636, %40 %640 = fmul float %625, %631 %641 = fadd float %640, %637 %642 = fmul float %625, %632 %643 = fadd float %642, %638 %644 = fmul float %625, %633 %645 = fadd float %644, %639 %646 = fmul float %610, %616 %647 = fadd float %646, %641 %648 = fmul float %610, %617 %649 = fadd float %648, %643 %650 = fmul float %610, %618 %651 = fadd float %650, %645 %652 = fmul float %426, %647 %653 = fmul float %428, %649 %654 = fmul float %430, %651 %655 = call float @llvm.AMDGPU.lrp(float %385, float %426, float %652) %656 = call float @llvm.AMDGPU.lrp(float %385, float %428, float %653) %657 = call float @llvm.AMDGPU.lrp(float %385, float %430, float %654) %658 = call float @llvm.AMDIL.clamp.(float %424, float 0.000000e+00, float 1.000000e+00) %659 = fmul float %658, %386 %660 = call i32 @llvm.SI.packf16(float %655, float %656) %661 = bitcast i32 %660 to float %662 = call i32 @llvm.SI.packf16(float %657, float %659) %663 = bitcast i32 %662 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %661, float %663, float %661, float %663) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @floor(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_mov_b32_e32 v19, 0x417f0000 ; 7E2602FF 417F0000 v_mov_b32_e32 v17, 0xc1800000 ; 7E2202FF C1800000 v_mov_b32_e32 v18, 0x437f0000 ; 7E2402FF 437F0000 v_mov_b32_e32 v16, 0xc1000000 ; 7E2002FF C1000000 v_mov_b32_e32 v15, 0x3e124925 ; 7E1E02FF 3E124925 v_mov_b32_e32 v13, 0x3b7f00fc ; 7E1A02FF 3B7F00FC v_mov_b32_e32 v14, 0x3f7f0100 ; 7E1C02FF 3F7F0100 v_mov_b32_e32 v11, 0x3a80007d ; 7E1602FF 3A80007D v_mov_b32_e32 v12, 0x80000000 ; 7E1802FF 80000000 v_mov_b32_e32 v10, 0x42f00000 ; 7E1402FF 42F00000 v_mov_b32_e32 v9, 0xbe99999a ; 7E1202FF BE99999A v_mov_b32_e32 v8, 0x3fb6db6e ; 7E1002FF 3FB6DB6E v_mov_b32_e32 v7, 0x340637bd ; 7E0E02FF 340637BD v_mov_b32_e32 v3, 0x3d996887 ; 7E0602FF 3D996887 v_mov_b32_e32 v2, 0x3dfd1fe6 ; 7E0402FF 3DFD1FE6 s_load_dwordx4 s[12:15], s[2:3], 0x4 ; C0860304 s_load_dwordx4 s[24:27], s[2:3], 0x10 ; C08C0310 s_load_dwordx4 s[8:11], s[2:3], 0x14 ; C0840314 s_load_dwordx4 s[16:19], s[4:5], 0x0 ; C0880500 s_load_dwordx4 s[36:39], s[4:5], 0x4 ; C0920504 s_load_dwordx4 s[52:55], s[4:5], 0x8 ; C09A0508 s_load_dwordx4 s[48:51], s[4:5], 0xc ; C098050C s_load_dwordx4 s[32:35], s[4:5], 0x10 ; C0900510 s_load_dwordx4 s[20:23], s[4:5], 0x14 ; C08A0514 v_interp_p1_f32 v24, v0, 0, 0, [m0] ; C8600000 v_interp_p2_f32 v24, [v24], v1, 0, 0, [m0] ; C8610001 v_interp_p1_f32 v25, v0, 1, 0, [m0] ; C8640100 v_interp_p2_f32 v25, [v25], v1, 1, 0, [m0] ; C8650101 v_mov_b32_e32 v26, 0 ; 7E340280 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s28, s[12:15], 0x0 ; C20E0D00 s_buffer_load_dword s29, s[12:15], 0x1 ; C20E8D01 s_buffer_load_dword s30, s[12:15], 0x4 ; C20F0D04 s_buffer_load_dword s56, s[12:15], 0x5 ; C21C0D05 s_buffer_load_dword s13, s[24:27], 0x4c ; C206994C s_buffer_load_dword s12, s[24:27], 0x4d ; C206194D s_buffer_load_dword s5, s[24:27], 0x4e ; C202994E s_buffer_load_dword s3, s[8:11], 0x0 ; C2018900 s_buffer_load_dword s4, s[8:11], 0x1 ; C2020901 s_buffer_load_dword s2, s[8:11], 0x2 ; C2010902 s_buffer_load_dword s0, s[8:11], 0x4 ; C2000904 s_buffer_load_dword s1, s[8:11], 0x5 ; C2008905 v_interp_p1_f32 v20, v0, 0, 1, [m0] ; C8500400 v_interp_p2_f32 v20, [v20], v1, 0, 1, [m0] ; C8510401 v_interp_p1_f32 v21, v0, 1, 1, [m0] ; C8540500 v_interp_p2_f32 v21, [v21], v1, 1, 1, [m0] ; C8550501 v_interp_p1_f32 v0, v0, 2, 1, [m0] ; C8000600 v_interp_p2_f32 v0, [v0], v1, 2, 1, [m0] ; C8010601 s_buffer_load_dword s14, s[8:11], 0x31 ; C2070931 s_buffer_load_dword s15, s[8:11], 0x32 ; C2078932 v_mul_f32_e32 v1, 0.5, v24 ; 100230F0 v_mul_f32_e32 v4, 0.5, v25 ; 100832F0 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s28, v1 ; 100A021C v_floor_f32_e32 v5, v5 ; 7E0A4905 v_mad_f32 v1, s28, v1, -v5 ; D2820001 8416021C v_sub_f32_e32 v1, 1.0, v1 ; 080202F2 s_buffer_load_dword s24, s[8:11], 0x33 ; C20C0933 v_mul_f32_e32 v5, s29, v4 ; 100A081D v_floor_f32_e32 v5, v5 ; 7E0A4905 v_mad_f32 v4, s29, v4, -v5 ; D2820004 8416081D v_sub_f32_e32 v22, 1.0, v4 ; 082C08F2 s_buffer_load_dword s25, s[8:11], 0x34 ; C20C8934 v_mul_f32_e32 v4, s30, v1 ; 1008021E v_mul_f32_e32 v5, s56, v22 ; 100A2C38 v_mad_f32 v27, 0.5, v24, -v4 ; D282001B 841230F0 v_mad_f32 v28, 0.5, v25, -v5 ; D282001C 841632F0 v_add_f32_e32 v29, s30, v27 ; 063A361E v_mov_b32_e32 v30, v28 ; 7E3C031C s_buffer_load_dword s28, s[8:11], 0x35 ; C20E0935 s_buffer_load_dword s27, s[8:11], 0x36 ; C20D8936 s_buffer_load_dword s26, s[8:11], 0x37 ; C20D0937 s_buffer_load_dword s29, s[8:11], 0x38 ; C20E8938 s_buffer_load_dword s30, s[8:11], 0x39 ; C20F0939 s_buffer_load_dword s31, s[8:11], 0x3a ; C20F893A s_load_dwordx8 s[60:67], s[6:7], 0x10 ; C0DE0710 s_load_dwordx8 s[68:75], s[6:7], 0x18 ; C0E20718 s_load_dwordx8 s[76:83], s[6:7], 0x20 ; C0E60720 s_load_dwordx8 s[40:47], s[6:7], 0x8 ; C0D40708 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[31:33], 7, 0, 0, 0, 0, 0, 0, 0, v[24:27], s[60:67], s[52:55] ; F0900700 01AF1F18 image_sample_l v[4:6], 7, 0, 0, 0, 0, 0, 0, 0, v[24:27], s[68:75], s[48:51] ; F0900700 01910418 image_sample_l v34, 1, 0, 0, 0, 0, 0, 0, 0, v[24:27], s[76:83], s[32:35] ; F0900100 01132218 image_sample v[35:38], 15, 0, 0, 0, 0, 0, 0, 0, v[27:28], s[40:47], s[36:39] ; F0800F00 012A231B s_buffer_load_dword s33, s[8:11], 0x3c ; C210893C s_buffer_load_dword s32, s[8:11], 0x3d ; C210093D v_add_f32_e32 v28, s56, v28 ; 06383838 image_sample v[39:42], 15, 0, 0, 0, 0, 0, 0, 0, v[29:30], s[40:47], s[36:39] ; F0800F00 012A271D v_mov_b32_e32 v30, v28 ; 7E3C031C image_sample v[43:46], 15, 0, 0, 0, 0, 0, 0, 0, v[27:28], s[40:47], s[36:39] ; F0800F00 012A2B1B s_buffer_load_dword s34, s[8:11], 0x3e ; C211093E s_buffer_load_dword s35, s[8:11], 0x3f ; C211893F image_sample v[27:30], 15, 0, 0, 0, 0, 0, 0, 0, v[29:30], s[40:47], s[36:39] ; F0800F00 012A1B1D s_waitcnt vmcnt(4) ; BF8C0774 v_mad_f32 v20, v20, v34, s13 ; D2820014 00364514 v_mad_f32 v21, v21, v34, s12 ; D2820015 00324515 s_waitcnt vmcnt(3) lgkmcnt(0) ; BF8C0073 v_mul_f32_e32 v23, s33, v20 ; 102E2821 v_mac_f32_e32 v23, s32, v21 ; 3E2E2A20 s_buffer_load_dword s32, s[8:11], 0x3b ; C210093B v_mad_f32 v0, v0, v34, s5 ; D2820000 00164500 s_load_dwordx8 s[36:43], s[6:7], 0x28 ; C0D20728 s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v23, s34, v0 ; 3E2E0022 v_add_f32_e32 v23, s35, v23 ; 062E2E23 s_buffer_load_dword s33, s[8:11], 0x30 ; C2108930 v_rcp_f32_e32 v47, v23 ; 7E5E5517 v_mul_f32_e32 v23, s29, v20 ; 102E281D v_mac_f32_e32 v23, s30, v21 ; 3E2E2A1E v_mac_f32_e32 v23, s31, v0 ; 3E2E001F s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_add_f32_e32 v23, s32, v23 ; 062E2E20 v_mul_f32_e32 v23, v47, v23 ; 102E2F2F image_sample_l v[48:50], 7, 0, 0, 0, 0, 0, 0, 0, v[24:27], s[36:43], s[20:23] ; F0900700 00A93018 v_add_f32_e64 v23, 0, -v23 clamp ; D2060817 40022E80 s_buffer_load_dword s20, s[8:11], 0x40 ; C20A0940 v_mul_f32_e32 v24, s33, v20 ; 10302821 v_mac_f32_e32 v24, s14, v21 ; 3E302A0E v_mac_f32_e32 v24, s15, v0 ; 3E30000F v_add_f32_e32 v51, s24, v24 ; 06663018 v_mul_f32_e32 v24, s25, v20 ; 10302819 v_mac_f32_e32 v24, s28, v21 ; 3E302A1C s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700 v_mac_f32_e32 v24, s27, v0 ; 3E30001B v_add_f32_e32 v52, s26, v24 ; 0668301A s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mad_f32 v24, v51, v47, s20 ; D2820018 00525F33 v_mad_f32 v25, v52, v47, s20 ; D2820019 00525F34 v_mad_f32 v53, v51, v47, -s20 ; D2820035 80525F33 v_mov_b32_e32 v54, v23 ; 7E6C0317 v_mov_b32_e32 v55, v24 ; 7E6E0318 v_mov_b32_e32 v56, v25 ; 7E700319 v_mov_b32_e32 v57, v26 ; 7E72031A v_mov_b32_e32 v55, v53 ; 7E6E0335 v_mad_f32 v53, v52, v47, -s20 ; D2820035 80525F34 v_mov_b32_e32 v58, v23 ; 7E740317 v_mov_b32_e32 v59, v24 ; 7E760318 v_mov_b32_e32 v60, v25 ; 7E780319 v_mov_b32_e32 v61, v26 ; 7E7A031A v_mov_b32_e32 v56, v25 ; 7E700319 v_mov_b32_e32 v60, v53 ; 7E780335 v_mov_b32_e32 v57, v26 ; 7E72031A v_mov_b32_e32 v61, v26 ; 7E7A031A image_sample_c_l v62, 1, 0, 0, 0, 0, 0, 0, 0, v[23:26], s[28:35], s[16:19] ; F0B00100 00873E17 image_sample_c_l v63, 1, 0, 0, 0, 0, 0, 0, 0, v[54:57], s[28:35], s[16:19] ; F0B00100 00873F36 image_sample_c_l v58, 1, 0, 0, 0, 0, 0, 0, 0, v[58:61], s[28:35], s[16:19] ; F0B00100 00873A3A v_mul_f32_e32 v52, v47, v52 ; 1068692F v_mov_b32_e32 v56, v53 ; 7E700335 v_mov_b32_e32 v64, v23 ; 7E800317 v_mov_b32_e32 v65, v24 ; 7E820318 v_mov_b32_e32 v66, v25 ; 7E840319 v_mov_b32_e32 v67, v26 ; 7E86031A v_mul_f32_e32 v24, v47, v51 ; 1030672F v_mov_b32_e32 v57, v26 ; 7E72031A v_mov_b32_e32 v66, v52 ; 7E840334 image_sample_c_l v47, 1, 0, 0, 0, 0, 0, 0, 0, v[54:57], s[28:35], s[16:19] ; F0B00100 00872F36 v_mov_b32_e32 v56, v52 ; 7E700334 v_mov_b32_e32 v67, v26 ; 7E86031A v_mov_b32_e32 v57, v26 ; 7E72031A v_mov_b32_e32 v68, v23 ; 7E880317 v_mov_b32_e32 v69, v24 ; 7E8A0318 v_mov_b32_e32 v70, v25 ; 7E8C0319 v_mov_b32_e32 v71, v26 ; 7E8E031A image_sample_c_l v51, 1, 0, 0, 0, 0, 0, 0, 0, v[64:67], s[28:35], s[16:19] ; F0B00100 00873340 v_mov_b32_e32 v70, v53 ; 7E8C0335 image_sample_c_l v53, 1, 0, 0, 0, 0, 0, 0, 0, v[54:57], s[28:35], s[16:19] ; F0B00100 00873536 v_mov_b32_e32 v71, v26 ; 7E8E031A image_sample_c_l v54, 1, 0, 0, 0, 0, 0, 0, 0, v[68:71], s[28:35], s[16:19] ; F0B00100 00873644 image_sample_c_l v55, 1, 0, 0, 0, 0, 0, 0, 0, v[23:26], s[28:35], s[16:19] ; F0B00100 00873717 v_mov_b32_e32 v25, v52 ; 7E320334 image_sample_c_l v23, 1, 0, 0, 0, 0, 0, 0, 0, v[23:26], s[28:35], s[16:19] ; F0B00100 00871717 v_add_f32_e32 v24, -0.5, v31 ; 06303EF1 v_add_f32_e32 v25, -0.5, v32 ; 063240F1 v_add_f32_e32 v26, -0.5, v33 ; 063442F1 v_mad_f32 v31, v37, v13, -v34 ; D282001F 848A1B25 v_mac_f32_e32 v31, v14, v36 ; 3E3E490E v_add_f32_e32 v31, v11, v31 ; 063E3F0B v_cmp_gt_f32_e32 vcc, 0, v31 ; 7C083E80 v_xor_b32_e32 v32, v31, v12 ; 3A40191F v_cndmask_b32_e32 v31, v31, v32 ; 003E411F v_add_f32_e32 v24, v24, v24 ; 06303118 v_add_f32_e32 v25, v25, v25 ; 06323319 v_mul_f32_e32 v32, v19, v38 ; 10404D13 v_floor_f32_e32 v32, v32 ; 7E404920 v_mul_f32_e32 v33, v17, v32 ; 10424111 v_mac_f32_e32 v33, v18, v38 ; 3E424D12 v_add_f32_e32 v32, v16, v32 ; 06404110 v_add_f32_e32 v33, v16, v33 ; 06424310 v_mul_f32_e32 v32, v15, v32 ; 1040410F v_mul_f32_e32 v33, v15, v33 ; 1042430F v_mad_f32 v36, -v32, v32, 1.0 ; D2820024 23CA4120 v_mad_f32 v36, -v33, v33, v36 ; D2820024 24924321 v_mul_f32_e32 v32, v24, v32 ; 10404118 v_mac_f32_e32 v32, v25, v33 ; 3E404319 v_add_f32_e32 v26, v26, v26 ; 0634351A v_max_f32_e32 v33, 0, v36 ; 20424880 v_sqrt_f32_e32 v33, v33 ; 7E426721 v_mac_f32_e32 v32, v26, v33 ; 3E40431A v_mul_f32_e32 v31, v10, v31 ; 103E3F0A v_add_f32_e64 v31, 0, v31 clamp ; D206081F 00023E80 v_sub_f32_e32 v31, 1.0, v31 ; 083E3EF2 v_add_f32_e32 v32, v9, v32 ; 06404109 v_mul_f32_e32 v32, v8, v32 ; 10404108 v_add_f32_e64 v32, 0, v32 clamp ; D2060820 00024080 v_mul_f32_e32 v31, v32, v31 ; 103E3F20 v_mad_f32 v32, v41, v13, -v34 ; D2820020 848A1B29 v_mac_f32_e32 v32, v14, v40 ; 3E40510E v_add_f32_e32 v32, v11, v32 ; 0640410B v_cmp_gt_f32_e32 vcc, 0, v32 ; 7C084080 v_xor_b32_e32 v33, v32, v12 ; 3A421920 v_cndmask_b32_e32 v32, v32, v33 ; 00404320 v_mul_f32_e32 v33, v19, v42 ; 10425513 v_floor_f32_e32 v33, v33 ; 7E424921 v_mul_f32_e32 v36, v17, v33 ; 10484311 v_mac_f32_e32 v36, v18, v42 ; 3E485512 v_add_f32_e32 v33, v16, v33 ; 06424310 v_add_f32_e32 v36, v16, v36 ; 06484910 v_mul_f32_e32 v33, v15, v33 ; 1042430F v_mul_f32_e32 v36, v15, v36 ; 1048490F v_mad_f32 v37, -v33, v33, 1.0 ; D2820025 23CA4321 v_mad_f32 v37, -v36, v36, v37 ; D2820025 24964924 v_mul_f32_e32 v33, v24, v33 ; 10424318 v_mac_f32_e32 v33, v25, v36 ; 3E424919 v_max_f32_e32 v36, 0, v37 ; 20484A80 v_sqrt_f32_e32 v36, v36 ; 7E486724 v_mac_f32_e32 v33, v26, v36 ; 3E42491A v_mul_f32_e32 v32, v10, v32 ; 1040410A v_add_f32_e64 v32, 0, v32 clamp ; D2060820 00024080 v_sub_f32_e32 v32, 1.0, v32 ; 084040F2 v_add_f32_e32 v33, v9, v33 ; 06424309 v_mul_f32_e32 v33, v8, v33 ; 10424308 v_add_f32_e64 v33, 0, v33 clamp ; D2060821 00024280 v_mul_f32_e32 v32, v33, v32 ; 10404121 v_sub_f32_e32 v33, 1.0, v1 ; 084202F2 v_sub_f32_e32 v36, 1.0, v22 ; 08482CF2 v_mul_f32_e32 v37, v33, v36 ; 104A4921 v_mul_f32_e32 v36, v1, v36 ; 10484901 v_mul_f32_e32 v38, v31, v37 ; 104C4B1F v_mul_f32_e32 v38, v35, v38 ; 104C4D23 v_mul_f32_e32 v32, v32, v36 ; 10404920 v_mac_f32_e32 v38, v39, v32 ; 3E4C4127 v_mul_f32_e32 v36, v39, v36 ; 10484927 v_mac_f32_e32 v36, v35, v37 ; 3E484B23 v_mad_f32 v35, v45, v13, -v34 ; D2820023 848A1B2D v_mac_f32_e32 v35, v14, v44 ; 3E46590E v_add_f32_e32 v35, v11, v35 ; 0646470B v_cmp_gt_f32_e32 vcc, 0, v35 ; 7C084680 v_xor_b32_e32 v39, v35, v12 ; 3A4E1923 v_cndmask_b32_e32 v35, v35, v39 ; 00464F23 v_mul_f32_e32 v39, v19, v46 ; 104E5D13 v_floor_f32_e32 v39, v39 ; 7E4E4927 v_mul_f32_e32 v40, v17, v39 ; 10504F11 v_mac_f32_e32 v40, v18, v46 ; 3E505D12 v_add_f32_e32 v39, v16, v39 ; 064E4F10 v_add_f32_e32 v40, v16, v40 ; 06505110 v_mul_f32_e32 v39, v15, v39 ; 104E4F0F v_mul_f32_e32 v40, v15, v40 ; 1050510F v_mad_f32 v41, -v39, v39, 1.0 ; D2820029 23CA4F27 v_mad_f32 v41, -v40, v40, v41 ; D2820029 24A65128 v_mul_f32_e32 v39, v24, v39 ; 104E4F18 v_mac_f32_e32 v39, v25, v40 ; 3E4E5119 v_max_f32_e32 v40, 0, v41 ; 20505280 v_sqrt_f32_e32 v40, v40 ; 7E506728 v_mac_f32_e32 v39, v26, v40 ; 3E4E511A v_mul_f32_e32 v35, v10, v35 ; 1046470A v_add_f32_e64 v35, 0, v35 clamp ; D2060823 00024680 v_sub_f32_e32 v35, 1.0, v35 ; 084646F2 v_add_f32_e32 v39, v9, v39 ; 064E4F09 v_mul_f32_e32 v39, v8, v39 ; 104E4F08 v_add_f32_e64 v39, 0, v39 clamp ; D2060827 00024E80 v_mul_f32_e32 v35, v39, v35 ; 10464727 v_mul_f32_e32 v33, v22, v33 ; 10424316 v_mul_f32_e32 v39, v35, v33 ; 104E4323 v_mac_f32_e32 v38, v43, v39 ; 3E4C4F2B v_mac_f32_e32 v36, v43, v33 ; 3E48432B v_mul_f32_e32 v1, v22, v1 ; 10020316 v_mul_f32_e32 v19, v19, v30 ; 10263D13 v_floor_f32_e32 v19, v19 ; 7E264913 v_mul_f32_e32 v17, v17, v19 ; 10222711 v_mac_f32_e32 v17, v18, v30 ; 3E223D12 v_add_f32_e32 v18, v16, v19 ; 06242710 v_add_f32_e32 v16, v16, v17 ; 06202310 v_mul_f32_e32 v17, v15, v18 ; 1022250F v_mul_f32_e32 v15, v15, v16 ; 101E210F v_mad_f32 v13, v29, v13, -v34 ; D282000D 848A1B1D v_mac_f32_e32 v13, v14, v28 ; 3E1A390E v_add_f32_e32 v11, v11, v13 ; 06161B0B v_xor_b32_e32 v12, v11, v12 ; 3A18190B v_cmp_gt_f32_e32 vcc, 0, v11 ; 7C081680 v_cndmask_b32_e32 v11, v11, v12 ; 0016190B v_mul_f32_e32 v10, v10, v11 ; 1014170A v_mad_f32 v11, -v17, v17, 1.0 ; D282000B 23CA2311 v_mad_f32 v11, -v15, v15, v11 ; D282000B 242E1F0F v_mul_f32_e32 v12, v24, v17 ; 10182318 v_mac_f32_e32 v12, v25, v15 ; 3E181F19 v_max_f32_e32 v11, 0, v11 ; 20161680 v_sqrt_f32_e32 v11, v11 ; 7E16670B v_mac_f32_e32 v12, v26, v11 ; 3E18171A v_add_f32_e32 v9, v9, v12 ; 06121909 v_mul_f32_e32 v8, v8, v9 ; 10101308 v_add_f32_e64 v9, 0, v10 clamp ; D2060809 00021480 v_sub_f32_e32 v9, 1.0, v9 ; 081212F2 v_add_f32_e64 v8, 0, v8 clamp ; D2060808 00021080 v_mul_f32_e32 v8, v8, v9 ; 10101308 v_mul_f32_e32 v9, v8, v1 ; 10120308 v_mac_f32_e32 v38, v27, v9 ; 3E4C131B v_mac_f32_e32 v36, v27, v1 ; 3E48031B v_mac_f32_e32 v32, v31, v37 ; 3E404B1F v_subrev_f32_e32 v9, s13, v20 ; 0A12280D v_subrev_f32_e32 v10, s12, v21 ; 0A142A0C v_mul_f32_e32 v11, v9, v9 ; 10161309 v_mac_f32_e32 v11, v10, v10 ; 3E16150A v_subrev_f32_e32 v12, s5, v0 ; 0A180005 v_mac_f32_e32 v11, v12, v12 ; 3E16190C v_rsq_clamp_f32_e32 v11, v11 ; 7E16590B v_mac_f32_e32 v32, v35, v33 ; 3E404323 v_mac_f32_e32 v32, v8, v1 ; 3E400308 v_cmp_gt_f32_e32 vcc, v7, v32 ; 7C084107 v_mul_f32_e32 v1, v11, v9 ; 1002130B v_mul_f32_e32 v7, v11, v10 ; 100E150B v_mul_f32_e32 v1, v24, v1 ; 10020318 v_mad_f32 v1, -v7, v25, -v1 ; D2820001 A4063307 v_mul_f32_e32 v7, v11, v12 ; 100E190B v_mad_f32 v1, -v7, v26, v1 ; D2820001 24063507 v_mul_f32_e32 v7, v24, v1 ; 100E0318 v_mac_f32_e32 v7, v24, v1 ; 3E0E0318 v_mul_f32_e32 v8, v25, v1 ; 10100319 v_mac_f32_e32 v8, v25, v1 ; 3E100319 v_mul_f32_e32 v9, v26, v1 ; 1012031A v_mac_f32_e32 v9, v26, v1 ; 3E12031A v_sub_f32_e32 v1, s13, v20 ; 0802280D v_mad_f32 v1, v1, v11, -v7 ; D2820001 841E1701 v_sub_f32_e32 v7, s12, v21 ; 080E2A0C s_buffer_load_dword s6, s[8:11], 0x8 ; C2030908 s_buffer_load_dword s7, s[8:11], 0x9 ; C2038909 s_buffer_load_dword s12, s[8:11], 0xa ; C206090A v_rcp_f32_e32 v10, v32 ; 7E145520 v_mad_f32 v7, v7, v11, -v8 ; D2820007 84221707 v_sub_f32_e32 v0, s5, v0 ; 08000005 v_mad_f32 v0, v0, v11, -v9 ; D2820000 84261700 v_mul_f32_e32 v8, v10, v38 ; 10104D0A v_cndmask_b32_e32 v8, v8, v36 ; 00104908 s_buffer_load_dword s5, s[8:11], 0xb ; C202890B s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v1, s6, v1 ; 10020206 v_mac_f32_e32 v1, s7, v7 ; 3E020E07 v_mac_f32_e32 v1, s12, v0 ; 3E02000C s_buffer_load_dword s6, s[8:11], 0x6 ; C2030906 s_buffer_load_dword s7, s[8:11], 0x17 ; C2038917 s_buffer_load_dword s12, s[8:11], 0x18 ; C2060918 s_buffer_load_dword s13, s[8:11], 0x19 ; C2068919 s_buffer_load_dword s14, s[8:11], 0x1a ; C207091A s_buffer_load_dword s15, s[8:11], 0xc ; C207890C s_buffer_load_dword s16, s[8:11], 0xd ; C208090D s_buffer_load_dword s17, s[8:11], 0xe ; C208890E s_buffer_load_dword s18, s[8:11], 0x10 ; C2090910 s_buffer_load_dword s19, s[8:11], 0x11 ; C2098911 s_buffer_load_dword s20, s[8:11], 0x12 ; C20A0912 s_buffer_load_dword s21, s[8:11], 0x13 ; C20A8913 s_buffer_load_dword s22, s[8:11], 0x14 ; C20B0914 s_buffer_load_dword s23, s[8:11], 0x15 ; C20B8915 s_buffer_load_dword s8, s[8:11], 0x16 ; C2040916 v_mul_f32_e32 v0, v3, v63 ; 10007F03 v_mac_f32_e32 v0, v3, v62 ; 3E007D03 v_mac_f32_e32 v0, v3, v58 ; 3E007503 v_mac_f32_e32 v0, v3, v47 ; 3E005F03 v_mul_f32_e32 v3, v2, v53 ; 10066B02 v_mac_f32_e32 v3, v2, v51 ; 3E066702 v_mac_f32_e32 v3, v2, v54 ; 3E066D02 v_mac_f32_e32 v3, v2, v55 ; 3E066F02 v_mul_f32_e32 v2, s3, v24 ; 10043003 v_mad_f32 v2, -s4, v25, -v2 ; D2820002 A40A3204 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s18, v24 ; 100E3012 v_mac_f32_e32 v7, s19, v25 ; 3E0E3213 v_mad_f32 v2, -s2, v26, v2 ; D2820002 240A3402 v_mac_f32_e32 v7, s20, v26 ; 3E0E3414 v_sub_f32_e32 v8, 1.0, v8 ; 081010F2 v_log_f32_e32 v8, v8 ; 7E104F08 v_mov_b32_e32 v9, s22 ; 7E120216 v_mul_f32_e32 v9, s21, v9 ; 10121215 v_mov_b32_e32 v10, s23 ; 7E140217 v_mul_f32_e32 v10, s21, v10 ; 10141415 v_mov_b32_e32 v11, s8 ; 7E160208 v_mul_f32_e32 v11, s21, v11 ; 10161615 v_add_f32_e32 v0, v3, v0 ; 06000103 v_madmk_f32_e32 v0, v23, v0, 0x3e52efd4 ; 40000117 3E52EFD4 v_mul_legacy_f32_e32 v3, 0x3feccccd, v8 ; 0E0610FF 3FECCCCD v_exp_f32_e32 v3, v3 ; 7E064B03 v_sub_f32_e32 v12, 1.0, v3 ; 081806F2 v_madmk_f32_e32 v3, v12, v3, 0x3ee66666 ; 4006070C 3EE66666 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_mul_f32_e32 v12, v2, v0 ; 10180102 v_mul_f32_e32 v3, v3, v12 ; 10061903 v_mul_f32_e32 v12, s0, v3 ; 10180600 v_mul_f32_e32 v13, s1, v3 ; 101A0601 v_mul_f32_e32 v3, s6, v3 ; 10060606 v_mov_b32_e32 v14, 0x3e59999a ; 7E1C02FF 3E59999A v_mov_b32_e32 v15, 0x3f372474 ; 7E1E02FF 3F372474 v_mul_f32_e32 v16, v14, v9 ; 1020130E v_mac_f32_e32 v16, v15, v10 ; 3E20150F v_mov_b32_e32 v17, 0x3d93a92a ; 7E2202FF 3D93A92A v_mac_f32_e32 v16, v17, v11 ; 3E201711 v_sub_f32_e32 v18, 1.0, v50 ; 082464F2 v_mul_f32_e32 v9, v9, v18 ; 10122509 v_mac_f32_e32 v9, v16, v50 ; 3E126510 v_mul_f32_e32 v10, v10, v18 ; 1014250A v_mac_f32_e32 v10, v16, v50 ; 3E146510 v_mul_f32_e32 v11, v11, v18 ; 1016250B v_mac_f32_e32 v11, v16, v50 ; 3E166510 v_mul_legacy_f32_e32 v8, 0x40066666, v8 ; 0E1010FF 40066666 v_exp_f32_e32 v8, v8 ; 7E104B08 v_add_f32_e64 v7, 0, v7 clamp ; D2060807 00020E80 v_mul_f32_e32 v7, v7, v8 ; 100E1107 v_mac_f32_e32 v12, v9, v7 ; 3E180F09 v_mac_f32_e32 v13, v10, v7 ; 3E1A0F0A v_mac_f32_e32 v3, v11, v7 ; 3E060F0B v_mad_f32 v0, -v0, v2, 1.0 ; D2820000 23CA0500 v_mul_f32_e32 v0, v8, v0 ; 10000108 v_mov_b32_e32 v2, s12 ; 7E04020C v_mul_f32_e32 v2, s7, v2 ; 10040407 v_mov_b32_e32 v7, s13 ; 7E0E020D v_mul_f32_e32 v7, s7, v7 ; 100E0E07 v_mul_f32_e32 v8, v14, v2 ; 1010050E v_mac_f32_e32 v8, v15, v7 ; 3E100F0F v_mov_b32_e32 v9, s14 ; 7E12020E v_mul_f32_e32 v9, s7, v9 ; 10121207 v_mac_f32_e32 v8, v17, v9 ; 3E101311 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_log_f32_e32 v1, v1 ; 7E024F01 v_mul_f32_e32 v2, v2, v18 ; 10042502 v_mul_f32_e32 v7, v7, v18 ; 100E2507 v_mul_f32_e32 v9, v9, v18 ; 10122509 v_mul_legacy_f32_e32 v1, s5, v1 ; 0E020205 v_exp_f32_e32 v1, v1 ; 7E024B01 v_mul_f32_e32 v1, v48, v1 ; 10020330 v_mac_f32_e32 v2, v8, v50 ; 3E046508 v_mac_f32_e32 v7, v8, v50 ; 3E0E6508 v_mac_f32_e32 v9, v8, v50 ; 3E126508 v_mad_f32 v4, s15, v1, v4 ; D2820004 0412020F v_mac_f32_e32 v12, v2, v0 ; 3E180102 v_mad_f32 v2, s16, v1, v5 ; D2820002 04160210 v_mac_f32_e32 v6, s17, v1 ; 3E0C0211 v_mac_f32_e32 v13, v7, v0 ; 3E1A0107 v_mac_f32_e32 v3, v9, v0 ; 3E060109 v_mul_f32_e32 v0, v12, v4 ; 1000090C v_mul_f32_e32 v5, v13, v2 ; 100A050D v_mul_f32_e32 v3, v3, v6 ; 10060D03 v_sub_f32_e32 v7, 1.0, v49 ; 080E62F2 v_mul_f32_e32 v0, v0, v7 ; 10000F00 v_mac_f32_e32 v0, v4, v49 ; 3E006304 v_mul_f32_e32 v4, v5, v7 ; 10080F05 v_mac_f32_e32 v4, v2, v49 ; 3E086302 v_mul_f32_e32 v2, v3, v7 ; 10040F03 v_mac_f32_e32 v2, v6, v49 ; 3E046306 v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280 v_mul_f32_e32 v1, v50, v1 ; 10020332 v_cvt_pkrtz_f16_f32_e32 v0, v0, v4 ; 5E000900 v_cvt_pkrtz_f16_f32_e32 v1, v2, v1 ; 5E020302 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 88 VGPRS: 72 Code Size: 2196 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL CONST[1][0..41] DCL CONST[2][0..13] DCL CONST[3][0] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL IMM[0] FLT32 { 0.0000, -1.0000, 1.0000, 0.0000} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MOV TEMP[0].zw, IMM[0].zzyz 4: MOV TEMP[0].x, IN[0].xxxx 5: MOV TEMP[0].y, -IN[0].yyyy 6: MOV OUT[1], TEMP[1] 7: MOV OUT[2].xy, IN[1].xyxx 8: MOV OUT[0], TEMP[0] 9: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = add i32 %5, %7 %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13) %15 = extractelement <4 x float> %14, i32 0 %16 = extractelement <4 x float> %14, i32 1 %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 %19 = add i32 %5, %7 %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19) %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = fsub float -0.000000e+00, %16 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %21, float %22, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %15, float %23, float -1.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 v_mov_b32_e32 v1, 0 ; 7E020280 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[4:7], v0, s[4:7], 0 idxen ; E00C2000 80010400 v_mov_b32_e32 v0, 1.0 ; 7E0002F2 exp 15, 32, 0, 0, 0, v1, v1, v1, v1 ; F800020F 01010101 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 33, 0, 0, 0, v4, v5, v1, v1 ; F800021F 01010504 v_xor_b32_e32 v3, 0x80000000, v3 ; 3A0606FF 80000000 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v4, -1.0 ; 7E0802F3 exp 15, 12, 0, 0, 0, v2, v3, v4, v0 ; F80000CF 00040302 exp 15, 13, 0, 1, 0, v1, v1, v1, v1 ; F80008DF 01010101 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 100 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL CONST[1][0..2] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0..6], LOCAL IMM[0] UINT32 {0, 32, 16, 4} IMM[1] FLT32 { 0.0000, -2.0000, 0.5000, -1.0000} IMM[2] UINT32 {48, 0, 0, 0} IMM[3] FLT32 { 0.7500, 1.0000, -1.0000, 0.0000} IMM[4] FLT32 { 2.0000, 0.0000, 0.1250, 0.0000} 0: MUL TEMP[0], CONST[1][0].xyxy, CONST[1][2] 1: MAD TEMP[1].xy, CONST[1][0].xyyy, IMM[1].xyyy, IN[0].xyyy 2: MAX TEMP[1].xy, TEMP[1].xyyy, TEMP[0].xyyy 3: MIN TEMP[1].xy, TEMP[1].xyyy, TEMP[0].zwww 4: MOV TEMP[2].xy, TEMP[1].xyyy 5: MOV TEMP[2].w, IMM[1].xxxx 6: TXL TEMP[2].w, TEMP[2], SAMP[0], 2D 7: MUL TEMP[2].x, TEMP[2].wwww, CONST[1][1].xxxx 8: MOV_SAT TEMP[2].x, TEMP[2].xxxx 9: MUL TEMP[2].xyz, TEMP[2].xxxx, CONST[5][3].xyzz 10: MOV TEMP[3].xy, TEMP[1].xyyy 11: MOV TEMP[3].w, IMM[1].xxxx 12: TXL TEMP[3].yz, TEMP[3], SAMP[1], 2D 13: MOV TEMP[4].xyz, TEMP[2].xyzx 14: FSLT TEMP[5].x, IMM[1].xxxx, TEMP[3].yyyy 15: UIF TEMP[5].xxxx :0 16: MUL TEMP[3].x, TEMP[3].yyyy, TEMP[3].zzzz 17: MOV_SAT TEMP[3].x, TEMP[3].xxxx 18: MOV TEMP[1].xy, TEMP[1].xyyy 19: MOV TEMP[1].w, IMM[1].xxxx 20: TXL TEMP[1].xyz, TEMP[1], SAMP[2], 2D 21: MAD TEMP[4].xyz, TEMP[3].xxxx, TEMP[1].xyzz, TEMP[2].xyzz 22: ENDIF 23: MUL TEMP[1].xyz, TEMP[4].xyzz, IMM[1].zzzz 24: ADD TEMP[2].xy, -CONST[1][0].xyyy, IN[0].xyyy 25: MAX TEMP[2].xy, TEMP[2].xyyy, TEMP[0].xyyy 26: MIN TEMP[2].xy, TEMP[2].xyyy, TEMP[0].zwww 27: MOV TEMP[3].xy, TEMP[2].xyyy 28: MOV TEMP[3].w, IMM[1].xxxx 29: TXL TEMP[3].w, TEMP[3], SAMP[0], 2D 30: MUL TEMP[3].x, TEMP[3].wwww, CONST[1][1].xxxx 31: MOV_SAT TEMP[3].x, TEMP[3].xxxx 32: MUL TEMP[3].xyz, TEMP[3].xxxx, CONST[5][3].xyzz 33: MOV TEMP[4].xy, TEMP[2].xyyy 34: MOV TEMP[4].w, IMM[1].xxxx 35: TXL TEMP[4].yz, TEMP[4], SAMP[1], 2D 36: MOV TEMP[5].xyz, TEMP[3].xyzx 37: FSLT TEMP[6].x, IMM[1].xxxx, TEMP[4].yyyy 38: UIF TEMP[6].xxxx :0 39: MUL TEMP[4].x, TEMP[4].yyyy, TEMP[4].zzzz 40: MOV_SAT TEMP[4].x, TEMP[4].xxxx 41: MOV TEMP[2].xy, TEMP[2].xyyy 42: MOV TEMP[2].w, IMM[1].xxxx 43: TXL TEMP[2].xyz, TEMP[2], SAMP[2], 2D 44: MAD TEMP[5].xyz, TEMP[4].xxxx, TEMP[2].xyzz, TEMP[3].xyzz 45: ENDIF 46: MAD TEMP[1].xyz, IMM[1].zzzz, TEMP[5].xyzz, TEMP[1].xyzz 47: MAD TEMP[2].xy, CONST[1][0].xyyy, IMM[1].xwww, IN[0].xyyy 48: MAX TEMP[2].xy, TEMP[2].xyyy, TEMP[0].xyyy 49: MIN TEMP[2].xy, TEMP[2].xyyy, TEMP[0].zwww 50: MOV TEMP[3].xy, TEMP[2].xyyy 51: MOV TEMP[3].w, IMM[1].xxxx 52: TXL TEMP[3].w, TEMP[3], SAMP[0], 2D 53: MUL TEMP[3].x, TEMP[3].wwww, CONST[1][1].xxxx 54: MOV_SAT TEMP[3].x, TEMP[3].xxxx 55: MUL TEMP[3].xyz, TEMP[3].xxxx, CONST[5][3].xyzz 56: MOV TEMP[4].xy, TEMP[2].xyyy 57: MOV TEMP[4].w, IMM[1].xxxx 58: TXL TEMP[4].yz, TEMP[4], SAMP[1], 2D 59: MOV TEMP[5].xyz, TEMP[3].xyzx 60: FSLT TEMP[6].x, IMM[1].xxxx, TEMP[4].yyyy 61: UIF TEMP[6].xxxx :0 62: MUL TEMP[4].x, TEMP[4].yyyy, TEMP[4].zzzz 63: MOV_SAT TEMP[4].x, TEMP[4].xxxx 64: MOV TEMP[2].xy, TEMP[2].xyyy 65: MOV TEMP[2].w, IMM[1].xxxx 66: TXL TEMP[2].xyz, TEMP[2], SAMP[2], 2D 67: MAD TEMP[5].xyz, TEMP[4].xxxx, TEMP[2].xyzz, TEMP[3].xyzz 68: ENDIF 69: MAD TEMP[1].xyz, TEMP[5].xyzz, IMM[3].xxxx, TEMP[1].xyzz 70: MAD TEMP[2].xy, CONST[1][0].xyyy, IMM[3].yzzz, IN[0].xyyy 71: MAX TEMP[2].xy, TEMP[2].xyyy, TEMP[0].xyyy 72: MIN TEMP[2].xy, TEMP[2].xyyy, TEMP[0].zwww 73: MOV TEMP[3].xy, TEMP[2].xyyy 74: MOV TEMP[3].w, IMM[1].xxxx 75: TXL TEMP[3].w, TEMP[3], SAMP[0], 2D 76: MUL TEMP[3].x, TEMP[3].wwww, CONST[1][1].xxxx 77: MOV_SAT TEMP[3].x, TEMP[3].xxxx 78: MUL TEMP[3].xyz, TEMP[3].xxxx, CONST[5][3].xyzz 79: MOV TEMP[4].xy, TEMP[2].xyyy 80: MOV TEMP[4].w, IMM[1].xxxx 81: TXL TEMP[4].yz, TEMP[4], SAMP[1], 2D 82: MOV TEMP[5].xyz, TEMP[3].xyzx 83: FSLT TEMP[6].x, IMM[1].xxxx, TEMP[4].yyyy 84: UIF TEMP[6].xxxx :0 85: MUL TEMP[4].x, TEMP[4].yyyy, TEMP[4].zzzz 86: MOV_SAT TEMP[4].x, TEMP[4].xxxx 87: MOV TEMP[2].xy, TEMP[2].xyyy 88: MOV TEMP[2].w, IMM[1].xxxx 89: TXL TEMP[2].xyz, TEMP[2], SAMP[2], 2D 90: MAD TEMP[5].xyz, TEMP[4].xxxx, TEMP[2].xyzz, TEMP[3].xyzz 91: ENDIF 92: MAD TEMP[1].xyz, IMM[1].zzzz, TEMP[5].xyzz, TEMP[1].xyzz 93: MAD TEMP[2].xy, CONST[1][0].xyyy, IMM[1].yxxx, IN[0].xyyy 94: MAX TEMP[2].xy, TEMP[2].xyyy, TEMP[0].xyyy 95: MIN TEMP[2].xy, TEMP[2].xyyy, TEMP[0].zwww 96: MOV TEMP[3].xy, TEMP[2].xyyy 97: MOV TEMP[3].w, IMM[1].xxxx 98: TXL TEMP[3].w, TEMP[3], SAMP[0], 2D 99: MUL TEMP[3].x, TEMP[3].wwww, CONST[1][1].xxxx 100: MOV_SAT TEMP[3].x, TEMP[3].xxxx 101: MUL TEMP[3].xyz, TEMP[3].xxxx, CONST[5][3].xyzz 102: MOV TEMP[4].xy, TEMP[2].xyyy 103: MOV TEMP[4].w, IMM[1].xxxx 104: TXL TEMP[4].yz, TEMP[4], SAMP[1], 2D 105: MOV TEMP[5].xyz, TEMP[3].xyzx 106: FSLT TEMP[6].x, IMM[1].xxxx, TEMP[4].yyyy 107: UIF TEMP[6].xxxx :0 108: MUL TEMP[4].x, TEMP[4].yyyy, TEMP[4].zzzz 109: MOV_SAT TEMP[4].x, TEMP[4].xxxx 110: MOV TEMP[2].xy, TEMP[2].xyyy 111: MOV TEMP[2].w, IMM[1].xxxx 112: TXL TEMP[2].xyz, TEMP[2], SAMP[2], 2D 113: MAD TEMP[5].xyz, TEMP[4].xxxx, TEMP[2].xyzz, TEMP[3].xyzz 114: ENDIF 115: MAD TEMP[1].xyz, IMM[1].zzzz, TEMP[5].xyzz, TEMP[1].xyzz 116: MAD TEMP[2].xy, CONST[1][0].xyyy, IMM[1].wxxx, IN[0].xyyy 117: MAX TEMP[2].xy, TEMP[2].xyyy, TEMP[0].xyyy 118: MIN TEMP[2].xy, TEMP[2].xyyy, TEMP[0].zwww 119: MOV TEMP[3].xy, TEMP[2].xyyy 120: MOV TEMP[3].w, IMM[1].xxxx 121: TXL TEMP[3].w, TEMP[3], SAMP[0], 2D 122: MUL TEMP[3].x, TEMP[3].wwww, CONST[1][1].xxxx 123: MOV_SAT TEMP[3].x, TEMP[3].xxxx 124: MUL TEMP[3].xyz, TEMP[3].xxxx, CONST[5][3].xyzz 125: MOV TEMP[4].xy, TEMP[2].xyyy 126: MOV TEMP[4].w, IMM[1].xxxx 127: TXL TEMP[4].yz, TEMP[4], SAMP[1], 2D 128: MOV TEMP[5].xyz, TEMP[3].xyzx 129: FSLT TEMP[6].x, IMM[1].xxxx, TEMP[4].yyyy 130: UIF TEMP[6].xxxx :0 131: MUL TEMP[4].x, TEMP[4].yyyy, TEMP[4].zzzz 132: MOV_SAT TEMP[4].x, TEMP[4].xxxx 133: MOV TEMP[2].xy, TEMP[2].xyyy 134: MOV TEMP[2].w, IMM[1].xxxx 135: TXL TEMP[2].xyz, TEMP[2], SAMP[2], 2D 136: MAD TEMP[5].xyz, TEMP[4].xxxx, TEMP[2].xyzz, TEMP[3].xyzz 137: ENDIF 138: MAD TEMP[1].xyz, IMM[3].xxxx, TEMP[5].xyzz, TEMP[1].xyzz 139: MAX TEMP[2].xy, IN[0].xyyy, TEMP[0].xyyy 140: MIN TEMP[2].xy, TEMP[2].xyyy, TEMP[0].zwww 141: MOV TEMP[3].xy, TEMP[2].xyyy 142: MOV TEMP[3].w, IMM[1].xxxx 143: TXL TEMP[3].w, TEMP[3], SAMP[0], 2D 144: MUL TEMP[3].x, TEMP[3].wwww, CONST[1][1].xxxx 145: MOV_SAT TEMP[3].x, TEMP[3].xxxx 146: MUL TEMP[3].xyz, TEMP[3].xxxx, CONST[5][3].xyzz 147: MOV TEMP[4].xy, TEMP[2].xyyy 148: MOV TEMP[4].w, IMM[1].xxxx 149: TXL TEMP[4].yz, TEMP[4], SAMP[1], 2D 150: MOV TEMP[5].xyz, TEMP[3].xyzx 151: FSLT TEMP[6].x, IMM[1].xxxx, TEMP[4].yyyy 152: UIF TEMP[6].xxxx :0 153: MUL TEMP[4].x, TEMP[4].yyyy, TEMP[4].zzzz 154: MOV_SAT TEMP[4].x, TEMP[4].xxxx 155: MOV TEMP[2].xy, TEMP[2].xyyy 156: MOV TEMP[2].w, IMM[1].xxxx 157: TXL TEMP[2].xyz, TEMP[2], SAMP[2], 2D 158: MAD TEMP[5].xyz, TEMP[4].xxxx, TEMP[2].xyzz, TEMP[3].xyzz 159: ENDIF 160: ADD TEMP[1].xyz, TEMP[1].xyzz, TEMP[5].xyzz 161: MAD TEMP[2].xy, CONST[1][0].xyyy, IMM[3].ywww, IN[0].xyyy 162: MAX TEMP[2].xy, TEMP[2].xyyy, TEMP[0].xyyy 163: MIN TEMP[2].xy, TEMP[2].xyyy, TEMP[0].zwww 164: MOV TEMP[3].xy, TEMP[2].xyyy 165: MOV TEMP[3].w, IMM[1].xxxx 166: TXL TEMP[3].w, TEMP[3], SAMP[0], 2D 167: MUL TEMP[3].x, TEMP[3].wwww, CONST[1][1].xxxx 168: MOV_SAT TEMP[3].x, TEMP[3].xxxx 169: MUL TEMP[3].xyz, TEMP[3].xxxx, CONST[5][3].xyzz 170: MOV TEMP[4].xy, TEMP[2].xyyy 171: MOV TEMP[4].w, IMM[1].xxxx 172: TXL TEMP[4].yz, TEMP[4], SAMP[1], 2D 173: MOV TEMP[5].xyz, TEMP[3].xyzx 174: FSLT TEMP[6].x, IMM[1].xxxx, TEMP[4].yyyy 175: UIF TEMP[6].xxxx :0 176: MUL TEMP[4].x, TEMP[4].yyyy, TEMP[4].zzzz 177: MOV_SAT TEMP[4].x, TEMP[4].xxxx 178: MOV TEMP[2].xy, TEMP[2].xyyy 179: MOV TEMP[2].w, IMM[1].xxxx 180: TXL TEMP[2].xyz, TEMP[2], SAMP[2], 2D 181: MAD TEMP[5].xyz, TEMP[4].xxxx, TEMP[2].xyzz, TEMP[3].xyzz 182: ENDIF 183: MAD TEMP[1].xyz, IMM[3].xxxx, TEMP[5].xyzz, TEMP[1].xyzz 184: MAD TEMP[2].xy, CONST[1][0].xyyy, IMM[4].xyyy, IN[0].xyyy 185: MAX TEMP[2].xy, TEMP[2].xyyy, TEMP[0].xyyy 186: MIN TEMP[2].xy, TEMP[2].xyyy, TEMP[0].zwww 187: MOV TEMP[3].xy, TEMP[2].xyyy 188: MOV TEMP[3].w, IMM[1].xxxx 189: TXL TEMP[3].w, TEMP[3], SAMP[0], 2D 190: MUL TEMP[3].x, TEMP[3].wwww, CONST[1][1].xxxx 191: MOV_SAT TEMP[3].x, TEMP[3].xxxx 192: MUL TEMP[3].xyz, TEMP[3].xxxx, CONST[5][3].xyzz 193: MOV TEMP[4].xy, TEMP[2].xyyy 194: MOV TEMP[4].w, IMM[1].xxxx 195: TXL TEMP[4].yz, TEMP[4], SAMP[1], 2D 196: MOV TEMP[5].xyz, TEMP[3].xyzx 197: FSLT TEMP[6].x, IMM[1].xxxx, TEMP[4].yyyy 198: UIF TEMP[6].xxxx :0 199: MUL TEMP[4].x, TEMP[4].yyyy, TEMP[4].zzzz 200: MOV_SAT TEMP[4].x, TEMP[4].xxxx 201: MOV TEMP[2].xy, TEMP[2].xyyy 202: MOV TEMP[2].w, IMM[1].xxxx 203: TXL TEMP[2].xyz, TEMP[2], SAMP[2], 2D 204: MAD TEMP[5].xyz, TEMP[4].xxxx, TEMP[2].xyzz, TEMP[3].xyzz 205: ENDIF 206: MAD TEMP[1].xyz, IMM[1].zzzz, TEMP[5].xyzz, TEMP[1].xyzz 207: MAD TEMP[2].xy, CONST[1][0].xyyy, IMM[3].zyyy, IN[0].xyyy 208: MAX TEMP[2].xy, TEMP[2].xyyy, TEMP[0].xyyy 209: MIN TEMP[2].xy, TEMP[2].xyyy, TEMP[0].zwww 210: MOV TEMP[3].xy, TEMP[2].xyyy 211: MOV TEMP[3].w, IMM[1].xxxx 212: TXL TEMP[3].w, TEMP[3], SAMP[0], 2D 213: MUL TEMP[3].x, TEMP[3].wwww, CONST[1][1].xxxx 214: MOV_SAT TEMP[3].x, TEMP[3].xxxx 215: MUL TEMP[3].xyz, TEMP[3].xxxx, CONST[5][3].xyzz 216: MOV TEMP[4].xy, TEMP[2].xyyy 217: MOV TEMP[4].w, IMM[1].xxxx 218: TXL TEMP[4].yz, TEMP[4], SAMP[1], 2D 219: MOV TEMP[5].xyz, TEMP[3].xyzx 220: FSLT TEMP[6].x, IMM[1].xxxx, TEMP[4].yyyy 221: UIF TEMP[6].xxxx :0 222: MUL TEMP[4].x, TEMP[4].yyyy, TEMP[4].zzzz 223: MOV_SAT TEMP[4].x, TEMP[4].xxxx 224: MOV TEMP[2].xy, TEMP[2].xyyy 225: MOV TEMP[2].w, IMM[1].xxxx 226: TXL TEMP[2].xyz, TEMP[2], SAMP[2], 2D 227: MAD TEMP[5].xyz, TEMP[4].xxxx, TEMP[2].xyzz, TEMP[3].xyzz 228: ENDIF 229: MAD TEMP[1].xyz, IMM[1].zzzz, TEMP[5].xyzz, TEMP[1].xyzz 230: MAD TEMP[2].xy, CONST[1][0].xyyy, IMM[3].wyyy, IN[0].xyyy 231: MAX TEMP[2].xy, TEMP[2].xyyy, TEMP[0].xyyy 232: MIN TEMP[2].xy, TEMP[2].xyyy, TEMP[0].zwww 233: MOV TEMP[3].xy, TEMP[2].xyyy 234: MOV TEMP[3].w, IMM[1].xxxx 235: TXL TEMP[3].w, TEMP[3], SAMP[0], 2D 236: MUL TEMP[3].x, TEMP[3].wwww, CONST[1][1].xxxx 237: MOV_SAT TEMP[3].x, TEMP[3].xxxx 238: MUL TEMP[3].xyz, TEMP[3].xxxx, CONST[5][3].xyzz 239: MOV TEMP[4].xy, TEMP[2].xyyy 240: MOV TEMP[4].w, IMM[1].xxxx 241: TXL TEMP[4].yz, TEMP[4], SAMP[1], 2D 242: MOV TEMP[5].xyz, TEMP[3].xyzx 243: FSLT TEMP[6].x, IMM[1].xxxx, TEMP[4].yyyy 244: UIF TEMP[6].xxxx :0 245: MUL TEMP[4].x, TEMP[4].yyyy, TEMP[4].zzzz 246: MOV_SAT TEMP[4].x, TEMP[4].xxxx 247: MOV TEMP[2].xy, TEMP[2].xyyy 248: MOV TEMP[2].w, IMM[1].xxxx 249: TXL TEMP[2].xyz, TEMP[2], SAMP[2], 2D 250: MAD TEMP[5].xyz, TEMP[4].xxxx, TEMP[2].xyzz, TEMP[3].xyzz 251: ENDIF 252: MAD TEMP[1].xyz, IMM[3].xxxx, TEMP[5].xyzz, TEMP[1].xyzz 253: ADD TEMP[2].xy, IN[0].xyyy, CONST[1][0].xyyy 254: MAX TEMP[2].xy, TEMP[2].xyyy, TEMP[0].xyyy 255: MIN TEMP[2].xy, TEMP[2].xyyy, TEMP[0].zwww 256: MOV TEMP[3].xy, TEMP[2].xyyy 257: MOV TEMP[3].w, IMM[1].xxxx 258: TXL TEMP[3].w, TEMP[3], SAMP[0], 2D 259: MUL TEMP[3].x, TEMP[3].wwww, CONST[1][1].xxxx 260: MOV_SAT TEMP[3].x, TEMP[3].xxxx 261: MUL TEMP[3].xyz, TEMP[3].xxxx, CONST[5][3].xyzz 262: MOV TEMP[4].xy, TEMP[2].xyyy 263: MOV TEMP[4].w, IMM[1].xxxx 264: TXL TEMP[4].yz, TEMP[4], SAMP[1], 2D 265: MOV TEMP[5].xyz, TEMP[3].xyzx 266: FSLT TEMP[6].x, IMM[1].xxxx, TEMP[4].yyyy 267: UIF TEMP[6].xxxx :0 268: MUL TEMP[4].x, TEMP[4].yyyy, TEMP[4].zzzz 269: MOV_SAT TEMP[4].x, TEMP[4].xxxx 270: MOV TEMP[2].xy, TEMP[2].xyyy 271: MOV TEMP[2].w, IMM[1].xxxx 272: TXL TEMP[2].xyz, TEMP[2], SAMP[2], 2D 273: MAD TEMP[5].xyz, TEMP[4].xxxx, TEMP[2].xyzz, TEMP[3].xyzz 274: ENDIF 275: MAD TEMP[1].xyz, IMM[1].zzzz, TEMP[5].xyzz, TEMP[1].xyzz 276: MAD TEMP[2].xy, CONST[1][0].xyyy, IMM[4].yxxx, IN[0].xyyy 277: MAX TEMP[2].xy, TEMP[2].xyyy, TEMP[0].xyyy 278: MIN TEMP[0].xy, TEMP[2].xyyy, TEMP[0].zwww 279: MOV TEMP[2].xy, TEMP[0].xyyy 280: MOV TEMP[2].w, IMM[1].xxxx 281: TXL TEMP[2].w, TEMP[2], SAMP[0], 2D 282: MUL TEMP[2].x, TEMP[2].wwww, CONST[1][1].xxxx 283: MOV_SAT TEMP[2].x, TEMP[2].xxxx 284: MUL TEMP[2].xyz, TEMP[2].xxxx, CONST[5][3].xyzz 285: MOV TEMP[3].xy, TEMP[0].xyyy 286: MOV TEMP[3].w, IMM[1].xxxx 287: TXL TEMP[3].yz, TEMP[3], SAMP[1], 2D 288: MOV TEMP[4].xyz, TEMP[2].xyzx 289: FSLT TEMP[5].x, IMM[1].xxxx, TEMP[3].yyyy 290: UIF TEMP[5].xxxx :0 291: MUL TEMP[3].x, TEMP[3].yyyy, TEMP[3].zzzz 292: MOV_SAT TEMP[3].x, TEMP[3].xxxx 293: MOV TEMP[0].xy, TEMP[0].xyyy 294: MOV TEMP[0].w, IMM[1].xxxx 295: TXL TEMP[0].xyz, TEMP[0], SAMP[2], 2D 296: MAD TEMP[4].xyz, TEMP[3].xxxx, TEMP[0].xyzz, TEMP[2].xyzz 297: ENDIF 298: MAD TEMP[0].xyz, IMM[1].zzzz, TEMP[4].xyzz, TEMP[1].xyzz 299: MUL TEMP[0].xyz, TEMP[0].xyzz, IMM[4].zzzz 300: MOV TEMP[1].w, IMM[3].yyyy 301: MOV TEMP[1].x, TEMP[0].xxxx 302: MOV TEMP[1].y, TEMP[0].yyyy 303: MOV TEMP[1].z, TEMP[0].zzzz 304: MOV OUT[0], TEMP[1] 305: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %31 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 5 %32 = load <16 x i8>, <16 x i8> addrspace(2)* %31, align 16, !tbaa !0 %33 = call float @llvm.SI.load.const(<16 x i8> %32, i32 48) %34 = call float @llvm.SI.load.const(<16 x i8> %32, i32 52) %35 = call float @llvm.SI.load.const(<16 x i8> %32, i32 56) %36 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %37 = load <8 x i32>, <8 x i32> addrspace(2)* %36, align 32, !tbaa !0 %38 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %39 = load <4 x i32>, <4 x i32> addrspace(2)* %38, align 16, !tbaa !0 %40 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %41 = load <8 x i32>, <8 x i32> addrspace(2)* %40, align 32, !tbaa !0 %42 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %43 = load <4 x i32>, <4 x i32> addrspace(2)* %42, align 16, !tbaa !0 %44 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %45 = load <8 x i32>, <8 x i32> addrspace(2)* %44, align 32, !tbaa !0 %46 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %47 = load <4 x i32>, <4 x i32> addrspace(2)* %46, align 16, !tbaa !0 %48 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %49 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %50 = fmul float %24, %27 %51 = fmul float %25, %28 %52 = fmul float %24, %29 %53 = fmul float %25, %30 %54 = fmul float %24, 0.000000e+00 %55 = fadd float %54, %48 %56 = fmul float %25, -2.000000e+00 %57 = fadd float %56, %49 %58 = call float @llvm.maxnum.f32(float %55, float %50) %59 = call float @llvm.maxnum.f32(float %57, float %51) %60 = call float @llvm.minnum.f32(float %58, float %52) %61 = call float @llvm.minnum.f32(float %59, float %53) %62 = bitcast float %60 to i32 %63 = bitcast float %61 to i32 %64 = insertelement <4 x i32> undef, i32 %62, i32 0 %65 = insertelement <4 x i32> %64, i32 %63, i32 1 %66 = insertelement <4 x i32> %65, i32 0, i32 2 %67 = bitcast <8 x i32> %37 to <32 x i8> %68 = bitcast <4 x i32> %39 to <16 x i8> %69 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %66, <32 x i8> %67, <16 x i8> %68, i32 2) %70 = extractelement <4 x float> %69, i32 3 %71 = fmul float %70, %26 %72 = call float @llvm.AMDIL.clamp.(float %71, float 0.000000e+00, float 1.000000e+00) %73 = fmul float %72, %33 %74 = fmul float %72, %34 %75 = fmul float %72, %35 %76 = bitcast float %60 to i32 %77 = bitcast float %61 to i32 %78 = insertelement <4 x i32> undef, i32 %76, i32 0 %79 = insertelement <4 x i32> %78, i32 %77, i32 1 %80 = insertelement <4 x i32> %79, i32 0, i32 2 %81 = bitcast <8 x i32> %41 to <32 x i8> %82 = bitcast <4 x i32> %43 to <16 x i8> %83 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %80, <32 x i8> %81, <16 x i8> %82, i32 2) %84 = extractelement <4 x float> %83, i32 1 %85 = fcmp ogt float %84, 0.000000e+00 br i1 %85, label %IF, label %ENDIF IF: ; preds = %main_body %86 = extractelement <4 x float> %83, i32 2 %87 = fmul float %84, %86 %88 = call float @llvm.AMDIL.clamp.(float %87, float 0.000000e+00, float 1.000000e+00) %89 = bitcast float %60 to i32 %90 = bitcast float %61 to i32 %91 = insertelement <4 x i32> undef, i32 %89, i32 0 %92 = insertelement <4 x i32> %91, i32 %90, i32 1 %93 = insertelement <4 x i32> %92, i32 0, i32 2 %94 = bitcast <8 x i32> %45 to <32 x i8> %95 = bitcast <4 x i32> %47 to <16 x i8> %96 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %93, <32 x i8> %94, <16 x i8> %95, i32 2) %97 = extractelement <4 x float> %96, i32 0 %98 = extractelement <4 x float> %96, i32 1 %99 = extractelement <4 x float> %96, i32 2 %100 = fmul float %88, %97 %101 = fadd float %100, %73 %102 = fmul float %88, %98 %103 = fadd float %102, %74 %104 = fmul float %88, %99 %105 = fadd float %104, %75 br label %ENDIF ENDIF: ; preds = %main_body, %IF %temp16.0 = phi float [ %101, %IF ], [ %73, %main_body ] %temp17.0 = phi float [ %103, %IF ], [ %74, %main_body ] %temp18.0 = phi float [ %105, %IF ], [ %75, %main_body ] %106 = fmul float %temp16.0, 5.000000e-01 %107 = fmul float %temp17.0, 5.000000e-01 %108 = fmul float %temp18.0, 5.000000e-01 %109 = fsub float %48, %24 %110 = fsub float %49, %25 %111 = call float @llvm.maxnum.f32(float %109, float %50) %112 = call float @llvm.maxnum.f32(float %110, float %51) %113 = call float @llvm.minnum.f32(float %111, float %52) %114 = call float @llvm.minnum.f32(float %112, float %53) %115 = bitcast float %113 to i32 %116 = bitcast float %114 to i32 %117 = insertelement <4 x i32> undef, i32 %115, i32 0 %118 = insertelement <4 x i32> %117, i32 %116, i32 1 %119 = insertelement <4 x i32> %118, i32 0, i32 2 %120 = bitcast <8 x i32> %37 to <32 x i8> %121 = bitcast <4 x i32> %39 to <16 x i8> %122 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %119, <32 x i8> %120, <16 x i8> %121, i32 2) %123 = extractelement <4 x float> %122, i32 3 %124 = fmul float %123, %26 %125 = call float @llvm.AMDIL.clamp.(float %124, float 0.000000e+00, float 1.000000e+00) %126 = fmul float %125, %33 %127 = fmul float %125, %34 %128 = fmul float %125, %35 %129 = bitcast float %113 to i32 %130 = bitcast float %114 to i32 %131 = insertelement <4 x i32> undef, i32 %129, i32 0 %132 = insertelement <4 x i32> %131, i32 %130, i32 1 %133 = insertelement <4 x i32> %132, i32 0, i32 2 %134 = bitcast <8 x i32> %41 to <32 x i8> %135 = bitcast <4 x i32> %43 to <16 x i8> %136 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %133, <32 x i8> %134, <16 x i8> %135, i32 2) %137 = extractelement <4 x float> %136, i32 1 %138 = fcmp ogt float %137, 0.000000e+00 br i1 %138, label %IF29, label %ENDIF28 IF29: ; preds = %ENDIF %139 = extractelement <4 x float> %136, i32 2 %140 = fmul float %137, %139 %141 = call float @llvm.AMDIL.clamp.(float %140, float 0.000000e+00, float 1.000000e+00) %142 = bitcast float %113 to i32 %143 = bitcast float %114 to i32 %144 = insertelement <4 x i32> undef, i32 %142, i32 0 %145 = insertelement <4 x i32> %144, i32 %143, i32 1 %146 = insertelement <4 x i32> %145, i32 0, i32 2 %147 = bitcast <8 x i32> %45 to <32 x i8> %148 = bitcast <4 x i32> %47 to <16 x i8> %149 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %146, <32 x i8> %147, <16 x i8> %148, i32 2) %150 = extractelement <4 x float> %149, i32 0 %151 = extractelement <4 x float> %149, i32 1 %152 = extractelement <4 x float> %149, i32 2 %153 = fmul float %141, %150 %154 = fadd float %153, %126 %155 = fmul float %141, %151 %156 = fadd float %155, %127 %157 = fmul float %141, %152 %158 = fadd float %157, %128 br label %ENDIF28 ENDIF28: ; preds = %ENDIF, %IF29 %temp20.0 = phi float [ %154, %IF29 ], [ %126, %ENDIF ] %temp21.0 = phi float [ %156, %IF29 ], [ %127, %ENDIF ] %temp22.0 = phi float [ %158, %IF29 ], [ %128, %ENDIF ] %159 = fmul float %temp20.0, 5.000000e-01 %160 = fadd float %159, %106 %161 = fmul float %temp21.0, 5.000000e-01 %162 = fadd float %161, %107 %163 = fmul float %temp22.0, 5.000000e-01 %164 = fadd float %163, %108 %165 = fmul float %24, 0.000000e+00 %166 = fadd float %165, %48 %167 = fsub float %49, %25 %168 = call float @llvm.maxnum.f32(float %166, float %50) %169 = call float @llvm.maxnum.f32(float %167, float %51) %170 = call float @llvm.minnum.f32(float %168, float %52) %171 = call float @llvm.minnum.f32(float %169, float %53) %172 = bitcast float %170 to i32 %173 = bitcast float %171 to i32 %174 = insertelement <4 x i32> undef, i32 %172, i32 0 %175 = insertelement <4 x i32> %174, i32 %173, i32 1 %176 = insertelement <4 x i32> %175, i32 0, i32 2 %177 = bitcast <8 x i32> %37 to <32 x i8> %178 = bitcast <4 x i32> %39 to <16 x i8> %179 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %176, <32 x i8> %177, <16 x i8> %178, i32 2) %180 = extractelement <4 x float> %179, i32 3 %181 = fmul float %180, %26 %182 = call float @llvm.AMDIL.clamp.(float %181, float 0.000000e+00, float 1.000000e+00) %183 = fmul float %182, %33 %184 = fmul float %182, %34 %185 = fmul float %182, %35 %186 = bitcast float %170 to i32 %187 = bitcast float %171 to i32 %188 = insertelement <4 x i32> undef, i32 %186, i32 0 %189 = insertelement <4 x i32> %188, i32 %187, i32 1 %190 = insertelement <4 x i32> %189, i32 0, i32 2 %191 = bitcast <8 x i32> %41 to <32 x i8> %192 = bitcast <4 x i32> %43 to <16 x i8> %193 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %190, <32 x i8> %191, <16 x i8> %192, i32 2) %194 = extractelement <4 x float> %193, i32 1 %195 = fcmp ogt float %194, 0.000000e+00 br i1 %195, label %IF32, label %ENDIF31 IF32: ; preds = %ENDIF28 %196 = extractelement <4 x float> %193, i32 2 %197 = fmul float %194, %196 %198 = call float @llvm.AMDIL.clamp.(float %197, float 0.000000e+00, float 1.000000e+00) %199 = bitcast float %170 to i32 %200 = bitcast float %171 to i32 %201 = insertelement <4 x i32> undef, i32 %199, i32 0 %202 = insertelement <4 x i32> %201, i32 %200, i32 1 %203 = insertelement <4 x i32> %202, i32 0, i32 2 %204 = bitcast <8 x i32> %45 to <32 x i8> %205 = bitcast <4 x i32> %47 to <16 x i8> %206 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %203, <32 x i8> %204, <16 x i8> %205, i32 2) %207 = extractelement <4 x float> %206, i32 0 %208 = extractelement <4 x float> %206, i32 1 %209 = extractelement <4 x float> %206, i32 2 %210 = fmul float %198, %207 %211 = fadd float %210, %183 %212 = fmul float %198, %208 %213 = fadd float %212, %184 %214 = fmul float %198, %209 %215 = fadd float %214, %185 br label %ENDIF31 ENDIF31: ; preds = %ENDIF28, %IF32 %temp20.1 = phi float [ %211, %IF32 ], [ %183, %ENDIF28 ] %temp21.1 = phi float [ %213, %IF32 ], [ %184, %ENDIF28 ] %temp22.1 = phi float [ %215, %IF32 ], [ %185, %ENDIF28 ] %216 = fmul float %temp20.1, 7.500000e-01 %217 = fadd float %216, %160 %218 = fmul float %temp21.1, 7.500000e-01 %219 = fadd float %218, %162 %220 = fmul float %temp22.1, 7.500000e-01 %221 = fadd float %220, %164 %222 = fadd float %24, %48 %223 = fsub float %49, %25 %224 = call float @llvm.maxnum.f32(float %222, float %50) %225 = call float @llvm.maxnum.f32(float %223, float %51) %226 = call float @llvm.minnum.f32(float %224, float %52) %227 = call float @llvm.minnum.f32(float %225, float %53) %228 = bitcast float %226 to i32 %229 = bitcast float %227 to i32 %230 = insertelement <4 x i32> undef, i32 %228, i32 0 %231 = insertelement <4 x i32> %230, i32 %229, i32 1 %232 = insertelement <4 x i32> %231, i32 0, i32 2 %233 = bitcast <8 x i32> %37 to <32 x i8> %234 = bitcast <4 x i32> %39 to <16 x i8> %235 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %232, <32 x i8> %233, <16 x i8> %234, i32 2) %236 = extractelement <4 x float> %235, i32 3 %237 = fmul float %236, %26 %238 = call float @llvm.AMDIL.clamp.(float %237, float 0.000000e+00, float 1.000000e+00) %239 = fmul float %238, %33 %240 = fmul float %238, %34 %241 = fmul float %238, %35 %242 = bitcast float %226 to i32 %243 = bitcast float %227 to i32 %244 = insertelement <4 x i32> undef, i32 %242, i32 0 %245 = insertelement <4 x i32> %244, i32 %243, i32 1 %246 = insertelement <4 x i32> %245, i32 0, i32 2 %247 = bitcast <8 x i32> %41 to <32 x i8> %248 = bitcast <4 x i32> %43 to <16 x i8> %249 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %246, <32 x i8> %247, <16 x i8> %248, i32 2) %250 = extractelement <4 x float> %249, i32 1 %251 = fcmp ogt float %250, 0.000000e+00 br i1 %251, label %IF35, label %ENDIF34 IF35: ; preds = %ENDIF31 %252 = extractelement <4 x float> %249, i32 2 %253 = fmul float %250, %252 %254 = call float @llvm.AMDIL.clamp.(float %253, float 0.000000e+00, float 1.000000e+00) %255 = bitcast float %226 to i32 %256 = bitcast float %227 to i32 %257 = insertelement <4 x i32> undef, i32 %255, i32 0 %258 = insertelement <4 x i32> %257, i32 %256, i32 1 %259 = insertelement <4 x i32> %258, i32 0, i32 2 %260 = bitcast <8 x i32> %45 to <32 x i8> %261 = bitcast <4 x i32> %47 to <16 x i8> %262 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %259, <32 x i8> %260, <16 x i8> %261, i32 2) %263 = extractelement <4 x float> %262, i32 0 %264 = extractelement <4 x float> %262, i32 1 %265 = extractelement <4 x float> %262, i32 2 %266 = fmul float %254, %263 %267 = fadd float %266, %239 %268 = fmul float %254, %264 %269 = fadd float %268, %240 %270 = fmul float %254, %265 %271 = fadd float %270, %241 br label %ENDIF34 ENDIF34: ; preds = %ENDIF31, %IF35 %temp20.2 = phi float [ %267, %IF35 ], [ %239, %ENDIF31 ] %temp21.2 = phi float [ %269, %IF35 ], [ %240, %ENDIF31 ] %temp22.2 = phi float [ %271, %IF35 ], [ %241, %ENDIF31 ] %272 = fmul float %temp20.2, 5.000000e-01 %273 = fadd float %272, %217 %274 = fmul float %temp21.2, 5.000000e-01 %275 = fadd float %274, %219 %276 = fmul float %temp22.2, 5.000000e-01 %277 = fadd float %276, %221 %278 = fmul float %24, -2.000000e+00 %279 = fadd float %278, %48 %280 = fmul float %25, 0.000000e+00 %281 = fadd float %280, %49 %282 = call float @llvm.maxnum.f32(float %279, float %50) %283 = call float @llvm.maxnum.f32(float %281, float %51) %284 = call float @llvm.minnum.f32(float %282, float %52) %285 = call float @llvm.minnum.f32(float %283, float %53) %286 = bitcast float %284 to i32 %287 = bitcast float %285 to i32 %288 = insertelement <4 x i32> undef, i32 %286, i32 0 %289 = insertelement <4 x i32> %288, i32 %287, i32 1 %290 = insertelement <4 x i32> %289, i32 0, i32 2 %291 = bitcast <8 x i32> %37 to <32 x i8> %292 = bitcast <4 x i32> %39 to <16 x i8> %293 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %290, <32 x i8> %291, <16 x i8> %292, i32 2) %294 = extractelement <4 x float> %293, i32 3 %295 = fmul float %294, %26 %296 = call float @llvm.AMDIL.clamp.(float %295, float 0.000000e+00, float 1.000000e+00) %297 = fmul float %296, %33 %298 = fmul float %296, %34 %299 = fmul float %296, %35 %300 = bitcast float %284 to i32 %301 = bitcast float %285 to i32 %302 = insertelement <4 x i32> undef, i32 %300, i32 0 %303 = insertelement <4 x i32> %302, i32 %301, i32 1 %304 = insertelement <4 x i32> %303, i32 0, i32 2 %305 = bitcast <8 x i32> %41 to <32 x i8> %306 = bitcast <4 x i32> %43 to <16 x i8> %307 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %304, <32 x i8> %305, <16 x i8> %306, i32 2) %308 = extractelement <4 x float> %307, i32 1 %309 = fcmp ogt float %308, 0.000000e+00 br i1 %309, label %IF38, label %ENDIF37 IF38: ; preds = %ENDIF34 %310 = extractelement <4 x float> %307, i32 2 %311 = fmul float %308, %310 %312 = call float @llvm.AMDIL.clamp.(float %311, float 0.000000e+00, float 1.000000e+00) %313 = bitcast float %284 to i32 %314 = bitcast float %285 to i32 %315 = insertelement <4 x i32> undef, i32 %313, i32 0 %316 = insertelement <4 x i32> %315, i32 %314, i32 1 %317 = insertelement <4 x i32> %316, i32 0, i32 2 %318 = bitcast <8 x i32> %45 to <32 x i8> %319 = bitcast <4 x i32> %47 to <16 x i8> %320 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %317, <32 x i8> %318, <16 x i8> %319, i32 2) %321 = extractelement <4 x float> %320, i32 0 %322 = extractelement <4 x float> %320, i32 1 %323 = extractelement <4 x float> %320, i32 2 %324 = fmul float %312, %321 %325 = fadd float %324, %297 %326 = fmul float %312, %322 %327 = fadd float %326, %298 %328 = fmul float %312, %323 %329 = fadd float %328, %299 br label %ENDIF37 ENDIF37: ; preds = %ENDIF34, %IF38 %temp20.3 = phi float [ %325, %IF38 ], [ %297, %ENDIF34 ] %temp21.3 = phi float [ %327, %IF38 ], [ %298, %ENDIF34 ] %temp22.3 = phi float [ %329, %IF38 ], [ %299, %ENDIF34 ] %330 = fmul float %temp20.3, 5.000000e-01 %331 = fadd float %330, %273 %332 = fmul float %temp21.3, 5.000000e-01 %333 = fadd float %332, %275 %334 = fmul float %temp22.3, 5.000000e-01 %335 = fadd float %334, %277 %336 = fsub float %48, %24 %337 = fmul float %25, 0.000000e+00 %338 = fadd float %337, %49 %339 = call float @llvm.maxnum.f32(float %336, float %50) %340 = call float @llvm.maxnum.f32(float %338, float %51) %341 = call float @llvm.minnum.f32(float %339, float %52) %342 = call float @llvm.minnum.f32(float %340, float %53) %343 = bitcast float %341 to i32 %344 = bitcast float %342 to i32 %345 = insertelement <4 x i32> undef, i32 %343, i32 0 %346 = insertelement <4 x i32> %345, i32 %344, i32 1 %347 = insertelement <4 x i32> %346, i32 0, i32 2 %348 = bitcast <8 x i32> %37 to <32 x i8> %349 = bitcast <4 x i32> %39 to <16 x i8> %350 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %347, <32 x i8> %348, <16 x i8> %349, i32 2) %351 = extractelement <4 x float> %350, i32 3 %352 = fmul float %351, %26 %353 = call float @llvm.AMDIL.clamp.(float %352, float 0.000000e+00, float 1.000000e+00) %354 = fmul float %353, %33 %355 = fmul float %353, %34 %356 = fmul float %353, %35 %357 = bitcast float %341 to i32 %358 = bitcast float %342 to i32 %359 = insertelement <4 x i32> undef, i32 %357, i32 0 %360 = insertelement <4 x i32> %359, i32 %358, i32 1 %361 = insertelement <4 x i32> %360, i32 0, i32 2 %362 = bitcast <8 x i32> %41 to <32 x i8> %363 = bitcast <4 x i32> %43 to <16 x i8> %364 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %361, <32 x i8> %362, <16 x i8> %363, i32 2) %365 = extractelement <4 x float> %364, i32 1 %366 = fcmp ogt float %365, 0.000000e+00 br i1 %366, label %IF41, label %ENDIF40 IF41: ; preds = %ENDIF37 %367 = extractelement <4 x float> %364, i32 2 %368 = fmul float %365, %367 %369 = call float @llvm.AMDIL.clamp.(float %368, float 0.000000e+00, float 1.000000e+00) %370 = bitcast float %341 to i32 %371 = bitcast float %342 to i32 %372 = insertelement <4 x i32> undef, i32 %370, i32 0 %373 = insertelement <4 x i32> %372, i32 %371, i32 1 %374 = insertelement <4 x i32> %373, i32 0, i32 2 %375 = bitcast <8 x i32> %45 to <32 x i8> %376 = bitcast <4 x i32> %47 to <16 x i8> %377 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %374, <32 x i8> %375, <16 x i8> %376, i32 2) %378 = extractelement <4 x float> %377, i32 0 %379 = extractelement <4 x float> %377, i32 1 %380 = extractelement <4 x float> %377, i32 2 %381 = fmul float %369, %378 %382 = fadd float %381, %354 %383 = fmul float %369, %379 %384 = fadd float %383, %355 %385 = fmul float %369, %380 %386 = fadd float %385, %356 br label %ENDIF40 ENDIF40: ; preds = %ENDIF37, %IF41 %temp20.4 = phi float [ %382, %IF41 ], [ %354, %ENDIF37 ] %temp21.4 = phi float [ %384, %IF41 ], [ %355, %ENDIF37 ] %temp22.4 = phi float [ %386, %IF41 ], [ %356, %ENDIF37 ] %387 = fmul float %temp20.4, 7.500000e-01 %388 = fadd float %387, %331 %389 = fmul float %temp21.4, 7.500000e-01 %390 = fadd float %389, %333 %391 = fmul float %temp22.4, 7.500000e-01 %392 = fadd float %391, %335 %393 = call float @llvm.maxnum.f32(float %48, float %50) %394 = call float @llvm.maxnum.f32(float %49, float %51) %395 = call float @llvm.minnum.f32(float %393, float %52) %396 = call float @llvm.minnum.f32(float %394, float %53) %397 = bitcast float %395 to i32 %398 = bitcast float %396 to i32 %399 = insertelement <4 x i32> undef, i32 %397, i32 0 %400 = insertelement <4 x i32> %399, i32 %398, i32 1 %401 = insertelement <4 x i32> %400, i32 0, i32 2 %402 = bitcast <8 x i32> %37 to <32 x i8> %403 = bitcast <4 x i32> %39 to <16 x i8> %404 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %401, <32 x i8> %402, <16 x i8> %403, i32 2) %405 = extractelement <4 x float> %404, i32 3 %406 = fmul float %405, %26 %407 = call float @llvm.AMDIL.clamp.(float %406, float 0.000000e+00, float 1.000000e+00) %408 = fmul float %407, %33 %409 = fmul float %407, %34 %410 = fmul float %407, %35 %411 = bitcast float %395 to i32 %412 = bitcast float %396 to i32 %413 = insertelement <4 x i32> undef, i32 %411, i32 0 %414 = insertelement <4 x i32> %413, i32 %412, i32 1 %415 = insertelement <4 x i32> %414, i32 0, i32 2 %416 = bitcast <8 x i32> %41 to <32 x i8> %417 = bitcast <4 x i32> %43 to <16 x i8> %418 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %415, <32 x i8> %416, <16 x i8> %417, i32 2) %419 = extractelement <4 x float> %418, i32 1 %420 = fcmp ogt float %419, 0.000000e+00 br i1 %420, label %IF44, label %ENDIF43 IF44: ; preds = %ENDIF40 %421 = extractelement <4 x float> %418, i32 2 %422 = fmul float %419, %421 %423 = call float @llvm.AMDIL.clamp.(float %422, float 0.000000e+00, float 1.000000e+00) %424 = bitcast float %395 to i32 %425 = bitcast float %396 to i32 %426 = insertelement <4 x i32> undef, i32 %424, i32 0 %427 = insertelement <4 x i32> %426, i32 %425, i32 1 %428 = insertelement <4 x i32> %427, i32 0, i32 2 %429 = bitcast <8 x i32> %45 to <32 x i8> %430 = bitcast <4 x i32> %47 to <16 x i8> %431 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %428, <32 x i8> %429, <16 x i8> %430, i32 2) %432 = extractelement <4 x float> %431, i32 0 %433 = extractelement <4 x float> %431, i32 1 %434 = extractelement <4 x float> %431, i32 2 %435 = fmul float %423, %432 %436 = fadd float %435, %408 %437 = fmul float %423, %433 %438 = fadd float %437, %409 %439 = fmul float %423, %434 %440 = fadd float %439, %410 br label %ENDIF43 ENDIF43: ; preds = %ENDIF40, %IF44 %temp20.5 = phi float [ %436, %IF44 ], [ %408, %ENDIF40 ] %temp21.5 = phi float [ %438, %IF44 ], [ %409, %ENDIF40 ] %temp22.5 = phi float [ %440, %IF44 ], [ %410, %ENDIF40 ] %441 = fadd float %388, %temp20.5 %442 = fadd float %390, %temp21.5 %443 = fadd float %392, %temp22.5 %444 = fadd float %24, %48 %445 = fmul float %25, 0.000000e+00 %446 = fadd float %445, %49 %447 = call float @llvm.maxnum.f32(float %444, float %50) %448 = call float @llvm.maxnum.f32(float %446, float %51) %449 = call float @llvm.minnum.f32(float %447, float %52) %450 = call float @llvm.minnum.f32(float %448, float %53) %451 = bitcast float %449 to i32 %452 = bitcast float %450 to i32 %453 = insertelement <4 x i32> undef, i32 %451, i32 0 %454 = insertelement <4 x i32> %453, i32 %452, i32 1 %455 = insertelement <4 x i32> %454, i32 0, i32 2 %456 = bitcast <8 x i32> %37 to <32 x i8> %457 = bitcast <4 x i32> %39 to <16 x i8> %458 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %455, <32 x i8> %456, <16 x i8> %457, i32 2) %459 = extractelement <4 x float> %458, i32 3 %460 = fmul float %459, %26 %461 = call float @llvm.AMDIL.clamp.(float %460, float 0.000000e+00, float 1.000000e+00) %462 = fmul float %461, %33 %463 = fmul float %461, %34 %464 = fmul float %461, %35 %465 = bitcast float %449 to i32 %466 = bitcast float %450 to i32 %467 = insertelement <4 x i32> undef, i32 %465, i32 0 %468 = insertelement <4 x i32> %467, i32 %466, i32 1 %469 = insertelement <4 x i32> %468, i32 0, i32 2 %470 = bitcast <8 x i32> %41 to <32 x i8> %471 = bitcast <4 x i32> %43 to <16 x i8> %472 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %469, <32 x i8> %470, <16 x i8> %471, i32 2) %473 = extractelement <4 x float> %472, i32 1 %474 = fcmp ogt float %473, 0.000000e+00 br i1 %474, label %IF47, label %ENDIF46 IF47: ; preds = %ENDIF43 %475 = extractelement <4 x float> %472, i32 2 %476 = fmul float %473, %475 %477 = call float @llvm.AMDIL.clamp.(float %476, float 0.000000e+00, float 1.000000e+00) %478 = bitcast float %449 to i32 %479 = bitcast float %450 to i32 %480 = insertelement <4 x i32> undef, i32 %478, i32 0 %481 = insertelement <4 x i32> %480, i32 %479, i32 1 %482 = insertelement <4 x i32> %481, i32 0, i32 2 %483 = bitcast <8 x i32> %45 to <32 x i8> %484 = bitcast <4 x i32> %47 to <16 x i8> %485 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %482, <32 x i8> %483, <16 x i8> %484, i32 2) %486 = extractelement <4 x float> %485, i32 0 %487 = extractelement <4 x float> %485, i32 1 %488 = extractelement <4 x float> %485, i32 2 %489 = fmul float %477, %486 %490 = fadd float %489, %462 %491 = fmul float %477, %487 %492 = fadd float %491, %463 %493 = fmul float %477, %488 %494 = fadd float %493, %464 br label %ENDIF46 ENDIF46: ; preds = %ENDIF43, %IF47 %temp20.6 = phi float [ %490, %IF47 ], [ %462, %ENDIF43 ] %temp21.6 = phi float [ %492, %IF47 ], [ %463, %ENDIF43 ] %temp22.6 = phi float [ %494, %IF47 ], [ %464, %ENDIF43 ] %495 = fmul float %temp20.6, 7.500000e-01 %496 = fadd float %495, %441 %497 = fmul float %temp21.6, 7.500000e-01 %498 = fadd float %497, %442 %499 = fmul float %temp22.6, 7.500000e-01 %500 = fadd float %499, %443 %501 = fmul float %24, 2.000000e+00 %502 = fadd float %501, %48 %503 = fmul float %25, 0.000000e+00 %504 = fadd float %503, %49 %505 = call float @llvm.maxnum.f32(float %502, float %50) %506 = call float @llvm.maxnum.f32(float %504, float %51) %507 = call float @llvm.minnum.f32(float %505, float %52) %508 = call float @llvm.minnum.f32(float %506, float %53) %509 = bitcast float %507 to i32 %510 = bitcast float %508 to i32 %511 = insertelement <4 x i32> undef, i32 %509, i32 0 %512 = insertelement <4 x i32> %511, i32 %510, i32 1 %513 = insertelement <4 x i32> %512, i32 0, i32 2 %514 = bitcast <8 x i32> %37 to <32 x i8> %515 = bitcast <4 x i32> %39 to <16 x i8> %516 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %513, <32 x i8> %514, <16 x i8> %515, i32 2) %517 = extractelement <4 x float> %516, i32 3 %518 = fmul float %517, %26 %519 = call float @llvm.AMDIL.clamp.(float %518, float 0.000000e+00, float 1.000000e+00) %520 = fmul float %519, %33 %521 = fmul float %519, %34 %522 = fmul float %519, %35 %523 = bitcast float %507 to i32 %524 = bitcast float %508 to i32 %525 = insertelement <4 x i32> undef, i32 %523, i32 0 %526 = insertelement <4 x i32> %525, i32 %524, i32 1 %527 = insertelement <4 x i32> %526, i32 0, i32 2 %528 = bitcast <8 x i32> %41 to <32 x i8> %529 = bitcast <4 x i32> %43 to <16 x i8> %530 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %527, <32 x i8> %528, <16 x i8> %529, i32 2) %531 = extractelement <4 x float> %530, i32 1 %532 = fcmp ogt float %531, 0.000000e+00 br i1 %532, label %IF50, label %ENDIF49 IF50: ; preds = %ENDIF46 %533 = extractelement <4 x float> %530, i32 2 %534 = fmul float %531, %533 %535 = call float @llvm.AMDIL.clamp.(float %534, float 0.000000e+00, float 1.000000e+00) %536 = bitcast float %507 to i32 %537 = bitcast float %508 to i32 %538 = insertelement <4 x i32> undef, i32 %536, i32 0 %539 = insertelement <4 x i32> %538, i32 %537, i32 1 %540 = insertelement <4 x i32> %539, i32 0, i32 2 %541 = bitcast <8 x i32> %45 to <32 x i8> %542 = bitcast <4 x i32> %47 to <16 x i8> %543 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %540, <32 x i8> %541, <16 x i8> %542, i32 2) %544 = extractelement <4 x float> %543, i32 0 %545 = extractelement <4 x float> %543, i32 1 %546 = extractelement <4 x float> %543, i32 2 %547 = fmul float %535, %544 %548 = fadd float %547, %520 %549 = fmul float %535, %545 %550 = fadd float %549, %521 %551 = fmul float %535, %546 %552 = fadd float %551, %522 br label %ENDIF49 ENDIF49: ; preds = %ENDIF46, %IF50 %temp20.7 = phi float [ %548, %IF50 ], [ %520, %ENDIF46 ] %temp21.7 = phi float [ %550, %IF50 ], [ %521, %ENDIF46 ] %temp22.7 = phi float [ %552, %IF50 ], [ %522, %ENDIF46 ] %553 = fmul float %temp20.7, 5.000000e-01 %554 = fadd float %553, %496 %555 = fmul float %temp21.7, 5.000000e-01 %556 = fadd float %555, %498 %557 = fmul float %temp22.7, 5.000000e-01 %558 = fadd float %557, %500 %559 = fsub float %48, %24 %560 = fadd float %25, %49 %561 = call float @llvm.maxnum.f32(float %559, float %50) %562 = call float @llvm.maxnum.f32(float %560, float %51) %563 = call float @llvm.minnum.f32(float %561, float %52) %564 = call float @llvm.minnum.f32(float %562, float %53) %565 = bitcast float %563 to i32 %566 = bitcast float %564 to i32 %567 = insertelement <4 x i32> undef, i32 %565, i32 0 %568 = insertelement <4 x i32> %567, i32 %566, i32 1 %569 = insertelement <4 x i32> %568, i32 0, i32 2 %570 = bitcast <8 x i32> %37 to <32 x i8> %571 = bitcast <4 x i32> %39 to <16 x i8> %572 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %569, <32 x i8> %570, <16 x i8> %571, i32 2) %573 = extractelement <4 x float> %572, i32 3 %574 = fmul float %573, %26 %575 = call float @llvm.AMDIL.clamp.(float %574, float 0.000000e+00, float 1.000000e+00) %576 = fmul float %575, %33 %577 = fmul float %575, %34 %578 = fmul float %575, %35 %579 = bitcast float %563 to i32 %580 = bitcast float %564 to i32 %581 = insertelement <4 x i32> undef, i32 %579, i32 0 %582 = insertelement <4 x i32> %581, i32 %580, i32 1 %583 = insertelement <4 x i32> %582, i32 0, i32 2 %584 = bitcast <8 x i32> %41 to <32 x i8> %585 = bitcast <4 x i32> %43 to <16 x i8> %586 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %583, <32 x i8> %584, <16 x i8> %585, i32 2) %587 = extractelement <4 x float> %586, i32 1 %588 = fcmp ogt float %587, 0.000000e+00 br i1 %588, label %IF53, label %ENDIF52 IF53: ; preds = %ENDIF49 %589 = extractelement <4 x float> %586, i32 2 %590 = fmul float %587, %589 %591 = call float @llvm.AMDIL.clamp.(float %590, float 0.000000e+00, float 1.000000e+00) %592 = bitcast float %563 to i32 %593 = bitcast float %564 to i32 %594 = insertelement <4 x i32> undef, i32 %592, i32 0 %595 = insertelement <4 x i32> %594, i32 %593, i32 1 %596 = insertelement <4 x i32> %595, i32 0, i32 2 %597 = bitcast <8 x i32> %45 to <32 x i8> %598 = bitcast <4 x i32> %47 to <16 x i8> %599 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %596, <32 x i8> %597, <16 x i8> %598, i32 2) %600 = extractelement <4 x float> %599, i32 0 %601 = extractelement <4 x float> %599, i32 1 %602 = extractelement <4 x float> %599, i32 2 %603 = fmul float %591, %600 %604 = fadd float %603, %576 %605 = fmul float %591, %601 %606 = fadd float %605, %577 %607 = fmul float %591, %602 %608 = fadd float %607, %578 br label %ENDIF52 ENDIF52: ; preds = %ENDIF49, %IF53 %temp20.8 = phi float [ %604, %IF53 ], [ %576, %ENDIF49 ] %temp21.8 = phi float [ %606, %IF53 ], [ %577, %ENDIF49 ] %temp22.8 = phi float [ %608, %IF53 ], [ %578, %ENDIF49 ] %609 = fmul float %temp20.8, 5.000000e-01 %610 = fadd float %609, %554 %611 = fmul float %temp21.8, 5.000000e-01 %612 = fadd float %611, %556 %613 = fmul float %temp22.8, 5.000000e-01 %614 = fadd float %613, %558 %615 = fmul float %24, 0.000000e+00 %616 = fadd float %615, %48 %617 = fadd float %25, %49 %618 = call float @llvm.maxnum.f32(float %616, float %50) %619 = call float @llvm.maxnum.f32(float %617, float %51) %620 = call float @llvm.minnum.f32(float %618, float %52) %621 = call float @llvm.minnum.f32(float %619, float %53) %622 = bitcast float %620 to i32 %623 = bitcast float %621 to i32 %624 = insertelement <4 x i32> undef, i32 %622, i32 0 %625 = insertelement <4 x i32> %624, i32 %623, i32 1 %626 = insertelement <4 x i32> %625, i32 0, i32 2 %627 = bitcast <8 x i32> %37 to <32 x i8> %628 = bitcast <4 x i32> %39 to <16 x i8> %629 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %626, <32 x i8> %627, <16 x i8> %628, i32 2) %630 = extractelement <4 x float> %629, i32 3 %631 = fmul float %630, %26 %632 = call float @llvm.AMDIL.clamp.(float %631, float 0.000000e+00, float 1.000000e+00) %633 = fmul float %632, %33 %634 = fmul float %632, %34 %635 = fmul float %632, %35 %636 = bitcast float %620 to i32 %637 = bitcast float %621 to i32 %638 = insertelement <4 x i32> undef, i32 %636, i32 0 %639 = insertelement <4 x i32> %638, i32 %637, i32 1 %640 = insertelement <4 x i32> %639, i32 0, i32 2 %641 = bitcast <8 x i32> %41 to <32 x i8> %642 = bitcast <4 x i32> %43 to <16 x i8> %643 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %640, <32 x i8> %641, <16 x i8> %642, i32 2) %644 = extractelement <4 x float> %643, i32 1 %645 = fcmp ogt float %644, 0.000000e+00 br i1 %645, label %IF56, label %ENDIF55 IF56: ; preds = %ENDIF52 %646 = extractelement <4 x float> %643, i32 2 %647 = fmul float %644, %646 %648 = call float @llvm.AMDIL.clamp.(float %647, float 0.000000e+00, float 1.000000e+00) %649 = bitcast float %620 to i32 %650 = bitcast float %621 to i32 %651 = insertelement <4 x i32> undef, i32 %649, i32 0 %652 = insertelement <4 x i32> %651, i32 %650, i32 1 %653 = insertelement <4 x i32> %652, i32 0, i32 2 %654 = bitcast <8 x i32> %45 to <32 x i8> %655 = bitcast <4 x i32> %47 to <16 x i8> %656 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %653, <32 x i8> %654, <16 x i8> %655, i32 2) %657 = extractelement <4 x float> %656, i32 0 %658 = extractelement <4 x float> %656, i32 1 %659 = extractelement <4 x float> %656, i32 2 %660 = fmul float %648, %657 %661 = fadd float %660, %633 %662 = fmul float %648, %658 %663 = fadd float %662, %634 %664 = fmul float %648, %659 %665 = fadd float %664, %635 br label %ENDIF55 ENDIF55: ; preds = %ENDIF52, %IF56 %temp20.9 = phi float [ %661, %IF56 ], [ %633, %ENDIF52 ] %temp21.9 = phi float [ %663, %IF56 ], [ %634, %ENDIF52 ] %temp22.9 = phi float [ %665, %IF56 ], [ %635, %ENDIF52 ] %666 = fmul float %temp20.9, 7.500000e-01 %667 = fadd float %666, %610 %668 = fmul float %temp21.9, 7.500000e-01 %669 = fadd float %668, %612 %670 = fmul float %temp22.9, 7.500000e-01 %671 = fadd float %670, %614 %672 = fadd float %48, %24 %673 = fadd float %49, %25 %674 = call float @llvm.maxnum.f32(float %672, float %50) %675 = call float @llvm.maxnum.f32(float %673, float %51) %676 = call float @llvm.minnum.f32(float %674, float %52) %677 = call float @llvm.minnum.f32(float %675, float %53) %678 = bitcast float %676 to i32 %679 = bitcast float %677 to i32 %680 = insertelement <4 x i32> undef, i32 %678, i32 0 %681 = insertelement <4 x i32> %680, i32 %679, i32 1 %682 = insertelement <4 x i32> %681, i32 0, i32 2 %683 = bitcast <8 x i32> %37 to <32 x i8> %684 = bitcast <4 x i32> %39 to <16 x i8> %685 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %682, <32 x i8> %683, <16 x i8> %684, i32 2) %686 = extractelement <4 x float> %685, i32 3 %687 = fmul float %686, %26 %688 = call float @llvm.AMDIL.clamp.(float %687, float 0.000000e+00, float 1.000000e+00) %689 = fmul float %688, %33 %690 = fmul float %688, %34 %691 = fmul float %688, %35 %692 = bitcast float %676 to i32 %693 = bitcast float %677 to i32 %694 = insertelement <4 x i32> undef, i32 %692, i32 0 %695 = insertelement <4 x i32> %694, i32 %693, i32 1 %696 = insertelement <4 x i32> %695, i32 0, i32 2 %697 = bitcast <8 x i32> %41 to <32 x i8> %698 = bitcast <4 x i32> %43 to <16 x i8> %699 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %696, <32 x i8> %697, <16 x i8> %698, i32 2) %700 = extractelement <4 x float> %699, i32 1 %701 = fcmp ogt float %700, 0.000000e+00 br i1 %701, label %IF59, label %ENDIF58 IF59: ; preds = %ENDIF55 %702 = extractelement <4 x float> %699, i32 2 %703 = fmul float %700, %702 %704 = call float @llvm.AMDIL.clamp.(float %703, float 0.000000e+00, float 1.000000e+00) %705 = bitcast float %676 to i32 %706 = bitcast float %677 to i32 %707 = insertelement <4 x i32> undef, i32 %705, i32 0 %708 = insertelement <4 x i32> %707, i32 %706, i32 1 %709 = insertelement <4 x i32> %708, i32 0, i32 2 %710 = bitcast <8 x i32> %45 to <32 x i8> %711 = bitcast <4 x i32> %47 to <16 x i8> %712 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %709, <32 x i8> %710, <16 x i8> %711, i32 2) %713 = extractelement <4 x float> %712, i32 0 %714 = extractelement <4 x float> %712, i32 1 %715 = extractelement <4 x float> %712, i32 2 %716 = fmul float %704, %713 %717 = fadd float %716, %689 %718 = fmul float %704, %714 %719 = fadd float %718, %690 %720 = fmul float %704, %715 %721 = fadd float %720, %691 br label %ENDIF58 ENDIF58: ; preds = %ENDIF55, %IF59 %temp20.10 = phi float [ %717, %IF59 ], [ %689, %ENDIF55 ] %temp21.10 = phi float [ %719, %IF59 ], [ %690, %ENDIF55 ] %temp22.10 = phi float [ %721, %IF59 ], [ %691, %ENDIF55 ] %722 = fmul float %temp20.10, 5.000000e-01 %723 = fadd float %722, %667 %724 = fmul float %temp21.10, 5.000000e-01 %725 = fadd float %724, %669 %726 = fmul float %temp22.10, 5.000000e-01 %727 = fadd float %726, %671 %728 = fmul float %24, 0.000000e+00 %729 = fadd float %728, %48 %730 = fmul float %25, 2.000000e+00 %731 = fadd float %730, %49 %732 = call float @llvm.maxnum.f32(float %729, float %50) %733 = call float @llvm.maxnum.f32(float %731, float %51) %734 = call float @llvm.minnum.f32(float %732, float %52) %735 = call float @llvm.minnum.f32(float %733, float %53) %736 = bitcast float %734 to i32 %737 = bitcast float %735 to i32 %738 = insertelement <4 x i32> undef, i32 %736, i32 0 %739 = insertelement <4 x i32> %738, i32 %737, i32 1 %740 = insertelement <4 x i32> %739, i32 0, i32 2 %741 = bitcast <8 x i32> %37 to <32 x i8> %742 = bitcast <4 x i32> %39 to <16 x i8> %743 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %740, <32 x i8> %741, <16 x i8> %742, i32 2) %744 = extractelement <4 x float> %743, i32 3 %745 = fmul float %744, %26 %746 = call float @llvm.AMDIL.clamp.(float %745, float 0.000000e+00, float 1.000000e+00) %747 = fmul float %746, %33 %748 = fmul float %746, %34 %749 = fmul float %746, %35 %750 = bitcast float %734 to i32 %751 = bitcast float %735 to i32 %752 = insertelement <4 x i32> undef, i32 %750, i32 0 %753 = insertelement <4 x i32> %752, i32 %751, i32 1 %754 = insertelement <4 x i32> %753, i32 0, i32 2 %755 = bitcast <8 x i32> %41 to <32 x i8> %756 = bitcast <4 x i32> %43 to <16 x i8> %757 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %754, <32 x i8> %755, <16 x i8> %756, i32 2) %758 = extractelement <4 x float> %757, i32 1 %759 = fcmp ogt float %758, 0.000000e+00 br i1 %759, label %IF62, label %ENDIF61 IF62: ; preds = %ENDIF58 %760 = extractelement <4 x float> %757, i32 2 %761 = fmul float %758, %760 %762 = call float @llvm.AMDIL.clamp.(float %761, float 0.000000e+00, float 1.000000e+00) %763 = bitcast float %734 to i32 %764 = bitcast float %735 to i32 %765 = insertelement <4 x i32> undef, i32 %763, i32 0 %766 = insertelement <4 x i32> %765, i32 %764, i32 1 %767 = insertelement <4 x i32> %766, i32 0, i32 2 %768 = bitcast <8 x i32> %45 to <32 x i8> %769 = bitcast <4 x i32> %47 to <16 x i8> %770 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %767, <32 x i8> %768, <16 x i8> %769, i32 2) %771 = extractelement <4 x float> %770, i32 0 %772 = extractelement <4 x float> %770, i32 1 %773 = extractelement <4 x float> %770, i32 2 %774 = fmul float %762, %771 %775 = fadd float %774, %747 %776 = fmul float %762, %772 %777 = fadd float %776, %748 %778 = fmul float %762, %773 %779 = fadd float %778, %749 br label %ENDIF61 ENDIF61: ; preds = %ENDIF58, %IF62 %temp16.1 = phi float [ %775, %IF62 ], [ %747, %ENDIF58 ] %temp17.1 = phi float [ %777, %IF62 ], [ %748, %ENDIF58 ] %temp18.1 = phi float [ %779, %IF62 ], [ %749, %ENDIF58 ] %780 = fmul float %temp16.1, 5.000000e-01 %781 = fadd float %780, %723 %782 = fmul float %temp17.1, 5.000000e-01 %783 = fadd float %782, %725 %784 = fmul float %temp18.1, 5.000000e-01 %785 = fadd float %784, %727 %786 = fmul float %781, 1.250000e-01 %787 = fmul float %783, 1.250000e-01 %788 = fmul float %785, 1.250000e-01 %789 = call i32 @llvm.SI.packf16(float %786, float %787) %790 = bitcast i32 %789 to float %791 = call i32 @llvm.SI.packf16(float %788, float 1.000000e+00) %792 = bitcast i32 %791 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %790, float %792, float %790, float %792) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[12:15], s[2:3], 0x4 ; C0860304 s_load_dwordx4 s[20:23], s[2:3], 0x14 ; C08A0314 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s17, s[12:15], 0x0 ; C2088D00 s_buffer_load_dword s16, s[12:15], 0x1 ; C2080D01 s_buffer_load_dword s0, s[12:15], 0x4 ; C2000D04 s_buffer_load_dword s8, s[12:15], 0x8 ; C2040D08 s_buffer_load_dword s10, s[12:15], 0x9 ; C2050D09 s_buffer_load_dword s11, s[12:15], 0xa ; C2058D0A s_buffer_load_dword s12, s[12:15], 0xb ; C2060D0B s_mov_b32 m0, s9 ; BEFC0309 s_buffer_load_dword s1, s[20:23], 0xc ; C200950C s_buffer_load_dword s2, s[20:23], 0xd ; C201150D s_buffer_load_dword s3, s[20:23], 0xe ; C201950E v_interp_p1_f32 v3, v0, 0, 0, [m0] ; C80C0000 v_interp_p2_f32 v3, [v3], v1, 0, 0, [m0] ; C80D0001 v_interp_p1_f32 v4, v0, 1, 0, [m0] ; C8100100 v_interp_p2_f32 v4, [v4], v1, 1, 0, [m0] ; C8110101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s8 ; 7E000208 v_mul_f32_e32 v5, s17, v0 ; 100A0011 v_mov_b32_e32 v0, s10 ; 7E00020A v_mul_f32_e32 v6, s16, v0 ; 100C0010 v_mov_b32_e32 v0, s11 ; 7E00020B v_mul_f32_e32 v7, s17, v0 ; 100E0011 v_mov_b32_e32 v0, s12 ; 7E00020C v_mul_f32_e32 v8, s16, v0 ; 10100010 v_mad_f32 v0, -2.0, s16, v4 ; D2820000 041020F5 s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 s_load_dwordx8 s[36:43], s[6:7], 0x0 ; C0D20700 v_max_f32_e32 v0, v6, v0 ; 20000106 v_max_f32_e32 v1, v5, v3 ; 20020705 v_min_f32_e32 v9, v7, v1 ; 1E120307 v_min_f32_e32 v10, v8, v0 ; 1E140108 s_load_dwordx4 s[20:23], s[4:5], 0x4 ; C08A0504 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 s_load_dwordx4 s[12:15], s[4:5], 0x8 ; C0860508 s_load_dwordx8 s[4:11], s[6:7], 0x10 ; C0C20710 v_mov_b32_e32 v11, 0 ; 7E160280 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v0, 8, 0, 0, 0, 0, 0, 0, 0, v[9:12], s[36:43], s[32:35] ; F0900800 01090009 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, s0, v0 ; 10000000 v_add_f32_e64 v2, 0, v0 clamp ; D2060802 00020080 v_mul_f32_e32 v0, s1, v2 ; 10000401 v_mul_f32_e32 v1, s2, v2 ; 10020402 v_mul_f32_e32 v2, s3, v2 ; 10040403 image_sample_l v[11:14], 15, 0, 0, 0, 0, 0, 0, 0, v[9:12], s[24:31], s[20:23] ; F0900F00 00A60B09 s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_lt_f32_e32 vcc, 0, v12 ; 7C021880 s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_mul_f32_e32 v12, v13, v12 ; 1018190D v_mov_b32_e32 v11, 0 ; 7E160280 image_sample_l v[9:11], 7, 0, 0, 0, 0, 0, 0, 0, v[9:12], s[4:11], s[12:15] ; F0900700 00610909 v_add_f32_e64 v12, 0, v12 clamp ; D206080C 00021880 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v0, v9, v12 ; 3E001909 v_mac_f32_e32 v1, v10, v12 ; 3E02190A v_mac_f32_e32 v2, v11, v12 ; 3E04190B s_or_b64 exec, exec, s[18:19] ; 88FE127E v_subrev_f32_e32 v9, s17, v3 ; 0A120611 v_subrev_f32_e32 v10, s16, v4 ; 0A140810 v_max_f32_e32 v9, v5, v9 ; 20121305 v_max_f32_e32 v10, v6, v10 ; 20141506 v_min_f32_e32 v12, v7, v9 ; 1E181307 v_min_f32_e32 v13, v8, v10 ; 1E1A1508 v_mov_b32_e32 v14, 0 ; 7E1C0280 image_sample_l v9, 8, 0, 0, 0, 0, 0, 0, 0, v[12:15], s[36:43], s[32:35] ; F0900800 0109090C s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v9, s0, v9 ; 10121200 v_add_f32_e64 v9, 0, v9 clamp ; D2060809 00021280 v_mul_f32_e32 v10, s1, v9 ; 10141201 v_mul_f32_e32 v11, s2, v9 ; 10161202 v_mul_f32_e32 v9, s3, v9 ; 10121203 image_sample_l v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[12:15], s[24:31], s[20:23] ; F0900F00 00A60E0C s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_lt_f32_e32 vcc, 0, v15 ; 7C021E80 s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_mul_f32_e32 v15, v16, v15 ; 101E1F10 v_mov_b32_e32 v14, 0 ; 7E1C0280 image_sample_l v[12:14], 7, 0, 0, 0, 0, 0, 0, 0, v[12:15], s[4:11], s[12:15] ; F0900700 00610C0C v_add_f32_e64 v15, 0, v15 clamp ; D206080F 00021E80 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v10, v12, v15 ; 3E141F0C v_mac_f32_e32 v11, v13, v15 ; 3E161F0D v_mac_f32_e32 v9, v14, v15 ; 3E121F0E s_or_b64 exec, exec, s[18:19] ; 88FE127E v_subrev_f32_e32 v12, s16, v4 ; 0A180810 v_max_f32_e32 v13, v5, v3 ; 201A0705 v_max_f32_e32 v12, v6, v12 ; 20181906 v_min_f32_e32 v15, v7, v13 ; 1E1E1B07 v_min_f32_e32 v16, v8, v12 ; 1E201908 v_mov_b32_e32 v17, 0 ; 7E220280 image_sample_l v12, 8, 0, 0, 0, 0, 0, 0, 0, v[15:18], s[36:43], s[32:35] ; F0900800 01090C0F s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v12, s0, v12 ; 10181800 v_add_f32_e64 v12, 0, v12 clamp ; D206080C 00021880 v_mul_f32_e32 v14, s1, v12 ; 101C1801 v_mul_f32_e32 v13, s2, v12 ; 101A1802 v_mul_f32_e32 v12, s3, v12 ; 10181803 image_sample_l v[17:20], 15, 0, 0, 0, 0, 0, 0, 0, v[15:18], s[24:31], s[20:23] ; F0900F00 00A6110F s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_lt_f32_e32 vcc, 0, v18 ; 7C022480 s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_mul_f32_e32 v18, v19, v18 ; 10242513 v_mov_b32_e32 v17, 0 ; 7E220280 image_sample_l v[15:17], 7, 0, 0, 0, 0, 0, 0, 0, v[15:18], s[4:11], s[12:15] ; F0900700 00610F0F v_add_f32_e64 v18, 0, v18 clamp ; D2060812 00022480 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v14, v15, v18 ; 3E1C250F v_mac_f32_e32 v13, v16, v18 ; 3E1A2510 v_mac_f32_e32 v12, v17, v18 ; 3E182511 s_or_b64 exec, exec, s[18:19] ; 88FE127E v_add_f32_e32 v15, s17, v3 ; 061E0611 v_subrev_f32_e32 v16, s16, v4 ; 0A200810 v_max_f32_e32 v15, v5, v15 ; 201E1F05 v_max_f32_e32 v16, v6, v16 ; 20202106 v_min_f32_e32 v18, v7, v15 ; 1E241F07 v_min_f32_e32 v19, v8, v16 ; 1E262108 v_mov_b32_e32 v20, 0 ; 7E280280 image_sample_l v15, 8, 0, 0, 0, 0, 0, 0, 0, v[18:21], s[36:43], s[32:35] ; F0900800 01090F12 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v15, s0, v15 ; 101E1E00 v_add_f32_e64 v15, 0, v15 clamp ; D206080F 00021E80 v_mul_f32_e32 v17, s1, v15 ; 10221E01 v_mul_f32_e32 v16, s2, v15 ; 10201E02 v_mul_f32_e32 v15, s3, v15 ; 101E1E03 image_sample_l v[20:23], 15, 0, 0, 0, 0, 0, 0, 0, v[18:21], s[24:31], s[20:23] ; F0900F00 00A61412 s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_lt_f32_e32 vcc, 0, v21 ; 7C022A80 s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_mul_f32_e32 v21, v22, v21 ; 102A2B16 v_mov_b32_e32 v20, 0 ; 7E280280 image_sample_l v[18:20], 7, 0, 0, 0, 0, 0, 0, 0, v[18:21], s[4:11], s[12:15] ; F0900700 00611212 v_add_f32_e64 v21, 0, v21 clamp ; D2060815 00022A80 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v17, v18, v21 ; 3E222B12 v_mac_f32_e32 v16, v19, v21 ; 3E202B13 v_mac_f32_e32 v15, v20, v21 ; 3E1E2B14 s_or_b64 exec, exec, s[18:19] ; 88FE127E v_mad_f32 v18, -2.0, s17, v3 ; D2820012 040C22F5 v_max_f32_e32 v18, v5, v18 ; 20242505 v_max_f32_e32 v19, v6, v4 ; 20260906 v_min_f32_e32 v21, v7, v18 ; 1E2A2507 v_min_f32_e32 v22, v8, v19 ; 1E2C2708 v_mov_b32_e32 v23, 0 ; 7E2E0280 image_sample_l v18, 8, 0, 0, 0, 0, 0, 0, 0, v[21:24], s[36:43], s[32:35] ; F0900800 01091215 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v18, s0, v18 ; 10242400 v_add_f32_e64 v18, 0, v18 clamp ; D2060812 00022480 v_mul_f32_e32 v20, s1, v18 ; 10282401 v_mul_f32_e32 v19, s2, v18 ; 10262402 v_mul_f32_e32 v18, s3, v18 ; 10242403 image_sample_l v[23:26], 15, 0, 0, 0, 0, 0, 0, 0, v[21:24], s[24:31], s[20:23] ; F0900F00 00A61715 s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_lt_f32_e32 vcc, 0, v24 ; 7C023080 s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_mul_f32_e32 v24, v25, v24 ; 10303119 v_mov_b32_e32 v23, 0 ; 7E2E0280 image_sample_l v[21:23], 7, 0, 0, 0, 0, 0, 0, 0, v[21:24], s[4:11], s[12:15] ; F0900700 00611515 v_add_f32_e64 v24, 0, v24 clamp ; D2060818 00023080 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v20, v21, v24 ; 3E283115 v_mac_f32_e32 v19, v22, v24 ; 3E263116 v_mac_f32_e32 v18, v23, v24 ; 3E243117 s_or_b64 exec, exec, s[18:19] ; 88FE127E v_subrev_f32_e32 v21, s17, v3 ; 0A2A0611 v_max_f32_e32 v21, v5, v21 ; 202A2B05 v_max_f32_e32 v22, v6, v4 ; 202C0906 v_min_f32_e32 v24, v7, v21 ; 1E302B07 v_min_f32_e32 v25, v8, v22 ; 1E322D08 v_mov_b32_e32 v26, 0 ; 7E340280 image_sample_l v21, 8, 0, 0, 0, 0, 0, 0, 0, v[24:27], s[36:43], s[32:35] ; F0900800 01091518 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v21, s0, v21 ; 102A2A00 v_add_f32_e64 v21, 0, v21 clamp ; D2060815 00022A80 v_mul_f32_e32 v23, s1, v21 ; 102E2A01 v_mul_f32_e32 v22, s2, v21 ; 102C2A02 v_mul_f32_e32 v21, s3, v21 ; 102A2A03 image_sample_l v[26:29], 15, 0, 0, 0, 0, 0, 0, 0, v[24:27], s[24:31], s[20:23] ; F0900F00 00A61A18 s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_lt_f32_e32 vcc, 0, v27 ; 7C023680 s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_mul_f32_e32 v27, v28, v27 ; 1036371C v_mov_b32_e32 v26, 0 ; 7E340280 image_sample_l v[24:26], 7, 0, 0, 0, 0, 0, 0, 0, v[24:27], s[4:11], s[12:15] ; F0900700 00611818 v_add_f32_e64 v27, 0, v27 clamp ; D206081B 00023680 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v23, v24, v27 ; 3E2E3718 v_mac_f32_e32 v22, v25, v27 ; 3E2C3719 v_mac_f32_e32 v21, v26, v27 ; 3E2A371A s_or_b64 exec, exec, s[18:19] ; 88FE127E v_max_f32_e32 v24, v5, v3 ; 20300705 v_max_f32_e32 v25, v6, v4 ; 20320906 v_min_f32_e32 v27, v7, v24 ; 1E363107 v_min_f32_e32 v28, v8, v25 ; 1E383308 v_mov_b32_e32 v29, 0 ; 7E3A0280 image_sample_l v24, 8, 0, 0, 0, 0, 0, 0, 0, v[27:30], s[36:43], s[32:35] ; F0900800 0109181B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v24, s0, v24 ; 10303000 v_add_f32_e64 v26, 0, v24 clamp ; D206081A 00023080 v_mul_f32_e32 v24, s1, v26 ; 10303401 v_mul_f32_e32 v25, s2, v26 ; 10323402 v_mul_f32_e32 v26, s3, v26 ; 10343403 image_sample_l v[29:32], 15, 0, 0, 0, 0, 0, 0, 0, v[27:30], s[24:31], s[20:23] ; F0900F00 00A61D1B s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_lt_f32_e32 vcc, 0, v30 ; 7C023C80 s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_mul_f32_e32 v30, v31, v30 ; 103C3D1F v_mov_b32_e32 v29, 0 ; 7E3A0280 image_sample_l v[27:29], 7, 0, 0, 0, 0, 0, 0, 0, v[27:30], s[4:11], s[12:15] ; F0900700 00611B1B v_add_f32_e64 v30, 0, v30 clamp ; D206081E 00023C80 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v24, v27, v30 ; 3E303D1B v_mac_f32_e32 v25, v28, v30 ; 3E323D1C v_mac_f32_e32 v26, v29, v30 ; 3E343D1D s_or_b64 exec, exec, s[18:19] ; 88FE127E v_add_f32_e32 v27, s17, v3 ; 06360611 v_max_f32_e32 v27, v5, v27 ; 20363705 v_max_f32_e32 v28, v6, v4 ; 20380906 v_min_f32_e32 v30, v7, v27 ; 1E3C3707 v_min_f32_e32 v31, v8, v28 ; 1E3E3908 v_mov_b32_e32 v32, 0 ; 7E400280 image_sample_l v27, 8, 0, 0, 0, 0, 0, 0, 0, v[30:33], s[36:43], s[32:35] ; F0900800 01091B1E s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v27, s0, v27 ; 10363600 v_add_f32_e64 v27, 0, v27 clamp ; D206081B 00023680 v_mul_f32_e32 v29, s1, v27 ; 103A3601 v_mul_f32_e32 v28, s2, v27 ; 10383602 v_mul_f32_e32 v27, s3, v27 ; 10363603 image_sample_l v[32:35], 15, 0, 0, 0, 0, 0, 0, 0, v[30:33], s[24:31], s[20:23] ; F0900F00 00A6201E s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_lt_f32_e32 vcc, 0, v33 ; 7C024280 s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_mul_f32_e32 v33, v34, v33 ; 10424322 v_mov_b32_e32 v32, 0 ; 7E400280 image_sample_l v[30:32], 7, 0, 0, 0, 0, 0, 0, 0, v[30:33], s[4:11], s[12:15] ; F0900700 00611E1E v_add_f32_e64 v33, 0, v33 clamp ; D2060821 00024280 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v29, v30, v33 ; 3E3A431E v_mac_f32_e32 v28, v31, v33 ; 3E38431F v_mac_f32_e32 v27, v32, v33 ; 3E364320 s_or_b64 exec, exec, s[18:19] ; 88FE127E v_mad_f32 v30, 2.0, s17, v3 ; D282001E 040C22F4 v_max_f32_e32 v30, v5, v30 ; 203C3D05 v_max_f32_e32 v31, v6, v4 ; 203E0906 v_min_f32_e32 v33, v7, v30 ; 1E423D07 v_min_f32_e32 v34, v8, v31 ; 1E443F08 v_mov_b32_e32 v35, 0 ; 7E460280 image_sample_l v30, 8, 0, 0, 0, 0, 0, 0, 0, v[33:36], s[36:43], s[32:35] ; F0900800 01091E21 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v30, s0, v30 ; 103C3C00 v_add_f32_e64 v30, 0, v30 clamp ; D206081E 00023C80 v_mul_f32_e32 v32, s1, v30 ; 10403C01 v_mul_f32_e32 v31, s2, v30 ; 103E3C02 v_mul_f32_e32 v30, s3, v30 ; 103C3C03 image_sample_l v[35:38], 15, 0, 0, 0, 0, 0, 0, 0, v[33:36], s[24:31], s[20:23] ; F0900F00 00A62321 s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_lt_f32_e32 vcc, 0, v36 ; 7C024880 s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_mul_f32_e32 v36, v37, v36 ; 10484925 v_mov_b32_e32 v35, 0 ; 7E460280 image_sample_l v[33:35], 7, 0, 0, 0, 0, 0, 0, 0, v[33:36], s[4:11], s[12:15] ; F0900700 00612121 v_add_f32_e64 v36, 0, v36 clamp ; D2060824 00024880 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v32, v33, v36 ; 3E404921 v_mac_f32_e32 v31, v34, v36 ; 3E3E4922 v_mac_f32_e32 v30, v35, v36 ; 3E3C4923 s_or_b64 exec, exec, s[18:19] ; 88FE127E v_subrev_f32_e32 v33, s17, v3 ; 0A420611 v_add_f32_e32 v34, s16, v4 ; 06440810 v_max_f32_e32 v33, v5, v33 ; 20424305 v_max_f32_e32 v34, v6, v34 ; 20444506 v_min_f32_e32 v36, v7, v33 ; 1E484307 v_min_f32_e32 v37, v8, v34 ; 1E4A4508 v_mov_b32_e32 v38, 0 ; 7E4C0280 image_sample_l v33, 8, 0, 0, 0, 0, 0, 0, 0, v[36:39], s[36:43], s[32:35] ; F0900800 01092124 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v33, s0, v33 ; 10424200 v_add_f32_e64 v33, 0, v33 clamp ; D2060821 00024280 v_mul_f32_e32 v35, s1, v33 ; 10464201 v_mul_f32_e32 v34, s2, v33 ; 10444202 v_mul_f32_e32 v33, s3, v33 ; 10424203 image_sample_l v[38:41], 15, 0, 0, 0, 0, 0, 0, 0, v[36:39], s[24:31], s[20:23] ; F0900F00 00A62624 s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_lt_f32_e32 vcc, 0, v39 ; 7C024E80 s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_mul_f32_e32 v39, v40, v39 ; 104E4F28 v_mov_b32_e32 v38, 0 ; 7E4C0280 image_sample_l v[36:38], 7, 0, 0, 0, 0, 0, 0, 0, v[36:39], s[4:11], s[12:15] ; F0900700 00612424 v_add_f32_e64 v39, 0, v39 clamp ; D2060827 00024E80 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v35, v36, v39 ; 3E464F24 v_mac_f32_e32 v34, v37, v39 ; 3E444F25 v_mac_f32_e32 v33, v38, v39 ; 3E424F26 s_or_b64 exec, exec, s[18:19] ; 88FE127E v_add_f32_e32 v36, s16, v4 ; 06480810 v_max_f32_e32 v37, v5, v3 ; 204A0705 v_max_f32_e32 v36, v6, v36 ; 20484906 v_min_f32_e32 v39, v7, v37 ; 1E4E4B07 v_min_f32_e32 v40, v8, v36 ; 1E504908 v_mov_b32_e32 v41, 0 ; 7E520280 image_sample_l v36, 8, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[36:43], s[32:35] ; F0900800 01092427 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v36, s0, v36 ; 10484800 v_add_f32_e64 v36, 0, v36 clamp ; D2060824 00024880 v_mul_f32_e32 v38, s1, v36 ; 104C4801 v_mul_f32_e32 v37, s2, v36 ; 104A4802 v_mul_f32_e32 v36, s3, v36 ; 10484803 image_sample_l v[41:44], 15, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[24:31], s[20:23] ; F0900F00 00A62927 s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_lt_f32_e32 vcc, 0, v42 ; 7C025480 s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_mul_f32_e32 v42, v43, v42 ; 1054552B v_mov_b32_e32 v41, 0 ; 7E520280 image_sample_l v[39:41], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[4:11], s[12:15] ; F0900700 00612727 v_add_f32_e64 v42, 0, v42 clamp ; D206082A 00025480 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v38, v39, v42 ; 3E4C5527 v_mac_f32_e32 v37, v40, v42 ; 3E4A5528 v_mac_f32_e32 v36, v41, v42 ; 3E485529 s_or_b64 exec, exec, s[18:19] ; 88FE127E v_add_f32_e32 v39, s17, v3 ; 064E0611 v_add_f32_e32 v40, s16, v4 ; 06500810 v_max_f32_e32 v39, v5, v39 ; 204E4F05 v_max_f32_e32 v40, v6, v40 ; 20505106 v_min_f32_e32 v42, v7, v39 ; 1E544F07 v_min_f32_e32 v43, v8, v40 ; 1E565108 v_mov_b32_e32 v44, 0 ; 7E580280 image_sample_l v39, 8, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[36:43], s[32:35] ; F0900800 0109272A s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v39, s0, v39 ; 104E4E00 v_add_f32_e64 v39, 0, v39 clamp ; D2060827 00024E80 v_mul_f32_e32 v41, s1, v39 ; 10524E01 v_mul_f32_e32 v40, s2, v39 ; 10504E02 v_mul_f32_e32 v39, s3, v39 ; 104E4E03 image_sample_l v[44:47], 15, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[24:31], s[20:23] ; F0900F00 00A62C2A s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_lt_f32_e32 vcc, 0, v45 ; 7C025A80 s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_mul_f32_e32 v45, v46, v45 ; 105A5B2E v_mov_b32_e32 v44, 0 ; 7E580280 image_sample_l v[42:44], 7, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[4:11], s[12:15] ; F0900700 00612A2A v_add_f32_e64 v45, 0, v45 clamp ; D206082D 00025A80 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v41, v42, v45 ; 3E525B2A v_mac_f32_e32 v40, v43, v45 ; 3E505B2B v_mac_f32_e32 v39, v44, v45 ; 3E4E5B2C s_or_b64 exec, exec, s[18:19] ; 88FE127E v_mac_f32_e64 v4, 2.0, s16 ; D23E0004 000020F4 v_max_f32_e32 v3, v5, v3 ; 20060705 v_max_f32_e32 v4, v6, v4 ; 20080906 v_min_f32_e32 v42, v7, v3 ; 1E540707 v_min_f32_e32 v43, v8, v4 ; 1E560908 v_mov_b32_e32 v44, 0 ; 7E580280 image_sample_l v3, 8, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[36:43], s[32:35] ; F0900800 0109032A image_sample_l v[44:47], 15, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[24:31], s[20:23] ; F0900F00 00A62C2A s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v3, s0, v3 ; 10060600 v_add_f32_e64 v3, 0, v3 clamp ; D2060803 00020680 v_mul_f32_e32 v5, s1, v3 ; 100A0601 v_mul_f32_e32 v4, s2, v3 ; 10080602 v_mul_f32_e32 v3, s3, v3 ; 10060603 s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_lt_f32_e32 vcc, 0, v45 ; 7C025A80 s_and_saveexec_b64 s[0:1], vcc ; BE80246A s_xor_b64 s[0:1], exec, s[0:1] ; 8980007E v_mul_f32_e32 v6, v46, v45 ; 100C5B2E v_mov_b32_e32 v44, 0 ; 7E580280 image_sample_l v[42:44], 7, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[4:11], s[12:15] ; F0900700 00612A2A v_add_f32_e64 v6, 0, v6 clamp ; D2060806 00020C80 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v5, v42, v6 ; 3E0A0D2A v_mac_f32_e32 v4, v43, v6 ; 3E080D2B v_mac_f32_e32 v3, v44, v6 ; 3E060D2C s_or_b64 exec, exec, s[0:1] ; 88FE007E v_mul_f32_e32 v0, 0.5, v0 ; 100000F0 v_mac_f32_e32 v0, 0.5, v10 ; 3E0014F0 v_mul_f32_e32 v1, 0.5, v1 ; 100202F0 v_mac_f32_e32 v1, 0.5, v11 ; 3E0216F0 v_mul_f32_e32 v2, 0.5, v2 ; 100404F0 v_mac_f32_e32 v2, 0.5, v9 ; 3E0412F0 v_mov_b32_e32 v6, 0x3f400000 ; 7E0C02FF 3F400000 v_mac_f32_e32 v0, v6, v14 ; 3E001D06 v_mac_f32_e32 v1, v6, v13 ; 3E021B06 v_mac_f32_e32 v2, v6, v12 ; 3E041906 v_mac_f32_e32 v0, 0.5, v17 ; 3E0022F0 v_mac_f32_e32 v1, 0.5, v16 ; 3E0220F0 v_mac_f32_e32 v2, 0.5, v15 ; 3E041EF0 v_mac_f32_e32 v0, 0.5, v20 ; 3E0028F0 v_mac_f32_e32 v1, 0.5, v19 ; 3E0226F0 v_mac_f32_e32 v2, 0.5, v18 ; 3E0424F0 v_mac_f32_e32 v0, v6, v23 ; 3E002F06 v_mac_f32_e32 v1, v6, v22 ; 3E022D06 v_mac_f32_e32 v2, v6, v21 ; 3E042B06 v_add_f32_e32 v0, v24, v0 ; 06000118 v_add_f32_e32 v1, v25, v1 ; 06020319 v_add_f32_e32 v2, v26, v2 ; 0604051A v_mac_f32_e32 v0, v6, v29 ; 3E003B06 v_mac_f32_e32 v1, v6, v28 ; 3E023906 v_mac_f32_e32 v2, v6, v27 ; 3E043706 v_mac_f32_e32 v0, 0.5, v32 ; 3E0040F0 v_mac_f32_e32 v1, 0.5, v31 ; 3E023EF0 v_mac_f32_e32 v2, 0.5, v30 ; 3E043CF0 v_mac_f32_e32 v0, 0.5, v35 ; 3E0046F0 v_mac_f32_e32 v1, 0.5, v34 ; 3E0244F0 v_mac_f32_e32 v2, 0.5, v33 ; 3E0442F0 v_mac_f32_e32 v0, v6, v38 ; 3E004D06 v_mac_f32_e32 v1, v6, v37 ; 3E024B06 v_mac_f32_e32 v2, v6, v36 ; 3E044906 v_mac_f32_e32 v0, 0.5, v41 ; 3E0052F0 v_mac_f32_e32 v1, 0.5, v40 ; 3E0250F0 v_mac_f32_e32 v2, 0.5, v39 ; 3E044EF0 v_mac_f32_e32 v0, 0.5, v5 ; 3E000AF0 v_mac_f32_e32 v1, 0.5, v4 ; 3E0208F0 v_mac_f32_e32 v2, 0.5, v3 ; 3E0406F0 v_mov_b32_e32 v3, 0x3e000000 ; 7E0602FF 3E000000 v_mul_f32_e32 v0, v3, v0 ; 10000103 v_mul_f32_e32 v1, v3, v1 ; 10020303 v_mul_f32_e32 v2, v3, v2 ; 10040503 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e64 v1, v2, 1.0 ; D25E0001 0001E502 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 48 Code Size: 2036 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0] DCL CONST[2][0..41] DCL CONST[3][0..13] DCL CONST[4][0] DCL TEMP[0..6], LOCAL IMM[0] UINT32 {1, 416, 424, 0} IMM[1] FLT32 { 0.0000, -3.0962, -1.2769, 1.2769} IMM[2] FLT32 { 3.0962, 0.0198, 0.3206, 0.3192} 0: ADD TEMP[0].xy, CONST[2][26].xyyy, CONST[2][26].zwww 1: MOV TEMP[1].x, CONST[2][26].xxxx 2: MOV TEMP[1].y, CONST[2][26].yyyy 3: MOV TEMP[1].z, TEMP[0].xxxx 4: MOV TEMP[1].w, TEMP[0].yyyy 5: MUL TEMP[0].xz, TEMP[1], CONST[1][0].xyxy 6: MOV TEMP[1].y, IMM[1].xxxx 7: MUL TEMP[1].x, CONST[1][0].xxxx, IMM[1].yyyy 8: ADD TEMP[1].xy, TEMP[1].xyyy, IN[0].xyyy 9: MOV TEMP[2].y, TEMP[1].yyyy 10: MAX TEMP[1].x, TEMP[1].xxxx, TEMP[0].xxxx 11: MIN TEMP[2].x, TEMP[1].xxxx, TEMP[0].zzzz 12: MOV TEMP[1].y, IMM[1].xxxx 13: MUL TEMP[1].x, CONST[1][0].xxxx, IMM[1].zzzz 14: ADD TEMP[1].xy, TEMP[1].xyyy, IN[0].xyyy 15: MOV TEMP[3].y, TEMP[1].yyyy 16: MAX TEMP[1].x, TEMP[1].xxxx, TEMP[0].xxxx 17: MIN TEMP[3].x, TEMP[1].xxxx, TEMP[0].zzzz 18: MOV TEMP[1].y, IN[0].yyyy 19: MAX TEMP[4].x, IN[0].xxxx, TEMP[0].xxxx 20: MIN TEMP[1].x, TEMP[4].xxxx, TEMP[0].zzzz 21: MOV TEMP[4].y, IMM[1].xxxx 22: MUL TEMP[4].x, CONST[1][0].xxxx, IMM[1].wwww 23: ADD TEMP[4].xy, TEMP[4].xyyy, IN[0].xyyy 24: MOV TEMP[5].y, TEMP[4].yyyy 25: MAX TEMP[4].x, TEMP[4].xxxx, TEMP[0].xxxx 26: MIN TEMP[5].x, TEMP[4].xxxx, TEMP[0].zzzz 27: MOV TEMP[4].y, IMM[1].xxxx 28: MUL TEMP[4].x, CONST[1][0].xxxx, IMM[2].xxxx 29: ADD TEMP[4].xy, TEMP[4].xyyy, IN[0].xyyy 30: MOV TEMP[6].y, TEMP[4].yyyy 31: MAX TEMP[4].x, TEMP[4].xxxx, TEMP[0].xxxx 32: MIN TEMP[6].x, TEMP[4].xxxx, TEMP[0].zzzz 33: MOV TEMP[0].xy, TEMP[6].xyyy 34: TEX TEMP[0], TEMP[0], SAMP[0], 2D 35: MOV TEMP[4].xy, TEMP[5].xyyy 36: TEX TEMP[4], TEMP[4], SAMP[0], 2D 37: MOV TEMP[1].xy, TEMP[1].xyyy 38: TEX TEMP[1], TEMP[1], SAMP[0], 2D 39: MOV TEMP[2].xy, TEMP[2].xyyy 40: TEX TEMP[2], TEMP[2], SAMP[0], 2D 41: MOV TEMP[3].xy, TEMP[3].xyyy 42: TEX TEMP[3], TEMP[3], SAMP[0], 2D 43: MUL TEMP[3], IMM[2].zzzz, TEMP[3] 44: MAD TEMP[2], IMM[2].yyyy, TEMP[2], TEMP[3] 45: MAD TEMP[1], IMM[2].wwww, TEMP[1], TEMP[2] 46: MAD TEMP[1], IMM[2].zzzz, TEMP[4], TEMP[1] 47: MAD TEMP[0], IMM[2].yyyy, TEMP[0], TEMP[1] 48: MOV OUT[0], TEMP[0] 49: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 %26 = load <16 x i8>, <16 x i8> addrspace(2)* %25, align 16, !tbaa !0 %27 = call float @llvm.SI.load.const(<16 x i8> %26, i32 416) %28 = call float @llvm.SI.load.const(<16 x i8> %26, i32 424) %29 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %30 = load <8 x i32>, <8 x i32> addrspace(2)* %29, align 32, !tbaa !0 %31 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %32 = load <4 x i32>, <4 x i32> addrspace(2)* %31, align 16, !tbaa !0 %33 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %35 = fadd float %27, %28 %36 = fmul float %27, %24 %37 = fmul float %35, %24 %38 = fmul float %24, 0xC008C50F00000000 %39 = fadd float %38, %33 %40 = fadd float %34, 0.000000e+00 %41 = call float @llvm.maxnum.f32(float %39, float %36) %42 = call float @llvm.minnum.f32(float %41, float %37) %43 = fmul float %24, 0xBFF46E19C0000000 %44 = fadd float %43, %33 %45 = fadd float %34, 0.000000e+00 %46 = call float @llvm.maxnum.f32(float %44, float %36) %47 = call float @llvm.minnum.f32(float %46, float %37) %48 = call float @llvm.maxnum.f32(float %33, float %36) %49 = call float @llvm.minnum.f32(float %48, float %37) %50 = fmul float %24, 0x3FF46E19C0000000 %51 = fadd float %50, %33 %52 = fadd float %34, 0.000000e+00 %53 = call float @llvm.maxnum.f32(float %51, float %36) %54 = call float @llvm.minnum.f32(float %53, float %37) %55 = fmul float %24, 0x4008C50F00000000 %56 = fadd float %55, %33 %57 = fadd float %34, 0.000000e+00 %58 = call float @llvm.maxnum.f32(float %56, float %36) %59 = call float @llvm.minnum.f32(float %58, float %37) %60 = bitcast float %59 to i32 %61 = bitcast float %57 to i32 %62 = insertelement <2 x i32> undef, i32 %60, i32 0 %63 = insertelement <2 x i32> %62, i32 %61, i32 1 %64 = bitcast <8 x i32> %30 to <32 x i8> %65 = bitcast <4 x i32> %32 to <16 x i8> %66 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %63, <32 x i8> %64, <16 x i8> %65, i32 2) %67 = extractelement <4 x float> %66, i32 0 %68 = extractelement <4 x float> %66, i32 1 %69 = extractelement <4 x float> %66, i32 2 %70 = extractelement <4 x float> %66, i32 3 %71 = bitcast float %54 to i32 %72 = bitcast float %52 to i32 %73 = insertelement <2 x i32> undef, i32 %71, i32 0 %74 = insertelement <2 x i32> %73, i32 %72, i32 1 %75 = bitcast <8 x i32> %30 to <32 x i8> %76 = bitcast <4 x i32> %32 to <16 x i8> %77 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %74, <32 x i8> %75, <16 x i8> %76, i32 2) %78 = extractelement <4 x float> %77, i32 0 %79 = extractelement <4 x float> %77, i32 1 %80 = extractelement <4 x float> %77, i32 2 %81 = extractelement <4 x float> %77, i32 3 %82 = bitcast float %49 to i32 %83 = bitcast float %34 to i32 %84 = insertelement <2 x i32> undef, i32 %82, i32 0 %85 = insertelement <2 x i32> %84, i32 %83, i32 1 %86 = bitcast <8 x i32> %30 to <32 x i8> %87 = bitcast <4 x i32> %32 to <16 x i8> %88 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %85, <32 x i8> %86, <16 x i8> %87, i32 2) %89 = extractelement <4 x float> %88, i32 0 %90 = extractelement <4 x float> %88, i32 1 %91 = extractelement <4 x float> %88, i32 2 %92 = extractelement <4 x float> %88, i32 3 %93 = bitcast float %42 to i32 %94 = bitcast float %40 to i32 %95 = insertelement <2 x i32> undef, i32 %93, i32 0 %96 = insertelement <2 x i32> %95, i32 %94, i32 1 %97 = bitcast <8 x i32> %30 to <32 x i8> %98 = bitcast <4 x i32> %32 to <16 x i8> %99 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %96, <32 x i8> %97, <16 x i8> %98, i32 2) %100 = extractelement <4 x float> %99, i32 0 %101 = extractelement <4 x float> %99, i32 1 %102 = extractelement <4 x float> %99, i32 2 %103 = extractelement <4 x float> %99, i32 3 %104 = bitcast float %47 to i32 %105 = bitcast float %45 to i32 %106 = insertelement <2 x i32> undef, i32 %104, i32 0 %107 = insertelement <2 x i32> %106, i32 %105, i32 1 %108 = bitcast <8 x i32> %30 to <32 x i8> %109 = bitcast <4 x i32> %32 to <16 x i8> %110 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %107, <32 x i8> %108, <16 x i8> %109, i32 2) %111 = extractelement <4 x float> %110, i32 0 %112 = extractelement <4 x float> %110, i32 1 %113 = extractelement <4 x float> %110, i32 2 %114 = extractelement <4 x float> %110, i32 3 %115 = fmul float %111, 0x3FD4841240000000 %116 = fmul float %112, 0x3FD4841240000000 %117 = fmul float %113, 0x3FD4841240000000 %118 = fmul float %114, 0x3FD4841240000000 %119 = fmul float %100, 0x3F944D8780000000 %120 = fadd float %119, %115 %121 = fmul float %101, 0x3F944D8780000000 %122 = fadd float %121, %116 %123 = fmul float %102, 0x3F944D8780000000 %124 = fadd float %123, %117 %125 = fmul float %103, 0x3F944D8780000000 %126 = fadd float %125, %118 %127 = fmul float %89, 0x3FD46E2A80000000 %128 = fadd float %127, %120 %129 = fmul float %90, 0x3FD46E2A80000000 %130 = fadd float %129, %122 %131 = fmul float %91, 0x3FD46E2A80000000 %132 = fadd float %131, %124 %133 = fmul float %92, 0x3FD46E2A80000000 %134 = fadd float %133, %126 %135 = fmul float %78, 0x3FD4841240000000 %136 = fadd float %135, %128 %137 = fmul float %79, 0x3FD4841240000000 %138 = fadd float %137, %130 %139 = fmul float %80, 0x3FD4841240000000 %140 = fadd float %139, %132 %141 = fmul float %81, 0x3FD4841240000000 %142 = fadd float %141, %134 %143 = fmul float %67, 0x3F944D8780000000 %144 = fadd float %143, %136 %145 = fmul float %68, 0x3F944D8780000000 %146 = fadd float %145, %138 %147 = fmul float %69, 0x3F944D8780000000 %148 = fadd float %147, %140 %149 = fmul float %70, 0x3F944D8780000000 %150 = fadd float %149, %142 %151 = call i32 @llvm.SI.packf16(float %144, float %146) %152 = bitcast i32 %151 to float %153 = call i32 @llvm.SI.packf16(float %148, float %150) %154 = bitcast i32 %153 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %152, float %154, float %152, float %154) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx4 s[12:15], s[2:3], 0x4 ; C0860304 s_load_dwordx4 s[0:3], s[2:3], 0x8 ; C0800308 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[12:15], 0x0 ; C2020D00 s_buffer_load_dword s5, s[0:3], 0x6a ; C202816A s_buffer_load_dword s0, s[0:3], 0x68 ; C2000168 v_mov_b32_e32 v2, 0xc0462878 ; 7E0402FF C0462878 v_mov_b32_e32 v3, 0xbfa370ce ; 7E0602FF BFA370CE v_mov_b32_e32 v4, 0x3fa370ce ; 7E0802FF 3FA370CE v_interp_p1_f32 v5, v0, 0, 0, [m0] ; C8140000 v_interp_p2_f32 v5, [v5], v1, 0, 0, [m0] ; C8150001 v_interp_p1_f32 v7, v0, 1, 0, [m0] ; C81C0100 v_interp_p2_f32 v7, [v7], v1, 1, 0, [m0] ; C81D0101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s5 ; 7E000205 v_add_f32_e32 v0, s0, v0 ; 06000000 v_mov_b32_e32 v1, s4 ; 7E020204 v_mul_f32_e32 v1, s0, v1 ; 10020200 v_mad_f32 v2, s4, v2, v5 ; D2820002 04160404 v_mad_f32 v3, s4, v3, v5 ; D2820003 04160604 v_mad_f32 v4, s4, v4, v5 ; D2820004 04160804 v_mul_f32_e32 v0, s4, v0 ; 10000004 v_max_f32_e32 v2, v1, v2 ; 20040501 v_max_f32_e32 v8, v1, v5 ; 20100B01 v_mov_b32_e32 v6, 0x40462878 ; 7E0C02FF 40462878 v_mac_f32_e32 v5, s4, v6 ; 3E0A0C04 s_load_dwordx8 s[0:7], s[6:7], 0x0 ; C0C00700 v_max_f32_e32 v3, v1, v3 ; 20060701 v_max_f32_e32 v4, v1, v4 ; 20080901 v_max_f32_e32 v1, v1, v5 ; 20020B01 v_min_f32_e32 v6, v0, v1 ; 1E0C0300 v_min_f32_e32 v1, v0, v2 ; 1E020500 v_min_f32_e32 v9, v0, v3 ; 1E120700 v_min_f32_e32 v11, v0, v8 ; 1E161100 v_min_f32_e32 v3, v0, v4 ; 1E060900 v_mov_b32_e32 v4, v7 ; 7E080307 v_mov_b32_e32 v12, v7 ; 7E180307 v_mov_b32_e32 v2, v7 ; 7E040307 v_mov_b32_e32 v10, v7 ; 7E140307 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[5:8], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[0:7], s[8:11] ; F0800F00 00400506 image_sample v[13:16], 15, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[0:7], s[8:11] ; F0800F00 00400D03 image_sample v[17:20], 15, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[0:7], s[8:11] ; F0800F00 0040110B image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[1:2], s[0:7], s[8:11] ; F0800F00 00400001 image_sample v[9:12], 15, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[0:7], s[8:11] ; F0800F00 00400909 v_mov_b32_e32 v4, 0x3ea42092 ; 7E0802FF 3EA42092 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v9, v4, v9 ; 10121304 v_mul_f32_e32 v10, v4, v10 ; 10141504 v_mul_f32_e32 v11, v4, v11 ; 10161704 v_mul_f32_e32 v12, v4, v12 ; 10181904 v_mov_b32_e32 v21, 0x3ca26c3c ; 7E2A02FF 3CA26C3C v_mac_f32_e32 v9, v21, v0 ; 3E120115 v_mac_f32_e32 v10, v21, v1 ; 3E140315 v_mac_f32_e32 v11, v21, v2 ; 3E160515 v_mac_f32_e32 v12, v21, v3 ; 3E180715 v_mov_b32_e32 v0, 0x3ea37154 ; 7E0002FF 3EA37154 v_mac_f32_e32 v9, v0, v17 ; 3E122300 v_mac_f32_e32 v10, v0, v18 ; 3E142500 v_mac_f32_e32 v11, v0, v19 ; 3E162700 v_mac_f32_e32 v12, v0, v20 ; 3E182900 v_mac_f32_e32 v9, v4, v13 ; 3E121B04 v_mac_f32_e32 v10, v4, v14 ; 3E141D04 v_mac_f32_e32 v11, v4, v15 ; 3E161F04 v_mac_f32_e32 v12, v4, v16 ; 3E182104 v_mac_f32_e32 v9, v21, v5 ; 3E120B15 v_mac_f32_e32 v10, v21, v6 ; 3E140D15 v_mac_f32_e32 v11, v21, v7 ; 3E160F15 v_mac_f32_e32 v12, v21, v8 ; 3E181115 v_cvt_pkrtz_f16_f32_e32 v0, v9, v10 ; 5E001509 v_cvt_pkrtz_f16_f32_e32 v1, v11, v12 ; 5E02190B exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 24 Code Size: 368 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0] DCL CONST[2][0..41] DCL CONST[3][0..13] DCL CONST[4][0] DCL TEMP[0..6], LOCAL IMM[0] UINT32 {1, 416, 424, 0} IMM[1] FLT32 { 0.0000, -3.0962, -1.2769, 1.2769} IMM[2] FLT32 { 3.0962, 0.0198, 0.3206, 0.3192} 0: ADD TEMP[0].xy, CONST[2][26].xyyy, CONST[2][26].zwww 1: MOV TEMP[1].x, CONST[2][26].xxxx 2: MOV TEMP[1].y, CONST[2][26].yyyy 3: MOV TEMP[1].z, TEMP[0].xxxx 4: MOV TEMP[1].w, TEMP[0].yyyy 5: MUL TEMP[0].yw, TEMP[1], CONST[1][0].xyxy 6: MOV TEMP[1].x, IMM[1].xxxx 7: MUL TEMP[2].x, CONST[1][0].yyyy, IMM[1].yyyy 8: MOV TEMP[1].y, TEMP[2].xxxx 9: ADD TEMP[1].xy, TEMP[1].xyyy, IN[0].xyyy 10: MOV TEMP[2].x, TEMP[1].xxxx 11: MAX TEMP[1].x, TEMP[1].yyyy, TEMP[0].yyyy 12: MIN TEMP[1].x, TEMP[1].xxxx, TEMP[0].wwww 13: MOV TEMP[2].y, TEMP[1].xxxx 14: MOV TEMP[1].x, IMM[1].xxxx 15: MUL TEMP[3].x, CONST[1][0].yyyy, IMM[1].zzzz 16: MOV TEMP[1].y, TEMP[3].xxxx 17: ADD TEMP[1].xy, TEMP[1].xyyy, IN[0].xyyy 18: MOV TEMP[3].x, TEMP[1].xxxx 19: MAX TEMP[1].x, TEMP[1].yyyy, TEMP[0].yyyy 20: MIN TEMP[1].x, TEMP[1].xxxx, TEMP[0].wwww 21: MOV TEMP[3].y, TEMP[1].xxxx 22: MOV TEMP[1].x, IN[0].xxxx 23: MAX TEMP[4].x, IN[0].yyyy, TEMP[0].yyyy 24: MIN TEMP[4].x, TEMP[4].xxxx, TEMP[0].wwww 25: MOV TEMP[1].y, TEMP[4].xxxx 26: MOV TEMP[4].x, IMM[1].xxxx 27: MUL TEMP[5].x, CONST[1][0].yyyy, IMM[1].wwww 28: MOV TEMP[4].y, TEMP[5].xxxx 29: ADD TEMP[4].xy, TEMP[4].xyyy, IN[0].xyyy 30: MOV TEMP[5].x, TEMP[4].xxxx 31: MAX TEMP[4].x, TEMP[4].yyyy, TEMP[0].yyyy 32: MIN TEMP[4].x, TEMP[4].xxxx, TEMP[0].wwww 33: MOV TEMP[5].y, TEMP[4].xxxx 34: MOV TEMP[4].x, IMM[1].xxxx 35: MUL TEMP[6].x, CONST[1][0].yyyy, IMM[2].xxxx 36: MOV TEMP[4].y, TEMP[6].xxxx 37: ADD TEMP[4].xy, TEMP[4].xyyy, IN[0].xyyy 38: MOV TEMP[6].x, TEMP[4].xxxx 39: MAX TEMP[4].x, TEMP[4].yyyy, TEMP[0].yyyy 40: MIN TEMP[0].x, TEMP[4].xxxx, TEMP[0].wwww 41: MOV TEMP[6].y, TEMP[0].xxxx 42: MOV TEMP[0].xy, TEMP[6].xyyy 43: TEX TEMP[0], TEMP[0], SAMP[0], 2D 44: MOV TEMP[4].xy, TEMP[5].xyyy 45: TEX TEMP[4], TEMP[4], SAMP[0], 2D 46: MOV TEMP[1].xy, TEMP[1].xyyy 47: TEX TEMP[1], TEMP[1], SAMP[0], 2D 48: MOV TEMP[2].xy, TEMP[2].xyyy 49: TEX TEMP[2], TEMP[2], SAMP[0], 2D 50: MOV TEMP[3].xy, TEMP[3].xyyy 51: TEX TEMP[3], TEMP[3], SAMP[0], 2D 52: MUL TEMP[3], IMM[2].zzzz, TEMP[3] 53: MAD TEMP[2], IMM[2].yyyy, TEMP[2], TEMP[3] 54: MAD TEMP[1], IMM[2].wwww, TEMP[1], TEMP[2] 55: MAD TEMP[1], IMM[2].zzzz, TEMP[4], TEMP[1] 56: MAD TEMP[0], IMM[2].yyyy, TEMP[0], TEMP[1] 57: MOV OUT[0], TEMP[0] 58: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %25 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 %26 = load <16 x i8>, <16 x i8> addrspace(2)* %25, align 16, !tbaa !0 %27 = call float @llvm.SI.load.const(<16 x i8> %26, i32 420) %28 = call float @llvm.SI.load.const(<16 x i8> %26, i32 428) %29 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %30 = load <8 x i32>, <8 x i32> addrspace(2)* %29, align 32, !tbaa !0 %31 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %32 = load <4 x i32>, <4 x i32> addrspace(2)* %31, align 16, !tbaa !0 %33 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %35 = fadd float %27, %28 %36 = fmul float %27, %24 %37 = fmul float %35, %24 %38 = fmul float %24, 0xC008C50F00000000 %39 = fadd float %33, 0.000000e+00 %40 = fadd float %38, %34 %41 = call float @llvm.maxnum.f32(float %40, float %36) %42 = call float @llvm.minnum.f32(float %41, float %37) %43 = fmul float %24, 0xBFF46E19C0000000 %44 = fadd float %33, 0.000000e+00 %45 = fadd float %43, %34 %46 = call float @llvm.maxnum.f32(float %45, float %36) %47 = call float @llvm.minnum.f32(float %46, float %37) %48 = call float @llvm.maxnum.f32(float %34, float %36) %49 = call float @llvm.minnum.f32(float %48, float %37) %50 = fmul float %24, 0x3FF46E19C0000000 %51 = fadd float %33, 0.000000e+00 %52 = fadd float %50, %34 %53 = call float @llvm.maxnum.f32(float %52, float %36) %54 = call float @llvm.minnum.f32(float %53, float %37) %55 = fmul float %24, 0x4008C50F00000000 %56 = fadd float %33, 0.000000e+00 %57 = fadd float %55, %34 %58 = call float @llvm.maxnum.f32(float %57, float %36) %59 = call float @llvm.minnum.f32(float %58, float %37) %60 = bitcast float %56 to i32 %61 = bitcast float %59 to i32 %62 = insertelement <2 x i32> undef, i32 %60, i32 0 %63 = insertelement <2 x i32> %62, i32 %61, i32 1 %64 = bitcast <8 x i32> %30 to <32 x i8> %65 = bitcast <4 x i32> %32 to <16 x i8> %66 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %63, <32 x i8> %64, <16 x i8> %65, i32 2) %67 = extractelement <4 x float> %66, i32 0 %68 = extractelement <4 x float> %66, i32 1 %69 = extractelement <4 x float> %66, i32 2 %70 = extractelement <4 x float> %66, i32 3 %71 = bitcast float %51 to i32 %72 = bitcast float %54 to i32 %73 = insertelement <2 x i32> undef, i32 %71, i32 0 %74 = insertelement <2 x i32> %73, i32 %72, i32 1 %75 = bitcast <8 x i32> %30 to <32 x i8> %76 = bitcast <4 x i32> %32 to <16 x i8> %77 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %74, <32 x i8> %75, <16 x i8> %76, i32 2) %78 = extractelement <4 x float> %77, i32 0 %79 = extractelement <4 x float> %77, i32 1 %80 = extractelement <4 x float> %77, i32 2 %81 = extractelement <4 x float> %77, i32 3 %82 = bitcast float %33 to i32 %83 = bitcast float %49 to i32 %84 = insertelement <2 x i32> undef, i32 %82, i32 0 %85 = insertelement <2 x i32> %84, i32 %83, i32 1 %86 = bitcast <8 x i32> %30 to <32 x i8> %87 = bitcast <4 x i32> %32 to <16 x i8> %88 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %85, <32 x i8> %86, <16 x i8> %87, i32 2) %89 = extractelement <4 x float> %88, i32 0 %90 = extractelement <4 x float> %88, i32 1 %91 = extractelement <4 x float> %88, i32 2 %92 = extractelement <4 x float> %88, i32 3 %93 = bitcast float %39 to i32 %94 = bitcast float %42 to i32 %95 = insertelement <2 x i32> undef, i32 %93, i32 0 %96 = insertelement <2 x i32> %95, i32 %94, i32 1 %97 = bitcast <8 x i32> %30 to <32 x i8> %98 = bitcast <4 x i32> %32 to <16 x i8> %99 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %96, <32 x i8> %97, <16 x i8> %98, i32 2) %100 = extractelement <4 x float> %99, i32 0 %101 = extractelement <4 x float> %99, i32 1 %102 = extractelement <4 x float> %99, i32 2 %103 = extractelement <4 x float> %99, i32 3 %104 = bitcast float %44 to i32 %105 = bitcast float %47 to i32 %106 = insertelement <2 x i32> undef, i32 %104, i32 0 %107 = insertelement <2 x i32> %106, i32 %105, i32 1 %108 = bitcast <8 x i32> %30 to <32 x i8> %109 = bitcast <4 x i32> %32 to <16 x i8> %110 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %107, <32 x i8> %108, <16 x i8> %109, i32 2) %111 = extractelement <4 x float> %110, i32 0 %112 = extractelement <4 x float> %110, i32 1 %113 = extractelement <4 x float> %110, i32 2 %114 = extractelement <4 x float> %110, i32 3 %115 = fmul float %111, 0x3FD4841240000000 %116 = fmul float %112, 0x3FD4841240000000 %117 = fmul float %113, 0x3FD4841240000000 %118 = fmul float %114, 0x3FD4841240000000 %119 = fmul float %100, 0x3F944D8780000000 %120 = fadd float %119, %115 %121 = fmul float %101, 0x3F944D8780000000 %122 = fadd float %121, %116 %123 = fmul float %102, 0x3F944D8780000000 %124 = fadd float %123, %117 %125 = fmul float %103, 0x3F944D8780000000 %126 = fadd float %125, %118 %127 = fmul float %89, 0x3FD46E2A80000000 %128 = fadd float %127, %120 %129 = fmul float %90, 0x3FD46E2A80000000 %130 = fadd float %129, %122 %131 = fmul float %91, 0x3FD46E2A80000000 %132 = fadd float %131, %124 %133 = fmul float %92, 0x3FD46E2A80000000 %134 = fadd float %133, %126 %135 = fmul float %78, 0x3FD4841240000000 %136 = fadd float %135, %128 %137 = fmul float %79, 0x3FD4841240000000 %138 = fadd float %137, %130 %139 = fmul float %80, 0x3FD4841240000000 %140 = fadd float %139, %132 %141 = fmul float %81, 0x3FD4841240000000 %142 = fadd float %141, %134 %143 = fmul float %67, 0x3F944D8780000000 %144 = fadd float %143, %136 %145 = fmul float %68, 0x3F944D8780000000 %146 = fadd float %145, %138 %147 = fmul float %69, 0x3F944D8780000000 %148 = fadd float %147, %140 %149 = fmul float %70, 0x3F944D8780000000 %150 = fadd float %149, %142 %151 = call i32 @llvm.SI.packf16(float %144, float %146) %152 = bitcast i32 %151 to float %153 = call i32 @llvm.SI.packf16(float %148, float %150) %154 = bitcast i32 %153 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %152, float %154, float %152, float %154) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx4 s[12:15], s[2:3], 0x4 ; C0860304 s_load_dwordx4 s[0:3], s[2:3], 0x8 ; C0800308 v_mov_b32_e32 v2, 0xc0462878 ; 7E0402FF C0462878 v_mov_b32_e32 v3, 0xbfa370ce ; 7E0602FF BFA370CE v_mov_b32_e32 v4, 0x3fa370ce ; 7E0802FF 3FA370CE v_mov_b32_e32 v5, 0x40462878 ; 7E0A02FF 40462878 v_interp_p1_f32 v6, v0, 0, 0, [m0] ; C8180000 v_interp_p2_f32 v6, [v6], v1, 0, 0, [m0] ; C8190001 v_interp_p1_f32 v0, v0, 1, 0, [m0] ; C8000100 v_interp_p2_f32 v0, [v0], v1, 1, 0, [m0] ; C8010101 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[12:15], 0x1 ; C2060D01 s_buffer_load_dword s13, s[0:3], 0x69 ; C2068169 s_buffer_load_dword s14, s[0:3], 0x6b ; C207016B s_load_dwordx8 s[0:7], s[6:7], 0x0 ; C0C00700 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, s12, v2, v0 ; D2820001 0402040C v_mov_b32_e32 v2, s12 ; 7E04020C v_mul_f32_e32 v2, s13, v2 ; 1004040D v_mov_b32_e32 v7, s14 ; 7E0E020E v_add_f32_e32 v7, s13, v7 ; 060E0E0D v_mad_f32 v3, s12, v3, v0 ; D2820003 0402060C v_mad_f32 v4, s12, v4, v0 ; D2820004 0402080C v_max_f32_e32 v8, v2, v0 ; 20100102 v_mac_f32_e32 v0, s12, v5 ; 3E000A0C v_mul_f32_e32 v5, s12, v7 ; 100A0E0C v_max_f32_e32 v1, v2, v1 ; 20020302 v_max_f32_e32 v3, v2, v3 ; 20060702 v_max_f32_e32 v4, v2, v4 ; 20080902 v_max_f32_e32 v0, v2, v0 ; 20000102 v_min_f32_e32 v7, v5, v0 ; 1E0E0105 v_min_f32_e32 v0, v5, v1 ; 1E000305 v_min_f32_e32 v1, v5, v3 ; 1E020705 v_min_f32_e32 v2, v5, v8 ; 1E041105 v_min_f32_e32 v3, v5, v4 ; 1E060905 image_sample v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[0:7], s[8:11] ; F0800F00 00400806 v_mov_b32_e32 v7, v3 ; 7E0E0303 image_sample v[12:15], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[0:7], s[8:11] ; F0800F00 00400C06 v_mov_b32_e32 v7, v2 ; 7E0E0302 image_sample v[2:5], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[0:7], s[8:11] ; F0800F00 00400206 v_mov_b32_e32 v7, v0 ; 7E0E0300 image_sample v[16:19], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[0:7], s[8:11] ; F0800F00 00401006 v_mov_b32_e32 v7, v1 ; 7E0E0301 image_sample v[20:23], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[0:7], s[8:11] ; F0800F00 00401406 v_mov_b32_e32 v0, 0x3ea42092 ; 7E0002FF 3EA42092 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, v0, v20 ; 10022900 v_mul_f32_e32 v6, v0, v21 ; 100C2B00 v_mul_f32_e32 v7, v0, v22 ; 100E2D00 v_mul_f32_e32 v20, v0, v23 ; 10282F00 v_mov_b32_e32 v21, 0x3ca26c3c ; 7E2A02FF 3CA26C3C v_mac_f32_e32 v1, v21, v16 ; 3E022115 v_mac_f32_e32 v6, v21, v17 ; 3E0C2315 v_mac_f32_e32 v7, v21, v18 ; 3E0E2515 v_mac_f32_e32 v20, v21, v19 ; 3E282715 v_mov_b32_e32 v16, 0x3ea37154 ; 7E2002FF 3EA37154 v_mac_f32_e32 v1, v16, v2 ; 3E020510 v_mac_f32_e32 v6, v16, v3 ; 3E0C0710 v_mac_f32_e32 v7, v16, v4 ; 3E0E0910 v_mac_f32_e32 v20, v16, v5 ; 3E280B10 v_mac_f32_e32 v1, v0, v12 ; 3E021900 v_mac_f32_e32 v6, v0, v13 ; 3E0C1B00 v_mac_f32_e32 v7, v0, v14 ; 3E0E1D00 v_mac_f32_e32 v20, v0, v15 ; 3E281F00 v_mac_f32_e32 v1, v21, v8 ; 3E021115 v_mac_f32_e32 v6, v21, v9 ; 3E0C1315 v_mac_f32_e32 v7, v21, v10 ; 3E0E1515 v_mac_f32_e32 v20, v21, v11 ; 3E281715 v_cvt_pkrtz_f16_f32_e32 v0, v1, v6 ; 5E000D01 v_cvt_pkrtz_f16_f32_e32 v1, v7, v20 ; 5E022907 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 24 Code Size: 364 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0..1] DCL CONST[2][0..41] DCL CONST[3][0..13] DCL CONST[4][0] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL DCL TEMP[2..6], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 2.0000, 0.0000} IMM[1] INT32 {1, 0, 2, 0} IMM[2] UINT32 {1, 0, 16, 32} IMM[3] UINT32 {48, 4, 20, 36} IMM[4] UINT32 {52, 8, 24, 40} IMM[5] UINT32 {56, 12, 28, 44} IMM[6] UINT32 {60, 0, 0, 0} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MOV TEMP[0].w, IMM[0].yyyy 4: MOV TEMP[0].x, IN[0].xxxx 5: MOV TEMP[0].y, IN[0].yyyy 6: MOV TEMP[0].z, IN[0].zzzz 7: MOV TEMP[2].xy, IN[1].xyyy 8: MOV TEMP[2].w, IMM[0].xxxx 9: TXL TEMP[2], TEMP[2], SAMP[0], 2D 10: DP4 TEMP[2].x, TEMP[0], TEMP[2] 11: MOV TEMP[3].xy, IN[1].xyyy 12: MOV TEMP[3].w, IMM[0].xxxx 13: TXL TEMP[3], TEMP[3], SAMP[0], 2D, IMM[1].xyx 14: DP4 TEMP[3].x, TEMP[0], TEMP[3] 15: MOV TEMP[4].xy, IN[1].xyyy 16: MOV TEMP[4].w, IMM[0].xxxx 17: TXL TEMP[4], TEMP[4], SAMP[0], 2D, IMM[1].zyz 18: DP4 TEMP[0].x, TEMP[0], TEMP[4] 19: MOV TEMP[4].x, TEMP[2].xxxx 20: MOV TEMP[4].y, TEMP[3].xxxx 21: MOV TEMP[4].z, TEMP[0].xxxx 22: MOV TEMP[5].w, IMM[0].yyyy 23: MOV TEMP[5].x, TEMP[2].xxxx 24: MOV TEMP[5].y, TEMP[3].xxxx 25: MOV TEMP[5].z, TEMP[0].xxxx 26: MOV TEMP[0].x, CONST[2][0].xxxx 27: MOV TEMP[0].y, CONST[2][1].xxxx 28: MOV TEMP[0].z, CONST[2][2].xxxx 29: MOV TEMP[0].w, CONST[2][3].xxxx 30: DP4 TEMP[0].x, TEMP[5], TEMP[0] 31: MOV TEMP[2].x, CONST[2][0].yyyy 32: MOV TEMP[2].y, CONST[2][1].yyyy 33: MOV TEMP[2].z, CONST[2][2].yyyy 34: MOV TEMP[2].w, CONST[2][3].yyyy 35: DP4 TEMP[2].x, TEMP[5], TEMP[2] 36: MOV TEMP[3].x, CONST[2][0].zzzz 37: MOV TEMP[3].y, CONST[2][1].zzzz 38: MOV TEMP[3].z, CONST[2][2].zzzz 39: MOV TEMP[3].w, CONST[2][3].zzzz 40: DP4 TEMP[3].x, TEMP[5], TEMP[3] 41: MOV TEMP[6].x, CONST[2][0].wwww 42: MOV TEMP[6].y, CONST[2][1].wwww 43: MOV TEMP[6].z, CONST[2][2].wwww 44: MOV TEMP[6].w, CONST[2][3].wwww 45: DP4 TEMP[5].x, TEMP[5], TEMP[6] 46: MOV TEMP[6].x, TEMP[0].xxxx 47: MOV TEMP[6].y, TEMP[2].xxxx 48: MOV TEMP[6].z, TEMP[3].xxxx 49: MOV TEMP[6].w, TEMP[5].xxxx 50: MOV TEMP[0].x, TEMP[0].xxxx 51: MOV TEMP[0].y, TEMP[2].xxxx 52: MOV TEMP[0].z, TEMP[5].xxxx 53: MOV TEMP[0].w, TEMP[5].xxxx 54: RCP TEMP[2].x, TEMP[5].xxxx 55: MUL TEMP[2].x, TEMP[3].xxxx, TEMP[2].xxxx 56: FSLT TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx 57: UIF TEMP[2].xxxx :0 58: MOV TEMP[0], TEMP[0] 59: ELSE :0 60: MOV TEMP[0], TEMP[6] 61: ENDIF 62: MOV TEMP[2].xw, TEMP[0].xxxw 63: MOV TEMP[3].x, -TEMP[0].yyyy 64: MAD TEMP[0].x, TEMP[0].zzzz, IMM[0].zzzz, -TEMP[0].wwww 65: MOV TEMP[3].y, TEMP[0].xxxx 66: MOV TEMP[2].yz, TEMP[3].yxyy 67: MOV OUT[1], TEMP[1] 68: MOV OUT[0], TEMP[2] 69: MOV OUT[2].xyz, TEMP[4].xyzx 70: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %30 = load <8 x i32>, <8 x i32> addrspace(2)* %29, align 32, !tbaa !0 %31 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %32 = load <4 x i32>, <4 x i32> addrspace(2)* %31, align 16, !tbaa !0 %33 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %34 = load <16 x i8>, <16 x i8> addrspace(2)* %33, align 16, !tbaa !0 %35 = add i32 %5, %7 %36 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %34, i32 0, i32 %35) %37 = extractelement <4 x float> %36, i32 0 %38 = extractelement <4 x float> %36, i32 1 %39 = extractelement <4 x float> %36, i32 2 %40 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %41 = load <16 x i8>, <16 x i8> addrspace(2)* %40, align 16, !tbaa !0 %42 = add i32 %10, %6 %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %41, i32 0, i32 %42) %44 = extractelement <4 x float> %43, i32 0 %45 = extractelement <4 x float> %43, i32 1 %46 = bitcast float %44 to i32 %47 = bitcast float %45 to i32 %48 = insertelement <4 x i32> undef, i32 %46, i32 0 %49 = insertelement <4 x i32> %48, i32 %47, i32 1 %50 = insertelement <4 x i32> %49, i32 0, i32 2 %51 = bitcast <8 x i32> %30 to <32 x i8> %52 = bitcast <4 x i32> %32 to <16 x i8> %53 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %50, <32 x i8> %51, <16 x i8> %52, i32 2) %54 = extractelement <4 x float> %53, i32 0 %55 = extractelement <4 x float> %53, i32 1 %56 = extractelement <4 x float> %53, i32 2 %57 = extractelement <4 x float> %53, i32 3 %58 = fmul float %37, %54 %59 = fmul float %38, %55 %60 = fadd float %58, %59 %61 = fmul float %39, %56 %62 = fadd float %60, %61 %63 = fadd float %62, %57 %64 = bitcast float %44 to i32 %65 = bitcast float %45 to i32 %66 = insertelement <4 x i32> , i32 %64, i32 1 %67 = insertelement <4 x i32> %66, i32 %65, i32 2 %68 = insertelement <4 x i32> %67, i32 0, i32 3 %69 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %68, <8 x i32> %30, <4 x i32> %32, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %70 = extractelement <4 x float> %69, i32 0 %71 = extractelement <4 x float> %69, i32 1 %72 = extractelement <4 x float> %69, i32 2 %73 = extractelement <4 x float> %69, i32 3 %74 = fmul float %37, %70 %75 = fmul float %38, %71 %76 = fadd float %74, %75 %77 = fmul float %39, %72 %78 = fadd float %76, %77 %79 = fadd float %78, %73 %80 = bitcast float %44 to i32 %81 = bitcast float %45 to i32 %82 = insertelement <4 x i32> , i32 %80, i32 1 %83 = insertelement <4 x i32> %82, i32 %81, i32 2 %84 = insertelement <4 x i32> %83, i32 0, i32 3 %85 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %84, <8 x i32> %30, <4 x i32> %32, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %86 = extractelement <4 x float> %85, i32 0 %87 = extractelement <4 x float> %85, i32 1 %88 = extractelement <4 x float> %85, i32 2 %89 = extractelement <4 x float> %85, i32 3 %90 = fmul float %37, %86 %91 = fmul float %38, %87 %92 = fadd float %90, %91 %93 = fmul float %39, %88 %94 = fadd float %92, %93 %95 = fadd float %94, %89 %96 = fmul float %63, %13 %97 = fmul float %79, %17 %98 = fadd float %96, %97 %99 = fmul float %95, %21 %100 = fadd float %98, %99 %101 = fadd float %100, %25 %102 = fmul float %63, %14 %103 = fmul float %79, %18 %104 = fadd float %102, %103 %105 = fmul float %95, %22 %106 = fadd float %104, %105 %107 = fadd float %106, %26 %108 = fmul float %63, %15 %109 = fmul float %79, %19 %110 = fadd float %108, %109 %111 = fmul float %95, %23 %112 = fadd float %110, %111 %113 = fadd float %112, %27 %114 = fmul float %63, %16 %115 = fmul float %79, %20 %116 = fadd float %114, %115 %117 = fmul float %95, %24 %118 = fadd float %116, %117 %119 = fadd float %118, %28 %120 = fdiv float 1.000000e+00, %119 %121 = fmul float %113, %120 %122 = fcmp ogt float %121, 1.000000e+00 %. = select i1 %122, float %119, float %113 %123 = fsub float -0.000000e+00, %107 %124 = fmul float %., 2.000000e+00 %125 = fsub float %124, %119 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %63, float %79, float %95, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %101, float %123, float %125, float %119) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[4:7], v0, s[12:15], 0 idxen ; E00C2000 80030400 v_add_i32_e32 v0, s11, v3 ; 4A00060B buffer_load_format_xyzw v[11:14], v0, s[16:19], 0 idxen ; E00C2000 80040B00 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v13, 0 ; 7E1A0280 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[11:14], s[12:19], s[8:11] ; F0900F00 0043000B v_mov_b32_e32 v10, 0x10001 ; 7E1402FF 00010001 image_sample_l_o v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[10:13], s[12:19], s[8:11] ; F0D00F00 00430E0A v_mov_b32_e32 v10, 0x20002 ; 7E1402FF 00020002 s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v1, v1, v5 ; 10020B01 v_mac_f32_e32 v1, v0, v4 ; 3E020900 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v15, v5 ; 10000B0F v_mac_f32_e32 v0, v14, v4 ; 3E00090E s_load_dwordx4 s[0:3], s[2:3], 0x8 ; C0800308 image_sample_l_o v[7:10], 15, 0, 0, 0, 0, 0, 0, 0, v[10:13], s[12:19], s[8:11] ; F0D00F00 0043070A s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v5, v8, v5 ; 100A0B08 v_mac_f32_e32 v5, v7, v4 ; 3E0A0907 v_mac_f32_e32 v1, v2, v6 ; 3E020D02 v_mac_f32_e32 v0, v16, v6 ; 3E000D10 v_mac_f32_e32 v5, v9, v6 ; 3E0A0D09 v_add_f32_e32 v1, v3, v1 ; 06020303 v_add_f32_e32 v0, v17, v0 ; 06000111 v_add_f32_e32 v2, v10, v5 ; 06040B0A s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F exp 15, 32, 0, 0, 0, v13, v13, v13, v13 ; F800020F 0D0D0D0D s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s8, v0 ; 10060008 v_mul_f32_e32 v4, s9, v0 ; 10080009 v_mul_f32_e32 v5, s10, v0 ; 100A000A v_mul_f32_e32 v6, s11, v0 ; 100C000B exp 15, 33, 0, 0, 0, v1, v0, v2, v13 ; F800021F 0D020001 v_mac_f32_e32 v3, s4, v1 ; 3E060204 v_mac_f32_e32 v4, s5, v1 ; 3E080205 v_mac_f32_e32 v5, s6, v1 ; 3E0A0206 v_mac_f32_e32 v6, s7, v1 ; 3E0C0207 v_mac_f32_e32 v3, s12, v2 ; 3E06040C v_mac_f32_e32 v6, s15, v2 ; 3E0C040F s_waitcnt expcnt(0) ; BF8C070F v_add_f32_e32 v0, s0, v6 ; 06000C00 v_rcp_f32_e32 v1, v0 ; 7E025500 v_mac_f32_e32 v4, s13, v2 ; 3E08040D v_mac_f32_e32 v5, s14, v2 ; 3E0A040E v_add_f32_e32 v2, s18, v5 ; 06040A12 v_mul_f32_e32 v1, v1, v2 ; 10020501 v_cmp_lt_f32_e32 vcc, 1.0, v1 ; 7C0202F2 v_cndmask_b32_e32 v1, v2, v0 ; 00020102 v_add_f32_e32 v2, s16, v3 ; 06040610 v_add_f32_e32 v3, s17, v4 ; 06060811 v_xor_b32_e32 v3, 0x80000000, v3 ; 3A0606FF 80000000 v_mad_f32 v1, 2.0, v1, -v0 ; D2820001 840202F4 exp 15, 12, 0, 0, 0, v2, v3, v1, v0 ; F80000CF 00010302 exp 15, 13, 0, 1, 0, v13, v13, v13, v13 ; F80008DF 0D0D0D0D s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 20 Code Size: 364 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], POSITION, LINEAR DCL IN[1], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL CONST[3] DCL CONST[1][0..8] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0] DCL TEMP[1..5], LOCAL IMM[0] FLT32 { 0.5000, 0.0000, -0.5000, 2.0000} IMM[1] UINT32 {3, 400, 304, 0} IMM[2] UINT32 {320, 12, 36, 16} IMM[3] FLT32 { 1.0000, 3.0000, 0.0000, 0.0000} IMM[4] UINT32 {28, 0, 0, 0} 0: MOV TEMP[0], IN[0] 1: MAD TEMP[0].y, IN[0], CONST[3].xxxx, CONST[3].yyyy 2: ADD TEMP[1].xy, TEMP[0].xyyy, IMM[0].xxxx 3: MUL TEMP[1].xy, TEMP[1].xyyy, CONST[4][25].xyyy 4: ADD TEMP[2].xyz, IN[1].xyzz, -CONST[4][19].xyzz 5: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 6: RSQ TEMP[3].x, TEMP[3].xxxx 7: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx 8: MOV TEMP[3].xy, TEMP[1].xyyy 9: MOV TEMP[3].w, IMM[0].yyyy 10: TXL TEMP[3].xyz, TEMP[3], SAMP[1], 2D 11: MOV TEMP[4].xy, TEMP[1].xyyy 12: MOV TEMP[4].w, IMM[0].yyyy 13: TXL TEMP[4].xyz, TEMP[4], SAMP[2], 2D 14: ADD TEMP[4].xyz, TEMP[4].xyzz, IMM[0].zzzz 15: MUL TEMP[4].xyz, TEMP[4].xyzz, IMM[0].wwww 16: MOV TEMP[1].xy, TEMP[1].xyyy 17: MOV TEMP[1].w, IMM[0].yyyy 18: TXL TEMP[1].x, TEMP[1], SAMP[0], 2D 19: DP3 TEMP[5].x, CONST[4][20].xyzz, TEMP[2].xyzz 20: RCP TEMP[5].xyz, TEMP[5].xxxx 21: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xyzz 22: MAD TEMP[1].xyz, TEMP[1].xxxx, TEMP[2].xyzz, CONST[4][19].xyzz 23: ADD TEMP[1].xyz, CONST[1][0].xyzz, -TEMP[1].xyzz 24: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz 25: RSQ TEMP[2].x, TEMP[2].xxxx 26: MUL TEMP[2].xyz, TEMP[1].xyzz, TEMP[2].xxxx 27: DP3 TEMP[1].x, TEMP[1].xyzz, TEMP[1].xyzz 28: SQRT TEMP[1].x, TEMP[1].xxxx 29: ADD TEMP[1].x, CONST[1][0].wwww, -TEMP[1].xxxx 30: FSLT TEMP[5].x, TEMP[1].xxxx, IMM[0].yyyy 31: AND TEMP[5].x, TEMP[5].xxxx, IMM[3].xxxx 32: KILL_IF -TEMP[5].xxxx 33: DP3 TEMP[2].x, TEMP[4].xyzz, TEMP[2].xyzz 34: FSLT TEMP[4].x, TEMP[2].xxxx, IMM[0].yyyy 35: AND TEMP[4].x, TEMP[4].xxxx, IMM[3].xxxx 36: KILL_IF -TEMP[4].xxxx 37: RCP TEMP[4].x, CONST[1][0].wwww 38: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[4].xxxx 39: RCP TEMP[4].x, CONST[1][2].yyyy 40: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[4].xxxx 41: MOV_SAT TEMP[1].x, TEMP[1].xxxx 42: MUL TEMP[2].xyz, TEMP[2].xxxx, CONST[1][1].xyzz 43: MUL TEMP[4].x, IMM[0].wwww, TEMP[1].xxxx 44: ADD TEMP[4].x, IMM[3].yyyy, -TEMP[4].xxxx 45: MUL TEMP[4].x, TEMP[1].xxxx, TEMP[4].xxxx 46: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[4].xxxx 47: MUL TEMP[1].xyz, TEMP[3].xyzz, TEMP[1].xxxx 48: MUL TEMP[1].xyz, TEMP[1].xyzz, CONST[1][1].wwww 49: MUL TEMP[1].xyz, TEMP[2].xyzz, TEMP[1].xyzz 50: MOV TEMP[2].w, IMM[3].xxxx 51: MOV TEMP[2].x, TEMP[1].xxxx 52: MOV TEMP[2].y, TEMP[1].yyyy 53: MOV TEMP[2].z, TEMP[1].zzzz 54: MOV OUT[0], TEMP[2] 55: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %26 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %27 = load <16 x i8>, <16 x i8> addrspace(2)* %26, align 16, !tbaa !0 %28 = call float @llvm.SI.load.const(<16 x i8> %27, i32 0) %29 = call float @llvm.SI.load.const(<16 x i8> %27, i32 4) %30 = call float @llvm.SI.load.const(<16 x i8> %27, i32 8) %31 = call float @llvm.SI.load.const(<16 x i8> %27, i32 12) %32 = call float @llvm.SI.load.const(<16 x i8> %27, i32 16) %33 = call float @llvm.SI.load.const(<16 x i8> %27, i32 20) %34 = call float @llvm.SI.load.const(<16 x i8> %27, i32 24) %35 = call float @llvm.SI.load.const(<16 x i8> %27, i32 28) %36 = call float @llvm.SI.load.const(<16 x i8> %27, i32 36) %37 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = call float @llvm.SI.load.const(<16 x i8> %38, i32 304) %40 = call float @llvm.SI.load.const(<16 x i8> %38, i32 308) %41 = call float @llvm.SI.load.const(<16 x i8> %38, i32 312) %42 = call float @llvm.SI.load.const(<16 x i8> %38, i32 320) %43 = call float @llvm.SI.load.const(<16 x i8> %38, i32 324) %44 = call float @llvm.SI.load.const(<16 x i8> %38, i32 328) %45 = call float @llvm.SI.load.const(<16 x i8> %38, i32 400) %46 = call float @llvm.SI.load.const(<16 x i8> %38, i32 404) %47 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %48 = load <32 x i8>, <32 x i8> addrspace(2)* %47, align 32, !tbaa !0 %49 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %50 = load <16 x i8>, <16 x i8> addrspace(2)* %49, align 16, !tbaa !0 %51 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %52 = bitcast <8 x i32> addrspace(2)* %51 to <32 x i8> addrspace(2)* %53 = load <32 x i8>, <32 x i8> addrspace(2)* %52, align 32, !tbaa !0 %54 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %55 = bitcast <4 x i32> addrspace(2)* %54 to <16 x i8> addrspace(2)* %56 = load <16 x i8>, <16 x i8> addrspace(2)* %55, align 16, !tbaa !0 %57 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %58 = bitcast <8 x i32> addrspace(2)* %57 to <32 x i8> addrspace(2)* %59 = load <32 x i8>, <32 x i8> addrspace(2)* %58, align 32, !tbaa !0 %60 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %61 = bitcast <4 x i32> addrspace(2)* %60 to <16 x i8> addrspace(2)* %62 = load <16 x i8>, <16 x i8> addrspace(2)* %61, align 16, !tbaa !0 %63 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %64 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %65 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %66 = fmul float %24, %15 %67 = fadd float %66, %25 %68 = fadd float %14, 5.000000e-01 %69 = fadd float %67, 5.000000e-01 %70 = fmul float %68, %45 %71 = fmul float %69, %46 %72 = fsub float %63, %39 %73 = fsub float %64, %40 %74 = fsub float %65, %41 %75 = fmul float %72, %72 %76 = fmul float %73, %73 %77 = fadd float %76, %75 %78 = fmul float %74, %74 %79 = fadd float %77, %78 %80 = call float @llvm.AMDGPU.rsq.clamped.f32(float %79) %81 = fmul float %72, %80 %82 = fmul float %73, %80 %83 = fmul float %74, %80 %84 = bitcast float %70 to i32 %85 = bitcast float %71 to i32 %86 = insertelement <4 x i32> undef, i32 %84, i32 0 %87 = insertelement <4 x i32> %86, i32 %85, i32 1 %88 = insertelement <4 x i32> %87, i32 0, i32 2 %89 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %88, <32 x i8> %53, <16 x i8> %56, i32 2) %90 = extractelement <4 x float> %89, i32 0 %91 = extractelement <4 x float> %89, i32 1 %92 = extractelement <4 x float> %89, i32 2 %93 = bitcast float %70 to i32 %94 = bitcast float %71 to i32 %95 = insertelement <4 x i32> undef, i32 %93, i32 0 %96 = insertelement <4 x i32> %95, i32 %94, i32 1 %97 = insertelement <4 x i32> %96, i32 0, i32 2 %98 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %97, <32 x i8> %59, <16 x i8> %62, i32 2) %99 = extractelement <4 x float> %98, i32 0 %100 = extractelement <4 x float> %98, i32 1 %101 = extractelement <4 x float> %98, i32 2 %102 = fadd float %99, -5.000000e-01 %103 = fadd float %100, -5.000000e-01 %104 = fadd float %101, -5.000000e-01 %105 = fmul float %102, 2.000000e+00 %106 = fmul float %103, 2.000000e+00 %107 = fmul float %104, 2.000000e+00 %108 = bitcast float %70 to i32 %109 = bitcast float %71 to i32 %110 = insertelement <4 x i32> undef, i32 %108, i32 0 %111 = insertelement <4 x i32> %110, i32 %109, i32 1 %112 = insertelement <4 x i32> %111, i32 0, i32 2 %113 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %112, <32 x i8> %48, <16 x i8> %50, i32 2) %114 = extractelement <4 x float> %113, i32 0 %115 = fmul float %42, %81 %116 = fmul float %43, %82 %117 = fadd float %116, %115 %118 = fmul float %44, %83 %119 = fadd float %117, %118 %120 = fdiv float 1.000000e+00, %119 %121 = fmul float %81, %120 %122 = fmul float %82, %120 %123 = fmul float %83, %120 %124 = fmul float %114, %121 %125 = fadd float %124, %39 %126 = fmul float %114, %122 %127 = fadd float %126, %40 %128 = fmul float %114, %123 %129 = fadd float %128, %41 %130 = fsub float %28, %125 %131 = fsub float %29, %127 %132 = fsub float %30, %129 %133 = fmul float %130, %130 %134 = fmul float %131, %131 %135 = fadd float %134, %133 %136 = fmul float %132, %132 %137 = fadd float %135, %136 %138 = call float @llvm.AMDGPU.rsq.clamped.f32(float %137) %139 = fmul float %130, %138 %140 = fmul float %131, %138 %141 = fmul float %132, %138 %142 = fmul float %130, %130 %143 = fmul float %131, %131 %144 = fadd float %143, %142 %145 = fmul float %132, %132 %146 = fadd float %144, %145 %147 = call float @llvm.sqrt.f32(float %146) %148 = fsub float %31, %147 %149 = fcmp olt float %148, 0.000000e+00 %150 = select i1 %149, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %150) %151 = fmul float %105, %139 %152 = fmul float %106, %140 %153 = fadd float %152, %151 %154 = fmul float %107, %141 %155 = fadd float %153, %154 %156 = fcmp olt float %155, 0.000000e+00 %157 = select i1 %156, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %157) %158 = fdiv float 1.000000e+00, %31 %159 = fmul float %148, %158 %160 = fdiv float 1.000000e+00, %36 %161 = fmul float %159, %160 %162 = call float @llvm.AMDIL.clamp.(float %161, float 0.000000e+00, float 1.000000e+00) %163 = fmul float %155, %32 %164 = fmul float %155, %33 %165 = fmul float %155, %34 %166 = fmul float %162, 2.000000e+00 %167 = fsub float 3.000000e+00, %166 %168 = fmul float %162, %167 %169 = fmul float %162, %168 %170 = fmul float %90, %169 %171 = fmul float %91, %169 %172 = fmul float %92, %169 %173 = fmul float %170, %35 %174 = fmul float %171, %35 %175 = fmul float %172, %35 %176 = fmul float %163, %173 %177 = fmul float %164, %174 %178 = fmul float %165, %175 %179 = call i32 @llvm.SI.packf16(float %176, float %177) %180 = bitcast i32 %179 to float %181 = call i32 @llvm.SI.packf16(float %178, float 1.000000e+00) %182 = bitcast i32 %181 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %180, float %182, float %180, float %182) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300 s_load_dwordx4 s[16:19], s[2:3], 0x10 ; C0880310 s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[12:15], 0xc ; C2040D0C s_buffer_load_dword s10, s[12:15], 0xd ; C2050D0D s_mov_b32 m0, s9 ; BEFC0309 s_buffer_load_dword s9, s[16:19], 0x4c ; C204914C s_buffer_load_dword s11, s[16:19], 0x4d ; C205914D s_buffer_load_dword s12, s[16:19], 0x4e ; C206114E s_buffer_load_dword s13, s[16:19], 0x50 ; C2069150 s_buffer_load_dword s14, s[16:19], 0x51 ; C2071151 s_buffer_load_dword s15, s[16:19], 0x52 ; C2079152 s_buffer_load_dword s20, s[16:19], 0x64 ; C20A1164 s_buffer_load_dword s16, s[16:19], 0x65 ; C2081165 v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 v_interp_p1_f32 v5, v0, 1, 0, [m0] ; C8140100 v_interp_p2_f32 v5, [v5], v1, 1, 0, [m0] ; C8150101 v_interp_p1_f32 v0, v0, 2, 0, [m0] ; C8000200 v_interp_p2_f32 v0, [v0], v1, 2, 0, [m0] ; C8010201 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s10 ; 7E02020A v_mac_f32_e32 v1, s8, v3 ; 3E020608 v_add_f32_e32 v2, 0.5, v2 ; 060404F0 v_add_f32_e32 v1, 0.5, v1 ; 060202F0 v_mul_f32_e32 v6, s20, v2 ; 100C0414 v_mul_f32_e32 v7, s16, v1 ; 100E0210 v_subrev_f32_e32 v1, s9, v4 ; 0A020809 s_load_dwordx4 s[16:19], s[4:5], 0x4 ; C0880504 s_load_dwordx4 s[20:23], s[4:5], 0x8 ; C08A0508 s_load_dwordx8 s[24:31], s[6:7], 0x0 ; C0CC0700 s_load_dwordx8 s[32:39], s[6:7], 0x8 ; C0D00708 s_load_dwordx8 s[40:47], s[6:7], 0x10 ; C0D40710 s_load_dwordx4 s[4:7], s[4:5], 0x0 ; C0820500 v_subrev_f32_e32 v2, s11, v5 ; 0A040A0B v_subrev_f32_e32 v0, s12, v0 ; 0A00000C v_mul_f32_e32 v3, v1, v1 ; 10060301 v_mac_f32_e32 v3, v2, v2 ; 3E060502 v_mac_f32_e32 v3, v0, v0 ; 3E060100 v_rsq_clamp_f32_e32 v3, v3 ; 7E065903 v_mov_b32_e32 v8, 0 ; 7E100280 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[9:11], 7, 0, 0, 0, 0, 0, 0, 0, v[6:9], s[32:39], s[16:19] ; F0900700 00880906 s_waitcnt vmcnt(0) ; BF8C0770 image_sample_l v[12:14], 7, 0, 0, 0, 0, 0, 0, 0, v[6:9], s[40:47], s[20:23] ; F0900700 00AA0C06 image_sample_l v4, 1, 0, 0, 0, 0, 0, 0, 0, v[6:9], s[24:31], s[4:7] ; F0900100 00260406 v_mul_f32_e32 v1, v3, v1 ; 10020303 v_mul_f32_e32 v2, v3, v2 ; 10040503 v_mul_f32_e32 v0, v3, v0 ; 10000103 v_mul_f32_e32 v3, s13, v1 ; 1006020D v_mac_f32_e32 v3, s14, v2 ; 3E06040E v_mac_f32_e32 v3, s15, v0 ; 3E06000F v_rcp_f32_e32 v3, v3 ; 7E065503 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s10, s[0:3], 0x5 ; C2050105 s_buffer_load_dword s13, s[0:3], 0x6 ; C2068106 s_buffer_load_dword s14, s[0:3], 0x7 ; C2070107 s_buffer_load_dword s0, s[0:3], 0x9 ; C2000109 v_mul_f32_e32 v1, v3, v1 ; 10020303 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v4, v1, s9 ; D2820001 00260304 v_mul_f32_e32 v2, v3, v2 ; 10040503 v_mad_f32 v2, v4, v2, s11 ; D2820002 002E0504 v_mul_f32_e32 v0, v3, v0 ; 10000103 v_mad_f32 v0, v4, v0, s12 ; D2820000 00320104 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v1, s4, v1 ; 08020204 v_sub_f32_e32 v2, s5, v2 ; 08040405 v_sub_f32_e32 v0, s6, v0 ; 08000006 v_add_f32_e32 v3, -0.5, v12 ; 060618F1 v_add_f32_e32 v4, -0.5, v13 ; 06081AF1 v_add_f32_e32 v5, -0.5, v14 ; 060A1CF1 v_mul_f32_e32 v6, v1, v1 ; 100C0301 v_mac_f32_e32 v6, v2, v2 ; 3E0C0502 v_mac_f32_e32 v6, v0, v0 ; 3E0C0100 v_rsq_clamp_f32_e32 v7, v6 ; 7E0E5906 v_sqrt_f32_e32 v6, v6 ; 7E0C6706 v_sub_f32_e32 v6, s7, v6 ; 080C0C07 v_cmp_gt_f32_e32 vcc, 0, v6 ; 7C080C80 v_cndmask_b32_e64 v8, 0, -1.0, vcc ; D2000008 01A9E680 v_rcp_f32_e32 v12, s7 ; 7E185407 v_rcp_f32_e32 v13, s0 ; 7E1A5400 v_mul_f32_e32 v14, v7, v1 ; 101C0307 v_mul_f32_e32 v15, v7, v2 ; 101E0507 v_mul_f32_e32 v16, v7, v0 ; 10200107 v_cmpx_le_f32_e32 vcc, 0, v8 ; 7C261080 v_mac_f32_e32 v14, v7, v1 ; 3E1C0307 v_mul_f32_e32 v1, v14, v3 ; 1002070E v_mac_f32_e32 v15, v7, v2 ; 3E1E0507 v_mac_f32_e32 v1, v15, v4 ; 3E02090F v_mac_f32_e32 v16, v7, v0 ; 3E200107 v_mac_f32_e32 v1, v16, v5 ; 3E020B10 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v0, 0, -1.0, vcc ; D2000000 01A9E680 v_cmpx_le_f32_e32 vcc, 0, v0 ; 7C260080 v_mul_f32_e32 v0, s8, v1 ; 10000208 v_mul_f32_e32 v2, s10, v1 ; 1004020A v_mul_f32_e32 v1, s13, v1 ; 1002020D v_mul_f32_e32 v3, v12, v6 ; 10060D0C v_mul_f32_e32 v3, v13, v3 ; 1006070D v_add_f32_e64 v3, 0, v3 clamp ; D2060803 00020680 v_madak_f32_e32 v4, -2.0, v3, 0x40400000 ; 420806F5 40400000 v_mul_f32_e32 v4, v4, v3 ; 10080704 v_mul_f32_e32 v3, v4, v3 ; 10060704 v_mul_f32_e32 v4, v3, v9 ; 10081303 v_mul_f32_e32 v5, v3, v10 ; 100A1503 v_mul_f32_e32 v3, v3, v11 ; 10061703 v_mul_f32_e32 v4, s14, v4 ; 1008080E v_mul_f32_e32 v5, s14, v5 ; 100A0A0E v_mul_f32_e32 v3, s14, v3 ; 1006060E v_mul_f32_e32 v0, v4, v0 ; 10000104 v_mul_f32_e32 v2, v5, v2 ; 10040505 v_mul_f32_e32 v1, v3, v1 ; 10020303 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 v_cvt_pkrtz_f16_f32_e64 v1, v1, 1.0 ; D25E0001 0001E501 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 20 Code Size: 536 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL CONST[1][0..41] DCL CONST[2][0..13] DCL CONST[3][0] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL DCL TEMP[2..3], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, -1.0000, 0.0000} IMM[1] UINT32 {0, 64, 80, 96} IMM[2] UINT32 {112, 68, 84, 100} IMM[3] UINT32 {116, 72, 88, 104} IMM[4] UINT32 {120, 76, 92, 108} IMM[5] UINT32 {124, 304, 320, 0} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MOV TEMP[0].zw, IMM[0].yyyy 4: MOV TEMP[0].x, IN[0].xxxx 5: MOV TEMP[0].y, IN[0].yyyy 6: MOV TEMP[2].x, CONST[1][4].xxxx 7: MOV TEMP[2].y, CONST[1][5].xxxx 8: MOV TEMP[2].z, CONST[1][6].xxxx 9: MOV TEMP[2].w, CONST[1][7].xxxx 10: DP4 TEMP[2].x, TEMP[0], TEMP[2] 11: MOV TEMP[3].x, CONST[1][4].yyyy 12: MOV TEMP[3].y, CONST[1][5].yyyy 13: MOV TEMP[3].z, CONST[1][6].yyyy 14: MOV TEMP[3].w, CONST[1][7].yyyy 15: DP4 TEMP[3].x, TEMP[0], TEMP[3] 16: MOV TEMP[2].y, TEMP[3].xxxx 17: MOV TEMP[3].x, CONST[1][4].zzzz 18: MOV TEMP[3].y, CONST[1][5].zzzz 19: MOV TEMP[3].z, CONST[1][6].zzzz 20: MOV TEMP[3].w, CONST[1][7].zzzz 21: DP4 TEMP[3].x, TEMP[0], TEMP[3] 22: MOV TEMP[2].z, TEMP[3].xxxx 23: MOV TEMP[3].x, CONST[1][4].wwww 24: MOV TEMP[3].y, CONST[1][5].wwww 25: MOV TEMP[3].z, CONST[1][6].wwww 26: MOV TEMP[3].w, CONST[1][7].wwww 27: DP4 TEMP[0].x, TEMP[0], TEMP[3] 28: RCP TEMP[0].xyz, TEMP[0].xxxx 29: MAD TEMP[0].xyz, TEMP[2].xyzz, TEMP[0].xyzz, -CONST[1][19].xyzz 30: DP3 TEMP[2].x, TEMP[0].xyzz, TEMP[0].xyzz 31: RSQ TEMP[2].x, TEMP[2].xxxx 32: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xxxx 33: DP3 TEMP[2].x, CONST[1][20].xyzz, TEMP[0].xyzz 34: RCP TEMP[2].x, TEMP[2].xxxx 35: MUL TEMP[0].xyz, TEMP[2].xxxx, TEMP[0].xyzz 36: MOV TEMP[2].zw, IMM[0].yyzy 37: MOV TEMP[2].x, IN[0].xxxx 38: MOV TEMP[2].y, -IN[0].yyyy 39: MOV OUT[1], TEMP[1] 40: MOV OUT[2].xy, IN[1].xyxx 41: MOV OUT[0], TEMP[2] 42: MOV OUT[3].xyz, TEMP[0].xyzx 43: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328) %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 %37 = add i32 %5, %7 %38 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %37) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0 %43 = add i32 %5, %7 %44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %43) %45 = extractelement <4 x float> %44, i32 0 %46 = extractelement <4 x float> %44, i32 1 %47 = fmul float %39, %13 %48 = fmul float %40, %17 %49 = fadd float %47, %48 %50 = fadd float %49, %21 %51 = fadd float %50, %25 %52 = fmul float %39, %14 %53 = fmul float %40, %18 %54 = fadd float %52, %53 %55 = fadd float %54, %22 %56 = fadd float %55, %26 %57 = fmul float %39, %15 %58 = fmul float %40, %19 %59 = fadd float %57, %58 %60 = fadd float %59, %23 %61 = fadd float %60, %27 %62 = fmul float %39, %16 %63 = fmul float %40, %20 %64 = fadd float %62, %63 %65 = fadd float %64, %24 %66 = fadd float %65, %28 %67 = fdiv float 1.000000e+00, %66 %68 = fmul float %51, %67 %69 = fsub float %68, %29 %70 = fmul float %56, %67 %71 = fsub float %70, %30 %72 = fmul float %61, %67 %73 = fsub float %72, %31 %74 = fmul float %69, %69 %75 = fmul float %71, %71 %76 = fadd float %75, %74 %77 = fmul float %73, %73 %78 = fadd float %76, %77 %79 = call float @llvm.AMDGPU.rsq.clamped.f32(float %78) %80 = fmul float %69, %79 %81 = fmul float %71, %79 %82 = fmul float %73, %79 %83 = fmul float %32, %80 %84 = fmul float %33, %81 %85 = fadd float %84, %83 %86 = fmul float %34, %82 %87 = fadd float %85, %86 %88 = fdiv float 1.000000e+00, %87 %89 = fmul float %88, %80 %90 = fmul float %88, %81 %91 = fmul float %88, %82 %92 = fsub float -0.000000e+00, %40 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %45, float %46, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %89, float %90, float %91, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %39, float %92, float -1.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0x1a ; C206011A buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[3:6], v0, s[8:11], 0 idxen ; E00C2000 80020300 s_buffer_load_dword s5, s[0:3], 0x1c ; C202811C s_buffer_load_dword s6, s[0:3], 0x1d ; C203011D s_buffer_load_dword s7, s[0:3], 0x1e ; C203811E s_buffer_load_dword s8, s[0:3], 0x15 ; C2040115 s_buffer_load_dword s9, s[0:3], 0x16 ; C2048116 s_buffer_load_dword s10, s[0:3], 0x17 ; C2050117 v_mov_b32_e32 v0, s12 ; 7E00020C s_buffer_load_dword s11, s[0:3], 0x18 ; C2058118 s_buffer_load_dword s12, s[0:3], 0x19 ; C2060119 s_waitcnt vmcnt(0) ; BF8C0770 v_mov_b32_e32 v5, s4 ; 7E0A0204 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_buffer_load_dword s13, s[0:3], 0x11 ; C2068111 s_buffer_load_dword s14, s[0:3], 0x12 ; C2070112 s_buffer_load_dword s15, s[0:3], 0x13 ; C2078113 s_buffer_load_dword s16, s[0:3], 0x14 ; C2080114 s_buffer_load_dword s17, s[0:3], 0x1f ; C208811F s_buffer_load_dword s18, s[0:3], 0x4c ; C209014C s_buffer_load_dword s19, s[0:3], 0x4d ; C209814D s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s11 ; 7E0C020B s_buffer_load_dword s11, s[0:3], 0x4e ; C205814E v_mov_b32_e32 v7, s12 ; 7E0E020C s_buffer_load_dword s12, s[0:3], 0x50 ; C2060150 s_buffer_load_dword s20, s[0:3], 0x51 ; C20A0151 s_buffer_load_dword s0, s[0:3], 0x52 ; C2000152 v_mac_f32_e32 v6, s16, v2 ; 3E0C0410 v_mac_f32_e32 v7, s8, v2 ; 3E0E0408 v_mac_f32_e32 v0, s9, v2 ; 3E000409 v_mac_f32_e32 v5, s10, v2 ; 3E0A040A v_mac_f32_e32 v6, s4, v1 ; 3E0C0204 v_mac_f32_e32 v7, s13, v1 ; 3E0E020D v_mac_f32_e32 v0, s14, v1 ; 3E00020E v_mac_f32_e32 v5, s15, v1 ; 3E0A020F v_add_f32_e32 v5, s17, v5 ; 060A0A11 v_rcp_f32_e32 v5, v5 ; 7E0A5505 v_add_f32_e32 v6, s5, v6 ; 060C0C05 v_add_f32_e32 v7, s6, v7 ; 060E0E06 v_add_f32_e32 v0, s7, v0 ; 06000007 v_mad_f32 v6, v6, v5, -s18 ; D2820006 804A0B06 v_mad_f32 v7, v7, v5, -s19 ; D2820007 804E0B07 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, v5, -s11 ; D2820000 802E0B00 v_mul_f32_e32 v5, v6, v6 ; 100A0D06 v_mac_f32_e32 v5, v7, v7 ; 3E0A0F07 v_mac_f32_e32 v5, v0, v0 ; 3E0A0100 v_rsq_clamp_f32_e32 v5, v5 ; 7E0A5905 v_mul_f32_e32 v6, v5, v6 ; 100C0D05 v_mul_f32_e32 v7, v5, v7 ; 100E0F05 v_mul_f32_e32 v0, v5, v0 ; 10000105 v_mul_f32_e32 v5, s12, v6 ; 100A0C0C v_mac_f32_e32 v5, s20, v7 ; 3E0A0E14 v_mac_f32_e32 v5, s0, v0 ; 3E0A0000 v_rcp_f32_e32 v5, v5 ; 7E0A5505 v_mov_b32_e32 v8, 0 ; 7E100280 exp 15, 32, 0, 0, 0, v8, v8, v8, v8 ; F800020F 08080808 exp 15, 33, 0, 0, 0, v3, v4, v8, v8 ; F800021F 08080403 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v3, v6, v5 ; 10060B06 v_mul_f32_e32 v4, v7, v5 ; 10080B07 v_mul_f32_e32 v0, v0, v5 ; 10000B00 exp 15, 34, 0, 0, 0, v3, v4, v0, v8 ; F800022F 08000403 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, 1.0 ; 7E0002F2 v_xor_b32_e32 v2, 0x80000000, v2 ; 3A0404FF 80000000 v_mov_b32_e32 v3, -1.0 ; 7E0602F3 exp 15, 12, 0, 0, 0, v1, v2, v3, v0 ; F80000CF 00030201 exp 15, 13, 0, 1, 0, v8, v8, v8, v8 ; F80008DF 08080808 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 360 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL CONST[1][0..5] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0..6], LOCAL IMM[0] UINT32 {3, 304, 4, 160} IMM[1] FLT32 { 1.0000, 0.3000, 0.5900, 0.1100} IMM[2] UINT32 {176, 128, 156, 152} IMM[3] FLT32 { 0.5000, 0.2500, 0.0000, 0.0000} IMM[4] UINT32 {448, 468, 464, 460} IMM[5] UINT32 {472, 0, 0, 0} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0].x, TEMP[0], SAMP[1], 2D 2: MAD TEMP[0].xyz, IN[1].xyzz, TEMP[0].xxxx, CONST[4][19].xyzz 3: MOV TEMP[1].xy, IN[0].xyyy 4: TEX TEMP[1].xyz, TEMP[1], SAMP[3], 2D 5: MOV TEMP[2].xy, IN[0].xyyy 6: TEX TEMP[2].xyz, TEMP[2], SAMP[2], 2D 7: ADD TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xyzz 8: MAD TEMP[2].xy, CONST[5][10].zwww, TEMP[0].xyyy, CONST[5][10].xyyy 9: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[4][19].xyzz 10: MOV TEMP[3].w, IMM[1].xxxx 11: DP3 TEMP[4].x, TEMP[1].xyzz, IMM[1].yzww 12: POW TEMP[4].x, TEMP[4].xxxx, CONST[5][9].wwww 13: MAX TEMP[4].xyz, TEMP[4].xxxx, IMM[3].xxxx 14: MUL TEMP[5].xyz, CONST[5][9].zzzz, TEMP[1].xyzz 15: MAD TEMP[4].xyz, CONST[5][8].xyzz, TEMP[4].xyzz, TEMP[5].xyzz 16: ADD TEMP[5].xy, TEMP[2].xyyy, CONST[5][11].xyyy 17: MOV TEMP[5].xy, TEMP[5].xyyy 18: TEX TEMP[5].x, TEMP[5], SAMP[0], 2D 19: ADD TEMP[6].xy, TEMP[2].xyyy, CONST[5][11].zwww 20: MOV TEMP[6].xy, TEMP[6].xyyy 21: TEX TEMP[6].x, TEMP[6], SAMP[0], 2D 22: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx 23: ADD TEMP[6].xy, TEMP[2].xyyy, -CONST[5][11].xyyy 24: MOV TEMP[6].xy, TEMP[6].xyyy 25: TEX TEMP[6].x, TEMP[6], SAMP[0], 2D 26: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx 27: ADD TEMP[2].xy, TEMP[2].xyyy, -CONST[5][11].zwww 28: MOV TEMP[2].xy, TEMP[2].xyyy 29: TEX TEMP[2].x, TEMP[2], SAMP[0], 2D 30: ADD TEMP[2].x, TEMP[5].xxxx, TEMP[2].xxxx 31: MUL TEMP[2].x, TEMP[2].xxxx, IMM[3].yyyy 32: LRP TEMP[1].xyz, TEMP[2].xxxx, TEMP[1].xyzz, TEMP[4].xyzz 33: DP3 TEMP[0].x, TEMP[0].xyzz, TEMP[0].xyzz 34: SQRT TEMP[0].x, TEMP[0].xxxx 35: MAD TEMP[0].x, TEMP[0].xxxx, CONST[4][29].xxxx, CONST[4][28].wwww 36: MOV_SAT TEMP[0].x, TEMP[0].xxxx 37: POW TEMP[0].x, TEMP[0].xxxx, CONST[4][29].zzzz 38: MIN TEMP[0].x, CONST[4][29].yyyy, TEMP[0].xxxx 39: MOV_SAT TEMP[0].x, TEMP[0].xxxx 40: LRP TEMP[3].xyz, TEMP[0].xxxx, CONST[4][28].xyzz, TEMP[1].xyzz 41: MOV OUT[0], TEMP[3] 42: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 308) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 312) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 448) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 452) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 456) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 460) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 464) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 468) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 472) %34 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 5 %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 %36 = call float @llvm.SI.load.const(<16 x i8> %35, i32 128) %37 = call float @llvm.SI.load.const(<16 x i8> %35, i32 132) %38 = call float @llvm.SI.load.const(<16 x i8> %35, i32 136) %39 = call float @llvm.SI.load.const(<16 x i8> %35, i32 152) %40 = call float @llvm.SI.load.const(<16 x i8> %35, i32 156) %41 = call float @llvm.SI.load.const(<16 x i8> %35, i32 160) %42 = call float @llvm.SI.load.const(<16 x i8> %35, i32 164) %43 = call float @llvm.SI.load.const(<16 x i8> %35, i32 168) %44 = call float @llvm.SI.load.const(<16 x i8> %35, i32 172) %45 = call float @llvm.SI.load.const(<16 x i8> %35, i32 176) %46 = call float @llvm.SI.load.const(<16 x i8> %35, i32 180) %47 = call float @llvm.SI.load.const(<16 x i8> %35, i32 184) %48 = call float @llvm.SI.load.const(<16 x i8> %35, i32 188) %49 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %50 = load <8 x i32>, <8 x i32> addrspace(2)* %49, align 32, !tbaa !0 %51 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %52 = load <4 x i32>, <4 x i32> addrspace(2)* %51, align 16, !tbaa !0 %53 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %54 = bitcast <8 x i32> addrspace(2)* %53 to <32 x i8> addrspace(2)* %55 = load <32 x i8>, <32 x i8> addrspace(2)* %54, align 32, !tbaa !0 %56 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %57 = bitcast <4 x i32> addrspace(2)* %56 to <16 x i8> addrspace(2)* %58 = load <16 x i8>, <16 x i8> addrspace(2)* %57, align 16, !tbaa !0 %59 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %60 = bitcast <8 x i32> addrspace(2)* %59 to <32 x i8> addrspace(2)* %61 = load <32 x i8>, <32 x i8> addrspace(2)* %60, align 32, !tbaa !0 %62 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %63 = bitcast <4 x i32> addrspace(2)* %62 to <16 x i8> addrspace(2)* %64 = load <16 x i8>, <16 x i8> addrspace(2)* %63, align 16, !tbaa !0 %65 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %66 = bitcast <8 x i32> addrspace(2)* %65 to <32 x i8> addrspace(2)* %67 = load <32 x i8>, <32 x i8> addrspace(2)* %66, align 32, !tbaa !0 %68 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %69 = bitcast <4 x i32> addrspace(2)* %68 to <16 x i8> addrspace(2)* %70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !tbaa !0 %71 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %72 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %73 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %74 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %75 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %76 = bitcast float %71 to i32 %77 = bitcast float %72 to i32 %78 = insertelement <2 x i32> undef, i32 %76, i32 0 %79 = insertelement <2 x i32> %78, i32 %77, i32 1 %80 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %79, <32 x i8> %55, <16 x i8> %58, i32 2) %81 = extractelement <4 x float> %80, i32 0 %82 = fmul float %73, %81 %83 = fadd float %82, %24 %84 = fmul float %74, %81 %85 = fadd float %84, %25 %86 = fmul float %75, %81 %87 = fadd float %86, %26 %88 = bitcast float %71 to i32 %89 = bitcast float %72 to i32 %90 = insertelement <2 x i32> undef, i32 %88, i32 0 %91 = insertelement <2 x i32> %90, i32 %89, i32 1 %92 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %91, <32 x i8> %67, <16 x i8> %70, i32 2) %93 = extractelement <4 x float> %92, i32 0 %94 = extractelement <4 x float> %92, i32 1 %95 = extractelement <4 x float> %92, i32 2 %96 = bitcast float %71 to i32 %97 = bitcast float %72 to i32 %98 = insertelement <2 x i32> undef, i32 %96, i32 0 %99 = insertelement <2 x i32> %98, i32 %97, i32 1 %100 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %99, <32 x i8> %61, <16 x i8> %64, i32 2) %101 = extractelement <4 x float> %100, i32 0 %102 = extractelement <4 x float> %100, i32 1 %103 = extractelement <4 x float> %100, i32 2 %104 = fadd float %93, %101 %105 = fadd float %94, %102 %106 = fadd float %95, %103 %107 = fmul float %43, %83 %108 = fadd float %107, %41 %109 = fmul float %44, %85 %110 = fadd float %109, %42 %111 = fsub float %83, %24 %112 = fsub float %85, %25 %113 = fsub float %87, %26 %114 = fmul float %104, 0x3FD3333340000000 %115 = fmul float %105, 0x3FE2E147A0000000 %116 = fadd float %115, %114 %117 = fmul float %106, 0x3FBC28F5C0000000 %118 = fadd float %116, %117 %119 = call float @llvm.pow.f32(float %118, float %40) %120 = call float @llvm.maxnum.f32(float %119, float 5.000000e-01) %121 = call float @llvm.maxnum.f32(float %119, float 5.000000e-01) %122 = call float @llvm.maxnum.f32(float %119, float 5.000000e-01) %123 = fmul float %39, %104 %124 = fmul float %39, %105 %125 = fmul float %39, %106 %126 = fmul float %36, %120 %127 = fadd float %126, %123 %128 = fmul float %37, %121 %129 = fadd float %128, %124 %130 = fmul float %38, %122 %131 = fadd float %130, %125 %132 = fadd float %108, %45 %133 = fadd float %110, %46 %134 = bitcast float %132 to i32 %135 = bitcast float %133 to i32 %136 = insertelement <2 x i32> undef, i32 %134, i32 0 %137 = insertelement <2 x i32> %136, i32 %135, i32 1 %138 = bitcast <8 x i32> %50 to <32 x i8> %139 = bitcast <4 x i32> %52 to <16 x i8> %140 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %137, <32 x i8> %138, <16 x i8> %139, i32 2) %141 = extractelement <4 x float> %140, i32 0 %142 = fadd float %108, %47 %143 = fadd float %110, %48 %144 = bitcast float %142 to i32 %145 = bitcast float %143 to i32 %146 = insertelement <2 x i32> undef, i32 %144, i32 0 %147 = insertelement <2 x i32> %146, i32 %145, i32 1 %148 = bitcast <8 x i32> %50 to <32 x i8> %149 = bitcast <4 x i32> %52 to <16 x i8> %150 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %147, <32 x i8> %148, <16 x i8> %149, i32 2) %151 = extractelement <4 x float> %150, i32 0 %152 = fadd float %141, %151 %153 = fsub float %108, %45 %154 = fsub float %110, %46 %155 = bitcast float %153 to i32 %156 = bitcast float %154 to i32 %157 = insertelement <2 x i32> undef, i32 %155, i32 0 %158 = insertelement <2 x i32> %157, i32 %156, i32 1 %159 = bitcast <8 x i32> %50 to <32 x i8> %160 = bitcast <4 x i32> %52 to <16 x i8> %161 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %158, <32 x i8> %159, <16 x i8> %160, i32 2) %162 = extractelement <4 x float> %161, i32 0 %163 = fadd float %152, %162 %164 = fsub float %108, %47 %165 = fsub float %110, %48 %166 = bitcast float %164 to i32 %167 = bitcast float %165 to i32 %168 = insertelement <2 x i32> undef, i32 %166, i32 0 %169 = insertelement <2 x i32> %168, i32 %167, i32 1 %170 = bitcast <8 x i32> %50 to <32 x i8> %171 = bitcast <4 x i32> %52 to <16 x i8> %172 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %169, <32 x i8> %170, <16 x i8> %171, i32 2) %173 = extractelement <4 x float> %172, i32 0 %174 = fadd float %163, %173 %175 = fmul float %174, 2.500000e-01 %176 = call float @llvm.AMDGPU.lrp(float %175, float %104, float %127) %177 = call float @llvm.AMDGPU.lrp(float %175, float %105, float %129) %178 = call float @llvm.AMDGPU.lrp(float %175, float %106, float %131) %179 = fmul float %111, %111 %180 = fmul float %112, %112 %181 = fadd float %180, %179 %182 = fmul float %113, %113 %183 = fadd float %181, %182 %184 = call float @llvm.sqrt.f32(float %183) %185 = fmul float %184, %31 %186 = fadd float %185, %30 %187 = call float @llvm.AMDIL.clamp.(float %186, float 0.000000e+00, float 1.000000e+00) %188 = call float @llvm.pow.f32(float %187, float %33) %189 = call float @llvm.minnum.f32(float %32, float %188) %190 = call float @llvm.AMDIL.clamp.(float %189, float 0.000000e+00, float 1.000000e+00) %191 = call float @llvm.AMDGPU.lrp(float %190, float %27, float %176) %192 = call float @llvm.AMDGPU.lrp(float %190, float %28, float %177) %193 = call float @llvm.AMDGPU.lrp(float %190, float %29, float %178) %194 = call i32 @llvm.SI.packf16(float %191, float %192) %195 = bitcast i32 %194 to float %196 = call i32 @llvm.SI.packf16(float %193, float 1.000000e+00) %197 = bitcast i32 %196 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %195, float %197, float %195, float %197) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx4 s[16:19], s[4:5], 0x8 ; C0880508 s_load_dwordx4 s[20:23], s[4:5], 0xc ; C08A050C s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 s_load_dwordx8 s[32:39], s[6:7], 0x18 ; C0D00718 s_load_dwordx8 s[40:47], s[6:7], 0x10 ; C0D40710 s_load_dwordx4 s[8:11], s[2:3], 0x10 ; C0840310 s_load_dwordx4 s[0:3], s[2:3], 0x14 ; C0800314 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500 v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501 v_interp_p1_f32 v0, v0, 2, 1, [m0] ; C8000600 v_interp_p2_f32 v0, [v0], v1, 2, 1, [m0] ; C8010601 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v1, 1, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[12:15] ; F0800100 00660102 image_sample v[6:8], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[32:39], s[20:23] ; F0800700 00A80602 image_sample v[9:11], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[40:47], s[16:19] ; F0800700 008A0902 s_buffer_load_dword s12, s[8:11], 0x4c ; C206094C s_buffer_load_dword s13, s[0:3], 0x28 ; C2068128 s_buffer_load_dword s14, s[0:3], 0x2a ; C207012A s_buffer_load_dword s15, s[0:3], 0x29 ; C2078129 s_buffer_load_dword s16, s[8:11], 0x4d ; C208094D s_buffer_load_dword s17, s[8:11], 0x4e ; C208894E s_buffer_load_dword s18, s[8:11], 0x70 ; C2090970 s_buffer_load_dword s19, s[8:11], 0x71 ; C2098971 s_buffer_load_dword s20, s[0:3], 0x2b ; C20A012B s_buffer_load_dword s21, s[0:3], 0x2c ; C20A812C s_buffer_load_dword s22, s[0:3], 0x2d ; C20B012D s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mad_f32 v2, v4, v1, s12 ; D2820002 00320304 v_mov_b32_e32 v3, s13 ; 7E06020D v_mac_f32_e32 v3, s14, v2 ; 3E06040E s_buffer_load_dword s13, s[0:3], 0x2e ; C206812E v_mad_f32 v4, v5, v1, s16 ; D2820004 00420305 v_mov_b32_e32 v5, s15 ; 7E0A020F s_buffer_load_dword s14, s[0:3], 0x2f ; C207012F s_load_dwordx4 s[24:27], s[4:5], 0x0 ; C08C0500 s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700 v_mac_f32_e32 v5, s20, v4 ; 3E0A0814 v_add_f32_e32 v12, s21, v3 ; 06180615 v_add_f32_e32 v13, s22, v5 ; 061A0A16 v_subrev_f32_e32 v14, s21, v3 ; 0A1C0615 v_subrev_f32_e32 v15, s22, v5 ; 0A1E0A16 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v16, s13, v3 ; 0620060D v_subrev_f32_e32 v18, s13, v3 ; 0A24060D v_add_f32_e32 v17, s14, v5 ; 06220A0E v_subrev_f32_e32 v19, s14, v5 ; 0A260A0E image_sample v3, 1, 0, 0, 0, 0, 0, 0, 0, v[12:13], s[28:35], s[24:27] ; F0800100 00C7030C image_sample v5, 1, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[28:35], s[24:27] ; F0800100 00C70510 image_sample v12, 1, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[28:35], s[24:27] ; F0800100 00C70C0E image_sample v13, 1, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[28:35], s[24:27] ; F0800100 00C70D12 v_add_f32_e32 v6, v9, v6 ; 060C0D09 v_add_f32_e32 v7, v10, v7 ; 060E0F0A v_add_f32_e32 v8, v11, v8 ; 0610110B s_buffer_load_dword s4, s[8:11], 0x72 ; C2020972 s_buffer_load_dword s5, s[8:11], 0x73 ; C2028973 s_buffer_load_dword s6, s[8:11], 0x74 ; C2030974 s_buffer_load_dword s7, s[8:11], 0x75 ; C2038975 s_buffer_load_dword s8, s[8:11], 0x76 ; C2040976 s_buffer_load_dword s9, s[0:3], 0x20 ; C2048120 s_buffer_load_dword s10, s[0:3], 0x21 ; C2050121 s_buffer_load_dword s11, s[0:3], 0x22 ; C2058122 s_buffer_load_dword s13, s[0:3], 0x26 ; C2068126 s_buffer_load_dword s0, s[0:3], 0x27 ; C2000127 v_subrev_f32_e32 v2, s12, v2 ; 0A04040C v_subrev_f32_e32 v4, s16, v4 ; 0A080810 v_mad_f32 v0, v0, v1, s17 ; D2820000 00460300 v_subrev_f32_e32 v0, s17, v0 ; 0A000011 v_mul_f32_e32 v1, v2, v2 ; 10020502 v_mac_f32_e32 v1, v4, v4 ; 3E020904 v_mul_f32_e32 v2, 0x3e99999a, v6 ; 10040CFF 3E99999A v_madmk_f32_e32 v2, v7, v2, 0x3f170a3d ; 40040507 3F170A3D v_madmk_f32_e32 v2, v8, v2, 0x3de147ae ; 40040508 3DE147AE v_log_f32_e32 v2, v2 ; 7E044F02 v_mac_f32_e32 v1, v0, v0 ; 3E020100 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v0, s5 ; 7E000205 v_sqrt_f32_e32 v1, v1 ; 7E026701 v_mac_f32_e32 v0, s6, v1 ; 3E000206 v_mul_legacy_f32_e32 v1, s0, v2 ; 0E020400 v_exp_f32_e32 v1, v1 ; 7E024B01 v_max_f32_e32 v1, 0.5, v1 ; 200202F0 v_mul_f32_e32 v2, s13, v6 ; 10040C0D v_mac_f32_e32 v2, s9, v1 ; 3E040209 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_log_f32_e32 v0, v0 ; 7E004F00 v_mul_f32_e32 v4, s13, v7 ; 10080E0D v_mac_f32_e32 v4, s10, v1 ; 3E08020A v_mul_f32_e32 v9, s13, v8 ; 1012100D v_mac_f32_e32 v9, s11, v1 ; 3E12020B v_mul_legacy_f32_e32 v0, s8, v0 ; 0E000008 v_exp_f32_e32 v0, v0 ; 7E004B00 v_min_f32_e32 v0, s7, v0 ; 1E000007 v_add_f32_e32 v1, v5, v3 ; 06020705 v_add_f32_e32 v1, v12, v1 ; 0602030C v_add_f32_e32 v1, v13, v1 ; 0602030D v_mov_b32_e32 v3, 0x3e800000 ; 7E0602FF 3E800000 v_mul_f32_e32 v5, v3, v1 ; 100A0303 v_mad_f32 v1, -v1, v3, 1.0 ; D2820001 23CA0701 v_mul_f32_e32 v2, v2, v1 ; 10040302 v_mac_f32_e32 v2, v6, v5 ; 3E040B06 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_sub_f32_e32 v3, 1.0, v0 ; 080600F2 v_mul_f32_e32 v2, v2, v3 ; 10040702 v_mac_f32_e32 v2, s18, v0 ; 3E040012 v_mul_f32_e32 v4, v4, v1 ; 10080304 v_mac_f32_e32 v4, v7, v5 ; 3E080B07 v_mul_f32_e32 v4, v4, v3 ; 10080704 v_mac_f32_e32 v4, s19, v0 ; 3E080013 v_mul_f32_e32 v1, v9, v1 ; 10020309 v_mac_f32_e32 v1, v8, v5 ; 3E020B08 v_mul_f32_e32 v1, v1, v3 ; 10020701 v_mac_f32_e32 v1, s4, v0 ; 3E020004 v_cvt_pkrtz_f16_f32_e32 v0, v2, v4 ; 5E000902 v_cvt_pkrtz_f16_f32_e64 v1, v1, 1.0 ; D25E0001 0001E501 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 20 Code Size: 576 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL CONST[1][0..4] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0..3], LOCAL IMM[0] UINT32 {0, 64, 56, 3} IMM[1] UINT32 {348, 16, 4, 160} IMM[2] UINT32 {128, 156, 152, 0} IMM[3] FLT32 { 0.3000, 0.5900, 0.1100, 0.5000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0].yw, TEMP[0], SAMP[1], 2D 2: MUL TEMP[1].xy, CONST[1][4].xxxx, TEMP[0].yyyy 3: MUL TEMP[2].xy, CONST[1][3].zwww, CONST[4][21].wwww 4: MAD TEMP[1].xy, TEMP[1].xyyy, TEMP[2].xyyy, IN[0].xyyy 5: MOV TEMP[1].xy, TEMP[1].xyyy 6: TEX TEMP[1], TEMP[1], SAMP[2], 2D 7: MOV TEMP[2].w, TEMP[1].wwww 8: MUL TEMP[2].xyz, CONST[1][1].xyzz, TEMP[1].xyzz 9: MUL TEMP[1].xyz, TEMP[2], IN[2] 10: DP3 TEMP[2].x, TEMP[1].xyzz, IMM[3].xyzz 11: POW TEMP[2].x, TEMP[2].xxxx, CONST[5][9].wwww 12: MAX TEMP[2].xyz, TEMP[2].xxxx, IMM[3].wwww 13: MUL TEMP[3].xyz, CONST[5][9].zzzz, TEMP[1].xyzz 14: MAD TEMP[2].xyz, CONST[5][8].xyzz, TEMP[2].xyzz, TEMP[3].xyzz 15: MAD TEMP[3].xy, IN[1].xyyy, CONST[5][10].zwww, CONST[5][10].xyyy 16: MOV TEMP[3].xy, TEMP[3].xyyy 17: TEX TEMP[3].x, TEMP[3], SAMP[0], 2D 18: LRP TEMP[1].xyz, TEMP[3].xxxx, TEMP[1].xyzz, TEMP[2].xyzz 19: MOV TEMP[2].x, TEMP[1].xxxx 20: MOV TEMP[2].y, TEMP[1].yyyy 21: MOV TEMP[2].z, TEMP[1].zzzz 22: MUL TEMP[0].x, IN[2].wwww, TEMP[0].wwww 23: MUL TEMP[0].x, TEMP[0].xxxx, CONST[1][0].yyyy 24: MOV TEMP[2].w, TEMP[0].xxxx 25: MOV OUT[1], IN[1].wwww 26: MOV OUT[0], TEMP[2] 27: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %31 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4 %32 = load <16 x i8>, <16 x i8> addrspace(2)* %31, align 16, !tbaa !0 %33 = call float @llvm.SI.load.const(<16 x i8> %32, i32 348) %34 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 5 %35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0 %36 = call float @llvm.SI.load.const(<16 x i8> %35, i32 128) %37 = call float @llvm.SI.load.const(<16 x i8> %35, i32 132) %38 = call float @llvm.SI.load.const(<16 x i8> %35, i32 136) %39 = call float @llvm.SI.load.const(<16 x i8> %35, i32 152) %40 = call float @llvm.SI.load.const(<16 x i8> %35, i32 156) %41 = call float @llvm.SI.load.const(<16 x i8> %35, i32 160) %42 = call float @llvm.SI.load.const(<16 x i8> %35, i32 164) %43 = call float @llvm.SI.load.const(<16 x i8> %35, i32 168) %44 = call float @llvm.SI.load.const(<16 x i8> %35, i32 172) %45 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %46 = load <32 x i8>, <32 x i8> addrspace(2)* %45, align 32, !tbaa !0 %47 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %48 = load <16 x i8>, <16 x i8> addrspace(2)* %47, align 16, !tbaa !0 %49 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %50 = bitcast <8 x i32> addrspace(2)* %49 to <32 x i8> addrspace(2)* %51 = load <32 x i8>, <32 x i8> addrspace(2)* %50, align 32, !tbaa !0 %52 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %53 = bitcast <4 x i32> addrspace(2)* %52 to <16 x i8> addrspace(2)* %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !tbaa !0 %55 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %56 = bitcast <8 x i32> addrspace(2)* %55 to <32 x i8> addrspace(2)* %57 = load <32 x i8>, <32 x i8> addrspace(2)* %56, align 32, !tbaa !0 %58 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %59 = bitcast <4 x i32> addrspace(2)* %58 to <16 x i8> addrspace(2)* %60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !tbaa !0 %61 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %62 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %63 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %64 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %65 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %66 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %67 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %68 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %69 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %70 = bitcast float %61 to i32 %71 = bitcast float %62 to i32 %72 = insertelement <2 x i32> undef, i32 %70, i32 0 %73 = insertelement <2 x i32> %72, i32 %71, i32 1 %74 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %73, <32 x i8> %51, <16 x i8> %54, i32 2) %75 = extractelement <4 x float> %74, i32 1 %76 = extractelement <4 x float> %74, i32 3 %77 = fmul float %30, %75 %78 = fmul float %30, %75 %79 = fmul float %28, %33 %80 = fmul float %29, %33 %81 = fmul float %77, %79 %82 = fadd float %81, %61 %83 = fmul float %78, %80 %84 = fadd float %83, %62 %85 = bitcast float %82 to i32 %86 = bitcast float %84 to i32 %87 = insertelement <2 x i32> undef, i32 %85, i32 0 %88 = insertelement <2 x i32> %87, i32 %86, i32 1 %89 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %88, <32 x i8> %57, <16 x i8> %60, i32 2) %90 = extractelement <4 x float> %89, i32 0 %91 = extractelement <4 x float> %89, i32 1 %92 = extractelement <4 x float> %89, i32 2 %93 = fmul float %25, %90 %94 = fmul float %26, %91 %95 = fmul float %27, %92 %96 = fmul float %93, %66 %97 = fmul float %94, %67 %98 = fmul float %95, %68 %99 = fmul float %96, 0x3FD3333340000000 %100 = fmul float %97, 0x3FE2E147A0000000 %101 = fadd float %100, %99 %102 = fmul float %98, 0x3FBC28F5C0000000 %103 = fadd float %101, %102 %104 = call float @llvm.pow.f32(float %103, float %40) %105 = call float @llvm.maxnum.f32(float %104, float 5.000000e-01) %106 = call float @llvm.maxnum.f32(float %104, float 5.000000e-01) %107 = call float @llvm.maxnum.f32(float %104, float 5.000000e-01) %108 = fmul float %39, %96 %109 = fmul float %39, %97 %110 = fmul float %39, %98 %111 = fmul float %36, %105 %112 = fadd float %111, %108 %113 = fmul float %37, %106 %114 = fadd float %113, %109 %115 = fmul float %38, %107 %116 = fadd float %115, %110 %117 = fmul float %63, %43 %118 = fadd float %117, %41 %119 = fmul float %64, %44 %120 = fadd float %119, %42 %121 = bitcast float %118 to i32 %122 = bitcast float %120 to i32 %123 = insertelement <2 x i32> undef, i32 %121, i32 0 %124 = insertelement <2 x i32> %123, i32 %122, i32 1 %125 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %124, <32 x i8> %46, <16 x i8> %48, i32 2) %126 = extractelement <4 x float> %125, i32 0 %127 = call float @llvm.AMDGPU.lrp(float %126, float %96, float %112) %128 = call float @llvm.AMDGPU.lrp(float %126, float %97, float %114) %129 = call float @llvm.AMDGPU.lrp(float %126, float %98, float %116) %130 = fmul float %69, %76 %131 = fmul float %130, %24 %132 = call i32 @llvm.SI.packf16(float %127, float %128) %133 = bitcast i32 %132 to float %134 = call i32 @llvm.SI.packf16(float %129, float %131) %135 = bitcast i32 %134 to float %136 = call i32 @llvm.SI.packf16(float %65, float %65) %137 = bitcast i32 %136 to float %138 = call i32 @llvm.SI.packf16(float %65, float %65) %139 = bitcast i32 %138 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %133, float %135, float %133, float %135) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 1, i32 1, float %137, float %139, float %137, float %139) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[16:19], s[2:3], 0x10 ; C0880310 s_load_dwordx4 s[12:15], s[2:3], 0x14 ; C0860314 s_mov_b32 m0, s9 ; BEFC0309 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[16:19], 0x57 ; C2041157 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500 v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501 v_interp_p1_f32 v6, v0, 3, 1, [m0] ; C8180700 v_interp_p2_f32 v6, [v6], v1, 3, 1, [m0] ; C8190701 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 s_load_dwordx4 s[24:27], s[4:5], 0x4 ; C08C0504 s_load_dwordx8 s[28:35], s[6:7], 0x8 ; C0CE0708 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 v_interp_p1_f32 v0, v0, 3, 2, [m0] ; C8000B00 s_buffer_load_dword s9, s[12:15], 0x28 ; C2048D28 s_buffer_load_dword s10, s[12:15], 0x2a ; C2050D2A s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s11, s[0:3], 0xe ; C205810E v_interp_p2_f32 v0, [v0], v1, 3, 2, [m0] ; C8010B01 s_load_dwordx4 s[36:39], s[4:5], 0x8 ; C0920508 s_load_dwordx8 s[16:23], s[6:7], 0x10 ; C0C80710 image_sample v[10:11], 10, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[28:35], s[24:27] ; F0800A00 00C70A02 s_buffer_load_dword s24, s[12:15], 0x29 ; C20C0D29 s_buffer_load_dword s25, s[0:3], 0xf ; C20C810F s_buffer_load_dword s26, s[0:3], 0x10 ; C20D0110 s_buffer_load_dword s27, s[12:15], 0x2b ; C20D8D2B v_mov_b32_e32 v12, s9 ; 7E180209 v_mac_f32_e32 v12, s10, v4 ; 3E18080A v_mov_b32_e32 v1, s8 ; 7E020208 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s11, v1 ; 1002020B v_mov_b32_e32 v4, s8 ; 7E080208 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_buffer_load_dword s6, s[0:3], 0x6 ; C2030106 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v4, s25, v4 ; 10080819 v_mul_f32_e32 v10, s26, v10 ; 1014141A v_mac_f32_e32 v2, v1, v10 ; 3E041501 v_mac_f32_e32 v3, v4, v10 ; 3E061504 image_sample v[1:3], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[36:39] ; F0800700 01240102 v_mov_b32_e32 v13, s24 ; 7E1A0218 v_mac_f32_e32 v13, s27, v5 ; 3E1A0A1B s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v4, 1, 0, 0, 0, 0, 0, 0, 0, v[12:13], s[28:35], s[8:11] ; F0800100 0047040C s_waitcnt vmcnt(1) ; BF8C0771 v_mul_f32_e32 v1, s4, v1 ; 10020204 v_mul_f32_e32 v2, s5, v2 ; 10040405 v_mul_f32_e32 v3, s6, v3 ; 10060606 v_mul_f32_e32 v1, v7, v1 ; 10020307 v_mul_f32_e32 v2, v8, v2 ; 10040508 s_buffer_load_dword s4, s[12:15], 0x27 ; C2020D27 v_mul_f32_e32 v3, v9, v3 ; 10060709 s_buffer_load_dword s5, s[12:15], 0x26 ; C2028D26 v_mul_f32_e32 v5, 0x3e99999a, v1 ; 100A02FF 3E99999A v_madmk_f32_e32 v5, v2, v5, 0x3f170a3d ; 400A0B02 3F170A3D v_madmk_f32_e32 v5, v3, v5, 0x3de147ae ; 400A0B03 3DE147AE v_log_f32_e32 v5, v5 ; 7E0A4F05 s_buffer_load_dword s6, s[12:15], 0x20 ; C2030D20 s_buffer_load_dword s7, s[12:15], 0x21 ; C2038D21 s_buffer_load_dword s8, s[12:15], 0x22 ; C2040D22 s_buffer_load_dword s0, s[0:3], 0x1 ; C2000101 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_legacy_f32_e32 v5, s4, v5 ; 0E0A0A04 v_mul_f32_e32 v7, s5, v1 ; 100E0205 v_mul_f32_e32 v8, s5, v2 ; 10100405 v_mul_f32_e32 v9, s5, v3 ; 10120605 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_max_f32_e32 v5, 0.5, v5 ; 200A0AF0 v_mac_f32_e32 v7, s6, v5 ; 3E0E0A06 v_mac_f32_e32 v8, s7, v5 ; 3E100A07 v_mac_f32_e32 v9, s8, v5 ; 3E120A08 v_mul_f32_e32 v0, v11, v0 ; 1000010B v_mul_f32_e32 v0, s0, v0 ; 10000000 v_sub_f32_e32 v5, 1.0, v4 ; 080A08F2 v_mul_f32_e32 v7, v7, v5 ; 100E0B07 v_mac_f32_e32 v7, v1, v4 ; 3E0E0901 v_mul_f32_e32 v1, v8, v5 ; 10020B08 v_mac_f32_e32 v1, v2, v4 ; 3E020902 v_mul_f32_e32 v2, v9, v5 ; 10040B09 v_mac_f32_e32 v2, v3, v4 ; 3E040903 v_cvt_pkrtz_f16_f32_e32 v1, v7, v1 ; 5E020307 v_cvt_pkrtz_f16_f32_e32 v0, v2, v0 ; 5E000102 exp 15, 0, 1, 0, 0, v1, v0, v1, v0 ; F800040F 00010001 s_waitcnt expcnt(0) ; BF8C070F v_cvt_pkrtz_f16_f32_e32 v0, v6, v6 ; 5E000D06 exp 15, 1, 1, 1, 1, v0, v0, v0, v0 ; F8001C1F 00000000 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 16 Code Size: 440 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL CONST[1][0..41] DCL CONST[2][0..13] DCL CONST[3][0] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL IMM[0] FLT32 { 0.0000, -1.0000, 1.0000, 0.0000} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MOV TEMP[0].zw, IMM[0].zzyz 4: MOV TEMP[0].x, IN[0].xxxx 5: MOV TEMP[0].y, -IN[0].yyyy 6: MOV OUT[1], TEMP[1] 7: MOV OUT[2].xy, IN[1].xyxx 8: MOV OUT[0], TEMP[0] 9: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = add i32 %5, %7 %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13) %15 = extractelement <4 x float> %14, i32 0 %16 = extractelement <4 x float> %14, i32 1 %17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 %19 = add i32 %5, %7 %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19) %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = fsub float -0.000000e+00, %16 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %21, float %22, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %15, float %23, float -1.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 v_mov_b32_e32 v1, 0 ; 7E020280 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[4:7], v0, s[4:7], 0 idxen ; E00C2000 80010400 v_mov_b32_e32 v0, 1.0 ; 7E0002F2 exp 15, 32, 0, 0, 0, v1, v1, v1, v1 ; F800020F 01010101 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 33, 0, 0, 0, v4, v5, v1, v1 ; F800021F 01010504 v_xor_b32_e32 v3, 0x80000000, v3 ; 3A0606FF 80000000 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v4, -1.0 ; 7E0802F3 exp 15, 12, 0, 0, 0, v2, v3, v4, v0 ; F80000CF 00040302 exp 15, 13, 0, 1, 0, v1, v1, v1, v1 ; F80008DF 01010101 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 100 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0] DCL CONST[2][0..41] DCL CONST[3][0..13] DCL CONST[4][0] DCL TEMP[0..47], LOCAL IMM[0] FLT32 { 0.0000, 1.9632, 0.0417, 0.1250} IMM[1] INT32 {0, -1, 1, 15} IMM[2] FLT32 { 0.7500, 0.2500, -0.2500, 1.3333} IMM[3] FLT32 { 0.1111, -0.5000, 0.5000, 1.0000} IMM[4] UINT32 {0, 0, 0, 0} 0: MOV TEMP[0].xy, IN[0].xyyy 1: MOV TEMP[0].w, IMM[0].xxxx 2: TXL TEMP[0], TEMP[0], SAMP[0], 2D, IMM[1].xyx 3: MOV TEMP[1], TEMP[0] 4: MOV TEMP[2].xy, IN[0].xyyy 5: MOV TEMP[2].w, IMM[0].xxxx 6: TXL TEMP[2], TEMP[2], SAMP[0], 2D, IMM[1].yxy 7: MOV TEMP[3], TEMP[2] 8: MOV TEMP[4].xy, IN[0].xyyy 9: MOV TEMP[4].w, IMM[0].xxxx 10: TXL TEMP[4], TEMP[4], SAMP[0], 2D, IMM[1].xxx 11: MOV TEMP[5], TEMP[4] 12: MOV TEMP[1].xy, IN[0].xyyy 13: MOV TEMP[1].w, IMM[0].xxxx 14: TXL TEMP[1], TEMP[1], SAMP[0], 2D, IMM[1].zxz 15: MOV TEMP[6], TEMP[1] 16: MOV TEMP[7].xy, IN[0].xyyy 17: MOV TEMP[7].w, IMM[0].xxxx 18: TXL TEMP[7], TEMP[7], SAMP[0], 2D, IMM[1].xzx 19: MOV TEMP[8], TEMP[7] 20: MAD TEMP[9].x, TEMP[0].yyyy, IMM[0].yyyy, TEMP[0].xxxx 21: MAD TEMP[3].x, IMM[0].yyyy, TEMP[2].yyyy, TEMP[2].xxxx 22: MAD TEMP[10].x, IMM[0].yyyy, TEMP[4].yyyy, TEMP[4].xxxx 23: MAD TEMP[11].x, IMM[0].yyyy, TEMP[1].yyyy, TEMP[1].xxxx 24: MAD TEMP[5].x, IMM[0].yyyy, TEMP[7].yyyy, TEMP[7].xxxx 25: MAX TEMP[12].x, TEMP[10].xxxx, TEMP[9].xxxx 26: MAX TEMP[13].x, TEMP[3].xxxx, TEMP[5].xxxx 27: MAX TEMP[13].x, TEMP[13].xxxx, TEMP[11].xxxx 28: MAX TEMP[12].x, TEMP[12].xxxx, TEMP[13].xxxx 29: MIN TEMP[13].x, TEMP[10].xxxx, TEMP[9].xxxx 30: MIN TEMP[6].x, TEMP[3].xxxx, TEMP[5].xxxx 31: MIN TEMP[6].x, TEMP[6].xxxx, TEMP[11].xxxx 32: MIN TEMP[13].x, TEMP[13].xxxx, TEMP[6].xxxx 33: ADD TEMP[13].x, TEMP[12].xxxx, -TEMP[13].xxxx 34: MUL TEMP[12].x, TEMP[12].xxxx, IMM[0].wwww 35: MAX TEMP[12].x, IMM[0].zzzz, TEMP[12].xxxx 36: FSLT TEMP[12].x, TEMP[13].xxxx, TEMP[12].xxxx 37: UIF TEMP[12].xxxx :0 38: MOV TEMP[12].xyz, TEMP[4].xyzx 39: ELSE :0 40: ADD TEMP[6].x, TEMP[5].xxxx, TEMP[9].xxxx 41: ADD TEMP[14].x, TEMP[3].xxxx, TEMP[11].xxxx 42: ADD TEMP[6].x, TEMP[6].xxxx, TEMP[14].xxxx 43: MAD TEMP[6].x, TEMP[6].xxxx, IMM[2].yyyy, -TEMP[10].xxxx 44: ABS TEMP[6].x, TEMP[6].xxxx 45: RCP TEMP[13].x, TEMP[13].xxxx 46: MAD TEMP[13].x, TEMP[6].xxxx, TEMP[13].xxxx, IMM[2].zzzz 47: MAX TEMP[13].x, IMM[0].xxxx, TEMP[13].xxxx 48: MUL TEMP[13].x, TEMP[13].xxxx, IMM[2].wwww 49: MIN TEMP[13].x, IMM[2].xxxx, TEMP[13].xxxx 50: MOV TEMP[6].xy, IN[0].xyyy 51: MOV TEMP[6].w, IMM[0].xxxx 52: TXL TEMP[6], TEMP[6], SAMP[0], 2D, IMM[1].yyy 53: MOV TEMP[15], TEMP[6] 54: MOV TEMP[14].xy, IN[0].xyyy 55: MOV TEMP[14].w, IMM[0].xxxx 56: TXL TEMP[14], TEMP[14], SAMP[0], 2D, IMM[1].zyz 57: MOV TEMP[16], TEMP[14] 58: MOV TEMP[17].xy, IN[0].xyyy 59: MOV TEMP[17].w, IMM[0].xxxx 60: TXL TEMP[17], TEMP[17], SAMP[0], 2D, IMM[1].yzy 61: MOV TEMP[18], TEMP[17] 62: MOV TEMP[8].xy, IN[0].xyyy 63: MOV TEMP[8].w, IMM[0].xxxx 64: TXL TEMP[8], TEMP[8], SAMP[0], 2D, IMM[1].zzz 65: MOV TEMP[19], TEMP[8] 66: ADD TEMP[20].xyz, TEMP[14].xyzz, TEMP[6].xyzz 67: ADD TEMP[21].xyz, TEMP[17].xyzz, TEMP[8].xyzz 68: ADD TEMP[20].xyz, TEMP[20].xyzz, TEMP[21].xyzz 69: ADD TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xyzz 70: ADD TEMP[2].xyz, TEMP[4].xyzz, TEMP[1].xyzz 71: ADD TEMP[2].xyz, TEMP[2].xyzz, TEMP[7].xyzz 72: ADD TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xyzz 73: ADD TEMP[0].xyz, TEMP[20].xyzz, TEMP[0].xyzz 74: MUL TEMP[0].xyz, TEMP[0].xyzz, IMM[3].xxxx 75: MAD TEMP[2].x, IMM[0].yyyy, TEMP[6].yyyy, TEMP[6].xxxx 76: MUL TEMP[2].x, IMM[2].yyyy, TEMP[2].xxxx 77: MAD TEMP[4].x, IMM[0].yyyy, TEMP[14].yyyy, TEMP[14].xxxx 78: MUL TEMP[4].x, IMM[2].yyyy, TEMP[4].xxxx 79: MOV TEMP[1].x, -TEMP[10].xxxx 80: MAD TEMP[7].x, IMM[0].yyyy, TEMP[17].yyyy, TEMP[17].xxxx 81: MUL TEMP[7].x, IMM[2].yyyy, TEMP[7].xxxx 82: MAD TEMP[6].x, IMM[0].yyyy, TEMP[8].yyyy, TEMP[8].xxxx 83: MUL TEMP[6].x, IMM[2].yyyy, TEMP[6].xxxx 84: MAD TEMP[14].x, IMM[3].yyyy, TEMP[3].xxxx, TEMP[2].xxxx 85: ADD TEMP[14].x, TEMP[14].xxxx, TEMP[7].xxxx 86: ABS TEMP[14].x, TEMP[14].xxxx 87: MAD TEMP[17].x, IMM[3].zzzz, TEMP[9].xxxx, TEMP[1].xxxx 88: MAD TEMP[17].x, IMM[3].zzzz, TEMP[5].xxxx, TEMP[17].xxxx 89: ABS TEMP[17].x, TEMP[17].xxxx 90: ADD TEMP[14].x, TEMP[14].xxxx, TEMP[17].xxxx 91: MAD TEMP[17].x, IMM[3].yyyy, TEMP[11].xxxx, TEMP[4].xxxx 92: ADD TEMP[17].x, TEMP[6].xxxx, TEMP[17].xxxx 93: ABS TEMP[17].x, TEMP[17].xxxx 94: ADD TEMP[14].x, TEMP[14].xxxx, TEMP[17].xxxx 95: MAD TEMP[2].x, TEMP[9].xxxx, IMM[3].yyyy, TEMP[2].xxxx 96: ADD TEMP[2].x, TEMP[2].xxxx, TEMP[4].xxxx 97: ABS TEMP[2].x, TEMP[2].xxxx 98: MAD TEMP[4].x, TEMP[3].xxxx, IMM[3].zzzz, TEMP[1].xxxx 99: MAD TEMP[4].x, IMM[3].zzzz, TEMP[11].xxxx, TEMP[4].xxxx 100: ABS TEMP[4].x, TEMP[4].xxxx 101: ADD TEMP[2].x, TEMP[2].xxxx, TEMP[4].xxxx 102: MAD TEMP[4].x, IMM[3].yyyy, TEMP[5].xxxx, TEMP[7].xxxx 103: ADD TEMP[4].x, TEMP[6].xxxx, TEMP[4].xxxx 104: ABS TEMP[4].x, TEMP[4].xxxx 105: ADD TEMP[2].x, TEMP[2].xxxx, TEMP[4].xxxx 106: FSGE TEMP[2].x, TEMP[14].xxxx, TEMP[2].xxxx 107: UIF TEMP[2].xxxx :0 108: MOV TEMP[22], CONST[1][0] 109: MOV TEMP[4].x, CONST[1][0].yyyy 110: ELSE :0 111: MOV TEMP[23], CONST[1][0] 112: MOV TEMP[4].x, CONST[1][0].xxxx 113: ENDIF 114: MOV TEMP[1].x, -TEMP[4].xxxx 115: UIF TEMP[2].xxxx :0 116: MOV TEMP[7].x, TEMP[9].xxxx 117: ELSE :0 118: MOV TEMP[7].x, TEMP[3].xxxx 119: ENDIF 120: UIF TEMP[2].xxxx :0 121: MOV TEMP[9].x, TEMP[5].xxxx 122: ELSE :0 123: MOV TEMP[9].x, TEMP[11].xxxx 124: ENDIF 125: ADD TEMP[3].x, TEMP[7].xxxx, -TEMP[10].xxxx 126: ABS TEMP[3].x, TEMP[3].xxxx 127: ADD TEMP[11].x, TEMP[9].xxxx, -TEMP[10].xxxx 128: ABS TEMP[11].x, TEMP[11].xxxx 129: FSGE TEMP[5].x, TEMP[3].xxxx, TEMP[11].xxxx 130: UIF TEMP[5].xxxx :0 131: MOV TEMP[7].x, TEMP[7].xxxx 132: ELSE :0 133: MOV TEMP[7].x, TEMP[9].xxxx 134: ENDIF 135: ADD TEMP[7].x, TEMP[10].xxxx, TEMP[7].xxxx 136: MUL TEMP[7].x, IMM[3].zzzz, TEMP[7].xxxx 137: MOV TEMP[9].x, TEMP[7].xxxx 138: UIF TEMP[5].xxxx :0 139: MOV TEMP[3].x, TEMP[3].xxxx 140: ELSE :0 141: MOV TEMP[3].x, TEMP[11].xxxx 142: ENDIF 143: UIF TEMP[5].xxxx :0 144: MOV TEMP[1].x, TEMP[1].xxxx 145: ELSE :0 146: MOV TEMP[1].x, TEMP[4].xxxx 147: ENDIF 148: MUL TEMP[4].x, IMM[3].zzzz, TEMP[1].xxxx 149: UIF TEMP[2].xxxx :0 150: MOV TEMP[11].x, IMM[0].xxxx 151: ELSE :0 152: MOV TEMP[11].x, TEMP[4].xxxx 153: ENDIF 154: ADD TEMP[11].x, IN[0].xxxx, TEMP[11].xxxx 155: UIF TEMP[2].xxxx :0 156: MOV TEMP[4].x, TEMP[4].xxxx 157: ELSE :0 158: MOV TEMP[4].x, IMM[0].xxxx 159: ENDIF 160: MOV TEMP[11].x, TEMP[11].xxxx 161: ADD TEMP[4].x, IN[0].yyyy, TEMP[4].xxxx 162: MOV TEMP[11].y, TEMP[4].xxxx 163: MUL TEMP[4].x, IMM[2].yyyy, TEMP[3].xxxx 164: MOV TEMP[3].y, IMM[0].xxxx 165: MOV TEMP[24], CONST[1][0] 166: MOV TEMP[3].x, CONST[1][0].xxxx 167: MOV TEMP[5].x, IMM[0].xxxx 168: MOV TEMP[5].y, CONST[1][0].yyyy 169: UIF TEMP[2].xxxx :0 170: MOV TEMP[3].xy, TEMP[3].xyxx 171: ELSE :0 172: MOV TEMP[3].xy, TEMP[5].xyxx 173: ENDIF 174: MOV TEMP[5].xy, TEMP[3].xyxx 175: MOV TEMP[6].x, IMM[1].xxxx 176: MOV TEMP[14].x, IMM[4].xxxx 177: MOV TEMP[17].x, IMM[4].xxxx 178: MOV TEMP[8].x, TEMP[7].xxxx 179: MOV TEMP[20].x, TEMP[7].xxxx 180: ADD TEMP[21].xy, TEMP[11].xyyy, TEMP[3].xyyy 181: ADD TEMP[3].xy, TEMP[11].xyyy, -TEMP[3].xyyy 182: BGNLOOP :0 183: ISLT TEMP[11].x, IMM[1].wwww, TEMP[6].xxxx 184: UIF TEMP[11].xxxx :0 185: BRK 186: ENDIF 187: MOV TEMP[25].x, TEMP[20].xxxx 188: NOT TEMP[26].x, TEMP[17].xxxx 189: UIF TEMP[26].xxxx :0 190: MOV TEMP[27].xy, TEMP[3].xyyy 191: MOV TEMP[27].w, IMM[0].xxxx 192: TXL TEMP[28], TEMP[27], SAMP[0], 2D 193: MOV TEMP[29], TEMP[28] 194: MAD TEMP[25].x, IMM[0].yyyy, TEMP[28].yyyy, TEMP[28].xxxx 195: ENDIF 196: MOV TEMP[30].x, TEMP[8].xxxx 197: NOT TEMP[31].x, TEMP[14].xxxx 198: UIF TEMP[31].xxxx :0 199: MOV TEMP[32].xy, TEMP[21].xyyy 200: MOV TEMP[32].w, IMM[0].xxxx 201: TXL TEMP[33], TEMP[32], SAMP[0], 2D 202: MOV TEMP[34], TEMP[33] 203: MAD TEMP[30].x, IMM[0].yyyy, TEMP[33].yyyy, TEMP[33].xxxx 204: ENDIF 205: ADD TEMP[35].x, TEMP[25].xxxx, -TEMP[9].xxxx 206: ABS TEMP[36].x, TEMP[35].xxxx 207: FSGE TEMP[37].x, TEMP[36].xxxx, TEMP[4].xxxx 208: OR TEMP[38].x, TEMP[17].xxxx, TEMP[37].xxxx 209: ADD TEMP[39].x, TEMP[30].xxxx, -TEMP[9].xxxx 210: ABS TEMP[40].x, TEMP[39].xxxx 211: FSGE TEMP[41].x, TEMP[40].xxxx, TEMP[4].xxxx 212: OR TEMP[42].x, TEMP[14].xxxx, TEMP[41].xxxx 213: AND TEMP[43].x, TEMP[38].xxxx, TEMP[42].xxxx 214: UIF TEMP[43].xxxx :0 215: BRK 216: ENDIF 217: ADD TEMP[44].xy, TEMP[3].xyyy, -TEMP[5].xyyy 218: UIF TEMP[38].xxxx :0 219: MOV TEMP[45].xy, TEMP[3].xyxx 220: ELSE :0 221: MOV TEMP[45].xy, TEMP[44].xyxx 222: ENDIF 223: ADD TEMP[46].xy, TEMP[21].xyyy, TEMP[5].xyyy 224: UIF TEMP[42].xxxx :0 225: MOV TEMP[47].xy, TEMP[21].xyxx 226: ELSE :0 227: MOV TEMP[47].xy, TEMP[46].xyxx 228: ENDIF 229: UADD TEMP[6].x, TEMP[6].xxxx, IMM[1].zzzz 230: MOV TEMP[14].x, TEMP[42].xxxx 231: MOV TEMP[17].x, TEMP[38].xxxx 232: MOV TEMP[8].x, TEMP[30].xxxx 233: MOV TEMP[20].x, TEMP[25].xxxx 234: MOV TEMP[21].xy, TEMP[47].xyxx 235: MOV TEMP[3].xy, TEMP[45].xyxx 236: ENDLOOP :0 237: ADD TEMP[4].x, IN[0].xxxx, -TEMP[3].xxxx 238: ADD TEMP[9].x, IN[0].yyyy, -TEMP[3].yyyy 239: UIF TEMP[2].xxxx :0 240: MOV TEMP[4].x, TEMP[4].xxxx 241: ELSE :0 242: MOV TEMP[4].x, TEMP[9].xxxx 243: ENDIF 244: ADD TEMP[9].x, TEMP[21].xxxx, -IN[0].xxxx 245: ADD TEMP[3].x, TEMP[21].yyyy, -IN[0].yyyy 246: UIF TEMP[2].xxxx :0 247: MOV TEMP[9].x, TEMP[9].xxxx 248: ELSE :0 249: MOV TEMP[9].x, TEMP[3].xxxx 250: ENDIF 251: FSLT TEMP[3].x, TEMP[4].xxxx, TEMP[9].xxxx 252: UIF TEMP[3].xxxx :0 253: MOV TEMP[11].x, TEMP[20].xxxx 254: ELSE :0 255: MOV TEMP[11].x, TEMP[8].xxxx 256: ENDIF 257: FSLT TEMP[10].x, TEMP[10].xxxx, TEMP[7].xxxx 258: FSLT TEMP[7].x, TEMP[11].xxxx, TEMP[7].xxxx 259: XOR TEMP[7].x, TEMP[10].xxxx, TEMP[7].xxxx 260: UIF TEMP[7].xxxx :0 261: MOV TEMP[1].x, TEMP[1].xxxx 262: ELSE :0 263: MOV TEMP[1].x, IMM[0].xxxx 264: ENDIF 265: ADD TEMP[7].x, TEMP[4].xxxx, TEMP[9].xxxx 266: UIF TEMP[3].xxxx :0 267: MOV TEMP[4].x, TEMP[4].xxxx 268: ELSE :0 269: MOV TEMP[4].x, TEMP[9].xxxx 270: ENDIF 271: RCP TEMP[7].x, TEMP[7].xxxx 272: MAD TEMP[4].x, -TEMP[7].xxxx, TEMP[4].xxxx, IMM[3].zzzz 273: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[1].xxxx 274: UIF TEMP[2].xxxx :0 275: MOV TEMP[1].x, IMM[0].xxxx 276: ELSE :0 277: MOV TEMP[1].x, TEMP[4].xxxx 278: ENDIF 279: ADD TEMP[1].x, IN[0].xxxx, TEMP[1].xxxx 280: UIF TEMP[2].xxxx :0 281: MOV TEMP[2].x, TEMP[4].xxxx 282: ELSE :0 283: MOV TEMP[2].x, IMM[0].xxxx 284: ENDIF 285: MOV TEMP[4].x, TEMP[1].xxxx 286: ADD TEMP[2].x, IN[0].yyyy, TEMP[2].xxxx 287: MOV TEMP[4].y, TEMP[2].xxxx 288: MOV TEMP[2].xy, TEMP[4].xyyy 289: MOV TEMP[2].w, IMM[0].xxxx 290: TXL TEMP[2].xyz, TEMP[2], SAMP[0], 2D 291: MAD TEMP[0].xyz, TEMP[13].xxxx, TEMP[0].xyzz, TEMP[2].xyzz 292: MAD TEMP[12].xyz, -TEMP[13].xxxx, TEMP[2].xyzz, TEMP[0].xyzz 293: ENDIF 294: MOV TEMP[0].w, IMM[3].wwww 295: MOV TEMP[0].x, TEMP[12].xxxx 296: MOV TEMP[0].y, TEMP[12].yyyy 297: MOV TEMP[0].z, TEMP[12].zzzz 298: MOV OUT[0], TEMP[0] 299: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %27 = load <8 x i32>, <8 x i32> addrspace(2)* %26, align 32, !tbaa !0 %28 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %29 = load <4 x i32>, <4 x i32> addrspace(2)* %28, align 16, !tbaa !0 %30 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %32 = bitcast float %30 to i32 %33 = bitcast float %31 to i32 %34 = insertelement <4 x i32> , i32 %32, i32 1 %35 = insertelement <4 x i32> %34, i32 %33, i32 2 %36 = insertelement <4 x i32> %35, i32 0, i32 3 %37 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %36, <8 x i32> %27, <4 x i32> %29, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %38 = extractelement <4 x float> %37, i32 0 %39 = extractelement <4 x float> %37, i32 1 %40 = bitcast float %30 to i32 %41 = bitcast float %31 to i32 %42 = insertelement <4 x i32> , i32 %40, i32 1 %43 = insertelement <4 x i32> %42, i32 %41, i32 2 %44 = insertelement <4 x i32> %43, i32 0, i32 3 %45 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %44, <8 x i32> %27, <4 x i32> %29, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %46 = extractelement <4 x float> %45, i32 0 %47 = extractelement <4 x float> %45, i32 1 %48 = bitcast float %30 to i32 %49 = bitcast float %31 to i32 %50 = insertelement <4 x i32> , i32 %48, i32 1 %51 = insertelement <4 x i32> %50, i32 %49, i32 2 %52 = insertelement <4 x i32> %51, i32 0, i32 3 %53 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %52, <8 x i32> %27, <4 x i32> %29, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %54 = extractelement <4 x float> %53, i32 0 %55 = extractelement <4 x float> %53, i32 1 %56 = extractelement <4 x float> %53, i32 2 %57 = bitcast float %30 to i32 %58 = bitcast float %31 to i32 %59 = insertelement <4 x i32> , i32 %57, i32 1 %60 = insertelement <4 x i32> %59, i32 %58, i32 2 %61 = insertelement <4 x i32> %60, i32 0, i32 3 %62 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %61, <8 x i32> %27, <4 x i32> %29, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %63 = extractelement <4 x float> %62, i32 0 %64 = extractelement <4 x float> %62, i32 1 %65 = bitcast float %30 to i32 %66 = bitcast float %31 to i32 %67 = insertelement <4 x i32> , i32 %65, i32 1 %68 = insertelement <4 x i32> %67, i32 %66, i32 2 %69 = insertelement <4 x i32> %68, i32 0, i32 3 %70 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %69, <8 x i32> %27, <4 x i32> %29, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %71 = extractelement <4 x float> %70, i32 0 %72 = extractelement <4 x float> %70, i32 1 %73 = fmul float %39, 0x3FFF694EE0000000 %74 = fadd float %73, %38 %75 = fmul float %47, 0x3FFF694EE0000000 %76 = fadd float %75, %46 %77 = fmul float %55, 0x3FFF694EE0000000 %78 = fadd float %77, %54 %79 = fmul float %64, 0x3FFF694EE0000000 %80 = fadd float %79, %63 %81 = fmul float %72, 0x3FFF694EE0000000 %82 = fadd float %81, %71 %83 = call float @llvm.maxnum.f32(float %78, float %74) %84 = call float @llvm.maxnum.f32(float %76, float %82) %85 = call float @llvm.maxnum.f32(float %84, float %80) %86 = call float @llvm.maxnum.f32(float %83, float %85) %87 = call float @llvm.minnum.f32(float %78, float %74) %88 = call float @llvm.minnum.f32(float %76, float %82) %89 = call float @llvm.minnum.f32(float %88, float %80) %90 = call float @llvm.minnum.f32(float %87, float %89) %91 = fsub float %86, %90 %92 = fmul float %86, 1.250000e-01 %93 = call float @llvm.maxnum.f32(float %92, float 0x3FA5555680000000) %94 = fcmp olt float %91, %93 br i1 %94, label %ENDIF, label %ELSE ELSE: ; preds = %main_body %95 = extractelement <4 x float> %70, i32 2 %96 = extractelement <4 x float> %62, i32 2 %97 = extractelement <4 x float> %45, i32 2 %98 = extractelement <4 x float> %37, i32 2 %99 = fadd float %82, %74 %100 = fadd float %76, %80 %101 = fadd float %99, %100 %102 = fmul float %101, 2.500000e-01 %103 = fsub float %102, %78 %104 = call float @fabs(float %103) %105 = fdiv float 1.000000e+00, %91 %106 = fmul float %104, %105 %107 = fadd float %106, -2.500000e-01 %108 = call float @llvm.maxnum.f32(float %107, float 0.000000e+00) %109 = fmul float %108, 0x3FF55551E0000000 %110 = call float @llvm.minnum.f32(float %109, float 7.500000e-01) %111 = bitcast float %30 to i32 %112 = bitcast float %31 to i32 %113 = insertelement <4 x i32> , i32 %111, i32 1 %114 = insertelement <4 x i32> %113, i32 %112, i32 2 %115 = insertelement <4 x i32> %114, i32 0, i32 3 %116 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %115, <8 x i32> %27, <4 x i32> %29, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %117 = extractelement <4 x float> %116, i32 0 %118 = extractelement <4 x float> %116, i32 1 %119 = extractelement <4 x float> %116, i32 2 %120 = bitcast float %30 to i32 %121 = bitcast float %31 to i32 %122 = insertelement <4 x i32> , i32 %120, i32 1 %123 = insertelement <4 x i32> %122, i32 %121, i32 2 %124 = insertelement <4 x i32> %123, i32 0, i32 3 %125 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %124, <8 x i32> %27, <4 x i32> %29, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %126 = extractelement <4 x float> %125, i32 0 %127 = extractelement <4 x float> %125, i32 1 %128 = extractelement <4 x float> %125, i32 2 %129 = bitcast float %30 to i32 %130 = bitcast float %31 to i32 %131 = insertelement <4 x i32> , i32 %129, i32 1 %132 = insertelement <4 x i32> %131, i32 %130, i32 2 %133 = insertelement <4 x i32> %132, i32 0, i32 3 %134 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %133, <8 x i32> %27, <4 x i32> %29, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %135 = extractelement <4 x float> %134, i32 0 %136 = extractelement <4 x float> %134, i32 1 %137 = extractelement <4 x float> %134, i32 2 %138 = bitcast float %30 to i32 %139 = bitcast float %31 to i32 %140 = insertelement <4 x i32> , i32 %138, i32 1 %141 = insertelement <4 x i32> %140, i32 %139, i32 2 %142 = insertelement <4 x i32> %141, i32 0, i32 3 %143 = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> %142, <8 x i32> %27, <4 x i32> %29, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %144 = extractelement <4 x float> %143, i32 0 %145 = extractelement <4 x float> %143, i32 1 %146 = extractelement <4 x float> %143, i32 2 %147 = fadd float %126, %117 %148 = fadd float %127, %118 %149 = fadd float %128, %119 %150 = fadd float %135, %144 %151 = fadd float %136, %145 %152 = fadd float %137, %146 %153 = fadd float %147, %150 %154 = fadd float %148, %151 %155 = fadd float %149, %152 %156 = fadd float %38, %46 %157 = fadd float %39, %47 %158 = fadd float %98, %97 %159 = fadd float %54, %63 %160 = fadd float %55, %64 %161 = fadd float %56, %96 %162 = fadd float %159, %71 %163 = fadd float %160, %72 %164 = fadd float %161, %95 %165 = fadd float %156, %162 %166 = fadd float %157, %163 %167 = fadd float %158, %164 %168 = fadd float %153, %165 %169 = fadd float %154, %166 %170 = fadd float %155, %167 %171 = fmul float %168, 0x3FBC71C540000000 %172 = fmul float %169, 0x3FBC71C540000000 %173 = fmul float %170, 0x3FBC71C540000000 %174 = fmul float %118, 0x3FFF694EE0000000 %175 = fadd float %174, %117 %176 = fmul float %175, 2.500000e-01 %177 = fmul float %127, 0x3FFF694EE0000000 %178 = fadd float %177, %126 %179 = fmul float %178, 2.500000e-01 %180 = fmul float %136, 0x3FFF694EE0000000 %181 = fadd float %180, %135 %182 = fmul float %181, 2.500000e-01 %183 = fmul float %145, 0x3FFF694EE0000000 %184 = fadd float %183, %144 %185 = fmul float %184, 2.500000e-01 %186 = fmul float %76, -5.000000e-01 %187 = fadd float %186, %176 %188 = fadd float %187, %182 %189 = call float @fabs(float %188) %190 = fmul float %74, 5.000000e-01 %191 = fsub float %190, %78 %192 = fmul float %82, 5.000000e-01 %193 = fadd float %192, %191 %194 = call float @fabs(float %193) %195 = fadd float %189, %194 %196 = fmul float %80, -5.000000e-01 %197 = fadd float %196, %179 %198 = fadd float %185, %197 %199 = call float @fabs(float %198) %200 = fadd float %195, %199 %201 = fmul float %74, -5.000000e-01 %202 = fadd float %201, %176 %203 = fadd float %202, %179 %204 = call float @fabs(float %203) %205 = fmul float %76, 5.000000e-01 %206 = fsub float %205, %78 %207 = fmul float %80, 5.000000e-01 %208 = fadd float %207, %206 %209 = call float @fabs(float %208) %210 = fadd float %204, %209 %211 = fmul float %82, -5.000000e-01 %212 = fadd float %211, %182 %213 = fadd float %185, %212 %214 = call float @fabs(float %213) %215 = fadd float %210, %214 %216 = fcmp oge float %200, %215 %. = select i1 %216, float %25, float %24 %217 = fsub float -0.000000e+00, %. %temp28.0 = select i1 %216, float %74, float %76 %.270 = select i1 %216, float %82, float %80 %218 = fsub float %temp28.0, %78 %219 = call float @fabs(float %218) %220 = fsub float %.270, %78 %221 = call float @fabs(float %220) %222 = fcmp oge float %219, %221 %temp28.1 = select i1 %222, float %temp28.0, float %.270 %223 = fadd float %78, %temp28.1 %224 = fmul float %223, 5.000000e-01 %.271 = select i1 %222, float %219, float %221 %temp4.0 = select i1 %222, float %217, float %. %225 = fmul float %temp4.0, 5.000000e-01 %.272 = select i1 %216, float 0.000000e+00, float %225 %226 = fadd float %30, %.272 %temp16.1 = select i1 %216, float %225, float 0.000000e+00 %227 = fadd float %31, %temp16.1 %228 = fmul float %.271, 2.500000e-01 %.273 = select i1 %216, float 0.000000e+00, float %25 %.274 = select i1 %216, float %24, float 0.000000e+00 %229 = fadd float %226, %.274 %230 = fadd float %227, %.273 %231 = fsub float %226, %.274 %232 = fsub float %227, %.273 %233 = bitcast <8 x i32> %27 to <32 x i8> %234 = bitcast <4 x i32> %29 to <16 x i8> %235 = bitcast <8 x i32> %27 to <32 x i8> %236 = bitcast <4 x i32> %29 to <16 x i8> br label %LOOP ENDIF: ; preds = %main_body, %ENDLOOP %temp50.0 = phi float [ %280, %ENDLOOP ], [ %56, %main_body ] %temp49.0 = phi float [ %278, %ENDLOOP ], [ %55, %main_body ] %temp48.0 = phi float [ %276, %ENDLOOP ], [ %54, %main_body ] %237 = call i32 @llvm.SI.packf16(float %temp48.0, float %temp49.0) %238 = bitcast i32 %237 to float %239 = call i32 @llvm.SI.packf16(float %temp50.0, float 1.000000e+00) %240 = bitcast i32 %239 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %238, float %240, float %238, float %240) ret void LOOP: ; preds = %ENDIF228, %ELSE %temp68.0 = phi float [ 0.000000e+00, %ELSE ], [ %308, %ENDIF228 ] %temp80.0 = phi float [ %224, %ELSE ], [ %temp100.0, %ENDIF228 ] %temp84.0 = phi float [ %229, %ELSE ], [ %temp188.0, %ENDIF228 ] %temp85.0 = phi float [ %230, %ELSE ], [ %temp189.0, %ENDIF228 ] %temp56.0 = phi float [ 0.000000e+00, %ELSE ], [ %312, %ENDIF228 ] %temp32.0 = phi float [ %224, %ELSE ], [ %temp120.0, %ENDIF228 ] %temp24.0 = phi float [ 0.000000e+00, %ELSE ], [ %327, %ENDIF228 ] %temp13.1 = phi float [ %232, %ELSE ], [ %temp13.1., %ENDIF228 ] %temp12.2 = phi float [ %231, %ELSE ], [ %temp12.2., %ENDIF228 ] %241 = bitcast float %temp24.0 to i32 %242 = icmp sgt i32 %241, 15 br i1 %242, label %ENDLOOP, label %ENDIF219 ENDLOOP: ; preds = %ENDIF225, %LOOP %243 = fsub float %30, %temp12.2 %244 = fsub float %31, %temp13.1 %.275 = select i1 %216, float %243, float %244 %245 = fsub float %temp84.0, %30 %246 = fsub float %temp85.0, %31 %temp36.1 = select i1 %216, float %245, float %246 %247 = fcmp olt float %.275, %temp36.1 %temp80.0259.temp32.0265 = select i1 %247, float %temp80.0, float %temp32.0 %248 = fcmp olt float %78, %224 %249 = fcmp olt float %temp80.0259.temp32.0265, %224 %250 = xor i1 %248, %249 %temp4.1 = select i1 %250, float %temp4.0, float 0.000000e+00 %251 = fadd float %.275, %temp36.1 %.275.temp36.1 = select i1 %247, float %.275, float %temp36.1 %252 = fdiv float 1.000000e+00, %251 %253 = fmul float %252, %.275.temp36.1 %254 = fsub float 5.000000e-01, %253 %255 = fmul float %254, %temp4.1 %temp4.2 = select i1 %216, float 0.000000e+00, float %255 %256 = fadd float %30, %temp4.2 %.276 = select i1 %216, float %255, float 0.000000e+00 %257 = fadd float %31, %.276 %258 = bitcast float %256 to i32 %259 = bitcast float %257 to i32 %260 = insertelement <4 x i32> undef, i32 %258, i32 0 %261 = insertelement <4 x i32> %260, i32 %259, i32 1 %262 = insertelement <4 x i32> %261, i32 0, i32 2 %263 = bitcast <8 x i32> %27 to <32 x i8> %264 = bitcast <4 x i32> %29 to <16 x i8> %265 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %262, <32 x i8> %263, <16 x i8> %264, i32 2) %266 = extractelement <4 x float> %265, i32 0 %267 = extractelement <4 x float> %265, i32 1 %268 = extractelement <4 x float> %265, i32 2 %269 = fmul float %110, %171 %270 = fadd float %269, %266 %271 = fmul float %110, %172 %272 = fadd float %271, %267 %273 = fmul float %110, %173 %274 = fadd float %273, %268 %275 = fmul float %110, %266 %276 = fsub float %270, %275 %277 = fmul float %110, %267 %278 = fsub float %272, %277 %279 = fmul float %110, %268 %280 = fsub float %274, %279 br label %ENDIF ENDIF219: ; preds = %LOOP %281 = bitcast float %temp68.0 to i32 %282 = icmp eq i32 %281, -1 br i1 %282, label %ENDIF222, label %IF223 IF223: ; preds = %ENDIF219 %283 = bitcast float %temp12.2 to i32 %284 = bitcast float %temp13.1 to i32 %285 = insertelement <4 x i32> undef, i32 %283, i32 0 %286 = insertelement <4 x i32> %285, i32 %284, i32 1 %287 = insertelement <4 x i32> %286, i32 0, i32 2 %288 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %287, <32 x i8> %233, <16 x i8> %234, i32 2) %289 = extractelement <4 x float> %288, i32 0 %290 = extractelement <4 x float> %288, i32 1 %291 = fmul float %290, 0x3FFF694EE0000000 %292 = fadd float %291, %289 br label %ENDIF222 ENDIF222: ; preds = %ENDIF219, %IF223 %temp100.0 = phi float [ %292, %IF223 ], [ %temp80.0, %ENDIF219 ] %293 = bitcast float %temp56.0 to i32 %294 = icmp eq i32 %293, -1 br i1 %294, label %ENDIF225, label %IF226 IF226: ; preds = %ENDIF222 %295 = bitcast float %temp84.0 to i32 %296 = bitcast float %temp85.0 to i32 %297 = insertelement <4 x i32> undef, i32 %295, i32 0 %298 = insertelement <4 x i32> %297, i32 %296, i32 1 %299 = insertelement <4 x i32> %298, i32 0, i32 2 %300 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %299, <32 x i8> %235, <16 x i8> %236, i32 2) %301 = extractelement <4 x float> %300, i32 0 %302 = extractelement <4 x float> %300, i32 1 %303 = fmul float %302, 0x3FFF694EE0000000 %304 = fadd float %303, %301 br label %ENDIF225 ENDIF225: ; preds = %ENDIF222, %IF226 %temp120.0 = phi float [ %304, %IF226 ], [ %temp32.0, %ENDIF222 ] %305 = fsub float %temp100.0, %224 %306 = call float @fabs(float %305) %307 = fcmp oge float %306, %228 %308 = select i1 %307, float 0xFFFFFFFFE0000000, float %temp68.0 %309 = fsub float %temp120.0, %224 %310 = call float @fabs(float %309) %311 = fcmp oge float %310, %228 %312 = select i1 %311, float 0xFFFFFFFFE0000000, float %temp56.0 %313 = bitcast float %308 to i32 %314 = bitcast float %312 to i32 %315 = and i32 %313, %314 %316 = icmp eq i32 %315, 0 br i1 %316, label %ENDIF228, label %ENDLOOP ENDIF228: ; preds = %ENDIF225 %317 = fsub float %temp12.2, %.274 %318 = fsub float %temp13.1, %.273 %319 = bitcast float %308 to i32 %320 = icmp ne i32 %319, 0 %temp12.2. = select i1 %320, float %temp12.2, float %317 %temp13.1. = select i1 %320, float %temp13.1, float %318 %321 = fadd float %temp84.0, %.274 %322 = fadd float %temp85.0, %.273 %323 = bitcast float %312 to i32 %324 = icmp ne i32 %323, 0 %temp188.0 = select i1 %324, float %temp84.0, float %321 %temp189.0 = select i1 %324, float %temp85.0, float %322 %325 = bitcast float %temp24.0 to i32 %326 = add i32 %325, 1 %327 = bitcast i32 %326 to float br label %LOOP } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 v_interp_p1_f32 v3, v0, 0, 0, [m0] ; C80C0000 v_interp_p2_f32 v3, [v3], v1, 0, 0, [m0] ; C80D0001 v_interp_p1_f32 v4, v0, 1, 0, [m0] ; C8100100 v_interp_p2_f32 v4, [v4], v1, 1, 0, [m0] ; C8110101 v_mov_b32_e32 v5, 0 ; 7E0A0280 v_mov_b32_e32 v2, 0x3f00 ; 7E0402FF 00003F00 v_mov_b32_e32 v9, 0x3f003f ; 7E1202FF 003F003F v_mov_b32_e32 v10, v3 ; 7E140303 v_mov_b32_e32 v11, v4 ; 7E160304 v_mov_b32_e32 v12, v5 ; 7E180305 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l_o v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[2:5], s[12:19], s[8:11] ; F0D00F00 00430E02 v_mov_b32_e32 v6, v3 ; 7E0C0303 v_mov_b32_e32 v7, v4 ; 7E0E0304 v_mov_b32_e32 v24, 0x10001 ; 7E3002FF 00010001 v_mov_b32_e32 v8, v5 ; 7E100305 v_mov_b32_e32 v25, v3 ; 7E320303 v_mov_b32_e32 v28, 0x100 ; 7E3802FF 00000100 v_mov_b32_e32 v29, v3 ; 7E3A0303 v_mov_b32_e32 v26, v4 ; 7E340304 image_sample_l_o v[20:23], 15, 0, 0, 0, 0, 0, 0, 0, v[9:12], s[12:19], s[8:11] ; F0D00F00 00431409 v_mov_b32_e32 v30, v4 ; 7E3C0304 v_mov_b32_e32 v27, v5 ; 7E360305 s_waitcnt vmcnt(1) ; BF8C0771 image_sample_l_o v[17:19], 7, 0, 0, 0, 0, 0, 0, 0, v[5:8], s[12:19], s[8:11] ; F0D00700 00431105 s_waitcnt vmcnt(1) ; BF8C0771 image_sample_l_o v[23:26], 15, 0, 0, 0, 0, 0, 0, 0, v[24:27], s[12:19], s[8:11] ; F0D00F00 00431718 v_mov_b32_e32 v31, v5 ; 7E3E0305 image_sample_l_o v[5:8], 15, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[12:19], s[8:11] ; F0D00F00 0043051C v_mov_b32_e32 v0, 0x3ffb4a77 ; 7E0002FF 3FFB4A77 v_mad_f32 v10, v0, v15, v14 ; D282000A 043A1F00 v_mad_f32 v11, v0, v21, v20 ; D282000B 04522B00 s_waitcnt vmcnt(2) ; BF8C0772 v_mad_f32 v1, v0, v18, v17 ; D2820001 04462500 s_waitcnt vmcnt(1) ; BF8C0771 v_mad_f32 v12, v0, v24, v23 ; D282000C 045E3100 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v13, v0, v6, v5 ; D282000D 04160D00 v_max3_f32 v0, v11, v13, v12 ; D2A80000 04321B0B v_max3_f32 v2, v1, v10, v0 ; D2A80002 04021501 v_min3_f32 v0, v11, v13, v12 ; D2A20000 04321B0B v_min3_f32 v0, v1, v10, v0 ; D2A20000 04021501 v_subrev_f32_e32 v0, v0, v2 ; 0A000500 v_mul_f32_e32 v2, 0x3e000000, v2 ; 100404FF 3E000000 v_max_f32_e32 v2, 0x3d2aaab4, v2 ; 200404FF 3D2AAAB4 v_cmp_nlt_f32_e32 vcc, v0, v2 ; 7C1C0500 s_and_saveexec_b64 s[6:7], vcc ; BE86246A s_xor_b64 s[6:7], exec, s[6:7] ; 8986067E s_cbranch_execz BB0_3 ; BF880000 v_rcp_f32_e32 v0, v0 ; 7E005500 s_load_dwordx4 s[20:23], s[2:3], 0x4 ; C08A0304 v_add_f32_e32 v2, v10, v13 ; 06041B0A v_add_f32_e32 v8, v12, v11 ; 0610170C v_add_f32_e32 v2, v8, v2 ; 06040508 v_mov_b32_e32 v26, 0x3e800000 ; 7E3402FF 3E800000 v_mad_f32 v2, v2, v26, -v1 ; D2820002 84063502 v_mov_b32_e32 v8, 0xbe800000 ; 7E1002FF BE800000 v_mad_f32 v0, |v2|, v0, v8 ; D2820100 04220102 v_max_f32_e32 v0, 0, v0 ; 20000080 v_mul_f32_e32 v0, 0x3faaaa8f, v0 ; 100000FF 3FAAAA8F v_min_f32_e32 v0, 0x3f400000, v0 ; 1E0000FF 3F400000 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[20:23], 0x0 ; C2021500 s_buffer_load_dword s5, s[20:23], 0x1 ; C2029501 v_mov_b32_e32 v2, 0 ; 7E040280 v_add_f32_e32 v8, v20, v14 ; 06101D14 v_add_f32_e32 v9, v21, v15 ; 06121F15 v_add_f32_e32 v14, v22, v16 ; 061C2116 v_add_f32_e32 v15, v23, v17 ; 061E2317 v_add_f32_e32 v5, v5, v15 ; 060A1F05 v_add_f32_e32 v15, v24, v18 ; 061E2518 v_add_f32_e32 v16, v25, v19 ; 06202719 v_add_f32_e32 v6, v6, v15 ; 060C1F06 v_mov_b32_e32 v20, 0 ; 7E280280 v_mov_b32_e32 v17, 0x3f3f3f ; 7E2202FF 003F3F3F v_mov_b32_e32 v18, v3 ; 7E240303 v_mov_b32_e32 v19, v4 ; 7E260304 image_sample_l_o v[21:23], 7, 0, 0, 0, 0, 0, 0, 0, v[17:20], s[12:19], s[8:11] ; F0D00700 00431511 v_mov_b32_e32 v17, 0x13f01 ; 7E2202FF 00013F01 v_mov_b32_e32 v18, v3 ; 7E240303 v_mov_b32_e32 v19, v4 ; 7E260304 image_sample_l_o v[27:29], 7, 0, 0, 0, 0, 0, 0, 0, v[17:20], s[12:19], s[8:11] ; F0D00700 00431B11 v_mov_b32_e32 v17, 0x3f013f ; 7E2202FF 003F013F v_mov_b32_e32 v18, v3 ; 7E240303 v_add_f32_e32 v7, v7, v16 ; 060E2107 v_mov_b32_e32 v19, v4 ; 7E260304 image_sample_l_o v[30:32], 7, 0, 0, 0, 0, 0, 0, 0, v[17:20], s[12:19], s[8:11] ; F0D00700 00431E11 v_mov_b32_e32 v17, 0x10101 ; 7E2202FF 00010101 s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 v_add_f32_e32 v15, v21, v27 ; 061E3715 v_add_f32_e32 v16, v22, v28 ; 06203916 v_add_f32_e32 v23, v23, v29 ; 062E3B17 v_mov_b32_e32 v24, 0x3ffb4a77 ; 7E3002FF 3FFB4A77 v_mad_f32 v21, v24, v22, v21 ; D2820015 04562D18 v_mov_b32_e32 v18, v3 ; 7E240303 v_mad_f32 v22, v24, v28, v27 ; D2820016 046E3918 v_mov_b32_e32 v19, v4 ; 7E260304 image_sample_l_o v[17:19], 7, 0, 0, 0, 0, 0, 0, 0, v[17:20], s[12:19], s[8:11] ; F0D00700 00431111 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v20, v17, v30 ; 06283D11 v_add_f32_e32 v25, v18, v31 ; 06323F12 v_add_f32_e32 v19, v19, v32 ; 06264113 v_mad_f32 v27, v24, v31, v30 ; D282001B 047A3F18 v_mad_f32 v17, v24, v18, v17 ; D2820011 04462518 v_add_f32_e32 v15, v20, v15 ; 061E1F14 v_add_f32_e32 v16, v25, v16 ; 06202119 v_add_f32_e32 v18, v19, v23 ; 06242F13 v_add_f32_e32 v5, v5, v8 ; 060A1105 v_add_f32_e32 v6, v6, v9 ; 060C1306 v_add_f32_e32 v7, v7, v14 ; 060E1D07 v_add_f32_e32 v5, v5, v15 ; 060A1F05 v_add_f32_e32 v6, v6, v16 ; 060C2106 v_add_f32_e32 v8, v7, v18 ; 06102507 v_mov_b32_e32 v9, 0x3de38e2a ; 7E1202FF 3DE38E2A v_mul_f32_e32 v7, v9, v5 ; 100E0B09 v_mul_f32_e32 v6, v9, v6 ; 100C0D09 v_mul_f32_e32 v5, v9, v8 ; 100A1109 v_mul_f32_e32 v9, v26, v21 ; 10122B1A v_mad_f32 v8, -0.5, v11, v9 ; D2820008 042616F1 v_mac_f32_e32 v8, v26, v27 ; 3E10371A v_mad_f32 v14, 0.5, v10, -v1 ; D282000E 840614F0 v_mac_f32_e32 v14, 0.5, v13 ; 3E1C1AF0 v_add_f32_e64 v8, |v8|, |v14| ; D2060308 00021D08 v_mul_f32_e32 v14, v26, v22 ; 101C2D1A v_mul_f32_e32 v15, v26, v27 ; 101E371A v_mac_f32_e32 v14, -0.5, v12 ; 3E1C18F1 v_mac_f32_e32 v14, v26, v17 ; 3E1C231A v_add_f32_e64 v8, v8, |v14| ; D2060208 00021D08 v_mac_f32_e32 v9, -0.5, v10 ; 3E1214F1 v_mac_f32_e32 v9, v26, v22 ; 3E122D1A v_mad_f32 v14, 0.5, v11, -v1 ; D282000E 840616F0 v_mac_f32_e32 v14, 0.5, v12 ; 3E1C18F0 v_add_f32_e64 v9, |v9|, |v14| ; D2060309 00021D09 v_mac_f32_e32 v15, -0.5, v13 ; 3E1E1AF1 v_mac_f32_e32 v15, v26, v17 ; 3E1E231A v_add_f32_e64 v9, v9, |v15| ; D2060209 00021F09 v_cmp_ge_f32_e32 vcc, v8, v9 ; 7C0C1308 v_mov_b32_e32 v14, s4 ; 7E1C0204 v_mov_b32_e32 v15, s5 ; 7E1E0205 v_cndmask_b32_e32 v14, v14, v15 ; 001C1F0E v_cndmask_b32_e32 v10, v11, v10 ; 0014150B v_cndmask_b32_e32 v11, v12, v13 ; 00161B0C v_subrev_f32_e32 v12, v1, v10 ; 0A181501 v_mov_b32_e32 v13, 0x7fffffff ; 7E1A02FF 7FFFFFFF v_and_b32_e32 v15, v12, v13 ; 361E1B0C v_subrev_f32_e32 v16, v1, v11 ; 0A201701 v_and_b32_e32 v13, v16, v13 ; 361A1B10 v_cmp_ge_f32_e64 s[0:1], |v12|, |v16| ; D00C0300 0002210C v_cndmask_b32_e64 v10, v11, v10, s[0:1] ; D200000A 0002150B v_mov_b32_e32 v16, s5 ; 7E200205 v_mov_b32_e32 v17, s4 ; 7E220204 v_cndmask_b32_e64 v12, v13, v15, s[0:1] ; D200000C 00021F0D v_xor_b32_e32 v13, 0x80000000, v14 ; 3A1A1CFF 80000000 v_add_f32_e32 v10, v10, v1 ; 0614030A v_mul_f32_e32 v11, 0.5, v10 ; 101614F0 v_cndmask_b32_e64 v10, v14, v13, s[0:1] ; D200000A 00021B0E v_mul_f32_e32 v13, 0.5, v10 ; 101A14F0 v_cndmask_b32_e64 v14, v13, 0, vcc ; D200000E 01A9010D v_add_f32_e32 v15, v14, v3 ; 061E070E v_cndmask_b32_e32 v13, 0, v13 ; 001A1A80 v_add_f32_e32 v13, v13, v4 ; 061A090D v_mul_f32_e32 v12, v26, v12 ; 1018191A v_cndmask_b32_e64 v14, v16, 0, vcc ; D200000E 01A90110 v_cndmask_b32_e32 v16, 0, v17 ; 00202280 v_add_f32_e32 v27, v16, v15 ; 06361F10 v_add_f32_e32 v28, v14, v13 ; 06381B0E v_subrev_f32_e32 v26, v16, v15 ; 0A341F10 v_subrev_f32_e32 v25, v14, v13 ; 0A321B0E v_mov_b32_e32 v20, v11 ; 7E28030B v_mov_b32_e32 v17, 0 ; 7E220280 v_mov_b32_e32 v23, v11 ; 7E2E030B v_mov_b32_e32 v24, 0 ; 7E300280 s_mov_b64 s[0:1], 0 ; BE800480 v_mov_b32_e32 v13, v23 ; 7E1A0317 v_mov_b32_e32 v15, v20 ; 7E1E0314 v_cmp_gt_i32_e32 vcc, 16, v24 ; 7D083090 v_mov_b32_e32 v18, v26 ; 7E24031A v_mov_b32_e32 v22, v28 ; 7E2C031C v_mov_b32_e32 v19, v25 ; 7E260319 v_mov_b32_e32 v21, v27 ; 7E2A031B s_and_saveexec_b64 s[4:5], vcc ; BE84246A s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E s_cbranch_execz BB0_9 ; BF880000 v_cmp_ne_i32_e32 vcc, -1, v2 ; 7D0A04C1 v_mov_b32_e32 v20, v15 ; 7E28030F s_and_saveexec_b64 s[20:21], vcc ; BE94246A s_xor_b64 s[20:21], exec, s[20:21] ; 8994147E v_mov_b32_e32 v20, 0 ; 7E280280 image_sample_l v[25:26], 3, 0, 0, 0, 0, 0, 0, 0, v[18:21], s[12:19], s[8:11] ; F0900300 00431912 s_waitcnt vmcnt(0) ; BF8C0770 v_madmk_f32_e32 v20, v26, v25, 0x3ffb4a77 ; 4028331A 3FFB4A77 s_or_b64 exec, exec, s[20:21] ; 88FE147E v_cmp_ne_i32_e32 vcc, -1, v17 ; 7D0A22C1 v_mov_b32_e32 v23, v13 ; 7E2E030D s_and_saveexec_b64 s[20:21], vcc ; BE94246A s_xor_b64 s[20:21], exec, s[20:21] ; 8994147E v_mov_b32_e32 v23, 0 ; 7E2E0280 image_sample_l v[25:26], 3, 0, 0, 0, 0, 0, 0, 0, v[21:24], s[12:19], s[8:11] ; F0900300 00431915 s_waitcnt vmcnt(0) ; BF8C0770 v_madmk_f32_e32 v23, v26, v25, 0x3ffb4a77 ; 402E331A 3FFB4A77 s_or_b64 exec, exec, s[20:21] ; 88FE147E v_subrev_f32_e32 v25, v11, v20 ; 0A32290B v_cmp_ge_f32_e64 s[20:21], |v25|, v12 ; D00C0114 00021919 v_cndmask_b32_e64 v2, v2, -1, s[20:21] ; D2000002 00518302 v_subrev_f32_e32 v25, v11, v23 ; 0A322F0B v_cmp_ge_f32_e64 s[20:21], |v25|, v12 ; D00C0114 00021919 v_cndmask_b32_e64 v17, v17, -1, s[20:21] ; D2000011 00518311 v_and_b32_e32 v25, v17, v2 ; 36320511 v_cmp_eq_i32_e32 vcc, 0, v25 ; 7D043280 s_and_saveexec_b64 s[20:21], vcc ; BE94246A s_xor_b64 s[20:21], exec, s[20:21] ; 8994147E s_cbranch_execz BB0_5 ; BF880000 v_subrev_f32_e32 v25, v16, v18 ; 0A322510 v_subrev_f32_e32 v27, v14, v19 ; 0A36270E v_cmp_ne_i32_e32 vcc, 0, v2 ; 7D0A0480 v_cndmask_b32_e32 v26, v25, v18 ; 00342519 v_cndmask_b32_e32 v25, v27, v19 ; 0032271B v_add_f32_e32 v27, v16, v21 ; 06362B10 v_add_f32_e32 v28, v14, v22 ; 06382D0E v_cmp_ne_i32_e32 vcc, 0, v17 ; 7D0A2280 v_cndmask_b32_e32 v27, v27, v21 ; 00362B1B v_cndmask_b32_e32 v28, v28, v22 ; 00382D1C v_add_i32_e32 v24, 1, v24 ; 4A303081 s_or_b64 exec, exec, s[20:21] ; 88FE147E s_or_b64 s[0:1], s[20:21], s[0:1] ; 88800014 s_or_b64 exec, exec, s[4:5] ; 88FE047E s_or_b64 s[0:1], s[4:5], s[0:1] ; 88800004 s_andn2_b64 exec, exec, s[0:1] ; 8AFE007E s_cbranch_execnz BB0_4 ; BF890000 s_or_b64 exec, exec, s[0:1] ; 88FE007E v_cmp_ge_f32_e32 vcc, v8, v9 ; 7C0C1308 v_cmp_lt_f32_e64 s[0:1], v1, v11 ; D0020000 00021701 v_subrev_f32_e32 v1, v18, v3 ; 0A020712 v_subrev_f32_e32 v2, v19, v4 ; 0A040913 v_subrev_f32_e32 v8, v3, v21 ; 0A102B03 v_subrev_f32_e32 v9, v4, v22 ; 0A122D04 v_cndmask_b32_e32 v1, v2, v1 ; 00020302 v_cndmask_b32_e32 v2, v9, v8 ; 00041109 v_cmp_lt_f32_e64 s[4:5], v1, v2 ; D0020004 00020501 v_cndmask_b32_e64 v8, v13, v15, s[4:5] ; D2000008 00121F0D v_cndmask_b32_e64 v9, v2, v1, s[4:5] ; D2000009 00120302 v_add_f32_e32 v1, v2, v1 ; 06020302 v_rcp_f32_e32 v1, v1 ; 7E025501 v_cmp_lt_f32_e64 s[4:5], v8, v11 ; D0020004 00021708 s_xor_b64 s[0:1], s[0:1], s[4:5] ; 89800400 v_cndmask_b32_e64 v2, 0, v10, s[0:1] ; D2000002 00021480 v_mad_f32 v1, -v1, v9, 0.5 ; D2820001 23C21301 v_mul_f32_e32 v1, v2, v1 ; 10020302 v_cndmask_b32_e64 v2, v1, 0, vcc ; D2000002 01A90101 v_cndmask_b32_e32 v1, 0, v1 ; 00020280 v_add_f32_e32 v8, v2, v3 ; 06100702 v_add_f32_e32 v9, v1, v4 ; 06120901 v_mov_b32_e32 v10, 0 ; 7E140280 image_sample_l v[1:3], 7, 0, 0, 0, 0, 0, 0, 0, v[8:11], s[12:19], s[8:11] ; F0900700 00430108 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v4, v7, v0, v1 ; D2820004 04060107 v_mad_f32 v6, v6, v0, v2 ; D2820006 040A0106 v_mad_f32 v5, v5, v0, v3 ; D2820005 040E0105 v_mad_f32 v17, -v0, v1, v4 ; D2820011 24120300 v_mad_f32 v18, -v0, v2, v6 ; D2820012 241A0500 v_mad_f32 v19, -v0, v3, v5 ; D2820013 24160700 s_or_b64 exec, exec, s[6:7] ; 88FE067E v_cvt_pkrtz_f16_f32_e32 v0, v17, v18 ; 5E002511 v_cvt_pkrtz_f16_f32_e64 v1, v19, 1.0 ; D25E0001 0001E513 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 36 Code Size: 1400 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..47] DCL TEMP[0..2], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 510.0200, 0.1000} IMM[1] FLT32 { 1.1000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: MUL TEMP[1].x, IMM[0].zzzz, IN[1].zzzz 2: ADD TEMP[2].x, TEMP[1].xxxx, IMM[0].wwww 3: F2I TEMP[2].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: UARL ADDR[0].x, TEMP[2].xxxx 6: DP4 TEMP[0].x, IN[2], CONST[ADDR[0].x] 7: ADD TEMP[1].x, IMM[1].xxxx, TEMP[1].xxxx 8: F2I TEMP[1].x, TEMP[1].xxxx 9: UARL ADDR[0].x, TEMP[1].xxxx 10: DP4 TEMP[1].x, IN[2], CONST[ADDR[0].x] 11: MOV TEMP[0].y, TEMP[1].xxxx 12: MOV OUT[2], IN[1] 13: MOV OUT[1], IN[0] 14: MOV OUT[0], TEMP[0] 15: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %14 = load <16 x i8>, <16 x i8> addrspace(2)* %13, align 16, !tbaa !0 %15 = add i32 %5, %7 %16 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %14, i32 0, i32 %15) %17 = extractelement <4 x float> %16, i32 0 %18 = extractelement <4 x float> %16, i32 1 %19 = extractelement <4 x float> %16, i32 2 %20 = extractelement <4 x float> %16, i32 3 %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = extractelement <4 x float> %24, i32 1 %27 = extractelement <4 x float> %24, i32 2 %28 = extractelement <4 x float> %24, i32 3 %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = fmul float %27, 0x407FE051E0000000 %38 = fadd float %37, 0x3FB99999A0000000 %39 = fptosi float %38 to i32 %40 = shl i32 %39, 4 %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %40) %42 = shl i32 %39, 4 %43 = or i32 %42, 4 %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %43) %45 = shl i32 %39, 4 %46 = or i32 %45, 8 %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %46) %48 = shl i32 %39, 4 %49 = or i32 %48, 12 %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %49) %51 = fmul float %33, %41 %52 = fmul float %34, %44 %53 = fadd float %51, %52 %54 = fmul float %35, %47 %55 = fadd float %53, %54 %56 = fmul float %36, %50 %57 = fadd float %55, %56 %58 = fadd float %37, 0x3FF19999A0000000 %59 = fptosi float %58 to i32 %60 = shl i32 %59, 4 %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %60) %62 = shl i32 %59, 4 %63 = or i32 %62, 4 %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %63) %65 = shl i32 %59, 4 %66 = or i32 %65, 8 %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %66) %68 = shl i32 %59, 4 %69 = or i32 %68, 12 %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %69) %71 = fmul float %33, %61 %72 = fmul float %34, %64 %73 = fadd float %71, %72 %74 = fmul float %35, %67 %75 = fadd float %73, %74 %76 = fmul float %36, %70 %77 = fadd float %75, %76 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %25, float %26, float %27, float %28) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %57, float %77, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_add_i32_e32 v0, s10, v0 ; 4A00000A v_mov_b32_e32 v1, 0x43ff028f ; 7E0202FF 43FF028F s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[12:15], 0 idxen ; E00C2000 80030600 buffer_load_format_xyzw v[10:13], v0, s[8:11], 0 idxen ; E00C2000 80020A00 s_waitcnt vmcnt(1) ; BF8C0771 v_madak_f32_e32 v0, v8, v1, 0x3dcccccd ; 42000308 3DCCCCCD v_madak_f32_e32 v1, v8, v1, 0x3f8ccccd ; 42020308 3F8CCCCD v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_lshlrev_b32_e32 v0, 4, v0 ; 34000084 v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 buffer_load_dword v14, v0, s[0:3], 0 offen ; E0301000 80000E00 v_or_b32_e32 v15, 4, v0 ; 381E0084 v_or_b32_e32 v16, 8, v0 ; 38200088 v_or_b32_e32 v0, 12, v0 ; 3800008C buffer_load_dword v15, v15, s[0:3], 0 offen ; E0301000 80000F0F v_or_b32_e32 v17, 4, v1 ; 38220284 buffer_load_dword v17, v17, s[0:3], 0 offen ; E0301000 80001111 buffer_load_dword v18, v1, s[0:3], 0 offen ; E0301000 80001201 v_or_b32_e32 v19, 8, v1 ; 38260288 v_or_b32_e32 v1, 12, v1 ; 3802028C buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 buffer_load_dword v19, v19, s[0:3], 0 offen ; E0301000 80001313 buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 s_waitcnt vmcnt(6) ; BF8C0776 v_mul_f32_e32 v15, v15, v11 ; 101E170F v_mac_f32_e32 v15, v14, v10 ; 3E1E150E s_waitcnt vmcnt(5) ; BF8C0775 v_mul_f32_e32 v11, v17, v11 ; 10161711 s_waitcnt vmcnt(4) ; BF8C0774 v_mac_f32_e32 v11, v18, v10 ; 3E161512 s_waitcnt vmcnt(3) ; BF8C0773 v_mac_f32_e32 v15, v16, v12 ; 3E1E1910 s_waitcnt vmcnt(2) ; BF8C0772 v_mac_f32_e32 v11, v19, v12 ; 3E161913 s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v15, v0, v13 ; 3E1E1B00 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v11, v1, v13 ; 3E161B01 exp 15, 32, 0, 0, 0, v2, v3, v4, v5 ; F800020F 05040302 exp 15, 33, 0, 0, 0, v6, v7, v8, v9 ; F800021F 09080706 v_mov_b32_e32 v0, 1.0 ; 7E0002F2 v_mov_b32_e32 v1, 0 ; 7E020280 exp 15, 12, 0, 1, 0, v15, v11, v1, v0 ; F80008CF 00010B0F s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 20 Code Size: 276 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xyz, IN[0].xyzx 1: MUL TEMP[1].x, IN[0].wwww, IN[1].wwww 2: MOV TEMP[0].w, TEMP[1].xxxx 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %23 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %24 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %25 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %26 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %27 = fmul float %25, %26 %28 = call i32 @llvm.SI.packf16(float %22, float %23) %29 = bitcast i32 %28 to float %30 = call i32 @llvm.SI.packf16(float %24, float %27) %31 = bitcast i32 %30 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %29, float %31, float %29, float %31) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v0, v0, 3, 1, [m0] ; C8000700 v_interp_p2_f32 v0, [v0], v1, 3, 1, [m0] ; C8010701 v_mul_f32_e32 v0, v0, v5 ; 10000B00 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 v_cvt_pkrtz_f16_f32_e32 v0, v4, v0 ; 5E000104 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 68 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL CONST[0..95] DCL TEMP[0..2], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 1020.0400, 2.1000} IMM[1] FLT32 { 3.1000, 255.0100, 4.0000, 0.1000} IMM[2] FLT32 { 1.1000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: MUL TEMP[1].x, IMM[0].zzzz, IN[1].zzzz 2: ADD TEMP[2].x, IMM[0].wwww, TEMP[1].xxxx 3: F2I TEMP[2].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: UARL ADDR[0].x, TEMP[2].xxxx 6: DP4 TEMP[0].x, IN[2], CONST[ADDR[0].x] 7: ADD TEMP[1].x, IMM[1].xxxx, TEMP[1].xxxx 8: F2I TEMP[1].x, TEMP[1].xxxx 9: UARL ADDR[0].x, TEMP[1].xxxx 10: DP4 TEMP[1].x, IN[2], CONST[ADDR[0].x] 11: MOV TEMP[0].y, TEMP[1].xxxx 12: MUL TEMP[1].x, IN[1].zzzz, IMM[1].yyyy 13: MAD TEMP[2].x, TEMP[1].xxxx, IMM[1].zzzz, IMM[1].wwww 14: F2I TEMP[2].x, TEMP[2].xxxx 15: UARL ADDR[0].x, TEMP[2].xxxx 16: MOV TEMP[2], CONST[ADDR[0].x] 17: MAD TEMP[1].x, TEMP[1].xxxx, IMM[1].zzzz, IMM[2].xxxx 18: F2I TEMP[1].x, TEMP[1].xxxx 19: UARL ADDR[0].x, TEMP[1].xxxx 20: MOV TEMP[1], CONST[ADDR[0].x] 21: MOV OUT[2], IN[1] 22: MOV OUT[3], TEMP[2] 23: MOV OUT[1], IN[0] 24: MOV OUT[4], TEMP[1] 25: MOV OUT[0], TEMP[0] 26: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %14 = load <16 x i8>, <16 x i8> addrspace(2)* %13, align 16, !tbaa !0 %15 = add i32 %5, %7 %16 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %14, i32 0, i32 %15) %17 = extractelement <4 x float> %16, i32 0 %18 = extractelement <4 x float> %16, i32 1 %19 = extractelement <4 x float> %16, i32 2 %20 = extractelement <4 x float> %16, i32 3 %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = extractelement <4 x float> %24, i32 1 %27 = extractelement <4 x float> %24, i32 2 %28 = extractelement <4 x float> %24, i32 3 %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = fmul float %27, 0x408FE051E0000000 %38 = fadd float %37, 0x4000CCCCC0000000 %39 = fptosi float %38 to i32 %40 = shl i32 %39, 4 %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %40) %42 = shl i32 %39, 4 %43 = or i32 %42, 4 %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %43) %45 = shl i32 %39, 4 %46 = or i32 %45, 8 %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %46) %48 = shl i32 %39, 4 %49 = or i32 %48, 12 %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %49) %51 = fmul float %33, %41 %52 = fmul float %34, %44 %53 = fadd float %51, %52 %54 = fmul float %35, %47 %55 = fadd float %53, %54 %56 = fmul float %36, %50 %57 = fadd float %55, %56 %58 = fadd float %37, 0x4008CCCCC0000000 %59 = fptosi float %58 to i32 %60 = shl i32 %59, 4 %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %60) %62 = shl i32 %59, 4 %63 = or i32 %62, 4 %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %63) %65 = shl i32 %59, 4 %66 = or i32 %65, 8 %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %66) %68 = shl i32 %59, 4 %69 = or i32 %68, 12 %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %69) %71 = fmul float %33, %61 %72 = fmul float %34, %64 %73 = fadd float %71, %72 %74 = fmul float %35, %67 %75 = fadd float %73, %74 %76 = fmul float %36, %70 %77 = fadd float %75, %76 %78 = fmul float %27, 0x406FE051E0000000 %79 = fmul float %78, 4.000000e+00 %80 = fadd float %79, 0x3FB99999A0000000 %81 = fptosi float %80 to i32 %82 = shl i32 %81, 4 %83 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %82) %84 = shl i32 %81, 4 %85 = or i32 %84, 4 %86 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %85) %87 = shl i32 %81, 4 %88 = or i32 %87, 8 %89 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %88) %90 = shl i32 %81, 4 %91 = or i32 %90, 12 %92 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %91) %93 = fmul float %78, 4.000000e+00 %94 = fadd float %93, 0x3FF19999A0000000 %95 = fptosi float %94 to i32 %96 = shl i32 %95, 4 %97 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %96) %98 = shl i32 %95, 4 %99 = or i32 %98, 4 %100 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %99) %101 = shl i32 %95, 4 %102 = or i32 %101, 8 %103 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %102) %104 = shl i32 %95, 4 %105 = or i32 %104, 12 %106 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %105) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %25, float %26, float %27, float %28) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %83, float %86, float %89, float %92) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %97, float %100, float %103, float %106) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %57, float %77, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 v_add_i32_e32 v0, s10, v0 ; 4A00000A v_mov_b32_e32 v1, 0x447f028f ; 7E0202FF 447F028F s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[12:15], 0 idxen ; E00C2000 80030600 buffer_load_format_xyzw v[10:13], v0, s[16:19], 0 idxen ; E00C2000 80040A00 s_waitcnt vmcnt(1) ; BF8C0771 v_madak_f32_e32 v0, v8, v1, 0x40066666 ; 42000308 40066666 v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_madak_f32_e32 v14, v8, v1, 0x40466666 ; 421C0308 40466666 v_cvt_i32_f32_e32 v14, v14 ; 7E1C110E v_madak_f32_e32 v15, v8, v1, 0x3dcccccd ; 421E0308 3DCCCCCD v_cvt_i32_f32_e32 v15, v15 ; 7E1E110F v_madak_f32_e32 v1, v8, v1, 0x3f8ccccd ; 42020308 3F8CCCCD v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_lshlrev_b32_e32 v0, 4, v0 ; 34000084 v_lshlrev_b32_e32 v14, 4, v14 ; 341C1C84 v_lshlrev_b32_e32 v15, 4, v15 ; 341E1E84 v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 buffer_load_dword v16, v0, s[0:3], 0 offen ; E0301000 80001000 v_or_b32_e32 v17, 4, v0 ; 38220084 v_or_b32_e32 v18, 8, v0 ; 38240088 v_or_b32_e32 v0, 12, v0 ; 3800008C buffer_load_dword v19, v14, s[0:3], 0 offen ; E0301000 8000130E v_or_b32_e32 v20, 4, v14 ; 38281C84 v_or_b32_e32 v21, 8, v14 ; 382A1C88 v_or_b32_e32 v14, 12, v14 ; 381C1C8C buffer_load_dword v22, v15, s[0:3], 0 offen ; E0301000 8000160F v_or_b32_e32 v23, 4, v15 ; 382E1E84 v_or_b32_e32 v24, 8, v15 ; 38301E88 v_or_b32_e32 v15, 12, v15 ; 381E1E8C buffer_load_dword v25, v1, s[0:3], 0 offen ; E0301000 80001901 v_or_b32_e32 v26, 4, v1 ; 38340284 v_or_b32_e32 v27, 8, v1 ; 38360288 v_or_b32_e32 v1, 12, v1 ; 3802028C buffer_load_dword v17, v17, s[0:3], 0 offen ; E0301000 80001111 buffer_load_dword v18, v18, s[0:3], 0 offen ; E0301000 80001212 buffer_load_dword v20, v20, s[0:3], 0 offen ; E0301000 80001414 buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 buffer_load_dword v14, v14, s[0:3], 0 offen ; E0301000 80000E0E buffer_load_dword v23, v23, s[0:3], 0 offen ; E0301000 80001717 buffer_load_dword v24, v24, s[0:3], 0 offen ; E0301000 80001818 buffer_load_dword v15, v15, s[0:3], 0 offen ; E0301000 80000F0F buffer_load_dword v26, v26, s[0:3], 0 offen ; E0301000 80001A1A buffer_load_dword v27, v27, s[0:3], 0 offen ; E0301000 80001B1B buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 s_waitcnt vmcnt(11) ; BF8C077B v_mul_f32_e32 v17, v17, v11 ; 10221711 v_mac_f32_e32 v17, v16, v10 ; 3E221510 s_waitcnt vmcnt(9) ; BF8C0779 v_mul_f32_e32 v11, v20, v11 ; 10161714 v_mac_f32_e32 v11, v19, v10 ; 3E161513 v_mac_f32_e32 v17, v18, v12 ; 3E221912 s_waitcnt vmcnt(7) ; BF8C0777 v_mac_f32_e32 v11, v21, v12 ; 3E161915 v_mac_f32_e32 v17, v0, v13 ; 3E221B00 s_waitcnt vmcnt(6) ; BF8C0776 v_mac_f32_e32 v11, v14, v13 ; 3E161B0E exp 15, 32, 0, 0, 0, v2, v3, v4, v5 ; F800020F 05040302 exp 15, 33, 0, 0, 0, v6, v7, v8, v9 ; F800021F 09080706 s_waitcnt vmcnt(3) ; BF8C0773 exp 15, 34, 0, 0, 0, v22, v23, v24, v15 ; F800022F 0F181716 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 35, 0, 0, 0, v25, v26, v27, v1 ; F800023F 011B1A19 v_mov_b32_e32 v0, 1.0 ; 7E0002F2 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 0 ; 7E020280 exp 15, 12, 0, 1, 0, v17, v11, v1, v0 ; F80008CF 00010B11 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 28 Code Size: 412 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL TEMP[0..1], LOCAL 0: MAD TEMP[0], IN[0], IN[3], IN[2] 1: MUL TEMP[1].x, TEMP[0].wwww, IN[1].wwww 2: MOV TEMP[0].w, TEMP[1].xxxx 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %23 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %24 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %25 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %26 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %35 = fmul float %22, %31 %36 = fadd float %35, %27 %37 = fmul float %23, %32 %38 = fadd float %37, %28 %39 = fmul float %24, %33 %40 = fadd float %39, %29 %41 = fmul float %25, %34 %42 = fadd float %41, %30 %43 = fmul float %42, %26 %44 = call i32 @llvm.SI.packf16(float %36, float %38) %45 = bitcast i32 %44 to float %46 = call i32 @llvm.SI.packf16(float %40, float %43) %47 = bitcast i32 %46 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %45, float %47, float %45, float %47) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 3, 1, [m0] ; C8180700 v_interp_p2_f32 v6, [v6], v1, 3, 1, [m0] ; C8190701 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 v_interp_p1_f32 v10, v0, 3, 2, [m0] ; C8280B00 v_interp_p2_f32 v10, [v10], v1, 3, 2, [m0] ; C8290B01 v_interp_p1_f32 v11, v0, 0, 3, [m0] ; C82C0C00 v_interp_p2_f32 v11, [v11], v1, 0, 3, [m0] ; C82D0C01 v_interp_p1_f32 v12, v0, 1, 3, [m0] ; C8300D00 v_interp_p2_f32 v12, [v12], v1, 1, 3, [m0] ; C8310D01 v_interp_p1_f32 v13, v0, 2, 3, [m0] ; C8340E00 v_interp_p2_f32 v13, [v13], v1, 2, 3, [m0] ; C8350E01 v_interp_p1_f32 v0, v0, 3, 3, [m0] ; C8000F00 v_interp_p2_f32 v0, [v0], v1, 3, 3, [m0] ; C8010F01 v_mac_f32_e32 v7, v11, v2 ; 3E0E050B v_mac_f32_e32 v8, v12, v3 ; 3E10070C v_mac_f32_e32 v9, v13, v4 ; 3E12090D v_mac_f32_e32 v10, v0, v5 ; 3E140B00 v_mul_f32_e32 v0, v6, v10 ; 10001506 v_cvt_pkrtz_f16_f32_e32 v1, v7, v8 ; 5E021107 v_cvt_pkrtz_f16_f32_e32 v0, v9, v0 ; 5E000109 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Code Size: 148 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..3] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[1], CONST[0] 2: DP4 TEMP[1].x, IN[1], CONST[1] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: DP4 TEMP[1].x, IN[1], CONST[2] 5: DP4 TEMP[2].x, IN[1], CONST[3] 6: MOV TEMP[1].y, TEMP[2].xxxx 7: MOV TEMP[1].xy, TEMP[1].xyxx 8: MOV OUT[1], IN[0] 9: MOV OUT[0], TEMP[0] 10: MOV OUT[2], TEMP[1] 11: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = fmul float %41, %13 %46 = fmul float %42, %14 %47 = fadd float %45, %46 %48 = fmul float %43, %15 %49 = fadd float %47, %48 %50 = fmul float %44, %16 %51 = fadd float %49, %50 %52 = fmul float %41, %17 %53 = fmul float %42, %18 %54 = fadd float %52, %53 %55 = fmul float %43, %19 %56 = fadd float %54, %55 %57 = fmul float %44, %20 %58 = fadd float %56, %57 %59 = fmul float %41, %21 %60 = fmul float %42, %22 %61 = fadd float %59, %60 %62 = fmul float %43, %23 %63 = fadd float %61, %62 %64 = fmul float %44, %24 %65 = fadd float %63, %64 %66 = fmul float %41, %25 %67 = fmul float %42, %26 %68 = fadd float %66, %67 %69 = fmul float %43, %27 %70 = fadd float %68, %69 %71 = fmul float %44, %28 %72 = fadd float %70, %71 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %33, float %34, float %35, float %36) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %65, float %72, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %51, float %58, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0x5 ; C2060105 s_buffer_load_dword s13, s[0:3], 0x6 ; C2068106 buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_buffer_load_dword s5, s[0:3], 0x8 ; C2028108 s_buffer_load_dword s6, s[0:3], 0x9 ; C2030109 s_buffer_load_dword s7, s[0:3], 0xa ; C203810A s_buffer_load_dword s8, s[0:3], 0xb ; C204010B s_buffer_load_dword s9, s[0:3], 0xc ; C204810C s_buffer_load_dword s10, s[0:3], 0xd ; C205010D s_buffer_load_dword s11, s[0:3], 0xe ; C205810E s_buffer_load_dword s14, s[0:3], 0x0 ; C2070100 s_buffer_load_dword s15, s[0:3], 0x1 ; C2078101 s_buffer_load_dword s16, s[0:3], 0x2 ; C2080102 s_buffer_load_dword s17, s[0:3], 0x3 ; C2088103 s_buffer_load_dword s18, s[0:3], 0x4 ; C2090104 s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s15, v6 ; 10000C0F v_mac_f32_e32 v0, s14, v5 ; 3E000A0E v_mul_f32_e32 v9, s12, v6 ; 10120C0C v_mac_f32_e32 v9, s18, v5 ; 3E120A12 v_mul_f32_e32 v10, s6, v6 ; 10140C06 v_mac_f32_e32 v10, s5, v5 ; 3E140A05 v_mul_f32_e32 v6, s10, v6 ; 100C0C0A v_mac_f32_e32 v6, s9, v5 ; 3E0C0A09 v_mac_f32_e32 v0, s16, v7 ; 3E000E10 v_mac_f32_e32 v9, s13, v7 ; 3E120E0D v_mac_f32_e32 v10, s7, v7 ; 3E140E07 v_mac_f32_e32 v6, s11, v7 ; 3E0C0E0B v_mac_f32_e32 v0, s17, v8 ; 3E001011 v_mac_f32_e32 v9, s4, v8 ; 3E121004 v_mac_f32_e32 v10, s8, v8 ; 3E141008 v_mac_f32_e32 v6, s0, v8 ; 3E0C1000 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 0 ; 7E020280 exp 15, 33, 0, 0, 0, v10, v6, v1, v1 ; F800021F 0101060A v_mov_b32_e32 v2, 1.0 ; 7E0402F2 exp 15, 12, 0, 1, 0, v0, v9, v1, v2 ; F80008CF 02010900 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 208 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV TEMP[1].xyz, TEMP[0].xyzx 3: MUL TEMP[0].x, TEMP[0].wwww, IN[0].wwww 4: MOV TEMP[1].w, TEMP[0].xxxx 5: MOV OUT[0], TEMP[1] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %29 = bitcast float %27 to i32 %30 = bitcast float %28 to i32 %31 = insertelement <2 x i32> undef, i32 %29, i32 0 %32 = insertelement <2 x i32> %31, i32 %30, i32 1 %33 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %32, <32 x i8> %23, <16 x i8> %25, i32 2) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = extractelement <4 x float> %33, i32 2 %37 = extractelement <4 x float> %33, i32 3 %38 = fmul float %37, %26 %39 = call i32 @llvm.SI.packf16(float %34, float %35) %40 = bitcast i32 %39 to float %41 = call i32 @llvm.SI.packf16(float %36, float %38) %42 = bitcast i32 %41 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %40, float %42, float %40, float %42) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400 v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[3:6], 15, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[12:19], s[0:3] ; F0800F00 00030303 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v2, v6 ; 10000D02 v_cvt_pkrtz_f16_f32_e32 v0, v5, v0 ; 5E000105 v_cvt_pkrtz_f16_f32_e32 v1, v3, v4 ; 5E020903 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 80 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL CONST[0..5] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[0], CONST[2] 2: DP4 TEMP[1].x, IN[0], CONST[3] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: DP4 TEMP[1].x, IN[0], CONST[4] 5: DP4 TEMP[2].x, IN[0], CONST[5] 6: MOV TEMP[1].y, TEMP[2].xxxx 7: MOV TEMP[1].xy, TEMP[1].xyxx 8: MOV OUT[1], CONST[0] 9: MOV OUT[2], CONST[1] 10: MOV OUT[0], TEMP[0] 11: MOV OUT[3], TEMP[1] 12: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = fmul float %41, %21 %46 = fmul float %42, %22 %47 = fadd float %45, %46 %48 = fmul float %43, %23 %49 = fadd float %47, %48 %50 = fmul float %44, %24 %51 = fadd float %49, %50 %52 = fmul float %41, %25 %53 = fmul float %42, %26 %54 = fadd float %52, %53 %55 = fmul float %43, %27 %56 = fadd float %54, %55 %57 = fmul float %44, %28 %58 = fadd float %56, %57 %59 = fmul float %41, %29 %60 = fmul float %42, %30 %61 = fadd float %59, %60 %62 = fmul float %43, %31 %63 = fadd float %61, %62 %64 = fmul float %44, %32 %65 = fadd float %63, %64 %66 = fmul float %41, %33 %67 = fmul float %42, %34 %68 = fadd float %66, %67 %69 = fmul float %43, %35 %70 = fadd float %68, %69 %71 = fmul float %44, %36 %72 = fadd float %70, %71 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %13, float %14, float %15, float %16) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %65, float %72, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %51, float %58, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s19, s[0:3], 0xf ; C209810F s_buffer_load_dword s20, s[0:3], 0x10 ; C20A0110 s_buffer_load_dword s21, s[0:3], 0x11 ; C20A8111 s_buffer_load_dword s22, s[0:3], 0x14 ; C20B0114 s_buffer_load_dword s23, s[0:3], 0x15 ; C20B8115 s_buffer_load_dword s24, s[0:3], 0x12 ; C20C0112 s_buffer_load_dword s25, s[0:3], 0x13 ; C20C8113 s_buffer_load_dword s26, s[0:3], 0x16 ; C20D0116 s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s13, v1 ; 1008020D v_mac_f32_e32 v4, s12, v0 ; 3E08000C v_mul_f32_e32 v5, s17, v1 ; 100A0211 v_mac_f32_e32 v5, s16, v0 ; 3E0A0010 v_mul_f32_e32 v6, s21, v1 ; 100C0215 v_mac_f32_e32 v6, s20, v0 ; 3E0C0014 v_mul_f32_e32 v1, s23, v1 ; 10020217 v_mac_f32_e32 v1, s22, v0 ; 3E020016 v_mac_f32_e32 v4, s14, v2 ; 3E08040E v_mac_f32_e32 v5, s18, v2 ; 3E0A0412 v_mac_f32_e32 v6, s24, v2 ; 3E0C0418 v_mac_f32_e32 v1, s26, v2 ; 3E02041A v_mac_f32_e32 v4, s15, v3 ; 3E08060F v_mac_f32_e32 v5, s19, v3 ; 3E0A0613 v_mac_f32_e32 v6, s25, v3 ; 3E0C0619 v_mac_f32_e32 v1, s0, v3 ; 3E020600 v_mov_b32_e32 v0, s4 ; 7E000204 v_mov_b32_e32 v2, s5 ; 7E040205 v_mov_b32_e32 v3, s6 ; 7E060206 v_mov_b32_e32 v7, s7 ; 7E0E0207 exp 15, 32, 0, 0, 0, v0, v2, v3, v7 ; F800020F 07030200 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, s8 ; 7E000208 v_mov_b32_e32 v2, s9 ; 7E040209 v_mov_b32_e32 v3, s10 ; 7E06020A v_mov_b32_e32 v7, s11 ; 7E0E020B exp 15, 33, 0, 0, 0, v0, v2, v3, v7 ; F800021F 07030200 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 34, 0, 0, 0, v6, v1, v0, v0 ; F800022F 00000106 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp 15, 12, 0, 1, 0, v4, v5, v0, v1 ; F80008CF 01000504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 8 Code Size: 280 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[0] DCL CONST[2] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[2].xyxx 1: MOV TEMP[1], IMM[0].xxxx 2: MOV TEMP[2], IMM[0].xxxx 3: MOV TEMP[3].x, -CONST[0].xxxx 4: BGNLOOP :0 5: FSLT TEMP[4].x, CONST[0].xxxx, TEMP[3].xxxx 6: UIF TEMP[4].xxxx :0 7: BRK 8: ENDIF 9: MAD TEMP[5].xy, TEMP[3].xxxx, CONST[2].xyyy, TEMP[0].xyyy 10: MOV TEMP[6].xy, TEMP[5].xyyy 11: MOV TEMP[6].w, IMM[0].xxxx 12: TXL TEMP[7], TEMP[6], SAMP[0], 2D 13: ADD TEMP[2], TEMP[2], TEMP[7] 14: ADD TEMP[3].x, TEMP[3].xxxx, IMM[0].yyyy 15: ENDLOOP :0 16: MUL TEMP[1], TEMP[2], CONST[0].wwww 17: MOV TEMP[0].w, IMM[0].yyyy 18: MOV TEMP[0].xyz, IN[1].xyzx 19: MUL TEMP[0], TEMP[1], TEMP[0] 20: MUL TEMP[1], TEMP[0], IN[1].wwww 21: MAD TEMP[1], IN[0], TEMP[1].wwww, TEMP[1] 22: MOV OUT[0], TEMP[1] 23: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %28 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %29 = load <32 x i8>, <32 x i8> addrspace(2)* %28, align 32, !tbaa !0 %30 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 %32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %37 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %38 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %42 = fsub float -0.000000e+00, %24 br label %LOOP LOOP: ; preds = %ENDIF, %main_body %temp8.0 = phi float [ 0.000000e+00, %main_body ], [ %81, %ENDIF ] %temp9.0 = phi float [ 0.000000e+00, %main_body ], [ %82, %ENDIF ] %temp10.0 = phi float [ 0.000000e+00, %main_body ], [ %83, %ENDIF ] %temp11.0 = phi float [ 0.000000e+00, %main_body ], [ %84, %ENDIF ] %temp12.0 = phi float [ %42, %main_body ], [ %85, %ENDIF ] %43 = fcmp olt float %24, %temp12.0 br i1 %43, label %IF, label %ENDIF IF: ; preds = %LOOP %44 = fmul float %temp8.0, %25 %45 = fmul float %temp9.0, %25 %46 = fmul float %temp10.0, %25 %47 = fmul float %temp11.0, %25 %48 = fmul float %44, %36 %49 = fmul float %45, %37 %50 = fmul float %46, %38 %51 = fmul float %48, %39 %52 = fmul float %49, %39 %53 = fmul float %50, %39 %54 = fmul float %47, %39 %55 = fmul float %32, %54 %56 = fadd float %55, %51 %57 = fmul float %33, %54 %58 = fadd float %57, %52 %59 = fmul float %34, %54 %60 = fadd float %59, %53 %61 = fmul float %35, %54 %62 = fadd float %61, %54 %63 = call i32 @llvm.SI.packf16(float %56, float %58) %64 = bitcast i32 %63 to float %65 = call i32 @llvm.SI.packf16(float %60, float %62) %66 = bitcast i32 %65 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %64, float %66, float %64, float %66) ret void ENDIF: ; preds = %LOOP %67 = fmul float %temp12.0, %26 %68 = fadd float %67, %40 %69 = fmul float %temp12.0, %27 %70 = fadd float %69, %41 %71 = bitcast float %68 to i32 %72 = bitcast float %70 to i32 %73 = insertelement <4 x i32> undef, i32 %71, i32 0 %74 = insertelement <4 x i32> %73, i32 %72, i32 1 %75 = insertelement <4 x i32> %74, i32 0, i32 2 %76 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %75, <32 x i8> %29, <16 x i8> %31, i32 2) %77 = extractelement <4 x float> %76, i32 0 %78 = extractelement <4 x float> %76, i32 1 %79 = extractelement <4 x float> %76, i32 2 %80 = extractelement <4 x float> %76, i32 3 %81 = fadd float %temp8.0, %77 %82 = fadd float %temp9.0, %78 %83 = fadd float %temp10.0, %79 %84 = fadd float %temp11.0, %80 %85 = fadd float %temp12.0, 1.000000e+00 br label %LOOP } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s1, s[12:15], 0x0 ; C2008D00 s_buffer_load_dword s0, s[12:15], 0x3 ; C2000D03 s_buffer_load_dword s2, s[12:15], 0x8 ; C2010D08 s_buffer_load_dword s3, s[12:15], 0x9 ; C2018D09 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700 v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701 v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800 v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801 s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 v_interp_p1_f32 v0, v0, 1, 2, [m0] ; C8000900 v_interp_p2_f32 v0, [v0], v1, 1, 2, [m0] ; C8010901 v_mov_b32_e32 v1, 0x80000000 ; 7E0202FF 80000000 s_waitcnt lgkmcnt(0) ; BF8C007F v_xor_b32_e32 v11, s1, v1 ; 3A160201 v_mov_b32_e32 v15, 0 ; 7E1E0280 v_mov_b32_e32 v16, 0 ; 7E200280 v_mov_b32_e32 v17, 0 ; 7E220280 v_mov_b32_e32 v18, 0 ; 7E240280 s_mov_b64 s[16:17], 0 ; BE900480 v_mov_b32_e32 v1, v18 ; 7E020312 v_mov_b32_e32 v12, v17 ; 7E180311 v_mov_b32_e32 v13, v16 ; 7E1A0310 v_mov_b32_e32 v14, v15 ; 7E1C030F v_cmp_nlt_f32_e32 vcc, s1, v11 ; 7C1C1601 s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_mad_f32 v15, s2, v11, v10 ; D282000F 042A1602 v_mad_f32 v16, s3, v11, v0 ; D2820010 04021603 v_mov_b32_e32 v17, 0 ; 7E220280 image_sample_l v[18:21], 15, 0, 0, 0, 0, 0, 0, 0, v[15:18], s[4:11], s[12:15] ; F0900F00 0061120F s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v15, v18, v14 ; 061E1D12 v_add_f32_e32 v16, v19, v13 ; 06201B13 v_add_f32_e32 v17, v20, v12 ; 06221914 v_add_f32_e32 v18, v21, v1 ; 06240315 v_add_f32_e32 v11, 1.0, v11 ; 061616F2 s_or_b64 exec, exec, s[18:19] ; 88FE127E s_or_b64 s[16:17], s[18:19], s[16:17] ; 88901012 s_andn2_b64 exec, exec, s[16:17] ; 8AFE107E s_cbranch_execnz BB0_1 ; BF890000 s_or_b64 exec, exec, s[16:17] ; 88FE107E v_mul_f32_e32 v0, s0, v14 ; 10001C00 v_mul_f32_e32 v10, s0, v13 ; 10141A00 v_mul_f32_e32 v11, s0, v12 ; 10161800 v_mul_f32_e32 v1, s0, v1 ; 10020200 v_mul_f32_e32 v0, v6, v0 ; 10000106 v_mul_f32_e32 v6, v7, v10 ; 100C1507 v_mul_f32_e32 v7, v8, v11 ; 100E1708 v_mul_f32_e32 v0, v9, v0 ; 10000109 v_mul_f32_e32 v6, v9, v6 ; 100C0D09 v_mul_f32_e32 v7, v9, v7 ; 100E0F09 v_mul_f32_e32 v1, v9, v1 ; 10020309 v_mac_f32_e32 v0, v1, v2 ; 3E000501 v_mac_f32_e32 v6, v1, v3 ; 3E0C0701 v_mac_f32_e32 v7, v1, v4 ; 3E0E0901 v_mac_f32_e32 v1, v1, v5 ; 3E020B01 v_cvt_pkrtz_f16_f32_e32 v0, v0, v6 ; 5E000D00 v_cvt_pkrtz_f16_f32_e32 v1, v7, v1 ; 5E020307 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 24 Code Size: 332 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL CONST[0..5] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[0], CONST[2] 2: DP4 TEMP[1].x, IN[0], CONST[3] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: DP4 TEMP[1].x, IN[0], CONST[4] 5: DP4 TEMP[2].x, IN[0], CONST[5] 6: MOV TEMP[1].y, TEMP[2].xxxx 7: MOV TEMP[1].xy, TEMP[1].xyxx 8: MOV OUT[1], CONST[0] 9: MOV OUT[2], CONST[1] 10: MOV OUT[0], TEMP[0] 11: MOV OUT[3], TEMP[1] 12: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = fmul float %41, %21 %46 = fmul float %42, %22 %47 = fadd float %45, %46 %48 = fmul float %43, %23 %49 = fadd float %47, %48 %50 = fmul float %44, %24 %51 = fadd float %49, %50 %52 = fmul float %41, %25 %53 = fmul float %42, %26 %54 = fadd float %52, %53 %55 = fmul float %43, %27 %56 = fadd float %54, %55 %57 = fmul float %44, %28 %58 = fadd float %56, %57 %59 = fmul float %41, %29 %60 = fmul float %42, %30 %61 = fadd float %59, %60 %62 = fmul float %43, %31 %63 = fadd float %61, %62 %64 = fmul float %44, %32 %65 = fadd float %63, %64 %66 = fmul float %41, %33 %67 = fmul float %42, %34 %68 = fadd float %66, %67 %69 = fmul float %43, %35 %70 = fadd float %68, %69 %71 = fmul float %44, %36 %72 = fadd float %70, %71 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %13, float %14, float %15, float %16) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %65, float %72, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %51, float %58, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s19, s[0:3], 0xf ; C209810F s_buffer_load_dword s20, s[0:3], 0x10 ; C20A0110 s_buffer_load_dword s21, s[0:3], 0x11 ; C20A8111 s_buffer_load_dword s22, s[0:3], 0x14 ; C20B0114 s_buffer_load_dword s23, s[0:3], 0x15 ; C20B8115 s_buffer_load_dword s24, s[0:3], 0x12 ; C20C0112 s_buffer_load_dword s25, s[0:3], 0x13 ; C20C8113 s_buffer_load_dword s26, s[0:3], 0x16 ; C20D0116 s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s13, v1 ; 1008020D v_mac_f32_e32 v4, s12, v0 ; 3E08000C v_mul_f32_e32 v5, s17, v1 ; 100A0211 v_mac_f32_e32 v5, s16, v0 ; 3E0A0010 v_mul_f32_e32 v6, s21, v1 ; 100C0215 v_mac_f32_e32 v6, s20, v0 ; 3E0C0014 v_mul_f32_e32 v1, s23, v1 ; 10020217 v_mac_f32_e32 v1, s22, v0 ; 3E020016 v_mac_f32_e32 v4, s14, v2 ; 3E08040E v_mac_f32_e32 v5, s18, v2 ; 3E0A0412 v_mac_f32_e32 v6, s24, v2 ; 3E0C0418 v_mac_f32_e32 v1, s26, v2 ; 3E02041A v_mac_f32_e32 v4, s15, v3 ; 3E08060F v_mac_f32_e32 v5, s19, v3 ; 3E0A0613 v_mac_f32_e32 v6, s25, v3 ; 3E0C0619 v_mac_f32_e32 v1, s0, v3 ; 3E020600 v_mov_b32_e32 v0, s4 ; 7E000204 v_mov_b32_e32 v2, s5 ; 7E040205 v_mov_b32_e32 v3, s6 ; 7E060206 v_mov_b32_e32 v7, s7 ; 7E0E0207 exp 15, 32, 0, 0, 0, v0, v2, v3, v7 ; F800020F 07030200 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, s8 ; 7E000208 v_mov_b32_e32 v2, s9 ; 7E040209 v_mov_b32_e32 v3, s10 ; 7E06020A v_mov_b32_e32 v7, s11 ; 7E0E020B exp 15, 33, 0, 0, 0, v0, v2, v3, v7 ; F800021F 07030200 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 34, 0, 0, 0, v6, v1, v0, v0 ; F800022F 00000106 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp 15, 12, 0, 1, 0, v4, v5, v0, v1 ; F80008CF 01000504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 8 Code Size: 280 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[0..2] DCL CONST[4] DCL CONST[6] DCL TEMP[0..10], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[2].xyxx 1: MOV TEMP[1], IMM[0].xxxx 2: MOV TEMP[2], IMM[0].xxxx 3: MOV TEMP[3].y, IMM[0].xxxx 4: MOV TEMP[3].x, -CONST[0].xxxx 5: BGNLOOP :0 6: FSLT TEMP[4].x, CONST[0].xxxx, TEMP[3].xxxx 7: UIF TEMP[4].xxxx :0 8: BRK 9: ENDIF 10: MOV TEMP[3].y, -CONST[0].yyyy 11: BGNLOOP :0 12: FSLT TEMP[5].x, CONST[0].yyyy, TEMP[3].yyyy 13: UIF TEMP[5].xxxx :0 14: BRK 15: ENDIF 16: ADD TEMP[6].xy, CONST[1].xyyy, TEMP[3].xyyy 17: MAD TEMP[7].xy, TEMP[6].xyyy, CONST[6].xyyy, TEMP[0].xyyy 18: MOV TEMP[8].xy, TEMP[7].xyyy 19: MOV TEMP[8].w, IMM[0].xxxx 20: TXL TEMP[9], TEMP[8], SAMP[1], 2D 21: ADD TEMP[2], TEMP[2], TEMP[9] 22: ADD TEMP[10].x, TEMP[3].yyyy, IMM[0].yyyy 23: MOV TEMP[3].y, TEMP[10].xxxx 24: ENDLOOP :0 25: ADD TEMP[3].x, TEMP[3].xxxx, IMM[0].yyyy 26: ENDLOOP :0 27: MUL TEMP[1].w, TEMP[2], CONST[0].wwww 28: MUL TEMP[0].xy, IN[2].xyyy, CONST[4].xyyy 29: MOV TEMP[0].xy, TEMP[0].xyyy 30: MOV TEMP[0].w, IMM[0].xxxx 31: TXL TEMP[0], TEMP[0], SAMP[0], 2D 32: ADD TEMP[2].x, IMM[0].yyyy, -TEMP[0].wwww 33: MUL TEMP[2].x, TEMP[1].wwww, TEMP[2].xxxx 34: MUL TEMP[2].x, TEMP[2].xxxx, CONST[0].zzzz 35: MOV_SAT TEMP[2].x, TEMP[2].xxxx 36: MAD TEMP[1], CONST[2], TEMP[2].xxxx, TEMP[0] 37: MOV TEMP[0].w, IMM[0].yyyy 38: MOV TEMP[0].xyz, IN[1].xyzx 39: MUL TEMP[0], TEMP[1], TEMP[0] 40: MUL TEMP[1], TEMP[0], IN[1].wwww 41: MAD TEMP[1], IN[0], TEMP[1].wwww, TEMP[1] 42: MOV OUT[0], TEMP[1] 43: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %38 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %39 = load <32 x i8>, <32 x i8> addrspace(2)* %38, align 32, !tbaa !0 %40 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %41 = load <16 x i8>, <16 x i8> addrspace(2)* %40, align 16, !tbaa !0 %42 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %43 = bitcast <8 x i32> addrspace(2)* %42 to <32 x i8> addrspace(2)* %44 = load <32 x i8>, <32 x i8> addrspace(2)* %43, align 32, !tbaa !0 %45 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %46 = bitcast <4 x i32> addrspace(2)* %45 to <16 x i8> addrspace(2)* %47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0 %48 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %49 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %50 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %51 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %52 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %53 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %54 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %55 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %56 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %57 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %58 = fsub float -0.000000e+00, %24 %59 = fsub float -0.000000e+00, %25 br label %LOOP LOOP: ; preds = %IF47, %main_body %temp12.0 = phi float [ %58, %main_body ], [ %111, %IF47 ] %temp11.0 = phi float [ 0.000000e+00, %main_body ], [ %temp11.1, %IF47 ] %60 = fcmp olt float %24, %temp12.0 br i1 %60, label %IF, label %ENDIF IF: ; preds = %LOOP %61 = fmul float %temp11.0, %27 %62 = fmul float %56, %34 %63 = fmul float %57, %35 %64 = bitcast float %62 to i32 %65 = bitcast float %63 to i32 %66 = insertelement <4 x i32> undef, i32 %64, i32 0 %67 = insertelement <4 x i32> %66, i32 %65, i32 1 %68 = insertelement <4 x i32> %67, i32 0, i32 2 %69 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %68, <32 x i8> %39, <16 x i8> %41, i32 2) %70 = extractelement <4 x float> %69, i32 0 %71 = extractelement <4 x float> %69, i32 1 %72 = extractelement <4 x float> %69, i32 2 %73 = extractelement <4 x float> %69, i32 3 %74 = fsub float 1.000000e+00, %73 %75 = fmul float %61, %74 %76 = fmul float %75, %26 %77 = call float @llvm.AMDIL.clamp.(float %76, float 0.000000e+00, float 1.000000e+00) %78 = fmul float %30, %77 %79 = fadd float %78, %70 %80 = fmul float %31, %77 %81 = fadd float %80, %71 %82 = fmul float %32, %77 %83 = fadd float %82, %72 %84 = fmul float %33, %77 %85 = fadd float %84, %73 %86 = fmul float %79, %52 %87 = fmul float %81, %53 %88 = fmul float %83, %54 %89 = fmul float %86, %55 %90 = fmul float %87, %55 %91 = fmul float %88, %55 %92 = fmul float %85, %55 %93 = fmul float %48, %92 %94 = fadd float %93, %89 %95 = fmul float %49, %92 %96 = fadd float %95, %90 %97 = fmul float %50, %92 %98 = fadd float %97, %91 %99 = fmul float %51, %92 %100 = fadd float %99, %92 %101 = call i32 @llvm.SI.packf16(float %94, float %96) %102 = bitcast i32 %101 to float %103 = call i32 @llvm.SI.packf16(float %98, float %100) %104 = bitcast i32 %103 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %102, float %104, float %102, float %104) ret void ENDIF: ; preds = %LOOP %105 = fadd float %28, %temp12.0 %106 = fmul float %105, %36 %107 = fadd float %106, %56 %108 = bitcast float %107 to i32 %109 = insertelement <4 x i32> undef, i32 %108, i32 0 br label %LOOP45 LOOP45: ; preds = %ENDIF46, %ENDIF %temp13.0 = phi float [ %59, %ENDIF ], [ %121, %ENDIF46 ] %temp11.1 = phi float [ %temp11.0, %ENDIF ], [ %120, %ENDIF46 ] %110 = fcmp olt float %25, %temp13.0 br i1 %110, label %IF47, label %ENDIF46 IF47: ; preds = %LOOP45 %111 = fadd float %temp12.0, 1.000000e+00 br label %LOOP ENDIF46: ; preds = %LOOP45 %112 = fadd float %29, %temp13.0 %113 = fmul float %112, %37 %114 = fadd float %113, %57 %115 = bitcast float %114 to i32 %116 = insertelement <4 x i32> %109, i32 %115, i32 1 %117 = insertelement <4 x i32> %116, i32 0, i32 2 %118 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %117, <32 x i8> %44, <16 x i8> %47, i32 2) %119 = extractelement <4 x float> %118, i32 3 %120 = fadd float %temp11.1, %119 %121 = fadd float %temp13.0, 1.000000e+00 br label %LOOP45 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[20:23], s[2:3], 0x0 ; C08A0300 s_mov_b32 m0, s9 ; BEFC0309 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s10, s[20:23], 0x0 ; C2051500 s_buffer_load_dword s11, s[20:23], 0x1 ; C2059501 s_buffer_load_dword s3, s[20:23], 0x2 ; C2019502 s_buffer_load_dword s9, s[20:23], 0x3 ; C2049503 s_buffer_load_dword s14, s[20:23], 0x4 ; C2071504 s_buffer_load_dword s15, s[20:23], 0x5 ; C2079505 s_buffer_load_dword s8, s[20:23], 0x8 ; C2041508 s_buffer_load_dword s2, s[20:23], 0x9 ; C2011509 s_buffer_load_dword s1, s[20:23], 0xa ; C200950A s_buffer_load_dword s0, s[20:23], 0xb ; C200150B s_buffer_load_dword s13, s[20:23], 0x10 ; C2069510 s_buffer_load_dword s12, s[20:23], 0x11 ; C2061511 s_buffer_load_dword s16, s[20:23], 0x18 ; C2081518 s_buffer_load_dword s17, s[20:23], 0x19 ; C2089519 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700 v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701 s_load_dwordx4 s[20:23], s[4:5], 0x0 ; C08A0500 s_load_dwordx4 s[40:43], s[4:5], 0x4 ; C0940504 s_load_dwordx8 s[24:31], s[6:7], 0x0 ; C0CC0700 s_load_dwordx8 s[32:39], s[6:7], 0x8 ; C0D00708 v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800 v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801 v_interp_p1_f32 v0, v0, 1, 2, [m0] ; C8000900 v_interp_p2_f32 v0, [v0], v1, 1, 2, [m0] ; C8010901 v_mov_b32_e32 v1, 0x80000000 ; 7E0202FF 80000000 s_waitcnt lgkmcnt(0) ; BF8C007F v_xor_b32_e32 v11, s10, v1 ; 3A16020A v_xor_b32_e32 v12, s11, v1 ; 3A18020B v_mov_b32_e32 v13, 0 ; 7E1A0280 s_mov_b64 s[4:5], 0 ; BE840480 v_mov_b32_e32 v1, v13 ; 7E02030D v_cmp_nlt_f32_e32 vcc, s10, v11 ; 7C1C160A s_and_saveexec_b64 s[6:7], vcc ; BE86246A s_xor_b64 s[6:7], exec, s[6:7] ; 8986067E s_cbranch_execz BB0_4 ; BF880000 v_add_f32_e32 v13, s14, v11 ; 061A160E v_mad_f32 v14, s16, v13, v10 ; D282000E 042A1A10 s_mov_b64 s[18:19], 0 ; BE920480 v_mov_b32_e32 v16, v12 ; 7E20030C v_mov_b32_e32 v15, v1 ; 7E1E0301 v_mov_b32_e32 v13, v15 ; 7E1A030F v_cmp_nlt_f32_e32 vcc, s11, v16 ; 7C1C200B s_and_saveexec_b64 s[44:45], vcc ; BEAC246A s_xor_b64 s[44:45], exec, s[44:45] ; 89AC2C7E s_cbranch_execz BB0_6 ; BF880000 v_add_f32_e32 v15, s15, v16 ; 061E200F v_mad_f32 v15, s17, v15, v0 ; D282000F 04021E11 v_mov_b32_e32 v17, 0 ; 7E220280 v_mov_b32_e32 v18, v14 ; 7E24030E v_mov_b32_e32 v19, v15 ; 7E26030F v_mov_b32_e32 v20, v16 ; 7E280310 v_mov_b32_e32 v21, v17 ; 7E2A0311 v_mov_b32_e32 v20, v17 ; 7E280311 image_sample_l v15, 8, 0, 0, 0, 0, 0, 0, 0, v[18:21], s[32:39], s[40:43] ; F0900800 01480F12 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v15, v15, v13 ; 061E1B0F v_add_f32_e32 v16, 1.0, v16 ; 062020F2 s_or_b64 exec, exec, s[44:45] ; 88FE2C7E s_or_b64 s[18:19], s[44:45], s[18:19] ; 8892122C s_andn2_b64 exec, exec, s[18:19] ; 8AFE127E s_cbranch_execnz BB0_5 ; BF890000 s_or_b64 exec, exec, s[18:19] ; 88FE127E v_add_f32_e32 v11, 1.0, v11 ; 061616F2 s_or_b64 exec, exec, s[6:7] ; 88FE067E s_or_b64 s[4:5], s[6:7], s[4:5] ; 88840406 s_andn2_b64 exec, exec, s[4:5] ; 8AFE047E s_cbranch_execnz BB0_1 ; BF890000 s_or_b64 exec, exec, s[4:5] ; 88FE047E v_mul_f32_e32 v1, s9, v1 ; 10020209 v_mul_f32_e32 v10, s13, v10 ; 1014140D v_mul_f32_e32 v11, s12, v0 ; 1016000C v_mov_b32_e32 v12, 0 ; 7E180280 image_sample_l v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[10:13], s[24:31], s[20:23] ; F0900F00 00A60A0A s_waitcnt vmcnt(0) ; BF8C0770 v_sub_f32_e32 v0, 1.0, v13 ; 08001AF2 v_mul_f32_e32 v0, v0, v1 ; 10000300 v_mul_f32_e32 v0, s3, v0 ; 10000003 v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_mad_f32 v1, s8, v0, v10 ; D2820001 042A0008 v_mad_f32 v10, s2, v0, v11 ; D282000A 042E0002 v_mad_f32 v11, s1, v0, v12 ; D282000B 04320001 v_mac_f32_e32 v13, s0, v0 ; 3E1A0000 v_mul_f32_e32 v0, v6, v1 ; 10000306 v_mul_f32_e32 v1, v7, v10 ; 10021507 v_mul_f32_e32 v6, v8, v11 ; 100C1708 v_mul_f32_e32 v0, v9, v0 ; 10000109 v_mul_f32_e32 v1, v9, v1 ; 10020309 v_mul_f32_e32 v6, v9, v6 ; 100C0D09 v_mul_f32_e32 v7, v9, v13 ; 100E1B09 v_mac_f32_e32 v0, v7, v2 ; 3E000507 v_mac_f32_e32 v1, v7, v3 ; 3E020707 v_mac_f32_e32 v6, v7, v4 ; 3E0C0907 v_mac_f32_e32 v7, v7, v5 ; 3E0E0B07 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v6, v7 ; 5E020F06 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 48 VGPRS: 24 Code Size: 496 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..1] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[2], CONST[0] 2: DP4 TEMP[1].x, IN[2], CONST[1] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: MOV OUT[2], IN[1] 5: MOV OUT[1], IN[0] 6: MOV OUT[0], TEMP[0] 7: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = extractelement <4 x float> %24, i32 1 %27 = extractelement <4 x float> %24, i32 2 %28 = extractelement <4 x float> %24, i32 3 %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = fmul float %41, %13 %46 = fmul float %42, %14 %47 = fadd float %45, %46 %48 = fmul float %43, %15 %49 = fadd float %47, %48 %50 = fmul float %44, %16 %51 = fadd float %49, %50 %52 = fmul float %41, %17 %53 = fmul float %42, %18 %54 = fadd float %52, %53 %55 = fmul float %43, %19 %56 = fadd float %54, %55 %57 = fmul float %44, %20 %58 = fadd float %56, %57 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %25, float %26, float %27, float %28) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %33, float %34, float %35, float %36) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %51, float %58, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s16, s[0:3], 0x0 ; C2080100 s_buffer_load_dword s17, s[0:3], 0x1 ; C2088101 buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 buffer_load_format_xyzw v[9:12], v0, s[8:11], 0 idxen ; E00C2000 80020900 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103 s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104 s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 s_buffer_load_dword s0, s[0:3], 0x7 ; C2000107 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s17, v10 ; 10001411 v_mac_f32_e32 v0, s16, v9 ; 3E001210 v_mul_f32_e32 v10, s7, v10 ; 10141407 v_mac_f32_e32 v10, s6, v9 ; 3E141206 v_mac_f32_e32 v0, s4, v11 ; 3E001604 v_mac_f32_e32 v10, s8, v11 ; 3E141608 v_mac_f32_e32 v0, s5, v12 ; 3E001805 v_mac_f32_e32 v10, s0, v12 ; 3E141800 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 exp 15, 33, 0, 0, 0, v5, v6, v7, v8 ; F800021F 08070605 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 1.0 ; 7E0202F2 v_mov_b32_e32 v2, 0 ; 7E040280 exp 15, 12, 0, 1, 0, v0, v10, v2, v1 ; F80008CF 01020A00 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Code Size: 156 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xyz, IN[0].xyzx 1: MUL TEMP[1].x, IN[0].wwww, IN[1].wwww 2: MOV TEMP[0].w, TEMP[1].xxxx 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %23 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %24 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %25 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %26 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %27 = fmul float %25, %26 %28 = call i32 @llvm.SI.packf16(float %22, float %23) %29 = bitcast i32 %28 to float %30 = call i32 @llvm.SI.packf16(float %24, float %27) %31 = bitcast i32 %30 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %29, float %31, float %29, float %31) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v0, v0, 3, 1, [m0] ; C8000700 v_interp_p2_f32 v0, [v0], v1, 3, 1, [m0] ; C8010701 v_mul_f32_e32 v0, v0, v5 ; 10000B00 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 v_cvt_pkrtz_f16_f32_e32 v0, v4, v0 ; 5E000104 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 68 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..95] DCL TEMP[0..2], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 1020.0400, 0.1000} IMM[1] FLT32 { 1.1000, 2.1000, 3.1000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: MUL TEMP[1].x, IMM[0].zzzz, IN[0].zzzz 2: ADD TEMP[2].x, TEMP[1].xxxx, IMM[0].wwww 3: F2I TEMP[2].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: UARL ADDR[0].x, TEMP[2].xxxx 6: DP4 TEMP[0].x, IN[1], CONST[ADDR[0].x] 7: ADD TEMP[2].x, IMM[1].xxxx, TEMP[1].xxxx 8: F2I TEMP[2].x, TEMP[2].xxxx 9: UARL ADDR[0].x, TEMP[2].xxxx 10: DP4 TEMP[2].x, IN[1], CONST[ADDR[0].x] 11: MOV TEMP[0].y, TEMP[2].xxxx 12: ADD TEMP[2].x, IMM[1].yyyy, TEMP[1].xxxx 13: F2I TEMP[2].x, TEMP[2].xxxx 14: UARL ADDR[0].x, TEMP[2].xxxx 15: UARL ADDR[0].x, TEMP[2].xxxx 16: DP4 TEMP[2].x, IN[1], CONST[ADDR[0].x] 17: ADD TEMP[1].x, IMM[1].zzzz, TEMP[1].xxxx 18: F2I TEMP[1].x, TEMP[1].xxxx 19: UARL ADDR[0].x, TEMP[1].xxxx 20: DP4 TEMP[1].x, IN[1], CONST[ADDR[0].x] 21: MOV TEMP[2].y, TEMP[1].xxxx 22: MOV TEMP[1].xy, TEMP[2].xyxx 23: MOV OUT[1], IN[0] 24: MOV OUT[0], TEMP[0] 25: MOV OUT[2], TEMP[1] 26: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %14 = load <16 x i8>, <16 x i8> addrspace(2)* %13, align 16, !tbaa !0 %15 = add i32 %5, %7 %16 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %14, i32 0, i32 %15) %17 = extractelement <4 x float> %16, i32 0 %18 = extractelement <4 x float> %16, i32 1 %19 = extractelement <4 x float> %16, i32 2 %20 = extractelement <4 x float> %16, i32 3 %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = extractelement <4 x float> %24, i32 1 %27 = extractelement <4 x float> %24, i32 2 %28 = extractelement <4 x float> %24, i32 3 %29 = fmul float %19, 0x408FE051E0000000 %30 = fadd float %29, 0x3FB99999A0000000 %31 = fptosi float %30 to i32 %32 = shl i32 %31, 4 %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %32) %34 = shl i32 %31, 4 %35 = or i32 %34, 4 %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %35) %37 = shl i32 %31, 4 %38 = or i32 %37, 8 %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %38) %40 = shl i32 %31, 4 %41 = or i32 %40, 12 %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %41) %43 = fmul float %25, %33 %44 = fmul float %26, %36 %45 = fadd float %43, %44 %46 = fmul float %27, %39 %47 = fadd float %45, %46 %48 = fmul float %28, %42 %49 = fadd float %47, %48 %50 = fadd float %29, 0x3FF19999A0000000 %51 = fptosi float %50 to i32 %52 = shl i32 %51, 4 %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %52) %54 = shl i32 %51, 4 %55 = or i32 %54, 4 %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %55) %57 = shl i32 %51, 4 %58 = or i32 %57, 8 %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %58) %60 = shl i32 %51, 4 %61 = or i32 %60, 12 %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %61) %63 = fmul float %25, %53 %64 = fmul float %26, %56 %65 = fadd float %63, %64 %66 = fmul float %27, %59 %67 = fadd float %65, %66 %68 = fmul float %28, %62 %69 = fadd float %67, %68 %70 = fadd float %29, 0x4000CCCCC0000000 %71 = fptosi float %70 to i32 %72 = shl i32 %71, 4 %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %72) %74 = shl i32 %71, 4 %75 = or i32 %74, 4 %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %75) %77 = shl i32 %71, 4 %78 = or i32 %77, 8 %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %78) %80 = shl i32 %71, 4 %81 = or i32 %80, 12 %82 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %81) %83 = fmul float %25, %73 %84 = fmul float %26, %76 %85 = fadd float %83, %84 %86 = fmul float %27, %79 %87 = fadd float %85, %86 %88 = fmul float %28, %82 %89 = fadd float %87, %88 %90 = fadd float %29, 0x4008CCCCC0000000 %91 = fptosi float %90 to i32 %92 = shl i32 %91, 4 %93 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %92) %94 = shl i32 %91, 4 %95 = or i32 %94, 4 %96 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %95) %97 = shl i32 %91, 4 %98 = or i32 %97, 8 %99 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %98) %100 = shl i32 %91, 4 %101 = or i32 %100, 12 %102 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %101) %103 = fmul float %25, %93 %104 = fmul float %26, %96 %105 = fadd float %103, %104 %106 = fmul float %27, %99 %107 = fadd float %105, %106 %108 = fmul float %28, %102 %109 = fadd float %107, %108 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %89, float %109, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %49, float %69, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_add_i32_e32 v0, s10, v0 ; 4A00000A v_mov_b32_e32 v1, 0x447f028f ; 7E0202FF 447F028F s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[8:11], 0 idxen ; E00C2000 80020600 s_waitcnt vmcnt(1) ; BF8C0771 v_madak_f32_e32 v0, v4, v1, 0x40066666 ; 42000304 40066666 v_madak_f32_e32 v10, v4, v1, 0x3dcccccd ; 42140304 3DCCCCCD v_madak_f32_e32 v11, v4, v1, 0x3f8ccccd ; 42160304 3F8CCCCD v_madak_f32_e32 v1, v4, v1, 0x40466666 ; 42020304 40466666 v_cvt_i32_f32_e32 v10, v10 ; 7E14110A v_cvt_i32_f32_e32 v11, v11 ; 7E16110B v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_lshlrev_b32_e32 v10, 4, v10 ; 34141484 v_lshlrev_b32_e32 v11, 4, v11 ; 34161684 v_lshlrev_b32_e32 v0, 4, v0 ; 34000084 v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 buffer_load_dword v12, v10, s[0:3], 0 offen ; E0301000 80000C0A v_or_b32_e32 v13, 4, v10 ; 381A1484 v_or_b32_e32 v14, 8, v10 ; 381C1488 v_or_b32_e32 v10, 12, v10 ; 3814148C buffer_load_dword v15, v11, s[0:3], 0 offen ; E0301000 80000F0B v_or_b32_e32 v16, 4, v11 ; 38201684 v_or_b32_e32 v17, 8, v11 ; 38221688 v_or_b32_e32 v11, 12, v11 ; 3816168C buffer_load_dword v18, v0, s[0:3], 0 offen ; E0301000 80001200 v_or_b32_e32 v19, 4, v0 ; 38260084 v_or_b32_e32 v20, 8, v0 ; 38280088 v_or_b32_e32 v0, 12, v0 ; 3800008C buffer_load_dword v13, v13, s[0:3], 0 offen ; E0301000 80000D0D buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 buffer_load_dword v19, v19, s[0:3], 0 offen ; E0301000 80001313 v_or_b32_e32 v21, 4, v1 ; 382A0284 buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 buffer_load_dword v22, v1, s[0:3], 0 offen ; E0301000 80001601 v_or_b32_e32 v23, 8, v1 ; 382E0288 v_or_b32_e32 v1, 12, v1 ; 3802028C buffer_load_dword v14, v14, s[0:3], 0 offen ; E0301000 80000E0E buffer_load_dword v17, v17, s[0:3], 0 offen ; E0301000 80001111 buffer_load_dword v20, v20, s[0:3], 0 offen ; E0301000 80001414 buffer_load_dword v23, v23, s[0:3], 0 offen ; E0301000 80001717 buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 s_waitcnt vmcnt(12) ; BF8C077C v_mul_f32_e32 v13, v13, v7 ; 101A0F0D v_mac_f32_e32 v13, v12, v6 ; 3E1A0D0C s_waitcnt vmcnt(11) ; BF8C077B v_mul_f32_e32 v12, v16, v7 ; 10180F10 v_mac_f32_e32 v12, v15, v6 ; 3E180D0F s_waitcnt vmcnt(10) ; BF8C077A v_mul_f32_e32 v15, v19, v7 ; 101E0F13 v_mac_f32_e32 v15, v18, v6 ; 3E1E0D12 s_waitcnt vmcnt(9) ; BF8C0779 v_mul_f32_e32 v7, v21, v7 ; 100E0F15 s_waitcnt vmcnt(8) ; BF8C0778 v_mac_f32_e32 v7, v22, v6 ; 3E0E0D16 s_waitcnt vmcnt(7) ; BF8C0777 v_mac_f32_e32 v13, v14, v8 ; 3E1A110E s_waitcnt vmcnt(6) ; BF8C0776 v_mac_f32_e32 v12, v17, v8 ; 3E181111 s_waitcnt vmcnt(5) ; BF8C0775 v_mac_f32_e32 v15, v20, v8 ; 3E1E1114 s_waitcnt vmcnt(4) ; BF8C0774 v_mac_f32_e32 v7, v23, v8 ; 3E0E1117 s_waitcnt vmcnt(3) ; BF8C0773 v_mac_f32_e32 v13, v10, v9 ; 3E1A130A s_waitcnt vmcnt(2) ; BF8C0772 v_mac_f32_e32 v12, v11, v9 ; 3E18130B s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v15, v0, v9 ; 3E1E1300 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v7, v1, v9 ; 3E0E1301 exp 15, 32, 0, 0, 0, v2, v3, v4, v5 ; F800020F 05040302 v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 33, 0, 0, 0, v15, v7, v0, v0 ; F800021F 0000070F v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp 15, 12, 0, 1, 0, v13, v12, v0, v1 ; F80008CF 01000C0D s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 24 Code Size: 440 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV TEMP[1].xyz, TEMP[0].xyzx 3: MUL TEMP[0].x, TEMP[0].wwww, IN[0].wwww 4: MOV TEMP[1].w, TEMP[0].xxxx 5: MOV OUT[0], TEMP[1] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %29 = bitcast float %27 to i32 %30 = bitcast float %28 to i32 %31 = insertelement <2 x i32> undef, i32 %29, i32 0 %32 = insertelement <2 x i32> %31, i32 %30, i32 1 %33 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %32, <32 x i8> %23, <16 x i8> %25, i32 2) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = extractelement <4 x float> %33, i32 2 %37 = extractelement <4 x float> %33, i32 3 %38 = fmul float %37, %26 %39 = call i32 @llvm.SI.packf16(float %34, float %35) %40 = bitcast i32 %39 to float %41 = call i32 @llvm.SI.packf16(float %36, float %38) %42 = bitcast i32 %41 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %40, float %42, float %40, float %42) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400 v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[3:6], 15, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[12:19], s[0:3] ; F0800F00 00030303 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v2, v6 ; 10000D02 v_cvt_pkrtz_f16_f32_e32 v0, v5, v0 ; 5E000105 v_cvt_pkrtz_f16_f32_e32 v1, v3, v4 ; 5E020903 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 80 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL SV[0], INSTANCEID DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..95] DCL TEMP[0..2], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 4.0000, 0.1000} IMM[1] FLT32 { 1.1000, 2.1000, 3.1000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: I2F TEMP[1].x, SV[0].xxxx 2: MAD TEMP[2].x, TEMP[1].xxxx, IMM[0].zzzz, IMM[0].wwww 3: F2I TEMP[2].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: UARL ADDR[0].x, TEMP[2].xxxx 6: DP4 TEMP[0].x, IN[1], CONST[ADDR[0].x] 7: MAD TEMP[2].x, TEMP[1].xxxx, IMM[0].zzzz, IMM[1].xxxx 8: F2I TEMP[2].x, TEMP[2].xxxx 9: UARL ADDR[0].x, TEMP[2].xxxx 10: DP4 TEMP[2].x, IN[1], CONST[ADDR[0].x] 11: MOV TEMP[0].y, TEMP[2].xxxx 12: MAD TEMP[2].x, TEMP[1].xxxx, IMM[0].zzzz, IMM[1].yyyy 13: F2I TEMP[2].x, TEMP[2].xxxx 14: UARL ADDR[0].x, TEMP[2].xxxx 15: UARL ADDR[0].x, TEMP[2].xxxx 16: DP4 TEMP[2].x, IN[1], CONST[ADDR[0].x] 17: MAD TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz, IMM[1].zzzz 18: F2I TEMP[1].x, TEMP[1].xxxx 19: UARL ADDR[0].x, TEMP[1].xxxx 20: DP4 TEMP[1].x, IN[1], CONST[ADDR[0].x] 21: MOV TEMP[2].y, TEMP[1].xxxx 22: MOV TEMP[1].xy, TEMP[2].xyxx 23: MOV OUT[1], IN[0] 24: MOV OUT[0], TEMP[0] 25: MOV OUT[2], TEMP[1] 26: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %14 = load <16 x i8>, <16 x i8> addrspace(2)* %13, align 16, !tbaa !0 %15 = add i32 %5, %7 %16 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %14, i32 0, i32 %15) %17 = extractelement <4 x float> %16, i32 0 %18 = extractelement <4 x float> %16, i32 1 %19 = extractelement <4 x float> %16, i32 2 %20 = extractelement <4 x float> %16, i32 3 %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = extractelement <4 x float> %24, i32 1 %27 = extractelement <4 x float> %24, i32 2 %28 = extractelement <4 x float> %24, i32 3 %29 = sitofp i32 %10 to float %30 = fmul float %29, 4.000000e+00 %31 = fadd float %30, 0x3FB99999A0000000 %32 = fptosi float %31 to i32 %33 = shl i32 %32, 4 %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %33) %35 = shl i32 %32, 4 %36 = or i32 %35, 4 %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %36) %38 = shl i32 %32, 4 %39 = or i32 %38, 8 %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %39) %41 = shl i32 %32, 4 %42 = or i32 %41, 12 %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %42) %44 = fmul float %25, %34 %45 = fmul float %26, %37 %46 = fadd float %44, %45 %47 = fmul float %27, %40 %48 = fadd float %46, %47 %49 = fmul float %28, %43 %50 = fadd float %48, %49 %51 = fmul float %29, 4.000000e+00 %52 = fadd float %51, 0x3FF19999A0000000 %53 = fptosi float %52 to i32 %54 = shl i32 %53, 4 %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %54) %56 = shl i32 %53, 4 %57 = or i32 %56, 4 %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %57) %59 = shl i32 %53, 4 %60 = or i32 %59, 8 %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %60) %62 = shl i32 %53, 4 %63 = or i32 %62, 12 %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %63) %65 = fmul float %25, %55 %66 = fmul float %26, %58 %67 = fadd float %65, %66 %68 = fmul float %27, %61 %69 = fadd float %67, %68 %70 = fmul float %28, %64 %71 = fadd float %69, %70 %72 = fmul float %29, 4.000000e+00 %73 = fadd float %72, 0x4000CCCCC0000000 %74 = fptosi float %73 to i32 %75 = shl i32 %74, 4 %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %75) %77 = shl i32 %74, 4 %78 = or i32 %77, 4 %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %78) %80 = shl i32 %74, 4 %81 = or i32 %80, 8 %82 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %81) %83 = shl i32 %74, 4 %84 = or i32 %83, 12 %85 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %84) %86 = fmul float %25, %76 %87 = fmul float %26, %79 %88 = fadd float %86, %87 %89 = fmul float %27, %82 %90 = fadd float %88, %89 %91 = fmul float %28, %85 %92 = fadd float %90, %91 %93 = fmul float %29, 4.000000e+00 %94 = fadd float %93, 0x4008CCCCC0000000 %95 = fptosi float %94 to i32 %96 = shl i32 %95, 4 %97 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %96) %98 = shl i32 %95, 4 %99 = or i32 %98, 4 %100 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %99) %101 = shl i32 %95, 4 %102 = or i32 %101, 8 %103 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %102) %104 = shl i32 %95, 4 %105 = or i32 %104, 12 %106 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %105) %107 = fmul float %25, %97 %108 = fmul float %26, %100 %109 = fadd float %107, %108 %110 = fmul float %27, %103 %111 = fadd float %109, %110 %112 = fmul float %28, %106 %113 = fadd float %111, %112 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %92, float %113, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %50, float %71, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A v_cvt_f32_i32_e32 v1, v3 ; 7E020B03 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 v_madak_f32_e32 v2, 4.0, v1, 0x40066666 ; 420402F6 40066666 v_cvt_i32_f32_e32 v2, v2 ; 7E041102 v_madak_f32_e32 v3, 4.0, v1, 0x40466666 ; 420602F6 40466666 v_cvt_i32_f32_e32 v3, v3 ; 7E061103 v_madak_f32_e32 v4, 4.0, v1, 0x3dcccccd ; 420802F6 3DCCCCCD v_cvt_i32_f32_e32 v4, v4 ; 7E081104 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[5:8], v0, s[4:7], 0 idxen ; E00C2000 80010500 v_lshlrev_b32_e32 v4, 4, v4 ; 34080884 buffer_load_format_xyzw v[9:12], v0, s[8:11], 0 idxen ; E00C2000 80020900 v_or_b32_e32 v0, 4, v4 ; 38000884 buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 v_or_b32_e32 v13, 8, v4 ; 381A0888 buffer_load_dword v13, v13, s[0:3], 0 offen ; E0301000 80000D0D v_madak_f32_e32 v1, 4.0, v1, 0x3f8ccccd ; 420202F6 3F8CCCCD v_cvt_i32_f32_e32 v1, v1 ; 7E021101 buffer_load_dword v14, v4, s[0:3], 0 offen ; E0301000 80000E04 v_or_b32_e32 v4, 12, v4 ; 3808088C buffer_load_dword v4, v4, s[0:3], 0 offen ; E0301000 80000404 v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 v_or_b32_e32 v15, 4, v1 ; 381E0284 buffer_load_dword v15, v15, s[0:3], 0 offen ; E0301000 80000F0F v_or_b32_e32 v16, 8, v1 ; 38200288 buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 buffer_load_dword v17, v1, s[0:3], 0 offen ; E0301000 80001101 v_or_b32_e32 v1, 12, v1 ; 3802028C buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 v_lshlrev_b32_e32 v2, 4, v2 ; 34040484 v_or_b32_e32 v18, 4, v2 ; 38240484 buffer_load_dword v18, v18, s[0:3], 0 offen ; E0301000 80001212 v_lshlrev_b32_e32 v3, 4, v3 ; 34060684 buffer_load_dword v19, v2, s[0:3], 0 offen ; E0301000 80001302 v_or_b32_e32 v20, 4, v3 ; 38280684 buffer_load_dword v20, v20, s[0:3], 0 offen ; E0301000 80001414 buffer_load_dword v21, v3, s[0:3], 0 offen ; E0301000 80001503 v_or_b32_e32 v22, 8, v2 ; 382C0488 buffer_load_dword v22, v22, s[0:3], 0 offen ; E0301000 80001616 v_or_b32_e32 v23, 8, v3 ; 382E0688 buffer_load_dword v23, v23, s[0:3], 0 offen ; E0301000 80001717 v_or_b32_e32 v2, 12, v2 ; 3804048C buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 v_or_b32_e32 v3, 12, v3 ; 3806068C buffer_load_dword v3, v3, s[0:3], 0 offen ; E0301000 80000303 s_waitcnt ; BF8C077F v_mul_f32_e32 v0, v0, v10 ; 10001500 s_waitcnt vmcnt(13) ; BF8C077D v_mac_f32_e32 v0, v14, v9 ; 3E00130E s_waitcnt vmcnt(11) ; BF8C077B v_mul_f32_e32 v14, v15, v10 ; 101C150F s_waitcnt vmcnt(9) ; BF8C0779 v_mac_f32_e32 v14, v17, v9 ; 3E1C1311 s_waitcnt vmcnt(7) ; BF8C0777 v_mul_f32_e32 v15, v18, v10 ; 101E1512 s_waitcnt vmcnt(6) ; BF8C0776 v_mac_f32_e32 v15, v19, v9 ; 3E1E1313 s_waitcnt vmcnt(5) ; BF8C0775 v_mul_f32_e32 v10, v20, v10 ; 10141514 s_waitcnt vmcnt(4) ; BF8C0774 v_mac_f32_e32 v10, v21, v9 ; 3E141315 v_mac_f32_e32 v0, v13, v11 ; 3E00170D v_mac_f32_e32 v14, v16, v11 ; 3E1C1710 s_waitcnt vmcnt(3) ; BF8C0773 v_mac_f32_e32 v15, v22, v11 ; 3E1E1716 s_waitcnt vmcnt(2) ; BF8C0772 v_mac_f32_e32 v10, v23, v11 ; 3E141717 v_mac_f32_e32 v0, v4, v12 ; 3E001904 v_mac_f32_e32 v14, v1, v12 ; 3E1C1901 s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v15, v2, v12 ; 3E1E1902 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v10, v3, v12 ; 3E141903 exp 15, 32, 0, 0, 0, v5, v6, v7, v8 ; F800020F 08070605 v_mov_b32_e32 v1, 0 ; 7E020280 exp 15, 33, 0, 0, 0, v15, v10, v1, v1 ; F800021F 01010A0F v_mov_b32_e32 v2, 1.0 ; 7E0402F2 exp 15, 12, 0, 1, 0, v0, v14, v1, v2 ; F80008CF 02010E00 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 24 Code Size: 428 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV TEMP[1].xyz, TEMP[0].xyzx 3: MUL TEMP[0].x, TEMP[0].wwww, IN[0].wwww 4: MOV TEMP[1].w, TEMP[0].xxxx 5: MOV OUT[0], TEMP[1] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %29 = bitcast float %27 to i32 %30 = bitcast float %28 to i32 %31 = insertelement <2 x i32> undef, i32 %29, i32 0 %32 = insertelement <2 x i32> %31, i32 %30, i32 1 %33 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %32, <32 x i8> %23, <16 x i8> %25, i32 2) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = extractelement <4 x float> %33, i32 2 %37 = extractelement <4 x float> %33, i32 3 %38 = fmul float %37, %26 %39 = call i32 @llvm.SI.packf16(float %34, float %35) %40 = bitcast i32 %39 to float %41 = call i32 @llvm.SI.packf16(float %36, float %38) %42 = bitcast i32 %41 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %40, float %42, float %40, float %42) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400 v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[3:6], 15, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[12:19], s[0:3] ; F0800F00 00030303 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v2, v6 ; 10000D02 v_cvt_pkrtz_f16_f32_e32 v0, v5, v0 ; 5E000105 v_cvt_pkrtz_f16_f32_e32 v1, v3, v4 ; 5E020903 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 80 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..95] DCL TEMP[0..2], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 4.0000, 2.1000} IMM[1] FLT32 { 3.1000, 1.1000, 0.1000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: MUL TEMP[1].x, IN[3].xxxx, IMM[0].zzzz 2: ADD TEMP[2].x, IMM[0].wwww, TEMP[1].xxxx 3: F2I TEMP[2].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: UARL ADDR[0].x, TEMP[2].xxxx 6: DP4 TEMP[0].x, IN[2], CONST[ADDR[0].x] 7: ADD TEMP[2].x, IMM[1].xxxx, TEMP[1].xxxx 8: F2I TEMP[2].x, TEMP[2].xxxx 9: UARL ADDR[0].x, TEMP[2].xxxx 10: DP4 TEMP[2].x, IN[2], CONST[ADDR[0].x] 11: MOV TEMP[0].y, TEMP[2].xxxx 12: ADD TEMP[2].x, IMM[1].yyyy, TEMP[1].xxxx 13: F2I TEMP[2].x, TEMP[2].xxxx 14: ADD TEMP[1].x, TEMP[1].xxxx, IMM[1].zzzz 15: F2I TEMP[1].x, TEMP[1].xxxx 16: UARL ADDR[0].x, TEMP[1].xxxx 17: UARL ADDR[0].x, TEMP[1].xxxx 18: MOV TEMP[1], CONST[ADDR[0].x] 19: UARL ADDR[0].x, TEMP[2].xxxx 20: UARL ADDR[0].x, TEMP[2].xxxx 21: MAD TEMP[1], IN[0], CONST[ADDR[0].x], TEMP[1] 22: MOV TEMP[2].xy, IN[1].xyxx 23: MOV OUT[1], TEMP[1] 24: MOV OUT[0], TEMP[0] 25: MOV OUT[2], TEMP[2] 26: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %14 = load <16 x i8>, <16 x i8> addrspace(2)* %13, align 16, !tbaa !0 %15 = add i32 %5, %7 %16 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %14, i32 0, i32 %15) %17 = extractelement <4 x float> %16, i32 0 %18 = extractelement <4 x float> %16, i32 1 %19 = extractelement <4 x float> %16, i32 2 %20 = extractelement <4 x float> %16, i32 3 %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = extractelement <4 x float> %24, i32 1 %27 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %28 = load <16 x i8>, <16 x i8> addrspace(2)* %27, align 16, !tbaa !0 %29 = add i32 %5, %7 %30 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %28, i32 0, i32 %29) %31 = extractelement <4 x float> %30, i32 0 %32 = extractelement <4 x float> %30, i32 1 %33 = extractelement <4 x float> %30, i32 2 %34 = extractelement <4 x float> %30, i32 3 %35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0 %37 = add i32 %5, %7 %38 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %37) %39 = extractelement <4 x float> %38, i32 0 %40 = fmul float %39, 4.000000e+00 %41 = fadd float %40, 0x4000CCCCC0000000 %42 = fptosi float %41 to i32 %43 = shl i32 %42, 4 %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %43) %45 = shl i32 %42, 4 %46 = or i32 %45, 4 %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %46) %48 = shl i32 %42, 4 %49 = or i32 %48, 8 %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %49) %51 = shl i32 %42, 4 %52 = or i32 %51, 12 %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %52) %54 = fmul float %31, %44 %55 = fmul float %32, %47 %56 = fadd float %54, %55 %57 = fmul float %33, %50 %58 = fadd float %56, %57 %59 = fmul float %34, %53 %60 = fadd float %58, %59 %61 = fadd float %40, 0x4008CCCCC0000000 %62 = fptosi float %61 to i32 %63 = shl i32 %62, 4 %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %63) %65 = shl i32 %62, 4 %66 = or i32 %65, 4 %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %66) %68 = shl i32 %62, 4 %69 = or i32 %68, 8 %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %69) %71 = shl i32 %62, 4 %72 = or i32 %71, 12 %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %72) %74 = fmul float %31, %64 %75 = fmul float %32, %67 %76 = fadd float %74, %75 %77 = fmul float %33, %70 %78 = fadd float %76, %77 %79 = fmul float %34, %73 %80 = fadd float %78, %79 %81 = fadd float %40, 0x3FF19999A0000000 %82 = fptosi float %81 to i32 %83 = fadd float %40, 0x3FB99999A0000000 %84 = fptosi float %83 to i32 %85 = shl i32 %84, 4 %86 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %85) %87 = shl i32 %84, 4 %88 = or i32 %87, 4 %89 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %88) %90 = shl i32 %84, 4 %91 = or i32 %90, 8 %92 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %91) %93 = shl i32 %84, 4 %94 = or i32 %93, 12 %95 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %94) %96 = shl i32 %82, 4 %97 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %96) %98 = fmul float %17, %97 %99 = fadd float %98, %86 %100 = shl i32 %82, 4 %101 = or i32 %100, 4 %102 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %101) %103 = fmul float %18, %102 %104 = fadd float %103, %89 %105 = shl i32 %82, 4 %106 = or i32 %105, 8 %107 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %106) %108 = fmul float %19, %107 %109 = fadd float %108, %92 %110 = shl i32 %82, 4 %111 = or i32 %110, 12 %112 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %111) %113 = fmul float %20, %112 %114 = fadd float %113, %95 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %99, float %104, float %109, float %114) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %25, float %26, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %60, float %80, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[7:10], v0, s[16:19], 0 idxen ; E00C2000 80040700 buffer_load_format_xyzw v[11:14], v0, s[8:11], 0 idxen ; E00C2000 80020B00 s_waitcnt vmcnt(0) ; BF8C0770 v_madak_f32_e32 v0, 4.0, v11, 0x40066666 ; 420016F6 40066666 v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_madak_f32_e32 v12, 4.0, v11, 0x40466666 ; 421816F6 40466666 v_cvt_i32_f32_e32 v12, v12 ; 7E18110C v_madak_f32_e32 v13, 4.0, v11, 0x3f8ccccd ; 421A16F6 3F8CCCCD v_madak_f32_e32 v11, 4.0, v11, 0x3dcccccd ; 421616F6 3DCCCCCD v_cvt_i32_f32_e32 v11, v11 ; 7E16110B v_cvt_i32_f32_e32 v13, v13 ; 7E1A110D v_lshlrev_b32_e32 v0, 4, v0 ; 34000084 v_lshlrev_b32_e32 v12, 4, v12 ; 34181884 v_lshlrev_b32_e32 v11, 4, v11 ; 34161684 v_lshlrev_b32_e32 v13, 4, v13 ; 341A1A84 buffer_load_dword v14, v0, s[0:3], 0 offen ; E0301000 80000E00 v_or_b32_e32 v15, 4, v0 ; 381E0084 v_or_b32_e32 v16, 8, v0 ; 38200088 v_or_b32_e32 v0, 12, v0 ; 3800008C buffer_load_dword v17, v11, s[0:3], 0 offen ; E0301000 8000110B v_or_b32_e32 v18, 4, v11 ; 38241684 v_or_b32_e32 v19, 8, v11 ; 38261688 v_or_b32_e32 v11, 12, v11 ; 3816168C buffer_load_dword v20, v13, s[0:3], 0 offen ; E0301000 8000140D v_or_b32_e32 v21, 4, v13 ; 382A1A84 v_or_b32_e32 v22, 8, v13 ; 382C1A88 v_or_b32_e32 v13, 12, v13 ; 381A1A8C buffer_load_dword v15, v15, s[0:3], 0 offen ; E0301000 80000F0F buffer_load_dword v18, v18, s[0:3], 0 offen ; E0301000 80001212 buffer_load_dword v19, v19, s[0:3], 0 offen ; E0301000 80001313 buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 buffer_load_dword v22, v22, s[0:3], 0 offen ; E0301000 80001616 buffer_load_dword v13, v13, s[0:3], 0 offen ; E0301000 80000D0D v_or_b32_e32 v23, 4, v12 ; 382E1884 buffer_load_dword v23, v23, s[0:3], 0 offen ; E0301000 80001717 buffer_load_dword v24, v12, s[0:3], 0 offen ; E0301000 8000180C v_or_b32_e32 v25, 8, v12 ; 38321888 v_or_b32_e32 v12, 12, v12 ; 3818188C buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 buffer_load_dword v25, v25, s[0:3], 0 offen ; E0301000 80001919 buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 buffer_load_dword v12, v12, s[0:3], 0 offen ; E0301000 80000C0C s_waitcnt vmcnt(13) ; BF8C077D v_mac_f32_e32 v17, v20, v1 ; 3E220314 s_waitcnt vmcnt(8) ; BF8C0778 v_mac_f32_e32 v18, v21, v2 ; 3E240515 s_waitcnt vmcnt(7) ; BF8C0777 v_mac_f32_e32 v19, v22, v3 ; 3E260716 s_waitcnt vmcnt(6) ; BF8C0776 v_mac_f32_e32 v11, v13, v4 ; 3E16090D v_mul_f32_e32 v1, v15, v8 ; 1002110F v_mac_f32_e32 v1, v14, v7 ; 3E020F0E s_waitcnt vmcnt(5) ; BF8C0775 v_mul_f32_e32 v2, v23, v8 ; 10041117 s_waitcnt vmcnt(4) ; BF8C0774 v_mac_f32_e32 v2, v24, v7 ; 3E040F18 s_waitcnt vmcnt(3) ; BF8C0773 v_mac_f32_e32 v1, v16, v9 ; 3E021310 s_waitcnt vmcnt(2) ; BF8C0772 v_mac_f32_e32 v2, v25, v9 ; 3E041319 s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v1, v0, v10 ; 3E021500 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v2, v12, v10 ; 3E04150C exp 15, 32, 0, 0, 0, v17, v18, v19, v11 ; F800020F 0B131211 v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 33, 0, 0, 0, v5, v6, v0, v0 ; F800021F 00000605 v_mov_b32_e32 v3, 1.0 ; 7E0602F2 exp 15, 12, 0, 1, 0, v1, v2, v0, v3 ; F80008CF 03000201 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 28 Code Size: 432 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xyz, IN[0].xyzx 1: MOV TEMP[1].xy, IN[1].xyyy 2: TEX TEMP[1].x, TEMP[1], SAMP[0], 2D 3: MUL TEMP[1].x, IN[0].wwww, TEMP[1].xxxx 4: MOV TEMP[0].w, TEMP[1].xxxx 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %32 = bitcast float %30 to i32 %33 = bitcast float %31 to i32 %34 = insertelement <2 x i32> undef, i32 %32, i32 0 %35 = insertelement <2 x i32> %34, i32 %33, i32 1 %36 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %35, <32 x i8> %23, <16 x i8> %25, i32 2) %37 = extractelement <4 x float> %36, i32 0 %38 = fmul float %29, %37 %39 = call i32 @llvm.SI.packf16(float %26, float %27) %40 = bitcast i32 %39 to float %41 = call i32 @llvm.SI.packf16(float %28, float %38) %42 = bitcast i32 %41 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %40, float %42, float %40, float %42) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v0, 1, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[8:15], s[0:3] ; F0800100 00020006 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v0, v5 ; 10000B00 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 v_cvt_pkrtz_f16_f32_e32 v0, v4, v0 ; 5E000104 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 104 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL CONST[0..47] DCL TEMP[0..2], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 2.0000, 0.1000} IMM[1] FLT32 { 1.1000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: MUL TEMP[1].x, IN[1].xxxx, IMM[0].zzzz 2: ADD TEMP[2].x, TEMP[1].xxxx, IMM[0].wwww 3: F2I TEMP[2].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: UARL ADDR[0].x, TEMP[2].xxxx 6: DP4 TEMP[0].x, IN[0], CONST[ADDR[0].x] 7: ADD TEMP[1].x, IMM[1].xxxx, TEMP[1].xxxx 8: F2I TEMP[1].x, TEMP[1].xxxx 9: UARL ADDR[0].x, TEMP[1].xxxx 10: DP4 TEMP[1].x, IN[0], CONST[ADDR[0].x] 11: MOV TEMP[0].y, TEMP[1].xxxx 12: MOV OUT[0], TEMP[0] 13: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %14 = load <16 x i8>, <16 x i8> addrspace(2)* %13, align 16, !tbaa !0 %15 = add i32 %5, %7 %16 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %14, i32 0, i32 %15) %17 = extractelement <4 x float> %16, i32 0 %18 = extractelement <4 x float> %16, i32 1 %19 = extractelement <4 x float> %16, i32 2 %20 = extractelement <4 x float> %16, i32 3 %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = fmul float %25, 2.000000e+00 %27 = fadd float %26, 0x3FB99999A0000000 %28 = fptosi float %27 to i32 %29 = shl i32 %28, 4 %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %29) %31 = shl i32 %28, 4 %32 = or i32 %31, 4 %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %32) %34 = shl i32 %28, 4 %35 = or i32 %34, 8 %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %35) %37 = shl i32 %28, 4 %38 = or i32 %37, 12 %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %38) %40 = fmul float %17, %30 %41 = fmul float %18, %33 %42 = fadd float %40, %41 %43 = fmul float %19, %36 %44 = fadd float %42, %43 %45 = fmul float %20, %39 %46 = fadd float %44, %45 %47 = fadd float %26, 0x3FF19999A0000000 %48 = fptosi float %47 to i32 %49 = shl i32 %48, 4 %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %49) %51 = shl i32 %48, 4 %52 = or i32 %51, 4 %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %52) %54 = shl i32 %48, 4 %55 = or i32 %54, 8 %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %55) %57 = shl i32 %48, 4 %58 = or i32 %57, 12 %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %58) %60 = fmul float %17, %50 %61 = fmul float %18, %53 %62 = fadd float %60, %61 %63 = fmul float %19, %56 %64 = fadd float %62, %63 %65 = fmul float %20, %59 %66 = fadd float %64, %65 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %46, float %66, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_waitcnt vmcnt(0) ; BF8C0770 v_madak_f32_e32 v0, 2.0, v5, 0x3dcccccd ; 42000AF4 3DCCCCCD v_madak_f32_e32 v5, 2.0, v5, 0x3f8ccccd ; 420A0AF4 3F8CCCCD v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_cvt_i32_f32_e32 v5, v5 ; 7E0A1105 v_lshlrev_b32_e32 v0, 4, v0 ; 34000084 v_lshlrev_b32_e32 v5, 4, v5 ; 340A0A84 buffer_load_dword v6, v0, s[0:3], 0 offen ; E0301000 80000600 v_or_b32_e32 v7, 4, v0 ; 380E0084 v_or_b32_e32 v8, 8, v0 ; 38100088 v_or_b32_e32 v0, 12, v0 ; 3800008C buffer_load_dword v9, v5, s[0:3], 0 offen ; E0301000 80000905 buffer_load_dword v7, v7, s[0:3], 0 offen ; E0301000 80000707 v_or_b32_e32 v10, 4, v5 ; 38140A84 v_or_b32_e32 v11, 8, v5 ; 38160A88 buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A v_or_b32_e32 v5, 12, v5 ; 380A0A8C buffer_load_dword v8, v8, s[0:3], 0 offen ; E0301000 80000808 buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 buffer_load_dword v5, v5, s[0:3], 0 offen ; E0301000 80000505 s_waitcnt vmcnt(5) ; BF8C0775 v_mul_f32_e32 v7, v7, v2 ; 100E0507 v_mac_f32_e32 v7, v6, v1 ; 3E0E0306 v_mov_b32_e32 v6, 1.0 ; 7E0C02F2 s_waitcnt vmcnt(4) ; BF8C0774 v_mul_f32_e32 v2, v10, v2 ; 1004050A v_mac_f32_e32 v2, v9, v1 ; 3E040309 s_waitcnt vmcnt(3) ; BF8C0773 v_mac_f32_e32 v7, v8, v3 ; 3E0E0708 s_waitcnt vmcnt(2) ; BF8C0772 v_mac_f32_e32 v2, v11, v3 ; 3E04070B s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v7, v0, v4 ; 3E0E0900 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v2, v5, v4 ; 3E040905 v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 12, 0, 1, 0, v7, v2, v0, v6 ; F80008CF 06000207 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 12 Code Size: 236 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL OUT[0], COLOR DCL CONST[0] 0: MOV OUT[0], CONST[0] 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %28 = call i32 @llvm.SI.packf16(float %24, float %25) %29 = bitcast i32 %28 to float %30 = call i32 @llvm.SI.packf16(float %26, float %27) %31 = bitcast i32 %30 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %29, float %31, float %29, float %31) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s0, s[0:3], 0x3 ; C2000103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s5 ; 7E000205 v_cvt_pkrtz_f16_f32_e32 v0, s4, v0 ; 5E000004 v_mov_b32_e32 v1, s0 ; 7E020200 v_cvt_pkrtz_f16_f32_e32 v1, s6, v1 ; 5E020206 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 8 VGPRS: 4 Code Size: 56 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL CONST[0..1] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[0], CONST[0] 2: DP4 TEMP[1].x, IN[0], CONST[1] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: MOV OUT[0], TEMP[0] 5: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = extractelement <4 x float> %24, i32 1 %27 = extractelement <4 x float> %24, i32 2 %28 = extractelement <4 x float> %24, i32 3 %29 = fmul float %25, %13 %30 = fmul float %26, %14 %31 = fadd float %29, %30 %32 = fmul float %27, %15 %33 = fadd float %31, %32 %34 = fmul float %28, %16 %35 = fadd float %33, %34 %36 = fmul float %25, %17 %37 = fmul float %26, %18 %38 = fadd float %36, %37 %39 = fmul float %27, %19 %40 = fadd float %38, %39 %41 = fmul float %28, %20 %42 = fadd float %40, %41 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %35, float %42, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_buffer_load_dword s6, s[0:3], 0x0 ; C2030100 s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104 s_buffer_load_dword s8, s[0:3], 0x2 ; C2040102 s_buffer_load_dword s9, s[0:3], 0x6 ; C2048106 s_buffer_load_dword s10, s[0:3], 0x3 ; C2050103 s_buffer_load_dword s0, s[0:3], 0x7 ; C2000107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v1 ; 10080204 v_mul_f32_e32 v1, s5, v1 ; 10020205 v_mac_f32_e32 v4, s6, v0 ; 3E080006 v_mac_f32_e32 v1, s7, v0 ; 3E020007 v_mac_f32_e32 v4, s8, v2 ; 3E080408 v_mac_f32_e32 v1, s9, v2 ; 3E020409 v_mac_f32_e32 v4, s10, v3 ; 3E08060A v_mac_f32_e32 v1, s0, v3 ; 3E020600 v_mov_b32_e32 v0, 1.0 ; 7E0002F2 v_mov_b32_e32 v2, 0 ; 7E040280 exp 15, 12, 0, 1, 0, v4, v1, v2, v0 ; F80008CF 00020104 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 116 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL OUT[0], COLOR DCL CONST[0] 0: MOV OUT[0], CONST[0] 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %28 = call i32 @llvm.SI.packf16(float %24, float %25) %29 = bitcast i32 %28 to float %30 = call i32 @llvm.SI.packf16(float %26, float %27) %31 = bitcast i32 %30 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %29, float %31, float %29, float %31) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s0, s[0:3], 0x3 ; C2000103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s5 ; 7E000205 v_cvt_pkrtz_f16_f32_e32 v0, s4, v0 ; 5E000004 v_mov_b32_e32 v1, s0 ; 7E020200 v_cvt_pkrtz_f16_f32_e32 v1, s6, v1 ; 5E020206 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 8 VGPRS: 4 Code Size: 56 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..3] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[2], CONST[2] 2: DP4 TEMP[1].x, IN[2], CONST[3] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: MAD TEMP[1], IN[0], CONST[1], CONST[0] 5: MOV TEMP[2].xy, IN[1].xyxx 6: MOV OUT[1], TEMP[1] 7: MOV OUT[0], TEMP[0] 8: MOV OUT[2], TEMP[2] 9: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = add i32 %5, %7 %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45) %47 = extractelement <4 x float> %46, i32 0 %48 = extractelement <4 x float> %46, i32 1 %49 = extractelement <4 x float> %46, i32 2 %50 = extractelement <4 x float> %46, i32 3 %51 = fmul float %47, %21 %52 = fmul float %48, %22 %53 = fadd float %51, %52 %54 = fmul float %49, %23 %55 = fadd float %53, %54 %56 = fmul float %50, %24 %57 = fadd float %55, %56 %58 = fmul float %47, %25 %59 = fmul float %48, %26 %60 = fadd float %58, %59 %61 = fmul float %49, %27 %62 = fadd float %60, %61 %63 = fmul float %50, %28 %64 = fadd float %62, %63 %65 = fmul float %33, %17 %66 = fadd float %65, %13 %67 = fmul float %34, %18 %68 = fadd float %67, %14 %69 = fmul float %35, %19 %70 = fadd float %69, %15 %71 = fmul float %36, %20 %72 = fadd float %71, %16 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %66, float %68, float %70, float %72) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %41, float %42, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %57, float %64, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s16, s[0:3], 0x0 ; C2080100 s_buffer_load_dword s17, s[0:3], 0x1 ; C2088101 buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[7:10], v0, s[8:11], 0 idxen ; E00C2000 80020700 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103 s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104 s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 v_mov_b32_e32 v0, s16 ; 7E000210 s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107 v_mov_b32_e32 v11, s17 ; 7E160211 s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108 s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109 s_buffer_load_dword s12, s[0:3], 0xa ; C206010A s_buffer_load_dword s13, s[0:3], 0xb ; C206810B s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v12, s4 ; 7E180204 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C v_mov_b32_e32 v13, s5 ; 7E1A0205 s_buffer_load_dword s5, s[0:3], 0xd ; C202810D s_buffer_load_dword s14, s[0:3], 0xe ; C207010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F v_mac_f32_e32 v0, s6, v1 ; 3E000206 v_mac_f32_e32 v11, s7, v2 ; 3E160407 v_mac_f32_e32 v12, s8, v3 ; 3E180608 v_mac_f32_e32 v13, s9, v4 ; 3E1A0809 v_mul_f32_e32 v1, s11, v8 ; 1002100B v_mac_f32_e32 v1, s10, v7 ; 3E020E0A s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s5, v8 ; 10041005 v_mac_f32_e32 v2, s4, v7 ; 3E040E04 v_mac_f32_e32 v1, s12, v9 ; 3E02120C v_mac_f32_e32 v2, s14, v9 ; 3E04120E v_mac_f32_e32 v1, s13, v10 ; 3E02140D v_mac_f32_e32 v2, s0, v10 ; 3E041400 exp 15, 32, 0, 0, 0, v0, v11, v12, v13 ; F800020F 0D0C0B00 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 33, 0, 0, 0, v5, v6, v0, v0 ; F800021F 00000605 v_mov_b32_e32 v3, 1.0 ; 7E0602F2 exp 15, 12, 0, 1, 0, v1, v2, v0, v3 ; F80008CF 03000201 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Code Size: 228 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xyz, IN[0].xyzx 1: MOV TEMP[1].xy, IN[1].xyyy 2: TEX TEMP[1].x, TEMP[1], SAMP[0], 2D 3: MUL TEMP[1].x, IN[0].wwww, TEMP[1].xxxx 4: MOV TEMP[0].w, TEMP[1].xxxx 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %32 = bitcast float %30 to i32 %33 = bitcast float %31 to i32 %34 = insertelement <2 x i32> undef, i32 %32, i32 0 %35 = insertelement <2 x i32> %34, i32 %33, i32 1 %36 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %35, <32 x i8> %23, <16 x i8> %25, i32 2) %37 = extractelement <4 x float> %36, i32 0 %38 = fmul float %29, %37 %39 = call i32 @llvm.SI.packf16(float %26, float %27) %40 = bitcast i32 %39 to float %41 = call i32 @llvm.SI.packf16(float %28, float %38) %42 = bitcast i32 %41 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %40, float %42, float %40, float %42) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v0, 1, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[8:15], s[0:3] ; F0800100 00020006 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v0, v5 ; 10000B00 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 v_cvt_pkrtz_f16_f32_e32 v0, v4, v0 ; 5E000104 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 104 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..1] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[1], CONST[0] 2: DP4 TEMP[1].x, IN[1], CONST[1] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: MOV TEMP[1].xy, IN[0].xyxx 5: MOV OUT[0], TEMP[0] 6: MOV OUT[1], TEMP[1] 7: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = extractelement <4 x float> %24, i32 1 %27 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %28 = load <16 x i8>, <16 x i8> addrspace(2)* %27, align 16, !tbaa !0 %29 = add i32 %5, %7 %30 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %28, i32 0, i32 %29) %31 = extractelement <4 x float> %30, i32 0 %32 = extractelement <4 x float> %30, i32 1 %33 = extractelement <4 x float> %30, i32 2 %34 = extractelement <4 x float> %30, i32 3 %35 = fmul float %31, %13 %36 = fmul float %32, %14 %37 = fadd float %35, %36 %38 = fmul float %33, %15 %39 = fadd float %37, %38 %40 = fmul float %34, %16 %41 = fadd float %39, %40 %42 = fmul float %31, %17 %43 = fmul float %32, %18 %44 = fadd float %42, %43 %45 = fmul float %33, %19 %46 = fadd float %44, %45 %47 = fmul float %34, %20 %48 = fadd float %46, %47 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %25, float %26, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %41, float %48, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, s10, v0 ; 4A00000A v_mov_b32_e32 v1, 0 ; 7E020280 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100 s_buffer_load_dword s9, s[0:3], 0x1 ; C2048101 s_buffer_load_dword s10, s[0:3], 0x2 ; C2050102 s_buffer_load_dword s11, s[0:3], 0x3 ; C2058103 buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[4:7], v0, s[12:15], 0 idxen ; E00C2000 80030400 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_buffer_load_dword s6, s[0:3], 0x6 ; C2030106 s_buffer_load_dword s0, s[0:3], 0x7 ; C2000107 exp 15, 32, 0, 0, 0, v2, v3, v1, v1 ; F800020F 01010302 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, s9, v5 ; 10000A09 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v2, s5, v5 ; 10040A05 v_mac_f32_e32 v0, s8, v4 ; 3E000808 v_mac_f32_e32 v2, s4, v4 ; 3E040804 v_mac_f32_e32 v0, s10, v6 ; 3E000C0A v_mac_f32_e32 v2, s6, v6 ; 3E040C06 v_mac_f32_e32 v0, s11, v7 ; 3E000E0B v_mac_f32_e32 v2, s0, v7 ; 3E040E00 v_mov_b32_e32 v3, 1.0 ; 7E0602F2 exp 15, 12, 0, 1, 0, v0, v2, v1, v3 ; F80008CF 03010200 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 140 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0], LOCAL 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV OUT[0], TEMP[0] 3: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = bitcast float %26 to i32 %29 = bitcast float %27 to i32 %30 = insertelement <2 x i32> undef, i32 %28, i32 0 %31 = insertelement <2 x i32> %30, i32 %29, i32 1 %32 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %23, <16 x i8> %25, i32 2) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = call i32 @llvm.SI.packf16(float %33, float %34) %38 = bitcast i32 %37 to float %39 = call i32 @llvm.SI.packf16(float %35, float %36) %40 = bitcast i32 %39 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %38, float %40, float %38, float %40) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00030002 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 4 Code Size: 68 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL CONST[0..5] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[1], CONST[2] 2: DP4 TEMP[1].x, IN[1], CONST[3] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: DP4 TEMP[1].x, IN[1], CONST[4] 5: DP4 TEMP[2].x, IN[1], CONST[5] 6: MOV TEMP[1].y, TEMP[2].xxxx 7: MOV TEMP[1].xy, TEMP[1].xyxx 8: MOV OUT[1], IN[0] 9: MOV OUT[2], CONST[0] 10: MOV OUT[3], CONST[1] 11: MOV OUT[0], TEMP[0] 12: MOV OUT[4], TEMP[1] 13: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = extractelement <4 x float> %48, i32 3 %53 = fmul float %49, %21 %54 = fmul float %50, %22 %55 = fadd float %53, %54 %56 = fmul float %51, %23 %57 = fadd float %55, %56 %58 = fmul float %52, %24 %59 = fadd float %57, %58 %60 = fmul float %49, %25 %61 = fmul float %50, %26 %62 = fadd float %60, %61 %63 = fmul float %51, %27 %64 = fadd float %62, %63 %65 = fmul float %52, %28 %66 = fadd float %64, %65 %67 = fmul float %49, %29 %68 = fmul float %50, %30 %69 = fadd float %67, %68 %70 = fmul float %51, %31 %71 = fadd float %69, %70 %72 = fmul float %52, %32 %73 = fadd float %71, %72 %74 = fmul float %49, %33 %75 = fmul float %50, %34 %76 = fadd float %74, %75 %77 = fmul float %51, %35 %78 = fadd float %76, %77 %79 = fmul float %52, %36 %80 = fadd float %78, %79 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %43, float %44) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %13, float %14, float %15, float %16) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %73, float %80, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %59, float %66, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0xf ; C204010F s_buffer_load_dword s9, s[0:3], 0x10 ; C2048110 s_buffer_load_dword s10, s[0:3], 0x11 ; C2050111 buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_buffer_load_dword s5, s[0:3], 0x13 ; C2028113 s_buffer_load_dword s6, s[0:3], 0x14 ; C2030114 s_buffer_load_dword s7, s[0:3], 0x15 ; C2038115 s_buffer_load_dword s11, s[0:3], 0x16 ; C2058116 s_buffer_load_dword s12, s[0:3], 0x17 ; C2060117 s_buffer_load_dword s13, s[0:3], 0x5 ; C2068105 s_buffer_load_dword s14, s[0:3], 0x6 ; C2070106 s_buffer_load_dword s15, s[0:3], 0x7 ; C2078107 s_buffer_load_dword s16, s[0:3], 0x8 ; C2080108 s_buffer_load_dword s17, s[0:3], 0x9 ; C2088109 s_buffer_load_dword s18, s[0:3], 0xa ; C209010A s_buffer_load_dword s19, s[0:3], 0xb ; C209810B s_buffer_load_dword s20, s[0:3], 0xc ; C20A010C s_buffer_load_dword s21, s[0:3], 0xd ; C20A810D s_buffer_load_dword s22, s[0:3], 0xe ; C20B010E s_buffer_load_dword s23, s[0:3], 0x0 ; C20B8100 s_buffer_load_dword s24, s[0:3], 0x1 ; C20C0101 s_buffer_load_dword s25, s[0:3], 0x2 ; C20C8102 s_buffer_load_dword s26, s[0:3], 0x3 ; C20D0103 s_buffer_load_dword s0, s[0:3], 0x4 ; C2000104 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s17, v6 ; 10000C11 v_mac_f32_e32 v0, s16, v5 ; 3E000A10 v_mul_f32_e32 v9, s21, v6 ; 10120C15 v_mac_f32_e32 v9, s20, v5 ; 3E120A14 v_mul_f32_e32 v10, s10, v6 ; 10140C0A v_mac_f32_e32 v10, s9, v5 ; 3E140A09 v_mul_f32_e32 v6, s7, v6 ; 100C0C07 v_mac_f32_e32 v6, s6, v5 ; 3E0C0A06 v_mac_f32_e32 v0, s18, v7 ; 3E000E12 v_mac_f32_e32 v9, s22, v7 ; 3E120E16 v_mac_f32_e32 v10, s4, v7 ; 3E140E04 v_mac_f32_e32 v6, s11, v7 ; 3E0C0E0B v_mac_f32_e32 v0, s19, v8 ; 3E001013 v_mac_f32_e32 v9, s8, v8 ; 3E121008 v_mac_f32_e32 v10, s5, v8 ; 3E141005 v_mac_f32_e32 v6, s12, v8 ; 3E0C100C exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, s23 ; 7E020217 v_mov_b32_e32 v2, s24 ; 7E040218 v_mov_b32_e32 v3, s25 ; 7E060219 v_mov_b32_e32 v4, s26 ; 7E08021A exp 15, 33, 0, 0, 0, v1, v2, v3, v4 ; F800021F 04030201 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, s0 ; 7E020200 v_mov_b32_e32 v2, s13 ; 7E04020D v_mov_b32_e32 v3, s14 ; 7E06020E v_mov_b32_e32 v4, s15 ; 7E08020F exp 15, 34, 0, 0, 0, v1, v2, v3, v4 ; F800022F 04030201 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 0 ; 7E020280 exp 15, 35, 0, 0, 0, v10, v6, v1, v1 ; F800023F 0101060A v_mov_b32_e32 v2, 1.0 ; 7E0402F2 exp 15, 12, 0, 1, 0, v0, v9, v1, v2 ; F80008CF 02010900 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 12 Code Size: 296 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xy, IN[3].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MAD TEMP[0], TEMP[0], IN[2], IN[1] 3: MUL TEMP[1].x, TEMP[0].wwww, IN[0].wwww 4: MOV TEMP[0].w, TEMP[1].xxxx 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %37 = bitcast float %35 to i32 %38 = bitcast float %36 to i32 %39 = insertelement <2 x i32> undef, i32 %37, i32 0 %40 = insertelement <2 x i32> %39, i32 %38, i32 1 %41 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %40, <32 x i8> %23, <16 x i8> %25, i32 2) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = extractelement <4 x float> %41, i32 2 %45 = extractelement <4 x float> %41, i32 3 %46 = fmul float %42, %31 %47 = fadd float %46, %27 %48 = fmul float %43, %32 %49 = fadd float %48, %28 %50 = fmul float %44, %33 %51 = fadd float %50, %29 %52 = fmul float %45, %34 %53 = fadd float %52, %30 %54 = fmul float %53, %26 %55 = call i32 @llvm.SI.packf16(float %47, float %49) %56 = bitcast i32 %55 to float %57 = call i32 @llvm.SI.packf16(float %51, float %54) %58 = bitcast i32 %57 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %56, float %58, float %56, float %58) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400 v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 v_interp_p1_f32 v5, v0, 2, 1, [m0] ; C8140600 v_interp_p2_f32 v5, [v5], v1, 2, 1, [m0] ; C8150601 v_interp_p1_f32 v6, v0, 3, 1, [m0] ; C8180700 v_interp_p2_f32 v6, [v6], v1, 3, 1, [m0] ; C8190701 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 v_interp_p1_f32 v10, v0, 3, 2, [m0] ; C8280B00 v_interp_p2_f32 v10, [v10], v1, 3, 2, [m0] ; C8290B01 v_interp_p1_f32 v11, v0, 0, 3, [m0] ; C82C0C00 v_interp_p2_f32 v11, [v11], v1, 0, 3, [m0] ; C82D0C01 v_interp_p1_f32 v12, v0, 1, 3, [m0] ; C8300D00 v_interp_p2_f32 v12, [v12], v1, 1, 3, [m0] ; C8310D01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[11:14], 15, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[4:11], s[0:3] ; F0800F00 00010B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v3, v7, v11 ; 3E061707 v_mac_f32_e32 v4, v8, v12 ; 3E081908 v_mac_f32_e32 v5, v9, v13 ; 3E0A1B09 v_mac_f32_e32 v6, v10, v14 ; 3E0C1D0A v_mul_f32_e32 v0, v2, v6 ; 10000D02 v_cvt_pkrtz_f16_f32_e32 v1, v3, v4 ; 5E020903 v_cvt_pkrtz_f16_f32_e32 v0, v5, v0 ; 5E000105 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Code Size: 160 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL SV[0], INSTANCEID DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL CONST[0..143] DCL TEMP[0..3], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 6.0000, 2.1000} IMM[1] FLT32 { 3.1000, 4.1000, 5.1000, 0.1000} IMM[2] FLT32 { 1.1000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: I2F TEMP[1].x, SV[0].xxxx 2: MAD TEMP[2].x, TEMP[1].xxxx, IMM[0].zzzz, IMM[0].wwww 3: F2I TEMP[2].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: UARL ADDR[0].x, TEMP[2].xxxx 6: DP4 TEMP[0].x, IN[1], CONST[ADDR[0].x] 7: MAD TEMP[2].x, TEMP[1].xxxx, IMM[0].zzzz, IMM[1].xxxx 8: F2I TEMP[2].x, TEMP[2].xxxx 9: UARL ADDR[0].x, TEMP[2].xxxx 10: DP4 TEMP[2].x, IN[1], CONST[ADDR[0].x] 11: MOV TEMP[0].y, TEMP[2].xxxx 12: MAD TEMP[2].x, TEMP[1].xxxx, IMM[0].zzzz, IMM[1].yyyy 13: F2I TEMP[2].x, TEMP[2].xxxx 14: UARL ADDR[0].x, TEMP[2].xxxx 15: UARL ADDR[0].x, TEMP[2].xxxx 16: DP4 TEMP[2].x, IN[1], CONST[ADDR[0].x] 17: MAD TEMP[3].x, TEMP[1].xxxx, IMM[0].zzzz, IMM[1].zzzz 18: F2I TEMP[3].x, TEMP[3].xxxx 19: UARL ADDR[0].x, TEMP[3].xxxx 20: DP4 TEMP[3].x, IN[1], CONST[ADDR[0].x] 21: MOV TEMP[2].y, TEMP[3].xxxx 22: MAD TEMP[3].x, TEMP[1].xxxx, IMM[0].zzzz, IMM[1].wwww 23: F2I TEMP[3].x, TEMP[3].xxxx 24: UARL ADDR[0].x, TEMP[3].xxxx 25: MOV TEMP[3], CONST[ADDR[0].x] 26: MAD TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz, IMM[2].xxxx 27: F2I TEMP[1].x, TEMP[1].xxxx 28: UARL ADDR[0].x, TEMP[1].xxxx 29: MOV TEMP[1], CONST[ADDR[0].x] 30: MOV TEMP[2].xy, TEMP[2].xyxx 31: MOV OUT[1], IN[0] 32: MOV OUT[2], TEMP[3] 33: MOV OUT[3], TEMP[1] 34: MOV OUT[0], TEMP[0] 35: MOV OUT[4], TEMP[2] 36: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %14 = load <16 x i8>, <16 x i8> addrspace(2)* %13, align 16, !tbaa !0 %15 = add i32 %5, %7 %16 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %14, i32 0, i32 %15) %17 = extractelement <4 x float> %16, i32 0 %18 = extractelement <4 x float> %16, i32 1 %19 = extractelement <4 x float> %16, i32 2 %20 = extractelement <4 x float> %16, i32 3 %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = extractelement <4 x float> %24, i32 1 %27 = extractelement <4 x float> %24, i32 2 %28 = extractelement <4 x float> %24, i32 3 %29 = sitofp i32 %10 to float %30 = fmul float %29, 6.000000e+00 %31 = fadd float %30, 0x4000CCCCC0000000 %32 = fptosi float %31 to i32 %33 = shl i32 %32, 4 %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %33) %35 = shl i32 %32, 4 %36 = or i32 %35, 4 %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %36) %38 = shl i32 %32, 4 %39 = or i32 %38, 8 %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %39) %41 = shl i32 %32, 4 %42 = or i32 %41, 12 %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %42) %44 = fmul float %25, %34 %45 = fmul float %26, %37 %46 = fadd float %44, %45 %47 = fmul float %27, %40 %48 = fadd float %46, %47 %49 = fmul float %28, %43 %50 = fadd float %48, %49 %51 = fmul float %29, 6.000000e+00 %52 = fadd float %51, 0x4008CCCCC0000000 %53 = fptosi float %52 to i32 %54 = shl i32 %53, 4 %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %54) %56 = shl i32 %53, 4 %57 = or i32 %56, 4 %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %57) %59 = shl i32 %53, 4 %60 = or i32 %59, 8 %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %60) %62 = shl i32 %53, 4 %63 = or i32 %62, 12 %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %63) %65 = fmul float %25, %55 %66 = fmul float %26, %58 %67 = fadd float %65, %66 %68 = fmul float %27, %61 %69 = fadd float %67, %68 %70 = fmul float %28, %64 %71 = fadd float %69, %70 %72 = fmul float %29, 6.000000e+00 %73 = fadd float %72, 0x4010666660000000 %74 = fptosi float %73 to i32 %75 = shl i32 %74, 4 %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %75) %77 = shl i32 %74, 4 %78 = or i32 %77, 4 %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %78) %80 = shl i32 %74, 4 %81 = or i32 %80, 8 %82 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %81) %83 = shl i32 %74, 4 %84 = or i32 %83, 12 %85 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %84) %86 = fmul float %25, %76 %87 = fmul float %26, %79 %88 = fadd float %86, %87 %89 = fmul float %27, %82 %90 = fadd float %88, %89 %91 = fmul float %28, %85 %92 = fadd float %90, %91 %93 = fmul float %29, 6.000000e+00 %94 = fadd float %93, 0x4014666660000000 %95 = fptosi float %94 to i32 %96 = shl i32 %95, 4 %97 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %96) %98 = shl i32 %95, 4 %99 = or i32 %98, 4 %100 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %99) %101 = shl i32 %95, 4 %102 = or i32 %101, 8 %103 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %102) %104 = shl i32 %95, 4 %105 = or i32 %104, 12 %106 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %105) %107 = fmul float %25, %97 %108 = fmul float %26, %100 %109 = fadd float %107, %108 %110 = fmul float %27, %103 %111 = fadd float %109, %110 %112 = fmul float %28, %106 %113 = fadd float %111, %112 %114 = fmul float %29, 6.000000e+00 %115 = fadd float %114, 0x3FB99999A0000000 %116 = fptosi float %115 to i32 %117 = shl i32 %116, 4 %118 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %117) %119 = shl i32 %116, 4 %120 = or i32 %119, 4 %121 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %120) %122 = shl i32 %116, 4 %123 = or i32 %122, 8 %124 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %123) %125 = shl i32 %116, 4 %126 = or i32 %125, 12 %127 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %126) %128 = fmul float %29, 6.000000e+00 %129 = fadd float %128, 0x3FF19999A0000000 %130 = fptosi float %129 to i32 %131 = shl i32 %130, 4 %132 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %131) %133 = shl i32 %130, 4 %134 = or i32 %133, 4 %135 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %134) %136 = shl i32 %130, 4 %137 = or i32 %136, 8 %138 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %137) %139 = shl i32 %130, 4 %140 = or i32 %139, 12 %141 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %140) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %118, float %121, float %124, float %127) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %132, float %135, float %138, float %141) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %92, float %113, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %50, float %71, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_cvt_f32_i32_e32 v1, v3 ; 7E020B03 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_mov_b32_e32 v2, 0x40c00000 ; 7E0402FF 40C00000 v_madak_f32_e32 v3, v1, v2, 0x40833333 ; 42060501 40833333 v_cvt_i32_f32_e32 v3, v3 ; 7E061103 v_madak_f32_e32 v4, v1, v2, 0x40a33333 ; 42080501 40A33333 v_cvt_i32_f32_e32 v4, v4 ; 7E081104 v_add_i32_e32 v0, s10, v0 ; 4A00000A v_madak_f32_e32 v5, v1, v2, 0x40066666 ; 420A0501 40066666 v_cvt_i32_f32_e32 v5, v5 ; 7E0A1105 v_lshlrev_b32_e32 v3, 4, v3 ; 34060684 v_lshlrev_b32_e32 v4, 4, v4 ; 34080884 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[6:9], v0, s[4:7], 0 idxen ; E00C2000 80010600 v_lshlrev_b32_e32 v5, 4, v5 ; 340A0A84 buffer_load_format_xyzw v[10:13], v0, s[12:15], 0 idxen ; E00C2000 80030A00 v_madak_f32_e32 v0, v1, v2, 0x40466666 ; 42000501 40466666 v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_madak_f32_e32 v14, v1, v2, 0x3dcccccd ; 421C0501 3DCCCCCD v_madak_f32_e32 v1, v1, v2, 0x3f8ccccd ; 42020501 3F8CCCCD v_cvt_i32_f32_e32 v2, v14 ; 7E04110E v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_lshlrev_b32_e32 v0, 4, v0 ; 34000084 v_or_b32_e32 v14, 4, v3 ; 381C0684 buffer_load_dword v14, v14, s[0:3], 0 offen ; E0301000 80000E0E v_lshlrev_b32_e32 v2, 4, v2 ; 34040484 v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 v_or_b32_e32 v15, 4, v4 ; 381E0884 buffer_load_dword v15, v15, s[0:3], 0 offen ; E0301000 80000F0F v_or_b32_e32 v16, 4, v5 ; 38200A84 buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 v_or_b32_e32 v17, 4, v0 ; 38220084 buffer_load_dword v17, v17, s[0:3], 0 offen ; E0301000 80001111 buffer_load_dword v18, v3, s[0:3], 0 offen ; E0301000 80001203 v_or_b32_e32 v19, 8, v3 ; 38260688 buffer_load_dword v19, v19, s[0:3], 0 offen ; E0301000 80001313 v_or_b32_e32 v3, 12, v3 ; 3806068C buffer_load_dword v3, v3, s[0:3], 0 offen ; E0301000 80000303 buffer_load_dword v20, v4, s[0:3], 0 offen ; E0301000 80001404 v_or_b32_e32 v21, 8, v4 ; 382A0888 buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 v_or_b32_e32 v4, 12, v4 ; 3808088C buffer_load_dword v4, v4, s[0:3], 0 offen ; E0301000 80000404 v_or_b32_e32 v22, 4, v2 ; 382C0484 buffer_load_dword v22, v22, s[0:3], 0 offen ; E0301000 80001616 v_or_b32_e32 v23, 8, v2 ; 382E0488 buffer_load_dword v23, v23, s[0:3], 0 offen ; E0301000 80001717 v_or_b32_e32 v24, 12, v2 ; 3830048C buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 buffer_load_dword v24, v24, s[0:3], 0 offen ; E0301000 80001818 buffer_load_dword v25, v5, s[0:3], 0 offen ; E0301000 80001905 v_or_b32_e32 v26, 8, v5 ; 38340A88 buffer_load_dword v26, v26, s[0:3], 0 offen ; E0301000 80001A1A v_or_b32_e32 v5, 12, v5 ; 380A0A8C buffer_load_dword v5, v5, s[0:3], 0 offen ; E0301000 80000505 buffer_load_dword v27, v0, s[0:3], 0 offen ; E0301000 80001B00 v_or_b32_e32 v28, 8, v0 ; 38380088 buffer_load_dword v28, v28, s[0:3], 0 offen ; E0301000 80001C1C v_or_b32_e32 v0, 12, v0 ; 3800008C buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 v_or_b32_e32 v29, 4, v1 ; 383A0284 buffer_load_dword v29, v29, s[0:3], 0 offen ; E0301000 80001D1D v_or_b32_e32 v30, 8, v1 ; 383C0288 buffer_load_dword v30, v30, s[0:3], 0 offen ; E0301000 80001E1E buffer_load_dword v31, v1, s[0:3], 0 offen ; E0301000 80001F01 v_or_b32_e32 v1, 12, v1 ; 3802028C buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 s_waitcnt ; BF8C077F v_mul_f32_e32 v16, v16, v11 ; 10201710 s_waitcnt vmcnt(9) ; BF8C0779 v_mac_f32_e32 v16, v25, v10 ; 3E201519 v_mul_f32_e32 v17, v17, v11 ; 10221711 s_waitcnt vmcnt(6) ; BF8C0776 v_mac_f32_e32 v17, v27, v10 ; 3E22151B v_mul_f32_e32 v14, v14, v11 ; 101C170E v_mac_f32_e32 v14, v18, v10 ; 3E1C1512 v_mul_f32_e32 v11, v15, v11 ; 1016170F v_mac_f32_e32 v11, v20, v10 ; 3E161514 v_mac_f32_e32 v16, v26, v12 ; 3E20191A s_waitcnt vmcnt(5) ; BF8C0775 v_mac_f32_e32 v17, v28, v12 ; 3E22191C v_mac_f32_e32 v14, v19, v12 ; 3E1C1913 v_mac_f32_e32 v11, v21, v12 ; 3E161915 v_mac_f32_e32 v16, v5, v13 ; 3E201B05 s_waitcnt vmcnt(4) ; BF8C0774 v_mac_f32_e32 v17, v0, v13 ; 3E221B00 v_mac_f32_e32 v14, v3, v13 ; 3E1C1B03 v_mac_f32_e32 v11, v4, v13 ; 3E161B04 exp 15, 32, 0, 0, 0, v6, v7, v8, v9 ; F800020F 09080706 exp 15, 33, 0, 0, 0, v2, v22, v23, v24 ; F800021F 18171602 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 34, 0, 0, 0, v31, v29, v30, v1 ; F800022F 011E1D1F v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 35, 0, 0, 0, v14, v11, v0, v0 ; F800023F 00000B0E s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp 15, 12, 0, 1, 0, v16, v17, v0, v1 ; F80008CF 01001110 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 32 Code Size: 552 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xy, IN[3].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MAD TEMP[0], TEMP[0], IN[2], IN[1] 3: MUL TEMP[1].x, TEMP[0].wwww, IN[0].wwww 4: MOV TEMP[0].w, TEMP[1].xxxx 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %37 = bitcast float %35 to i32 %38 = bitcast float %36 to i32 %39 = insertelement <2 x i32> undef, i32 %37, i32 0 %40 = insertelement <2 x i32> %39, i32 %38, i32 1 %41 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %40, <32 x i8> %23, <16 x i8> %25, i32 2) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = extractelement <4 x float> %41, i32 2 %45 = extractelement <4 x float> %41, i32 3 %46 = fmul float %42, %31 %47 = fadd float %46, %27 %48 = fmul float %43, %32 %49 = fadd float %48, %28 %50 = fmul float %44, %33 %51 = fadd float %50, %29 %52 = fmul float %45, %34 %53 = fadd float %52, %30 %54 = fmul float %53, %26 %55 = call i32 @llvm.SI.packf16(float %47, float %49) %56 = bitcast i32 %55 to float %57 = call i32 @llvm.SI.packf16(float %51, float %54) %58 = bitcast i32 %57 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %56, float %58, float %56, float %58) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400 v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 v_interp_p1_f32 v5, v0, 2, 1, [m0] ; C8140600 v_interp_p2_f32 v5, [v5], v1, 2, 1, [m0] ; C8150601 v_interp_p1_f32 v6, v0, 3, 1, [m0] ; C8180700 v_interp_p2_f32 v6, [v6], v1, 3, 1, [m0] ; C8190701 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 v_interp_p1_f32 v10, v0, 3, 2, [m0] ; C8280B00 v_interp_p2_f32 v10, [v10], v1, 3, 2, [m0] ; C8290B01 v_interp_p1_f32 v11, v0, 0, 3, [m0] ; C82C0C00 v_interp_p2_f32 v11, [v11], v1, 0, 3, [m0] ; C82D0C01 v_interp_p1_f32 v12, v0, 1, 3, [m0] ; C8300D00 v_interp_p2_f32 v12, [v12], v1, 1, 3, [m0] ; C8310D01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[11:14], 15, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[4:11], s[0:3] ; F0800F00 00010B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v3, v7, v11 ; 3E061707 v_mac_f32_e32 v4, v8, v12 ; 3E081908 v_mac_f32_e32 v5, v9, v13 ; 3E0A1B09 v_mac_f32_e32 v6, v10, v14 ; 3E0C1D0A v_mul_f32_e32 v0, v2, v6 ; 10000D02 v_cvt_pkrtz_f16_f32_e32 v1, v3, v4 ; 5E020903 v_cvt_pkrtz_f16_f32_e32 v0, v5, v0 ; 5E000105 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Code Size: 160 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..3] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[0], CONST[0] 2: DP4 TEMP[1].x, IN[0], CONST[1] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: DP4 TEMP[1].x, IN[0], CONST[2] 5: DP4 TEMP[2].x, IN[0], CONST[3] 6: MOV TEMP[1].y, TEMP[2].xxxx 7: MOV TEMP[1].xy, TEMP[1].xyxx 8: MOV OUT[0], TEMP[0] 9: MOV OUT[1], TEMP[1] 10: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = fmul float %33, %13 %38 = fmul float %34, %14 %39 = fadd float %37, %38 %40 = fmul float %35, %15 %41 = fadd float %39, %40 %42 = fmul float %36, %16 %43 = fadd float %41, %42 %44 = fmul float %33, %17 %45 = fmul float %34, %18 %46 = fadd float %44, %45 %47 = fmul float %35, %19 %48 = fadd float %46, %47 %49 = fmul float %36, %20 %50 = fadd float %48, %49 %51 = fmul float %33, %21 %52 = fmul float %34, %22 %53 = fadd float %51, %52 %54 = fmul float %35, %23 %55 = fadd float %53, %54 %56 = fmul float %36, %24 %57 = fadd float %55, %56 %58 = fmul float %33, %25 %59 = fmul float %34, %26 %60 = fadd float %58, %59 %61 = fmul float %35, %27 %62 = fadd float %60, %61 %63 = fmul float %36, %28 %64 = fadd float %62, %63 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %57, float %64, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %43, float %50, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106 s_buffer_load_dword s6, s[0:3], 0x7 ; C2030107 s_buffer_load_dword s7, s[0:3], 0x8 ; C2038108 s_buffer_load_dword s8, s[0:3], 0x9 ; C2040109 s_buffer_load_dword s9, s[0:3], 0xa ; C204810A s_buffer_load_dword s10, s[0:3], 0xb ; C205010B s_buffer_load_dword s11, s[0:3], 0xc ; C205810C s_buffer_load_dword s12, s[0:3], 0xd ; C206010D s_buffer_load_dword s13, s[0:3], 0xe ; C206810E s_buffer_load_dword s14, s[0:3], 0x0 ; C2070100 s_buffer_load_dword s15, s[0:3], 0x1 ; C2078101 s_buffer_load_dword s16, s[0:3], 0x2 ; C2080102 s_buffer_load_dword s17, s[0:3], 0x3 ; C2088103 s_buffer_load_dword s18, s[0:3], 0x4 ; C2090104 s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v4, s15, v1 ; 1008020F v_mac_f32_e32 v4, s14, v0 ; 3E08000E v_mul_f32_e32 v5, s4, v1 ; 100A0204 v_mac_f32_e32 v5, s18, v0 ; 3E0A0012 v_mul_f32_e32 v6, s8, v1 ; 100C0208 v_mul_f32_e32 v1, s12, v1 ; 1002020C v_mac_f32_e32 v6, s7, v0 ; 3E0C0007 v_mac_f32_e32 v1, s11, v0 ; 3E02000B v_mac_f32_e32 v4, s16, v2 ; 3E080410 v_mac_f32_e32 v5, s5, v2 ; 3E0A0405 v_mac_f32_e32 v6, s9, v2 ; 3E0C0409 v_mac_f32_e32 v1, s13, v2 ; 3E02040D v_mac_f32_e32 v4, s17, v3 ; 3E080611 v_mac_f32_e32 v5, s6, v3 ; 3E0A0606 v_mac_f32_e32 v6, s10, v3 ; 3E0C060A v_mac_f32_e32 v1, s0, v3 ; 3E020600 v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 32, 0, 0, 0, v6, v1, v0, v0 ; F800020F 00000106 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp 15, 12, 0, 1, 0, v4, v5, v0, v1 ; F80008CF 01000504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 188 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[0..4] DCL TEMP[0..2], LOCAL 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: DP4 TEMP[1].x, TEMP[0], CONST[1] 3: DP4 TEMP[2].x, TEMP[0], CONST[2] 4: MOV TEMP[1].y, TEMP[2].xxxx 5: DP4 TEMP[2].x, TEMP[0], CONST[3] 6: MOV TEMP[1].z, TEMP[2].xxxx 7: DP4 TEMP[2].x, TEMP[0], CONST[4] 8: MOV TEMP[1].w, TEMP[2].xxxx 9: ADD TEMP[0].x, TEMP[0].wwww, CONST[0].wwww 10: MAD TEMP[0], CONST[0], TEMP[0].xxxx, TEMP[1] 11: MOV OUT[0], TEMP[0] 12: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %44 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %45 = load <32 x i8>, <32 x i8> addrspace(2)* %44, align 32, !tbaa !0 %46 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0 %48 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %49 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %50 = bitcast float %48 to i32 %51 = bitcast float %49 to i32 %52 = insertelement <2 x i32> undef, i32 %50, i32 0 %53 = insertelement <2 x i32> %52, i32 %51, i32 1 %54 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %53, <32 x i8> %45, <16 x i8> %47, i32 2) %55 = extractelement <4 x float> %54, i32 0 %56 = extractelement <4 x float> %54, i32 1 %57 = extractelement <4 x float> %54, i32 2 %58 = extractelement <4 x float> %54, i32 3 %59 = fmul float %55, %28 %60 = fmul float %56, %29 %61 = fadd float %59, %60 %62 = fmul float %57, %30 %63 = fadd float %61, %62 %64 = fmul float %58, %31 %65 = fadd float %63, %64 %66 = fmul float %55, %32 %67 = fmul float %56, %33 %68 = fadd float %66, %67 %69 = fmul float %57, %34 %70 = fadd float %68, %69 %71 = fmul float %58, %35 %72 = fadd float %70, %71 %73 = fmul float %55, %36 %74 = fmul float %56, %37 %75 = fadd float %73, %74 %76 = fmul float %57, %38 %77 = fadd float %75, %76 %78 = fmul float %58, %39 %79 = fadd float %77, %78 %80 = fmul float %55, %40 %81 = fmul float %56, %41 %82 = fadd float %80, %81 %83 = fmul float %57, %42 %84 = fadd float %82, %83 %85 = fmul float %58, %43 %86 = fadd float %84, %85 %87 = fadd float %58, %27 %88 = fmul float %24, %87 %89 = fadd float %88, %65 %90 = fmul float %25, %87 %91 = fadd float %90, %72 %92 = fmul float %26, %87 %93 = fadd float %92, %79 %94 = fmul float %27, %87 %95 = fadd float %94, %86 %96 = call i32 @llvm.SI.packf16(float %89, float %91) %97 = bitcast i32 %96 to float %98 = call i32 @llvm.SI.packf16(float %93, float %95) %99 = bitcast i32 %98 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %97, float %99, float %97, float %99) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[4:7], s[4:5], 0x0 ; C0820500 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[4:7] ; F0800F00 00230002 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s19, s[0:3], 0xf ; C209810F s_buffer_load_dword s20, s[0:3], 0x10 ; C20A0110 s_buffer_load_dword s21, s[0:3], 0x11 ; C20A8111 s_buffer_load_dword s22, s[0:3], 0x12 ; C20B0112 s_buffer_load_dword s0, s[0:3], 0x13 ; C2000113 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v4, s9, v1 ; 10080209 v_mac_f32_e32 v4, s8, v0 ; 3E080008 v_mac_f32_e32 v4, s10, v2 ; 3E08040A v_mac_f32_e32 v4, s11, v3 ; 3E08060B v_mul_f32_e32 v5, s13, v1 ; 100A020D v_mac_f32_e32 v5, s12, v0 ; 3E0A000C v_mac_f32_e32 v5, s14, v2 ; 3E0A040E v_mac_f32_e32 v5, s15, v3 ; 3E0A060F v_mul_f32_e32 v6, s17, v1 ; 100C0211 v_mac_f32_e32 v6, s16, v0 ; 3E0C0010 v_mac_f32_e32 v6, s18, v2 ; 3E0C0412 v_mac_f32_e32 v6, s19, v3 ; 3E0C0613 v_mul_f32_e32 v1, s21, v1 ; 10020215 v_mac_f32_e32 v1, s20, v0 ; 3E020014 v_mac_f32_e32 v1, s22, v2 ; 3E020416 v_mac_f32_e32 v1, s0, v3 ; 3E020600 v_add_f32_e32 v0, s7, v3 ; 06000607 v_mac_f32_e32 v4, s4, v0 ; 3E080004 v_mac_f32_e32 v5, s5, v0 ; 3E0A0005 v_mac_f32_e32 v6, s6, v0 ; 3E0C0006 v_mac_f32_e32 v1, s7, v0 ; 3E020007 v_cvt_pkrtz_f16_f32_e32 v0, v4, v5 ; 5E000B04 v_cvt_pkrtz_f16_f32_e32 v1, v6, v1 ; 5E020306 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 236 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL SV[0], INSTANCEID DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..47] DCL TEMP[0..2], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 2.0000, 0.1000} IMM[1] FLT32 { 1.1000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: I2F TEMP[1].x, SV[0].xxxx 2: MAD TEMP[2].x, TEMP[1].xxxx, IMM[0].zzzz, IMM[0].wwww 3: F2I TEMP[2].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: UARL ADDR[0].x, TEMP[2].xxxx 6: DP4 TEMP[0].x, IN[2], CONST[ADDR[0].x] 7: MAD TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz, IMM[1].xxxx 8: F2I TEMP[1].x, TEMP[1].xxxx 9: UARL ADDR[0].x, TEMP[1].xxxx 10: DP4 TEMP[1].x, IN[2], CONST[ADDR[0].x] 11: MOV TEMP[0].y, TEMP[1].xxxx 12: MOV OUT[2], IN[1] 13: MOV OUT[1], IN[0] 14: MOV OUT[0], TEMP[0] 15: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %14 = load <16 x i8>, <16 x i8> addrspace(2)* %13, align 16, !tbaa !0 %15 = add i32 %5, %7 %16 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %14, i32 0, i32 %15) %17 = extractelement <4 x float> %16, i32 0 %18 = extractelement <4 x float> %16, i32 1 %19 = extractelement <4 x float> %16, i32 2 %20 = extractelement <4 x float> %16, i32 3 %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = extractelement <4 x float> %24, i32 1 %27 = extractelement <4 x float> %24, i32 2 %28 = extractelement <4 x float> %24, i32 3 %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = sitofp i32 %10 to float %38 = fmul float %37, 2.000000e+00 %39 = fadd float %38, 0x3FB99999A0000000 %40 = fptosi float %39 to i32 %41 = shl i32 %40, 4 %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %41) %43 = shl i32 %40, 4 %44 = or i32 %43, 4 %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %44) %46 = shl i32 %40, 4 %47 = or i32 %46, 8 %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %47) %49 = shl i32 %40, 4 %50 = or i32 %49, 12 %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %50) %52 = fmul float %33, %42 %53 = fmul float %34, %45 %54 = fadd float %52, %53 %55 = fmul float %35, %48 %56 = fadd float %54, %55 %57 = fmul float %36, %51 %58 = fadd float %56, %57 %59 = fmul float %37, 2.000000e+00 %60 = fadd float %59, 0x3FF19999A0000000 %61 = fptosi float %60 to i32 %62 = shl i32 %61, 4 %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %62) %64 = shl i32 %61, 4 %65 = or i32 %64, 4 %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %65) %67 = shl i32 %61, 4 %68 = or i32 %67, 8 %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %68) %70 = shl i32 %61, 4 %71 = or i32 %70, 12 %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %71) %73 = fmul float %33, %63 %74 = fmul float %34, %66 %75 = fadd float %73, %74 %76 = fmul float %35, %69 %77 = fadd float %75, %76 %78 = fmul float %36, %72 %79 = fadd float %77, %78 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %25, float %26, float %27, float %28) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %58, float %79, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_cvt_f32_i32_e32 v1, v3 ; 7E020B03 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 v_madak_f32_e32 v2, 2.0, v1, 0x3dcccccd ; 420402F4 3DCCCCCD v_cvt_i32_f32_e32 v2, v2 ; 7E041102 v_lshlrev_b32_e32 v2, 4, v2 ; 34040484 v_or_b32_e32 v3, 4, v2 ; 38060484 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[4:7], v0, s[4:7], 0 idxen ; E00C2000 80010400 buffer_load_format_xyzw v[8:11], v0, s[12:15], 0 idxen ; E00C2000 80030800 buffer_load_format_xyzw v[12:15], v0, s[8:11], 0 idxen ; E00C2000 80020C00 buffer_load_dword v0, v3, s[0:3], 0 offen ; E0301000 80000003 v_or_b32_e32 v3, 8, v2 ; 38060488 buffer_load_dword v3, v3, s[0:3], 0 offen ; E0301000 80000303 v_madak_f32_e32 v1, 2.0, v1, 0x3f8ccccd ; 420202F4 3F8CCCCD v_cvt_i32_f32_e32 v1, v1 ; 7E021101 buffer_load_dword v16, v2, s[0:3], 0 offen ; E0301000 80001002 v_or_b32_e32 v2, 12, v2 ; 3804048C buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 v_or_b32_e32 v17, 4, v1 ; 38220284 buffer_load_dword v17, v17, s[0:3], 0 offen ; E0301000 80001111 buffer_load_dword v18, v1, s[0:3], 0 offen ; E0301000 80001201 v_or_b32_e32 v19, 8, v1 ; 38260288 buffer_load_dword v19, v19, s[0:3], 0 offen ; E0301000 80001313 v_or_b32_e32 v1, 12, v1 ; 3802028C buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 s_waitcnt vmcnt(7) ; BF8C0777 v_mul_f32_e32 v0, v0, v13 ; 10001B00 s_waitcnt vmcnt(5) ; BF8C0775 v_mac_f32_e32 v0, v16, v12 ; 3E001910 s_waitcnt vmcnt(3) ; BF8C0773 v_mul_f32_e32 v13, v17, v13 ; 101A1B11 s_waitcnt vmcnt(2) ; BF8C0772 v_mac_f32_e32 v13, v18, v12 ; 3E1A1912 v_mac_f32_e32 v0, v3, v14 ; 3E001D03 s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v13, v19, v14 ; 3E1A1D13 v_mac_f32_e32 v0, v2, v15 ; 3E001F02 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v13, v1, v15 ; 3E1A1F01 exp 15, 32, 0, 0, 0, v4, v5, v6, v7 ; F800020F 07060504 v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp 15, 33, 0, 0, 0, v8, v9, v10, v11 ; F800021F 0B0A0908 v_mov_b32_e32 v2, 0 ; 7E040280 exp 15, 12, 0, 1, 0, v0, v13, v2, v1 ; F80008CF 01020D00 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 20 Code Size: 264 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xyz, IN[0].xyzx 1: MUL TEMP[1].x, IN[0].wwww, IN[1].wwww 2: MOV TEMP[0].w, TEMP[1].xxxx 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %23 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %24 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %25 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %26 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %27 = fmul float %25, %26 %28 = call i32 @llvm.SI.packf16(float %22, float %23) %29 = bitcast i32 %28 to float %30 = call i32 @llvm.SI.packf16(float %24, float %27) %31 = bitcast i32 %30 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %29, float %31, float %29, float %31) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v0, v0, 3, 1, [m0] ; C8000700 v_interp_p2_f32 v0, [v0], v1, 3, 1, [m0] ; C8010701 v_mul_f32_e32 v0, v0, v5 ; 10000B00 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 v_cvt_pkrtz_f16_f32_e32 v0, v4, v0 ; 5E000104 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 68 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL CONST[0..7] DCL TEMP[0..2], LOCAL 0: DP4 TEMP[0].x, IN[1], CONST[2] 1: DP4 TEMP[1].x, IN[1], CONST[3] 2: MOV TEMP[0].y, TEMP[1].xxxx 3: DP4 TEMP[1].x, IN[1], CONST[4] 4: MOV TEMP[0].z, TEMP[1].xxxx 5: DP4 TEMP[1].x, IN[1], CONST[5] 6: MOV TEMP[0].w, TEMP[1].xxxx 7: DP4 TEMP[1].x, IN[1], CONST[6] 8: DP4 TEMP[2].x, IN[1], CONST[7] 9: MOV TEMP[1].y, TEMP[2].xxxx 10: MOV TEMP[1].xy, TEMP[1].xyxx 11: MOV OUT[1], IN[0] 12: MOV OUT[2], CONST[0] 13: MOV OUT[3], CONST[1] 14: MOV OUT[0], TEMP[0] 15: MOV OUT[4], TEMP[1] 16: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = extractelement <4 x float> %48, i32 3 %53 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !tbaa !0 %55 = add i32 %5, %7 %56 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %55) %57 = extractelement <4 x float> %56, i32 0 %58 = extractelement <4 x float> %56, i32 1 %59 = extractelement <4 x float> %56, i32 2 %60 = extractelement <4 x float> %56, i32 3 %61 = fmul float %57, %21 %62 = fmul float %58, %22 %63 = fadd float %61, %62 %64 = fmul float %59, %23 %65 = fadd float %63, %64 %66 = fmul float %60, %24 %67 = fadd float %65, %66 %68 = fmul float %57, %25 %69 = fmul float %58, %26 %70 = fadd float %68, %69 %71 = fmul float %59, %27 %72 = fadd float %70, %71 %73 = fmul float %60, %28 %74 = fadd float %72, %73 %75 = fmul float %57, %29 %76 = fmul float %58, %30 %77 = fadd float %75, %76 %78 = fmul float %59, %31 %79 = fadd float %77, %78 %80 = fmul float %60, %32 %81 = fadd float %79, %80 %82 = fmul float %57, %33 %83 = fmul float %58, %34 %84 = fadd float %82, %83 %85 = fmul float %59, %35 %86 = fadd float %84, %85 %87 = fmul float %60, %36 %88 = fadd float %86, %87 %89 = fmul float %57, %37 %90 = fmul float %58, %38 %91 = fadd float %89, %90 %92 = fmul float %59, %39 %93 = fadd float %91, %92 %94 = fmul float %60, %40 %95 = fadd float %93, %94 %96 = fmul float %57, %41 %97 = fmul float %58, %42 %98 = fadd float %96, %97 %99 = fmul float %59, %43 %100 = fadd float %98, %99 %101 = fmul float %60, %44 %102 = fadd float %100, %101 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %49, float %50, float %51, float %52) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %13, float %14, float %15, float %16) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %95, float %102, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %67, float %74, float %81, float %88) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x19 ; C2040119 s_buffer_load_dword s9, s[0:3], 0x1a ; C204811A s_buffer_load_dword s10, s[0:3], 0x1b ; C205011B s_buffer_load_dword s11, s[0:3], 0x1c ; C205811C s_buffer_load_dword s16, s[0:3], 0x1d ; C208011D buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106 s_buffer_load_dword s6, s[0:3], 0x7 ; C2030107 s_buffer_load_dword s7, s[0:3], 0x8 ; C2038108 s_buffer_load_dword s12, s[0:3], 0x9 ; C2060109 s_buffer_load_dword s13, s[0:3], 0xa ; C206810A s_buffer_load_dword s14, s[0:3], 0xb ; C207010B s_buffer_load_dword s15, s[0:3], 0xc ; C207810C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s19, s[0:3], 0x0 ; C2098100 s_buffer_load_dword s20, s[0:3], 0x1 ; C20A0101 s_buffer_load_dword s21, s[0:3], 0x2 ; C20A8102 s_buffer_load_dword s22, s[0:3], 0x3 ; C20B0103 s_buffer_load_dword s23, s[0:3], 0x4 ; C20B8104 s_buffer_load_dword s24, s[0:3], 0xf ; C20C010F s_buffer_load_dword s25, s[0:3], 0x10 ; C20C8110 s_buffer_load_dword s26, s[0:3], 0x11 ; C20D0111 s_buffer_load_dword s27, s[0:3], 0x12 ; C20D8112 s_buffer_load_dword s28, s[0:3], 0x13 ; C20E0113 s_buffer_load_dword s29, s[0:3], 0x14 ; C20E8114 s_buffer_load_dword s30, s[0:3], 0x15 ; C20F0115 s_buffer_load_dword s31, s[0:3], 0x16 ; C20F8116 s_buffer_load_dword s32, s[0:3], 0x17 ; C2100117 s_buffer_load_dword s33, s[0:3], 0x18 ; C2108118 s_buffer_load_dword s34, s[0:3], 0x1e ; C211011E s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s12, v6 ; 10000C0C v_mac_f32_e32 v0, s7, v5 ; 3E000A07 v_mul_f32_e32 v9, s17, v6 ; 10120C11 v_mac_f32_e32 v9, s15, v5 ; 3E120A0F v_mul_f32_e32 v10, s26, v6 ; 10140C1A v_mac_f32_e32 v10, s25, v5 ; 3E140A19 v_mul_f32_e32 v11, s30, v6 ; 10160C1E v_mac_f32_e32 v11, s29, v5 ; 3E160A1D v_mul_f32_e32 v12, s8, v6 ; 10180C08 v_mac_f32_e32 v12, s33, v5 ; 3E180A21 v_mul_f32_e32 v6, s16, v6 ; 100C0C10 v_mac_f32_e32 v6, s11, v5 ; 3E0C0A0B v_mac_f32_e32 v0, s13, v7 ; 3E000E0D v_mac_f32_e32 v9, s18, v7 ; 3E120E12 v_mac_f32_e32 v10, s27, v7 ; 3E140E1B v_mac_f32_e32 v11, s31, v7 ; 3E160E1F v_mac_f32_e32 v12, s9, v7 ; 3E180E09 v_mac_f32_e32 v6, s34, v7 ; 3E0C0E22 v_mac_f32_e32 v0, s14, v8 ; 3E00100E v_mac_f32_e32 v9, s24, v8 ; 3E121018 v_mac_f32_e32 v10, s28, v8 ; 3E14101C v_mac_f32_e32 v11, s32, v8 ; 3E161020 v_mac_f32_e32 v12, s10, v8 ; 3E18100A v_mac_f32_e32 v6, s0, v8 ; 3E0C1000 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, s19 ; 7E020213 v_mov_b32_e32 v2, s20 ; 7E040214 v_mov_b32_e32 v3, s21 ; 7E060215 v_mov_b32_e32 v4, s22 ; 7E080216 exp 15, 33, 0, 0, 0, v1, v2, v3, v4 ; F800021F 04030201 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, s23 ; 7E020217 v_mov_b32_e32 v2, s4 ; 7E040204 v_mov_b32_e32 v3, s5 ; 7E060205 v_mov_b32_e32 v4, s6 ; 7E080206 exp 15, 34, 0, 0, 0, v1, v2, v3, v4 ; F800022F 04030201 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 0 ; 7E020280 exp 15, 35, 0, 0, 0, v12, v6, v1, v1 ; F800023F 0101060C exp 15, 12, 0, 1, 0, v0, v9, v10, v11 ; F80008CF 0B0A0900 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 16 Code Size: 356 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xy, IN[3].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MAD TEMP[0], TEMP[0], IN[2], IN[1] 3: MUL TEMP[1].x, TEMP[0].wwww, IN[0].wwww 4: MOV TEMP[0].w, TEMP[1].xxxx 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %37 = bitcast float %35 to i32 %38 = bitcast float %36 to i32 %39 = insertelement <2 x i32> undef, i32 %37, i32 0 %40 = insertelement <2 x i32> %39, i32 %38, i32 1 %41 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %40, <32 x i8> %23, <16 x i8> %25, i32 2) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = extractelement <4 x float> %41, i32 2 %45 = extractelement <4 x float> %41, i32 3 %46 = fmul float %42, %31 %47 = fadd float %46, %27 %48 = fmul float %43, %32 %49 = fadd float %48, %28 %50 = fmul float %44, %33 %51 = fadd float %50, %29 %52 = fmul float %45, %34 %53 = fadd float %52, %30 %54 = fmul float %53, %26 %55 = call i32 @llvm.SI.packf16(float %47, float %49) %56 = bitcast i32 %55 to float %57 = call i32 @llvm.SI.packf16(float %51, float %54) %58 = bitcast i32 %57 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %56, float %58, float %56, float %58) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400 v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 v_interp_p1_f32 v5, v0, 2, 1, [m0] ; C8140600 v_interp_p2_f32 v5, [v5], v1, 2, 1, [m0] ; C8150601 v_interp_p1_f32 v6, v0, 3, 1, [m0] ; C8180700 v_interp_p2_f32 v6, [v6], v1, 3, 1, [m0] ; C8190701 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 v_interp_p1_f32 v10, v0, 3, 2, [m0] ; C8280B00 v_interp_p2_f32 v10, [v10], v1, 3, 2, [m0] ; C8290B01 v_interp_p1_f32 v11, v0, 0, 3, [m0] ; C82C0C00 v_interp_p2_f32 v11, [v11], v1, 0, 3, [m0] ; C82D0C01 v_interp_p1_f32 v12, v0, 1, 3, [m0] ; C8300D00 v_interp_p2_f32 v12, [v12], v1, 1, 3, [m0] ; C8310D01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[11:14], 15, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[4:11], s[0:3] ; F0800F00 00010B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v3, v7, v11 ; 3E061707 v_mac_f32_e32 v4, v8, v12 ; 3E081908 v_mac_f32_e32 v5, v9, v13 ; 3E0A1B09 v_mac_f32_e32 v6, v10, v14 ; 3E0C1D0A v_mul_f32_e32 v0, v2, v6 ; 10000D02 v_cvt_pkrtz_f16_f32_e32 v1, v3, v4 ; 5E020903 v_cvt_pkrtz_f16_f32_e32 v0, v5, v0 ; 5E000105 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Code Size: 160 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL CONST[0..3] DCL TEMP[0..1], LOCAL 0: DP4 TEMP[0].x, IN[0], CONST[0] 1: DP4 TEMP[1].x, IN[0], CONST[1] 2: MOV TEMP[0].y, TEMP[1].xxxx 3: DP4 TEMP[1].x, IN[0], CONST[2] 4: MOV TEMP[0].z, TEMP[1].xxxx 5: DP4 TEMP[1].x, IN[0], CONST[3] 6: MOV TEMP[0].w, TEMP[1].xxxx 7: MOV OUT[0], TEMP[0] 8: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = fmul float %33, %13 %38 = fmul float %34, %14 %39 = fadd float %37, %38 %40 = fmul float %35, %15 %41 = fadd float %39, %40 %42 = fmul float %36, %16 %43 = fadd float %41, %42 %44 = fmul float %33, %17 %45 = fmul float %34, %18 %46 = fadd float %44, %45 %47 = fmul float %35, %19 %48 = fadd float %46, %47 %49 = fmul float %36, %20 %50 = fadd float %48, %49 %51 = fmul float %33, %21 %52 = fmul float %34, %22 %53 = fadd float %51, %52 %54 = fmul float %35, %23 %55 = fadd float %53, %54 %56 = fmul float %36, %24 %57 = fadd float %55, %56 %58 = fmul float %33, %25 %59 = fmul float %34, %26 %60 = fadd float %58, %59 %61 = fmul float %35, %27 %62 = fadd float %60, %61 %63 = fmul float %36, %28 %64 = fadd float %62, %63 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %43, float %50, float %57, float %64) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xd ; C207010D s_buffer_load_dword s15, s[0:3], 0xa ; C207810A s_buffer_load_dword s16, s[0:3], 0xb ; C208010B s_buffer_load_dword s17, s[0:3], 0xc ; C208810C s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s5, v1 ; 10080205 v_mac_f32_e32 v4, s4, v0 ; 3E080004 v_mul_f32_e32 v5, s9, v1 ; 100A0209 v_mac_f32_e32 v5, s8, v0 ; 3E0A0008 v_mul_f32_e32 v6, s13, v1 ; 100C020D v_mac_f32_e32 v6, s12, v0 ; 3E0C000C v_mul_f32_e32 v1, s14, v1 ; 1002020E v_mac_f32_e32 v1, s17, v0 ; 3E020011 v_mac_f32_e32 v4, s6, v2 ; 3E080406 v_mac_f32_e32 v5, s10, v2 ; 3E0A040A v_mac_f32_e32 v6, s15, v2 ; 3E0C040F v_mac_f32_e32 v1, s18, v2 ; 3E020412 v_mac_f32_e32 v4, s7, v3 ; 3E080607 v_mac_f32_e32 v5, s11, v3 ; 3E0A060B v_mac_f32_e32 v6, s16, v3 ; 3E0C0610 v_mac_f32_e32 v1, s0, v3 ; 3E020600 exp 15, 12, 0, 1, 0, v4, v5, v6, v1 ; F80008CF 01060504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 172 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL OUT[0], COLOR DCL CONST[0] 0: MOV OUT[0], CONST[0] 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %28 = call i32 @llvm.SI.packf16(float %24, float %25) %29 = bitcast i32 %28 to float %30 = call i32 @llvm.SI.packf16(float %26, float %27) %31 = bitcast i32 %30 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %29, float %31, float %29, float %31) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s0, s[0:3], 0x3 ; C2000103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s5 ; 7E000205 v_cvt_pkrtz_f16_f32_e32 v0, s4, v0 ; 5E000004 v_mov_b32_e32 v1, s0 ; 7E020200 v_cvt_pkrtz_f16_f32_e32 v1, s6, v1 ; 5E020206 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 8 VGPRS: 4 Code Size: 56 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL CONST[0..5] DCL TEMP[0..1], LOCAL 0: DP4 TEMP[0].x, IN[2], CONST[2] 1: DP4 TEMP[1].x, IN[2], CONST[3] 2: MOV TEMP[0].y, TEMP[1].xxxx 3: DP4 TEMP[1].x, IN[2], CONST[4] 4: MOV TEMP[0].z, TEMP[1].xxxx 5: DP4 TEMP[1].x, IN[2], CONST[5] 6: MOV TEMP[0].w, TEMP[1].xxxx 7: MOV OUT[2], IN[1] 8: MOV OUT[3], CONST[0] 9: MOV OUT[1], IN[0] 10: MOV OUT[4], CONST[1] 11: MOV OUT[0], TEMP[0] 12: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = extractelement <4 x float> %48, i32 3 %53 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !tbaa !0 %55 = add i32 %5, %7 %56 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %55) %57 = extractelement <4 x float> %56, i32 0 %58 = extractelement <4 x float> %56, i32 1 %59 = extractelement <4 x float> %56, i32 2 %60 = extractelement <4 x float> %56, i32 3 %61 = fmul float %57, %21 %62 = fmul float %58, %22 %63 = fadd float %61, %62 %64 = fmul float %59, %23 %65 = fadd float %63, %64 %66 = fmul float %60, %24 %67 = fadd float %65, %66 %68 = fmul float %57, %25 %69 = fmul float %58, %26 %70 = fadd float %68, %69 %71 = fmul float %59, %27 %72 = fadd float %70, %71 %73 = fmul float %60, %28 %74 = fadd float %72, %73 %75 = fmul float %57, %29 %76 = fmul float %58, %30 %77 = fadd float %75, %76 %78 = fmul float %59, %31 %79 = fadd float %77, %78 %80 = fmul float %60, %32 %81 = fadd float %79, %80 %82 = fmul float %57, %33 %83 = fmul float %58, %34 %84 = fadd float %82, %83 %85 = fmul float %59, %35 %86 = fadd float %84, %85 %87 = fmul float %60, %36 %88 = fadd float %86, %87 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %43, float %44) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %49, float %50, float %51, float %52) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %13, float %14, float %15, float %16) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %67, float %74, float %81, float %88) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 buffer_load_format_xyzw v[9:12], v0, s[16:19], 0 idxen ; E00C2000 80040900 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s19, s[0:3], 0xf ; C209810F s_buffer_load_dword s20, s[0:3], 0x10 ; C20A0110 s_buffer_load_dword s21, s[0:3], 0x11 ; C20A8111 s_buffer_load_dword s22, s[0:3], 0x14 ; C20B0114 s_buffer_load_dword s23, s[0:3], 0x15 ; C20B8115 s_buffer_load_dword s24, s[0:3], 0x12 ; C20C0112 s_buffer_load_dword s25, s[0:3], 0x13 ; C20C8113 s_buffer_load_dword s26, s[0:3], 0x16 ; C20D0116 s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s13, v10 ; 1000140D v_mac_f32_e32 v0, s12, v9 ; 3E00120C v_mul_f32_e32 v13, s17, v10 ; 101A1411 v_mac_f32_e32 v13, s16, v9 ; 3E1A1210 v_mul_f32_e32 v14, s21, v10 ; 101C1415 v_mac_f32_e32 v14, s20, v9 ; 3E1C1214 v_mul_f32_e32 v10, s23, v10 ; 10141417 v_mac_f32_e32 v10, s22, v9 ; 3E141216 v_mac_f32_e32 v0, s14, v11 ; 3E00160E v_mac_f32_e32 v13, s18, v11 ; 3E1A1612 v_mac_f32_e32 v14, s24, v11 ; 3E1C1618 v_mac_f32_e32 v10, s26, v11 ; 3E14161A v_mac_f32_e32 v0, s15, v12 ; 3E00180F v_mac_f32_e32 v13, s19, v12 ; 3E1A1813 v_mac_f32_e32 v14, s25, v12 ; 3E1C1819 v_mac_f32_e32 v10, s0, v12 ; 3E141800 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 exp 15, 33, 0, 0, 0, v5, v6, v7, v8 ; F800021F 08070605 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, s4 ; 7E020204 v_mov_b32_e32 v2, s5 ; 7E040205 v_mov_b32_e32 v3, s6 ; 7E060206 v_mov_b32_e32 v4, s7 ; 7E080207 exp 15, 34, 0, 0, 0, v1, v2, v3, v4 ; F800022F 04030201 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, s8 ; 7E020208 v_mov_b32_e32 v2, s9 ; 7E040209 v_mov_b32_e32 v3, s10 ; 7E06020A v_mov_b32_e32 v4, s11 ; 7E08020B exp 15, 35, 0, 0, 0, v1, v2, v3, v4 ; F800023F 04030201 exp 15, 12, 0, 1, 0, v0, v13, v14, v10 ; F80008CF 0A0E0D00 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 16 Code Size: 300 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL TEMP[0..1], LOCAL 0: MAD TEMP[0], IN[0], IN[3], IN[2] 1: MUL TEMP[1].x, TEMP[0].wwww, IN[1].wwww 2: MOV TEMP[0].w, TEMP[1].xxxx 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %23 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %24 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %25 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %26 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %35 = fmul float %22, %31 %36 = fadd float %35, %27 %37 = fmul float %23, %32 %38 = fadd float %37, %28 %39 = fmul float %24, %33 %40 = fadd float %39, %29 %41 = fmul float %25, %34 %42 = fadd float %41, %30 %43 = fmul float %42, %26 %44 = call i32 @llvm.SI.packf16(float %36, float %38) %45 = bitcast i32 %44 to float %46 = call i32 @llvm.SI.packf16(float %40, float %43) %47 = bitcast i32 %46 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %45, float %47, float %45, float %47) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 3, 1, [m0] ; C8180700 v_interp_p2_f32 v6, [v6], v1, 3, 1, [m0] ; C8190701 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 v_interp_p1_f32 v10, v0, 3, 2, [m0] ; C8280B00 v_interp_p2_f32 v10, [v10], v1, 3, 2, [m0] ; C8290B01 v_interp_p1_f32 v11, v0, 0, 3, [m0] ; C82C0C00 v_interp_p2_f32 v11, [v11], v1, 0, 3, [m0] ; C82D0C01 v_interp_p1_f32 v12, v0, 1, 3, [m0] ; C8300D00 v_interp_p2_f32 v12, [v12], v1, 1, 3, [m0] ; C8310D01 v_interp_p1_f32 v13, v0, 2, 3, [m0] ; C8340E00 v_interp_p2_f32 v13, [v13], v1, 2, 3, [m0] ; C8350E01 v_interp_p1_f32 v0, v0, 3, 3, [m0] ; C8000F00 v_interp_p2_f32 v0, [v0], v1, 3, 3, [m0] ; C8010F01 v_mac_f32_e32 v7, v11, v2 ; 3E0E050B v_mac_f32_e32 v8, v12, v3 ; 3E10070C v_mac_f32_e32 v9, v13, v4 ; 3E12090D v_mac_f32_e32 v10, v0, v5 ; 3E140B00 v_mul_f32_e32 v0, v6, v10 ; 10001506 v_cvt_pkrtz_f16_f32_e32 v1, v7, v8 ; 5E021107 v_cvt_pkrtz_f16_f32_e32 v0, v9, v0 ; 5E000109 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Code Size: 148 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..5] DCL TEMP[0..2], LOCAL 0: DP4 TEMP[0].x, IN[2], CONST[2] 1: DP4 TEMP[1].x, IN[2], CONST[3] 2: MOV TEMP[0].y, TEMP[1].xxxx 3: DP4 TEMP[1].x, IN[2], CONST[4] 4: MOV TEMP[0].z, TEMP[1].xxxx 5: DP4 TEMP[1].x, IN[2], CONST[5] 6: MOV TEMP[0].w, TEMP[1].xxxx 7: MAD TEMP[1], IN[0], CONST[1], CONST[0] 8: MOV TEMP[2].xy, IN[1].xyxx 9: MOV OUT[1], TEMP[1] 10: MOV OUT[0], TEMP[0] 11: MOV OUT[2], TEMP[2] 12: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %52 = load <16 x i8>, <16 x i8> addrspace(2)* %51, align 16, !tbaa !0 %53 = add i32 %5, %7 %54 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %52, i32 0, i32 %53) %55 = extractelement <4 x float> %54, i32 0 %56 = extractelement <4 x float> %54, i32 1 %57 = extractelement <4 x float> %54, i32 2 %58 = extractelement <4 x float> %54, i32 3 %59 = fmul float %55, %21 %60 = fmul float %56, %22 %61 = fadd float %59, %60 %62 = fmul float %57, %23 %63 = fadd float %61, %62 %64 = fmul float %58, %24 %65 = fadd float %63, %64 %66 = fmul float %55, %25 %67 = fmul float %56, %26 %68 = fadd float %66, %67 %69 = fmul float %57, %27 %70 = fadd float %68, %69 %71 = fmul float %58, %28 %72 = fadd float %70, %71 %73 = fmul float %55, %29 %74 = fmul float %56, %30 %75 = fadd float %73, %74 %76 = fmul float %57, %31 %77 = fadd float %75, %76 %78 = fmul float %58, %32 %79 = fadd float %77, %78 %80 = fmul float %55, %33 %81 = fmul float %56, %34 %82 = fadd float %80, %81 %83 = fmul float %57, %35 %84 = fadd float %82, %83 %85 = fmul float %58, %36 %86 = fadd float %84, %85 %87 = fmul float %41, %17 %88 = fadd float %87, %13 %89 = fmul float %42, %18 %90 = fadd float %89, %14 %91 = fmul float %43, %19 %92 = fadd float %91, %15 %93 = fmul float %44, %20 %94 = fadd float %93, %16 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %88, float %90, float %92, float %94) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %49, float %50, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %65, float %72, float %79, float %86) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_mov_b32_e32 v1, 0 ; 7E020280 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s16, s[0:3], 0x0 ; C2080100 s_buffer_load_dword s17, s[0:3], 0x1 ; C2088101 buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[12:15], 0 idxen ; E00C2000 80030600 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[8:11], v0, s[8:11], 0 idxen ; E00C2000 80020800 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103 s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104 s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 v_mov_b32_e32 v0, s16 ; 7E000210 s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107 v_mov_b32_e32 v12, s17 ; 7E180211 s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108 s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109 s_buffer_load_dword s12, s[0:3], 0xa ; C206010A s_buffer_load_dword s13, s[0:3], 0xb ; C206810B s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v13, s4 ; 7E1A0204 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C v_mov_b32_e32 v14, s5 ; 7E1C0205 s_buffer_load_dword s5, s[0:3], 0xd ; C202810D s_buffer_load_dword s14, s[0:3], 0xe ; C207010E s_buffer_load_dword s15, s[0:3], 0xf ; C207810F v_mac_f32_e32 v0, s6, v2 ; 3E000406 v_mac_f32_e32 v12, s7, v3 ; 3E180607 v_mac_f32_e32 v13, s8, v4 ; 3E1A0808 v_mac_f32_e32 v14, s9, v5 ; 3E1C0A09 exp 15, 32, 0, 0, 0, v0, v12, v13, v14 ; F800020F 0E0D0C00 s_buffer_load_dword s6, s[0:3], 0x10 ; C2030110 s_buffer_load_dword s7, s[0:3], 0x11 ; C2038111 s_buffer_load_dword s8, s[0:3], 0x14 ; C2040114 s_buffer_load_dword s9, s[0:3], 0x15 ; C2048115 s_buffer_load_dword s16, s[0:3], 0x12 ; C2080112 s_buffer_load_dword s17, s[0:3], 0x13 ; C2088113 s_buffer_load_dword s18, s[0:3], 0x16 ; C2090116 s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v0, s11, v9 ; 1000120B v_mac_f32_e32 v0, s10, v8 ; 3E00100A v_mul_f32_e32 v2, s5, v9 ; 10041205 v_mac_f32_e32 v2, s4, v8 ; 3E041004 v_mul_f32_e32 v3, s7, v9 ; 10061207 v_mac_f32_e32 v3, s6, v8 ; 3E061006 v_mul_f32_e32 v4, s9, v9 ; 10081209 v_mac_f32_e32 v4, s8, v8 ; 3E081008 v_mac_f32_e32 v0, s12, v10 ; 3E00140C v_mac_f32_e32 v2, s14, v10 ; 3E04140E v_mac_f32_e32 v3, s16, v10 ; 3E061410 v_mac_f32_e32 v4, s18, v10 ; 3E081412 v_mac_f32_e32 v0, s13, v11 ; 3E00160D v_mac_f32_e32 v2, s15, v11 ; 3E04160F v_mac_f32_e32 v3, s17, v11 ; 3E061611 v_mac_f32_e32 v4, s0, v11 ; 3E081600 exp 15, 33, 0, 0, 0, v6, v7, v1, v1 ; F800021F 01010706 exp 15, 12, 0, 1, 0, v0, v2, v3, v4 ; F80008CF 04030200 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Code Size: 284 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xyz, IN[0].xyzx 1: MOV TEMP[1].xy, IN[1].xyyy 2: TEX TEMP[1].x, TEMP[1], SAMP[0], 2D 3: MUL TEMP[1].x, IN[0].wwww, TEMP[1].xxxx 4: MOV TEMP[0].w, TEMP[1].xxxx 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %32 = bitcast float %30 to i32 %33 = bitcast float %31 to i32 %34 = insertelement <2 x i32> undef, i32 %32, i32 0 %35 = insertelement <2 x i32> %34, i32 %33, i32 1 %36 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %35, <32 x i8> %23, <16 x i8> %25, i32 2) %37 = extractelement <4 x float> %36, i32 0 %38 = fmul float %29, %37 %39 = call i32 @llvm.SI.packf16(float %26, float %27) %40 = bitcast i32 %39 to float %41 = call i32 @llvm.SI.packf16(float %28, float %38) %42 = bitcast i32 %41 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %40, float %42, float %40, float %42) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v0, 1, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[8:15], s[0:3] ; F0800100 00020006 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v0, v5 ; 10000B00 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 v_cvt_pkrtz_f16_f32_e32 v0, v4, v0 ; 5E000104 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 104 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..5] DCL TEMP[0..2], LOCAL 0: DP4 TEMP[0].x, IN[1], CONST[0] 1: DP4 TEMP[1].x, IN[1], CONST[1] 2: MOV TEMP[0].y, TEMP[1].xxxx 3: DP4 TEMP[1].x, IN[1], CONST[2] 4: MOV TEMP[0].z, TEMP[1].xxxx 5: DP4 TEMP[1].x, IN[1], CONST[3] 6: MOV TEMP[0].w, TEMP[1].xxxx 7: DP4 TEMP[1].x, IN[1], CONST[4] 8: DP4 TEMP[2].x, IN[1], CONST[5] 9: MOV TEMP[1].y, TEMP[2].xxxx 10: MOV TEMP[1].xy, TEMP[1].xyxx 11: MOV OUT[1], IN[0] 12: MOV OUT[0], TEMP[0] 13: MOV OUT[2], TEMP[1] 14: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = extractelement <4 x float> %48, i32 3 %53 = fmul float %49, %13 %54 = fmul float %50, %14 %55 = fadd float %53, %54 %56 = fmul float %51, %15 %57 = fadd float %55, %56 %58 = fmul float %52, %16 %59 = fadd float %57, %58 %60 = fmul float %49, %17 %61 = fmul float %50, %18 %62 = fadd float %60, %61 %63 = fmul float %51, %19 %64 = fadd float %62, %63 %65 = fmul float %52, %20 %66 = fadd float %64, %65 %67 = fmul float %49, %21 %68 = fmul float %50, %22 %69 = fadd float %67, %68 %70 = fmul float %51, %23 %71 = fadd float %69, %70 %72 = fmul float %52, %24 %73 = fadd float %71, %72 %74 = fmul float %49, %25 %75 = fmul float %50, %26 %76 = fadd float %74, %75 %77 = fmul float %51, %27 %78 = fadd float %76, %77 %79 = fmul float %52, %28 %80 = fadd float %78, %79 %81 = fmul float %49, %29 %82 = fmul float %50, %30 %83 = fadd float %81, %82 %84 = fmul float %51, %31 %85 = fadd float %83, %84 %86 = fmul float %52, %32 %87 = fadd float %85, %86 %88 = fmul float %49, %33 %89 = fmul float %50, %34 %90 = fadd float %88, %89 %91 = fmul float %51, %35 %92 = fadd float %90, %91 %93 = fmul float %52, %36 %94 = fadd float %92, %93 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %43, float %44) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %87, float %94, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %59, float %66, float %73, float %80) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0xf ; C206010F s_buffer_load_dword s13, s[0:3], 0x10 ; C2068110 buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_buffer_load_dword s5, s[0:3], 0x12 ; C2028112 s_buffer_load_dword s6, s[0:3], 0x13 ; C2030113 s_buffer_load_dword s7, s[0:3], 0x14 ; C2038114 s_buffer_load_dword s8, s[0:3], 0x15 ; C2040115 s_buffer_load_dword s9, s[0:3], 0x16 ; C2048116 s_buffer_load_dword s10, s[0:3], 0x17 ; C2050117 s_buffer_load_dword s11, s[0:3], 0x0 ; C2058100 s_buffer_load_dword s14, s[0:3], 0x1 ; C2070101 s_buffer_load_dword s15, s[0:3], 0x2 ; C2078102 s_buffer_load_dword s16, s[0:3], 0x3 ; C2080103 s_buffer_load_dword s17, s[0:3], 0x4 ; C2088104 s_buffer_load_dword s18, s[0:3], 0x5 ; C2090105 s_buffer_load_dword s19, s[0:3], 0x6 ; C2098106 s_buffer_load_dword s20, s[0:3], 0x7 ; C20A0107 s_buffer_load_dword s21, s[0:3], 0x8 ; C20A8108 s_buffer_load_dword s22, s[0:3], 0x9 ; C20B0109 s_buffer_load_dword s23, s[0:3], 0xa ; C20B810A s_buffer_load_dword s24, s[0:3], 0xb ; C20C010B s_buffer_load_dword s25, s[0:3], 0xc ; C20C810C s_buffer_load_dword s26, s[0:3], 0xd ; C20D010D s_buffer_load_dword s0, s[0:3], 0xe ; C200010E s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s14, v6 ; 10000C0E v_mac_f32_e32 v0, s11, v5 ; 3E000A0B v_mul_f32_e32 v9, s18, v6 ; 10120C12 v_mac_f32_e32 v9, s17, v5 ; 3E120A11 v_mul_f32_e32 v10, s22, v6 ; 10140C16 v_mac_f32_e32 v10, s21, v5 ; 3E140A15 v_mul_f32_e32 v11, s26, v6 ; 10160C1A v_mac_f32_e32 v11, s25, v5 ; 3E160A19 v_mul_f32_e32 v12, s4, v6 ; 10180C04 v_mac_f32_e32 v12, s13, v5 ; 3E180A0D v_mul_f32_e32 v6, s8, v6 ; 100C0C08 v_mac_f32_e32 v6, s7, v5 ; 3E0C0A07 v_mac_f32_e32 v0, s15, v7 ; 3E000E0F v_mac_f32_e32 v9, s19, v7 ; 3E120E13 v_mac_f32_e32 v10, s23, v7 ; 3E140E17 v_mac_f32_e32 v11, s0, v7 ; 3E160E00 v_mac_f32_e32 v12, s5, v7 ; 3E180E05 v_mac_f32_e32 v6, s9, v7 ; 3E0C0E09 v_mac_f32_e32 v0, s16, v8 ; 3E001010 v_mac_f32_e32 v9, s20, v8 ; 3E121014 v_mac_f32_e32 v10, s24, v8 ; 3E141018 v_mac_f32_e32 v11, s12, v8 ; 3E16100C v_mac_f32_e32 v12, s6, v8 ; 3E181006 v_mac_f32_e32 v6, s10, v8 ; 3E0C100A exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 0 ; 7E020280 exp 15, 33, 0, 0, 0, v12, v6, v1, v1 ; F800021F 0101060C exp 15, 12, 0, 1, 0, v0, v9, v10, v11 ; F80008CF 0B0A0900 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 16 Code Size: 268 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV TEMP[1].xyz, TEMP[0].xyzx 3: MUL TEMP[0].x, TEMP[0].wwww, IN[0].wwww 4: MOV TEMP[1].w, TEMP[0].xxxx 5: MOV OUT[0], TEMP[1] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %29 = bitcast float %27 to i32 %30 = bitcast float %28 to i32 %31 = insertelement <2 x i32> undef, i32 %29, i32 0 %32 = insertelement <2 x i32> %31, i32 %30, i32 1 %33 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %32, <32 x i8> %23, <16 x i8> %25, i32 2) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = extractelement <4 x float> %33, i32 2 %37 = extractelement <4 x float> %33, i32 3 %38 = fmul float %37, %26 %39 = call i32 @llvm.SI.packf16(float %34, float %35) %40 = bitcast i32 %39 to float %41 = call i32 @llvm.SI.packf16(float %36, float %38) %42 = bitcast i32 %41 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %40, float %42, float %40, float %42) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400 v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[3:6], 15, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[12:19], s[0:3] ; F0800F00 00030303 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v2, v6 ; 10000D02 v_cvt_pkrtz_f16_f32_e32 v0, v5, v0 ; 5E000105 v_cvt_pkrtz_f16_f32_e32 v1, v3, v4 ; 5E020903 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 80 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL CONST[0..7] DCL TEMP[0..2], LOCAL 0: DP4 TEMP[0].x, IN[1], CONST[2] 1: DP4 TEMP[1].x, IN[1], CONST[3] 2: MOV TEMP[0].y, TEMP[1].xxxx 3: DP4 TEMP[1].x, IN[1], CONST[4] 4: MOV TEMP[0].z, TEMP[1].xxxx 5: DP4 TEMP[1].x, IN[1], CONST[5] 6: MOV TEMP[0].w, TEMP[1].xxxx 7: DP4 TEMP[1].x, IN[1], CONST[6] 8: DP4 TEMP[2].x, IN[1], CONST[7] 9: MOV TEMP[1].y, TEMP[2].xxxx 10: MOV TEMP[1].xy, TEMP[1].xyxx 11: MOV OUT[1], IN[0] 12: MOV OUT[2], CONST[0] 13: MOV OUT[3], CONST[1] 14: MOV OUT[0], TEMP[0] 15: MOV OUT[4], TEMP[1] 16: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = extractelement <4 x float> %48, i32 3 %53 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !tbaa !0 %55 = add i32 %5, %7 %56 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %55) %57 = extractelement <4 x float> %56, i32 0 %58 = extractelement <4 x float> %56, i32 1 %59 = extractelement <4 x float> %56, i32 2 %60 = extractelement <4 x float> %56, i32 3 %61 = fmul float %57, %21 %62 = fmul float %58, %22 %63 = fadd float %61, %62 %64 = fmul float %59, %23 %65 = fadd float %63, %64 %66 = fmul float %60, %24 %67 = fadd float %65, %66 %68 = fmul float %57, %25 %69 = fmul float %58, %26 %70 = fadd float %68, %69 %71 = fmul float %59, %27 %72 = fadd float %70, %71 %73 = fmul float %60, %28 %74 = fadd float %72, %73 %75 = fmul float %57, %29 %76 = fmul float %58, %30 %77 = fadd float %75, %76 %78 = fmul float %59, %31 %79 = fadd float %77, %78 %80 = fmul float %60, %32 %81 = fadd float %79, %80 %82 = fmul float %57, %33 %83 = fmul float %58, %34 %84 = fadd float %82, %83 %85 = fmul float %59, %35 %86 = fadd float %84, %85 %87 = fmul float %60, %36 %88 = fadd float %86, %87 %89 = fmul float %57, %37 %90 = fmul float %58, %38 %91 = fadd float %89, %90 %92 = fmul float %59, %39 %93 = fadd float %91, %92 %94 = fmul float %60, %40 %95 = fadd float %93, %94 %96 = fmul float %57, %41 %97 = fmul float %58, %42 %98 = fadd float %96, %97 %99 = fmul float %59, %43 %100 = fadd float %98, %99 %101 = fmul float %60, %44 %102 = fadd float %100, %101 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %49, float %50, float %51, float %52) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %13, float %14, float %15, float %16) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %95, float %102, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %67, float %74, float %81, float %88) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x19 ; C2040119 s_buffer_load_dword s9, s[0:3], 0x1a ; C204811A s_buffer_load_dword s10, s[0:3], 0x1b ; C205011B s_buffer_load_dword s11, s[0:3], 0x1c ; C205811C s_buffer_load_dword s16, s[0:3], 0x1d ; C208011D buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106 s_buffer_load_dword s6, s[0:3], 0x7 ; C2030107 s_buffer_load_dword s7, s[0:3], 0x8 ; C2038108 s_buffer_load_dword s12, s[0:3], 0x9 ; C2060109 s_buffer_load_dword s13, s[0:3], 0xa ; C206810A s_buffer_load_dword s14, s[0:3], 0xb ; C207010B s_buffer_load_dword s15, s[0:3], 0xc ; C207810C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s19, s[0:3], 0x0 ; C2098100 s_buffer_load_dword s20, s[0:3], 0x1 ; C20A0101 s_buffer_load_dword s21, s[0:3], 0x2 ; C20A8102 s_buffer_load_dword s22, s[0:3], 0x3 ; C20B0103 s_buffer_load_dword s23, s[0:3], 0x4 ; C20B8104 s_buffer_load_dword s24, s[0:3], 0xf ; C20C010F s_buffer_load_dword s25, s[0:3], 0x10 ; C20C8110 s_buffer_load_dword s26, s[0:3], 0x11 ; C20D0111 s_buffer_load_dword s27, s[0:3], 0x12 ; C20D8112 s_buffer_load_dword s28, s[0:3], 0x13 ; C20E0113 s_buffer_load_dword s29, s[0:3], 0x14 ; C20E8114 s_buffer_load_dword s30, s[0:3], 0x15 ; C20F0115 s_buffer_load_dword s31, s[0:3], 0x16 ; C20F8116 s_buffer_load_dword s32, s[0:3], 0x17 ; C2100117 s_buffer_load_dword s33, s[0:3], 0x18 ; C2108118 s_buffer_load_dword s34, s[0:3], 0x1e ; C211011E s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s12, v6 ; 10000C0C v_mac_f32_e32 v0, s7, v5 ; 3E000A07 v_mul_f32_e32 v9, s17, v6 ; 10120C11 v_mac_f32_e32 v9, s15, v5 ; 3E120A0F v_mul_f32_e32 v10, s26, v6 ; 10140C1A v_mac_f32_e32 v10, s25, v5 ; 3E140A19 v_mul_f32_e32 v11, s30, v6 ; 10160C1E v_mac_f32_e32 v11, s29, v5 ; 3E160A1D v_mul_f32_e32 v12, s8, v6 ; 10180C08 v_mac_f32_e32 v12, s33, v5 ; 3E180A21 v_mul_f32_e32 v6, s16, v6 ; 100C0C10 v_mac_f32_e32 v6, s11, v5 ; 3E0C0A0B v_mac_f32_e32 v0, s13, v7 ; 3E000E0D v_mac_f32_e32 v9, s18, v7 ; 3E120E12 v_mac_f32_e32 v10, s27, v7 ; 3E140E1B v_mac_f32_e32 v11, s31, v7 ; 3E160E1F v_mac_f32_e32 v12, s9, v7 ; 3E180E09 v_mac_f32_e32 v6, s34, v7 ; 3E0C0E22 v_mac_f32_e32 v0, s14, v8 ; 3E00100E v_mac_f32_e32 v9, s24, v8 ; 3E121018 v_mac_f32_e32 v10, s28, v8 ; 3E14101C v_mac_f32_e32 v11, s32, v8 ; 3E161020 v_mac_f32_e32 v12, s10, v8 ; 3E18100A v_mac_f32_e32 v6, s0, v8 ; 3E0C1000 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, s19 ; 7E020213 v_mov_b32_e32 v2, s20 ; 7E040214 v_mov_b32_e32 v3, s21 ; 7E060215 v_mov_b32_e32 v4, s22 ; 7E080216 exp 15, 33, 0, 0, 0, v1, v2, v3, v4 ; F800021F 04030201 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, s23 ; 7E020217 v_mov_b32_e32 v2, s4 ; 7E040204 v_mov_b32_e32 v3, s5 ; 7E060205 v_mov_b32_e32 v4, s6 ; 7E080206 exp 15, 34, 0, 0, 0, v1, v2, v3, v4 ; F800022F 04030201 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 0 ; 7E020280 exp 15, 35, 0, 0, 0, v12, v6, v1, v1 ; F800023F 0101060C exp 15, 12, 0, 1, 0, v0, v9, v10, v11 ; F80008CF 0B0A0900 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 16 Code Size: 356 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL TEMP[0..1], LOCAL IMM[0] FLT32 { -0.5020, 1.5960, -0.8130, 0.0000} IMM[1] FLT32 { -0.0627, 1.1640, 1.0000, -0.3920} IMM[2] FLT32 { 0.0000, -0.3920, 2.0170, 0.0000} 0: MOV TEMP[0].xy, IN[3].xyyy 1: TEX TEMP[0].x, TEMP[0], SAMP[2], 2D 2: ADD TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx 3: MOV TEMP[1].xy, IN[3].xyyy 4: TEX TEMP[1].x, TEMP[1], SAMP[0], 2D 5: ADD TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx 6: MUL TEMP[1].x, TEMP[1].xxxx, IMM[1].yyyy 7: MAD TEMP[0], TEMP[0].xxxx, IMM[0].yzww, TEMP[1].xxxx 8: MOV TEMP[1].xy, IN[3].xyyy 9: TEX TEMP[1].x, TEMP[1], SAMP[1], 2D 10: ADD TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx 11: MAD TEMP[1].xyz, TEMP[1].xxxx, IMM[2].xyzx, TEMP[0] 12: MOV TEMP[0].xyz, TEMP[1].xyzx 13: MOV TEMP[0].w, IMM[1].zzzz 14: MAD TEMP[0], TEMP[0], IN[2], IN[1] 15: MUL TEMP[1].x, TEMP[0].wwww, IN[0].wwww 16: MOV TEMP[0].w, TEMP[1].xxxx 17: MOV OUT[0], TEMP[0] 18: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %27 = bitcast <8 x i32> addrspace(2)* %26 to <32 x i8> addrspace(2)* %28 = load <32 x i8>, <32 x i8> addrspace(2)* %27, align 32, !tbaa !0 %29 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %30 = bitcast <4 x i32> addrspace(2)* %29 to <16 x i8> addrspace(2)* %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 %32 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %33 = bitcast <8 x i32> addrspace(2)* %32 to <32 x i8> addrspace(2)* %34 = load <32 x i8>, <32 x i8> addrspace(2)* %33, align 32, !tbaa !0 %35 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %36 = bitcast <4 x i32> addrspace(2)* %35 to <16 x i8> addrspace(2)* %37 = load <16 x i8>, <16 x i8> addrspace(2)* %36, align 16, !tbaa !0 %38 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %44 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %45 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %46 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %47 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %48 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %49 = bitcast float %47 to i32 %50 = bitcast float %48 to i32 %51 = insertelement <2 x i32> undef, i32 %49, i32 0 %52 = insertelement <2 x i32> %51, i32 %50, i32 1 %53 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %52, <32 x i8> %34, <16 x i8> %37, i32 2) %54 = extractelement <4 x float> %53, i32 0 %55 = fadd float %54, 0xBFE0101020000000 %56 = bitcast float %47 to i32 %57 = bitcast float %48 to i32 %58 = insertelement <2 x i32> undef, i32 %56, i32 0 %59 = insertelement <2 x i32> %58, i32 %57, i32 1 %60 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %59, <32 x i8> %23, <16 x i8> %25, i32 2) %61 = extractelement <4 x float> %60, i32 0 %62 = fadd float %61, 0xBFB0101020000000 %63 = fmul float %62, 0x3FF29FBE80000000 %64 = fmul float %55, 0x3FF9893740000000 %65 = fadd float %64, %63 %66 = fmul float %55, 0xBFEA0418A0000000 %67 = fadd float %66, %63 %68 = fmul float %55, 0.000000e+00 %69 = fadd float %68, %63 %70 = bitcast float %47 to i32 %71 = bitcast float %48 to i32 %72 = insertelement <2 x i32> undef, i32 %70, i32 0 %73 = insertelement <2 x i32> %72, i32 %71, i32 1 %74 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %73, <32 x i8> %28, <16 x i8> %31, i32 2) %75 = extractelement <4 x float> %74, i32 0 %76 = fadd float %75, 0xBFE0101020000000 %77 = fmul float %76, 0.000000e+00 %78 = fadd float %77, %65 %79 = fmul float %76, 0xBFD9168720000000 %80 = fadd float %79, %67 %81 = fmul float %76, 0x400022D0E0000000 %82 = fadd float %81, %69 %83 = fmul float %78, %43 %84 = fadd float %83, %39 %85 = fmul float %80, %44 %86 = fadd float %85, %40 %87 = fmul float %82, %45 %88 = fadd float %87, %41 %89 = fadd float %46, %42 %90 = fmul float %89, %38 %91 = call i32 @llvm.SI.packf16(float %84, float %86) %92 = bitcast i32 %91 to float %93 = call i32 @llvm.SI.packf16(float %88, float %90) %94 = bitcast i32 %93 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %92, float %94, float %92, float %94) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 s_load_dwordx4 s[0:3], s[4:5], 0x4 ; C0800504 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400 v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 v_interp_p1_f32 v5, v0, 2, 1, [m0] ; C8140600 v_interp_p2_f32 v5, [v5], v1, 2, 1, [m0] ; C8150601 v_interp_p1_f32 v6, v0, 3, 1, [m0] ; C8180700 v_interp_p2_f32 v6, [v6], v1, 3, 1, [m0] ; C8190701 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 s_load_dwordx4 s[12:15], s[4:5], 0x8 ; C0860508 s_load_dwordx8 s[16:23], s[6:7], 0x10 ; C0C80710 s_load_dwordx8 s[24:31], s[6:7], 0x0 ; C0CC0700 s_load_dwordx8 s[32:39], s[6:7], 0x8 ; C0D00708 v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 v_interp_p1_f32 v10, v0, 3, 2, [m0] ; C8280B00 v_interp_p2_f32 v10, [v10], v1, 3, 2, [m0] ; C8290B01 v_interp_p1_f32 v11, v0, 0, 3, [m0] ; C82C0C00 v_interp_p2_f32 v11, [v11], v1, 0, 3, [m0] ; C82D0C01 v_interp_p1_f32 v12, v0, 1, 3, [m0] ; C8300D00 v_interp_p2_f32 v12, [v12], v1, 1, 3, [m0] ; C8310D01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v0, 1, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[16:23], s[12:15] ; F0800100 0064000B image_sample v1, 1, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[24:31], s[8:11] ; F0800100 0046010B image_sample v11, 1, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[32:39], s[0:3] ; F0800100 00080B0B v_mov_b32_e32 v12, 0xbf008081 ; 7E1802FF BF008081 s_waitcnt vmcnt(2) ; BF8C0772 v_add_f32_e32 v0, v12, v0 ; 0600010C v_mov_b32_e32 v13, 0xbd808081 ; 7E1A02FF BD808081 s_waitcnt vmcnt(1) ; BF8C0771 v_add_f32_e32 v1, v1, v13 ; 06021B01 v_mul_f32_e32 v1, 0x3f94fdf4, v1 ; 100202FF 3F94FDF4 v_madmk_f32_e32 v13, v0, v1, 0x3fcc49ba ; 401A0300 3FCC49BA v_mac_f32_e32 v3, v7, v13 ; 3E061B07 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v7, v12, v11 ; 060E170C v_madmk_f32_e32 v0, v0, v1, 0xbf5020c5 ; 40000300 BF5020C5 v_madmk_f32_e32 v0, v7, v0, 0xbec8b439 ; 40000107 BEC8B439 v_mac_f32_e32 v4, v8, v0 ; 3E080108 v_madmk_f32_e32 v0, v7, v1, 0x40011687 ; 40000307 40011687 v_mac_f32_e32 v5, v9, v0 ; 3E0A0109 v_add_f32_e32 v0, v6, v10 ; 06001506 v_mul_f32_e32 v0, v2, v0 ; 10000102 v_cvt_pkrtz_f16_f32_e32 v1, v3, v4 ; 5E020903 v_cvt_pkrtz_f16_f32_e32 v0, v5, v0 ; 5E000105 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 16 Code Size: 268 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..3] DCL TEMP[0..1], LOCAL 0: DP4 TEMP[0].x, IN[2], CONST[0] 1: DP4 TEMP[1].x, IN[2], CONST[1] 2: MOV TEMP[0].y, TEMP[1].xxxx 3: DP4 TEMP[1].x, IN[2], CONST[2] 4: MOV TEMP[0].z, TEMP[1].xxxx 5: DP4 TEMP[1].x, IN[2], CONST[3] 6: MOV TEMP[0].w, TEMP[1].xxxx 7: MOV OUT[2], IN[1] 8: MOV OUT[1], IN[0] 9: MOV OUT[0], TEMP[0] 10: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = extractelement <4 x float> %48, i32 3 %53 = fmul float %49, %13 %54 = fmul float %50, %14 %55 = fadd float %53, %54 %56 = fmul float %51, %15 %57 = fadd float %55, %56 %58 = fmul float %52, %16 %59 = fadd float %57, %58 %60 = fmul float %49, %17 %61 = fmul float %50, %18 %62 = fadd float %60, %61 %63 = fmul float %51, %19 %64 = fadd float %62, %63 %65 = fmul float %52, %20 %66 = fadd float %64, %65 %67 = fmul float %49, %21 %68 = fmul float %50, %22 %69 = fadd float %67, %68 %70 = fmul float %51, %23 %71 = fadd float %69, %70 %72 = fmul float %52, %24 %73 = fadd float %71, %72 %74 = fmul float %49, %25 %75 = fmul float %50, %26 %76 = fadd float %74, %75 %77 = fmul float %51, %27 %78 = fadd float %76, %77 %79 = fmul float %52, %28 %80 = fadd float %78, %79 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %33, float %34, float %35, float %36) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %41, float %42, float %43, float %44) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %59, float %66, float %73, float %80) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 buffer_load_format_xyzw v[9:12], v0, s[16:19], 0 idxen ; E00C2000 80040900 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xd ; C207010D s_buffer_load_dword s15, s[0:3], 0xa ; C207810A s_buffer_load_dword s16, s[0:3], 0xb ; C208010B s_buffer_load_dword s17, s[0:3], 0xc ; C208810C s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s5, v10 ; 10001405 v_mac_f32_e32 v0, s4, v9 ; 3E001204 v_mul_f32_e32 v13, s9, v10 ; 101A1409 v_mac_f32_e32 v13, s8, v9 ; 3E1A1208 v_mul_f32_e32 v14, s13, v10 ; 101C140D v_mac_f32_e32 v14, s12, v9 ; 3E1C120C v_mul_f32_e32 v10, s14, v10 ; 1014140E v_mac_f32_e32 v10, s17, v9 ; 3E141211 v_mac_f32_e32 v0, s6, v11 ; 3E001606 v_mac_f32_e32 v13, s10, v11 ; 3E1A160A v_mac_f32_e32 v14, s15, v11 ; 3E1C160F v_mac_f32_e32 v10, s18, v11 ; 3E141612 v_mac_f32_e32 v0, s7, v12 ; 3E001807 v_mac_f32_e32 v13, s11, v12 ; 3E1A180B v_mac_f32_e32 v14, s16, v12 ; 3E1C1810 v_mac_f32_e32 v10, s0, v12 ; 3E141800 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 exp 15, 33, 0, 0, 0, v5, v6, v7, v8 ; F800021F 08070605 exp 15, 12, 0, 1, 0, v0, v13, v14, v10 ; F80008CF 0A0E0D00 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Code Size: 212 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xyz, IN[0].xyzx 1: MUL TEMP[1].x, IN[0].wwww, IN[1].wwww 2: MOV TEMP[0].w, TEMP[1].xxxx 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %23 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %24 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %25 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %26 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %27 = fmul float %25, %26 %28 = call i32 @llvm.SI.packf16(float %22, float %23) %29 = bitcast i32 %28 to float %30 = call i32 @llvm.SI.packf16(float %24, float %27) %31 = bitcast i32 %30 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %29, float %31, float %29, float %31) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v0, v0, 3, 1, [m0] ; C8000700 v_interp_p2_f32 v0, [v0], v1, 3, 1, [m0] ; C8010701 v_mul_f32_e32 v0, v0, v5 ; 10000B00 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 v_cvt_pkrtz_f16_f32_e32 v0, v4, v0 ; 5E000104 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 8 Code Size: 68 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL CONST[0..5] DCL TEMP[0..2], LOCAL 0: DP4 TEMP[0].x, IN[1], CONST[0] 1: DP4 TEMP[1].x, IN[1], CONST[1] 2: MOV TEMP[0].y, TEMP[1].xxxx 3: DP4 TEMP[1].x, IN[1], CONST[2] 4: MOV TEMP[0].z, TEMP[1].xxxx 5: DP4 TEMP[1].x, IN[1], CONST[3] 6: MOV TEMP[0].w, TEMP[1].xxxx 7: DP4 TEMP[1].x, IN[1], CONST[4] 8: DP4 TEMP[2].x, IN[1], CONST[5] 9: MOV TEMP[1].y, TEMP[2].xxxx 10: MOV TEMP[1].xy, TEMP[1].xyxx 11: MOV OUT[1], IN[0] 12: MOV OUT[0], TEMP[0] 13: MOV OUT[2], TEMP[1] 14: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = extractelement <4 x float> %48, i32 3 %53 = fmul float %49, %13 %54 = fmul float %50, %14 %55 = fadd float %53, %54 %56 = fmul float %51, %15 %57 = fadd float %55, %56 %58 = fmul float %52, %16 %59 = fadd float %57, %58 %60 = fmul float %49, %17 %61 = fmul float %50, %18 %62 = fadd float %60, %61 %63 = fmul float %51, %19 %64 = fadd float %62, %63 %65 = fmul float %52, %20 %66 = fadd float %64, %65 %67 = fmul float %49, %21 %68 = fmul float %50, %22 %69 = fadd float %67, %68 %70 = fmul float %51, %23 %71 = fadd float %69, %70 %72 = fmul float %52, %24 %73 = fadd float %71, %72 %74 = fmul float %49, %25 %75 = fmul float %50, %26 %76 = fadd float %74, %75 %77 = fmul float %51, %27 %78 = fadd float %76, %77 %79 = fmul float %52, %28 %80 = fadd float %78, %79 %81 = fmul float %49, %29 %82 = fmul float %50, %30 %83 = fadd float %81, %82 %84 = fmul float %51, %31 %85 = fadd float %83, %84 %86 = fmul float %52, %32 %87 = fadd float %85, %86 %88 = fmul float %49, %33 %89 = fmul float %50, %34 %90 = fadd float %88, %89 %91 = fmul float %51, %35 %92 = fadd float %90, %91 %93 = fmul float %52, %36 %94 = fadd float %92, %93 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %43, float %44) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %87, float %94, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %59, float %66, float %73, float %80) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[0:3], 0xf ; C206010F s_buffer_load_dword s13, s[0:3], 0x10 ; C2068110 buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_buffer_load_dword s5, s[0:3], 0x12 ; C2028112 s_buffer_load_dword s6, s[0:3], 0x13 ; C2030113 s_buffer_load_dword s7, s[0:3], 0x14 ; C2038114 s_buffer_load_dword s8, s[0:3], 0x15 ; C2040115 s_buffer_load_dword s9, s[0:3], 0x16 ; C2048116 s_buffer_load_dword s10, s[0:3], 0x17 ; C2050117 s_buffer_load_dword s11, s[0:3], 0x0 ; C2058100 s_buffer_load_dword s14, s[0:3], 0x1 ; C2070101 s_buffer_load_dword s15, s[0:3], 0x2 ; C2078102 s_buffer_load_dword s16, s[0:3], 0x3 ; C2080103 s_buffer_load_dword s17, s[0:3], 0x4 ; C2088104 s_buffer_load_dword s18, s[0:3], 0x5 ; C2090105 s_buffer_load_dword s19, s[0:3], 0x6 ; C2098106 s_buffer_load_dword s20, s[0:3], 0x7 ; C20A0107 s_buffer_load_dword s21, s[0:3], 0x8 ; C20A8108 s_buffer_load_dword s22, s[0:3], 0x9 ; C20B0109 s_buffer_load_dword s23, s[0:3], 0xa ; C20B810A s_buffer_load_dword s24, s[0:3], 0xb ; C20C010B s_buffer_load_dword s25, s[0:3], 0xc ; C20C810C s_buffer_load_dword s26, s[0:3], 0xd ; C20D010D s_buffer_load_dword s0, s[0:3], 0xe ; C200010E s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s14, v6 ; 10000C0E v_mac_f32_e32 v0, s11, v5 ; 3E000A0B v_mul_f32_e32 v9, s18, v6 ; 10120C12 v_mac_f32_e32 v9, s17, v5 ; 3E120A11 v_mul_f32_e32 v10, s22, v6 ; 10140C16 v_mac_f32_e32 v10, s21, v5 ; 3E140A15 v_mul_f32_e32 v11, s26, v6 ; 10160C1A v_mac_f32_e32 v11, s25, v5 ; 3E160A19 v_mul_f32_e32 v12, s4, v6 ; 10180C04 v_mac_f32_e32 v12, s13, v5 ; 3E180A0D v_mul_f32_e32 v6, s8, v6 ; 100C0C08 v_mac_f32_e32 v6, s7, v5 ; 3E0C0A07 v_mac_f32_e32 v0, s15, v7 ; 3E000E0F v_mac_f32_e32 v9, s19, v7 ; 3E120E13 v_mac_f32_e32 v10, s23, v7 ; 3E140E17 v_mac_f32_e32 v11, s0, v7 ; 3E160E00 v_mac_f32_e32 v12, s5, v7 ; 3E180E05 v_mac_f32_e32 v6, s9, v7 ; 3E0C0E09 v_mac_f32_e32 v0, s16, v8 ; 3E001010 v_mac_f32_e32 v9, s20, v8 ; 3E121014 v_mac_f32_e32 v10, s24, v8 ; 3E141018 v_mac_f32_e32 v11, s12, v8 ; 3E16100C v_mac_f32_e32 v12, s6, v8 ; 3E181006 v_mac_f32_e32 v6, s10, v8 ; 3E0C100A exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 0 ; 7E020280 exp 15, 33, 0, 0, 0, v12, v6, v1, v1 ; F800021F 0101060C exp 15, 12, 0, 1, 0, v0, v9, v10, v11 ; F80008CF 0B0A0900 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 16 Code Size: 268 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL TEMP[0..1], LOCAL IMM[0] FLT32 { -0.5020, 1.5960, -0.8130, 0.0000} IMM[1] FLT32 { -0.0627, 1.1640, 1.0000, -0.3920} IMM[2] FLT32 { 0.0000, -0.3920, 2.0170, 0.0000} 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0].x, TEMP[0], SAMP[2], 2D 2: ADD TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx 3: MOV TEMP[1].xy, IN[1].xyyy 4: TEX TEMP[1].x, TEMP[1], SAMP[0], 2D 5: ADD TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx 6: MUL TEMP[1].x, TEMP[1].xxxx, IMM[1].yyyy 7: MAD TEMP[0], TEMP[0].xxxx, IMM[0].yzww, TEMP[1].xxxx 8: MOV TEMP[1].xy, IN[1].xyyy 9: TEX TEMP[1].x, TEMP[1], SAMP[1], 2D 10: ADD TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx 11: MAD TEMP[0].xyz, TEMP[1].xxxx, IMM[2].xyzx, TEMP[0] 12: MOV TEMP[0].xyz, TEMP[0].xyzx 13: MOV TEMP[0].w, IN[0].wwww 14: MOV OUT[0], TEMP[0] 15: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %27 = bitcast <8 x i32> addrspace(2)* %26 to <32 x i8> addrspace(2)* %28 = load <32 x i8>, <32 x i8> addrspace(2)* %27, align 32, !tbaa !0 %29 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %30 = bitcast <4 x i32> addrspace(2)* %29 to <16 x i8> addrspace(2)* %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0 %32 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %33 = bitcast <8 x i32> addrspace(2)* %32 to <32 x i8> addrspace(2)* %34 = load <32 x i8>, <32 x i8> addrspace(2)* %33, align 32, !tbaa !0 %35 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %36 = bitcast <4 x i32> addrspace(2)* %35 to <16 x i8> addrspace(2)* %37 = load <16 x i8>, <16 x i8> addrspace(2)* %36, align 16, !tbaa !0 %38 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %41 = bitcast float %39 to i32 %42 = bitcast float %40 to i32 %43 = insertelement <2 x i32> undef, i32 %41, i32 0 %44 = insertelement <2 x i32> %43, i32 %42, i32 1 %45 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %44, <32 x i8> %34, <16 x i8> %37, i32 2) %46 = extractelement <4 x float> %45, i32 0 %47 = fadd float %46, 0xBFE0101020000000 %48 = bitcast float %39 to i32 %49 = bitcast float %40 to i32 %50 = insertelement <2 x i32> undef, i32 %48, i32 0 %51 = insertelement <2 x i32> %50, i32 %49, i32 1 %52 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %51, <32 x i8> %23, <16 x i8> %25, i32 2) %53 = extractelement <4 x float> %52, i32 0 %54 = fadd float %53, 0xBFB0101020000000 %55 = fmul float %54, 0x3FF29FBE80000000 %56 = fmul float %47, 0x3FF9893740000000 %57 = fadd float %56, %55 %58 = fmul float %47, 0xBFEA0418A0000000 %59 = fadd float %58, %55 %60 = fmul float %47, 0.000000e+00 %61 = fadd float %60, %55 %62 = bitcast float %39 to i32 %63 = bitcast float %40 to i32 %64 = insertelement <2 x i32> undef, i32 %62, i32 0 %65 = insertelement <2 x i32> %64, i32 %63, i32 1 %66 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %65, <32 x i8> %28, <16 x i8> %31, i32 2) %67 = extractelement <4 x float> %66, i32 0 %68 = fadd float %67, 0xBFE0101020000000 %69 = fmul float %68, 0.000000e+00 %70 = fadd float %69, %57 %71 = fmul float %68, 0xBFD9168720000000 %72 = fadd float %71, %59 %73 = fmul float %68, 0x400022D0E0000000 %74 = fadd float %73, %61 %75 = call i32 @llvm.SI.packf16(float %70, float %72) %76 = bitcast i32 %75 to float %77 = call i32 @llvm.SI.packf16(float %74, float %38) %78 = bitcast i32 %77 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %76, float %78, float %76, float %78) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx4 s[8:11], s[4:5], 0x4 ; C0840504 s_load_dwordx4 s[12:15], s[4:5], 0x8 ; C0860508 s_load_dwordx8 s[16:23], s[6:7], 0x10 ; C0C80710 s_load_dwordx8 s[24:31], s[6:7], 0x0 ; C0CC0700 s_load_dwordx8 s[32:39], s[6:7], 0x8 ; C0D00708 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400 v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v0, 1, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[16:23], s[12:15] ; F0800100 00640003 image_sample v1, 1, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[24:31], s[0:3] ; F0800100 00060103 image_sample v3, 1, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[32:39], s[8:11] ; F0800100 00480303 v_mov_b32_e32 v4, 0xbf008081 ; 7E0802FF BF008081 s_waitcnt vmcnt(2) ; BF8C0772 v_add_f32_e32 v0, v4, v0 ; 06000104 v_mov_b32_e32 v5, 0xbd808081 ; 7E0A02FF BD808081 s_waitcnt vmcnt(1) ; BF8C0771 v_add_f32_e32 v1, v1, v5 ; 06020B01 v_mul_f32_e32 v1, 0x3f94fdf4, v1 ; 100202FF 3F94FDF4 v_madmk_f32_e32 v5, v0, v1, 0x3fcc49ba ; 400A0300 3FCC49BA v_madmk_f32_e32 v0, v0, v1, 0xbf5020c5 ; 40000300 BF5020C5 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v3, v4, v3 ; 06060704 v_madmk_f32_e32 v0, v3, v0, 0xbec8b439 ; 40000103 BEC8B439 v_madmk_f32_e32 v1, v3, v1, 0x40011687 ; 40020303 40011687 v_cvt_pkrtz_f16_f32_e32 v0, v5, v0 ; 5E000105 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 40 VGPRS: 8 Code Size: 184 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL CONST[0..3] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[2], CONST[2] 2: DP4 TEMP[1].x, IN[2], CONST[3] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: MOV OUT[2], IN[1] 5: MOV OUT[3], CONST[0] 6: MOV OUT[1], IN[0] 7: MOV OUT[4], CONST[1] 8: MOV OUT[0], TEMP[0] 9: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = extractelement <4 x float> %48, i32 3 %53 = fmul float %49, %21 %54 = fmul float %50, %22 %55 = fadd float %53, %54 %56 = fmul float %51, %23 %57 = fadd float %55, %56 %58 = fmul float %52, %24 %59 = fadd float %57, %58 %60 = fmul float %49, %25 %61 = fmul float %50, %26 %62 = fadd float %60, %61 %63 = fmul float %51, %27 %64 = fadd float %62, %63 %65 = fmul float %52, %28 %66 = fadd float %64, %65 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %33, float %34, float %35, float %36) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %41, float %42, float %43, float %44) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %13, float %14, float %15, float %16) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %59, float %66, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x5 ; C2040105 buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 buffer_load_format_xyzw v[9:12], v0, s[16:19], 0 idxen ; E00C2000 80040900 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_buffer_load_dword s5, s[0:3], 0x7 ; C2028107 s_buffer_load_dword s6, s[0:3], 0x8 ; C2030108 s_buffer_load_dword s7, s[0:3], 0x9 ; C2038109 s_buffer_load_dword s9, s[0:3], 0xa ; C204810A s_buffer_load_dword s10, s[0:3], 0xb ; C205010B s_buffer_load_dword s11, s[0:3], 0xc ; C205810C s_buffer_load_dword s12, s[0:3], 0xd ; C206010D s_buffer_load_dword s13, s[0:3], 0xe ; C206810E s_buffer_load_dword s14, s[0:3], 0x0 ; C2070100 s_buffer_load_dword s15, s[0:3], 0x1 ; C2078101 s_buffer_load_dword s16, s[0:3], 0x2 ; C2080102 s_buffer_load_dword s17, s[0:3], 0x3 ; C2088103 s_buffer_load_dword s18, s[0:3], 0x4 ; C2090104 s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s7, v10 ; 10001407 v_mac_f32_e32 v0, s6, v9 ; 3E001206 v_mul_f32_e32 v10, s12, v10 ; 1014140C v_mac_f32_e32 v10, s11, v9 ; 3E14120B v_mac_f32_e32 v0, s9, v11 ; 3E001609 v_mac_f32_e32 v10, s13, v11 ; 3E14160D v_mac_f32_e32 v0, s10, v12 ; 3E00180A v_mac_f32_e32 v10, s0, v12 ; 3E141800 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 exp 15, 33, 0, 0, 0, v5, v6, v7, v8 ; F800021F 08070605 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, s14 ; 7E02020E v_mov_b32_e32 v2, s15 ; 7E04020F v_mov_b32_e32 v3, s16 ; 7E060210 v_mov_b32_e32 v4, s17 ; 7E080211 exp 15, 34, 0, 0, 0, v1, v2, v3, v4 ; F800022F 04030201 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, s18 ; 7E020212 v_mov_b32_e32 v2, s8 ; 7E040208 v_mov_b32_e32 v3, s4 ; 7E060204 v_mov_b32_e32 v4, s5 ; 7E080205 exp 15, 35, 0, 0, 0, v1, v2, v3, v4 ; F800023F 04030201 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 1.0 ; 7E0202F2 v_mov_b32_e32 v2, 0 ; 7E040280 exp 15, 12, 0, 1, 0, v0, v10, v2, v1 ; F80008CF 01020A00 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Code Size: 244 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL TEMP[0..1], LOCAL 0: MAD TEMP[0], IN[0], IN[3], IN[2] 1: MUL TEMP[1].x, TEMP[0].wwww, IN[1].wwww 2: MOV TEMP[0].w, TEMP[1].xxxx 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %23 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %24 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %25 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %26 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %35 = fmul float %22, %31 %36 = fadd float %35, %27 %37 = fmul float %23, %32 %38 = fadd float %37, %28 %39 = fmul float %24, %33 %40 = fadd float %39, %29 %41 = fmul float %25, %34 %42 = fadd float %41, %30 %43 = fmul float %42, %26 %44 = call i32 @llvm.SI.packf16(float %36, float %38) %45 = bitcast i32 %44 to float %46 = call i32 @llvm.SI.packf16(float %40, float %43) %47 = bitcast i32 %46 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %45, float %47, float %45, float %47) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 3, 1, [m0] ; C8180700 v_interp_p2_f32 v6, [v6], v1, 3, 1, [m0] ; C8190701 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 v_interp_p1_f32 v10, v0, 3, 2, [m0] ; C8280B00 v_interp_p2_f32 v10, [v10], v1, 3, 2, [m0] ; C8290B01 v_interp_p1_f32 v11, v0, 0, 3, [m0] ; C82C0C00 v_interp_p2_f32 v11, [v11], v1, 0, 3, [m0] ; C82D0C01 v_interp_p1_f32 v12, v0, 1, 3, [m0] ; C8300D00 v_interp_p2_f32 v12, [v12], v1, 1, 3, [m0] ; C8310D01 v_interp_p1_f32 v13, v0, 2, 3, [m0] ; C8340E00 v_interp_p2_f32 v13, [v13], v1, 2, 3, [m0] ; C8350E01 v_interp_p1_f32 v0, v0, 3, 3, [m0] ; C8000F00 v_interp_p2_f32 v0, [v0], v1, 3, 3, [m0] ; C8010F01 v_mac_f32_e32 v7, v11, v2 ; 3E0E050B v_mac_f32_e32 v8, v12, v3 ; 3E10070C v_mac_f32_e32 v9, v13, v4 ; 3E12090D v_mac_f32_e32 v10, v0, v5 ; 3E140B00 v_mul_f32_e32 v0, v6, v10 ; 10001506 v_cvt_pkrtz_f16_f32_e32 v1, v7, v8 ; 5E021107 v_cvt_pkrtz_f16_f32_e32 v0, v9, v0 ; 5E000109 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Code Size: 148 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL CONST[0..143] DCL TEMP[0..3], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 1530.0599, 2.1000} IMM[1] FLT32 { 3.1000, 4.1000, 5.1000, 255.0100} IMM[2] FLT32 { 6.0000, 0.1000, 1.1000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: MUL TEMP[1].x, IMM[0].zzzz, IN[0].zzzz 2: ADD TEMP[2].x, IMM[0].wwww, TEMP[1].xxxx 3: F2I TEMP[2].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: UARL ADDR[0].x, TEMP[2].xxxx 6: DP4 TEMP[0].x, IN[1], CONST[ADDR[0].x] 7: ADD TEMP[2].x, IMM[1].xxxx, TEMP[1].xxxx 8: F2I TEMP[2].x, TEMP[2].xxxx 9: UARL ADDR[0].x, TEMP[2].xxxx 10: DP4 TEMP[2].x, IN[1], CONST[ADDR[0].x] 11: MOV TEMP[0].y, TEMP[2].xxxx 12: ADD TEMP[2].x, IMM[1].yyyy, TEMP[1].xxxx 13: F2I TEMP[2].x, TEMP[2].xxxx 14: UARL ADDR[0].x, TEMP[2].xxxx 15: UARL ADDR[0].x, TEMP[2].xxxx 16: DP4 TEMP[2].x, IN[1], CONST[ADDR[0].x] 17: ADD TEMP[1].x, IMM[1].zzzz, TEMP[1].xxxx 18: F2I TEMP[1].x, TEMP[1].xxxx 19: UARL ADDR[0].x, TEMP[1].xxxx 20: DP4 TEMP[1].x, IN[1], CONST[ADDR[0].x] 21: MOV TEMP[2].y, TEMP[1].xxxx 22: MUL TEMP[1].x, IN[0].zzzz, IMM[1].wwww 23: MAD TEMP[3].x, TEMP[1].xxxx, IMM[2].xxxx, IMM[2].yyyy 24: F2I TEMP[3].x, TEMP[3].xxxx 25: UARL ADDR[0].x, TEMP[3].xxxx 26: MOV TEMP[3], CONST[ADDR[0].x] 27: MAD TEMP[1].x, TEMP[1].xxxx, IMM[2].xxxx, IMM[2].zzzz 28: F2I TEMP[1].x, TEMP[1].xxxx 29: UARL ADDR[0].x, TEMP[1].xxxx 30: MOV TEMP[1], CONST[ADDR[0].x] 31: MOV TEMP[2].xy, TEMP[2].xyxx 32: MOV OUT[1], IN[0] 33: MOV OUT[2], TEMP[3] 34: MOV OUT[3], TEMP[1] 35: MOV OUT[0], TEMP[0] 36: MOV OUT[4], TEMP[2] 37: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %14 = load <16 x i8>, <16 x i8> addrspace(2)* %13, align 16, !tbaa !0 %15 = add i32 %5, %7 %16 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %14, i32 0, i32 %15) %17 = extractelement <4 x float> %16, i32 0 %18 = extractelement <4 x float> %16, i32 1 %19 = extractelement <4 x float> %16, i32 2 %20 = extractelement <4 x float> %16, i32 3 %21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = extractelement <4 x float> %24, i32 1 %27 = extractelement <4 x float> %24, i32 2 %28 = extractelement <4 x float> %24, i32 3 %29 = fmul float %19, 0x4097E83D60000000 %30 = fadd float %29, 0x4000CCCCC0000000 %31 = fptosi float %30 to i32 %32 = shl i32 %31, 4 %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %32) %34 = shl i32 %31, 4 %35 = or i32 %34, 4 %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %35) %37 = shl i32 %31, 4 %38 = or i32 %37, 8 %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %38) %40 = shl i32 %31, 4 %41 = or i32 %40, 12 %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %41) %43 = fmul float %25, %33 %44 = fmul float %26, %36 %45 = fadd float %43, %44 %46 = fmul float %27, %39 %47 = fadd float %45, %46 %48 = fmul float %28, %42 %49 = fadd float %47, %48 %50 = fadd float %29, 0x4008CCCCC0000000 %51 = fptosi float %50 to i32 %52 = shl i32 %51, 4 %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %52) %54 = shl i32 %51, 4 %55 = or i32 %54, 4 %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %55) %57 = shl i32 %51, 4 %58 = or i32 %57, 8 %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %58) %60 = shl i32 %51, 4 %61 = or i32 %60, 12 %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %61) %63 = fmul float %25, %53 %64 = fmul float %26, %56 %65 = fadd float %63, %64 %66 = fmul float %27, %59 %67 = fadd float %65, %66 %68 = fmul float %28, %62 %69 = fadd float %67, %68 %70 = fadd float %29, 0x4010666660000000 %71 = fptosi float %70 to i32 %72 = shl i32 %71, 4 %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %72) %74 = shl i32 %71, 4 %75 = or i32 %74, 4 %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %75) %77 = shl i32 %71, 4 %78 = or i32 %77, 8 %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %78) %80 = shl i32 %71, 4 %81 = or i32 %80, 12 %82 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %81) %83 = fmul float %25, %73 %84 = fmul float %26, %76 %85 = fadd float %83, %84 %86 = fmul float %27, %79 %87 = fadd float %85, %86 %88 = fmul float %28, %82 %89 = fadd float %87, %88 %90 = fadd float %29, 0x4014666660000000 %91 = fptosi float %90 to i32 %92 = shl i32 %91, 4 %93 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %92) %94 = shl i32 %91, 4 %95 = or i32 %94, 4 %96 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %95) %97 = shl i32 %91, 4 %98 = or i32 %97, 8 %99 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %98) %100 = shl i32 %91, 4 %101 = or i32 %100, 12 %102 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %101) %103 = fmul float %25, %93 %104 = fmul float %26, %96 %105 = fadd float %103, %104 %106 = fmul float %27, %99 %107 = fadd float %105, %106 %108 = fmul float %28, %102 %109 = fadd float %107, %108 %110 = fmul float %19, 0x406FE051E0000000 %111 = fmul float %110, 6.000000e+00 %112 = fadd float %111, 0x3FB99999A0000000 %113 = fptosi float %112 to i32 %114 = shl i32 %113, 4 %115 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %114) %116 = shl i32 %113, 4 %117 = or i32 %116, 4 %118 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %117) %119 = shl i32 %113, 4 %120 = or i32 %119, 8 %121 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %120) %122 = shl i32 %113, 4 %123 = or i32 %122, 12 %124 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %123) %125 = fmul float %110, 6.000000e+00 %126 = fadd float %125, 0x3FF19999A0000000 %127 = fptosi float %126 to i32 %128 = shl i32 %127, 4 %129 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %128) %130 = shl i32 %127, 4 %131 = or i32 %130, 4 %132 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %131) %133 = shl i32 %127, 4 %134 = or i32 %133, 8 %135 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %134) %136 = shl i32 %127, 4 %137 = or i32 %136, 12 %138 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %137) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %115, float %118, float %121, float %124) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %129, float %132, float %135, float %138) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %89, float %109, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %49, float %69, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 v_mov_b32_e32 v1, 0x44bf41eb ; 7E0202FF 44BF41EB v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 buffer_load_format_xyzw v[6:9], v0, s[12:15], 0 idxen ; E00C2000 80030600 s_waitcnt vmcnt(1) ; BF8C0771 v_madak_f32_e32 v0, v4, v1, 0x40833333 ; 42000304 40833333 v_madak_f32_e32 v10, v4, v1, 0x40a33333 ; 42140304 40A33333 v_madak_f32_e32 v11, v4, v1, 0x40066666 ; 42160304 40066666 v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_cvt_i32_f32_e32 v10, v10 ; 7E14110A v_cvt_i32_f32_e32 v11, v11 ; 7E16110B v_madak_f32_e32 v12, v4, v1, 0x40466666 ; 42180304 40466666 v_cvt_i32_f32_e32 v12, v12 ; 7E18110C v_lshlrev_b32_e32 v0, 4, v0 ; 34000084 v_lshlrev_b32_e32 v10, 4, v10 ; 34141484 v_lshlrev_b32_e32 v11, 4, v11 ; 34161684 v_lshlrev_b32_e32 v12, 4, v12 ; 34181884 v_madak_f32_e32 v13, v4, v1, 0x3dcccccd ; 421A0304 3DCCCCCD v_cvt_i32_f32_e32 v13, v13 ; 7E1A110D v_madak_f32_e32 v1, v4, v1, 0x3f8ccccd ; 42020304 3F8CCCCD v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_or_b32_e32 v14, 4, v0 ; 381C0084 v_lshlrev_b32_e32 v13, 4, v13 ; 341A1A84 v_or_b32_e32 v15, 4, v11 ; 381E1684 v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 v_or_b32_e32 v16, 4, v12 ; 38201884 buffer_load_dword v17, v0, s[0:3], 0 offen ; E0301000 80001100 v_or_b32_e32 v18, 8, v0 ; 38240088 v_or_b32_e32 v0, 12, v0 ; 3800008C buffer_load_dword v19, v11, s[0:3], 0 offen ; E0301000 8000130B v_or_b32_e32 v20, 8, v11 ; 38281688 v_or_b32_e32 v11, 12, v11 ; 3816168C buffer_load_dword v21, v12, s[0:3], 0 offen ; E0301000 8000150C v_or_b32_e32 v22, 8, v12 ; 382C1888 v_or_b32_e32 v12, 12, v12 ; 3818188C buffer_load_dword v23, v10, s[0:3], 0 offen ; E0301000 8000170A v_or_b32_e32 v24, 4, v10 ; 38301484 v_or_b32_e32 v25, 8, v10 ; 38321488 v_or_b32_e32 v10, 12, v10 ; 3814148C buffer_load_dword v26, v13, s[0:3], 0 offen ; E0301000 80001A0D v_or_b32_e32 v27, 4, v13 ; 38361A84 v_or_b32_e32 v28, 8, v13 ; 38381A88 v_or_b32_e32 v13, 12, v13 ; 381A1A8C buffer_load_dword v15, v15, s[0:3], 0 offen ; E0301000 80000F0F buffer_load_dword v29, v1, s[0:3], 0 offen ; E0301000 80001D01 v_or_b32_e32 v30, 4, v1 ; 383C0284 v_or_b32_e32 v31, 8, v1 ; 383E0288 v_or_b32_e32 v1, 12, v1 ; 3802028C buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 buffer_load_dword v20, v20, s[0:3], 0 offen ; E0301000 80001414 buffer_load_dword v14, v14, s[0:3], 0 offen ; E0301000 80000E0E buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B buffer_load_dword v24, v24, s[0:3], 0 offen ; E0301000 80001818 buffer_load_dword v22, v22, s[0:3], 0 offen ; E0301000 80001616 buffer_load_dword v12, v12, s[0:3], 0 offen ; E0301000 80000C0C buffer_load_dword v18, v18, s[0:3], 0 offen ; E0301000 80001212 buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 buffer_load_dword v25, v25, s[0:3], 0 offen ; E0301000 80001919 buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A buffer_load_dword v27, v27, s[0:3], 0 offen ; E0301000 80001B1B buffer_load_dword v28, v28, s[0:3], 0 offen ; E0301000 80001C1C buffer_load_dword v13, v13, s[0:3], 0 offen ; E0301000 80000D0D buffer_load_dword v30, v30, s[0:3], 0 offen ; E0301000 80001E1E buffer_load_dword v31, v31, s[0:3], 0 offen ; E0301000 80001F1F buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 s_waitcnt ; BF8C077F v_mul_f32_e32 v15, v15, v7 ; 101E0F0F v_mac_f32_e32 v15, v19, v6 ; 3E1E0D13 v_mul_f32_e32 v16, v16, v7 ; 10200F10 v_mac_f32_e32 v16, v21, v6 ; 3E200D15 s_waitcnt vmcnt(14) ; BF8C077E v_mul_f32_e32 v14, v14, v7 ; 101C0F0E v_mac_f32_e32 v14, v17, v6 ; 3E1C0D11 s_waitcnt vmcnt(12) ; BF8C077C v_mul_f32_e32 v7, v24, v7 ; 100E0F18 v_mac_f32_e32 v7, v23, v6 ; 3E0E0D17 v_mac_f32_e32 v15, v20, v8 ; 3E1E1114 s_waitcnt vmcnt(11) ; BF8C077B v_mac_f32_e32 v16, v22, v8 ; 3E201116 s_waitcnt vmcnt(9) ; BF8C0779 v_mac_f32_e32 v14, v18, v8 ; 3E1C1112 s_waitcnt vmcnt(7) ; BF8C0777 v_mac_f32_e32 v7, v25, v8 ; 3E0E1119 v_mac_f32_e32 v15, v11, v9 ; 3E1E130B v_mac_f32_e32 v16, v12, v9 ; 3E20130C v_mac_f32_e32 v14, v0, v9 ; 3E1C1300 s_waitcnt vmcnt(6) ; BF8C0776 v_mac_f32_e32 v7, v10, v9 ; 3E0E130A exp 15, 32, 0, 0, 0, v2, v3, v4, v5 ; F800020F 05040302 s_waitcnt vmcnt(3) ; BF8C0773 exp 15, 33, 0, 0, 0, v26, v27, v28, v13 ; F800021F 0D1C1B1A s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 34, 0, 0, 0, v29, v30, v31, v1 ; F800022F 011F1E1D v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 35, 0, 0, 0, v14, v7, v0, v0 ; F800023F 0000070E s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp 15, 12, 0, 1, 0, v15, v16, v0, v1 ; F80008CF 0100100F s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 32 Code Size: 564 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xy, IN[3].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MAD TEMP[0], TEMP[0], IN[2], IN[1] 3: MUL TEMP[1].x, TEMP[0].wwww, IN[0].wwww 4: MOV TEMP[0].w, TEMP[1].xxxx 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %37 = bitcast float %35 to i32 %38 = bitcast float %36 to i32 %39 = insertelement <2 x i32> undef, i32 %37, i32 0 %40 = insertelement <2 x i32> %39, i32 %38, i32 1 %41 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %40, <32 x i8> %23, <16 x i8> %25, i32 2) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = extractelement <4 x float> %41, i32 2 %45 = extractelement <4 x float> %41, i32 3 %46 = fmul float %42, %31 %47 = fadd float %46, %27 %48 = fmul float %43, %32 %49 = fadd float %48, %28 %50 = fmul float %44, %33 %51 = fadd float %50, %29 %52 = fmul float %45, %34 %53 = fadd float %52, %30 %54 = fmul float %53, %26 %55 = call i32 @llvm.SI.packf16(float %47, float %49) %56 = bitcast i32 %55 to float %57 = call i32 @llvm.SI.packf16(float %51, float %54) %58 = bitcast i32 %57 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %56, float %58, float %56, float %58) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400 v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 v_interp_p1_f32 v5, v0, 2, 1, [m0] ; C8140600 v_interp_p2_f32 v5, [v5], v1, 2, 1, [m0] ; C8150601 v_interp_p1_f32 v6, v0, 3, 1, [m0] ; C8180700 v_interp_p2_f32 v6, [v6], v1, 3, 1, [m0] ; C8190701 v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800 v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00 v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01 v_interp_p1_f32 v10, v0, 3, 2, [m0] ; C8280B00 v_interp_p2_f32 v10, [v10], v1, 3, 2, [m0] ; C8290B01 v_interp_p1_f32 v11, v0, 0, 3, [m0] ; C82C0C00 v_interp_p2_f32 v11, [v11], v1, 0, 3, [m0] ; C82D0C01 v_interp_p1_f32 v12, v0, 1, 3, [m0] ; C8300D00 v_interp_p2_f32 v12, [v12], v1, 1, 3, [m0] ; C8310D01 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[11:14], 15, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[4:11], s[0:3] ; F0800F00 00010B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v3, v7, v11 ; 3E061707 v_mac_f32_e32 v4, v8, v12 ; 3E081908 v_mac_f32_e32 v5, v9, v13 ; 3E0A1B09 v_mac_f32_e32 v6, v10, v14 ; 3E0C1D0A v_mul_f32_e32 v0, v2, v6 ; 10000D02 v_cvt_pkrtz_f16_f32_e32 v1, v3, v4 ; 5E020903 v_cvt_pkrtz_f16_f32_e32 v0, v5, v0 ; 5E000105 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Code Size: 160 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL CONST[0..47] DCL TEMP[0..2], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 2.0000, 0.1000} IMM[1] FLT32 { 1.1000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: MUL TEMP[1].x, IN[2].xxxx, IMM[0].zzzz 2: ADD TEMP[2].x, TEMP[1].xxxx, IMM[0].wwww 3: F2I TEMP[2].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: UARL ADDR[0].x, TEMP[2].xxxx 6: DP4 TEMP[0].x, IN[1], CONST[ADDR[0].x] 7: ADD TEMP[1].x, IMM[1].xxxx, TEMP[1].xxxx 8: F2I TEMP[1].x, TEMP[1].xxxx 9: UARL ADDR[0].x, TEMP[1].xxxx 10: DP4 TEMP[1].x, IN[1], CONST[ADDR[0].x] 11: MOV TEMP[0].y, TEMP[1].xxxx 12: MOV TEMP[1].xy, IN[0].xyxx 13: MOV OUT[0], TEMP[0] 14: MOV OUT[1], TEMP[1] 15: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %14 = load <16 x i8>, <16 x i8> addrspace(2)* %13, align 16, !tbaa !0 %15 = add i32 %5, %7 %16 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %14, i32 0, i32 %15) %17 = extractelement <4 x float> %16, i32 0 %18 = extractelement <4 x float> %16, i32 1 %19 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %20 = load <16 x i8>, <16 x i8> addrspace(2)* %19, align 16, !tbaa !0 %21 = add i32 %5, %7 %22 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %20, i32 0, i32 %21) %23 = extractelement <4 x float> %22, i32 0 %24 = extractelement <4 x float> %22, i32 1 %25 = extractelement <4 x float> %22, i32 2 %26 = extractelement <4 x float> %22, i32 3 %27 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %28 = load <16 x i8>, <16 x i8> addrspace(2)* %27, align 16, !tbaa !0 %29 = add i32 %5, %7 %30 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %28, i32 0, i32 %29) %31 = extractelement <4 x float> %30, i32 0 %32 = fmul float %31, 2.000000e+00 %33 = fadd float %32, 0x3FB99999A0000000 %34 = fptosi float %33 to i32 %35 = shl i32 %34, 4 %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %35) %37 = shl i32 %34, 4 %38 = or i32 %37, 4 %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %38) %40 = shl i32 %34, 4 %41 = or i32 %40, 8 %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %41) %43 = shl i32 %34, 4 %44 = or i32 %43, 12 %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %44) %46 = fmul float %23, %36 %47 = fmul float %24, %39 %48 = fadd float %46, %47 %49 = fmul float %25, %42 %50 = fadd float %48, %49 %51 = fmul float %26, %45 %52 = fadd float %50, %51 %53 = fadd float %32, 0x3FF19999A0000000 %54 = fptosi float %53 to i32 %55 = shl i32 %54, 4 %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %55) %57 = shl i32 %54, 4 %58 = or i32 %57, 4 %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %58) %60 = shl i32 %54, 4 %61 = or i32 %60, 8 %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %61) %63 = shl i32 %54, 4 %64 = or i32 %63, 12 %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %64) %66 = fmul float %23, %56 %67 = fmul float %24, %59 %68 = fadd float %66, %67 %69 = fmul float %25, %62 %70 = fadd float %68, %69 %71 = fmul float %26, %65 %72 = fadd float %70, %71 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %17, float %18, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %52, float %72, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[3:6], v0, s[12:15], 0 idxen ; E00C2000 80030300 buffer_load_format_xyzw v[7:10], v0, s[8:11], 0 idxen ; E00C2000 80020700 s_waitcnt vmcnt(0) ; BF8C0770 v_madak_f32_e32 v0, 2.0, v7, 0x3dcccccd ; 42000EF4 3DCCCCCD v_madak_f32_e32 v7, 2.0, v7, 0x3f8ccccd ; 420E0EF4 3F8CCCCD v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_cvt_i32_f32_e32 v7, v7 ; 7E0E1107 v_lshlrev_b32_e32 v0, 4, v0 ; 34000084 v_lshlrev_b32_e32 v7, 4, v7 ; 340E0E84 buffer_load_dword v8, v0, s[0:3], 0 offen ; E0301000 80000800 v_or_b32_e32 v9, 4, v0 ; 38120084 v_or_b32_e32 v10, 8, v0 ; 38140088 v_or_b32_e32 v0, 12, v0 ; 3800008C v_or_b32_e32 v11, 4, v7 ; 38160E84 buffer_load_dword v9, v9, s[0:3], 0 offen ; E0301000 80000909 buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B buffer_load_dword v12, v7, s[0:3], 0 offen ; E0301000 80000C07 v_or_b32_e32 v13, 8, v7 ; 381A0E88 v_or_b32_e32 v7, 12, v7 ; 380E0E8C buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A buffer_load_dword v13, v13, s[0:3], 0 offen ; E0301000 80000D0D buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 buffer_load_dword v7, v7, s[0:3], 0 offen ; E0301000 80000707 s_waitcnt vmcnt(6) ; BF8C0776 v_mul_f32_e32 v9, v9, v4 ; 10120909 s_waitcnt vmcnt(5) ; BF8C0775 v_mul_f32_e32 v4, v11, v4 ; 1008090B v_mac_f32_e32 v9, v8, v3 ; 3E120708 s_waitcnt vmcnt(4) ; BF8C0774 v_mac_f32_e32 v4, v12, v3 ; 3E08070C s_waitcnt vmcnt(3) ; BF8C0773 v_mac_f32_e32 v9, v10, v5 ; 3E120B0A s_waitcnt vmcnt(2) ; BF8C0772 v_mac_f32_e32 v4, v13, v5 ; 3E080B0D s_waitcnt vmcnt(1) ; BF8C0771 v_mac_f32_e32 v9, v0, v6 ; 3E120D00 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v4, v7, v6 ; 3E080D07 v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 32, 0, 0, 0, v1, v2, v0, v0 ; F800020F 00000201 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp 15, 12, 0, 1, 0, v9, v4, v0, v1 ; F80008CF 01000409 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 16 Code Size: 268 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0], LOCAL 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV OUT[0], TEMP[0] 3: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = bitcast float %26 to i32 %29 = bitcast float %27 to i32 %30 = insertelement <2 x i32> undef, i32 %28, i32 0 %31 = insertelement <2 x i32> %30, i32 %29, i32 1 %32 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %23, <16 x i8> %25, i32 2) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = call i32 @llvm.SI.packf16(float %33, float %34) %38 = bitcast i32 %37 to float %39 = call i32 @llvm.SI.packf16(float %35, float %36) %40 = bitcast i32 %39 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %38, float %40, float %38, float %40) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00030002 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 4 Code Size: 68 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** Store/Community web auth request failed: Auth error SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL CONST[0..5] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: DP4 TEMP[0].x, IN[0], CONST[2] 2: DP4 TEMP[1].x, IN[0], CONST[3] 3: MOV TEMP[0].y, TEMP[1].xxxx 4: DP4 TEMP[1].x, IN[0], CONST[4] 5: DP4 TEMP[2].x, IN[0], CONST[5] 6: MOV TEMP[1].y, TEMP[2].xxxx 7: MOV TEMP[1].xy, TEMP[1].xyxx 8: MOV OUT[1], CONST[0] 9: MOV OUT[2], CONST[1] 10: MOV OUT[0], TEMP[0] 11: MOV OUT[3], TEMP[1] 12: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = fmul float %41, %21 %46 = fmul float %42, %22 %47 = fadd float %45, %46 %48 = fmul float %43, %23 %49 = fadd float %47, %48 %50 = fmul float %44, %24 %51 = fadd float %49, %50 %52 = fmul float %41, %25 %53 = fmul float %42, %26 %54 = fadd float %52, %53 %55 = fmul float %43, %27 %56 = fadd float %54, %55 %57 = fmul float %44, %28 %58 = fadd float %56, %57 %59 = fmul float %41, %29 %60 = fmul float %42, %30 %61 = fadd float %59, %60 %62 = fmul float %43, %31 %63 = fadd float %61, %62 %64 = fmul float %44, %32 %65 = fadd float %63, %64 %66 = fmul float %41, %33 %67 = fmul float %42, %34 %68 = fadd float %66, %67 %69 = fmul float %43, %35 %70 = fadd float %68, %69 %71 = fmul float %44, %36 %72 = fadd float %70, %71 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %13, float %14, float %15, float %16) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %17, float %18, float %19, float %20) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %65, float %72, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %51, float %58, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106 s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108 s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s19, s[0:3], 0xf ; C209810F s_buffer_load_dword s20, s[0:3], 0x10 ; C20A0110 s_buffer_load_dword s21, s[0:3], 0x11 ; C20A8111 s_buffer_load_dword s22, s[0:3], 0x14 ; C20B0114 s_buffer_load_dword s23, s[0:3], 0x15 ; C20B8115 s_buffer_load_dword s24, s[0:3], 0x12 ; C20C0112 s_buffer_load_dword s25, s[0:3], 0x13 ; C20C8113 s_buffer_load_dword s26, s[0:3], 0x16 ; C20D0116 s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s13, v1 ; 1008020D v_mac_f32_e32 v4, s12, v0 ; 3E08000C v_mul_f32_e32 v5, s17, v1 ; 100A0211 v_mac_f32_e32 v5, s16, v0 ; 3E0A0010 v_mul_f32_e32 v6, s21, v1 ; 100C0215 v_mac_f32_e32 v6, s20, v0 ; 3E0C0014 v_mul_f32_e32 v1, s23, v1 ; 10020217 v_mac_f32_e32 v1, s22, v0 ; 3E020016 v_mac_f32_e32 v4, s14, v2 ; 3E08040E v_mac_f32_e32 v5, s18, v2 ; 3E0A0412 v_mac_f32_e32 v6, s24, v2 ; 3E0C0418 v_mac_f32_e32 v1, s26, v2 ; 3E02041A v_mac_f32_e32 v4, s15, v3 ; 3E08060F v_mac_f32_e32 v5, s19, v3 ; 3E0A0613 v_mac_f32_e32 v6, s25, v3 ; 3E0C0619 v_mac_f32_e32 v1, s0, v3 ; 3E020600 v_mov_b32_e32 v0, s4 ; 7E000204 v_mov_b32_e32 v2, s5 ; 7E040205 v_mov_b32_e32 v3, s6 ; 7E060206 v_mov_b32_e32 v7, s7 ; 7E0E0207 exp 15, 32, 0, 0, 0, v0, v2, v3, v7 ; F800020F 07030200 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, s8 ; 7E000208 v_mov_b32_e32 v2, s9 ; 7E040209 v_mov_b32_e32 v3, s10 ; 7E06020A v_mov_b32_e32 v7, s11 ; 7E0E020B exp 15, 33, 0, 0, 0, v0, v2, v3, v7 ; F800021F 07030200 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, 0 ; 7E000280 exp 15, 34, 0, 0, 0, v6, v1, v0, v0 ; F800022F 00000106 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp 15, 12, 0, 1, 0, v4, v5, v0, v1 ; F80008CF 01000504 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 32 VGPRS: 8 Code Size: 280 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].w, IMM[0].xxxx 1: MOV TEMP[0].xyz, IN[1].xyzx 2: MOV TEMP[1].xy, IN[2].xyyy 3: TEX TEMP[1], TEMP[1], SAMP[0], 2D 4: MUL TEMP[0], TEMP[1], TEMP[0] 5: MUL TEMP[0], TEMP[0], IN[1].wwww 6: MAD TEMP[0], IN[0], TEMP[0].wwww, TEMP[0] 7: MOV OUT[0], TEMP[0] 8: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0 %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %36 = bitcast float %34 to i32 %37 = bitcast float %35 to i32 %38 = insertelement <2 x i32> undef, i32 %36, i32 0 %39 = insertelement <2 x i32> %38, i32 %37, i32 1 %40 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %39, <32 x i8> %23, <16 x i8> %25, i32 2) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = fmul float %41, %30 %46 = fmul float %42, %31 %47 = fmul float %43, %32 %48 = fmul float %45, %33 %49 = fmul float %46, %33 %50 = fmul float %47, %33 %51 = fmul float %44, %33 %52 = fmul float %26, %51 %53 = fadd float %52, %48 %54 = fmul float %27, %51 %55 = fadd float %54, %49 %56 = fmul float %28, %51 %57 = fadd float %56, %50 %58 = fmul float %29, %51 %59 = fadd float %58, %51 %60 = call i32 @llvm.SI.packf16(float %53, float %55) %61 = bitcast i32 %60 to float %62 = call i32 @llvm.SI.packf16(float %57, float %59) %63 = bitcast i32 %62 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %61, float %63, float %61, float %63) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300 v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600 v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601 v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700 v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701 v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800 v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801 v_interp_p1_f32 v11, v0, 1, 2, [m0] ; C82C0900 v_interp_p2_f32 v11, [v11], v1, 1, 2, [m0] ; C82D0901 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[4:11], s[0:3] ; F0800F00 00010A0A s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v6, v10 ; 10001506 v_mul_f32_e32 v1, v7, v11 ; 10021707 v_mul_f32_e32 v6, v8, v12 ; 100C1908 v_mul_f32_e32 v7, v9, v13 ; 100E1B09 v_mul_f32_e32 v0, v9, v0 ; 10000109 v_mul_f32_e32 v1, v9, v1 ; 10020309 v_mul_f32_e32 v6, v9, v6 ; 100C0D09 v_mac_f32_e32 v0, v7, v2 ; 3E000507 v_mac_f32_e32 v1, v7, v3 ; 3E020707 v_mac_f32_e32 v6, v7, v4 ; 3E0C0907 v_mac_f32_e32 v7, v7, v5 ; 3E0E0B07 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 v_cvt_pkrtz_f16_f32_e32 v1, v6, v7 ; 5E020F06 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 16 VGPRS: 16 Code Size: 176 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL OUT[4], GENERIC[2] DCL OUT[5], GENERIC[3] DCL OUT[6], GENERIC[4] DCL OUT[7], GENERIC[5] DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[1][0] DCL CONST[2][0..15] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..7] DCL CONST[6][0] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL DCL TEMP[2..15], LOCAL IMM[0] FLT32 { 0.0000, 0.5000, -0.5000, 3.0000} IMM[1] UINT32 {0, 4, 32, 96} IMM[2] FLT32 { 2.0000, -2.0000, 1.0000, 0.0774} IMM[3] FLT32 { 0.9479, 0.0521, 2.4000, 0.0404} IMM[4] UINT32 {112, 3, 320, 48} IMM[5] FLT32 { 0.0000, 1.0000, 0.0039, 0.0000} IMM[6] UINT32 {304, 64, 512, 528} IMM[7] UINT32 {544, 560, 516, 532} IMM[8] UINT32 {548, 564, 524, 540} IMM[9] UINT32 {556, 572, 364, 372} IMM[10] UINT32 {520, 536, 552, 568} IMM[11] FLT32 { 0.0010, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MAD TEMP[0].x, IN[0].xxxx, IMM[0].yyyy, IMM[0].yyyy 4: MAD TEMP[2].x, IN[0].yyyy, IMM[0].zzzz, IMM[0].yyyy 5: MOV TEMP[3].x, TEMP[0].xxxx 6: MOV TEMP[3].y, TEMP[2].xxxx 7: MOV TEMP[3].z, TEMP[0].xxxx 8: MOV TEMP[3].w, TEMP[2].xxxx 9: RCP TEMP[0].x, CONST[1][0].yyyy 10: MUL TEMP[2].x, IN[1].xxxx, IMM[0].wwww 11: FSLT TEMP[4].x, IN[1].xxxx, CONST[1][0].wwww 12: UIF TEMP[4].xxxx :0 13: MOV TEMP[2].x, TEMP[2].xxxx 14: ELSE :0 15: MOV TEMP[2].x, IMM[0].xxxx 16: ENDIF 17: MAD TEMP[2].x, CONST[1][0].zzzz, TEMP[2].xxxx, CONST[1][0].xxxx 18: MOV TEMP[4].x, IMM[0].xxxx 19: MOV TEMP[4].y, TEMP[2].xxxx 20: MUL TEMP[5].x, IMM[0].yyyy, CONST[1][0].yyyy 21: MUL TEMP[6].x, IN[1].zzzz, CONST[5][2].xxxx 22: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx 23: FRC TEMP[6].x, TEMP[5].xxxx 24: FLR TEMP[5].x, TEMP[5].xxxx 25: MUL TEMP[5].x, TEMP[5].xxxx, IMM[2].xxxx 26: ADD TEMP[7].x, CONST[1][0].yyyy, IMM[2].yyyy 27: MIN TEMP[7].x, TEMP[7].xxxx, TEMP[5].xxxx 28: RCP TEMP[8].x, CONST[1][0].yyyy 29: MUL TEMP[8].x, TEMP[5].xxxx, TEMP[8].xxxx 30: FLR TEMP[8].x, TEMP[8].xxxx 31: MUL TEMP[8].x, CONST[1][0].yyyy, TEMP[8].xxxx 32: ADD TEMP[5].x, TEMP[5].xxxx, -TEMP[8].xxxx 33: MOV TEMP[4].xy, TEMP[4].xyyy 34: MOV TEMP[4].w, IMM[0].xxxx 35: TXL TEMP[4].z, TEMP[4], SAMP[0], 2D 36: FSLT TEMP[4].x, IMM[0].xxxx, TEMP[4].zzzz 37: UIF TEMP[4].xxxx :0 38: MOV TEMP[4].x, TEMP[7].xxxx 39: ELSE :0 40: MOV TEMP[4].x, TEMP[5].xxxx 41: ENDIF 42: MUL TEMP[4].x, TEMP[0].xxxx, TEMP[4].xxxx 43: MOV TEMP[5].x, TEMP[4].xxxx 44: MOV TEMP[5].y, TEMP[2].xxxx 45: MOV TEMP[5].xy, TEMP[5].xyyy 46: MOV TEMP[5].w, IMM[0].xxxx 47: TXL TEMP[5].xw, TEMP[5], SAMP[0], 2D 48: LRP TEMP[5].x, TEMP[6].xxxx, TEMP[5].wwww, TEMP[5].xxxx 49: ADD TEMP[6].x, TEMP[2].xxxx, CONST[1][0].zzzz 50: MOV TEMP[7].x, TEMP[4].xxxx 51: MOV TEMP[7].y, TEMP[6].xxxx 52: MOV TEMP[7].xy, TEMP[7].xyyy 53: MOV TEMP[7].w, IMM[0].xxxx 54: TXL TEMP[7], TEMP[7], SAMP[0], 2D 55: MOV TEMP[8].x, TEMP[4].xxxx 56: MAD TEMP[2].x, IMM[2].xxxx, CONST[1][0].zzzz, TEMP[2].xxxx 57: MOV TEMP[8].y, TEMP[2].xxxx 58: MOV TEMP[2].xy, TEMP[8].xyyy 59: MOV TEMP[2].w, IMM[0].xxxx 60: TXL TEMP[2], TEMP[2], SAMP[0], 2D 61: ADD TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx 62: MOV TEMP[0].y, TEMP[6].xxxx 63: MOV TEMP[0].xy, TEMP[0].xyyy 64: MOV TEMP[0].w, IMM[0].xxxx 65: TXL TEMP[0], TEMP[0], SAMP[0], 2D 66: ADD TEMP[4].xy, TEMP[0].zwww, -TEMP[0].xyyy 67: ADD TEMP[6].xy, TEMP[7].zwww, -TEMP[7].xyyy 68: RCP TEMP[8].x, TEMP[4].xxxx 69: RCP TEMP[8].y, TEMP[4].yyyy 70: MUL TEMP[6].xy, TEMP[6].xyyy, TEMP[8].xyyy 71: MUL TEMP[8].xy, TEMP[0].xyyy, TEMP[6].xyyy 72: ADD TEMP[7].xy, TEMP[7].xyyy, -TEMP[8].xyyy 73: ADD TEMP[6].xy, TEMP[7].xyyy, TEMP[6].xyyy 74: ADD TEMP[8].xy, TEMP[2].zwww, -TEMP[2].xyyy 75: RCP TEMP[9].x, TEMP[4].xxxx 76: RCP TEMP[9].y, TEMP[4].yyyy 77: MUL TEMP[4].xy, TEMP[8].xyyy, TEMP[9].xyyy 78: MUL TEMP[8].xy, TEMP[0].xyyy, TEMP[4].xyyy 79: ADD TEMP[2].xy, TEMP[2].xyyy, -TEMP[8].xyyy 80: ADD TEMP[4].xy, TEMP[2].xyyy, TEMP[4].xyyy 81: MOV TEMP[8].y, IMM[0].xxxx 82: MOV TEMP[8].x, TEMP[5].xxxx 83: MUL TEMP[5].x, IMM[0].yyyy, TEMP[5].xxxx 84: ADD TEMP[9].x, IMM[0].yyyy, TEMP[5].xxxx 85: MOV TEMP[10].x, TEMP[9].xxxx 86: MOV TEMP[10].y, TEMP[9].xxxx 87: MOV TEMP[10].z, TEMP[5].xxxx 88: MOV TEMP[10].w, TEMP[5].xxxx 89: ADD TEMP[5], TEMP[3], IMM[0].zzzz 90: RCP TEMP[9].x, CONST[5][2].zzzz 91: MOV_SAT TEMP[9].x, TEMP[9].xxxx 92: MAD TEMP[5], TEMP[5], TEMP[9].xxxx, IMM[0].yyyy 93: LRP TEMP[3], TEMP[10], TEMP[5], TEMP[3] 94: MAD TEMP[5].x, CONST[5][6].zzzz, TEMP[3].xxxx, CONST[5][6].xxxx 95: MAD TEMP[9].x, CONST[5][6].wwww, TEMP[3].yyyy, CONST[5][6].yyyy 96: MOV TEMP[10].x, TEMP[5].xxxx 97: MOV TEMP[10].y, TEMP[9].xxxx 98: MAD TEMP[11].x, CONST[5][6].zzzz, TEMP[3].zzzz, CONST[5][6].xxxx 99: MOV TEMP[10].z, TEMP[11].xxxx 100: MAD TEMP[3].x, CONST[5][6].wwww, TEMP[3].wwww, CONST[5][6].yyyy 101: MOV TEMP[10].w, TEMP[3].xxxx 102: ADD TEMP[3], CONST[5][6].xyxy, -TEMP[10] 103: MAD TEMP[3], TEMP[3], IMM[2].xxxx, CONST[5][6].zwzw 104: FSLT TEMP[11].x, TEMP[3].xxxx, IMM[0].xxxx 105: UIF TEMP[11].xxxx :0 106: MOV TEMP[11].x, TEMP[0].zzzz 107: ELSE :0 108: MOV TEMP[11].x, TEMP[0].xxxx 109: ENDIF 110: FSLT TEMP[12].x, TEMP[3].yyyy, IMM[0].xxxx 111: UIF TEMP[12].xxxx :0 112: MOV TEMP[12].x, TEMP[0].wwww 113: ELSE :0 114: MOV TEMP[12].x, TEMP[0].yyyy 115: ENDIF 116: MOV TEMP[0].x, TEMP[11].xxxx 117: MOV TEMP[0].y, TEMP[12].xxxx 118: MOV TEMP[5].x, TEMP[5].xxxx 119: MOV TEMP[5].y, TEMP[9].xxxx 120: ADD TEMP[0].xy, TEMP[0].xyyy, -TEMP[5].xyyy 121: RCP TEMP[5].x, TEMP[3].xxxx 122: RCP TEMP[5].y, TEMP[3].yyyy 123: MUL TEMP[0].xy, TEMP[0].xyyy, TEMP[5].xyyy 124: MOV_SAT TEMP[0].xy, TEMP[0].xyyy 125: MAD TEMP[3], TEMP[0].xyxy, TEMP[3], TEMP[10] 126: MUL TEMP[0].xy, TEMP[0].xyyy, IMM[2].xxxx 127: ADD TEMP[0].xy, IMM[2].zzzz, -TEMP[0].xyyy 128: MUL TEMP[0].xy, TEMP[0].xyyy, IN[0].xyyy 129: LRP TEMP[5].xy, TEMP[3].xyyy, TEMP[6].xyyy, TEMP[7].xyyy 130: LRP TEMP[2].xy, TEMP[3].zwww, TEMP[4].xyyy, TEMP[2].xyyy 131: MOV TEMP[3].x, TEMP[5].xxxx 132: MOV TEMP[3].y, TEMP[5].yyyy 133: MOV TEMP[3].z, TEMP[2].xxxx 134: MOV TEMP[3].w, TEMP[2].yyyy 135: MUL TEMP[2].xyz, IN[2].xyzz, IMM[2].wwww 136: MAD TEMP[4].xyz, IN[2].xyzz, IMM[3].xxxx, IMM[3].yyyy 137: POW TEMP[5].x, TEMP[4].xxxx, IMM[3].zzzz 138: POW TEMP[5].y, TEMP[4].yyyy, IMM[3].zzzz 139: POW TEMP[5].z, TEMP[4].zzzz, IMM[3].zzzz 140: FSLT TEMP[4].x, IMM[3].wwww, IN[2].xxxx 141: UIF TEMP[4].xxxx :0 142: MOV TEMP[4].x, TEMP[5].xxxx 143: ELSE :0 144: MOV TEMP[4].x, TEMP[2].xxxx 145: ENDIF 146: FSLT TEMP[6].x, IMM[3].wwww, IN[2].yyyy 147: UIF TEMP[6].xxxx :0 148: MOV TEMP[6].x, TEMP[5].yyyy 149: ELSE :0 150: MOV TEMP[6].x, TEMP[2].yyyy 151: ENDIF 152: FSLT TEMP[7].x, IMM[3].wwww, IN[2].zzzz 153: UIF TEMP[7].xxxx :0 154: MOV TEMP[5].x, TEMP[5].zzzz 155: ELSE :0 156: MOV TEMP[5].x, TEMP[2].zzzz 157: ENDIF 158: MOV TEMP[2].x, TEMP[4].xxxx 159: MOV TEMP[2].y, TEMP[6].xxxx 160: MOV TEMP[2].z, TEMP[5].xxxx 161: MOV TEMP[2].w, IN[2].wwww 162: LRP TEMP[2], CONST[5][7].xxxx, TEMP[2], IN[2] 163: MUL TEMP[4].xyz, IMM[5].yxxx, CONST[4][20].yzxx 164: MAD TEMP[4].xyz, IMM[5].xyxx, CONST[4][20].zxyy, -TEMP[4].xyzz 165: COS TEMP[5].x, IN[4].xxxx 166: SIN TEMP[6].x, IN[4].xxxx 167: MUL TEMP[7].xyz, TEMP[5].xxxx, TEMP[4].xyzz 168: MAD TEMP[7].xyz, IMM[5].xxyy, TEMP[6].xxxx, TEMP[7].xyzz 169: MUL TEMP[4].xyz, TEMP[6].xxxx, TEMP[4].xyzz 170: MAD TEMP[4].xyz, IMM[5].xxyy, TEMP[5].xxxx, -TEMP[4].xyzz 171: SIN TEMP[5].x, IN[4].yyyy 172: COS TEMP[6].x, IN[4].yyyy 173: ADD TEMP[9].x, IMM[2].zzzz, -TEMP[6].xxxx 174: MUL TEMP[10].x, TEMP[4].xxxx, TEMP[4].yyyy 175: MUL TEMP[10].x, TEMP[9].xxxx, TEMP[10].xxxx 176: MUL TEMP[11].x, TEMP[5].xxxx, TEMP[4].zzzz 177: MUL TEMP[12].x, TEMP[4].xxxx, TEMP[4].zzzz 178: MUL TEMP[12].x, TEMP[9].xxxx, TEMP[12].xxxx 179: MUL TEMP[13].x, TEMP[5].xxxx, TEMP[4].yyyy 180: MUL TEMP[14].x, TEMP[4].yyyy, TEMP[4].zzzz 181: MUL TEMP[9].x, TEMP[14].xxxx, TEMP[9].xxxx 182: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[4].xxxx 183: MUL TEMP[14].x, TEMP[4].xxxx, TEMP[4].xxxx 184: LRP TEMP[14].x, TEMP[6].xxxx, IMM[2].zzzz, TEMP[14].xxxx 185: ADD TEMP[15].x, TEMP[10].xxxx, -TEMP[11].xxxx 186: MOV TEMP[14].y, TEMP[15].xxxx 187: ADD TEMP[15].x, TEMP[12].xxxx, TEMP[13].xxxx 188: MOV TEMP[14].z, TEMP[15].xxxx 189: ADD TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx 190: MUL TEMP[11].x, TEMP[4].yyyy, TEMP[4].yyyy 191: LRP TEMP[11].x, TEMP[6].xxxx, IMM[2].zzzz, TEMP[11].xxxx 192: MOV TEMP[10].y, TEMP[11].xxxx 193: ADD TEMP[11].x, TEMP[9].xxxx, -TEMP[5].xxxx 194: MOV TEMP[10].z, TEMP[11].xxxx 195: ADD TEMP[11].x, TEMP[12].xxxx, -TEMP[13].xxxx 196: ADD TEMP[5].x, TEMP[9].xxxx, TEMP[5].xxxx 197: MOV TEMP[11].y, TEMP[5].xxxx 198: MUL TEMP[5].x, TEMP[4].zzzz, TEMP[4].zzzz 199: LRP TEMP[5].x, TEMP[6].xxxx, IMM[2].zzzz, TEMP[5].xxxx 200: MOV TEMP[11].z, TEMP[5].xxxx 201: DP3 TEMP[5].x, TEMP[7].xyzz, TEMP[14].xyzz 202: DP3 TEMP[6].x, TEMP[7].xyzz, TEMP[10].xyzz 203: MOV TEMP[5].y, TEMP[6].xxxx 204: DP3 TEMP[6].x, TEMP[7].xyzz, TEMP[11].xyzz 205: MOV TEMP[5].z, TEMP[6].xxxx 206: ADD TEMP[6].x, CONST[5][3].yyyy, TEMP[0].xxxx 207: ADD TEMP[0].x, CONST[5][3].zzzz, TEMP[0].yyyy 208: MUL TEMP[0].xyz, TEMP[0].xxxx, TEMP[4].xyzz 209: MAD TEMP[0].xyz, TEMP[5].xyzz, TEMP[6].xxxx, TEMP[0].xyzz 210: ADD TEMP[4].xyz, CONST[4][19].xyzz, -IN[3].xyzz 211: DP3 TEMP[4].x, TEMP[4].xyzz, TEMP[4].xyzz 212: SQRT TEMP[4].x, TEMP[4].xxxx 213: MOV TEMP[5], TEMP[2] 214: MOV TEMP[6].x, IN[4].wwww 215: FSLT TEMP[7].x, IMM[0].xxxx, CONST[5][4].zzzz 216: UIF TEMP[7].xxxx :0 217: MUL TEMP[7].x, TEMP[4].xxxx, CONST[5][0].yyyy 218: FSLT TEMP[9].x, TEMP[7].xxxx, IN[4].wwww 219: ADD TEMP[7].x, IN[4].wwww, -TEMP[7].xxxx 220: ADD TEMP[10].x, CONST[5][0].zzzz, -CONST[5][0].yyyy 221: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[4].xxxx 222: RCP TEMP[10].x, TEMP[10].xxxx 223: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[10].xxxx 224: ADD TEMP[7].x, IMM[2].zzzz, -TEMP[7].xxxx 225: MUL TEMP[7], TEMP[7].xxxx, TEMP[2] 226: MUL TEMP[10].x, TEMP[4].xxxx, CONST[5][0].zzzz 227: FSLT TEMP[10].x, TEMP[10].xxxx, IN[4].wwww 228: UIF TEMP[10].xxxx :0 229: MOV TEMP[10], IMM[0].xxxx 230: ELSE :0 231: MOV TEMP[10], TEMP[7] 232: ENDIF 233: UIF TEMP[9].xxxx :0 234: MOV TEMP[7], TEMP[10] 235: ELSE :0 236: MOV TEMP[7], TEMP[2] 237: ENDIF 238: MOV TEMP[5], TEMP[7] 239: MUL TEMP[2].x, TEMP[4].xxxx, CONST[5][0].xxxx 240: MAX TEMP[2].x, IN[4].wwww, TEMP[2].xxxx 241: MUL TEMP[4].x, TEMP[4].xxxx, CONST[5][0].wwww 242: MIN TEMP[6].x, TEMP[2].xxxx, TEMP[4].xxxx 243: ENDIF 244: FSLT TEMP[2].x, TEMP[5].wwww, IMM[5].zzzz 245: UIF TEMP[2].xxxx :0 246: MOV TEMP[2].x, IMM[0].xxxx 247: ELSE :0 248: MOV TEMP[2].x, TEMP[6].xxxx 249: ENDIF 250: MOV TEMP[4].xy, IMM[0].xxxx 251: MOV TEMP[4].w, IMM[0].xxxx 252: TXL TEMP[4], TEMP[4], SAMP[1], 2D 253: MUL TEMP[4].xyz, TEMP[4], IMM[5].wwww 254: MAD TEMP[0].xyz, TEMP[2].xxxx, TEMP[0].xyzz, IN[3].xyzz 255: ADD TEMP[0].xyz, TEMP[4].xyzz, TEMP[0].xyzz 256: MOV TEMP[2].w, IMM[2].zzzz 257: MOV TEMP[2].x, TEMP[0].xxxx 258: MOV TEMP[2].y, TEMP[0].yyyy 259: MOV TEMP[2].z, TEMP[0].zzzz 260: MOV TEMP[4].x, CONST[4][32].xxxx 261: MOV TEMP[4].y, CONST[4][33].xxxx 262: MOV TEMP[4].z, CONST[4][34].xxxx 263: MOV TEMP[4].w, CONST[4][35].xxxx 264: DP4 TEMP[4].x, TEMP[2], TEMP[4] 265: MOV TEMP[6].x, CONST[4][32].yyyy 266: MOV TEMP[6].y, CONST[4][33].yyyy 267: MOV TEMP[6].z, CONST[4][34].yyyy 268: MOV TEMP[6].w, CONST[4][35].yyyy 269: DP4 TEMP[6].x, TEMP[2], TEMP[6] 270: MOV TEMP[7].x, CONST[4][32].wwww 271: MOV TEMP[7].y, CONST[4][33].wwww 272: MOV TEMP[7].z, CONST[4][34].wwww 273: MOV TEMP[7].w, CONST[4][35].wwww 274: DP4 TEMP[7].x, TEMP[2], TEMP[7] 275: MAD TEMP[9].xyz, CONST[4][20].xyzz, CONST[5][3].xxxx, TEMP[0].xyzz 276: MOV TEMP[10].w, IMM[2].zzzz 277: MOV TEMP[10].x, TEMP[9].xxxx 278: MOV TEMP[10].y, TEMP[9].yyyy 279: MOV TEMP[10].z, TEMP[9].zzzz 280: MOV TEMP[9].xyz, -CONST[4][19].xyzx 281: ADD TEMP[11].xyz, TEMP[0].xyzz, TEMP[9].xyzz 282: MOV TEMP[12].x, TEMP[0].xxxx 283: MOV TEMP[12].y, TEMP[0].yyyy 284: MOV TEMP[12].z, TEMP[0].zzzz 285: DP3 TEMP[13].x, CONST[4][20].xyzz, TEMP[11].xyzz 286: MOV TEMP[12].w, TEMP[13].xxxx 287: MOV TEMP[13].x, TEMP[4].xxxx 288: MOV TEMP[13].y, TEMP[6].xxxx 289: MOV TEMP[14].x, -CONST[4][22].wwww 290: DP3 TEMP[11].x, TEMP[11].xyzz, CONST[4][20].xyzz 291: ADD TEMP[11].x, TEMP[11].xxxx, TEMP[14].xxxx 292: ADD TEMP[14].x, CONST[4][23].yyyy, TEMP[14].xxxx 293: RCP TEMP[14].x, TEMP[14].xxxx 294: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[14].xxxx 295: MOV TEMP[13].z, TEMP[11].xxxx 296: MOV TEMP[13].w, TEMP[7].xxxx 297: ADD TEMP[0].xyz, TEMP[0].xyzz, TEMP[9].xyzz 298: MOV TEMP[0].xyz, -TEMP[0].xyzx 299: DP3 TEMP[9].x, TEMP[0].xyzz, TEMP[0].xyzz 300: RSQ TEMP[9].x, TEMP[9].xxxx 301: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[9].xxxx 302: MOV TEMP[4].x, TEMP[4].xxxx 303: MOV TEMP[4].y, -TEMP[6].xxxx 304: MOV TEMP[6].x, CONST[4][32].zzzz 305: MOV TEMP[6].y, CONST[4][33].zzzz 306: MOV TEMP[6].z, CONST[4][34].zzzz 307: MOV TEMP[6].w, CONST[4][35].zzzz 308: MOV TEMP[9].x, CONST[4][32].wwww 309: MOV TEMP[9].y, CONST[4][33].wwww 310: MOV TEMP[9].z, CONST[4][34].wwww 311: MOV TEMP[9].w, CONST[4][35].wwww 312: MOV TEMP[11].x, CONST[4][32].zzzz 313: MOV TEMP[11].y, CONST[4][33].zzzz 314: MOV TEMP[11].z, CONST[4][34].zzzz 315: MOV TEMP[11].w, CONST[4][35].zzzz 316: DP4 TEMP[6].x, TEMP[10], TEMP[6] 317: DP4 TEMP[9].x, TEMP[10], TEMP[9] 318: RCP TEMP[9].x, TEMP[9].xxxx 319: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[9].xxxx 320: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].xxxx 321: DP4 TEMP[2].x, TEMP[2], TEMP[11] 322: MIN TEMP[2].x, IMM[11].xxxx, TEMP[2].xxxx 323: MAX TEMP[2].x, TEMP[6].xxxx, TEMP[2].xxxx 324: MAD TEMP[2].x, IMM[2].xxxx, TEMP[2].xxxx, -TEMP[7].xxxx 325: MOV TEMP[4].z, TEMP[2].xxxx 326: MOV TEMP[4].w, TEMP[7].xxxx 327: MOV OUT[1], TEMP[1] 328: MOV OUT[6].xyz, TEMP[0].xyzx 329: MOV OUT[2], TEMP[3] 330: MOV OUT[3], TEMP[5] 331: MOV OUT[4], TEMP[13] 332: MOV OUT[0], TEMP[4] 333: MOV OUT[5], TEMP[12] 334: MOV OUT[7].xy, TEMP[8].xyxx 335: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4 %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 %19 = call float @llvm.SI.load.const(<16 x i8> %18, i32 304) %20 = call float @llvm.SI.load.const(<16 x i8> %18, i32 308) %21 = call float @llvm.SI.load.const(<16 x i8> %18, i32 312) %22 = call float @llvm.SI.load.const(<16 x i8> %18, i32 320) %23 = call float @llvm.SI.load.const(<16 x i8> %18, i32 324) %24 = call float @llvm.SI.load.const(<16 x i8> %18, i32 328) %25 = call float @llvm.SI.load.const(<16 x i8> %18, i32 364) %26 = call float @llvm.SI.load.const(<16 x i8> %18, i32 372) %27 = call float @llvm.SI.load.const(<16 x i8> %18, i32 512) %28 = call float @llvm.SI.load.const(<16 x i8> %18, i32 516) %29 = call float @llvm.SI.load.const(<16 x i8> %18, i32 520) %30 = call float @llvm.SI.load.const(<16 x i8> %18, i32 524) %31 = call float @llvm.SI.load.const(<16 x i8> %18, i32 528) %32 = call float @llvm.SI.load.const(<16 x i8> %18, i32 532) %33 = call float @llvm.SI.load.const(<16 x i8> %18, i32 536) %34 = call float @llvm.SI.load.const(<16 x i8> %18, i32 540) %35 = call float @llvm.SI.load.const(<16 x i8> %18, i32 544) %36 = call float @llvm.SI.load.const(<16 x i8> %18, i32 548) %37 = call float @llvm.SI.load.const(<16 x i8> %18, i32 552) %38 = call float @llvm.SI.load.const(<16 x i8> %18, i32 556) %39 = call float @llvm.SI.load.const(<16 x i8> %18, i32 560) %40 = call float @llvm.SI.load.const(<16 x i8> %18, i32 564) %41 = call float @llvm.SI.load.const(<16 x i8> %18, i32 568) %42 = call float @llvm.SI.load.const(<16 x i8> %18, i32 572) %43 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 5 %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = call float @llvm.SI.load.const(<16 x i8> %44, i32 4) %46 = call float @llvm.SI.load.const(<16 x i8> %44, i32 8) %47 = call float @llvm.SI.load.const(<16 x i8> %44, i32 32) %48 = call float @llvm.SI.load.const(<16 x i8> %44, i32 40) %49 = call float @llvm.SI.load.const(<16 x i8> %44, i32 48) %50 = call float @llvm.SI.load.const(<16 x i8> %44, i32 52) %51 = call float @llvm.SI.load.const(<16 x i8> %44, i32 56) %52 = call float @llvm.SI.load.const(<16 x i8> %44, i32 72) %53 = call float @llvm.SI.load.const(<16 x i8> %44, i32 96) %54 = call float @llvm.SI.load.const(<16 x i8> %44, i32 100) %55 = call float @llvm.SI.load.const(<16 x i8> %44, i32 104) %56 = call float @llvm.SI.load.const(<16 x i8> %44, i32 108) %57 = call float @llvm.SI.load.const(<16 x i8> %44, i32 112) %58 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %59 = load <8 x i32>, <8 x i32> addrspace(2)* %58, align 32, !tbaa !0 %60 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %61 = load <4 x i32>, <4 x i32> addrspace(2)* %60, align 16, !tbaa !0 %62 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %63 = bitcast <8 x i32> addrspace(2)* %62 to <32 x i8> addrspace(2)* %64 = load <32 x i8>, <32 x i8> addrspace(2)* %63, align 32, !tbaa !0 %65 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %66 = bitcast <4 x i32> addrspace(2)* %65 to <16 x i8> addrspace(2)* %67 = load <16 x i8>, <16 x i8> addrspace(2)* %66, align 16, !tbaa !0 %68 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %69 = load <16 x i8>, <16 x i8> addrspace(2)* %68, align 16, !tbaa !0 %70 = add i32 %5, %7 %71 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %69, i32 0, i32 %70) %72 = extractelement <4 x float> %71, i32 0 %73 = extractelement <4 x float> %71, i32 1 %74 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %75 = load <16 x i8>, <16 x i8> addrspace(2)* %74, align 16, !tbaa !0 %76 = add i32 %10, %6 %77 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %75, i32 0, i32 %76) %78 = extractelement <4 x float> %77, i32 0 %79 = extractelement <4 x float> %77, i32 2 %80 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0 %82 = add i32 %10, %6 %83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %82) %84 = extractelement <4 x float> %83, i32 0 %85 = extractelement <4 x float> %83, i32 1 %86 = extractelement <4 x float> %83, i32 2 %87 = extractelement <4 x float> %83, i32 3 %88 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %89 = load <16 x i8>, <16 x i8> addrspace(2)* %88, align 16, !tbaa !0 %90 = add i32 %10, %6 %91 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %89, i32 0, i32 %90) %92 = extractelement <4 x float> %91, i32 0 %93 = extractelement <4 x float> %91, i32 1 %94 = extractelement <4 x float> %91, i32 2 %95 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %96 = load <16 x i8>, <16 x i8> addrspace(2)* %95, align 16, !tbaa !0 %97 = add i32 %10, %6 %98 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %96, i32 0, i32 %97) %99 = extractelement <4 x float> %98, i32 0 %100 = extractelement <4 x float> %98, i32 1 %101 = extractelement <4 x float> %98, i32 3 %102 = fmul float %72, 5.000000e-01 %103 = fadd float %102, 5.000000e-01 %104 = fmul float %73, -5.000000e-01 %105 = fadd float %104, 5.000000e-01 %106 = fdiv float 1.000000e+00, %14 %107 = fmul float %78, 3.000000e+00 %108 = fcmp olt float %78, %16 %. = select i1 %108, float %107, float 0.000000e+00 %109 = fmul float %15, %. %110 = fadd float %109, %13 %111 = fmul float %14, 5.000000e-01 %112 = fmul float %79, %47 %113 = fmul float %111, %112 %114 = call float @llvm.AMDIL.fraction.(float %113) %115 = call float @floor(float %113) %116 = fmul float %115, 2.000000e+00 %117 = fadd float %14, -2.000000e+00 %118 = call float @llvm.minnum.f32(float %117, float %116) %119 = fdiv float 1.000000e+00, %14 %120 = fmul float %116, %119 %121 = call float @floor(float %120) %122 = fmul float %14, %121 %123 = fsub float %116, %122 %124 = bitcast float %110 to i32 %125 = insertelement <4 x i32> , i32 %124, i32 1 %126 = insertelement <4 x i32> %125, i32 0, i32 2 %127 = bitcast <8 x i32> %59 to <32 x i8> %128 = bitcast <4 x i32> %61 to <16 x i8> %129 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %126, <32 x i8> %127, <16 x i8> %128, i32 2) %130 = extractelement <4 x float> %129, i32 2 %131 = fcmp ogt float %130, 0.000000e+00 %temp16.0 = select i1 %131, float %118, float %123 %132 = fmul float %106, %temp16.0 %133 = bitcast float %132 to i32 %134 = bitcast float %110 to i32 %135 = insertelement <4 x i32> undef, i32 %133, i32 0 %136 = insertelement <4 x i32> %135, i32 %134, i32 1 %137 = insertelement <4 x i32> %136, i32 0, i32 2 %138 = bitcast <8 x i32> %59 to <32 x i8> %139 = bitcast <4 x i32> %61 to <16 x i8> %140 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %137, <32 x i8> %138, <16 x i8> %139, i32 2) %141 = extractelement <4 x float> %140, i32 0 %142 = extractelement <4 x float> %140, i32 3 %143 = call float @llvm.AMDGPU.lrp(float %114, float %142, float %141) %144 = fadd float %110, %15 %145 = bitcast float %132 to i32 %146 = bitcast float %144 to i32 %147 = insertelement <4 x i32> undef, i32 %145, i32 0 %148 = insertelement <4 x i32> %147, i32 %146, i32 1 %149 = insertelement <4 x i32> %148, i32 0, i32 2 %150 = bitcast <8 x i32> %59 to <32 x i8> %151 = bitcast <4 x i32> %61 to <16 x i8> %152 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %149, <32 x i8> %150, <16 x i8> %151, i32 2) %153 = extractelement <4 x float> %152, i32 0 %154 = extractelement <4 x float> %152, i32 1 %155 = extractelement <4 x float> %152, i32 2 %156 = extractelement <4 x float> %152, i32 3 %157 = fmul float %15, 2.000000e+00 %158 = fadd float %157, %110 %159 = bitcast float %132 to i32 %160 = bitcast float %158 to i32 %161 = insertelement <4 x i32> undef, i32 %159, i32 0 %162 = insertelement <4 x i32> %161, i32 %160, i32 1 %163 = insertelement <4 x i32> %162, i32 0, i32 2 %164 = bitcast <8 x i32> %59 to <32 x i8> %165 = bitcast <4 x i32> %61 to <16 x i8> %166 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %163, <32 x i8> %164, <16 x i8> %165, i32 2) %167 = extractelement <4 x float> %166, i32 0 %168 = extractelement <4 x float> %166, i32 1 %169 = extractelement <4 x float> %166, i32 2 %170 = extractelement <4 x float> %166, i32 3 %171 = fadd float %106, %132 %172 = bitcast float %171 to i32 %173 = bitcast float %144 to i32 %174 = insertelement <4 x i32> undef, i32 %172, i32 0 %175 = insertelement <4 x i32> %174, i32 %173, i32 1 %176 = insertelement <4 x i32> %175, i32 0, i32 2 %177 = bitcast <8 x i32> %59 to <32 x i8> %178 = bitcast <4 x i32> %61 to <16 x i8> %179 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %176, <32 x i8> %177, <16 x i8> %178, i32 2) %180 = extractelement <4 x float> %179, i32 0 %181 = extractelement <4 x float> %179, i32 1 %182 = extractelement <4 x float> %179, i32 2 %183 = extractelement <4 x float> %179, i32 3 %184 = fsub float %182, %180 %185 = fsub float %183, %181 %186 = fsub float %155, %153 %187 = fsub float %156, %154 %188 = fdiv float 1.000000e+00, %184 %189 = fdiv float 1.000000e+00, %185 %190 = fmul float %186, %188 %191 = fmul float %187, %189 %192 = fmul float %180, %190 %193 = fmul float %181, %191 %194 = fsub float %153, %192 %195 = fsub float %154, %193 %196 = fadd float %194, %190 %197 = fadd float %195, %191 %198 = fsub float %169, %167 %199 = fsub float %170, %168 %200 = fdiv float 1.000000e+00, %184 %201 = fdiv float 1.000000e+00, %185 %202 = fmul float %198, %200 %203 = fmul float %199, %201 %204 = fmul float %180, %202 %205 = fmul float %181, %203 %206 = fsub float %167, %204 %207 = fsub float %168, %205 %208 = fadd float %206, %202 %209 = fadd float %207, %203 %210 = fmul float %143, 5.000000e-01 %211 = fadd float %210, 5.000000e-01 %212 = fadd float %103, -5.000000e-01 %213 = fadd float %105, -5.000000e-01 %214 = fadd float %103, -5.000000e-01 %215 = fadd float %105, -5.000000e-01 %216 = fdiv float 1.000000e+00, %48 %217 = call float @llvm.AMDIL.clamp.(float %216, float 0.000000e+00, float 1.000000e+00) %218 = fmul float %212, %217 %219 = fadd float %218, 5.000000e-01 %220 = fmul float %213, %217 %221 = fadd float %220, 5.000000e-01 %222 = fmul float %214, %217 %223 = fadd float %222, 5.000000e-01 %224 = fmul float %215, %217 %225 = fadd float %224, 5.000000e-01 %226 = call float @llvm.AMDGPU.lrp(float %211, float %219, float %103) %227 = call float @llvm.AMDGPU.lrp(float %211, float %221, float %105) %228 = call float @llvm.AMDGPU.lrp(float %210, float %223, float %103) %229 = call float @llvm.AMDGPU.lrp(float %210, float %225, float %105) %230 = fmul float %55, %226 %231 = fadd float %230, %53 %232 = fmul float %56, %227 %233 = fadd float %232, %54 %234 = fmul float %55, %228 %235 = fadd float %234, %53 %236 = fmul float %56, %229 %237 = fadd float %236, %54 %238 = fsub float %53, %231 %239 = fsub float %54, %233 %240 = fsub float %53, %235 %241 = fsub float %54, %237 %242 = fmul float %238, 2.000000e+00 %243 = fadd float %242, %55 %244 = fmul float %239, 2.000000e+00 %245 = fadd float %244, %56 %246 = fmul float %240, 2.000000e+00 %247 = fadd float %246, %55 %248 = fmul float %241, 2.000000e+00 %249 = fadd float %248, %56 %250 = fcmp olt float %243, 0.000000e+00 %.94 = select i1 %250, float %182, float %180 %251 = fcmp olt float %245, 0.000000e+00 %temp48.0 = select i1 %251, float %183, float %181 %252 = fsub float %.94, %231 %253 = fsub float %temp48.0, %233 %254 = fdiv float 1.000000e+00, %243 %255 = fdiv float 1.000000e+00, %245 %256 = fmul float %252, %254 %257 = fmul float %253, %255 %258 = call float @llvm.AMDIL.clamp.(float %256, float 0.000000e+00, float 1.000000e+00) %259 = call float @llvm.AMDIL.clamp.(float %257, float 0.000000e+00, float 1.000000e+00) %260 = fmul float %258, %243 %261 = fadd float %260, %231 %262 = fmul float %259, %245 %263 = fadd float %262, %233 %264 = fmul float %258, %247 %265 = fadd float %264, %235 %266 = fmul float %259, %249 %267 = fadd float %266, %237 %268 = fmul float %258, 2.000000e+00 %269 = fmul float %259, 2.000000e+00 %270 = fsub float 1.000000e+00, %268 %271 = fsub float 1.000000e+00, %269 %272 = fmul float %270, %72 %273 = fmul float %271, %73 %274 = call float @llvm.AMDGPU.lrp(float %261, float %196, float %194) %275 = call float @llvm.AMDGPU.lrp(float %263, float %197, float %195) %276 = call float @llvm.AMDGPU.lrp(float %265, float %208, float %206) %277 = call float @llvm.AMDGPU.lrp(float %267, float %209, float %207) %278 = fmul float %84, 0x3FB3D07220000000 %279 = fmul float %85, 0x3FB3D07220000000 %280 = fmul float %86, 0x3FB3D07220000000 %281 = fmul float %84, 0x3FEE54EDE0000000 %282 = fadd float %281, 0x3FAAB12320000000 %283 = fmul float %85, 0x3FEE54EDE0000000 %284 = fadd float %283, 0x3FAAB12320000000 %285 = fmul float %86, 0x3FEE54EDE0000000 %286 = fadd float %285, 0x3FAAB12320000000 %287 = call float @llvm.pow.f32(float %282, float 0x4003333340000000) %288 = call float @llvm.pow.f32(float %284, float 0x4003333340000000) %289 = call float @llvm.pow.f32(float %286, float 0x4003333340000000) %290 = fcmp ogt float %84, 0x3FA4B5DCC0000000 %.95 = select i1 %290, float %287, float %278 %291 = fcmp ogt float %85, 0x3FA4B5DCC0000000 %temp24.0 = select i1 %291, float %288, float %279 %292 = fcmp ogt float %86, 0x3FA4B5DCC0000000 %.96 = select i1 %292, float %289, float %280 %293 = call float @llvm.AMDGPU.lrp(float %57, float %.95, float %84) %294 = call float @llvm.AMDGPU.lrp(float %57, float %temp24.0, float %85) %295 = call float @llvm.AMDGPU.lrp(float %57, float %.96, float %86) %296 = call float @llvm.AMDGPU.lrp(float %57, float %87, float %87) %297 = fmul float %24, 0.000000e+00 %298 = fmul float %22, 0.000000e+00 %299 = fmul float %24, 0.000000e+00 %300 = fsub float %299, %23 %301 = fsub float %22, %297 %302 = fmul float %23, 0.000000e+00 %303 = fsub float %302, %298 %304 = call float @llvm.cos.f32(float %99) %305 = call float @llvm.sin.f32(float %99) %306 = fmul float %304, %300 %307 = fmul float %304, %301 %308 = fmul float %304, %303 %309 = fmul float %305, 0.000000e+00 %310 = fadd float %309, %306 %311 = fmul float %305, 0.000000e+00 %312 = fadd float %311, %307 %313 = fadd float %305, %308 %314 = fmul float %305, %300 %315 = fmul float %305, %301 %316 = fmul float %305, %303 %317 = fmul float %304, 0.000000e+00 %318 = fsub float %317, %314 %319 = fmul float %304, 0.000000e+00 %320 = fsub float %319, %315 %321 = fsub float %304, %316 %322 = call float @llvm.sin.f32(float %100) %323 = call float @llvm.cos.f32(float %100) %324 = fsub float 1.000000e+00, %323 %325 = fmul float %318, %320 %326 = fmul float %324, %325 %327 = fmul float %322, %321 %328 = fmul float %318, %321 %329 = fmul float %324, %328 %330 = fmul float %322, %320 %331 = fmul float %320, %321 %332 = fmul float %331, %324 %333 = fmul float %322, %318 %334 = fmul float %318, %318 %335 = call float @llvm.AMDGPU.lrp(float %323, float 1.000000e+00, float %334) %336 = fsub float %326, %327 %337 = fadd float %329, %330 %338 = fadd float %326, %327 %339 = fmul float %320, %320 %340 = call float @llvm.AMDGPU.lrp(float %323, float 1.000000e+00, float %339) %341 = fsub float %332, %333 %342 = fsub float %329, %330 %343 = fadd float %332, %333 %344 = fmul float %321, %321 %345 = call float @llvm.AMDGPU.lrp(float %323, float 1.000000e+00, float %344) %346 = fmul float %310, %335 %347 = fmul float %312, %336 %348 = fadd float %347, %346 %349 = fmul float %313, %337 %350 = fadd float %348, %349 %351 = fmul float %310, %338 %352 = fmul float %312, %340 %353 = fadd float %352, %351 %354 = fmul float %313, %341 %355 = fadd float %353, %354 %356 = fmul float %310, %342 %357 = fmul float %312, %343 %358 = fadd float %357, %356 %359 = fmul float %313, %345 %360 = fadd float %358, %359 %361 = fadd float %50, %272 %362 = fadd float %51, %273 %363 = fmul float %362, %318 %364 = fmul float %362, %320 %365 = fmul float %362, %321 %366 = fmul float %350, %361 %367 = fadd float %366, %363 %368 = fmul float %355, %361 %369 = fadd float %368, %364 %370 = fmul float %360, %361 %371 = fadd float %370, %365 %372 = fsub float %19, %92 %373 = fsub float %20, %93 %374 = fsub float %21, %94 %375 = fmul float %372, %372 %376 = fmul float %373, %373 %377 = fadd float %376, %375 %378 = fmul float %374, %374 %379 = fadd float %377, %378 %380 = call float @llvm.sqrt.f32(float %379) %381 = fcmp ogt float %52, 0.000000e+00 br i1 %381, label %IF83, label %ENDIF82 IF83: ; preds = %main_body %382 = call float @llvm.SI.load.const(<16 x i8> %44, i32 12) %383 = call float @llvm.SI.load.const(<16 x i8> %44, i32 0) %384 = fmul float %380, %45 %385 = fcmp olt float %384, %101 %386 = fsub float %101, %384 %387 = fsub float %46, %45 %388 = fmul float %387, %380 %389 = fdiv float 1.000000e+00, %388 %390 = fmul float %386, %389 %391 = fsub float 1.000000e+00, %390 %392 = fmul float %391, %293 %393 = fmul float %391, %294 %394 = fmul float %391, %295 %395 = fmul float %391, %296 %396 = fmul float %380, %46 %397 = fcmp olt float %396, %101 %.97 = select i1 %397, float 0.000000e+00, float %392 %.98 = select i1 %397, float 0.000000e+00, float %393 %.99 = select i1 %397, float 0.000000e+00, float %394 %.100 = select i1 %397, float 0.000000e+00, float %395 %.97. = select i1 %385, float %.97, float %293 %.98. = select i1 %385, float %.98, float %294 %.99. = select i1 %385, float %.99, float %295 %.100. = select i1 %385, float %.100, float %296 %398 = fmul float %380, %383 %399 = call float @llvm.maxnum.f32(float %101, float %398) %400 = fmul float %380, %382 %401 = call float @llvm.minnum.f32(float %399, float %400) br label %ENDIF82 ENDIF82: ; preds = %main_body, %IF83 %temp20.1 = phi float [ %.97., %IF83 ], [ %293, %main_body ] %temp21.0 = phi float [ %.98., %IF83 ], [ %294, %main_body ] %temp22.0 = phi float [ %.99., %IF83 ], [ %295, %main_body ] %temp23.0 = phi float [ %.100., %IF83 ], [ %296, %main_body ] %temp24.1 = phi float [ %401, %IF83 ], [ %101, %main_body ] %402 = fcmp olt float %temp23.0, 0x3F70101060000000 %.temp24.1 = select i1 %402, float 0.000000e+00, float %temp24.1 %403 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> , <32 x i8> %64, <16 x i8> %67, i32 2) %404 = extractelement <4 x float> %403, i32 0 %405 = extractelement <4 x float> %403, i32 1 %406 = extractelement <4 x float> %403, i32 2 %407 = fmul float %404, 0x3E7AD7F2A0000000 %408 = fmul float %405, 0x3E7AD7F2A0000000 %409 = fmul float %406, 0x3E7AD7F2A0000000 %410 = fmul float %.temp24.1, %367 %411 = fadd float %410, %92 %412 = fmul float %.temp24.1, %369 %413 = fadd float %412, %93 %414 = fmul float %.temp24.1, %371 %415 = fadd float %414, %94 %416 = fadd float %407, %411 %417 = fadd float %408, %413 %418 = fadd float %409, %415 %419 = fmul float %416, %27 %420 = fmul float %417, %31 %421 = fadd float %419, %420 %422 = fmul float %418, %35 %423 = fadd float %421, %422 %424 = fadd float %423, %39 %425 = fmul float %416, %28 %426 = fmul float %417, %32 %427 = fadd float %425, %426 %428 = fmul float %418, %36 %429 = fadd float %427, %428 %430 = fadd float %429, %40 %431 = fmul float %416, %30 %432 = fmul float %417, %34 %433 = fadd float %431, %432 %434 = fmul float %418, %38 %435 = fadd float %433, %434 %436 = fadd float %435, %42 %437 = fmul float %22, %49 %438 = fadd float %437, %416 %439 = fmul float %23, %49 %440 = fadd float %439, %417 %441 = fmul float %24, %49 %442 = fadd float %441, %418 %443 = fsub float %416, %19 %444 = fsub float %417, %20 %445 = fsub float %418, %21 %446 = fmul float %22, %443 %447 = fmul float %23, %444 %448 = fadd float %447, %446 %449 = fmul float %24, %445 %450 = fadd float %448, %449 %451 = fmul float %443, %22 %452 = fmul float %444, %23 %453 = fadd float %452, %451 %454 = fmul float %445, %24 %455 = fadd float %453, %454 %456 = fsub float %455, %25 %457 = fsub float %26, %25 %458 = fdiv float 1.000000e+00, %457 %459 = fmul float %456, %458 %460 = fsub float %416, %19 %461 = fsub float %417, %20 %462 = fsub float %418, %21 %463 = fmul float %460, %460 %464 = fmul float %461, %461 %465 = fadd float %464, %463 %466 = fmul float %462, %462 %467 = fadd float %465, %466 %468 = call float @llvm.AMDGPU.rsq.clamped.f32(float %467) %469 = fmul float %460, %468 %470 = fsub float -0.000000e+00, %469 %471 = fmul float %461, %468 %472 = fsub float -0.000000e+00, %471 %473 = fmul float %462, %468 %474 = fsub float -0.000000e+00, %473 %475 = fsub float -0.000000e+00, %430 %476 = fmul float %438, %29 %477 = fmul float %440, %33 %478 = fadd float %476, %477 %479 = fmul float %442, %37 %480 = fadd float %478, %479 %481 = fadd float %480, %41 %482 = fmul float %438, %30 %483 = fmul float %440, %34 %484 = fadd float %482, %483 %485 = fmul float %442, %38 %486 = fadd float %484, %485 %487 = fadd float %486, %42 %488 = fdiv float 1.000000e+00, %487 %489 = fmul float %481, %488 %490 = fmul float %489, %436 %491 = fmul float %416, %29 %492 = fmul float %417, %33 %493 = fadd float %491, %492 %494 = fmul float %418, %37 %495 = fadd float %493, %494 %496 = fadd float %495, %41 %497 = call float @llvm.minnum.f32(float %496, float 0x3F50624DE0000000) %498 = call float @llvm.maxnum.f32(float %490, float %497) %499 = fmul float %498, 2.000000e+00 %500 = fsub float %499, %436 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %274, float %275, float %276, float %277) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %temp20.1, float %temp21.0, float %temp22.0, float %temp23.0) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %424, float %430, float %459, float %436) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %416, float %417, float %418, float %450) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %470, float %472, float %474, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %143, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %424, float %475, float %500, float %436) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: readnone declare float @floor(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.cos.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sin.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[16:19], s[2:3], 0x4 ; C0880304 v_add_i32_e32 v0, s10, v0 ; 4A00000A v_add_i32_e32 v4, s11, v3 ; 4A08060B s_load_dwordx4 s[20:23], s[2:3], 0x10 ; C08A0310 v_mov_b32_e32 v19, 0x3d558919 ; 7E2602FF 3D558919 v_mov_b32_e32 v11, 0x3f72a76f ; 7E1602FF 3F72A76F v_mov_b32_e32 v12, 0x3d9e8391 ; 7E1802FF 3D9E8391 v_mov_b32_e32 v13, 0x3d25aee6 ; 7E1A02FF 3D25AEE6 s_load_dwordx4 s[12:15], s[2:3], 0x14 ; C0860314 s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s36, s[16:19], 0x0 ; C2121100 s_buffer_load_dword s37, s[16:19], 0x1 ; C2129101 s_buffer_load_dword s11, s[16:19], 0x2 ; C2059102 s_buffer_load_dword s38, s[16:19], 0x3 ; C2131103 s_load_dwordx4 s[24:27], s[8:9], 0x4 ; C08C0904 s_load_dwordx4 s[28:31], s[8:9], 0x8 ; C08E0908 s_load_dwordx4 s[32:35], s[8:9], 0xc ; C090090C s_load_dwordx4 s[40:43], s[8:9], 0x10 ; C0940910 s_buffer_load_dword s39, s[12:15], 0x8 ; C2138D08 buffer_load_format_xyzw v[7:10], v0, s[0:3], 0 idxen ; E00C2000 80000700 s_buffer_load_dword s10, s[12:15], 0xa ; C2050D0A s_buffer_load_dword s18, s[12:15], 0xc ; C2090D0C s_buffer_load_dword s17, s[12:15], 0xd ; C2088D0D s_buffer_load_dword s16, s[12:15], 0xe ; C2080D0E s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[14:17], v4, s[24:27], 0 idxen ; E00C2000 80060E04 buffer_load_format_xyzw v[20:23], v4, s[28:31], 0 idxen ; E00C2000 80071404 buffer_load_format_xyzw v[0:3], v4, s[32:35], 0 idxen ; E00C2000 80080004 s_waitcnt vmcnt(2) ; BF8C0772 v_mul_f32_e64 v15, 0.5, s37 ; D210000F 00004AF0 s_buffer_load_dword s19, s[12:15], 0x1c ; C2098D1C s_load_dwordx4 s[24:27], s[4:5], 0x0 ; C08C0500 s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700 v_add_f32_e64 v9, -2.0, s37 ; D2060009 00004AF5 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[3:6], v4, s[40:43], 0 idxen ; E00C2000 800A0304 v_rcp_f32_e32 v24, s37 ; 7E305425 v_mov_b32_e32 v32, s36 ; 7E400224 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_sub_f32_e64 v5, 1.0, s19 ; D2080005 000026F2 v_cmp_gt_f32_e64 s[8:9], s38, v14 ; D0080008 00021C26 v_cmp_gt_f32_e64 s[2:3], v20, v13 ; D0080002 00021B14 v_cmp_gt_f32_e64 s[0:1], v21, v13 ; D0080000 00021B15 v_cmp_gt_f32_e32 vcc, v22, v13 ; 7C081B16 v_mul_f32_e32 v10, 0x40400000, v14 ; 10141CFF 40400000 v_cndmask_b32_e64 v10, 0, v10, s[8:9] ; D200000A 00221480 v_mac_f32_e32 v32, s11, v10 ; 3E40140B v_mov_b32_e32 v31, 0 ; 7E3E0280 v_mul_f32_e32 v13, s39, v16 ; 101A2027 v_mov_b32_e32 v33, v31 ; 7E42031F image_sample_l v10, 4, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[28:35], s[24:27] ; F0900400 00C70A1F s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_lt_f32_e64 s[8:9], 0, v10 ; D0020008 00021480 v_mul_f32_e32 v10, v13, v15 ; 10141F0D v_floor_f32_e32 v14, v10 ; 7E1C490A v_add_f32_e32 v10, v14, v14 ; 06141D0E v_min_f32_e32 v9, v10, v9 ; 1E12130A v_mul_f32_e32 v16, v24, v10 ; 10201518 v_floor_f32_e32 v16, v16 ; 7E204910 v_mad_f32 v10, -s37, v16, v10 ; D282000A 242A2025 v_cndmask_b32_e64 v9, v10, v9, s[8:9] ; D2000009 0022130A v_mul_f32_e32 v29, v9, v24 ; 103A3109 v_mad_f32 v27, v11, v20, v19 ; D282001B 044E290B v_mad_f32 v28, v11, v21, v19 ; D282001C 044E2B0B v_mac_f32_e32 v19, v11, v22 ; 3E262D0B v_mov_b32_e32 v30, v32 ; 7E3C0320 v_add_f32_e32 v25, s11, v32 ; 0632400B image_sample_l v[16:17], 9, 0, 0, 0, 0, 0, 0, 0, v[29:32], s[28:35], s[24:27] ; F0900900 00C7101D v_mov_b32_e32 v30, v25 ; 7E3C0319 v_mac_f32_e32 v24, v9, v24 ; 3E303109 image_sample_l v[33:36], 15, 0, 0, 0, 0, 0, 0, 0, v[29:32], s[28:35], s[24:27] ; F0900F00 00C7211D v_mov_b32_e32 v30, v32 ; 7E3C0320 v_mac_f32_e64 v30, 2.0, s11 ; D23E001E 000016F4 v_mul_f32_e32 v32, v12, v20 ; 1040290C v_mul_f32_e32 v37, v12, v21 ; 104A2B0C v_mul_f32_e32 v38, v12, v22 ; 104C2D0C v_mul_f32_e32 v9, v20, v5 ; 10120B14 v_mul_f32_e32 v10, v21, v5 ; 10140B15 v_mul_f32_e32 v11, v22, v5 ; 10160B16 v_mul_f32_e32 v12, v23, v5 ; 10180B17 v_mac_f32_e32 v12, s19, v23 ; 3E182E13 v_mov_b32_e32 v26, v31 ; 7E34031F image_sample_l v[20:23], 15, 0, 0, 0, 0, 0, 0, 0, v[29:32], s[28:35], s[24:27] ; F0900F00 00C7141D image_sample_l v[39:42], 15, 0, 0, 0, 0, 0, 0, 0, v[24:27], s[28:35], s[24:27] ; F0900F00 00C72718 s_waitcnt vmcnt(0) ; BF8C0770 v_subrev_f32_e32 v5, v39, v41 ; 0A0A5327 v_rcp_f32_e32 v24, v5 ; 7E305505 v_subrev_f32_e32 v5, v40, v42 ; 0A0A5528 v_rcp_f32_e32 v25, v5 ; 7E325505 v_subrev_f32_e32 v18, v33, v35 ; 0A244721 v_mul_f32_e32 v5, v24, v18 ; 100A2518 v_mad_f32 v26, -v39, v5, v33 ; D282001A 24860B27 v_subrev_f32_e32 v29, v34, v36 ; 0A3A4922 v_mul_f32_e32 v5, v25, v29 ; 100A3B19 v_mad_f32 v30, -v40, v5, v34 ; D282001E 248A0B28 v_subrev_f32_e32 v22, v20, v22 ; 0A2C2D14 v_mul_f32_e32 v5, v24, v22 ; 100A2D18 v_mad_f32 v20, -v39, v5, v20 ; D2820014 24520B27 v_subrev_f32_e32 v23, v21, v23 ; 0A2E2F15 v_mul_f32_e32 v5, v25, v23 ; 100A2F19 v_mad_f32 v21, -v40, v5, v21 ; D2820015 24560B28 v_mad_f32 v13, v15, v13, -v14 ; D282000D 843A1B0F v_rcp_f32_e32 v14, s10 ; 7E1C540A v_sub_f32_e32 v5, 1.0, v13 ; 080A1AF2 v_mul_f32_e32 v5, v16, v5 ; 100A0B10 v_mac_f32_e32 v5, v17, v13 ; 3E0A1B11 v_add_f32_e64 v13, 0, v14 clamp ; D206080D 00021C80 v_mad_f32 v14, 0.5, v7, 0.5 ; D282000E 03C20EF0 v_add_f32_e32 v15, -0.5, v14 ; 061E1CF1 v_mad_f32 v15, v15, v13, 0.5 ; D282000F 03C21B0F s_buffer_load_dword s24, s[12:15], 0x18 ; C20C0D18 s_buffer_load_dword s25, s[12:15], 0x19 ; C20C8D19 s_buffer_load_dword s26, s[12:15], 0x1a ; C20D0D1A s_buffer_load_dword s27, s[12:15], 0x1b ; C20D8D1B v_mad_f32 v16, -0.5, v8, 0.5 ; D2820010 03C210F1 v_add_f32_e32 v17, -0.5, v16 ; 062220F1 v_mad_f32 v13, v17, v13, 0.5 ; D282000D 03C21B11 v_mad_f32 v17, 0.5, v5, 0.5 ; D2820011 03C20AF0 v_sub_f32_e32 v31, 1.0, v17 ; 083E22F2 v_mul_f32_e32 v33, v14, v31 ; 10423F0E v_mul_f32_e32 v31, v16, v31 ; 103E3F10 v_mac_f32_e32 v33, v15, v17 ; 3E42230F v_mac_f32_e32 v31, v13, v17 ; 3E3E230D s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v17, s24 ; 7E220218 v_mad_f32 v33, s26, v33, v17 ; D2820021 0446421A v_mov_b32_e32 v17, s25 ; 7E220219 v_mad_f32 v31, s27, v31, v17 ; D282001F 04463E1B v_sub_f32_e32 v17, s24, v33 ; 08224218 v_mad_f32 v34, 2.0, v17, s26 ; D2820022 006A22F4 v_sub_f32_e32 v17, s25, v31 ; 08223E19 v_mad_f32 v35, 2.0, v17, s27 ; D2820023 006E22F4 v_cmp_gt_f32_e64 s[8:9], 0, v34 ; D0080008 00024480 v_cmp_gt_f32_e64 s[10:11], 0, v35 ; D008000A 00024680 v_cndmask_b32_e64 v17, v39, v41, s[8:9] ; D2000011 00225327 v_cndmask_b32_e64 v36, v40, v42, s[10:11] ; D2000024 002A5528 v_mad_f32 v39, 0.5, -v5, 1.0 ; D2820027 43CA0AF0 v_mul_f32_e32 v40, v14, v39 ; 10504F0E v_mul_f32_e32 v16, v16, v39 ; 10204F10 v_mul_f32_e32 v14, 0.5, v5 ; 101C0AF0 v_rcp_f32_e32 v39, v34 ; 7E4E5522 v_mac_f32_e32 v40, v15, v14 ; 3E501D0F v_mac_f32_e32 v16, v13, v14 ; 3E201D0D v_subrev_f32_e32 v13, v33, v17 ; 0A1A2321 v_mul_f32_e32 v13, v39, v13 ; 101A1B27 v_add_f32_e64 v17, 0, v13 clamp ; D2060811 00021A80 v_mac_f32_e32 v33, v34, v17 ; 3E422322 v_mad_f32 v14, v24, v18, v26 ; D282000E 046A2518 v_rcp_f32_e32 v15, v35 ; 7E1E5523 v_sub_f32_e32 v13, 1.0, v33 ; 081A42F2 v_mul_f32_e32 v13, v26, v13 ; 101A1B1A v_mac_f32_e32 v13, v14, v33 ; 3E1A430E v_subrev_f32_e32 v14, v31, v36 ; 0A1C491F v_mul_f32_e32 v14, v15, v14 ; 101C1D0F v_add_f32_e64 v18, 0, v14 clamp ; D2060812 00021C80 v_mac_f32_e32 v31, v35, v18 ; 3E3E2523 v_mad_f32 v15, v25, v29, v30 ; D282000F 047A3B19 v_sub_f32_e32 v14, 1.0, v31 ; 081C3EF2 v_mul_f32_e32 v14, v30, v14 ; 101C1D1E v_mac_f32_e32 v14, v15, v31 ; 3E1C3F0F v_mov_b32_e32 v15, s24 ; 7E1E0218 v_mad_f32 v26, s26, v40, v15 ; D282001A 043E501A v_mad_f32 v22, v24, v22, v20 ; D2820016 04522D18 v_sub_f32_e32 v15, s24, v26 ; 081E3418 v_mad_f32 v15, 2.0, v15, s26 ; D282000F 006A1EF4 v_mac_f32_e32 v26, v15, v17 ; 3E34230F v_sub_f32_e32 v15, 1.0, v26 ; 081E34F2 v_mul_f32_e32 v15, v20, v15 ; 101E1F14 v_mac_f32_e32 v15, v22, v26 ; 3E1E3516 v_mov_b32_e32 v20, s25 ; 7E280219 v_mad_f32 v20, s27, v16, v20 ; D2820014 0452201B v_mad_f32 v22, v25, v23, v21 ; D2820016 04562F19 v_sub_f32_e32 v16, s25, v20 ; 08202819 v_mad_f32 v16, 2.0, v16, s27 ; D2820010 006E20F4 v_mac_f32_e32 v20, v16, v18 ; 3E282510 v_sub_f32_e32 v16, 1.0, v20 ; 082028F2 v_mul_f32_e32 v16, v21, v16 ; 10202115 v_mac_f32_e32 v16, v22, v20 ; 3E202916 v_log_f32_e32 v20, v27 ; 7E284F1B v_log_f32_e32 v21, v28 ; 7E2A4F1C v_log_f32_e32 v19, v19 ; 7E264F13 v_mov_b32_e32 v22, 0x4019999a ; 7E2C02FF 4019999A v_mul_legacy_f32_e32 v20, v22, v20 ; 0E282916 v_mul_legacy_f32_e32 v21, v22, v21 ; 0E2A2B16 v_mul_legacy_f32_e32 v19, v22, v19 ; 0E262716 v_exp_f32_e32 v20, v20 ; 7E284B14 v_cndmask_b32_e64 v20, v32, v20, s[2:3] ; D2000014 000A2920 v_exp_f32_e32 v21, v21 ; 7E2A4B15 v_cndmask_b32_e64 v21, v37, v21, s[0:1] ; D2000015 00022B25 v_exp_f32_e32 v19, v19 ; 7E264B13 v_cndmask_b32_e32 v19, v38, v19 ; 00262726 v_mac_f32_e32 v9, s19, v20 ; 3E122813 v_mac_f32_e32 v10, s19, v21 ; 3E142A13 v_mac_f32_e32 v11, s19, v19 ; 3E162613 s_buffer_load_dword s27, s[20:23], 0x4c ; C20D954C s_buffer_load_dword s28, s[20:23], 0x4d ; C20E154D s_buffer_load_dword s29, s[20:23], 0x4e ; C20E954E s_buffer_load_dword s3, s[20:23], 0x50 ; C2019550 s_buffer_load_dword s8, s[20:23], 0x51 ; C2041551 s_buffer_load_dword s26, s[20:23], 0x52 ; C20D1552 s_buffer_load_dword s30, s[20:23], 0x5b ; C20F155B s_buffer_load_dword s0, s[20:23], 0x5d ; C200155D s_buffer_load_dword s32, s[20:23], 0x80 ; C2101580 s_buffer_load_dword s31, s[20:23], 0x81 ; C20F9581 s_buffer_load_dword s2, s[20:23], 0x82 ; C2011582 s_buffer_load_dword s9, s[20:23], 0x83 ; C2049583 s_buffer_load_dword s35, s[20:23], 0x84 ; C2119584 s_buffer_load_dword s33, s[20:23], 0x85 ; C2109585 s_buffer_load_dword s19, s[20:23], 0x86 ; C2099586 s_buffer_load_dword s24, s[20:23], 0x87 ; C20C1587 s_buffer_load_dword s36, s[20:23], 0x88 ; C2121588 s_buffer_load_dword s34, s[20:23], 0x89 ; C2111589 s_buffer_load_dword s10, s[20:23], 0x8a ; C205158A s_buffer_load_dword s25, s[20:23], 0x8b ; C20C958B s_buffer_load_dword s38, s[20:23], 0x8c ; C213158C s_buffer_load_dword s37, s[20:23], 0x8d ; C212958D s_buffer_load_dword s11, s[20:23], 0x8e ; C205958E s_buffer_load_dword s20, s[20:23], 0x8f ; C20A158F v_mov_b32_e32 v19, 0x3e22f983 ; 7E2602FF 3E22F983 v_mul_f32_e32 v3, v19, v3 ; 10060713 v_mul_f32_e32 v4, v19, v4 ; 10080913 v_fract_f32_e32 v3, v3 ; 7E064103 v_fract_f32_e32 v24, v4 ; 7E304104 v_cos_f32_e32 v4, v3 ; 7E086D03 v_sin_f32_e32 v19, v3 ; 7E266B03 v_cos_f32_e32 v25, v24 ; 7E326D18 v_sub_f32_e32 v3, 1.0, v25 ; 080632F2 v_mul_f32_e32 v20, v4, v4 ; 10280904 v_mad_f32 v20, v20, v3, v25 ; D2820014 04660714 s_buffer_load_dword s1, s[12:15], 0x12 ; C2008D12 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e64 v21, v19, -s8 ; D2100015 40001113 v_mul_f32_e32 v22, v21, v21 ; 102C2B15 v_mad_f32 v26, v22, v3, v25 ; D282001A 04660716 v_mul_f32_e32 v22, s3, v19 ; 102C2603 v_mul_f32_e32 v23, v22, v22 ; 102E2D16 v_mad_f32 v23, v23, v3, v25 ; D2820017 04660717 v_mov_b32_e32 v3, 0x80000000 ; 7E0602FF 80000000 v_xor_b32_e32 v21, v21, v3 ; 3A2A0715 v_xor_b32_e32 v22, v22, v3 ; 3A2C0716 v_xor_b32_e32 v28, s8, v3 ; 3A380608 v_sin_f32_e32 v24, v24 ; 7E306B18 v_cmp_lt_f32_e64 s[22:23], 0, s1 ; D0020016 00000280 v_mov_b32_e32 v27, s0 ; 7E360200 v_mov_b32_e32 v3, s18 ; 7E060212 s_and_saveexec_b64 s[22:23], s[22:23] ; BE962416 s_xor_b64 s[22:23], exec, s[22:23] ; 8996167E s_cbranch_execz BB0_2 ; BF880000 v_sub_f32_e32 v29, s27, v0 ; 083A001B v_sub_f32_e32 v30, s28, v1 ; 083C021C v_sub_f32_e32 v31, s29, v2 ; 083E041D v_mul_f32_e32 v29, v29, v29 ; 103A3B1D s_buffer_load_dword s0, s[12:15], 0x1 ; C2000D01 s_buffer_load_dword s1, s[12:15], 0x2 ; C2008D02 s_buffer_load_dword s18, s[12:15], 0x0 ; C2090D00 s_buffer_load_dword s21, s[12:15], 0x3 ; C20A8D03 v_mac_f32_e32 v29, v30, v30 ; 3E3A3D1E v_mac_f32_e32 v29, v31, v31 ; 3E3A3F1F v_rsq_f32_e32 v30, v29 ; 7E3C5D1D s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v31, s0 ; 7E3E0200 v_sub_f32_e32 v31, s1, v31 ; 083E3E01 v_rcp_f32_e32 v31, v31 ; 7E3E551F v_sqrt_f32_e32 v29, v29 ; 7E3A671D v_mul_f32_e32 v30, v31, v30 ; 103C3D1F v_mul_f32_e32 v31, s0, v29 ; 103E3A00 v_mad_f32 v32, v29, s0, -v6 ; D2820020 8418011D v_mad_f32 v30, v32, v30, 1.0 ; D282001E 03CA3D20 v_cmp_lt_f32_e32 vcc, v31, v6 ; 7C020D1F v_mul_f32_e32 v31, s1, v29 ; 103E3A01 v_mul_f32_e32 v32, s18, v29 ; 10403A12 v_max_f32_e32 v32, v32, v6 ; 20400D20 v_cmp_lt_f32_e64 s[0:1], v31, v6 ; D0020000 00020D1F v_mul_f32_e32 v6, v9, v30 ; 100C3D09 v_mul_f32_e32 v31, v10, v30 ; 103E3D0A v_mul_f32_e32 v33, v11, v30 ; 10423D0B v_mul_f32_e32 v30, v12, v30 ; 103C3D0C v_cndmask_b32_e64 v6, v6, 0, s[0:1] ; D2000006 00010106 v_cndmask_b32_e64 v31, v31, 0, s[0:1] ; D200001F 0001011F v_cndmask_b32_e64 v33, v33, 0, s[0:1] ; D2000021 00010121 v_cndmask_b32_e64 v30, v30, 0, s[0:1] ; D200001E 0001011E v_cndmask_b32_e32 v9, v9, v6 ; 00120D09 v_cndmask_b32_e32 v10, v10, v31 ; 00143F0A v_cndmask_b32_e32 v11, v11, v33 ; 0016430B v_cndmask_b32_e32 v12, v12, v30 ; 00183D0C v_mul_f32_e32 v6, s21, v29 ; 100C3A15 v_min_f32_e32 v6, v6, v32 ; 1E0C4106 s_or_b64 exec, exec, s[22:23] ; 88FE167E v_mad_f32 v17, -2.0, v17, 1.0 ; D2820011 03CA22F5 v_mad_f32 v7, v17, v7, s17 ; D2820007 00460F11 v_mad_f32 v17, -2.0, v18, 1.0 ; D2820011 03CA24F5 v_mad_f32 v8, v17, v8, s16 ; D2820008 00421111 v_mul_f32_e32 v17, v28, v4 ; 1022091C v_mac_f32_e32 v19, 0, v4 ; 3E260880 v_sub_f32_e32 v18, 1.0, v25 ; 082432F2 v_mul_f32_e32 v25, v26, v17 ; 1032231A v_subrev_f32_e32 v26, s30, v27 ; 0A34361E v_rcp_f32_e32 v26, v26 ; 7E34551A s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx8 s[40:47], s[6:7], 0x8 ; C0D40708 v_mul_f32_e32 v27, s3, v4 ; 10360803 v_mul_f32_e32 v28, v22, v21 ; 10382B16 v_mul_f32_e32 v29, v4, v24 ; 103A3104 v_mul_f32_e32 v30, v4, v21 ; 103C2B04 v_mov_b32_e32 v31, 0x3b808083 ; 7E3E02FF 3B808083 v_cmp_gt_f32_e32 vcc, v31, v12 ; 7C08191F v_cndmask_b32_e64 v6, v6, 0, vcc ; D2000006 01A90106 v_mad_f32 v31, v18, v28, -v29 ; D282001F 84763912 v_mac_f32_e32 v29, v28, v18 ; 3E3A251C v_mul_f32_e32 v28, v22, v24 ; 10383116 v_mad_f32 v32, v30, v18, v28 ; D2820020 0472251E v_mad_f32 v28, v18, v30, -v28 ; D282001C 84723D12 v_mul_f32_e32 v24, v21, v24 ; 10303115 v_mul_f32_e32 v30, v4, v22 ; 103C2D04 v_mad_f32 v33, v30, v18, -v24 ; D2820021 8462251E v_mac_f32_e32 v24, v18, v30 ; 3E303D12 v_mac_f32_e32 v25, v31, v27 ; 3E32371F v_mul_f32_e32 v18, v29, v17 ; 1024231D v_mac_f32_e32 v18, v23, v27 ; 3E243717 v_mul_f32_e32 v17, v28, v17 ; 1022231C v_mac_f32_e32 v17, v24, v27 ; 3E223718 v_mac_f32_e32 v25, v32, v19 ; 3E322720 v_mac_f32_e32 v18, v33, v19 ; 3E242721 v_mac_f32_e32 v17, v20, v19 ; 3E222714 v_mul_f32_e32 v19, v21, v8 ; 10261115 v_mul_f32_e32 v20, v22, v8 ; 10281116 v_mul_f32_e32 v4, v4, v8 ; 10081104 v_mac_f32_e32 v19, v7, v25 ; 3E263307 v_mac_f32_e32 v20, v7, v18 ; 3E282507 v_mov_b32_e32 v21, 0 ; 7E2A0280 v_mac_f32_e32 v4, v7, v17 ; 3E082307 v_mov_b32_e32 v22, v21 ; 7E2C0315 v_mad_f32 v0, v19, v6, v0 ; D2820000 04020D13 v_mad_f32 v1, v20, v6, v1 ; D2820001 04060D14 v_mac_f32_e32 v2, v4, v6 ; 3E040D04 v_mov_b32_e32 v23, v21 ; 7E2E0315 v_mov_b32_e32 v4, 0x33d6bf95 ; 7E0802FF 33D6BF95 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[6:8], 7, 0, 0, 0, 0, 0, 0, 0, v[21:24], s[40:47], s[12:15] ; F0900700 006A0615 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v0, v4, v6 ; 3E000D04 v_mac_f32_e32 v1, v4, v7 ; 3E020F04 v_mac_f32_e32 v2, v4, v8 ; 3E041104 exp 15, 32, 0, 0, 0, v21, v21, v21, v21 ; F800020F 15151515 exp 15, 33, 0, 0, 0, v13, v14, v15, v16 ; F800021F 100F0E0D exp 15, 34, 0, 0, 0, v9, v10, v11, v12 ; F800022F 0C0B0A09 v_mul_f32_e32 v4, s35, v1 ; 10080223 v_mac_f32_e32 v4, s32, v0 ; 3E080020 v_mac_f32_e32 v4, s36, v2 ; 3E080424 v_add_f32_e32 v4, s38, v4 ; 06080826 v_mul_f32_e32 v6, s33, v1 ; 100C0221 v_mac_f32_e32 v6, s31, v0 ; 3E0C001F v_mac_f32_e32 v6, s34, v2 ; 3E0C0422 v_add_f32_e32 v6, s37, v6 ; 060C0C25 v_subrev_f32_e32 v7, s27, v0 ; 0A0E001B v_subrev_f32_e32 v8, s28, v1 ; 0A10021C s_waitcnt expcnt(0) ; BF8C070F v_subrev_f32_e32 v9, s29, v2 ; 0A12041D v_mul_f32_e32 v10, s3, v7 ; 10140E03 v_mac_f32_e32 v10, s8, v8 ; 3E141008 v_mac_f32_e32 v10, s26, v9 ; 3E14121A v_subrev_f32_e32 v11, s30, v10 ; 0A16141E v_mul_f32_e32 v11, v26, v11 ; 1016171A v_mul_f32_e32 v12, s24, v1 ; 10180218 v_mac_f32_e32 v12, s9, v0 ; 3E180009 v_mac_f32_e32 v12, s25, v2 ; 3E180419 v_add_f32_e32 v12, s20, v12 ; 06181814 exp 15, 35, 0, 0, 0, v4, v6, v11, v12 ; F800023F 0C0B0604 exp 15, 36, 0, 0, 0, v0, v1, v2, v10 ; F800024F 0A020100 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v10, v7, v7 ; 10140F07 v_mac_f32_e32 v10, v8, v8 ; 3E141108 v_mac_f32_e32 v10, v9, v9 ; 3E141309 v_rsq_clamp_f32_e32 v10, v10 ; 7E14590A v_mul_f32_e32 v11, s19, v1 ; 10160213 v_mac_f32_e32 v11, s2, v0 ; 3E160002 v_mac_f32_e32 v11, s10, v2 ; 3E16040A v_mad_f32 v2, s26, v3, v2 ; D2820002 040A061A v_mul_f32_e32 v7, v10, v7 ; 100E0F0A v_mul_f32_e32 v8, v10, v8 ; 1010110A v_mul_f32_e32 v9, v10, v9 ; 1012130A v_mov_b32_e32 v10, 0x80000000 ; 7E1402FF 80000000 v_xor_b32_e32 v7, v7, v10 ; 3A0E1507 v_xor_b32_e32 v8, v8, v10 ; 3A101508 v_xor_b32_e32 v9, v9, v10 ; 3A121509 exp 15, 37, 0, 0, 0, v7, v8, v9, v21 ; F800025F 15090807 v_mad_f32 v0, s3, v3, v0 ; D2820000 04020603 v_mad_f32 v1, s8, v3, v1 ; D2820001 04060608 v_xor_b32_e32 v3, v6, v10 ; 3A061506 v_mul_f32_e32 v6, s19, v1 ; 100C0213 v_mul_f32_e32 v1, s24, v1 ; 10020218 v_mac_f32_e32 v1, s9, v0 ; 3E020009 v_mac_f32_e32 v1, s25, v2 ; 3E020419 v_add_f32_e32 v1, s20, v1 ; 06020214 v_rcp_f32_e32 v1, v1 ; 7E025501 v_mac_f32_e32 v6, s2, v0 ; 3E0C0002 v_mac_f32_e32 v6, s10, v2 ; 3E0C040A v_add_f32_e32 v0, s11, v6 ; 06000C0B v_mul_f32_e32 v0, v1, v0 ; 10000101 v_mul_f32_e32 v0, v12, v0 ; 1000010C v_add_f32_e32 v1, s11, v11 ; 0602160B v_min_f32_e32 v1, 0x3a83126f, v1 ; 1E0202FF 3A83126F v_max_f32_e32 v0, v1, v0 ; 20000101 exp 15, 38, 0, 0, 0, v5, v21, v21, v21 ; F800026F 15151505 v_mad_f32 v0, 2.0, v0, -v12 ; D2820000 843200F4 exp 15, 12, 0, 0, 0, v4, v3, v0, v12 ; F80000CF 0C000304 exp 15, 13, 0, 1, 0, v21, v21, v21, v21 ; F80008DF 15151515 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 44 Code Size: 2052 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL CONST[1][0..41] DCL CONST[2][0..13] DCL CONST[3][0] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL DCL TEMP[2..4], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 2.0000, 0.0000} IMM[1] UINT32 {0, 12, 28, 44} IMM[2] UINT32 {60, 16, 32, 48} IMM[3] UINT32 {4, 20, 36, 52} IMM[4] UINT32 {8, 24, 40, 56} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MOV TEMP[0].w, IMM[0].yyyy 4: MOV TEMP[0].x, IN[0].xxxx 5: MOV TEMP[0].y, IN[0].yyyy 6: MOV TEMP[0].z, IN[0].zzzz 7: MOV TEMP[2].x, CONST[1][0].wwww 8: MOV TEMP[2].y, CONST[1][1].wwww 9: MOV TEMP[2].z, CONST[1][2].wwww 10: MOV TEMP[2].w, CONST[1][3].wwww 11: DP4 TEMP[2].x, TEMP[0], TEMP[2] 12: MOV TEMP[3].x, CONST[1][0].xxxx 13: MOV TEMP[3].y, CONST[1][1].xxxx 14: MOV TEMP[3].z, CONST[1][2].xxxx 15: MOV TEMP[3].w, CONST[1][3].xxxx 16: DP4 TEMP[3].x, TEMP[0], TEMP[3] 17: MOV TEMP[4].x, CONST[1][0].yyyy 18: MOV TEMP[4].y, CONST[1][1].yyyy 19: MOV TEMP[4].z, CONST[1][2].yyyy 20: MOV TEMP[4].w, CONST[1][3].yyyy 21: DP4 TEMP[4].x, TEMP[0], TEMP[4] 22: MOV TEMP[3].y, -TEMP[4].xxxx 23: MOV TEMP[4].x, CONST[1][0].zzzz 24: MOV TEMP[4].y, CONST[1][1].zzzz 25: MOV TEMP[4].z, CONST[1][2].zzzz 26: MOV TEMP[4].w, CONST[1][3].zzzz 27: DP4 TEMP[0].x, TEMP[0], TEMP[4] 28: MAD TEMP[0].x, TEMP[0].xxxx, IMM[0].zzzz, -TEMP[2].xxxx 29: MOV TEMP[3].z, TEMP[0].xxxx 30: MOV TEMP[3].w, TEMP[2].xxxx 31: MOV OUT[1], TEMP[1] 32: MOV OUT[0], TEMP[3] 33: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = fmul float %33, %16 %37 = fmul float %34, %20 %38 = fadd float %36, %37 %39 = fmul float %35, %24 %40 = fadd float %38, %39 %41 = fadd float %40, %28 %42 = fmul float %33, %13 %43 = fmul float %34, %17 %44 = fadd float %42, %43 %45 = fmul float %35, %21 %46 = fadd float %44, %45 %47 = fadd float %46, %25 %48 = fmul float %33, %14 %49 = fmul float %34, %18 %50 = fadd float %48, %49 %51 = fmul float %35, %22 %52 = fadd float %50, %51 %53 = fadd float %52, %26 %54 = fsub float -0.000000e+00, %53 %55 = fmul float %33, %15 %56 = fmul float %34, %19 %57 = fadd float %55, %56 %58 = fmul float %35, %23 %59 = fadd float %57, %58 %60 = fadd float %59, %27 %61 = fmul float %60, 2.000000e+00 %62 = fsub float %61, %41 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %47, float %54, float %62, float %41) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 v_mov_b32_e32 v1, 0 ; 7E020280 s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200 exp 15, 32, 0, 0, 0, v1, v1, v1, v1 ; F800020F 01010101 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106 s_buffer_load_dword s6, s[0:3], 0x7 ; C2030107 s_buffer_load_dword s7, s[0:3], 0x8 ; C2038108 s_buffer_load_dword s8, s[0:3], 0x9 ; C2040109 s_buffer_load_dword s9, s[0:3], 0x0 ; C2048100 s_buffer_load_dword s10, s[0:3], 0x1 ; C2050101 s_buffer_load_dword s11, s[0:3], 0x2 ; C2058102 s_buffer_load_dword s12, s[0:3], 0x3 ; C2060103 s_buffer_load_dword s13, s[0:3], 0x4 ; C2068104 s_buffer_load_dword s14, s[0:3], 0xa ; C207010A s_buffer_load_dword s15, s[0:3], 0xb ; C207810B s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s17, s[0:3], 0xd ; C208810D s_buffer_load_dword s18, s[0:3], 0xe ; C209010E s_buffer_load_dword s0, s[0:3], 0xf ; C200010F s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; BF8C0000 v_mul_f32_e32 v0, s6, v3 ; 10000606 v_mac_f32_e32 v0, s12, v2 ; 3E00040C v_mul_f32_e32 v5, s13, v3 ; 100A060D v_mac_f32_e32 v5, s9, v2 ; 3E0A0409 v_mul_f32_e32 v6, s4, v3 ; 100C0604 v_mac_f32_e32 v6, s10, v2 ; 3E0C040A v_mul_f32_e32 v3, s5, v3 ; 10060605 v_mac_f32_e32 v3, s11, v2 ; 3E06040B v_mac_f32_e32 v0, s15, v4 ; 3E00080F v_mac_f32_e32 v5, s7, v4 ; 3E0A0807 v_mac_f32_e32 v6, s8, v4 ; 3E0C0808 v_mac_f32_e32 v3, s14, v4 ; 3E06080E v_add_f32_e32 v0, s0, v0 ; 06000000 v_add_f32_e32 v2, s16, v5 ; 06040A10 v_add_f32_e32 v4, s17, v6 ; 06080C11 v_add_f32_e32 v3, s18, v3 ; 06060612 v_xor_b32_e32 v4, 0x80000000, v4 ; 3A0808FF 80000000 v_mad_f32 v3, 2.0, v3, -v0 ; D2820003 840206F4 exp 15, 12, 0, 0, 0, v2, v4, v3, v0 ; F80000CF 00030402 exp 15, 13, 0, 1, 0, v1, v1, v1, v1 ; F80008DF 01010101 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 24 VGPRS: 8 Code Size: 204 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL OUT[0], COLOR DCL CONST[1][0..41] DCL CONST[2][0..13] DCL CONST[3][0] IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV OUT[0], IMM[0].xyyx 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call i32 @llvm.SI.packf16(float 1.000000e+00, float 0.000000e+00) %23 = bitcast i32 %22 to float %24 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 1.000000e+00) %25 = bitcast i32 %24 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %23, float %25, float %23, float %25) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: v_cvt_pkrtz_f16_f32_e64 v0, 1.0, 0 ; D25E0000 000100F2 v_cvt_pkrtz_f16_f32_e64 v1, 0, 1.0 ; D25E0001 0001E480 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 8 VGPRS: 4 Code Size: 28 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL OUT[0], COLOR DCL CONST[1][0..41] DCL CONST[2][0..13] DCL CONST[3][0] IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV OUT[0], IMM[0].xyyx 1: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call i32 @llvm.SI.packf16(float 1.000000e+00, float 0.000000e+00) %23 = bitcast i32 %22 to float %24 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 1.000000e+00) %25 = bitcast i32 %24 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %23, float %25, float %23, float %25) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: v_cvt_pkrtz_f16_f32_e64 v0, 1.0, 0 ; D25E0000 000100F2 v_cvt_pkrtz_f16_f32_e64 v1, 0, 1.0 ; D25E0001 0001E480 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 8 VGPRS: 4 Code Size: 28 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], POSITION, LINEAR DCL IN[1], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL CONST[4] DCL CONST[1][0..8] DCL CONST[2][0] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..16] DCL TEMP[0] DCL TEMP[1..6], LOCAL IMM[0] FLT32 { 0.5000, 0.0000, -0.5000, 2.0000} IMM[1] UINT32 {3, 400, 304, 0} IMM[2] UINT32 {320, 12, 64, 60} IMM[3] FLT32 { 1.0000, 3.0000, 0.0000, 0.0000} IMM[4] UINT32 {36, 28, 16, 0} 0: MOV TEMP[0], IN[0] 1: MAD TEMP[0].y, IN[0], CONST[4].xxxx, CONST[4].yyyy 2: ADD TEMP[1].xy, TEMP[0].xyyy, IMM[0].xxxx 3: MUL TEMP[1].xy, TEMP[1].xyyy, CONST[4][25].xyyy 4: ADD TEMP[2].xyz, IN[1].xyzz, -CONST[4][19].xyzz 5: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz 6: RSQ TEMP[3].x, TEMP[3].xxxx 7: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx 8: MOV TEMP[3].xy, TEMP[1].xyyy 9: MOV TEMP[3].w, IMM[0].yyyy 10: TXL TEMP[3].xyz, TEMP[3], SAMP[1], 2D 11: MOV TEMP[4].xy, TEMP[1].xyyy 12: MOV TEMP[4].w, IMM[0].yyyy 13: TXL TEMP[4].xyz, TEMP[4], SAMP[2], 2D 14: ADD TEMP[4].xyz, TEMP[4].xyzz, IMM[0].zzzz 15: MUL TEMP[4].xyz, TEMP[4].xyzz, IMM[0].wwww 16: MOV TEMP[1].xy, TEMP[1].xyyy 17: MOV TEMP[1].w, IMM[0].yyyy 18: TXL TEMP[1].x, TEMP[1], SAMP[0], 2D 19: DP3 TEMP[5].x, CONST[4][20].xyzz, TEMP[2].xyzz 20: RCP TEMP[5].xyz, TEMP[5].xxxx 21: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xyzz 22: MAD TEMP[1].xyz, TEMP[1].xxxx, TEMP[2].xyzz, CONST[4][19].xyzz 23: ADD TEMP[1].xyz, CONST[1][0].xyzz, -TEMP[1].xyzz 24: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz 25: RSQ TEMP[2].x, TEMP[2].xxxx 26: MUL TEMP[2].xyz, TEMP[1].xyzz, TEMP[2].xxxx 27: DP3 TEMP[1].x, TEMP[1].xyzz, TEMP[1].xyzz 28: SQRT TEMP[1].x, TEMP[1].xxxx 29: ADD TEMP[1].x, CONST[1][0].wwww, -TEMP[1].xxxx 30: FSLT TEMP[5].x, TEMP[1].xxxx, IMM[0].yyyy 31: AND TEMP[5].x, TEMP[5].xxxx, IMM[3].xxxx 32: KILL_IF -TEMP[5].xxxx 33: DP3 TEMP[4].x, TEMP[4].xyzz, TEMP[2].xyzz 34: FSLT TEMP[5].x, TEMP[4].xxxx, IMM[0].yyyy 35: AND TEMP[5].x, TEMP[5].xxxx, IMM[3].xxxx 36: KILL_IF -TEMP[5].xxxx 37: MUL TEMP[5].x, CONST[1][4].xxxx, TEMP[2].yyyy 38: MAD TEMP[5].x, CONST[1][3].wwww, TEMP[2].xxxx, -TEMP[5].xxxx 39: MUL TEMP[6].x, CONST[1][4].xxxx, TEMP[2].xxxx 40: MAD TEMP[2].x, CONST[1][3].wwww, TEMP[2].yyyy, TEMP[6].xxxx 41: MOV TEMP[5].y, TEMP[2].xxxx 42: RCP TEMP[2].x, CONST[1][0].wwww 43: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx 44: RCP TEMP[2].x, CONST[1][2].yyyy 45: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx 46: MOV_SAT TEMP[1].x, TEMP[1].xxxx 47: MUL TEMP[2].x, IMM[0].wwww, TEMP[1].xxxx 48: ADD TEMP[2].x, IMM[3].yyyy, -TEMP[2].xxxx 49: MUL TEMP[2].x, TEMP[1].xxxx, TEMP[2].xxxx 50: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx 51: MUL TEMP[1].xyz, CONST[1][1].wwww, TEMP[1].xxxx 52: MAD TEMP[2].xy, IMM[0].xxxx, TEMP[5].xyyy, IMM[0].xxxx 53: MOV TEMP[2].xy, TEMP[2].xyyy 54: TEX TEMP[2].xyz, TEMP[2], SAMP[3], 2D 55: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xyzz 56: MUL TEMP[2].xyz, CONST[1][1].xyzz, TEMP[2].xyzz 57: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[4].xxxx 58: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xyzz 59: MOV TEMP[2].w, IMM[3].xxxx 60: MOV TEMP[2].x, TEMP[1].xxxx 61: MOV TEMP[2].y, TEMP[1].yyyy 62: MOV TEMP[2].z, TEMP[1].zzzz 63: MOV OUT[0], TEMP[2] 64: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0 %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %26 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %27 = load <16 x i8>, <16 x i8> addrspace(2)* %26, align 16, !tbaa !0 %28 = call float @llvm.SI.load.const(<16 x i8> %27, i32 0) %29 = call float @llvm.SI.load.const(<16 x i8> %27, i32 4) %30 = call float @llvm.SI.load.const(<16 x i8> %27, i32 8) %31 = call float @llvm.SI.load.const(<16 x i8> %27, i32 12) %32 = call float @llvm.SI.load.const(<16 x i8> %27, i32 16) %33 = call float @llvm.SI.load.const(<16 x i8> %27, i32 20) %34 = call float @llvm.SI.load.const(<16 x i8> %27, i32 24) %35 = call float @llvm.SI.load.const(<16 x i8> %27, i32 28) %36 = call float @llvm.SI.load.const(<16 x i8> %27, i32 36) %37 = call float @llvm.SI.load.const(<16 x i8> %27, i32 60) %38 = call float @llvm.SI.load.const(<16 x i8> %27, i32 64) %39 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4 %40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, !tbaa !0 %41 = call float @llvm.SI.load.const(<16 x i8> %40, i32 304) %42 = call float @llvm.SI.load.const(<16 x i8> %40, i32 308) %43 = call float @llvm.SI.load.const(<16 x i8> %40, i32 312) %44 = call float @llvm.SI.load.const(<16 x i8> %40, i32 320) %45 = call float @llvm.SI.load.const(<16 x i8> %40, i32 324) %46 = call float @llvm.SI.load.const(<16 x i8> %40, i32 328) %47 = call float @llvm.SI.load.const(<16 x i8> %40, i32 400) %48 = call float @llvm.SI.load.const(<16 x i8> %40, i32 404) %49 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)* %50 = load <32 x i8>, <32 x i8> addrspace(2)* %49, align 32, !tbaa !0 %51 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)* %52 = load <16 x i8>, <16 x i8> addrspace(2)* %51, align 16, !tbaa !0 %53 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %54 = bitcast <8 x i32> addrspace(2)* %53 to <32 x i8> addrspace(2)* %55 = load <32 x i8>, <32 x i8> addrspace(2)* %54, align 32, !tbaa !0 %56 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %57 = bitcast <4 x i32> addrspace(2)* %56 to <16 x i8> addrspace(2)* %58 = load <16 x i8>, <16 x i8> addrspace(2)* %57, align 16, !tbaa !0 %59 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2 %60 = bitcast <8 x i32> addrspace(2)* %59 to <32 x i8> addrspace(2)* %61 = load <32 x i8>, <32 x i8> addrspace(2)* %60, align 32, !tbaa !0 %62 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2 %63 = bitcast <4 x i32> addrspace(2)* %62 to <16 x i8> addrspace(2)* %64 = load <16 x i8>, <16 x i8> addrspace(2)* %63, align 16, !tbaa !0 %65 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3 %66 = bitcast <8 x i32> addrspace(2)* %65 to <32 x i8> addrspace(2)* %67 = load <32 x i8>, <32 x i8> addrspace(2)* %66, align 32, !tbaa !0 %68 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3 %69 = bitcast <4 x i32> addrspace(2)* %68 to <16 x i8> addrspace(2)* %70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !tbaa !0 %71 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %72 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %73 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %74 = fmul float %24, %15 %75 = fadd float %74, %25 %76 = fadd float %14, 5.000000e-01 %77 = fadd float %75, 5.000000e-01 %78 = fmul float %76, %47 %79 = fmul float %77, %48 %80 = fsub float %71, %41 %81 = fsub float %72, %42 %82 = fsub float %73, %43 %83 = fmul float %80, %80 %84 = fmul float %81, %81 %85 = fadd float %84, %83 %86 = fmul float %82, %82 %87 = fadd float %85, %86 %88 = call float @llvm.AMDGPU.rsq.clamped.f32(float %87) %89 = fmul float %80, %88 %90 = fmul float %81, %88 %91 = fmul float %82, %88 %92 = bitcast float %78 to i32 %93 = bitcast float %79 to i32 %94 = insertelement <4 x i32> undef, i32 %92, i32 0 %95 = insertelement <4 x i32> %94, i32 %93, i32 1 %96 = insertelement <4 x i32> %95, i32 0, i32 2 %97 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %96, <32 x i8> %55, <16 x i8> %58, i32 2) %98 = extractelement <4 x float> %97, i32 0 %99 = extractelement <4 x float> %97, i32 1 %100 = extractelement <4 x float> %97, i32 2 %101 = bitcast float %78 to i32 %102 = bitcast float %79 to i32 %103 = insertelement <4 x i32> undef, i32 %101, i32 0 %104 = insertelement <4 x i32> %103, i32 %102, i32 1 %105 = insertelement <4 x i32> %104, i32 0, i32 2 %106 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %105, <32 x i8> %61, <16 x i8> %64, i32 2) %107 = extractelement <4 x float> %106, i32 0 %108 = extractelement <4 x float> %106, i32 1 %109 = extractelement <4 x float> %106, i32 2 %110 = fadd float %107, -5.000000e-01 %111 = fadd float %108, -5.000000e-01 %112 = fadd float %109, -5.000000e-01 %113 = fmul float %110, 2.000000e+00 %114 = fmul float %111, 2.000000e+00 %115 = fmul float %112, 2.000000e+00 %116 = bitcast float %78 to i32 %117 = bitcast float %79 to i32 %118 = insertelement <4 x i32> undef, i32 %116, i32 0 %119 = insertelement <4 x i32> %118, i32 %117, i32 1 %120 = insertelement <4 x i32> %119, i32 0, i32 2 %121 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %120, <32 x i8> %50, <16 x i8> %52, i32 2) %122 = extractelement <4 x float> %121, i32 0 %123 = fmul float %44, %89 %124 = fmul float %45, %90 %125 = fadd float %124, %123 %126 = fmul float %46, %91 %127 = fadd float %125, %126 %128 = fdiv float 1.000000e+00, %127 %129 = fmul float %89, %128 %130 = fmul float %90, %128 %131 = fmul float %91, %128 %132 = fmul float %122, %129 %133 = fadd float %132, %41 %134 = fmul float %122, %130 %135 = fadd float %134, %42 %136 = fmul float %122, %131 %137 = fadd float %136, %43 %138 = fsub float %28, %133 %139 = fsub float %29, %135 %140 = fsub float %30, %137 %141 = fmul float %138, %138 %142 = fmul float %139, %139 %143 = fadd float %142, %141 %144 = fmul float %140, %140 %145 = fadd float %143, %144 %146 = call float @llvm.AMDGPU.rsq.clamped.f32(float %145) %147 = fmul float %138, %146 %148 = fmul float %139, %146 %149 = fmul float %140, %146 %150 = fmul float %138, %138 %151 = fmul float %139, %139 %152 = fadd float %151, %150 %153 = fmul float %140, %140 %154 = fadd float %152, %153 %155 = call float @llvm.sqrt.f32(float %154) %156 = fsub float %31, %155 %157 = fcmp olt float %156, 0.000000e+00 %158 = select i1 %157, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %158) %159 = fmul float %113, %147 %160 = fmul float %114, %148 %161 = fadd float %160, %159 %162 = fmul float %115, %149 %163 = fadd float %161, %162 %164 = fcmp olt float %163, 0.000000e+00 %165 = select i1 %164, float -1.000000e+00, float 0.000000e+00 call void @llvm.AMDGPU.kill(float %165) %166 = fmul float %38, %148 %167 = fmul float %37, %147 %168 = fsub float %167, %166 %169 = fmul float %38, %147 %170 = fmul float %37, %148 %171 = fadd float %170, %169 %172 = fdiv float 1.000000e+00, %31 %173 = fmul float %156, %172 %174 = fdiv float 1.000000e+00, %36 %175 = fmul float %173, %174 %176 = call float @llvm.AMDIL.clamp.(float %175, float 0.000000e+00, float 1.000000e+00) %177 = fmul float %176, 2.000000e+00 %178 = fsub float 3.000000e+00, %177 %179 = fmul float %176, %178 %180 = fmul float %176, %179 %181 = fmul float %35, %180 %182 = fmul float %35, %180 %183 = fmul float %35, %180 %184 = fmul float %168, 5.000000e-01 %185 = fadd float %184, 5.000000e-01 %186 = fmul float %171, 5.000000e-01 %187 = fadd float %186, 5.000000e-01 %188 = bitcast float %185 to i32 %189 = bitcast float %187 to i32 %190 = insertelement <2 x i32> undef, i32 %188, i32 0 %191 = insertelement <2 x i32> %190, i32 %189, i32 1 %192 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %191, <32 x i8> %67, <16 x i8> %70, i32 2) %193 = extractelement <4 x float> %192, i32 0 %194 = extractelement <4 x float> %192, i32 1 %195 = extractelement <4 x float> %192, i32 2 %196 = fmul float %193, %98 %197 = fmul float %194, %99 %198 = fmul float %195, %100 %199 = fmul float %32, %196 %200 = fmul float %33, %197 %201 = fmul float %34, %198 %202 = fmul float %199, %163 %203 = fmul float %200, %163 %204 = fmul float %201, %163 %205 = fmul float %181, %202 %206 = fmul float %182, %203 %207 = fmul float %183, %204 %208 = call i32 @llvm.SI.packf16(float %205, float %206) %209 = bitcast i32 %208 to float %210 = call i32 @llvm.SI.packf16(float %207, float 1.000000e+00) %211 = bitcast i32 %210 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %209, float %211, float %209, float %211) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300 s_load_dwordx4 s[16:19], s[2:3], 0x10 ; C0880310 s_load_dwordx4 s[0:3], s[2:3], 0x4 ; C0800304 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[12:15], 0x10 ; C2040D10 s_buffer_load_dword s10, s[12:15], 0x11 ; C2050D11 s_mov_b32 m0, s9 ; BEFC0309 s_buffer_load_dword s20, s[16:19], 0x4c ; C20A114C s_buffer_load_dword s21, s[16:19], 0x4d ; C20A914D s_buffer_load_dword s22, s[16:19], 0x4e ; C20B114E s_buffer_load_dword s23, s[16:19], 0x50 ; C20B9150 s_buffer_load_dword s24, s[16:19], 0x51 ; C20C1151 s_buffer_load_dword s25, s[16:19], 0x52 ; C20C9152 s_buffer_load_dword s9, s[16:19], 0x64 ; C2049164 s_buffer_load_dword s11, s[16:19], 0x65 ; C2059165 v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 v_interp_p1_f32 v5, v0, 1, 0, [m0] ; C8140100 v_interp_p2_f32 v5, [v5], v1, 1, 0, [m0] ; C8150101 v_interp_p1_f32 v0, v0, 2, 0, [m0] ; C8000200 v_interp_p2_f32 v0, [v0], v1, 2, 0, [m0] ; C8010201 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s10 ; 7E02020A v_mac_f32_e32 v1, s8, v3 ; 3E020608 v_add_f32_e32 v2, 0.5, v2 ; 060404F0 v_add_f32_e32 v1, 0.5, v1 ; 060202F0 v_mul_f32_e32 v6, s9, v2 ; 100C0409 v_mul_f32_e32 v7, s11, v1 ; 100E020B v_subrev_f32_e32 v1, s20, v4 ; 0A020814 v_subrev_f32_e32 v2, s21, v5 ; 0A040A15 v_subrev_f32_e32 v0, s22, v0 ; 0A000016 s_load_dwordx4 s[28:31], s[4:5], 0x0 ; C08E0500 s_load_dwordx4 s[32:35], s[4:5], 0x4 ; C0900504 s_load_dwordx4 s[36:39], s[4:5], 0x8 ; C0920508 s_load_dwordx8 s[40:47], s[6:7], 0x8 ; C0D40708 s_load_dwordx8 s[48:55], s[6:7], 0x10 ; C0D80710 s_load_dwordx8 s[56:63], s[6:7], 0x0 ; C0DC0700 v_mul_f32_e32 v3, v1, v1 ; 10060301 v_mac_f32_e32 v3, v2, v2 ; 3E060502 v_mac_f32_e32 v3, v0, v0 ; 3E060100 v_rsq_clamp_f32_e32 v3, v3 ; 7E065903 v_mov_b32_e32 v8, 0 ; 7E100280 s_load_dwordx4 s[8:11], s[4:5], 0xc ; C084050C s_load_dwordx8 s[12:19], s[6:7], 0x18 ; C0C60718 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[9:11], 7, 0, 0, 0, 0, 0, 0, 0, v[6:9], s[40:47], s[32:35] ; F0900700 010A0906 s_waitcnt vmcnt(0) ; BF8C0770 image_sample_l v[12:14], 7, 0, 0, 0, 0, 0, 0, 0, v[6:9], s[48:55], s[36:39] ; F0900700 012C0C06 image_sample_l v4, 1, 0, 0, 0, 0, 0, 0, 0, v[6:9], s[56:63], s[28:31] ; F0900100 00EE0406 v_mul_f32_e32 v1, v3, v1 ; 10020303 v_mul_f32_e32 v2, v3, v2 ; 10040503 v_mul_f32_e32 v0, v3, v0 ; 10000103 v_mul_f32_e32 v3, s23, v1 ; 10060217 v_mac_f32_e32 v3, s24, v2 ; 3E060418 v_mac_f32_e32 v3, s25, v0 ; 3E060019 v_rcp_f32_e32 v3, v3 ; 7E065503 s_waitcnt vmcnt(1) ; BF8C0771 v_add_f32_e32 v5, -0.5, v12 ; 060A18F1 v_add_f32_e32 v6, -0.5, v13 ; 060C1AF1 v_add_f32_e32 v7, -0.5, v14 ; 060E1CF1 v_mul_f32_e32 v1, v3, v1 ; 10020303 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v4, v1, s20 ; D2820001 00520304 v_mul_f32_e32 v2, v3, v2 ; 10040503 v_mul_f32_e32 v0, v3, v0 ; 10000103 v_mad_f32 v2, v4, v2, s21 ; D2820002 00560504 v_mad_f32 v0, v4, v0, s22 ; D2820000 005A0104 s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103 s_buffer_load_dword s20, s[0:3], 0x4 ; C20A0104 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v1, s4, v1 ; 08020204 v_sub_f32_e32 v2, s5, v2 ; 08040405 v_sub_f32_e32 v0, s6, v0 ; 08000006 v_mul_f32_e32 v3, v1, v1 ; 10060301 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 v_mac_f32_e32 v3, v2, v2 ; 3E060502 v_mac_f32_e32 v3, v0, v0 ; 3E060100 v_sqrt_f32_e32 v4, v3 ; 7E086703 v_sub_f32_e32 v4, s7, v4 ; 08080807 v_cmp_gt_f32_e32 vcc, 0, v4 ; 7C080880 v_cndmask_b32_e64 v8, 0, -1.0, vcc ; D2000008 01A9E680 v_rsq_clamp_f32_e32 v3, v3 ; 7E065903 v_rcp_f32_e32 v12, s7 ; 7E185407 s_buffer_load_dword s5, s[0:3], 0xf ; C202810F s_waitcnt lgkmcnt(0) ; BF8C007F v_rcp_f32_e32 v13, s4 ; 7E1A5404 v_mul_f32_e32 v14, v3, v1 ; 101C0303 v_mul_f32_e32 v15, v3, v2 ; 101E0503 v_mul_f32_e32 v16, v3, v0 ; 10200103 v_cmpx_le_f32_e32 vcc, 0, v8 ; 7C261080 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 v_mad_f32 v1, v3, v1, v14 ; D2820001 043A0303 v_mul_f32_e32 v1, v1, v5 ; 10020B01 v_mad_f32 v2, v3, v2, v15 ; D2820002 043E0503 v_mac_f32_e32 v1, v2, v6 ; 3E020D02 v_mac_f32_e32 v16, v3, v0 ; 3E200103 v_mac_f32_e32 v1, v16, v7 ; 3E020F10 v_cmp_gt_f32_e32 vcc, 0, v1 ; 7C080280 v_cndmask_b32_e64 v0, 0, -1.0, vcc ; D2000000 01A9E680 v_cmpx_le_f32_e32 vcc, 0, v0 ; 7C260080 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v15 ; 10001E04 v_mul_f32_e32 v2, s4, v14 ; 10041C04 v_mad_f32 v0, s5, v14, -v0 ; D2820000 84021C05 v_mac_f32_e32 v2, s5, v15 ; 3E041E05 v_mad_f32 v5, 0.5, v0, 0.5 ; D2820005 03C200F0 v_mad_f32 v6, 0.5, v2, 0.5 ; D2820006 03C204F0 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 image_sample v[5:7], 7, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[12:19], s[8:11] ; F0800700 00430505 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_buffer_load_dword s0, s[0:3], 0x6 ; C2000106 v_mul_f32_e32 v0, v12, v4 ; 1000090C v_mul_f32_e32 v0, v13, v0 ; 1000010D v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080 v_madak_f32_e32 v2, -2.0, v0, 0x40400000 ; 420400F5 40400000 v_mul_f32_e32 v2, v2, v0 ; 10040102 v_mul_f32_e32 v0, v2, v0 ; 10000102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v0 ; 10000004 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, v9, v5 ; 10040B09 v_mul_f32_e32 v2, s20, v2 ; 10040414 v_mul_f32_e32 v3, v10, v6 ; 10060D0A v_mul_f32_e32 v3, s5, v3 ; 10060605 v_mul_f32_e32 v4, v11, v7 ; 10080F0B v_mul_f32_e32 v4, s0, v4 ; 10080800 v_mul_f32_e32 v2, v1, v2 ; 10040501 v_mul_f32_e32 v3, v1, v3 ; 10060701 v_mul_f32_e32 v1, v1, v4 ; 10020901 v_mul_f32_e32 v2, v2, v0 ; 10040102 v_mul_f32_e32 v3, v3, v0 ; 10060103 v_mul_f32_e32 v0, v1, v0 ; 10000101 v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702 v_cvt_pkrtz_f16_f32_e64 v0, v0, 1.0 ; D25E0000 0001E500 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 72 VGPRS: 20 Code Size: 632 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL OUT[4], GENERIC[2] DCL OUT[5], GENERIC[3] DCL OUT[6], GENERIC[4] DCL OUT[7], GENERIC[5] DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[1][0] DCL CONST[2][0..15] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..7] DCL CONST[6][0] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL DCL TEMP[2..15], LOCAL IMM[0] FLT32 { 0.0000, 0.5000, -0.5000, 3.0000} IMM[1] UINT32 {0, 4, 32, 96} IMM[2] FLT32 { 4095.9399, 2.0000, -2.0000, 1.0000} IMM[3] FLT32 { 0.0774, 0.9479, 0.0521, 2.4000} IMM[4] FLT32 { 0.0404, 0.0039, 0.0000, 0.0010} IMM[5] UINT32 {112, 3, 320, 336} IMM[6] UINT32 {48, 304, 64, 512} IMM[7] UINT32 {528, 544, 560, 516} IMM[8] UINT32 {532, 548, 564, 524} IMM[9] UINT32 {540, 556, 572, 364} IMM[10] UINT32 {372, 520, 536, 552} IMM[11] UINT32 {568, 0, 0, 0} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MAD TEMP[0].x, IN[0].xxxx, IMM[0].yyyy, IMM[0].yyyy 4: MAD TEMP[2].x, IN[0].yyyy, IMM[0].zzzz, IMM[0].yyyy 5: MOV TEMP[3].x, TEMP[0].xxxx 6: MOV TEMP[3].y, TEMP[2].xxxx 7: MOV TEMP[3].z, TEMP[0].xxxx 8: MOV TEMP[3].w, TEMP[2].xxxx 9: RCP TEMP[0].x, CONST[1][0].yyyy 10: MUL TEMP[2].x, IN[1].xxxx, IMM[0].wwww 11: FSLT TEMP[4].x, IN[1].xxxx, CONST[1][0].wwww 12: UIF TEMP[4].xxxx :0 13: MOV TEMP[2].x, TEMP[2].xxxx 14: ELSE :0 15: MOV TEMP[2].x, IMM[0].xxxx 16: ENDIF 17: MAD TEMP[2].x, CONST[1][0].zzzz, TEMP[2].xxxx, CONST[1][0].xxxx 18: MOV TEMP[4].x, IMM[0].xxxx 19: MOV TEMP[4].y, TEMP[2].xxxx 20: MOV TEMP[4].xy, TEMP[4].xyyy 21: MOV TEMP[4].w, IMM[0].xxxx 22: TXL TEMP[4].yz, TEMP[4], SAMP[0], 2D 23: MUL TEMP[5].x, TEMP[4].yyyy, IMM[2].xxxx 24: RCP TEMP[5].x, TEMP[5].xxxx 25: MUL TEMP[5].x, TEMP[5].xxxx, CONST[1][0].yyyy 26: MUL TEMP[5].x, IMM[0].yyyy, TEMP[5].xxxx 27: MUL TEMP[5].x, TEMP[5].xxxx, IN[1].zzzz 28: MUL TEMP[5].x, TEMP[5].xxxx, CONST[5][2].xxxx 29: FRC TEMP[6].x, TEMP[5].xxxx 30: FLR TEMP[5].x, TEMP[5].xxxx 31: MUL TEMP[5].x, TEMP[5].xxxx, IMM[2].yyyy 32: ADD TEMP[7].x, CONST[1][0].yyyy, IMM[2].zzzz 33: MIN TEMP[7].x, TEMP[7].xxxx, TEMP[5].xxxx 34: RCP TEMP[8].x, CONST[1][0].yyyy 35: MUL TEMP[8].x, TEMP[5].xxxx, TEMP[8].xxxx 36: FLR TEMP[8].x, TEMP[8].xxxx 37: MUL TEMP[8].x, CONST[1][0].yyyy, TEMP[8].xxxx 38: ADD TEMP[5].x, TEMP[5].xxxx, -TEMP[8].xxxx 39: FSLT TEMP[4].x, IMM[0].xxxx, TEMP[4].zzzz 40: UIF TEMP[4].xxxx :0 41: MOV TEMP[4].x, TEMP[7].xxxx 42: ELSE :0 43: MOV TEMP[4].x, TEMP[5].xxxx 44: ENDIF 45: MUL TEMP[4].x, TEMP[0].xxxx, TEMP[4].xxxx 46: MOV TEMP[5].x, TEMP[4].xxxx 47: MOV TEMP[5].y, TEMP[2].xxxx 48: MOV TEMP[5].xy, TEMP[5].xyyy 49: MOV TEMP[5].w, IMM[0].xxxx 50: TXL TEMP[5].xw, TEMP[5], SAMP[0], 2D 51: LRP TEMP[5].x, TEMP[6].xxxx, TEMP[5].wwww, TEMP[5].xxxx 52: ADD TEMP[6].x, TEMP[2].xxxx, CONST[1][0].zzzz 53: MOV TEMP[7].x, TEMP[4].xxxx 54: MOV TEMP[7].y, TEMP[6].xxxx 55: MOV TEMP[7].xy, TEMP[7].xyyy 56: MOV TEMP[7].w, IMM[0].xxxx 57: TXL TEMP[7], TEMP[7], SAMP[0], 2D 58: MOV TEMP[8].x, TEMP[4].xxxx 59: MAD TEMP[2].x, IMM[2].yyyy, CONST[1][0].zzzz, TEMP[2].xxxx 60: MOV TEMP[8].y, TEMP[2].xxxx 61: MOV TEMP[2].xy, TEMP[8].xyyy 62: MOV TEMP[2].w, IMM[0].xxxx 63: TXL TEMP[2], TEMP[2], SAMP[0], 2D 64: ADD TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx 65: MOV TEMP[0].y, TEMP[6].xxxx 66: MOV TEMP[0].xy, TEMP[0].xyyy 67: MOV TEMP[0].w, IMM[0].xxxx 68: TXL TEMP[0], TEMP[0], SAMP[0], 2D 69: ADD TEMP[4].xy, TEMP[0].zwww, -TEMP[0].xyyy 70: ADD TEMP[6].xy, TEMP[7].zwww, -TEMP[7].xyyy 71: RCP TEMP[8].x, TEMP[4].xxxx 72: RCP TEMP[8].y, TEMP[4].yyyy 73: MUL TEMP[6].xy, TEMP[6].xyyy, TEMP[8].xyyy 74: MUL TEMP[8].xy, TEMP[0].xyyy, TEMP[6].xyyy 75: ADD TEMP[7].xy, TEMP[7].xyyy, -TEMP[8].xyyy 76: ADD TEMP[6].xy, TEMP[7].xyyy, TEMP[6].xyyy 77: ADD TEMP[8].xy, TEMP[2].zwww, -TEMP[2].xyyy 78: RCP TEMP[9].x, TEMP[4].xxxx 79: RCP TEMP[9].y, TEMP[4].yyyy 80: MUL TEMP[4].xy, TEMP[8].xyyy, TEMP[9].xyyy 81: MUL TEMP[8].xy, TEMP[0].xyyy, TEMP[4].xyyy 82: ADD TEMP[2].xy, TEMP[2].xyyy, -TEMP[8].xyyy 83: ADD TEMP[4].xy, TEMP[2].xyyy, TEMP[4].xyyy 84: MOV TEMP[8].y, IMM[0].xxxx 85: MOV TEMP[8].x, TEMP[5].xxxx 86: MUL TEMP[5].x, IMM[0].yyyy, TEMP[5].xxxx 87: ADD TEMP[9].x, IMM[0].yyyy, TEMP[5].xxxx 88: MOV TEMP[10].x, TEMP[9].xxxx 89: MOV TEMP[10].y, TEMP[9].xxxx 90: MOV TEMP[10].z, TEMP[5].xxxx 91: MOV TEMP[10].w, TEMP[5].xxxx 92: ADD TEMP[5], TEMP[3], IMM[0].zzzz 93: RCP TEMP[9].x, CONST[5][2].zzzz 94: MOV_SAT TEMP[9].x, TEMP[9].xxxx 95: MAD TEMP[5], TEMP[5], TEMP[9].xxxx, IMM[0].yyyy 96: LRP TEMP[3], TEMP[10], TEMP[5], TEMP[3] 97: MAD TEMP[5].x, CONST[5][6].zzzz, TEMP[3].xxxx, CONST[5][6].xxxx 98: MAD TEMP[9].x, CONST[5][6].wwww, TEMP[3].yyyy, CONST[5][6].yyyy 99: MOV TEMP[10].x, TEMP[5].xxxx 100: MOV TEMP[10].y, TEMP[9].xxxx 101: MAD TEMP[11].x, CONST[5][6].zzzz, TEMP[3].zzzz, CONST[5][6].xxxx 102: MOV TEMP[10].z, TEMP[11].xxxx 103: MAD TEMP[3].x, CONST[5][6].wwww, TEMP[3].wwww, CONST[5][6].yyyy 104: MOV TEMP[10].w, TEMP[3].xxxx 105: ADD TEMP[3], CONST[5][6].xyxy, -TEMP[10] 106: MAD TEMP[3], TEMP[3], IMM[2].yyyy, CONST[5][6].zwzw 107: FSLT TEMP[11].x, TEMP[3].xxxx, IMM[0].xxxx 108: UIF TEMP[11].xxxx :0 109: MOV TEMP[11].x, TEMP[0].zzzz 110: ELSE :0 111: MOV TEMP[11].x, TEMP[0].xxxx 112: ENDIF 113: FSLT TEMP[12].x, TEMP[3].yyyy, IMM[0].xxxx 114: UIF TEMP[12].xxxx :0 115: MOV TEMP[12].x, TEMP[0].wwww 116: ELSE :0 117: MOV TEMP[12].x, TEMP[0].yyyy 118: ENDIF 119: MOV TEMP[0].x, TEMP[11].xxxx 120: MOV TEMP[0].y, TEMP[12].xxxx 121: MOV TEMP[5].x, TEMP[5].xxxx 122: MOV TEMP[5].y, TEMP[9].xxxx 123: ADD TEMP[0].xy, TEMP[0].xyyy, -TEMP[5].xyyy 124: RCP TEMP[5].x, TEMP[3].xxxx 125: RCP TEMP[5].y, TEMP[3].yyyy 126: MUL TEMP[0].xy, TEMP[0].xyyy, TEMP[5].xyyy 127: MOV_SAT TEMP[0].xy, TEMP[0].xyyy 128: MAD TEMP[3], TEMP[0].xyxy, TEMP[3], TEMP[10] 129: MUL TEMP[0].xy, TEMP[0].xyyy, IMM[2].yyyy 130: ADD TEMP[0].xy, IMM[2].wwww, -TEMP[0].xyyy 131: MUL TEMP[0].xy, TEMP[0].xyyy, IN[0].xyyy 132: LRP TEMP[5].xy, TEMP[3].xyyy, TEMP[6].xyyy, TEMP[7].xyyy 133: LRP TEMP[2].xy, TEMP[3].zwww, TEMP[4].xyyy, TEMP[2].xyyy 134: MOV TEMP[3].x, TEMP[5].xxxx 135: MOV TEMP[3].y, TEMP[5].yyyy 136: MOV TEMP[3].z, TEMP[2].xxxx 137: MOV TEMP[3].w, TEMP[2].yyyy 138: MUL TEMP[2].xyz, IN[2].xyzz, IMM[3].xxxx 139: MAD TEMP[4].xyz, IN[2].xyzz, IMM[3].yyyy, IMM[3].zzzz 140: POW TEMP[5].x, TEMP[4].xxxx, IMM[3].wwww 141: POW TEMP[5].y, TEMP[4].yyyy, IMM[3].wwww 142: POW TEMP[5].z, TEMP[4].zzzz, IMM[3].wwww 143: FSLT TEMP[4].x, IMM[4].xxxx, IN[2].xxxx 144: UIF TEMP[4].xxxx :0 145: MOV TEMP[4].x, TEMP[5].xxxx 146: ELSE :0 147: MOV TEMP[4].x, TEMP[2].xxxx 148: ENDIF 149: FSLT TEMP[6].x, IMM[4].xxxx, IN[2].yyyy 150: UIF TEMP[6].xxxx :0 151: MOV TEMP[6].x, TEMP[5].yyyy 152: ELSE :0 153: MOV TEMP[6].x, TEMP[2].yyyy 154: ENDIF 155: FSLT TEMP[7].x, IMM[4].xxxx, IN[2].zzzz 156: UIF TEMP[7].xxxx :0 157: MOV TEMP[5].x, TEMP[5].zzzz 158: ELSE :0 159: MOV TEMP[5].x, TEMP[2].zzzz 160: ENDIF 161: MOV TEMP[2].x, TEMP[4].xxxx 162: MOV TEMP[2].y, TEMP[6].xxxx 163: MOV TEMP[2].z, TEMP[5].xxxx 164: MOV TEMP[2].w, IN[2].wwww 165: LRP TEMP[2], CONST[5][7].xxxx, TEMP[2], IN[2] 166: MUL TEMP[4].xyz, CONST[4][20].zxyy, CONST[4][21].yzxx 167: MAD TEMP[4].xyz, CONST[4][20].yzxx, CONST[4][21].zxyy, -TEMP[4].xyzz 168: COS TEMP[5].x, IN[4].xxxx 169: SIN TEMP[6].x, IN[4].xxxx 170: MUL TEMP[7].xyz, TEMP[5].xxxx, TEMP[4].xyzz 171: MAD TEMP[7].xyz, TEMP[6].xxxx, CONST[4][21].xyzz, TEMP[7].xyzz 172: MUL TEMP[4].xyz, TEMP[6].xxxx, TEMP[4].xyzz 173: MAD TEMP[4].xyz, TEMP[5].xxxx, CONST[4][21].xyzz, -TEMP[4].xyzz 174: SIN TEMP[5].x, IN[4].yyyy 175: COS TEMP[6].x, IN[4].yyyy 176: ADD TEMP[9].x, IMM[2].wwww, -TEMP[6].xxxx 177: MUL TEMP[10].x, TEMP[4].xxxx, TEMP[4].yyyy 178: MUL TEMP[10].x, TEMP[9].xxxx, TEMP[10].xxxx 179: MUL TEMP[11].x, TEMP[5].xxxx, TEMP[4].zzzz 180: MUL TEMP[12].x, TEMP[4].xxxx, TEMP[4].zzzz 181: MUL TEMP[12].x, TEMP[9].xxxx, TEMP[12].xxxx 182: MUL TEMP[13].x, TEMP[5].xxxx, TEMP[4].yyyy 183: MUL TEMP[14].x, TEMP[4].yyyy, TEMP[4].zzzz 184: MUL TEMP[9].x, TEMP[14].xxxx, TEMP[9].xxxx 185: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[4].xxxx 186: MUL TEMP[14].x, TEMP[4].xxxx, TEMP[4].xxxx 187: LRP TEMP[14].x, TEMP[6].xxxx, IMM[2].wwww, TEMP[14].xxxx 188: ADD TEMP[15].x, TEMP[10].xxxx, -TEMP[11].xxxx 189: MOV TEMP[14].y, TEMP[15].xxxx 190: ADD TEMP[15].x, TEMP[12].xxxx, TEMP[13].xxxx 191: MOV TEMP[14].z, TEMP[15].xxxx 192: ADD TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx 193: MUL TEMP[11].x, TEMP[4].yyyy, TEMP[4].yyyy 194: LRP TEMP[11].x, TEMP[6].xxxx, IMM[2].wwww, TEMP[11].xxxx 195: MOV TEMP[10].y, TEMP[11].xxxx 196: ADD TEMP[11].x, TEMP[9].xxxx, -TEMP[5].xxxx 197: MOV TEMP[10].z, TEMP[11].xxxx 198: ADD TEMP[11].x, TEMP[12].xxxx, -TEMP[13].xxxx 199: ADD TEMP[5].x, TEMP[9].xxxx, TEMP[5].xxxx 200: MOV TEMP[11].y, TEMP[5].xxxx 201: MUL TEMP[5].x, TEMP[4].zzzz, TEMP[4].zzzz 202: LRP TEMP[5].x, TEMP[6].xxxx, IMM[2].wwww, TEMP[5].xxxx 203: MOV TEMP[11].z, TEMP[5].xxxx 204: DP3 TEMP[5].x, TEMP[7].xyzz, TEMP[14].xyzz 205: DP3 TEMP[6].x, TEMP[7].xyzz, TEMP[10].xyzz 206: MOV TEMP[5].y, TEMP[6].xxxx 207: DP3 TEMP[6].x, TEMP[7].xyzz, TEMP[11].xyzz 208: MOV TEMP[5].z, TEMP[6].xxxx 209: ADD TEMP[6].x, CONST[5][3].yyyy, TEMP[0].xxxx 210: ADD TEMP[0].x, CONST[5][3].zzzz, TEMP[0].yyyy 211: MUL TEMP[0].xyz, TEMP[0].xxxx, TEMP[4].xyzz 212: MAD TEMP[0].xyz, TEMP[5].xyzz, TEMP[6].xxxx, TEMP[0].xyzz 213: ADD TEMP[4].xyz, CONST[4][19].xyzz, -IN[3].xyzz 214: DP3 TEMP[4].x, TEMP[4].xyzz, TEMP[4].xyzz 215: SQRT TEMP[4].x, TEMP[4].xxxx 216: MOV TEMP[5], TEMP[2] 217: MOV TEMP[6].x, IN[4].wwww 218: FSLT TEMP[7].x, IMM[0].xxxx, CONST[5][4].zzzz 219: UIF TEMP[7].xxxx :0 220: MUL TEMP[7].x, TEMP[4].xxxx, CONST[5][0].yyyy 221: FSLT TEMP[9].x, TEMP[7].xxxx, IN[4].wwww 222: ADD TEMP[7].x, IN[4].wwww, -TEMP[7].xxxx 223: ADD TEMP[10].x, CONST[5][0].zzzz, -CONST[5][0].yyyy 224: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[4].xxxx 225: RCP TEMP[10].x, TEMP[10].xxxx 226: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[10].xxxx 227: ADD TEMP[7].x, IMM[2].wwww, -TEMP[7].xxxx 228: MUL TEMP[7], TEMP[7].xxxx, TEMP[2] 229: MUL TEMP[10].x, TEMP[4].xxxx, CONST[5][0].zzzz 230: FSLT TEMP[10].x, TEMP[10].xxxx, IN[4].wwww 231: UIF TEMP[10].xxxx :0 232: MOV TEMP[10], IMM[0].xxxx 233: ELSE :0 234: MOV TEMP[10], TEMP[7] 235: ENDIF 236: UIF TEMP[9].xxxx :0 237: MOV TEMP[7], TEMP[10] 238: ELSE :0 239: MOV TEMP[7], TEMP[2] 240: ENDIF 241: MOV TEMP[5], TEMP[7] 242: MUL TEMP[2].x, TEMP[4].xxxx, CONST[5][0].xxxx 243: MAX TEMP[2].x, IN[4].wwww, TEMP[2].xxxx 244: MUL TEMP[4].x, TEMP[4].xxxx, CONST[5][0].wwww 245: MIN TEMP[6].x, TEMP[2].xxxx, TEMP[4].xxxx 246: ENDIF 247: FSLT TEMP[2].x, TEMP[5].wwww, IMM[4].yyyy 248: UIF TEMP[2].xxxx :0 249: MOV TEMP[2].x, IMM[0].xxxx 250: ELSE :0 251: MOV TEMP[2].x, TEMP[6].xxxx 252: ENDIF 253: MOV TEMP[4].xy, IMM[0].xxxx 254: MOV TEMP[4].w, IMM[0].xxxx 255: TXL TEMP[4], TEMP[4], SAMP[1], 2D 256: MUL TEMP[4].xyz, TEMP[4], IMM[4].zzzz 257: MAD TEMP[0].xyz, TEMP[2].xxxx, TEMP[0].xyzz, IN[3].xyzz 258: ADD TEMP[0].xyz, TEMP[4].xyzz, TEMP[0].xyzz 259: MOV TEMP[2].w, IMM[2].wwww 260: MOV TEMP[2].x, TEMP[0].xxxx 261: MOV TEMP[2].y, TEMP[0].yyyy 262: MOV TEMP[2].z, TEMP[0].zzzz 263: MOV TEMP[4].x, CONST[4][32].xxxx 264: MOV TEMP[4].y, CONST[4][33].xxxx 265: MOV TEMP[4].z, CONST[4][34].xxxx 266: MOV TEMP[4].w, CONST[4][35].xxxx 267: DP4 TEMP[4].x, TEMP[2], TEMP[4] 268: MOV TEMP[6].x, CONST[4][32].yyyy 269: MOV TEMP[6].y, CONST[4][33].yyyy 270: MOV TEMP[6].z, CONST[4][34].yyyy 271: MOV TEMP[6].w, CONST[4][35].yyyy 272: DP4 TEMP[6].x, TEMP[2], TEMP[6] 273: MOV TEMP[7].x, CONST[4][32].wwww 274: MOV TEMP[7].y, CONST[4][33].wwww 275: MOV TEMP[7].z, CONST[4][34].wwww 276: MOV TEMP[7].w, CONST[4][35].wwww 277: DP4 TEMP[7].x, TEMP[2], TEMP[7] 278: MAD TEMP[9].xyz, CONST[4][20].xyzz, CONST[5][3].xxxx, TEMP[0].xyzz 279: MOV TEMP[10].w, IMM[2].wwww 280: MOV TEMP[10].x, TEMP[9].xxxx 281: MOV TEMP[10].y, TEMP[9].yyyy 282: MOV TEMP[10].z, TEMP[9].zzzz 283: MOV TEMP[9].xyz, -CONST[4][19].xyzx 284: ADD TEMP[11].xyz, TEMP[0].xyzz, TEMP[9].xyzz 285: MOV TEMP[12].x, TEMP[0].xxxx 286: MOV TEMP[12].y, TEMP[0].yyyy 287: MOV TEMP[12].z, TEMP[0].zzzz 288: DP3 TEMP[13].x, CONST[4][20].xyzz, TEMP[11].xyzz 289: MOV TEMP[12].w, TEMP[13].xxxx 290: MOV TEMP[13].x, TEMP[4].xxxx 291: MOV TEMP[13].y, TEMP[6].xxxx 292: MOV TEMP[14].x, -CONST[4][22].wwww 293: DP3 TEMP[11].x, TEMP[11].xyzz, CONST[4][20].xyzz 294: ADD TEMP[11].x, TEMP[11].xxxx, TEMP[14].xxxx 295: ADD TEMP[14].x, CONST[4][23].yyyy, TEMP[14].xxxx 296: RCP TEMP[14].x, TEMP[14].xxxx 297: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[14].xxxx 298: MOV TEMP[13].z, TEMP[11].xxxx 299: MOV TEMP[13].w, TEMP[7].xxxx 300: ADD TEMP[0].xyz, TEMP[0].xyzz, TEMP[9].xyzz 301: MOV TEMP[0].xyz, -TEMP[0].xyzx 302: DP3 TEMP[9].x, TEMP[0].xyzz, TEMP[0].xyzz 303: RSQ TEMP[9].x, TEMP[9].xxxx 304: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[9].xxxx 305: MOV TEMP[4].x, TEMP[4].xxxx 306: MOV TEMP[4].y, -TEMP[6].xxxx 307: MOV TEMP[6].x, CONST[4][32].zzzz 308: MOV TEMP[6].y, CONST[4][33].zzzz 309: MOV TEMP[6].z, CONST[4][34].zzzz 310: MOV TEMP[6].w, CONST[4][35].zzzz 311: MOV TEMP[9].x, CONST[4][32].wwww 312: MOV TEMP[9].y, CONST[4][33].wwww 313: MOV TEMP[9].z, CONST[4][34].wwww 314: MOV TEMP[9].w, CONST[4][35].wwww 315: MOV TEMP[11].x, CONST[4][32].zzzz 316: MOV TEMP[11].y, CONST[4][33].zzzz 317: MOV TEMP[11].z, CONST[4][34].zzzz 318: MOV TEMP[11].w, CONST[4][35].zzzz 319: DP4 TEMP[6].x, TEMP[10], TEMP[6] 320: DP4 TEMP[9].x, TEMP[10], TEMP[9] 321: RCP TEMP[9].x, TEMP[9].xxxx 322: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[9].xxxx 323: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].xxxx 324: DP4 TEMP[2].x, TEMP[2], TEMP[11] 325: MIN TEMP[2].x, IMM[4].wwww, TEMP[2].xxxx 326: MAX TEMP[2].x, TEMP[6].xxxx, TEMP[2].xxxx 327: MAD TEMP[2].x, IMM[2].yyyy, TEMP[2].xxxx, -TEMP[7].xxxx 328: MOV TEMP[4].z, TEMP[2].xxxx 329: MOV TEMP[4].w, TEMP[7].xxxx 330: MOV OUT[1], TEMP[1] 331: MOV OUT[6].xyz, TEMP[0].xyzx 332: MOV OUT[2], TEMP[3] 333: MOV OUT[3], TEMP[5] 334: MOV OUT[4], TEMP[13] 335: MOV OUT[0], TEMP[4] 336: MOV OUT[5], TEMP[12] 337: MOV OUT[7].xy, TEMP[8].xyxx 338: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4 %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 %19 = call float @llvm.SI.load.const(<16 x i8> %18, i32 304) %20 = call float @llvm.SI.load.const(<16 x i8> %18, i32 308) %21 = call float @llvm.SI.load.const(<16 x i8> %18, i32 312) %22 = call float @llvm.SI.load.const(<16 x i8> %18, i32 320) %23 = call float @llvm.SI.load.const(<16 x i8> %18, i32 324) %24 = call float @llvm.SI.load.const(<16 x i8> %18, i32 328) %25 = call float @llvm.SI.load.const(<16 x i8> %18, i32 336) %26 = call float @llvm.SI.load.const(<16 x i8> %18, i32 340) %27 = call float @llvm.SI.load.const(<16 x i8> %18, i32 344) %28 = call float @llvm.SI.load.const(<16 x i8> %18, i32 364) %29 = call float @llvm.SI.load.const(<16 x i8> %18, i32 372) %30 = call float @llvm.SI.load.const(<16 x i8> %18, i32 512) %31 = call float @llvm.SI.load.const(<16 x i8> %18, i32 516) %32 = call float @llvm.SI.load.const(<16 x i8> %18, i32 520) %33 = call float @llvm.SI.load.const(<16 x i8> %18, i32 524) %34 = call float @llvm.SI.load.const(<16 x i8> %18, i32 528) %35 = call float @llvm.SI.load.const(<16 x i8> %18, i32 532) %36 = call float @llvm.SI.load.const(<16 x i8> %18, i32 536) %37 = call float @llvm.SI.load.const(<16 x i8> %18, i32 540) %38 = call float @llvm.SI.load.const(<16 x i8> %18, i32 544) %39 = call float @llvm.SI.load.const(<16 x i8> %18, i32 548) %40 = call float @llvm.SI.load.const(<16 x i8> %18, i32 552) %41 = call float @llvm.SI.load.const(<16 x i8> %18, i32 556) %42 = call float @llvm.SI.load.const(<16 x i8> %18, i32 560) %43 = call float @llvm.SI.load.const(<16 x i8> %18, i32 564) %44 = call float @llvm.SI.load.const(<16 x i8> %18, i32 568) %45 = call float @llvm.SI.load.const(<16 x i8> %18, i32 572) %46 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 5 %47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0 %48 = call float @llvm.SI.load.const(<16 x i8> %47, i32 4) %49 = call float @llvm.SI.load.const(<16 x i8> %47, i32 8) %50 = call float @llvm.SI.load.const(<16 x i8> %47, i32 32) %51 = call float @llvm.SI.load.const(<16 x i8> %47, i32 40) %52 = call float @llvm.SI.load.const(<16 x i8> %47, i32 48) %53 = call float @llvm.SI.load.const(<16 x i8> %47, i32 52) %54 = call float @llvm.SI.load.const(<16 x i8> %47, i32 56) %55 = call float @llvm.SI.load.const(<16 x i8> %47, i32 72) %56 = call float @llvm.SI.load.const(<16 x i8> %47, i32 96) %57 = call float @llvm.SI.load.const(<16 x i8> %47, i32 100) %58 = call float @llvm.SI.load.const(<16 x i8> %47, i32 104) %59 = call float @llvm.SI.load.const(<16 x i8> %47, i32 108) %60 = call float @llvm.SI.load.const(<16 x i8> %47, i32 112) %61 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %62 = load <8 x i32>, <8 x i32> addrspace(2)* %61, align 32, !tbaa !0 %63 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %64 = load <4 x i32>, <4 x i32> addrspace(2)* %63, align 16, !tbaa !0 %65 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %66 = bitcast <8 x i32> addrspace(2)* %65 to <32 x i8> addrspace(2)* %67 = load <32 x i8>, <32 x i8> addrspace(2)* %66, align 32, !tbaa !0 %68 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %69 = bitcast <4 x i32> addrspace(2)* %68 to <16 x i8> addrspace(2)* %70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !tbaa !0 %71 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0 %73 = add i32 %5, %7 %74 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %72, i32 0, i32 %73) %75 = extractelement <4 x float> %74, i32 0 %76 = extractelement <4 x float> %74, i32 1 %77 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %78 = load <16 x i8>, <16 x i8> addrspace(2)* %77, align 16, !tbaa !0 %79 = add i32 %10, %6 %80 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %78, i32 0, i32 %79) %81 = extractelement <4 x float> %80, i32 0 %82 = extractelement <4 x float> %80, i32 2 %83 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %84 = load <16 x i8>, <16 x i8> addrspace(2)* %83, align 16, !tbaa !0 %85 = add i32 %10, %6 %86 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %84, i32 0, i32 %85) %87 = extractelement <4 x float> %86, i32 0 %88 = extractelement <4 x float> %86, i32 1 %89 = extractelement <4 x float> %86, i32 2 %90 = extractelement <4 x float> %86, i32 3 %91 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %92 = load <16 x i8>, <16 x i8> addrspace(2)* %91, align 16, !tbaa !0 %93 = add i32 %10, %6 %94 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %92, i32 0, i32 %93) %95 = extractelement <4 x float> %94, i32 0 %96 = extractelement <4 x float> %94, i32 1 %97 = extractelement <4 x float> %94, i32 2 %98 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %99 = load <16 x i8>, <16 x i8> addrspace(2)* %98, align 16, !tbaa !0 %100 = add i32 %10, %6 %101 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %99, i32 0, i32 %100) %102 = extractelement <4 x float> %101, i32 0 %103 = extractelement <4 x float> %101, i32 1 %104 = extractelement <4 x float> %101, i32 3 %105 = fmul float %75, 5.000000e-01 %106 = fadd float %105, 5.000000e-01 %107 = fmul float %76, -5.000000e-01 %108 = fadd float %107, 5.000000e-01 %109 = fdiv float 1.000000e+00, %14 %110 = fmul float %81, 3.000000e+00 %111 = fcmp olt float %81, %16 %. = select i1 %111, float %110, float 0.000000e+00 %112 = fmul float %15, %. %113 = fadd float %112, %13 %114 = bitcast float %113 to i32 %115 = insertelement <4 x i32> , i32 %114, i32 1 %116 = insertelement <4 x i32> %115, i32 0, i32 2 %117 = bitcast <8 x i32> %62 to <32 x i8> %118 = bitcast <4 x i32> %64 to <16 x i8> %119 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %116, <32 x i8> %117, <16 x i8> %118, i32 2) %120 = extractelement <4 x float> %119, i32 1 %121 = extractelement <4 x float> %119, i32 2 %122 = fmul float %120, 0x40AFFFE140000000 %123 = fdiv float 1.000000e+00, %122 %124 = fmul float %123, %14 %125 = fmul float %124, 5.000000e-01 %126 = fmul float %125, %82 %127 = fmul float %126, %50 %128 = call float @llvm.AMDIL.fraction.(float %127) %129 = call float @floor(float %127) %130 = fmul float %129, 2.000000e+00 %131 = fadd float %14, -2.000000e+00 %132 = call float @llvm.minnum.f32(float %131, float %130) %133 = fdiv float 1.000000e+00, %14 %134 = fmul float %130, %133 %135 = call float @floor(float %134) %136 = fmul float %14, %135 %137 = fsub float %130, %136 %138 = fcmp ogt float %121, 0.000000e+00 %temp16.0 = select i1 %138, float %132, float %137 %139 = fmul float %109, %temp16.0 %140 = bitcast float %139 to i32 %141 = bitcast float %113 to i32 %142 = insertelement <4 x i32> undef, i32 %140, i32 0 %143 = insertelement <4 x i32> %142, i32 %141, i32 1 %144 = insertelement <4 x i32> %143, i32 0, i32 2 %145 = bitcast <8 x i32> %62 to <32 x i8> %146 = bitcast <4 x i32> %64 to <16 x i8> %147 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %144, <32 x i8> %145, <16 x i8> %146, i32 2) %148 = extractelement <4 x float> %147, i32 0 %149 = extractelement <4 x float> %147, i32 3 %150 = call float @llvm.AMDGPU.lrp(float %128, float %149, float %148) %151 = fadd float %113, %15 %152 = bitcast float %139 to i32 %153 = bitcast float %151 to i32 %154 = insertelement <4 x i32> undef, i32 %152, i32 0 %155 = insertelement <4 x i32> %154, i32 %153, i32 1 %156 = insertelement <4 x i32> %155, i32 0, i32 2 %157 = bitcast <8 x i32> %62 to <32 x i8> %158 = bitcast <4 x i32> %64 to <16 x i8> %159 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %156, <32 x i8> %157, <16 x i8> %158, i32 2) %160 = extractelement <4 x float> %159, i32 0 %161 = extractelement <4 x float> %159, i32 1 %162 = extractelement <4 x float> %159, i32 2 %163 = extractelement <4 x float> %159, i32 3 %164 = fmul float %15, 2.000000e+00 %165 = fadd float %164, %113 %166 = bitcast float %139 to i32 %167 = bitcast float %165 to i32 %168 = insertelement <4 x i32> undef, i32 %166, i32 0 %169 = insertelement <4 x i32> %168, i32 %167, i32 1 %170 = insertelement <4 x i32> %169, i32 0, i32 2 %171 = bitcast <8 x i32> %62 to <32 x i8> %172 = bitcast <4 x i32> %64 to <16 x i8> %173 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %170, <32 x i8> %171, <16 x i8> %172, i32 2) %174 = extractelement <4 x float> %173, i32 0 %175 = extractelement <4 x float> %173, i32 1 %176 = extractelement <4 x float> %173, i32 2 %177 = extractelement <4 x float> %173, i32 3 %178 = fadd float %109, %139 %179 = bitcast float %178 to i32 %180 = bitcast float %151 to i32 %181 = insertelement <4 x i32> undef, i32 %179, i32 0 %182 = insertelement <4 x i32> %181, i32 %180, i32 1 %183 = insertelement <4 x i32> %182, i32 0, i32 2 %184 = bitcast <8 x i32> %62 to <32 x i8> %185 = bitcast <4 x i32> %64 to <16 x i8> %186 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %183, <32 x i8> %184, <16 x i8> %185, i32 2) %187 = extractelement <4 x float> %186, i32 0 %188 = extractelement <4 x float> %186, i32 1 %189 = extractelement <4 x float> %186, i32 2 %190 = extractelement <4 x float> %186, i32 3 %191 = fsub float %189, %187 %192 = fsub float %190, %188 %193 = fsub float %162, %160 %194 = fsub float %163, %161 %195 = fdiv float 1.000000e+00, %191 %196 = fdiv float 1.000000e+00, %192 %197 = fmul float %193, %195 %198 = fmul float %194, %196 %199 = fmul float %187, %197 %200 = fmul float %188, %198 %201 = fsub float %160, %199 %202 = fsub float %161, %200 %203 = fadd float %201, %197 %204 = fadd float %202, %198 %205 = fsub float %176, %174 %206 = fsub float %177, %175 %207 = fdiv float 1.000000e+00, %191 %208 = fdiv float 1.000000e+00, %192 %209 = fmul float %205, %207 %210 = fmul float %206, %208 %211 = fmul float %187, %209 %212 = fmul float %188, %210 %213 = fsub float %174, %211 %214 = fsub float %175, %212 %215 = fadd float %213, %209 %216 = fadd float %214, %210 %217 = fmul float %150, 5.000000e-01 %218 = fadd float %217, 5.000000e-01 %219 = fadd float %106, -5.000000e-01 %220 = fadd float %108, -5.000000e-01 %221 = fadd float %106, -5.000000e-01 %222 = fadd float %108, -5.000000e-01 %223 = fdiv float 1.000000e+00, %51 %224 = call float @llvm.AMDIL.clamp.(float %223, float 0.000000e+00, float 1.000000e+00) %225 = fmul float %219, %224 %226 = fadd float %225, 5.000000e-01 %227 = fmul float %220, %224 %228 = fadd float %227, 5.000000e-01 %229 = fmul float %221, %224 %230 = fadd float %229, 5.000000e-01 %231 = fmul float %222, %224 %232 = fadd float %231, 5.000000e-01 %233 = call float @llvm.AMDGPU.lrp(float %218, float %226, float %106) %234 = call float @llvm.AMDGPU.lrp(float %218, float %228, float %108) %235 = call float @llvm.AMDGPU.lrp(float %217, float %230, float %106) %236 = call float @llvm.AMDGPU.lrp(float %217, float %232, float %108) %237 = fmul float %58, %233 %238 = fadd float %237, %56 %239 = fmul float %59, %234 %240 = fadd float %239, %57 %241 = fmul float %58, %235 %242 = fadd float %241, %56 %243 = fmul float %59, %236 %244 = fadd float %243, %57 %245 = fsub float %56, %238 %246 = fsub float %57, %240 %247 = fsub float %56, %242 %248 = fsub float %57, %244 %249 = fmul float %245, 2.000000e+00 %250 = fadd float %249, %58 %251 = fmul float %246, 2.000000e+00 %252 = fadd float %251, %59 %253 = fmul float %247, 2.000000e+00 %254 = fadd float %253, %58 %255 = fmul float %248, 2.000000e+00 %256 = fadd float %255, %59 %257 = fcmp olt float %250, 0.000000e+00 %.94 = select i1 %257, float %189, float %187 %258 = fcmp olt float %252, 0.000000e+00 %temp48.0 = select i1 %258, float %190, float %188 %259 = fsub float %.94, %238 %260 = fsub float %temp48.0, %240 %261 = fdiv float 1.000000e+00, %250 %262 = fdiv float 1.000000e+00, %252 %263 = fmul float %259, %261 %264 = fmul float %260, %262 %265 = call float @llvm.AMDIL.clamp.(float %263, float 0.000000e+00, float 1.000000e+00) %266 = call float @llvm.AMDIL.clamp.(float %264, float 0.000000e+00, float 1.000000e+00) %267 = fmul float %265, %250 %268 = fadd float %267, %238 %269 = fmul float %266, %252 %270 = fadd float %269, %240 %271 = fmul float %265, %254 %272 = fadd float %271, %242 %273 = fmul float %266, %256 %274 = fadd float %273, %244 %275 = fmul float %265, 2.000000e+00 %276 = fmul float %266, 2.000000e+00 %277 = fsub float 1.000000e+00, %275 %278 = fsub float 1.000000e+00, %276 %279 = fmul float %277, %75 %280 = fmul float %278, %76 %281 = call float @llvm.AMDGPU.lrp(float %268, float %203, float %201) %282 = call float @llvm.AMDGPU.lrp(float %270, float %204, float %202) %283 = call float @llvm.AMDGPU.lrp(float %272, float %215, float %213) %284 = call float @llvm.AMDGPU.lrp(float %274, float %216, float %214) %285 = fmul float %87, 0x3FB3D07220000000 %286 = fmul float %88, 0x3FB3D07220000000 %287 = fmul float %89, 0x3FB3D07220000000 %288 = fmul float %87, 0x3FEE54EDE0000000 %289 = fadd float %288, 0x3FAAB12320000000 %290 = fmul float %88, 0x3FEE54EDE0000000 %291 = fadd float %290, 0x3FAAB12320000000 %292 = fmul float %89, 0x3FEE54EDE0000000 %293 = fadd float %292, 0x3FAAB12320000000 %294 = call float @llvm.pow.f32(float %289, float 0x4003333340000000) %295 = call float @llvm.pow.f32(float %291, float 0x4003333340000000) %296 = call float @llvm.pow.f32(float %293, float 0x4003333340000000) %297 = fcmp ogt float %87, 0x3FA4B5DCC0000000 %.95 = select i1 %297, float %294, float %285 %298 = fcmp ogt float %88, 0x3FA4B5DCC0000000 %temp24.0 = select i1 %298, float %295, float %286 %299 = fcmp ogt float %89, 0x3FA4B5DCC0000000 %.96 = select i1 %299, float %296, float %287 %300 = call float @llvm.AMDGPU.lrp(float %60, float %.95, float %87) %301 = call float @llvm.AMDGPU.lrp(float %60, float %temp24.0, float %88) %302 = call float @llvm.AMDGPU.lrp(float %60, float %.96, float %89) %303 = call float @llvm.AMDGPU.lrp(float %60, float %90, float %90) %304 = fmul float %24, %26 %305 = fmul float %22, %27 %306 = fmul float %23, %25 %307 = fmul float %23, %27 %308 = fsub float %307, %304 %309 = fmul float %24, %25 %310 = fsub float %309, %305 %311 = fmul float %22, %26 %312 = fsub float %311, %306 %313 = call float @llvm.cos.f32(float %102) %314 = call float @llvm.sin.f32(float %102) %315 = fmul float %313, %308 %316 = fmul float %313, %310 %317 = fmul float %313, %312 %318 = fmul float %314, %25 %319 = fadd float %318, %315 %320 = fmul float %314, %26 %321 = fadd float %320, %316 %322 = fmul float %314, %27 %323 = fadd float %322, %317 %324 = fmul float %314, %308 %325 = fmul float %314, %310 %326 = fmul float %314, %312 %327 = fmul float %313, %25 %328 = fsub float %327, %324 %329 = fmul float %313, %26 %330 = fsub float %329, %325 %331 = fmul float %313, %27 %332 = fsub float %331, %326 %333 = call float @llvm.sin.f32(float %103) %334 = call float @llvm.cos.f32(float %103) %335 = fsub float 1.000000e+00, %334 %336 = fmul float %328, %330 %337 = fmul float %335, %336 %338 = fmul float %333, %332 %339 = fmul float %328, %332 %340 = fmul float %335, %339 %341 = fmul float %333, %330 %342 = fmul float %330, %332 %343 = fmul float %342, %335 %344 = fmul float %333, %328 %345 = fmul float %328, %328 %346 = call float @llvm.AMDGPU.lrp(float %334, float 1.000000e+00, float %345) %347 = fsub float %337, %338 %348 = fadd float %340, %341 %349 = fadd float %337, %338 %350 = fmul float %330, %330 %351 = call float @llvm.AMDGPU.lrp(float %334, float 1.000000e+00, float %350) %352 = fsub float %343, %344 %353 = fsub float %340, %341 %354 = fadd float %343, %344 %355 = fmul float %332, %332 %356 = call float @llvm.AMDGPU.lrp(float %334, float 1.000000e+00, float %355) %357 = fmul float %319, %346 %358 = fmul float %321, %347 %359 = fadd float %358, %357 %360 = fmul float %323, %348 %361 = fadd float %359, %360 %362 = fmul float %319, %349 %363 = fmul float %321, %351 %364 = fadd float %363, %362 %365 = fmul float %323, %352 %366 = fadd float %364, %365 %367 = fmul float %319, %353 %368 = fmul float %321, %354 %369 = fadd float %368, %367 %370 = fmul float %323, %356 %371 = fadd float %369, %370 %372 = fadd float %53, %279 %373 = fadd float %54, %280 %374 = fmul float %373, %328 %375 = fmul float %373, %330 %376 = fmul float %373, %332 %377 = fmul float %361, %372 %378 = fadd float %377, %374 %379 = fmul float %366, %372 %380 = fadd float %379, %375 %381 = fmul float %371, %372 %382 = fadd float %381, %376 %383 = fsub float %19, %95 %384 = fsub float %20, %96 %385 = fsub float %21, %97 %386 = fmul float %383, %383 %387 = fmul float %384, %384 %388 = fadd float %387, %386 %389 = fmul float %385, %385 %390 = fadd float %388, %389 %391 = call float @llvm.sqrt.f32(float %390) %392 = fcmp ogt float %55, 0.000000e+00 br i1 %392, label %IF83, label %ENDIF82 IF83: ; preds = %main_body %393 = call float @llvm.SI.load.const(<16 x i8> %47, i32 12) %394 = call float @llvm.SI.load.const(<16 x i8> %47, i32 0) %395 = fmul float %391, %48 %396 = fcmp olt float %395, %104 %397 = fsub float %104, %395 %398 = fsub float %49, %48 %399 = fmul float %398, %391 %400 = fdiv float 1.000000e+00, %399 %401 = fmul float %397, %400 %402 = fsub float 1.000000e+00, %401 %403 = fmul float %402, %300 %404 = fmul float %402, %301 %405 = fmul float %402, %302 %406 = fmul float %402, %303 %407 = fmul float %391, %49 %408 = fcmp olt float %407, %104 %.97 = select i1 %408, float 0.000000e+00, float %403 %.98 = select i1 %408, float 0.000000e+00, float %404 %.99 = select i1 %408, float 0.000000e+00, float %405 %.100 = select i1 %408, float 0.000000e+00, float %406 %.97. = select i1 %396, float %.97, float %300 %.98. = select i1 %396, float %.98, float %301 %.99. = select i1 %396, float %.99, float %302 %.100. = select i1 %396, float %.100, float %303 %409 = fmul float %391, %394 %410 = call float @llvm.maxnum.f32(float %104, float %409) %411 = fmul float %391, %393 %412 = call float @llvm.minnum.f32(float %410, float %411) br label %ENDIF82 ENDIF82: ; preds = %main_body, %IF83 %temp20.1 = phi float [ %.97., %IF83 ], [ %300, %main_body ] %temp21.0 = phi float [ %.98., %IF83 ], [ %301, %main_body ] %temp22.0 = phi float [ %.99., %IF83 ], [ %302, %main_body ] %temp23.0 = phi float [ %.100., %IF83 ], [ %303, %main_body ] %temp24.1 = phi float [ %412, %IF83 ], [ %104, %main_body ] %413 = fcmp olt float %temp23.0, 0x3F70101060000000 %.temp24.1 = select i1 %413, float 0.000000e+00, float %temp24.1 %414 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> , <32 x i8> %67, <16 x i8> %70, i32 2) %415 = extractelement <4 x float> %414, i32 0 %416 = extractelement <4 x float> %414, i32 1 %417 = extractelement <4 x float> %414, i32 2 %418 = fmul float %415, 0x3E7AD7F2A0000000 %419 = fmul float %416, 0x3E7AD7F2A0000000 %420 = fmul float %417, 0x3E7AD7F2A0000000 %421 = fmul float %.temp24.1, %378 %422 = fadd float %421, %95 %423 = fmul float %.temp24.1, %380 %424 = fadd float %423, %96 %425 = fmul float %.temp24.1, %382 %426 = fadd float %425, %97 %427 = fadd float %418, %422 %428 = fadd float %419, %424 %429 = fadd float %420, %426 %430 = fmul float %427, %30 %431 = fmul float %428, %34 %432 = fadd float %430, %431 %433 = fmul float %429, %38 %434 = fadd float %432, %433 %435 = fadd float %434, %42 %436 = fmul float %427, %31 %437 = fmul float %428, %35 %438 = fadd float %436, %437 %439 = fmul float %429, %39 %440 = fadd float %438, %439 %441 = fadd float %440, %43 %442 = fmul float %427, %33 %443 = fmul float %428, %37 %444 = fadd float %442, %443 %445 = fmul float %429, %41 %446 = fadd float %444, %445 %447 = fadd float %446, %45 %448 = fmul float %22, %52 %449 = fadd float %448, %427 %450 = fmul float %23, %52 %451 = fadd float %450, %428 %452 = fmul float %24, %52 %453 = fadd float %452, %429 %454 = fsub float %427, %19 %455 = fsub float %428, %20 %456 = fsub float %429, %21 %457 = fmul float %22, %454 %458 = fmul float %23, %455 %459 = fadd float %458, %457 %460 = fmul float %24, %456 %461 = fadd float %459, %460 %462 = fmul float %454, %22 %463 = fmul float %455, %23 %464 = fadd float %463, %462 %465 = fmul float %456, %24 %466 = fadd float %464, %465 %467 = fsub float %466, %28 %468 = fsub float %29, %28 %469 = fdiv float 1.000000e+00, %468 %470 = fmul float %467, %469 %471 = fsub float %427, %19 %472 = fsub float %428, %20 %473 = fsub float %429, %21 %474 = fmul float %471, %471 %475 = fmul float %472, %472 %476 = fadd float %475, %474 %477 = fmul float %473, %473 %478 = fadd float %476, %477 %479 = call float @llvm.AMDGPU.rsq.clamped.f32(float %478) %480 = fmul float %471, %479 %481 = fsub float -0.000000e+00, %480 %482 = fmul float %472, %479 %483 = fsub float -0.000000e+00, %482 %484 = fmul float %473, %479 %485 = fsub float -0.000000e+00, %484 %486 = fsub float -0.000000e+00, %441 %487 = fmul float %449, %32 %488 = fmul float %451, %36 %489 = fadd float %487, %488 %490 = fmul float %453, %40 %491 = fadd float %489, %490 %492 = fadd float %491, %44 %493 = fmul float %449, %33 %494 = fmul float %451, %37 %495 = fadd float %493, %494 %496 = fmul float %453, %41 %497 = fadd float %495, %496 %498 = fadd float %497, %45 %499 = fdiv float 1.000000e+00, %498 %500 = fmul float %492, %499 %501 = fmul float %500, %447 %502 = fmul float %427, %32 %503 = fmul float %428, %36 %504 = fadd float %502, %503 %505 = fmul float %429, %40 %506 = fadd float %504, %505 %507 = fadd float %506, %44 %508 = call float @llvm.minnum.f32(float %507, float 0x3F50624DE0000000) %509 = call float @llvm.maxnum.f32(float %501, float %508) %510 = fmul float %509, 2.000000e+00 %511 = fsub float %510, %447 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %281, float %282, float %283, float %284) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %temp20.1, float %temp21.0, float %temp22.0, float %temp23.0) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %435, float %441, float %470, float %447) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %427, float %428, float %429, float %461) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %481, float %483, float %485, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %150, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %435, float %486, float %511, float %447) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: readnone declare float @floor(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.cos.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sin.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[12:15], s[2:3], 0x4 ; C0860304 v_add_i32_e32 v0, s10, v0 ; 4A00000A v_add_i32_e32 v4, s11, v3 ; 4A08060B s_load_dwordx4 s[24:27], s[2:3], 0x10 ; C08C0310 v_mov_b32_e32 v19, 0x3d558919 ; 7E2602FF 3D558919 v_mov_b32_e32 v11, 0x3f72a76f ; 7E1602FF 3F72A76F v_mov_b32_e32 v20, 0x4019999a ; 7E2802FF 4019999A v_mov_b32_e32 v12, 0x3d9e8391 ; 7E1802FF 3D9E8391 v_mov_b32_e32 v13, 0x3d25aee6 ; 7E1A02FF 3D25AEE6 s_load_dwordx4 s[20:23], s[2:3], 0x14 ; C08A0314 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s0, s[12:15], 0x0 ; C2000D00 s_buffer_load_dword s30, s[12:15], 0x1 ; C20F0D01 s_buffer_load_dword s29, s[12:15], 0x2 ; C20E8D02 s_buffer_load_dword s31, s[12:15], 0x3 ; C20F8D03 s_buffer_load_dword s12, s[24:27], 0x52 ; C2061952 s_buffer_load_dword s16, s[24:27], 0x54 ; C2081954 s_buffer_load_dword s14, s[24:27], 0x55 ; C2071955 s_buffer_load_dword s15, s[24:27], 0x56 ; C2079956 s_buffer_load_dword s13, s[24:27], 0x5b ; C206995B s_buffer_load_dword s11, s[20:23], 0x8 ; C2059508 s_buffer_load_dword s10, s[20:23], 0xa ; C205150A s_buffer_load_dword s19, s[20:23], 0xc ; C209950C s_buffer_load_dword s18, s[20:23], 0xd ; C209150D s_buffer_load_dword s17, s[20:23], 0xe ; C208950E s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e64 v14, -2.0, s30 ; D206000E 00003CF5 s_buffer_load_dword s28, s[20:23], 0x1c ; C20E151C s_load_dwordx4 s[52:55], s[8:9], 0x0 ; C09A0900 s_load_dwordx4 s[56:59], s[8:9], 0x4 ; C09C0904 s_load_dwordx4 s[48:51], s[8:9], 0x8 ; C0980908 v_rcp_f32_e32 v15, s30 ; 7E1E541E v_mov_b32_e32 v26, s0 ; 7E340200 s_load_dwordx4 s[44:47], s[8:9], 0xc ; C096090C s_load_dwordx4 s[0:3], s[8:9], 0x10 ; C0800910 s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500 s_load_dwordx8 s[36:43], s[6:7], 0x0 ; C0D20700 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e64 v17, 1.0, s28 ; D2080011 000038F2 buffer_load_format_xyzw v[7:10], v0, s[52:55], 0 idxen ; E00C2000 800D0700 buffer_load_format_xyzw v[21:24], v4, s[56:59], 0 idxen ; E00C2000 800E1504 buffer_load_format_xyzw v[28:31], v4, s[48:51], 0 idxen ; E00C2000 800C1C04 buffer_load_format_xyzw v[0:3], v4, s[44:47], 0 idxen ; E00C2000 800B0004 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[3:6], v4, s[0:3], 0 idxen ; E00C2000 80000304 v_cmp_gt_f32_e64 s[2:3], s31, v21 ; D0080002 00022A1F v_cmp_gt_f32_e64 s[0:1], v28, v13 ; D0080000 00021B1C v_cmp_gt_f32_e32 vcc, v29, v13 ; 7C081B1D s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v5, 0x40400000, v21 ; 100A2AFF 40400000 v_cndmask_b32_e64 v5, 0, v5, s[2:3] ; D2000005 000A0A80 v_mac_f32_e32 v26, s29, v5 ; 3E340A1D v_mov_b32_e32 v25, 0 ; 7E320280 v_mov_b32_e32 v27, v25 ; 7E360319 v_mad_f32 v21, v11, v28, v19 ; D2820015 044E390B v_mad_f32 v22, v11, v29, v19 ; D2820016 044E3B0B v_mac_f32_e32 v19, v11, v30 ; 3E263D0B image_sample_l v[9:10], 6, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[36:43], s[32:35] ; F0900600 01090919 s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_lt_f32_e64 s[2:3], 0, v10 ; D0020002 00021480 v_mul_f32_e32 v5, 0x457fff0a, v9 ; 100A12FF 457FFF0A v_rcp_f32_e32 v5, v5 ; 7E0A5505 v_mul_f32_e32 v5, s30, v5 ; 100A0A1E v_mul_f32_e32 v5, 0.5, v5 ; 100A0AF0 v_mul_f32_e32 v5, v23, v5 ; 100A0B17 v_mul_f32_e32 v9, s11, v5 ; 10120A0B v_floor_f32_e32 v18, v9 ; 7E244909 v_add_f32_e32 v9, v18, v18 ; 06122512 v_min_f32_e32 v10, v9, v14 ; 1E141D09 v_mul_f32_e32 v11, v15, v9 ; 1016130F v_floor_f32_e32 v11, v11 ; 7E16490B v_mad_f32 v9, -s30, v11, v9 ; D2820009 2426161E v_cndmask_b32_e64 v9, v9, v10, s[2:3] ; D2000009 000A1509 v_mul_f32_e32 v23, v9, v15 ; 102E1F09 v_cmp_gt_f32_e64 s[2:3], v30, v13 ; D0080002 00021B1E v_mov_b32_e32 v24, v26 ; 7E30031A v_add_f32_e32 v16, s29, v26 ; 0620341D image_sample_l v[13:14], 9, 0, 0, 0, 0, 0, 0, 0, v[23:26], s[36:43], s[32:35] ; F0900900 01090D17 v_mov_b32_e32 v24, v16 ; 7E300310 v_mac_f32_e32 v15, v9, v15 ; 3E1E1F09 image_sample_l v[32:35], 15, 0, 0, 0, 0, 0, 0, 0, v[23:26], s[36:43], s[32:35] ; F0900F00 01092017 v_mov_b32_e32 v24, v26 ; 7E30031A v_mac_f32_e64 v24, 2.0, s29 ; D23E0018 00003AF4 v_mul_f32_e32 v26, v12, v28 ; 1034390C v_mul_f32_e32 v36, v12, v29 ; 10483B0C v_mul_f32_e32 v37, v12, v30 ; 104A3D0C v_mul_f32_e32 v9, v28, v17 ; 1012231C v_mul_f32_e32 v10, v29, v17 ; 1014231D v_mul_f32_e32 v11, v30, v17 ; 1016231E v_mul_f32_e32 v12, v31, v17 ; 1018231F v_mac_f32_e32 v12, s28, v31 ; 3E183E1C v_mov_b32_e32 v17, v25 ; 7E220319 image_sample_l v[27:30], 15, 0, 0, 0, 0, 0, 0, 0, v[23:26], s[36:43], s[32:35] ; F0900F00 01091B17 image_sample_l v[38:41], 15, 0, 0, 0, 0, 0, 0, 0, v[15:18], s[36:43], s[32:35] ; F0900F00 0109260F s_waitcnt vmcnt(0) ; BF8C0770 v_subrev_f32_e32 v15, v38, v40 ; 0A1E5126 v_rcp_f32_e32 v15, v15 ; 7E1E550F v_subrev_f32_e32 v16, v39, v41 ; 0A205327 v_rcp_f32_e32 v16, v16 ; 7E205510 v_subrev_f32_e32 v23, v32, v34 ; 0A2E4520 v_mul_f32_e32 v17, v15, v23 ; 10222F0F v_mad_f32 v24, -v38, v17, v32 ; D2820018 24822326 v_subrev_f32_e32 v25, v33, v35 ; 0A324721 v_mul_f32_e32 v17, v16, v25 ; 10223310 v_mad_f32 v31, -v39, v17, v33 ; D282001F 24862327 v_subrev_f32_e32 v29, v27, v29 ; 0A3A3B1B v_mul_f32_e32 v17, v15, v29 ; 10223B0F v_mad_f32 v27, -v38, v17, v27 ; D282001B 246E2326 v_subrev_f32_e32 v30, v28, v30 ; 0A3C3D1C v_mul_f32_e32 v17, v16, v30 ; 10223D10 v_mad_f32 v28, -v39, v17, v28 ; D282001C 24722327 v_mad_f32 v17, v5, s11, -v18 ; D2820011 84481705 v_rcp_f32_e32 v18, s10 ; 7E24540A v_sub_f32_e32 v5, 1.0, v17 ; 080A22F2 v_mul_f32_e32 v5, v13, v5 ; 100A0B0D v_mac_f32_e32 v5, v14, v17 ; 3E0A230E v_add_f32_e64 v13, 0, v18 clamp ; D206080D 00022480 v_mad_f32 v14, 0.5, v7, 0.5 ; D282000E 03C20EF0 v_add_f32_e32 v17, -0.5, v14 ; 06221CF1 v_mad_f32 v17, v17, v13, 0.5 ; D2820011 03C21B11 s_buffer_load_dword s29, s[20:23], 0x18 ; C20E9518 s_buffer_load_dword s30, s[20:23], 0x19 ; C20F1519 s_buffer_load_dword s31, s[20:23], 0x1a ; C20F951A s_buffer_load_dword s32, s[20:23], 0x1b ; C210151B v_mad_f32 v18, -0.5, v8, 0.5 ; D2820012 03C210F1 v_add_f32_e32 v32, -0.5, v18 ; 064024F1 v_mad_f32 v13, v32, v13, 0.5 ; D282000D 03C21B20 v_mad_f32 v32, 0.5, v5, 0.5 ; D2820020 03C20AF0 v_sub_f32_e32 v33, 1.0, v32 ; 084240F2 v_mul_f32_e32 v34, v14, v33 ; 1044430E v_mul_f32_e32 v33, v18, v33 ; 10424312 v_mac_f32_e32 v34, v17, v32 ; 3E444111 v_mac_f32_e32 v33, v13, v32 ; 3E42410D s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v32, s29 ; 7E40021D v_mad_f32 v32, s31, v34, v32 ; D2820020 0482441F v_mov_b32_e32 v34, s30 ; 7E44021E v_mad_f32 v33, s32, v33, v34 ; D2820021 048A4220 v_sub_f32_e32 v34, s29, v32 ; 0844401D v_mad_f32 v34, 2.0, v34, s31 ; D2820022 007E44F4 v_sub_f32_e32 v35, s30, v33 ; 0846421E v_mad_f32 v35, 2.0, v35, s32 ; D2820023 008246F4 v_cmp_gt_f32_e64 s[8:9], 0, v34 ; D0080008 00024480 v_cmp_gt_f32_e64 s[10:11], 0, v35 ; D008000A 00024680 v_cndmask_b32_e64 v38, v38, v40, s[8:9] ; D2000026 00225126 v_cndmask_b32_e64 v39, v39, v41, s[10:11] ; D2000027 002A5327 v_mad_f32 v40, 0.5, -v5, 1.0 ; D2820028 43CA0AF0 v_mul_f32_e32 v41, v14, v40 ; 1052510E v_mul_f32_e32 v40, v18, v40 ; 10505112 v_mul_f32_e32 v14, 0.5, v5 ; 101C0AF0 v_rcp_f32_e32 v18, v34 ; 7E245522 v_mac_f32_e32 v41, v17, v14 ; 3E521D11 v_mac_f32_e32 v40, v13, v14 ; 3E501D0D v_subrev_f32_e32 v13, v32, v38 ; 0A1A4D20 v_mul_f32_e32 v13, v18, v13 ; 101A1B12 v_add_f32_e64 v17, 0, v13 clamp ; D2060811 00021A80 v_mac_f32_e32 v32, v34, v17 ; 3E402322 v_mad_f32 v14, v15, v23, v24 ; D282000E 04622F0F v_rcp_f32_e32 v18, v35 ; 7E245523 v_sub_f32_e32 v13, 1.0, v32 ; 081A40F2 v_mul_f32_e32 v13, v24, v13 ; 101A1B18 v_mac_f32_e32 v13, v14, v32 ; 3E1A410E v_subrev_f32_e32 v14, v33, v39 ; 0A1C4F21 v_mul_f32_e32 v14, v18, v14 ; 101C1D12 v_add_f32_e64 v18, 0, v14 clamp ; D2060812 00021C80 v_mac_f32_e32 v33, v35, v18 ; 3E422523 v_mad_f32 v23, v16, v25, v31 ; D2820017 047E3310 v_sub_f32_e32 v14, 1.0, v33 ; 081C42F2 v_mul_f32_e32 v14, v31, v14 ; 101C1D1F v_mac_f32_e32 v14, v23, v33 ; 3E1C4317 v_mov_b32_e32 v23, s29 ; 7E2E021D v_mad_f32 v23, s31, v41, v23 ; D2820017 045E521F v_mad_f32 v24, v15, v29, v27 ; D2820018 046E3B0F v_sub_f32_e32 v15, s29, v23 ; 081E2E1D v_mad_f32 v15, 2.0, v15, s31 ; D282000F 007E1EF4 v_mac_f32_e32 v23, v15, v17 ; 3E2E230F v_sub_f32_e32 v15, 1.0, v23 ; 081E2EF2 v_mul_f32_e32 v15, v27, v15 ; 101E1F1B v_mac_f32_e32 v15, v24, v23 ; 3E1E2F18 v_mov_b32_e32 v23, s30 ; 7E2E021E v_mad_f32 v23, s32, v40, v23 ; D2820017 045E5020 s_buffer_load_dword s8, s[24:27], 0x51 ; C2041951 s_buffer_load_dword s9, s[24:27], 0x50 ; C2049950 v_mad_f32 v24, v16, v30, v28 ; D2820018 04723D10 v_sub_f32_e32 v16, s30, v23 ; 08202E1E v_mad_f32 v16, 2.0, v16, s32 ; D2820010 008220F4 v_mac_f32_e32 v23, v16, v18 ; 3E2E2510 v_sub_f32_e32 v16, 1.0, v23 ; 08202EF2 v_mul_f32_e32 v16, v28, v16 ; 1020211C v_mac_f32_e32 v16, v24, v23 ; 3E202F18 v_mov_b32_e32 v23, s14 ; 7E2E020E v_mul_f32_e32 v23, s12, v23 ; 102E2E0C v_mov_b32_e32 v24, s15 ; 7E30020F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v24, v24, s8, -v23 ; D2820018 845C1118 v_mov_b32_e32 v23, s15 ; 7E2E020F v_mul_f32_e32 v23, s9, v23 ; 102E2E09 v_mov_b32_e32 v25, s16 ; 7E320210 v_mad_f32 v25, v25, s12, -v23 ; D2820019 845C1919 v_mov_b32_e32 v23, s16 ; 7E2E0210 v_mul_f32_e32 v23, s8, v23 ; 102E2E08 v_log_f32_e32 v21, v21 ; 7E2A4F15 v_log_f32_e32 v22, v22 ; 7E2C4F16 v_log_f32_e32 v19, v19 ; 7E264F13 v_mov_b32_e32 v27, s14 ; 7E36020E v_mad_f32 v27, v27, s9, -v23 ; D282001B 845C131B v_mul_legacy_f32_e32 v21, v20, v21 ; 0E2A2B14 v_mul_legacy_f32_e32 v22, v20, v22 ; 0E2C2D14 v_mul_legacy_f32_e32 v19, v20, v19 ; 0E262714 v_exp_f32_e32 v20, v21 ; 7E284B15 v_cndmask_b32_e64 v20, v26, v20, s[0:1] ; D2000014 0002291A v_exp_f32_e32 v21, v22 ; 7E2A4B16 v_cndmask_b32_e32 v21, v36, v21 ; 002A2B24 v_exp_f32_e32 v19, v19 ; 7E264B13 v_cndmask_b32_e64 v19, v37, v19, s[2:3] ; D2000013 000A2725 v_mac_f32_e32 v9, s28, v20 ; 3E12281C v_mac_f32_e32 v10, s28, v21 ; 3E142A1C v_mac_f32_e32 v11, s28, v19 ; 3E16261C s_buffer_load_dword s31, s[24:27], 0x4c ; C20F994C s_buffer_load_dword s32, s[24:27], 0x4d ; C210194D s_buffer_load_dword s33, s[24:27], 0x4e ; C210994E s_buffer_load_dword s0, s[24:27], 0x5d ; C200195D s_buffer_load_dword s35, s[24:27], 0x80 ; C2119980 s_buffer_load_dword s34, s[24:27], 0x81 ; C2111981 s_buffer_load_dword s2, s[24:27], 0x82 ; C2011982 s_buffer_load_dword s3, s[24:27], 0x83 ; C2019983 s_buffer_load_dword s38, s[24:27], 0x84 ; C2131984 s_buffer_load_dword s36, s[24:27], 0x85 ; C2121985 s_buffer_load_dword s28, s[24:27], 0x86 ; C20E1986 s_buffer_load_dword s29, s[24:27], 0x87 ; C20E9987 s_buffer_load_dword s39, s[24:27], 0x88 ; C2139988 s_buffer_load_dword s37, s[24:27], 0x89 ; C2129989 s_buffer_load_dword s10, s[24:27], 0x8a ; C205198A s_buffer_load_dword s30, s[24:27], 0x8b ; C20F198B s_buffer_load_dword s41, s[24:27], 0x8c ; C214998C s_buffer_load_dword s40, s[24:27], 0x8d ; C214198D s_buffer_load_dword s11, s[24:27], 0x8e ; C205998E s_buffer_load_dword s24, s[24:27], 0x8f ; C20C198F v_mov_b32_e32 v19, 0x3e22f983 ; 7E2602FF 3E22F983 v_mul_f32_e32 v3, v19, v3 ; 10060713 v_mul_f32_e32 v4, v19, v4 ; 10080913 v_fract_f32_e32 v3, v3 ; 7E064103 v_fract_f32_e32 v4, v4 ; 7E084104 v_cos_f32_e32 v29, v3 ; 7E3A6D03 v_sin_f32_e32 v28, v3 ; 7E386B03 v_mul_f32_e32 v3, v24, v28 ; 10063918 v_mad_f32 v19, v29, s16, -v3 ; D2820013 840C211D v_mul_f32_e32 v3, v25, v28 ; 10063919 v_mad_f32 v20, v29, s14, -v3 ; D2820014 840C1D1D v_mul_f32_e32 v3, v27, v28 ; 1006391B v_mad_f32 v21, v29, s15, -v3 ; D2820015 840C1F1D s_buffer_load_dword s1, s[20:23], 0x12 ; C2009512 v_cos_f32_e32 v30, v4 ; 7E3C6D04 v_sub_f32_e32 v3, 1.0, v30 ; 08063CF2 v_mul_f32_e32 v22, v19, v19 ; 102C2713 v_mad_f32 v31, v22, v3, v30 ; D282001F 047A0716 v_mul_f32_e32 v22, v20, v20 ; 102C2914 v_mad_f32 v23, v22, v3, v30 ; D2820017 047A0716 v_mul_f32_e32 v22, v21, v21 ; 102C2B15 v_mad_f32 v22, v22, v3, v30 ; D2820016 047A0716 v_sin_f32_e32 v26, v4 ; 7E346B04 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_lt_f32_e64 s[26:27], 0, s1 ; D002001A 00000280 v_mov_b32_e32 v4, s0 ; 7E080200 v_mov_b32_e32 v3, s19 ; 7E060213 s_and_saveexec_b64 s[26:27], s[26:27] ; BE9A241A s_xor_b64 s[26:27], exec, s[26:27] ; 899A1A7E s_cbranch_execz BB0_2 ; BF880000 v_sub_f32_e32 v32, s31, v0 ; 0840001F v_sub_f32_e32 v33, s32, v1 ; 08420220 v_sub_f32_e32 v34, s33, v2 ; 08440421 v_mul_f32_e32 v32, v32, v32 ; 10404120 s_buffer_load_dword s0, s[20:23], 0x1 ; C2001501 s_buffer_load_dword s1, s[20:23], 0x2 ; C2009502 s_buffer_load_dword s19, s[20:23], 0x0 ; C2099500 s_buffer_load_dword s25, s[20:23], 0x3 ; C20C9503 v_mac_f32_e32 v32, v33, v33 ; 3E404321 v_mac_f32_e32 v32, v34, v34 ; 3E404522 v_rsq_f32_e32 v33, v32 ; 7E425D20 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v34, s0 ; 7E440200 v_sub_f32_e32 v34, s1, v34 ; 08444401 v_rcp_f32_e32 v34, v34 ; 7E445522 v_sqrt_f32_e32 v32, v32 ; 7E406720 v_mul_f32_e32 v33, v34, v33 ; 10424322 v_mul_f32_e32 v34, s0, v32 ; 10444000 v_mad_f32 v35, v32, s0, -v6 ; D2820023 84180120 v_mad_f32 v33, v35, v33, 1.0 ; D2820021 03CA4323 v_cmp_lt_f32_e32 vcc, v34, v6 ; 7C020D22 v_mul_f32_e32 v34, s1, v32 ; 10444001 v_mul_f32_e32 v35, s19, v32 ; 10464013 v_max_f32_e32 v35, v35, v6 ; 20460D23 v_cmp_lt_f32_e64 s[0:1], v34, v6 ; D0020000 00020D22 v_mul_f32_e32 v6, v9, v33 ; 100C4309 v_mul_f32_e32 v34, v10, v33 ; 1044430A v_mul_f32_e32 v36, v11, v33 ; 1048430B v_mul_f32_e32 v33, v12, v33 ; 1042430C v_cndmask_b32_e64 v6, v6, 0, s[0:1] ; D2000006 00010106 v_cndmask_b32_e64 v34, v34, 0, s[0:1] ; D2000022 00010122 v_cndmask_b32_e64 v36, v36, 0, s[0:1] ; D2000024 00010124 v_cndmask_b32_e64 v33, v33, 0, s[0:1] ; D2000021 00010121 v_cndmask_b32_e32 v9, v9, v6 ; 00120D09 v_cndmask_b32_e32 v10, v10, v34 ; 0014450A v_cndmask_b32_e32 v11, v11, v36 ; 0016490B v_cndmask_b32_e32 v12, v12, v33 ; 0018430C v_mul_f32_e32 v6, s25, v32 ; 100C4019 v_min_f32_e32 v6, v6, v35 ; 1E0C4706 s_or_b64 exec, exec, s[26:27] ; 88FE1A7E v_mad_f32 v17, -2.0, v17, 1.0 ; D2820011 03CA22F5 v_mad_f32 v7, v17, v7, s18 ; D2820007 004A0F11 v_mad_f32 v17, -2.0, v18, 1.0 ; D2820011 03CA24F5 v_mad_f32 v8, v17, v8, s17 ; D2820008 00461111 v_mul_f32_e32 v17, v24, v29 ; 10223B18 v_mac_f32_e32 v17, s16, v28 ; 3E223810 v_mul_f32_e32 v18, v25, v29 ; 10243B19 v_mul_f32_e32 v24, v27, v29 ; 10303B1B v_mac_f32_e32 v18, s14, v28 ; 3E24380E v_mac_f32_e32 v24, s15, v28 ; 3E30380F v_sub_f32_e32 v25, 1.0, v30 ; 08323CF2 v_mul_f32_e32 v27, v31, v17 ; 1036231F s_load_dwordx4 s[16:19], s[4:5], 0x4 ; C0880504 s_load_dwordx8 s[44:51], s[6:7], 0x8 ; C0D60708 v_mul_f32_e32 v28, v20, v19 ; 10382714 v_mul_f32_e32 v29, v21, v26 ; 103A3515 v_mul_f32_e32 v30, v21, v19 ; 103C2715 v_mov_b32_e32 v31, 0x3b808083 ; 7E3E02FF 3B808083 v_cmp_gt_f32_e32 vcc, v31, v12 ; 7C08191F v_cndmask_b32_e64 v6, v6, 0, vcc ; D2000006 01A90106 v_mad_f32 v31, v25, v28, -v29 ; D282001F 84763919 v_mac_f32_e32 v29, v28, v25 ; 3E3A331C v_mul_f32_e32 v28, v20, v26 ; 10383514 v_mad_f32 v32, v30, v25, v28 ; D2820020 0472331E v_mad_f32 v28, v25, v30, -v28 ; D282001C 84723D19 v_mul_f32_e32 v26, v19, v26 ; 10343513 v_mul_f32_e32 v30, v21, v20 ; 103C2915 v_mad_f32 v33, v30, v25, -v26 ; D2820021 846A331E v_mac_f32_e32 v26, v25, v30 ; 3E343D19 v_mac_f32_e32 v27, v31, v18 ; 3E36251F v_mul_f32_e32 v25, v29, v17 ; 1032231D v_mac_f32_e32 v25, v23, v18 ; 3E322517 v_mul_f32_e32 v17, v28, v17 ; 1022231C v_mac_f32_e32 v17, v26, v18 ; 3E22251A v_mac_f32_e32 v27, v32, v24 ; 3E363120 v_mac_f32_e32 v25, v33, v24 ; 3E323121 v_mac_f32_e32 v17, v22, v24 ; 3E223116 v_mul_f32_e32 v18, v19, v8 ; 10241113 v_mul_f32_e32 v19, v20, v8 ; 10261114 v_mul_f32_e32 v8, v21, v8 ; 10101115 v_mac_f32_e32 v18, v7, v27 ; 3E243707 v_mac_f32_e32 v19, v7, v25 ; 3E263307 v_mov_b32_e32 v20, 0 ; 7E280280 v_mac_f32_e32 v8, v7, v17 ; 3E102307 v_mov_b32_e32 v21, v20 ; 7E2A0314 v_mad_f32 v0, v18, v6, v0 ; D2820000 04020D12 v_mad_f32 v1, v19, v6, v1 ; D2820001 04060D13 v_mac_f32_e32 v2, v8, v6 ; 3E040D08 v_mov_b32_e32 v22, v20 ; 7E2C0314 v_mov_b32_e32 v6, 0x33d6bf95 ; 7E0C02FF 33D6BF95 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[17:19], 7, 0, 0, 0, 0, 0, 0, 0, v[20:23], s[44:51], s[16:19] ; F0900700 008B1114 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v0, v6, v17 ; 3E002306 v_mac_f32_e32 v1, v6, v18 ; 3E022506 v_mac_f32_e32 v2, v6, v19 ; 3E042706 exp 15, 32, 0, 0, 0, v20, v20, v20, v20 ; F800020F 14141414 exp 15, 33, 0, 0, 0, v13, v14, v15, v16 ; F800021F 100F0E0D exp 15, 34, 0, 0, 0, v9, v10, v11, v12 ; F800022F 0C0B0A09 v_mul_f32_e32 v6, s38, v1 ; 100C0226 v_mac_f32_e32 v6, s35, v0 ; 3E0C0023 v_mac_f32_e32 v6, s39, v2 ; 3E0C0427 v_add_f32_e32 v6, s41, v6 ; 060C0C29 v_mul_f32_e32 v7, s36, v1 ; 100E0224 v_mac_f32_e32 v7, s34, v0 ; 3E0E0022 v_mac_f32_e32 v7, s37, v2 ; 3E0E0425 v_add_f32_e32 v7, s40, v7 ; 060E0E28 v_subrev_f32_e32 v8, s31, v0 ; 0A10001F s_waitcnt expcnt(0) ; BF8C070F v_subrev_f32_e32 v9, s32, v1 ; 0A120220 v_subrev_f32_e32 v10, s33, v2 ; 0A140421 v_subrev_f32_e32 v4, s13, v4 ; 0A08080D v_rcp_f32_e32 v4, v4 ; 7E085504 v_mul_f32_e32 v11, s9, v8 ; 10161009 v_mac_f32_e32 v11, s8, v9 ; 3E161208 v_mac_f32_e32 v11, s12, v10 ; 3E16140C v_subrev_f32_e32 v12, s13, v11 ; 0A18160D v_mul_f32_e32 v4, v4, v12 ; 10081904 v_mul_f32_e32 v12, s29, v1 ; 1018021D v_mac_f32_e32 v12, s3, v0 ; 3E180003 v_mac_f32_e32 v12, s30, v2 ; 3E18041E v_add_f32_e32 v12, s24, v12 ; 06181818 exp 15, 35, 0, 0, 0, v6, v7, v4, v12 ; F800023F 0C040706 exp 15, 36, 0, 0, 0, v0, v1, v2, v11 ; F800024F 0B020100 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v4, v8, v8 ; 10081108 v_mac_f32_e32 v4, v9, v9 ; 3E081309 v_mac_f32_e32 v4, v10, v10 ; 3E08150A v_rsq_clamp_f32_e32 v4, v4 ; 7E085904 v_mul_f32_e32 v11, s28, v1 ; 1016021C v_mac_f32_e32 v11, s2, v0 ; 3E160002 v_mac_f32_e32 v11, s10, v2 ; 3E16040A v_mad_f32 v2, s12, v3, v2 ; D2820002 040A060C v_mul_f32_e32 v8, v4, v8 ; 10101104 v_mul_f32_e32 v9, v4, v9 ; 10121304 v_mul_f32_e32 v4, v4, v10 ; 10081504 v_mov_b32_e32 v10, 0x80000000 ; 7E1402FF 80000000 v_xor_b32_e32 v8, v8, v10 ; 3A101508 v_xor_b32_e32 v9, v9, v10 ; 3A121509 v_xor_b32_e32 v4, v4, v10 ; 3A081504 exp 15, 37, 0, 0, 0, v8, v9, v4, v20 ; F800025F 14040908 v_mad_f32 v0, s9, v3, v0 ; D2820000 04020609 v_mad_f32 v1, s8, v3, v1 ; D2820001 04060608 v_xor_b32_e32 v3, v7, v10 ; 3A061507 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v4, s28, v1 ; 1008021C v_mul_f32_e32 v1, s29, v1 ; 1002021D v_mac_f32_e32 v1, s3, v0 ; 3E020003 v_mac_f32_e32 v1, s30, v2 ; 3E02041E v_add_f32_e32 v1, s24, v1 ; 06020218 v_rcp_f32_e32 v1, v1 ; 7E025501 v_mac_f32_e32 v4, s2, v0 ; 3E080002 v_mac_f32_e32 v4, s10, v2 ; 3E08040A v_add_f32_e32 v0, s11, v4 ; 0600080B v_mul_f32_e32 v0, v1, v0 ; 10000101 v_mul_f32_e32 v0, v12, v0 ; 1000010C v_add_f32_e32 v1, s11, v11 ; 0602160B v_min_f32_e32 v1, 0x3a83126f, v1 ; 1E0202FF 3A83126F v_max_f32_e32 v0, v1, v0 ; 20000101 exp 15, 38, 0, 0, 0, v5, v20, v20, v20 ; F800026F 14141405 v_mad_f32 v0, 2.0, v0, -v12 ; D2820000 843200F4 exp 15, 12, 0, 0, 0, v6, v3, v0, v12 ; F80000CF 0C000306 exp 15, 13, 0, 1, 0, v20, v20, v20, v20 ; F80008DF 14141414 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 64 VGPRS: 44 Code Size: 2160 bytes LDS: 0 blocks Scratch: 0 bytes per wave ******************** SHADER KEY instance_divisors = {0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL IN[5] DCL OUT[0], POSITION DCL OUT[1], ARRAY(1), CLIPDIST DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL OUT[4], GENERIC[2] DCL OUT[5], GENERIC[3] DCL OUT[6], GENERIC[4] DCL OUT[7], GENERIC[5] DCL SAMP[0] DCL SAMP[1] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL CONST[1][0] DCL CONST[2][0..15] DCL CONST[3][0..13] DCL CONST[4][0..41] DCL CONST[5][0..7] DCL CONST[6][0] DCL TEMP[0], LOCAL DCL TEMP[1], ARRAY(1), LOCAL DCL TEMP[2..14], LOCAL IMM[0] FLT32 { 0.0000, 0.5000, -0.5000, 3.0000} IMM[1] UINT32 {0, 4, 32, 96} IMM[2] FLT32 { 2.0000, -2.0000, 1.0000, 0.0774} IMM[3] FLT32 { 0.9479, 0.0521, 2.4000, 0.0404} IMM[4] UINT32 {112, 64, 3, 320} IMM[5] FLT32 { 0.9000, 1.0000, 0.0000, 0.0039} IMM[6] UINT32 {48, 304, 512, 528} IMM[7] FLT32 { 0.0000, 0.0010, 0.0000, 0.0000} IMM[8] UINT32 {544, 560, 516, 532} IMM[9] UINT32 {548, 564, 524, 540} IMM[10] UINT32 {556, 572, 520, 536} IMM[11] UINT32 {552, 568, 364, 372} 0: MOV TEMP[0].yzw, TEMP[1].zyzw 1: MOV TEMP[0].x, IMM[0].xxxx 2: MOV TEMP[1], TEMP[0] 3: MAD TEMP[0].x, IN[0].xxxx, IMM[0].yyyy, IMM[0].yyyy 4: MAD TEMP[2].x, IN[0].yyyy, IMM[0].zzzz, IMM[0].yyyy 5: MOV TEMP[3].x, TEMP[0].xxxx 6: MOV TEMP[3].y, TEMP[2].xxxx 7: MOV TEMP[3].z, TEMP[0].xxxx 8: MOV TEMP[3].w, TEMP[2].xxxx 9: RCP TEMP[0].x, CONST[1][0].yyyy 10: MUL TEMP[2].x, IN[1].xxxx, IMM[0].wwww 11: FSLT TEMP[4].x, IN[1].xxxx, CONST[1][0].wwww 12: UIF TEMP[4].xxxx :0 13: MOV TEMP[2].x, TEMP[2].xxxx 14: ELSE :0 15: MOV TEMP[2].x, IMM[0].xxxx 16: ENDIF 17: MAD TEMP[2].x, CONST[1][0].zzzz, TEMP[2].xxxx, CONST[1][0].xxxx 18: MOV TEMP[4].x, IMM[0].xxxx 19: MOV TEMP[4].y, TEMP[2].xxxx 20: MUL TEMP[5].x, IMM[0].yyyy, CONST[1][0].yyyy 21: MUL TEMP[6].x, IN[1].zzzz, CONST[5][2].xxxx 22: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx 23: FRC TEMP[6].x, TEMP[5].xxxx 24: FLR TEMP[5].x, TEMP[5].xxxx 25: MUL TEMP[5].x, TEMP[5].xxxx, IMM[2].xxxx 26: ADD TEMP[7].x, CONST[1][0].yyyy, IMM[2].yyyy 27: MIN TEMP[7].x, TEMP[7].xxxx, TEMP[5].xxxx 28: RCP TEMP[8].x, CONST[1][0].yyyy 29: MUL TEMP[8].x, TEMP[5].xxxx, TEMP[8].xxxx 30: FLR TEMP[8].x, TEMP[8].xxxx 31: MUL TEMP[8].x, CONST[1][0].yyyy, TEMP[8].xxxx 32: ADD TEMP[5].x, TEMP[5].xxxx, -TEMP[8].xxxx 33: MOV TEMP[4].xy, TEMP[4].xyyy 34: MOV TEMP[4].w, IMM[0].xxxx 35: TXL TEMP[4].z, TEMP[4], SAMP[0], 2D 36: FSLT TEMP[4].x, IMM[0].xxxx, TEMP[4].zzzz 37: UIF TEMP[4].xxxx :0 38: MOV TEMP[4].x, TEMP[7].xxxx 39: ELSE :0 40: MOV TEMP[4].x, TEMP[5].xxxx 41: ENDIF 42: MUL TEMP[4].x, TEMP[0].xxxx, TEMP[4].xxxx 43: MOV TEMP[5].x, TEMP[4].xxxx 44: MOV TEMP[5].y, TEMP[2].xxxx 45: MOV TEMP[5].xy, TEMP[5].xyyy 46: MOV TEMP[5].w, IMM[0].xxxx 47: TXL TEMP[5].xw, TEMP[5], SAMP[0], 2D 48: LRP TEMP[5].x, TEMP[6].xxxx, TEMP[5].wwww, TEMP[5].xxxx 49: ADD TEMP[6].x, TEMP[2].xxxx, CONST[1][0].zzzz 50: MOV TEMP[7].x, TEMP[4].xxxx 51: MOV TEMP[7].y, TEMP[6].xxxx 52: MOV TEMP[7].xy, TEMP[7].xyyy 53: MOV TEMP[7].w, IMM[0].xxxx 54: TXL TEMP[7], TEMP[7], SAMP[0], 2D 55: MOV TEMP[8].x, TEMP[4].xxxx 56: MAD TEMP[2].x, IMM[2].xxxx, CONST[1][0].zzzz, TEMP[2].xxxx 57: MOV TEMP[8].y, TEMP[2].xxxx 58: MOV TEMP[2].xy, TEMP[8].xyyy 59: MOV TEMP[2].w, IMM[0].xxxx 60: TXL TEMP[2], TEMP[2], SAMP[0], 2D 61: ADD TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx 62: MOV TEMP[0].y, TEMP[6].xxxx 63: MOV TEMP[0].xy, TEMP[0].xyyy 64: MOV TEMP[0].w, IMM[0].xxxx 65: TXL TEMP[0], TEMP[0], SAMP[0], 2D 66: ADD TEMP[4].xy, TEMP[0].zwww, -TEMP[0].xyyy 67: ADD TEMP[6].xy, TEMP[7].zwww, -TEMP[7].xyyy 68: RCP TEMP[8].x, TEMP[4].xxxx 69: RCP TEMP[8].y, TEMP[4].yyyy 70: MUL TEMP[6].xy, TEMP[6].xyyy, TEMP[8].xyyy 71: MUL TEMP[8].xy, TEMP[0].xyyy, TEMP[6].xyyy 72: ADD TEMP[7].xy, TEMP[7].xyyy, -TEMP[8].xyyy 73: ADD TEMP[6].xy, TEMP[7].xyyy, TEMP[6].xyyy 74: ADD TEMP[8].xy, TEMP[2].zwww, -TEMP[2].xyyy 75: RCP TEMP[9].x, TEMP[4].xxxx 76: RCP TEMP[9].y, TEMP[4].yyyy 77: MUL TEMP[4].xy, TEMP[8].xyyy, TEMP[9].xyyy 78: MUL TEMP[8].xy, TEMP[0].xyyy, TEMP[4].xyyy 79: ADD TEMP[2].xy, TEMP[2].xyyy, -TEMP[8].xyyy 80: ADD TEMP[4].xy, TEMP[2].xyyy, TEMP[4].xyyy 81: MOV TEMP[8].y, IMM[0].xxxx 82: MOV TEMP[8].x, TEMP[5].xxxx 83: MUL TEMP[5].x, IMM[0].yyyy, TEMP[5].xxxx 84: ADD TEMP[9].x, IMM[0].yyyy, TEMP[5].xxxx 85: MOV TEMP[10].x, TEMP[9].xxxx 86: MOV TEMP[10].y, TEMP[9].xxxx 87: MOV TEMP[10].z, TEMP[5].xxxx 88: MOV TEMP[10].w, TEMP[5].xxxx 89: ADD TEMP[5], TEMP[3], IMM[0].zzzz 90: RCP TEMP[9].x, CONST[5][2].zzzz 91: MOV_SAT TEMP[9].x, TEMP[9].xxxx 92: MAD TEMP[5], TEMP[5], TEMP[9].xxxx, IMM[0].yyyy 93: LRP TEMP[3], TEMP[10], TEMP[5], TEMP[3] 94: MAD TEMP[5].x, CONST[5][6].zzzz, TEMP[3].xxxx, CONST[5][6].xxxx 95: MAD TEMP[9].x, CONST[5][6].wwww, TEMP[3].yyyy, CONST[5][6].yyyy 96: MOV TEMP[10].x, TEMP[5].xxxx 97: MOV TEMP[10].y, TEMP[9].xxxx 98: MAD TEMP[11].x, CONST[5][6].zzzz, TEMP[3].zzzz, CONST[5][6].xxxx 99: MOV TEMP[10].z, TEMP[11].xxxx 100: MAD TEMP[3].x, CONST[5][6].wwww, TEMP[3].wwww, CONST[5][6].yyyy 101: MOV TEMP[10].w, TEMP[3].xxxx 102: ADD TEMP[3], CONST[5][6].xyxy, -TEMP[10] 103: MAD TEMP[3], TEMP[3], IMM[2].xxxx, CONST[5][6].zwzw 104: FSLT TEMP[11].x, TEMP[3].xxxx, IMM[0].xxxx 105: UIF TEMP[11].xxxx :0 106: MOV TEMP[11].x, TEMP[0].zzzz 107: ELSE :0 108: MOV TEMP[11].x, TEMP[0].xxxx 109: ENDIF 110: FSLT TEMP[12].x, TEMP[3].yyyy, IMM[0].xxxx 111: UIF TEMP[12].xxxx :0 112: MOV TEMP[12].x, TEMP[0].wwww 113: ELSE :0 114: MOV TEMP[12].x, TEMP[0].yyyy 115: ENDIF 116: MOV TEMP[0].x, TEMP[11].xxxx 117: MOV TEMP[0].y, TEMP[12].xxxx 118: MOV TEMP[5].x, TEMP[5].xxxx 119: MOV TEMP[5].y, TEMP[9].xxxx 120: ADD TEMP[0].xy, TEMP[0].xyyy, -TEMP[5].xyyy 121: RCP TEMP[5].x, TEMP[3].xxxx 122: RCP TEMP[5].y, TEMP[3].yyyy 123: MUL TEMP[0].xy, TEMP[0].xyyy, TEMP[5].xyyy 124: MOV_SAT TEMP[0].xy, TEMP[0].xyyy 125: MAD TEMP[3], TEMP[0].xyxy, TEMP[3], TEMP[10] 126: MUL TEMP[0].xy, TEMP[0].xyyy, IMM[2].xxxx 127: ADD TEMP[0].xy, IMM[2].zzzz, -TEMP[0].xyyy 128: MUL TEMP[0].xy, TEMP[0].xyyy, IN[0].xyyy 129: LRP TEMP[5].xy, TEMP[3].xyyy, TEMP[6].xyyy, TEMP[7].xyyy 130: LRP TEMP[2].xy, TEMP[3].zwww, TEMP[4].xyyy, TEMP[2].xyyy 131: MOV TEMP[3].x, TEMP[5].xxxx 132: MOV TEMP[3].y, TEMP[5].yyyy 133: MOV TEMP[3].z, TEMP[2].xxxx 134: MOV TEMP[3].w, TEMP[2].yyyy 135: MUL TEMP[2].xyz, IN[2].xyzz, IMM[2].wwww 136: MAD TEMP[4].xyz, IN[2].xyzz, IMM[3].xxxx, IMM[3].yyyy 137: POW TEMP[5].x, TEMP[4].xxxx, IMM[3].zzzz 138: POW TEMP[5].y, TEMP[4].yyyy, IMM[3].zzzz 139: POW TEMP[5].z, TEMP[4].zzzz, IMM[3].zzzz 140: FSLT TEMP[4].x, IMM[3].wwww, IN[2].xxxx 141: UIF TEMP[4].xxxx :0 142: MOV TEMP[4].x, TEMP[5].xxxx 143: ELSE :0 144: MOV TEMP[4].x, TEMP[2].xxxx 145: ENDIF 146: FSLT TEMP[6].x, IMM[3].wwww, IN[2].yyyy 147: UIF TEMP[6].xxxx :0 148: MOV TEMP[6].x, TEMP[5].yyyy 149: ELSE :0 150: MOV TEMP[6].x, TEMP[2].yyyy 151: ENDIF 152: FSLT TEMP[7].x, IMM[3].wwww, IN[2].zzzz 153: UIF TEMP[7].xxxx :0 154: MOV TEMP[5].x, TEMP[5].zzzz 155: ELSE :0 156: MOV TEMP[5].x, TEMP[2].zzzz 157: ENDIF 158: MOV TEMP[2].x, TEMP[4].xxxx 159: MOV TEMP[2].y, TEMP[6].xxxx 160: MOV TEMP[2].z, TEMP[5].xxxx 161: MOV TEMP[2].w, IN[2].wwww 162: LRP TEMP[2], CONST[5][7].xxxx, TEMP[2], IN[2] 163: ABS TEMP[4].x, IN[5].zzzz 164: FSLT TEMP[4].x, IMM[5].xxxx, TEMP[4].xxxx 165: UIF TEMP[4].xxxx :0 166: MOV TEMP[4].xyz, IMM[5].yzzy 167: ELSE :0 168: MOV TEMP[4].xyz, IMM[5].zzyz 169: ENDIF 170: MUL TEMP[5].xyz, IN[5].zxyy, TEMP[4].yzxx 171: MAD TEMP[4].xyz, IN[5].yzxx, TEMP[4].zxyy, -TEMP[5].xyzz 172: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 173: RSQ TEMP[5].x, TEMP[5].xxxx 174: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx 175: MUL TEMP[5].xyz, IN[5].zxyy, TEMP[4].yzxx 176: MAD TEMP[5].xyz, IN[5].yzxx, TEMP[4].zxyy, -TEMP[5].xyzz 177: MOV TEMP[6].x, -CONST[5][4].xxxx 178: DP3 TEMP[7].x, IN[5].xyzz, IN[5].xyzz 179: RSQ TEMP[7].x, TEMP[7].xxxx 180: MUL TEMP[7].xyz, IN[5].xyzz, TEMP[7].xxxx 181: DP3 TEMP[7].x, TEMP[7].xyzz, CONST[4][20].xyzz 182: ABS TEMP[7].x, TEMP[7].xxxx 183: ADD TEMP[7].x, TEMP[7].xxxx, TEMP[6].xxxx 184: ADD TEMP[6].x, CONST[5][4].yyyy, TEMP[6].xxxx 185: RCP TEMP[6].x, TEMP[6].xxxx 186: MUL TEMP[6].x, TEMP[7].xxxx, TEMP[6].xxxx 187: MOV_SAT TEMP[6].x, TEMP[6].xxxx 188: MOV TEMP[7].xyz, TEMP[2].xyzx 189: MUL TEMP[9].x, IMM[2].xxxx, TEMP[6].xxxx 190: ADD TEMP[9].x, IMM[0].wwww, -TEMP[9].xxxx 191: MUL TEMP[9].x, TEMP[6].xxxx, TEMP[9].xxxx 192: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[9].xxxx 193: ADD TEMP[6].x, IMM[2].zzzz, -TEMP[6].xxxx 194: MUL TEMP[2].x, TEMP[6].xxxx, TEMP[2].wwww 195: MOV TEMP[7].w, TEMP[2].xxxx 196: COS TEMP[2].x, IN[4].xxxx 197: SIN TEMP[6].x, IN[4].xxxx 198: MUL TEMP[9].xyz, TEMP[2].xxxx, TEMP[5].xyzz 199: MAD TEMP[9].xyz, TEMP[6].xxxx, TEMP[4].xyzz, TEMP[9].xyzz 200: MUL TEMP[5].xyz, TEMP[6].xxxx, TEMP[5].xyzz 201: MAD TEMP[2].xyz, TEMP[2].xxxx, TEMP[4].xyzz, -TEMP[5].xyzz 202: MUL TEMP[4].x, TEMP[2].xxxx, TEMP[2].xxxx 203: MUL TEMP[5].x, TEMP[2].yyyy, TEMP[2].yyyy 204: MUL TEMP[6].x, TEMP[2].zzzz, TEMP[2].zzzz 205: MOV TEMP[10].yz, IMM[0].xxxx 206: ADD TEMP[11].x, IMM[2].zzzz, -TEMP[4].xxxx 207: ADD TEMP[10].x, TEMP[11].xxxx, TEMP[4].xxxx 208: MOV TEMP[4].x, IMM[0].xxxx 209: MOV TEMP[11].y, IMM[0].xxxx 210: ADD TEMP[12].x, IMM[2].zzzz, -TEMP[5].xxxx 211: ADD TEMP[11].x, TEMP[12].xxxx, TEMP[5].xxxx 212: MOV TEMP[4].yz, TEMP[11].yxyy 213: MOV TEMP[5].xy, IMM[0].xxxx 214: ADD TEMP[11].x, IMM[2].zzzz, -TEMP[6].xxxx 215: ADD TEMP[6].x, TEMP[11].xxxx, TEMP[6].xxxx 216: MOV TEMP[5].z, TEMP[6].xxxx 217: DP3 TEMP[6].x, TEMP[9].xyzz, TEMP[10].xyzz 218: DP3 TEMP[11].x, TEMP[9].xyzz, TEMP[4].xyzz 219: MOV TEMP[6].y, TEMP[11].xxxx 220: DP3 TEMP[9].x, TEMP[9].xyzz, TEMP[5].xyzz 221: MOV TEMP[6].z, TEMP[9].xxxx 222: DP3 TEMP[9].x, IN[5].xyzz, TEMP[10].xyzz 223: DP3 TEMP[4].x, IN[5].xyzz, TEMP[4].xyzz 224: MOV TEMP[9].y, TEMP[4].xxxx 225: DP3 TEMP[4].x, IN[5].xyzz, TEMP[5].xyzz 226: MOV TEMP[9].z, TEMP[4].xxxx 227: ADD TEMP[4].x, CONST[5][3].zzzz, TEMP[0].yyyy 228: ADD TEMP[0].x, CONST[5][3].yyyy, TEMP[0].xxxx 229: MUL TEMP[0].xyz, TEMP[6].xyzz, TEMP[0].xxxx 230: MAD TEMP[0].xyz, TEMP[4].xxxx, TEMP[2].xyzz, TEMP[0].xyzz 231: ADD TEMP[2].xyz, CONST[4][19].xyzz, -IN[3].xyzz 232: DP3 TEMP[2].x, TEMP[2].xyzz, TEMP[2].xyzz 233: SQRT TEMP[2].x, TEMP[2].xxxx 234: MOV TEMP[4], TEMP[7] 235: MOV TEMP[5].x, IN[4].wwww 236: FSLT TEMP[6].x, IMM[0].xxxx, CONST[5][4].zzzz 237: UIF TEMP[6].xxxx :0 238: MUL TEMP[6].x, TEMP[2].xxxx, CONST[5][0].yyyy 239: FSLT TEMP[10].x, TEMP[6].xxxx, IN[4].wwww 240: ADD TEMP[6].x, IN[4].wwww, -TEMP[6].xxxx 241: ADD TEMP[11].x, CONST[5][0].zzzz, -CONST[5][0].yyyy 242: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[2].xxxx 243: RCP TEMP[11].x, TEMP[11].xxxx 244: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[11].xxxx 245: ADD TEMP[6].x, IMM[2].zzzz, -TEMP[6].xxxx 246: MUL TEMP[6], TEMP[6].xxxx, TEMP[7] 247: MUL TEMP[11].x, TEMP[2].xxxx, CONST[5][0].zzzz 248: FSLT TEMP[11].x, TEMP[11].xxxx, IN[4].wwww 249: UIF TEMP[11].xxxx :0 250: MOV TEMP[11], IMM[0].xxxx 251: ELSE :0 252: MOV TEMP[11], TEMP[6] 253: ENDIF 254: UIF TEMP[10].xxxx :0 255: MOV TEMP[6], TEMP[11] 256: ELSE :0 257: MOV TEMP[6], TEMP[7] 258: ENDIF 259: MOV TEMP[4], TEMP[6] 260: MUL TEMP[6].x, TEMP[2].xxxx, CONST[5][0].xxxx 261: MAX TEMP[6].x, IN[4].wwww, TEMP[6].xxxx 262: MUL TEMP[2].x, TEMP[2].xxxx, CONST[5][0].wwww 263: MIN TEMP[5].x, TEMP[6].xxxx, TEMP[2].xxxx 264: ENDIF 265: FSLT TEMP[2].x, TEMP[4].wwww, IMM[5].wwww 266: UIF TEMP[2].xxxx :0 267: MOV TEMP[2].x, IMM[0].xxxx 268: ELSE :0 269: MOV TEMP[2].x, TEMP[5].xxxx 270: ENDIF 271: MOV TEMP[5].xy, IMM[0].xxxx 272: MOV TEMP[5].w, IMM[0].xxxx 273: TXL TEMP[5], TEMP[5], SAMP[1], 2D 274: MUL TEMP[5].xyz, TEMP[5], IMM[7].xxxx 275: MAD TEMP[0].xyz, TEMP[2].xxxx, TEMP[0].xyzz, IN[3].xyzz 276: ADD TEMP[0].xyz, TEMP[5].xyzz, TEMP[0].xyzz 277: MOV TEMP[2].w, IMM[2].zzzz 278: MOV TEMP[2].x, TEMP[0].xxxx 279: MOV TEMP[2].y, TEMP[0].yyyy 280: MOV TEMP[2].z, TEMP[0].zzzz 281: MOV TEMP[5].x, CONST[4][32].xxxx 282: MOV TEMP[5].y, CONST[4][33].xxxx 283: MOV TEMP[5].z, CONST[4][34].xxxx 284: MOV TEMP[5].w, CONST[4][35].xxxx 285: DP4 TEMP[5].x, TEMP[2], TEMP[5] 286: MOV TEMP[6].x, CONST[4][32].yyyy 287: MOV TEMP[6].y, CONST[4][33].yyyy 288: MOV TEMP[6].z, CONST[4][34].yyyy 289: MOV TEMP[6].w, CONST[4][35].yyyy 290: DP4 TEMP[6].x, TEMP[2], TEMP[6] 291: MOV TEMP[7].x, CONST[4][32].wwww 292: MOV TEMP[7].y, CONST[4][33].wwww 293: MOV TEMP[7].z, CONST[4][34].wwww 294: MOV TEMP[7].w, CONST[4][35].wwww 295: DP4 TEMP[7].x, TEMP[2], TEMP[7] 296: MAD TEMP[10].xyz, CONST[4][20].xyzz, CONST[5][3].xxxx, TEMP[0].xyzz 297: MOV TEMP[11].w, IMM[2].zzzz 298: MOV TEMP[11].x, TEMP[10].xxxx 299: MOV TEMP[11].y, TEMP[10].yyyy 300: MOV TEMP[11].z, TEMP[10].zzzz 301: MOV TEMP[10].x, CONST[4][32].zzzz 302: MOV TEMP[10].y, CONST[4][33].zzzz 303: MOV TEMP[10].z, CONST[4][34].zzzz 304: MOV TEMP[10].w, CONST[4][35].zzzz 305: MOV TEMP[12].x, CONST[4][32].wwww 306: MOV TEMP[12].y, CONST[4][33].wwww 307: MOV TEMP[12].z, CONST[4][34].wwww 308: MOV TEMP[12].w, CONST[4][35].wwww 309: MOV TEMP[13].x, CONST[4][32].zzzz 310: MOV TEMP[13].y, CONST[4][33].zzzz 311: MOV TEMP[13].z, CONST[4][34].zzzz 312: MOV TEMP[13].w, CONST[4][35].zzzz 313: DP4 TEMP[10].x, TEMP[11], TEMP[10] 314: DP4 TEMP[11].x, TEMP[11], TEMP[12] 315: RCP TEMP[11].x, TEMP[11].xxxx 316: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx 317: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[7].xxxx 318: DP4 TEMP[2].x, TEMP[2], TEMP[13] 319: MIN TEMP[2].x, IMM[7].yyyy, TEMP[2].xxxx 320: MAX TEMP[2].x, TEMP[10].xxxx, TEMP[2].xxxx 321: MOV TEMP[10].xyz, -CONST[4][19].xyzx 322: ADD TEMP[11].xyz, TEMP[0].xyzz, TEMP[10].xyzz 323: MOV TEMP[12].x, TEMP[0].xxxx 324: MOV TEMP[12].y, TEMP[0].yyyy 325: MOV TEMP[12].z, TEMP[0].zzzz 326: DP3 TEMP[13].x, CONST[4][20].xyzz, TEMP[11].xyzz 327: MOV TEMP[12].w, TEMP[13].xxxx 328: MOV TEMP[13].x, TEMP[5].xxxx 329: MOV TEMP[13].y, TEMP[6].xxxx 330: MOV TEMP[14].x, -CONST[4][22].wwww 331: DP3 TEMP[11].x, TEMP[11].xyzz, CONST[4][20].xyzz 332: ADD TEMP[11].x, TEMP[11].xxxx, TEMP[14].xxxx 333: ADD TEMP[14].x, CONST[4][23].yyyy, TEMP[14].xxxx 334: RCP TEMP[14].x, TEMP[14].xxxx 335: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[14].xxxx 336: MOV TEMP[13].z, TEMP[11].xxxx 337: MOV TEMP[13].w, TEMP[7].xxxx 338: ADD TEMP[0].xyz, TEMP[0].xyzz, TEMP[10].xyzz 339: DP3 TEMP[0].x, TEMP[9].xyzz, TEMP[0].xyzz 340: FSLT TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx 341: UIF TEMP[0].xxxx :0 342: MOV TEMP[0].xyz, TEMP[9].xyzx 343: ELSE :0 344: MOV TEMP[0].xyz, -TEMP[9].xyzx 345: ENDIF 346: MOV TEMP[5].x, TEMP[5].xxxx 347: MOV TEMP[5].y, -TEMP[6].xxxx 348: MAD TEMP[2].x, IMM[2].xxxx, TEMP[2].xxxx, -TEMP[7].xxxx 349: MOV TEMP[5].z, TEMP[2].xxxx 350: MOV TEMP[5].w, TEMP[7].xxxx 351: MOV OUT[1], TEMP[1] 352: MOV OUT[6].xyz, TEMP[0].xyzx 353: MOV OUT[2], TEMP[3] 354: MOV OUT[3], TEMP[4] 355: MOV OUT[4], TEMP[13] 356: MOV OUT[0], TEMP[5] 357: MOV OUT[5], TEMP[12] 358: MOV OUT[7].xy, TEMP[8].xyxx 359: END ; ModuleID = 'tgsi' define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1 %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 4 %18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0 %19 = call float @llvm.SI.load.const(<16 x i8> %18, i32 304) %20 = call float @llvm.SI.load.const(<16 x i8> %18, i32 308) %21 = call float @llvm.SI.load.const(<16 x i8> %18, i32 312) %22 = call float @llvm.SI.load.const(<16 x i8> %18, i32 320) %23 = call float @llvm.SI.load.const(<16 x i8> %18, i32 324) %24 = call float @llvm.SI.load.const(<16 x i8> %18, i32 328) %25 = call float @llvm.SI.load.const(<16 x i8> %18, i32 364) %26 = call float @llvm.SI.load.const(<16 x i8> %18, i32 372) %27 = call float @llvm.SI.load.const(<16 x i8> %18, i32 512) %28 = call float @llvm.SI.load.const(<16 x i8> %18, i32 516) %29 = call float @llvm.SI.load.const(<16 x i8> %18, i32 520) %30 = call float @llvm.SI.load.const(<16 x i8> %18, i32 524) %31 = call float @llvm.SI.load.const(<16 x i8> %18, i32 528) %32 = call float @llvm.SI.load.const(<16 x i8> %18, i32 532) %33 = call float @llvm.SI.load.const(<16 x i8> %18, i32 536) %34 = call float @llvm.SI.load.const(<16 x i8> %18, i32 540) %35 = call float @llvm.SI.load.const(<16 x i8> %18, i32 544) %36 = call float @llvm.SI.load.const(<16 x i8> %18, i32 548) %37 = call float @llvm.SI.load.const(<16 x i8> %18, i32 552) %38 = call float @llvm.SI.load.const(<16 x i8> %18, i32 556) %39 = call float @llvm.SI.load.const(<16 x i8> %18, i32 560) %40 = call float @llvm.SI.load.const(<16 x i8> %18, i32 564) %41 = call float @llvm.SI.load.const(<16 x i8> %18, i32 568) %42 = call float @llvm.SI.load.const(<16 x i8> %18, i32 572) %43 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 5 %44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0 %45 = call float @llvm.SI.load.const(<16 x i8> %44, i32 4) %46 = call float @llvm.SI.load.const(<16 x i8> %44, i32 8) %47 = call float @llvm.SI.load.const(<16 x i8> %44, i32 32) %48 = call float @llvm.SI.load.const(<16 x i8> %44, i32 40) %49 = call float @llvm.SI.load.const(<16 x i8> %44, i32 48) %50 = call float @llvm.SI.load.const(<16 x i8> %44, i32 52) %51 = call float @llvm.SI.load.const(<16 x i8> %44, i32 56) %52 = call float @llvm.SI.load.const(<16 x i8> %44, i32 64) %53 = call float @llvm.SI.load.const(<16 x i8> %44, i32 68) %54 = call float @llvm.SI.load.const(<16 x i8> %44, i32 72) %55 = call float @llvm.SI.load.const(<16 x i8> %44, i32 96) %56 = call float @llvm.SI.load.const(<16 x i8> %44, i32 100) %57 = call float @llvm.SI.load.const(<16 x i8> %44, i32 104) %58 = call float @llvm.SI.load.const(<16 x i8> %44, i32 108) %59 = call float @llvm.SI.load.const(<16 x i8> %44, i32 112) %60 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0 %61 = load <8 x i32>, <8 x i32> addrspace(2)* %60, align 32, !tbaa !0 %62 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0 %63 = load <4 x i32>, <4 x i32> addrspace(2)* %62, align 16, !tbaa !0 %64 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1 %65 = bitcast <8 x i32> addrspace(2)* %64 to <32 x i8> addrspace(2)* %66 = load <32 x i8>, <32 x i8> addrspace(2)* %65, align 32, !tbaa !0 %67 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1 %68 = bitcast <4 x i32> addrspace(2)* %67 to <16 x i8> addrspace(2)* %69 = load <16 x i8>, <16 x i8> addrspace(2)* %68, align 16, !tbaa !0 %70 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0 %71 = load <16 x i8>, <16 x i8> addrspace(2)* %70, align 16, !tbaa !0 %72 = add i32 %5, %7 %73 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %71, i32 0, i32 %72) %74 = extractelement <4 x float> %73, i32 0 %75 = extractelement <4 x float> %73, i32 1 %76 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1 %77 = load <16 x i8>, <16 x i8> addrspace(2)* %76, align 16, !tbaa !0 %78 = add i32 %10, %6 %79 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %77, i32 0, i32 %78) %80 = extractelement <4 x float> %79, i32 0 %81 = extractelement <4 x float> %79, i32 2 %82 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2 %83 = load <16 x i8>, <16 x i8> addrspace(2)* %82, align 16, !tbaa !0 %84 = add i32 %10, %6 %85 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %83, i32 0, i32 %84) %86 = extractelement <4 x float> %85, i32 0 %87 = extractelement <4 x float> %85, i32 1 %88 = extractelement <4 x float> %85, i32 2 %89 = extractelement <4 x float> %85, i32 3 %90 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3 %91 = load <16 x i8>, <16 x i8> addrspace(2)* %90, align 16, !tbaa !0 %92 = add i32 %10, %6 %93 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %91, i32 0, i32 %92) %94 = extractelement <4 x float> %93, i32 0 %95 = extractelement <4 x float> %93, i32 1 %96 = extractelement <4 x float> %93, i32 2 %97 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4 %98 = load <16 x i8>, <16 x i8> addrspace(2)* %97, align 16, !tbaa !0 %99 = add i32 %10, %6 %100 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %98, i32 0, i32 %99) %101 = extractelement <4 x float> %100, i32 0 %102 = extractelement <4 x float> %100, i32 3 %103 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 5 %104 = load <16 x i8>, <16 x i8> addrspace(2)* %103, align 16, !tbaa !0 %105 = add i32 %10, %6 %106 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %104, i32 0, i32 %105) %107 = extractelement <4 x float> %106, i32 0 %108 = extractelement <4 x float> %106, i32 1 %109 = extractelement <4 x float> %106, i32 2 %110 = fmul float %74, 5.000000e-01 %111 = fadd float %110, 5.000000e-01 %112 = fmul float %75, -5.000000e-01 %113 = fadd float %112, 5.000000e-01 %114 = fdiv float 1.000000e+00, %14 %115 = fmul float %80, 3.000000e+00 %116 = fcmp olt float %80, %16 %. = select i1 %116, float %115, float 0.000000e+00 %117 = fmul float %15, %. %118 = fadd float %117, %13 %119 = fmul float %14, 5.000000e-01 %120 = fmul float %81, %47 %121 = fmul float %119, %120 %122 = call float @llvm.AMDIL.fraction.(float %121) %123 = call float @floor(float %121) %124 = fmul float %123, 2.000000e+00 %125 = fadd float %14, -2.000000e+00 %126 = call float @llvm.minnum.f32(float %125, float %124) %127 = fdiv float 1.000000e+00, %14 %128 = fmul float %124, %127 %129 = call float @floor(float %128) %130 = fmul float %14, %129 %131 = fsub float %124, %130 %132 = bitcast float %118 to i32 %133 = insertelement <4 x i32> , i32 %132, i32 1 %134 = insertelement <4 x i32> %133, i32 0, i32 2 %135 = bitcast <8 x i32> %61 to <32 x i8> %136 = bitcast <4 x i32> %63 to <16 x i8> %137 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %134, <32 x i8> %135, <16 x i8> %136, i32 2) %138 = extractelement <4 x float> %137, i32 2 %139 = fcmp ogt float %138, 0.000000e+00 %temp16.0 = select i1 %139, float %126, float %131 %140 = fmul float %114, %temp16.0 %141 = bitcast float %140 to i32 %142 = bitcast float %118 to i32 %143 = insertelement <4 x i32> undef, i32 %141, i32 0 %144 = insertelement <4 x i32> %143, i32 %142, i32 1 %145 = insertelement <4 x i32> %144, i32 0, i32 2 %146 = bitcast <8 x i32> %61 to <32 x i8> %147 = bitcast <4 x i32> %63 to <16 x i8> %148 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %145, <32 x i8> %146, <16 x i8> %147, i32 2) %149 = extractelement <4 x float> %148, i32 0 %150 = extractelement <4 x float> %148, i32 3 %151 = call float @llvm.AMDGPU.lrp(float %122, float %150, float %149) %152 = fadd float %118, %15 %153 = bitcast float %140 to i32 %154 = bitcast float %152 to i32 %155 = insertelement <4 x i32> undef, i32 %153, i32 0 %156 = insertelement <4 x i32> %155, i32 %154, i32 1 %157 = insertelement <4 x i32> %156, i32 0, i32 2 %158 = bitcast <8 x i32> %61 to <32 x i8> %159 = bitcast <4 x i32> %63 to <16 x i8> %160 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %157, <32 x i8> %158, <16 x i8> %159, i32 2) %161 = extractelement <4 x float> %160, i32 0 %162 = extractelement <4 x float> %160, i32 1 %163 = extractelement <4 x float> %160, i32 2 %164 = extractelement <4 x float> %160, i32 3 %165 = fmul float %15, 2.000000e+00 %166 = fadd float %165, %118 %167 = bitcast float %140 to i32 %168 = bitcast float %166 to i32 %169 = insertelement <4 x i32> undef, i32 %167, i32 0 %170 = insertelement <4 x i32> %169, i32 %168, i32 1 %171 = insertelement <4 x i32> %170, i32 0, i32 2 %172 = bitcast <8 x i32> %61 to <32 x i8> %173 = bitcast <4 x i32> %63 to <16 x i8> %174 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %171, <32 x i8> %172, <16 x i8> %173, i32 2) %175 = extractelement <4 x float> %174, i32 0 %176 = extractelement <4 x float> %174, i32 1 %177 = extractelement <4 x float> %174, i32 2 %178 = extractelement <4 x float> %174, i32 3 %179 = fadd float %114, %140 %180 = bitcast float %179 to i32 %181 = bitcast float %152 to i32 %182 = insertelement <4 x i32> undef, i32 %180, i32 0 %183 = insertelement <4 x i32> %182, i32 %181, i32 1 %184 = insertelement <4 x i32> %183, i32 0, i32 2 %185 = bitcast <8 x i32> %61 to <32 x i8> %186 = bitcast <4 x i32> %63 to <16 x i8> %187 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %184, <32 x i8> %185, <16 x i8> %186, i32 2) %188 = extractelement <4 x float> %187, i32 0 %189 = extractelement <4 x float> %187, i32 1 %190 = extractelement <4 x float> %187, i32 2 %191 = extractelement <4 x float> %187, i32 3 %192 = fsub float %190, %188 %193 = fsub float %191, %189 %194 = fsub float %163, %161 %195 = fsub float %164, %162 %196 = fdiv float 1.000000e+00, %192 %197 = fdiv float 1.000000e+00, %193 %198 = fmul float %194, %196 %199 = fmul float %195, %197 %200 = fmul float %188, %198 %201 = fmul float %189, %199 %202 = fsub float %161, %200 %203 = fsub float %162, %201 %204 = fadd float %202, %198 %205 = fadd float %203, %199 %206 = fsub float %177, %175 %207 = fsub float %178, %176 %208 = fdiv float 1.000000e+00, %192 %209 = fdiv float 1.000000e+00, %193 %210 = fmul float %206, %208 %211 = fmul float %207, %209 %212 = fmul float %188, %210 %213 = fmul float %189, %211 %214 = fsub float %175, %212 %215 = fsub float %176, %213 %216 = fadd float %214, %210 %217 = fadd float %215, %211 %218 = fmul float %151, 5.000000e-01 %219 = fadd float %218, 5.000000e-01 %220 = fadd float %111, -5.000000e-01 %221 = fadd float %113, -5.000000e-01 %222 = fadd float %111, -5.000000e-01 %223 = fadd float %113, -5.000000e-01 %224 = fdiv float 1.000000e+00, %48 %225 = call float @llvm.AMDIL.clamp.(float %224, float 0.000000e+00, float 1.000000e+00) %226 = fmul float %220, %225 %227 = fadd float %226, 5.000000e-01 %228 = fmul float %221, %225 %229 = fadd float %228, 5.000000e-01 %230 = fmul float %222, %225 %231 = fadd float %230, 5.000000e-01 %232 = fmul float %223, %225 %233 = fadd float %232, 5.000000e-01 %234 = call float @llvm.AMDGPU.lrp(float %219, float %227, float %111) %235 = call float @llvm.AMDGPU.lrp(float %219, float %229, float %113) %236 = call float @llvm.AMDGPU.lrp(float %218, float %231, float %111) %237 = call float @llvm.AMDGPU.lrp(float %218, float %233, float %113) %238 = fmul float %57, %234 %239 = fadd float %238, %55 %240 = fmul float %58, %235 %241 = fadd float %240, %56 %242 = fmul float %57, %236 %243 = fadd float %242, %55 %244 = fmul float %58, %237 %245 = fadd float %244, %56 %246 = fsub float %55, %239 %247 = fsub float %56, %241 %248 = fsub float %55, %243 %249 = fsub float %56, %245 %250 = fmul float %246, 2.000000e+00 %251 = fadd float %250, %57 %252 = fmul float %247, 2.000000e+00 %253 = fadd float %252, %58 %254 = fmul float %248, 2.000000e+00 %255 = fadd float %254, %57 %256 = fmul float %249, 2.000000e+00 %257 = fadd float %256, %58 %258 = fcmp olt float %251, 0.000000e+00 %.96 = select i1 %258, float %190, float %188 %259 = fcmp olt float %253, 0.000000e+00 %temp48.0 = select i1 %259, float %191, float %189 %260 = fsub float %.96, %239 %261 = fsub float %temp48.0, %241 %262 = fdiv float 1.000000e+00, %251 %263 = fdiv float 1.000000e+00, %253 %264 = fmul float %260, %262 %265 = fmul float %261, %263 %266 = call float @llvm.AMDIL.clamp.(float %264, float 0.000000e+00, float 1.000000e+00) %267 = call float @llvm.AMDIL.clamp.(float %265, float 0.000000e+00, float 1.000000e+00) %268 = fmul float %266, %251 %269 = fadd float %268, %239 %270 = fmul float %267, %253 %271 = fadd float %270, %241 %272 = fmul float %266, %255 %273 = fadd float %272, %243 %274 = fmul float %267, %257 %275 = fadd float %274, %245 %276 = fmul float %266, 2.000000e+00 %277 = fmul float %267, 2.000000e+00 %278 = fsub float 1.000000e+00, %276 %279 = fsub float 1.000000e+00, %277 %280 = fmul float %278, %74 %281 = fmul float %279, %75 %282 = call float @llvm.AMDGPU.lrp(float %269, float %204, float %202) %283 = call float @llvm.AMDGPU.lrp(float %271, float %205, float %203) %284 = call float @llvm.AMDGPU.lrp(float %273, float %216, float %214) %285 = call float @llvm.AMDGPU.lrp(float %275, float %217, float %215) %286 = fmul float %86, 0x3FB3D07220000000 %287 = fmul float %87, 0x3FB3D07220000000 %288 = fmul float %88, 0x3FB3D07220000000 %289 = fmul float %86, 0x3FEE54EDE0000000 %290 = fadd float %289, 0x3FAAB12320000000 %291 = fmul float %87, 0x3FEE54EDE0000000 %292 = fadd float %291, 0x3FAAB12320000000 %293 = fmul float %88, 0x3FEE54EDE0000000 %294 = fadd float %293, 0x3FAAB12320000000 %295 = call float @llvm.pow.f32(float %290, float 0x4003333340000000) %296 = call float @llvm.pow.f32(float %292, float 0x4003333340000000) %297 = call float @llvm.pow.f32(float %294, float 0x4003333340000000) %298 = fcmp ogt float %86, 0x3FA4B5DCC0000000 %.97 = select i1 %298, float %295, float %286 %299 = fcmp ogt float %87, 0x3FA4B5DCC0000000 %temp24.0 = select i1 %299, float %296, float %287 %300 = fcmp ogt float %88, 0x3FA4B5DCC0000000 %.98 = select i1 %300, float %297, float %288 %301 = call float @llvm.AMDGPU.lrp(float %59, float %.97, float %86) %302 = call float @llvm.AMDGPU.lrp(float %59, float %temp24.0, float %87) %303 = call float @llvm.AMDGPU.lrp(float %59, float %.98, float %88) %304 = call float @llvm.AMDGPU.lrp(float %59, float %89, float %89) %305 = call float @fabs(float %109) %306 = fcmp ogt float %305, 0x3FECCCCCC0000000 %temp16.2 = select i1 %306, float 1.000000e+00, float 0.000000e+00 %temp18.0 = select i1 %306, float 0.000000e+00, float 1.000000e+00 %307 = fmul float %109, 0.000000e+00 %308 = fmul float %107, %temp18.0 %309 = fmul float %108, %temp16.2 %310 = fmul float %108, %temp18.0 %311 = fsub float %310, %307 %312 = fmul float %109, %temp16.2 %313 = fsub float %312, %308 %314 = fmul float %107, 0.000000e+00 %315 = fsub float %314, %309 %316 = fmul float %311, %311 %317 = fmul float %313, %313 %318 = fadd float %317, %316 %319 = fmul float %315, %315 %320 = fadd float %318, %319 %321 = call float @llvm.AMDGPU.rsq.clamped.f32(float %320) %322 = fmul float %311, %321 %323 = fmul float %313, %321 %324 = fmul float %315, %321 %325 = fmul float %109, %323 %326 = fmul float %107, %324 %327 = fmul float %108, %322 %328 = fmul float %108, %324 %329 = fsub float %328, %325 %330 = fmul float %109, %322 %331 = fsub float %330, %326 %332 = fmul float %107, %323 %333 = fsub float %332, %327 %334 = fmul float %107, %107 %335 = fmul float %108, %108 %336 = fadd float %335, %334 %337 = fmul float %109, %109 %338 = fadd float %336, %337 %339 = call float @llvm.AMDGPU.rsq.clamped.f32(float %338) %340 = fmul float %107, %339 %341 = fmul float %108, %339 %342 = fmul float %109, %339 %343 = fmul float %340, %22 %344 = fmul float %341, %23 %345 = fadd float %344, %343 %346 = fmul float %342, %24 %347 = fadd float %345, %346 %348 = call float @fabs(float %347) %349 = fsub float %348, %52 %350 = fsub float %53, %52 %351 = fdiv float 1.000000e+00, %350 %352 = fmul float %349, %351 %353 = call float @llvm.AMDIL.clamp.(float %352, float 0.000000e+00, float 1.000000e+00) %354 = fmul float %353, 2.000000e+00 %355 = fsub float 3.000000e+00, %354 %356 = fmul float %353, %355 %357 = fmul float %353, %356 %358 = fsub float 1.000000e+00, %357 %359 = fmul float %358, %304 %360 = call float @llvm.cos.f32(float %101) %361 = call float @llvm.sin.f32(float %101) %362 = fmul float %360, %329 %363 = fmul float %360, %331 %364 = fmul float %360, %333 %365 = fmul float %361, %322 %366 = fadd float %365, %362 %367 = fmul float %361, %323 %368 = fadd float %367, %363 %369 = fmul float %361, %324 %370 = fadd float %369, %364 %371 = fmul float %361, %329 %372 = fmul float %361, %331 %373 = fmul float %361, %333 %374 = fmul float %360, %322 %375 = fsub float %374, %371 %376 = fmul float %360, %323 %377 = fsub float %376, %372 %378 = fmul float %360, %324 %379 = fsub float %378, %373 %380 = fmul float %375, %375 %381 = fmul float %377, %377 %382 = fmul float %379, %379 %383 = fsub float 1.000000e+00, %380 %384 = fadd float %383, %380 %385 = fsub float 1.000000e+00, %381 %386 = fadd float %385, %381 %387 = fsub float 1.000000e+00, %382 %388 = fadd float %387, %382 %389 = fmul float %366, %384 %390 = fmul float %368, 0.000000e+00 %391 = fadd float %390, %389 %392 = fmul float %370, 0.000000e+00 %393 = fadd float %391, %392 %394 = fmul float %366, 0.000000e+00 %395 = fmul float %368, %386 %396 = fadd float %395, %394 %397 = fmul float %370, 0.000000e+00 %398 = fadd float %396, %397 %399 = fmul float %366, 0.000000e+00 %400 = fmul float %368, 0.000000e+00 %401 = fadd float %400, %399 %402 = fmul float %370, %388 %403 = fadd float %401, %402 %404 = fmul float %107, %384 %405 = fmul float %108, 0.000000e+00 %406 = fadd float %405, %404 %407 = fmul float %109, 0.000000e+00 %408 = fadd float %406, %407 %409 = fmul float %107, 0.000000e+00 %410 = fmul float %108, %386 %411 = fadd float %410, %409 %412 = fmul float %109, 0.000000e+00 %413 = fadd float %411, %412 %414 = fmul float %107, 0.000000e+00 %415 = fmul float %108, 0.000000e+00 %416 = fadd float %415, %414 %417 = fmul float %109, %388 %418 = fadd float %416, %417 %419 = fadd float %51, %281 %420 = fadd float %50, %280 %421 = fmul float %393, %420 %422 = fmul float %398, %420 %423 = fmul float %403, %420 %424 = fmul float %419, %375 %425 = fadd float %424, %421 %426 = fmul float %419, %377 %427 = fadd float %426, %422 %428 = fmul float %419, %379 %429 = fadd float %428, %423 %430 = fsub float %19, %94 %431 = fsub float %20, %95 %432 = fsub float %21, %96 %433 = fmul float %430, %430 %434 = fmul float %431, %431 %435 = fadd float %434, %433 %436 = fmul float %432, %432 %437 = fadd float %435, %436 %438 = call float @llvm.sqrt.f32(float %437) %439 = fcmp ogt float %54, 0.000000e+00 br i1 %439, label %IF82, label %ENDIF81 IF82: ; preds = %main_body %440 = call float @llvm.SI.load.const(<16 x i8> %44, i32 12) %441 = call float @llvm.SI.load.const(<16 x i8> %44, i32 0) %442 = fmul float %438, %45 %443 = fcmp olt float %442, %102 %444 = fsub float %102, %442 %445 = fsub float %46, %45 %446 = fmul float %445, %438 %447 = fdiv float 1.000000e+00, %446 %448 = fmul float %444, %447 %449 = fsub float 1.000000e+00, %448 %450 = fmul float %449, %301 %451 = fmul float %449, %302 %452 = fmul float %449, %303 %453 = fmul float %449, %359 %454 = fmul float %438, %46 %455 = fcmp olt float %454, %102 %.99 = select i1 %455, float 0.000000e+00, float %450 %.100 = select i1 %455, float 0.000000e+00, float %451 %.101 = select i1 %455, float 0.000000e+00, float %452 %.102 = select i1 %455, float 0.000000e+00, float %453 %.99. = select i1 %443, float %.99, float %301 %.100. = select i1 %443, float %.100, float %302 %.101. = select i1 %443, float %.101, float %303 %.102. = select i1 %443, float %.102, float %359 %456 = fmul float %438, %441 %457 = call float @llvm.maxnum.f32(float %102, float %456) %458 = fmul float %438, %440 %459 = call float @llvm.minnum.f32(float %457, float %458) br label %ENDIF81 ENDIF81: ; preds = %main_body, %IF82 %temp16.3 = phi float [ %.99., %IF82 ], [ %301, %main_body ] %temp17.1 = phi float [ %.100., %IF82 ], [ %302, %main_body ] %temp18.1 = phi float [ %.101., %IF82 ], [ %303, %main_body ] %temp19.0 = phi float [ %.102., %IF82 ], [ %359, %main_body ] %temp20.1 = phi float [ %459, %IF82 ], [ %102, %main_body ] %460 = fcmp olt float %temp19.0, 0x3F70101060000000 %.temp20.1 = select i1 %460, float 0.000000e+00, float %temp20.1 %461 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> , <32 x i8> %66, <16 x i8> %69, i32 2) %462 = extractelement <4 x float> %461, i32 0 %463 = extractelement <4 x float> %461, i32 1 %464 = extractelement <4 x float> %461, i32 2 %465 = fmul float %462, 0x3E7AD7F2A0000000 %466 = fmul float %463, 0x3E7AD7F2A0000000 %467 = fmul float %464, 0x3E7AD7F2A0000000 %468 = fmul float %.temp20.1, %425 %469 = fadd float %468, %94 %470 = fmul float %.temp20.1, %427 %471 = fadd float %470, %95 %472 = fmul float %.temp20.1, %429 %473 = fadd float %472, %96 %474 = fadd float %465, %469 %475 = fadd float %466, %471 %476 = fadd float %467, %473 %477 = fmul float %474, %27 %478 = fmul float %475, %31 %479 = fadd float %477, %478 %480 = fmul float %476, %35 %481 = fadd float %479, %480 %482 = fadd float %481, %39 %483 = fmul float %474, %28 %484 = fmul float %475, %32 %485 = fadd float %483, %484 %486 = fmul float %476, %36 %487 = fadd float %485, %486 %488 = fadd float %487, %40 %489 = fmul float %474, %30 %490 = fmul float %475, %34 %491 = fadd float %489, %490 %492 = fmul float %476, %38 %493 = fadd float %491, %492 %494 = fadd float %493, %42 %495 = fmul float %22, %49 %496 = fadd float %495, %474 %497 = fmul float %23, %49 %498 = fadd float %497, %475 %499 = fmul float %24, %49 %500 = fadd float %499, %476 %501 = fmul float %496, %29 %502 = fmul float %498, %33 %503 = fadd float %501, %502 %504 = fmul float %500, %37 %505 = fadd float %503, %504 %506 = fadd float %505, %41 %507 = fmul float %496, %30 %508 = fmul float %498, %34 %509 = fadd float %507, %508 %510 = fmul float %500, %38 %511 = fadd float %509, %510 %512 = fadd float %511, %42 %513 = fdiv float 1.000000e+00, %512 %514 = fmul float %506, %513 %515 = fmul float %514, %494 %516 = fmul float %474, %29 %517 = fmul float %475, %33 %518 = fadd float %516, %517 %519 = fmul float %476, %37 %520 = fadd float %518, %519 %521 = fadd float %520, %41 %522 = call float @llvm.minnum.f32(float %521, float 0x3F50624DE0000000) %523 = call float @llvm.maxnum.f32(float %515, float %522) %524 = fsub float %474, %19 %525 = fsub float %475, %20 %526 = fsub float %476, %21 %527 = fmul float %22, %524 %528 = fmul float %23, %525 %529 = fadd float %528, %527 %530 = fmul float %24, %526 %531 = fadd float %529, %530 %532 = fmul float %524, %22 %533 = fmul float %525, %23 %534 = fadd float %533, %532 %535 = fmul float %526, %24 %536 = fadd float %534, %535 %537 = fsub float %536, %25 %538 = fsub float %26, %25 %539 = fdiv float 1.000000e+00, %538 %540 = fmul float %537, %539 %541 = fsub float %474, %19 %542 = fsub float %475, %20 %543 = fsub float %476, %21 %544 = fmul float %408, %541 %545 = fmul float %413, %542 %546 = fadd float %545, %544 %547 = fmul float %418, %543 %548 = fadd float %546, %547 %549 = fcmp olt float %548, 0.000000e+00 br i1 %549, label %ENDIF93, label %ELSE95 ELSE95: ; preds = %ENDIF81 %550 = fsub float -0.000000e+00, %408 %551 = fsub float -0.000000e+00, %413 %552 = fsub float -0.000000e+00, %418 br label %ENDIF93 ENDIF93: ; preds = %ENDIF81, %ELSE95 %temp.0 = phi float [ %550, %ELSE95 ], [ %408, %ENDIF81 ] %temp1.0 = phi float [ %551, %ELSE95 ], [ %413, %ENDIF81 ] %temp2.0 = phi float [ %552, %ELSE95 ], [ %418, %ENDIF81 ] %553 = fsub float -0.000000e+00, %488 %554 = fmul float %523, 2.000000e+00 %555 = fsub float %554, %494 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %282, float %283, float %284, float %285) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %temp16.3, float %temp17.1, float %temp18.1, float %temp19.0) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %482, float %488, float %540, float %494) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %474, float %475, float %476, float %531) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %temp.0, float %temp1.0, float %temp2.0, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %151, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %482, float %553, float %555, float %494) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: readnone declare float @floor(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.pow.f32(float, float) #1 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.cos.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sin.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[12:15], s[2:3], 0x4 ; C0860304 v_add_i32_e32 v0, s10, v0 ; 4A00000A v_add_i32_e32 v11, s11, v3 ; 4A16060B s_load_dwordx4 s[16:19], s[2:3], 0x10 ; C0880310 v_mov_b32_e32 v18, 0x3d558919 ; 7E2402FF 3D558919 v_mov_b32_e32 v13, 0x3f72a76f ; 7E1A02FF 3F72A76F v_mov_b32_e32 v14, 0x3d9e8391 ; 7E1C02FF 3D9E8391 v_mov_b32_e32 v15, 0x3d25aee6 ; 7E1E02FF 3D25AEE6 s_load_dwordx4 s[20:23], s[2:3], 0x14 ; C08A0314 s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s11, s[12:15], 0x0 ; C2058D00 s_buffer_load_dword s40, s[12:15], 0x1 ; C2140D01 s_buffer_load_dword s10, s[12:15], 0x2 ; C2050D02 s_buffer_load_dword s41, s[12:15], 0x3 ; C2148D03 s_load_dwordx4 s[24:27], s[8:9], 0x4 ; C08C0904 s_load_dwordx4 s[28:31], s[8:9], 0x8 ; C08E0908 s_load_dwordx4 s[32:35], s[8:9], 0xc ; C090090C s_load_dwordx4 s[36:39], s[8:9], 0x10 ; C0920910 s_buffer_load_dword s42, s[20:23], 0x8 ; C2151508 buffer_load_format_xyzw v[7:10], v0, s[0:3], 0 idxen ; E00C2000 80000700 s_buffer_load_dword s0, s[20:23], 0xa ; C200150A s_buffer_load_dword s12, s[20:23], 0xc ; C206150C s_buffer_load_dword s14, s[20:23], 0xd ; C207150D s_buffer_load_dword s13, s[20:23], 0xe ; C206950E s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[19:22], v11, s[24:27], 0 idxen ; E00C2000 8006130B s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[22:25], v11, s[28:31], 0 idxen ; E00C2000 8007160B buffer_load_format_xyzw v[0:3], v11, s[32:35], 0 idxen ; E00C2000 8008000B v_mul_f32_e64 v16, 0.5, s40 ; D2100010 000050F0 s_buffer_load_dword s26, s[20:23], 0x1a ; C20D151A s_buffer_load_dword s24, s[20:23], 0x1b ; C20C151B s_buffer_load_dword s15, s[20:23], 0x1c ; C207951C s_buffer_load_dword s27, s[20:23], 0x18 ; C20D9518 s_buffer_load_dword s25, s[20:23], 0x19 ; C20C9519 s_load_dwordx4 s[44:47], s[8:9], 0x14 ; C0960914 s_load_dwordx4 s[28:31], s[4:5], 0x0 ; C08E0500 v_rcp_f32_e32 v9, s0 ; 7E125400 v_add_f32_e64 v17, -2.0, s40 ; D2060011 000050F5 s_waitcnt vmcnt(0) ; BF8C0770 buffer_load_format_xyzw v[3:6], v11, s[36:39], 0 idxen ; E00C2000 8009030B s_load_dwordx8 s[32:39], s[6:7], 0x0 ; C0D00700 v_add_f32_e64 v20, 0, v9 clamp ; D2060814 00021280 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_sub_f32_e64 v5, 1.0, s15 ; D2080005 00001EF2 v_mov_b32_e32 v26, s27 ; 7E34021B v_rcp_f32_e32 v27, s40 ; 7E365428 v_mov_b32_e32 v30, s25 ; 7E3C0219 v_mov_b32_e32 v36, s11 ; 7E48020B buffer_load_format_xyzw v[9:12], v11, s[44:47], 0 idxen ; E00C2000 800B090B v_mad_f32 v31, 0.5, v7, 0.5 ; D282001F 03C20EF0 v_add_f32_e32 v4, -0.5, v31 ; 06083EF1 v_mad_f32 v32, v4, v20, 0.5 ; D2820020 03C22904 v_cmp_gt_f32_e64 s[8:9], s41, v19 ; D0080008 00022629 v_cmp_gt_f32_e64 s[2:3], v22, v15 ; D0080002 00021F16 v_cmp_gt_f32_e64 s[0:1], v23, v15 ; D0080000 00021F17 v_cmp_gt_f32_e32 vcc, v24, v15 ; 7C081F18 v_mul_f32_e32 v15, s42, v21 ; 101E2A2A v_mul_f32_e32 v4, 0x40400000, v19 ; 100826FF 40400000 v_cndmask_b32_e64 v4, 0, v4, s[8:9] ; D2000004 00220880 v_mac_f32_e32 v36, s10, v4 ; 3E48080A v_mov_b32_e32 v35, 0 ; 7E460280 v_mov_b32_e32 v37, v35 ; 7E4A0323 image_sample_l v4, 4, 0, 0, 0, 0, 0, 0, 0, v[35:38], s[32:39], s[28:31] ; F0900400 00E80423 s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_lt_f32_e64 s[8:9], 0, v4 ; D0020008 00020880 v_mul_f32_e32 v4, v15, v16 ; 1008210F v_floor_f32_e32 v19, v4 ; 7E264904 v_add_f32_e32 v4, v19, v19 ; 06082713 v_min_f32_e32 v12, v4, v17 ; 1E182304 v_mul_f32_e32 v17, v27, v4 ; 1022091B v_floor_f32_e32 v17, v17 ; 7E224911 v_mad_f32 v4, -s40, v17, v4 ; D2820004 24122228 v_cndmask_b32_e64 v4, v4, v12, s[8:9] ; D2000004 00221904 v_mul_f32_e32 v33, v4, v27 ; 10423704 v_mad_f32 v37, v13, v22, v18 ; D2820025 044A2D0D v_mad_f32 v38, v13, v23, v18 ; D2820026 044A2F0D v_mac_f32_e32 v18, v13, v24 ; 3E24310D v_mov_b32_e32 v34, v36 ; 7E440324 v_add_f32_e32 v28, s10, v36 ; 0638480A image_sample_l v[39:40], 9, 0, 0, 0, 0, 0, 0, 0, v[33:36], s[32:39], s[28:31] ; F0900900 00E82721 v_mov_b32_e32 v34, v28 ; 7E44031C v_mac_f32_e32 v27, v4, v27 ; 3E363704 image_sample_l v[41:44], 15, 0, 0, 0, 0, 0, 0, 0, v[33:36], s[32:39], s[28:31] ; F0900F00 00E82921 v_mov_b32_e32 v34, v36 ; 7E440324 v_mac_f32_e64 v34, 2.0, s10 ; D23E0022 000014F4 v_mul_f32_e32 v36, v14, v22 ; 10482D0E v_mul_f32_e32 v45, v14, v23 ; 105A2F0E v_mul_f32_e32 v46, v14, v24 ; 105C310E v_mul_f32_e32 v4, v22, v5 ; 10080B16 v_mul_f32_e32 v12, v23, v5 ; 10180B17 v_mul_f32_e32 v13, v24, v5 ; 101A0B18 v_mul_f32_e32 v23, v25, v5 ; 102E0B19 v_mac_f32_e32 v23, s15, v25 ; 3E2E320F v_mov_b32_e32 v29, v35 ; 7E3A0323 image_sample_l v[47:50], 15, 0, 0, 0, 0, 0, 0, 0, v[33:36], s[32:39], s[28:31] ; F0900F00 00E82F21 image_sample_l v[51:54], 15, 0, 0, 0, 0, 0, 0, 0, v[27:30], s[32:39], s[28:31] ; F0900F00 00E8331B s_waitcnt vmcnt(0) ; BF8C0770 v_subrev_f32_e32 v5, v51, v53 ; 0A0A6B33 v_rcp_f32_e32 v17, v5 ; 7E225505 v_subrev_f32_e32 v5, v52, v54 ; 0A0A6D34 v_rcp_f32_e32 v24, v5 ; 7E305505 v_subrev_f32_e32 v14, v41, v43 ; 0A1C5729 v_mul_f32_e32 v5, v17, v14 ; 100A1D11 v_mad_f32 v22, -v51, v5, v41 ; D2820016 24A60B33 v_subrev_f32_e32 v25, v42, v44 ; 0A32592A v_mul_f32_e32 v5, v24, v25 ; 100A3318 v_mad_f32 v27, -v52, v5, v42 ; D282001B 24AA0B34 v_subrev_f32_e32 v28, v47, v49 ; 0A38632F v_mul_f32_e32 v5, v17, v28 ; 100A3911 v_mad_f32 v29, -v51, v5, v47 ; D282001D 24BE0B33 v_subrev_f32_e32 v33, v48, v50 ; 0A426530 v_mul_f32_e32 v5, v24, v33 ; 100A4318 v_mad_f32 v34, -v52, v5, v48 ; D2820022 24C20B34 v_mad_f32 v15, v16, v15, -v19 ; D282000F 844E1F10 v_sub_f32_e32 v5, 1.0, v15 ; 080A1EF2 v_mul_f32_e32 v5, v39, v5 ; 100A0B27 v_mac_f32_e32 v5, v40, v15 ; 3E0A1F28 v_mad_f32 v15, -0.5, v8, 0.5 ; D282000F 03C210F1 v_add_f32_e32 v16, -0.5, v15 ; 06201EF1 v_mad_f32 v16, v16, v20, 0.5 ; D2820010 03C22910 v_mad_f32 v19, 0.5, v5, 0.5 ; D2820013 03C20AF0 v_sub_f32_e32 v20, 1.0, v19 ; 082826F2 v_mul_f32_e32 v21, v31, v20 ; 102A291F v_mul_f32_e32 v20, v15, v20 ; 1028290F v_mac_f32_e32 v21, v32, v19 ; 3E2A2720 v_mac_f32_e32 v20, v16, v19 ; 3E282710 v_mad_f32 v19, s26, v21, v26 ; D2820013 046A2A1A v_mad_f32 v20, s24, v20, v30 ; D2820014 047A2818 v_sub_f32_e32 v21, s27, v19 ; 082A261B v_mad_f32 v26, 2.0, v21, s26 ; D282001A 006A2AF4 v_sub_f32_e32 v21, s25, v20 ; 082A2819 v_mad_f32 v30, 2.0, v21, s24 ; D282001E 00622AF4 v_cmp_gt_f32_e64 s[8:9], 0, v26 ; D0080008 00023480 v_cmp_gt_f32_e64 s[10:11], 0, v30 ; D008000A 00023C80 v_cndmask_b32_e64 v21, v51, v53, s[8:9] ; D2000015 00226B33 v_cndmask_b32_e64 v35, v52, v54, s[10:11] ; D2000023 002A6D34 v_mad_f32 v39, 0.5, -v5, 1.0 ; D2820027 43CA0AF0 v_mul_f32_e32 v31, v31, v39 ; 103E4F1F v_mul_f32_e32 v39, v15, v39 ; 104E4F0F v_mul_f32_e32 v15, 0.5, v5 ; 101E0AF0 v_rcp_f32_e32 v40, v26 ; 7E50551A v_mac_f32_e32 v31, v32, v15 ; 3E3E1F20 v_mac_f32_e32 v39, v16, v15 ; 3E4E1F10 v_subrev_f32_e32 v15, v19, v21 ; 0A1E2B13 v_mul_f32_e32 v15, v40, v15 ; 101E1F28 v_add_f32_e64 v21, 0, v15 clamp ; D2060815 00021E80 v_mac_f32_e32 v19, v26, v21 ; 3E262B1A v_mad_f32 v15, v17, v14, v22 ; D282000F 045A1D11 v_rcp_f32_e32 v16, v30 ; 7E20551E v_sub_f32_e32 v14, 1.0, v19 ; 081C26F2 v_mul_f32_e32 v14, v22, v14 ; 101C1D16 v_mac_f32_e32 v14, v15, v19 ; 3E1C270F v_subrev_f32_e32 v15, v20, v35 ; 0A1E4714 v_mul_f32_e32 v15, v16, v15 ; 101E1F10 v_add_f32_e64 v22, 0, v15 clamp ; D2060816 00021E80 v_mac_f32_e32 v20, v30, v22 ; 3E282D1E v_mad_f32 v16, v24, v25, v27 ; D2820010 046E3318 v_sub_f32_e32 v15, 1.0, v20 ; 081E28F2 v_mul_f32_e32 v15, v27, v15 ; 101E1F1B v_mac_f32_e32 v15, v16, v20 ; 3E1E2910 v_mov_b32_e32 v16, s27 ; 7E20021B v_mad_f32 v19, s26, v31, v16 ; D2820013 04423E1A v_mad_f32 v17, v17, v28, v29 ; D2820011 04763911 v_sub_f32_e32 v16, s27, v19 ; 0820261B v_mad_f32 v16, 2.0, v16, s26 ; D2820010 006A20F4 v_mac_f32_e32 v19, v16, v21 ; 3E262B10 v_sub_f32_e32 v16, 1.0, v19 ; 082026F2 v_mul_f32_e32 v16, v29, v16 ; 1020211D v_mac_f32_e32 v16, v17, v19 ; 3E202711 v_mov_b32_e32 v17, s25 ; 7E220219 v_mad_f32 v19, s24, v39, v17 ; D2820013 04464E18 v_mad_f32 v20, v24, v33, v34 ; D2820014 048A4318 v_sub_f32_e32 v17, s25, v19 ; 08222619 v_mad_f32 v17, 2.0, v17, s24 ; D2820011 006222F4 v_mac_f32_e32 v19, v17, v22 ; 3E262D11 v_sub_f32_e32 v17, 1.0, v19 ; 082226F2 v_mul_f32_e32 v17, v34, v17 ; 10222322 v_mac_f32_e32 v17, v20, v19 ; 3E222714 v_mov_b32_e32 v19, 0x3f666666 ; 7E2602FF 3F666666 v_cmp_gt_f32_e64 s[10:11], |v11|, v19 ; D008010A 0002270B v_log_f32_e32 v19, v37 ; 7E264F25 v_log_f32_e32 v20, v38 ; 7E284F26 v_log_f32_e32 v18, v18 ; 7E244F12 v_mov_b32_e32 v24, 0x4019999a ; 7E3002FF 4019999A v_mul_legacy_f32_e32 v19, v24, v19 ; 0E262718 v_mul_legacy_f32_e32 v20, v24, v20 ; 0E282918 v_mul_legacy_f32_e32 v18, v24, v18 ; 0E242518 v_exp_f32_e32 v19, v19 ; 7E264B13 v_cndmask_b32_e64 v19, v36, v19, s[2:3] ; D2000013 000A2724 v_exp_f32_e32 v20, v20 ; 7E284B14 v_cndmask_b32_e64 v20, v45, v20, s[0:1] ; D2000014 0002292D v_exp_f32_e32 v18, v18 ; 7E244B12 v_cndmask_b32_e32 v18, v46, v18 ; 0024252E v_mac_f32_e32 v4, s15, v19 ; 3E08260F v_mul_f32_e32 v19, v9, v9 ; 10261309 v_mac_f32_e32 v19, v10, v10 ; 3E26150A s_buffer_load_dword s2, s[16:19], 0x50 ; C2011150 v_mac_f32_e32 v19, v11, v11 ; 3E26170B s_buffer_load_dword s3, s[16:19], 0x51 ; C2019151 s_buffer_load_dword s0, s[20:23], 0x10 ; C2001510 s_buffer_load_dword s1, s[20:23], 0x11 ; C2009511 v_rsq_clamp_f32_e32 v19, v19 ; 7E265913 v_mac_f32_e32 v12, s15, v20 ; 3E18280F v_mac_f32_e32 v13, s15, v18 ; 3E1A240F s_buffer_load_dword s8, s[16:19], 0x52 ; C2041152 v_mul_f32_e32 v18, v19, v9 ; 10241313 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v18, s2, v18 ; 10242402 v_mul_f32_e32 v20, v19, v10 ; 10281513 v_mac_f32_e32 v18, s3, v20 ; 3E242803 s_buffer_load_dword s24, s[20:23], 0x12 ; C20C1512 v_mov_b32_e32 v20, s0 ; 7E280200 v_sub_f32_e32 v20, s1, v20 ; 08282801 v_mul_f32_e32 v19, v19, v11 ; 10261713 v_rcp_f32_e32 v20, v20 ; 7E285514 s_buffer_load_dword s15, s[16:19], 0x5d ; C207915D v_mac_f32_e32 v18, s8, v19 ; 3E242608 v_sub_f32_e64 v18, |v18|, s0 ; D2080112 00000112 v_mul_f32_e32 v18, v20, v18 ; 10242514 v_add_f32_e64 v18, 0, v18 clamp ; D2060812 00022480 v_madak_f32_e32 v19, 2.0, v18, 0xc0400000 ; 422624F4 C0400000 v_mul_f32_e32 v19, v19, v18 ; 10262513 v_mad_f32 v18, v18, v19, 1.0 ; D2820012 03CA2712 v_mul_f32_e32 v18, v23, v18 ; 10242517 v_mul_f32_e32 v3, 0x3e22f983, v3 ; 100606FF 3E22F983 v_fract_f32_e32 v19, v3 ; 7E264103 v_cndmask_b32_e64 v20, 0, 1.0, s[10:11] ; D2000014 0029E480 v_cndmask_b32_e64 v23, 1.0, 0, s[10:11] ; D2000017 002900F2 v_mul_f32_e32 v3, v23, v9 ; 10061317 v_mad_f32 v3, v11, v20, -v3 ; D2820003 840E290B v_mul_f32_e32 v25, v20, v10 ; 10321514 s_buffer_load_dword s10, s[16:19], 0x4c ; C205114C s_buffer_load_dword s11, s[16:19], 0x4d ; C205914D s_buffer_load_dword s9, s[16:19], 0x4e ; C204914E v_mul_f32_e32 v23, v23, v10 ; 102E1517 v_mul_f32_e32 v20, v23, v23 ; 10282F17 v_mac_f32_e32 v20, v3, v3 ; 3E280703 v_mac_f32_e32 v20, v25, v25 ; 3E283319 v_rsq_clamp_f32_e32 v24, v20 ; 7E305914 v_cos_f32_e32 v20, v19 ; 7E286D13 v_sin_f32_e32 v19, v19 ; 7E266B13 v_xor_b32_e32 v25, 0x80000000, v25 ; 3A3232FF 80000000 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_lt_f32_e64 s[0:1], 0, s24 ; D0020000 00003080 s_and_saveexec_b64 s[24:25], s[0:1] ; BE982400 s_xor_b64 s[24:25], exec, s[24:25] ; 8998187E s_cbranch_execz BB0_2 ; BF880000 v_sub_f32_e32 v26, s10, v0 ; 0834000A v_sub_f32_e32 v27, s11, v1 ; 0836020B v_sub_f32_e32 v28, s9, v2 ; 08380409 v_mul_f32_e32 v26, v26, v26 ; 1034351A s_buffer_load_dword s0, s[20:23], 0x1 ; C2001501 s_buffer_load_dword s1, s[20:23], 0x2 ; C2009502 s_buffer_load_dword s26, s[20:23], 0x0 ; C20D1500 s_buffer_load_dword s27, s[20:23], 0x3 ; C20D9503 v_mac_f32_e32 v26, v27, v27 ; 3E34371B v_mac_f32_e32 v26, v28, v28 ; 3E34391C v_rsq_f32_e32 v27, v26 ; 7E365D1A s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v28, s0 ; 7E380200 v_sub_f32_e32 v28, s1, v28 ; 08383801 v_rcp_f32_e32 v28, v28 ; 7E38551C v_sqrt_f32_e32 v26, v26 ; 7E34671A v_mul_f32_e32 v27, v28, v27 ; 1036371C v_mul_f32_e32 v28, s0, v26 ; 10383400 v_mad_f32 v29, v26, s0, -v6 ; D282001D 8418011A v_mad_f32 v27, v29, v27, 1.0 ; D282001B 03CA371D v_cmp_lt_f32_e32 vcc, v28, v6 ; 7C020D1C v_mul_f32_e32 v28, s1, v26 ; 10383401 v_mul_f32_e32 v29, s26, v26 ; 103A341A v_max_f32_e32 v29, v29, v6 ; 203A0D1D v_cmp_lt_f32_e64 s[0:1], v28, v6 ; D0020000 00020D1C v_mul_f32_e32 v6, v4, v27 ; 100C3704 v_mul_f32_e32 v28, v12, v27 ; 1038370C v_mul_f32_e32 v30, v13, v27 ; 103C370D v_mul_f32_e32 v27, v18, v27 ; 10363712 v_cndmask_b32_e64 v6, v6, 0, s[0:1] ; D2000006 00010106 v_cndmask_b32_e64 v28, v28, 0, s[0:1] ; D200001C 0001011C v_cndmask_b32_e64 v30, v30, 0, s[0:1] ; D200001E 0001011E v_cndmask_b32_e64 v27, v27, 0, s[0:1] ; D200001B 0001011B v_cndmask_b32_e32 v4, v4, v6 ; 00080D04 v_cndmask_b32_e32 v12, v12, v28 ; 0018390C v_cndmask_b32_e32 v13, v13, v30 ; 001A3D0D v_cndmask_b32_e32 v18, v18, v27 ; 00243712 v_mul_f32_e32 v6, s27, v26 ; 100C341B v_min_f32_e32 v6, v6, v29 ; 1E0C3B06 s_or_b64 exec, exec, s[24:25] ; 88FE187E v_mad_f32 v21, -2.0, v21, 1.0 ; D2820015 03CA2AF5 v_mad_f32 v21, v21, v7, s14 ; D2820015 003A0F15 v_mad_f32 v7, -2.0, v22, 1.0 ; D2820007 03CA2CF5 v_mad_f32 v22, v7, v8, s13 ; D2820016 00361107 v_mul_f32_e32 v23, v24, v23 ; 102E2F18 v_mul_f32_e32 v26, v24, v3 ; 10340718 v_mul_f32_e32 v24, v24, v25 ; 10303318 s_buffer_load_dword s0, s[16:19], 0x5b ; C200115B s_buffer_load_dword s20, s[16:19], 0x80 ; C20A1180 s_buffer_load_dword s13, s[16:19], 0x81 ; C2069181 v_mul_f32_e32 v3, v26, v11 ; 1006171A v_mad_f32 v3, v10, v24, -v3 ; D2820003 840E310A v_mul_f32_e32 v7, v24, v9 ; 100E1318 v_mad_f32 v7, v11, v23, -v7 ; D2820007 841E2F0B v_mul_f32_e32 v8, v23, v10 ; 10101517 v_mad_f32 v25, v9, v26, -v8 ; D2820019 84223509 v_mul_f32_e32 v27, v3, v20 ; 10362903 v_mul_f32_e32 v28, v7, v20 ; 10382907 v_mul_f32_e32 v8, v23, v20 ; 10102917 v_mad_f32 v8, v19, v3, -v8 ; D2820008 84220713 v_mul_f32_e32 v3, v3, v19 ; 10062703 v_mul_f32_e32 v29, v26, v20 ; 103A291A v_mad_f32 v29, v19, v7, -v29 ; D282001D 84760F13 v_mul_f32_e32 v7, v7, v19 ; 100E2707 v_mad_f32 v30, v20, v23, -v3 ; D282001E 840E2F14 v_mad_f32 v31, v20, v26, -v7 ; D282001F 841E3514 v_mul_f32_e32 v3, v25, v19 ; 10062719 v_mad_f32 v32, v20, v24, -v3 ; D2820020 840E3114 v_mad_f32 v33, v8, v30, 1.0 ; D2820021 03CA3D08 v_mac_f32_e32 v33, v30, v30 ; 3E423D1E v_mad_f32 v29, v29, v31, 1.0 ; D282001D 03CA3F1D v_mac_f32_e32 v29, v31, v31 ; 3E3A3F1F v_mul_f32_e32 v3, v24, v20 ; 10062918 v_mad_f32 v3, v19, v25, -v3 ; D2820003 840E3313 v_mad_f32 v34, v3, v32, 1.0 ; D2820022 03CA4103 s_load_dwordx4 s[24:27], s[4:5], 0x4 ; C08C0504 s_load_dwordx8 s[28:35], s[6:7], 0x8 ; C0CE0708 v_mac_f32_e32 v34, v32, v32 ; 3E444120 v_mul_f32_e32 v3, v33, v9 ; 10061321 v_mul_f32_e32 v7, v29, v10 ; 100E151D v_mul_f32_e32 v8, v34, v11 ; 10101722 v_mov_b32_e32 v9, 0x3b808083 ; 7E1202FF 3B808083 v_cmp_gt_f32_e32 vcc, v9, v18 ; 7C082509 v_mov_b32_e32 v35, 0 ; 7E460280 v_cndmask_b32_e64 v6, v6, 0, vcc ; D2000006 01A90106 v_mov_b32_e32 v36, v35 ; 7E480323 v_mul_f32_e32 v9, v25, v20 ; 10122919 v_mov_b32_e32 v37, v35 ; 7E4A0323 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[35:37], 7, 0, 0, 0, 0, 0, 0, 0, v[35:38], s[28:35], s[24:27] ; F0900700 00C72323 s_buffer_load_dword s1, s[16:19], 0x82 ; C2009182 s_buffer_load_dword s7, s[16:19], 0x83 ; C2039183 s_buffer_load_dword s27, s[16:19], 0x84 ; C20D9184 s_buffer_load_dword s24, s[16:19], 0x85 ; C20C1185 s_buffer_load_dword s4, s[16:19], 0x86 ; C2021186 s_buffer_load_dword s14, s[16:19], 0x87 ; C2071187 s_buffer_load_dword s25, s[16:19], 0x88 ; C20C9188 s_buffer_load_dword s22, s[16:19], 0x89 ; C20B1189 s_buffer_load_dword s5, s[16:19], 0x8a ; C202918A s_buffer_load_dword s21, s[16:19], 0x8b ; C20A918B s_buffer_load_dword s26, s[16:19], 0x8c ; C20D118C s_buffer_load_dword s23, s[16:19], 0x8d ; C20B918D s_buffer_load_dword s6, s[16:19], 0x8e ; C203118E s_buffer_load_dword s16, s[16:19], 0x8f ; C208118F v_mac_f32_e32 v27, v23, v19 ; 3E362717 v_mac_f32_e32 v28, v26, v19 ; 3E38271A v_mac_f32_e32 v9, v24, v19 ; 3E122718 v_mul_f32_e32 v10, v33, v27 ; 10143721 v_mul_f32_e32 v11, v29, v28 ; 1016391D v_mul_f32_e32 v9, v34, v9 ; 10121322 v_mul_f32_e32 v10, v21, v10 ; 10141515 v_mul_f32_e32 v11, v21, v11 ; 10161715 v_mul_f32_e32 v9, v21, v9 ; 10121315 v_mac_f32_e32 v10, v30, v22 ; 3E142D1E v_mac_f32_e32 v11, v31, v22 ; 3E162D1F v_mac_f32_e32 v9, v32, v22 ; 3E122D20 v_mad_f32 v0, v10, v6, v0 ; D2820000 04020D0A v_mad_f32 v1, v11, v6, v1 ; D2820001 04060D0B v_mac_f32_e32 v2, v9, v6 ; 3E040D09 v_mov_b32_e32 v6, 0x33d6bf95 ; 7E0C02FF 33D6BF95 s_waitcnt vmcnt(0) ; BF8C0770 v_mac_f32_e32 v0, v6, v35 ; 3E004706 v_mac_f32_e32 v1, v6, v36 ; 3E024906 v_mac_f32_e32 v2, v6, v37 ; 3E044B06 v_subrev_f32_e32 v6, s10, v0 ; 0A0C000A v_mul_f32_e32 v10, v6, v3 ; 10140706 v_subrev_f32_e32 v6, s11, v1 ; 0A0C020B v_mac_f32_e32 v10, v6, v7 ; 3E140F06 v_subrev_f32_e32 v6, s9, v2 ; 0A0C0409 v_mac_f32_e32 v10, v6, v8 ; 3E141106 v_mov_b32_e32 v9, s15 ; 7E12020F v_mov_b32_e32 v6, s12 ; 7E0C020C v_cmp_ngt_f32_e32 vcc, 0, v10 ; 7C161480 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[18:19], vcc ; BE92246A s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E v_mov_b32_e32 v10, 0x80000000 ; 7E1402FF 80000000 v_xor_b32_e32 v3, v3, v10 ; 3A061503 v_xor_b32_e32 v7, v7, v10 ; 3A0E1507 v_xor_b32_e32 v8, v8, v10 ; 3A101508 s_or_b64 exec, exec, s[18:19] ; 88FE127E v_subrev_f32_e32 v9, s0, v9 ; 0A121200 v_rcp_f32_e32 v9, v9 ; 7E125509 v_mul_f32_e32 v10, s27, v1 ; 1014021B v_mac_f32_e32 v10, s20, v0 ; 3E140014 v_mac_f32_e32 v10, s25, v2 ; 3E140419 v_add_f32_e32 v10, s26, v10 ; 0614141A v_mul_f32_e32 v11, s24, v1 ; 10160218 v_mac_f32_e32 v11, s13, v0 ; 3E16000D v_mac_f32_e32 v11, s22, v2 ; 3E160416 v_add_f32_e32 v11, s23, v11 ; 06161617 v_mov_b32_e32 v19, 0 ; 7E260280 exp 15, 32, 0, 0, 0, v19, v19, v19, v19 ; F800020F 13131313 exp 15, 33, 0, 0, 0, v14, v15, v16, v17 ; F800021F 11100F0E exp 15, 34, 0, 0, 0, v4, v12, v13, v18 ; F800022F 120D0C04 s_waitcnt expcnt(0) ; BF8C070F v_subrev_f32_e32 v4, s10, v0 ; 0A08000A v_subrev_f32_e32 v12, s11, v1 ; 0A18020B v_mul_f32_e32 v4, s2, v4 ; 10080802 v_mac_f32_e32 v4, s3, v12 ; 3E081803 v_subrev_f32_e32 v12, s9, v2 ; 0A180409 v_mac_f32_e32 v4, s8, v12 ; 3E081808 v_mad_f32 v12, s2, v6, v0 ; D282000C 04020C02 v_mad_f32 v13, s3, v6, v1 ; D282000D 04060C03 v_mad_f32 v6, s8, v6, v2 ; D2820006 040A0C08 v_mul_f32_e32 v14, s14, v1 ; 101C020E v_mac_f32_e32 v14, s7, v0 ; 3E1C0007 v_mul_f32_e32 v15, s14, v13 ; 101E1A0E v_mac_f32_e32 v15, s7, v12 ; 3E1E1807 v_mac_f32_e32 v14, s21, v2 ; 3E1C0415 v_mac_f32_e32 v15, s21, v6 ; 3E1E0C15 v_add_f32_e32 v14, s16, v14 ; 061C1C10 v_add_f32_e32 v15, s16, v15 ; 061E1E10 v_mul_f32_e32 v13, s4, v13 ; 101A1A04 v_rcp_f32_e32 v15, v15 ; 7E1E550F v_mac_f32_e32 v13, s1, v12 ; 3E1A1801 v_mac_f32_e32 v13, s5, v6 ; 3E1A0C05 v_add_f32_e32 v6, s6, v13 ; 060C1A06 v_mul_f32_e32 v6, v15, v6 ; 100C0D0F v_mul_f32_e32 v12, s4, v1 ; 10180204 v_mac_f32_e32 v12, s1, v0 ; 3E180001 v_mac_f32_e32 v12, s5, v2 ; 3E180405 v_add_f32_e32 v12, s6, v12 ; 06181806 v_mul_f32_e32 v6, v14, v6 ; 100C0D0E v_min_f32_e32 v12, 0x3a83126f, v12 ; 1E1818FF 3A83126F v_max_f32_e32 v6, v12, v6 ; 200C0D0C v_subrev_f32_e32 v12, s0, v4 ; 0A180800 v_mul_f32_e32 v9, v9, v12 ; 10121909 v_xor_b32_e32 v12, 0x80000000, v11 ; 3A1816FF 80000000 v_mad_f32 v6, 2.0, v6, -v14 ; D2820006 843A0CF4 exp 15, 35, 0, 0, 0, v10, v11, v9, v14 ; F800023F 0E090B0A exp 15, 36, 0, 0, 0, v0, v1, v2, v4 ; F800024F 04020100 exp 15, 37, 0, 0, 0, v3, v7, v8, v19 ; F800025F 13080703 exp 15, 38, 0, 0, 0, v5, v19, v19, v19 ; F800026F 13131305 exp 15, 12, 0, 0, 0, v10, v12, v6, v14 ; F80000CF 0E060C0A exp 15, 13, 0, 1, 0, v19, v19, v19, v19 ; F80008DF 13131313 s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 56 VGPRS: 56 Code Size: 2260 bytes LDS: 0 blocks Scratch: 0 bytes per wave ********************