[...] [traceshaders] glCreateShader(GL_VERTEX_SHADER) → shader=73 [traceshaders] glShaderSource(shader=73) [traceshaders] ================================================================================ #version 410 #extension GL_ARB_explicit_attrib_location : require #ifdef GL_ARB_separate_shader_objects #extension GL_ARB_separate_shader_objects : enable #endif subroutine void SubroutineType(); struct vec1 { float x; }; struct uvec1 { uint x; }; struct ivec1 { int x; }; vec4 InstrHelper; out gl_PerVertex { vec4 gl_Position; float gl_PointSize; float gl_ClipDistance[];}; layout(std140) uniform; uniform cbuffer_0 { // $Globals vec4 Const0[63]; }; uniform cbuffer_1 { // VSOffsetConstants vec4 Const1[5]; }; layout(location = 0) in vec4 dcl_Input0; vec4 Input0; layout(location = 1) in vec4 dcl_Input1; vec4 Input1; layout(location = 2) in vec4 dcl_Input2; vec4 Input2; layout(location = 4) in vec4 dcl_Input4; vec4 Input4; layout(location = 5) in vec4 dcl_Input5; vec4 Input5; layout(location = 6) in vec4 dcl_Input6; vec4 Input6; layout(location = 7) in vec4 dcl_Input7; vec4 Input7; layout(location = 0) out vec4 VtxGeoOutput0; #define Output0 VtxGeoOutput0 layout(location = 1) out vec4 VtxGeoOutput1; #define Output1 VtxGeoOutput1 layout(location = 2) out vec4 VtxGeoOutput2; #define Output2 VtxGeoOutput2 layout(location = 3) out vec4 VtxGeoOutput3; #define Output3 VtxGeoOutput3 layout(location = 4) out vec4 VtxGeoOutput4; #define Output4 VtxGeoOutput4 #undef Output5 #define Output5 phase0_Output5 vec4 phase0_Output5; vec4 Temp[4]; ivec4 Temp_int[4]; uvec4 Temp_uint[4]; void main() { Input0 = dcl_Input0; Input1 = dcl_Input1; Input2 = dcl_Input2; Input4 = dcl_Input4; Input5 = dcl_Input5; Input6 = dcl_Input6; Input7 = dcl_Input7; Temp[0].xyz = Input1.xyz * vec3(uintBitsToFloat(1073741824u), uintBitsToFloat(1073741824u), uintBitsToFloat(1073741824u)) + vec3(uintBitsToFloat(3212836864u), uintBitsToFloat(3212836864u), uintBitsToFloat(3212836864u)); Temp[1] = Input2 * vec4(uintBitsToFloat(1073741824u), uintBitsToFloat(1073741824u), uintBitsToFloat(1073741824u), uintBitsToFloat(1073741824u)) + vec4(uintBitsToFloat(3212836864u), uintBitsToFloat(3212836864u), uintBitsToFloat(3212836864u), uintBitsToFloat(3212836864u)); Temp[2].xyz = Temp[0].xyz * Temp[1].yzx; Temp[0].xyz = Temp[1].xyz * Temp[0].yzx + (-Temp[2].xyz); Temp[0].xyz = Temp[1].www * Temp[0].xyz; Temp[2].xyz = Temp[1].yzx * Temp[0].xyz; Temp[2].xyz = Temp[0].zxy * Temp[1].zxy + (-Temp[2].xyz); Temp[2].xyz = Temp[1].www * Temp[2].xyz; Temp[2].yw = Temp[2].yy * Const0[1].xz; Temp[2].xy = Const0[0].xz * Temp[2].xx + Temp[2].yw; Temp[2].xy = Const0[2].xz * Temp[2].zz + Temp[2].xy; Temp[0].zw = Temp[0].zz * Const0[1].xz; Temp[0].yz = Const0[0].xz * Temp[0].yy + Temp[0].zw; Temp[0].xy = Const0[2].xz * Temp[0].xx + Temp[0].yz; Temp[2].z = Temp[0].x; Temp[3].xy = Temp[1].yy * Const0[1].xz; Temp[1].xy = Const0[0].xz * Temp[1].xx + Temp[3].xy; Temp[0].xw = Const0[2].xz * Temp[1].zz + Temp[1].xy; Output1.w = Temp[1].w * Const0[51].x; Temp[2].w = Temp[0].x; Temp[0].x = dot(Temp[2].xzw, Temp[2].xzw); Temp[0].x = ( ( Temp[0].x < 0.0 ) ? 0.0 : ( ( Temp[0].x > 0.0 ) ? inversesqrt( Temp[0].x ) : ( 3.4028235E+38 * sign( Temp[0].x ) ) ) ); Output0.xyz = Temp[0].xxx * Temp[2].xzw; Temp[0].z = Temp[2].y; Temp[0].x = dot(Temp[0].yzw, Temp[0].yzw); Temp[0].x = ( ( Temp[0].x < 0.0 ) ? 0.0 : ( ( Temp[0].x > 0.0 ) ? inversesqrt( Temp[0].x ) : ( 3.4028235E+38 * sign( Temp[0].x ) ) ) ); Output1.xyz = Temp[0].xxx * Temp[0].zyw; Output2.xy = Input7.xy * Const0[52].xy + Const0[52].wz; Output3.xy = Input4.xy; Output3.zw = Input5.yx; Output4.xy = Input6.xy; Output4.zw = vec2(uintBitsToFloat(0u), uintBitsToFloat(0u)); Temp[0].x = Const0[18].w * uintBitsToFloat(1067114824u) + Const0[46].w; Temp[1].xyz = Input0.xyz * Const0[57].xyz + Const0[56].xyz; Temp[0].y = Const0[49].w * Temp[1].x + Temp[0].x; Temp[0].y = Temp[0].y * uintBitsToFloat(1065353216u); Temp[0].z = uintBitsToFloat((Temp[0].y>=(-Temp[0].y)) ? 0xFFFFFFFFu : 0u); Temp[0].y = fract(abs(Temp[0].y)); Temp[0].y = (floatBitsToInt(Temp[0]).z != 0) ? Temp[0].y : (-Temp[0].y); Temp[0].y = Temp[0].y * uintBitsToFloat(1086918619u); Temp[0].z = Const0[50].x * Temp[1].x + Temp[0].x; Temp[0].z = Temp[0].z * uintBitsToFloat(1056964608u); Temp[0].w = uintBitsToFloat((Temp[0].z>=(-Temp[0].z)) ? 0xFFFFFFFFu : 0u); Temp[0].z = fract(abs(Temp[0].z)); Temp[0].z = (floatBitsToInt(Temp[0]).w != 0) ? Temp[0].z : (-Temp[0].z); Temp[0].z = Temp[0].z * uintBitsToFloat(1086918619u); Temp[0].yz = sin(Temp[0].yz); Temp[0].y = Temp[0].z + Temp[0].y; Temp[0].y = Temp[0].y * Const0[50].y; Temp[0].zw = Temp[0].xx * vec2(uintBitsToFloat(1065353216u), uintBitsToFloat(1061481552u)); Temp[0].x = Temp[0].x + Const0[48].x; Temp[2].xy = uintBitsToFloat(uvec2(greaterThanEqual(Temp[0].zwzz, (-Temp[0].zwzz)).xy) * 0xFFFFFFFFu); Temp[0].zw = fract(abs(Temp[0].zw)); Temp[0].z = (floatBitsToInt(Temp[2]).x != 0) ? Temp[0].z : (-Temp[0].z); Temp[0].w = (floatBitsToInt(Temp[2]).y != 0) ? Temp[0].w : (-Temp[0].w); Temp[0].zw = Temp[0].zw * vec2(uintBitsToFloat(1086918619u), uintBitsToFloat(1086918619u)); Temp[0].zw = sin(Temp[0].zw); Temp[0].z = Temp[0].w + Temp[0].z; Temp[0].z = Temp[0].z + uintBitsToFloat(1065353216u); Temp[0].w = Temp[1].x * Const0[49].z; Temp[0].z = Temp[0].z * Temp[0].w; Temp[2].z = Temp[0].z * uintBitsToFloat(1045220557u) + Temp[0].y; Temp[0].y = Const0[46].z * Temp[1].x + (-Temp[0].x); Temp[0].xz = Const0[48].yw * Temp[1].xx + (-Temp[0].xx); Temp[0].xyz = Temp[0].xyz * vec3(uintBitsToFloat(1045494470u), uintBitsToFloat(1055439406u), uintBitsToFloat(1065353216u)); Temp[0].w = uintBitsToFloat((Temp[0].y>=(-Temp[0].y)) ? 0xFFFFFFFFu : 0u); Temp[0].y = fract(abs(Temp[0].y)); Temp[0].y = (floatBitsToInt(Temp[0]).w != 0) ? Temp[0].y : (-Temp[0].y); Temp[0].y = Temp[0].y * uintBitsToFloat(1086918619u); Temp[3].xy = uintBitsToFloat(uvec2(greaterThanEqual(Temp[0].xzxx, (-Temp[0].xzxx)).xy) * 0xFFFFFFFFu); Temp[0].xz = fract(abs(Temp[0].xz)); Temp[0].x = (floatBitsToInt(Temp[3]).x != 0) ? Temp[0].x : (-Temp[0].x); Temp[0].z = (floatBitsToInt(Temp[3]).y != 0) ? Temp[0].z : (-Temp[0].z); Temp[0].xz = Temp[0].xz * vec2(uintBitsToFloat(1086918619u), uintBitsToFloat(1086918619u)); Temp[0].xyz = sin(Temp[0].xyz); Temp[0].x = Temp[0].x + Temp[0].y; Temp[0].y = Temp[0].x * Const0[49].y; Temp[0].x = abs(Temp[0].x) * Const0[48].z; Temp[2].y = Const0[49].x * Temp[0].z + Temp[0].y; Temp[0].y = Const0[46].y * uintBitsToFloat(3256877056u) + uintBitsToFloat(3240099840u); Temp[2].x = Temp[0].x * Temp[0].y; Temp[0].xyz = Temp[2].xyz * Input6.xxx; Temp[1].w = Input0.w; Temp[0].w = uintBitsToFloat(0u); Temp[0] = Temp[0] + Temp[1]; Temp[1] = Temp[0].yyyy * Const0[1]; Temp[1] = Const0[0] * Temp[0].xxxx + Temp[1]; Temp[1] = Const0[2] * Temp[0].zzzz + Temp[1]; Temp[0] = Const0[3] * Temp[0].wwww + Temp[1]; Temp[1] = Temp[0].yyyy * Const1[1]; Temp[1] = Const1[0] * Temp[0].xxxx + Temp[1]; Temp[1] = Const1[2] * Temp[0].zzzz + Temp[1]; Output5 = Const1[3] * Temp[0].wwww + Temp[1]; gl_Position = vec4(phase0_Output5); gl_Position.y = -gl_Position.y; gl_Position.z = gl_Position.z * 2.0 - gl_Position.w; return; } [traceshaders] ================================================================================ [traceshaders] glCompileShader(shader=73) [traceshaders] glCreateProgram() = 74 [traceshaders] glAttachShader(program=74, shader=73) [traceshaders] glLinkProgram(program=74) SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT PROPERTY NEXT_SHADER FRAG DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL IN[5] DCL IN[6] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL OUT[5], GENERIC[4] DCL CONST[1][0..62] DCL CONST[2][0..4] DCL TEMP[0..11], LOCAL IMM[0] FLT32 { 2.0000, -1.0000, 0.0000, 340282346638528859811704183484516925440.0000} IMM[1] UINT32 {0, 16, 32, 816} IMM[2] UINT32 {832, 288, 736, 912} IMM[3] FLT32 { 1.2100, 6.2832, 0.5000, 1.0000} IMM[4] UINT32 {896, 784, 4294967295, 800} IMM[5] INT32 {0, 1, 0, 0} IMM[6] FLT32 { 1.0000, 0.7692, 0.2000, -40.0000} IMM[7] UINT32 {768, 48, 1, 0} IMM[8] FLT32 { 0.2041, 0.4545, 1.0000, -10.0000} 0: MAD TEMP[0].xyz, IN[1].xyzz, IMM[0].xxxx, IMM[0].yyyy 1: MAD TEMP[1], IN[2], IMM[0].xxxx, IMM[0].yyyy 2: MUL TEMP[2].xyz, TEMP[0].xyzz, TEMP[1].yzxx 3: MAD TEMP[0].xyz, TEMP[1].xyzz, TEMP[0].yzxx, -TEMP[2].xyzz 4: MUL TEMP[0].xyz, TEMP[1].wwww, TEMP[0].xyzz 5: MUL TEMP[2].xyz, TEMP[1].yzxx, TEMP[0].xyzz 6: MAD TEMP[2].xyz, TEMP[0].zxyy, TEMP[1].zxyy, -TEMP[2].xyzz 7: MUL TEMP[2].xyz, TEMP[1].wwww, TEMP[2].xyzz 8: MUL TEMP[3].xy, TEMP[2].yyyy, CONST[1][1].xzzz 9: MAD TEMP[2].xy, CONST[1][0].xzzz, TEMP[2].xxxx, TEMP[3].xyyy 10: MAD TEMP[2].xy, CONST[1][2].xzzz, TEMP[2].zzzz, TEMP[2].xyyy 11: MUL TEMP[3].xy, TEMP[0].zzzz, CONST[1][1].xzzz 12: MAD TEMP[3].xy, CONST[1][0].xzzz, TEMP[0].yyyy, TEMP[3].xyyy 13: MAD TEMP[0].xy, CONST[1][2].xzzz, TEMP[0].xxxx, TEMP[3].xyyy 14: MOV TEMP[2].z, TEMP[0].xxxx 15: MUL TEMP[3].xy, TEMP[1].yyyy, CONST[1][1].xzzz 16: MAD TEMP[1].xy, CONST[1][0].xzzz, TEMP[1].xxxx, TEMP[3].xyyy 17: MAD TEMP[4].xy, CONST[1][2].xzzz, TEMP[1].zzzz, TEMP[1].xyyy 18: MOV TEMP[0].w, TEMP[4].xxxy 19: MUL TEMP[5].x, TEMP[1].wwww, CONST[1][51].xxxx 20: MOV TEMP[5].w, TEMP[5].xxxx 21: MOV TEMP[2].w, TEMP[4].xxxx 22: DP3 TEMP[0].x, TEMP[2].xzww, TEMP[2].xzww 23: FSLT TEMP[4].x, TEMP[0].xxxx, IMM[0].zzzz 24: UIF TEMP[4].xxxx :0 25: MOV TEMP[4].x, IMM[0].zzzz 26: ELSE :0 27: FSLT TEMP[6].x, IMM[0].zzzz, TEMP[0].xxxx 28: UIF TEMP[6].xxxx :0 29: RSQ TEMP[6].x, TEMP[0].xxxx 30: ELSE :0 31: SSG TEMP[7].x, TEMP[0].xxxx 32: MUL TEMP[6].x, IMM[0].wwww, TEMP[7].xxxx 33: ENDIF 34: MOV TEMP[4].x, TEMP[6].xxxx 35: ENDIF 36: MUL TEMP[4].xyz, TEMP[4].xxxx, TEMP[2].xzww 37: MOV TEMP[0].z, TEMP[2].yyyy 38: DP3 TEMP[0].x, TEMP[0].yzww, TEMP[0].yzww 39: FSLT TEMP[6].x, TEMP[0].xxxx, IMM[0].zzzz 40: UIF TEMP[6].xxxx :0 41: MOV TEMP[6].x, IMM[0].zzzz 42: ELSE :0 43: FSLT TEMP[7].x, IMM[0].zzzz, TEMP[0].xxxx 44: UIF TEMP[7].xxxx :0 45: RSQ TEMP[7].x, TEMP[0].xxxx 46: ELSE :0 47: SSG TEMP[8].x, TEMP[0].xxxx 48: MUL TEMP[7].x, IMM[0].wwww, TEMP[8].xxxx 49: ENDIF 50: MOV TEMP[6].x, TEMP[7].xxxx 51: ENDIF 52: MUL TEMP[5].xyz, TEMP[6].xxxx, TEMP[0].zyww 53: MAD TEMP[6].xy, IN[6].xyyy, CONST[1][52].xyyy, CONST[1][52].wzzz 54: MOV TEMP[7].xy, IN[3].xyxx 55: MOV TEMP[7].zw, IN[4].xxyx 56: MOV TEMP[8].xy, IN[5].xyxx 57: MOV TEMP[8].zw, IMM[0].zzzz 58: MAD TEMP[0].x, CONST[1][18].wwww, IMM[3].xxxx, CONST[1][46].wwww 59: MAD TEMP[1].xyz, IN[0].xyzz, CONST[1][57].xyzz, CONST[1][56].xyzz 60: MAD TEMP[9].x, CONST[1][49].wwww, TEMP[1].xxxx, TEMP[0].xxxx 61: FSGE TEMP[10].x, TEMP[9].xxxx, -TEMP[9].xxxx 62: UIF TEMP[10].xxxx :0 63: MOV TEMP[10].x, IMM[4].zzzz 64: ELSE :0 65: MOV TEMP[10].x, IMM[1].xxxx 66: ENDIF 67: MOV TEMP[0].z, TEMP[10].xxxx 68: ABS TEMP[9].x, TEMP[9].xxxx 69: FRC TEMP[9].x, TEMP[9].xxxx 70: USNE TEMP[10].x, TEMP[0].zzzz, IMM[5].xxxx 71: UIF TEMP[10].xxxx :0 72: MOV TEMP[10].x, TEMP[9].xxxx 73: ELSE :0 74: MOV TEMP[10].x, -TEMP[9].xxxx 75: ENDIF 76: MUL TEMP[9].x, TEMP[10].xxxx, IMM[3].yyyy 77: MAD TEMP[10].x, CONST[1][50].xxxx, TEMP[1].xxxx, TEMP[0].xxxx 78: MUL TEMP[10].x, TEMP[10].xxxx, IMM[3].zzzz 79: FSGE TEMP[11].x, TEMP[10].xxxx, -TEMP[10].xxxx 80: UIF TEMP[11].xxxx :0 81: MOV TEMP[11].x, IMM[4].zzzz 82: ELSE :0 83: MOV TEMP[11].x, IMM[1].xxxx 84: ENDIF 85: MOV TEMP[0].w, TEMP[11].xxxx 86: ABS TEMP[10].x, TEMP[10].xxxx 87: FRC TEMP[10].x, TEMP[10].xxxx 88: USNE TEMP[11].x, TEMP[0].wwww, IMM[5].xxxx 89: UIF TEMP[11].xxxx :0 90: MOV TEMP[11].x, TEMP[10].xxxx 91: ELSE :0 92: MOV TEMP[11].x, -TEMP[10].xxxx 93: ENDIF 94: MUL TEMP[10].x, TEMP[11].xxxx, IMM[3].yyyy 95: SIN TEMP[9].x, TEMP[9].xxxx 96: SIN TEMP[9].y, TEMP[10].xxxx 97: ADD TEMP[9].x, TEMP[9].yyyy, TEMP[9].xxxx 98: MUL TEMP[9].x, TEMP[9].xxxx, CONST[1][50].yyyy 99: MUL TEMP[10].xy, TEMP[0].xxxx, IMM[6].xyyy 100: ADD TEMP[0].x, TEMP[0].xxxx, CONST[1][48].xxxx 101: FSGE TEMP[11].xy, TEMP[10].xyxx, -TEMP[10].xyxx 102: AND TEMP[11].xy, TEMP[11].xyyy, IMM[5].yyyy 103: INEG TEMP[11].xy, TEMP[11].xyyy 104: MOV TEMP[2].xy, TEMP[11].xyxx 105: ABS TEMP[10].xy, TEMP[10].xyyy 106: FRC TEMP[10].xy, TEMP[10].xyyy 107: USNE TEMP[11].x, TEMP[2].xxxx, IMM[5].xxxx 108: UIF TEMP[11].xxxx :0 109: MOV TEMP[11].x, TEMP[10].xxxx 110: ELSE :0 111: MOV TEMP[11].x, -TEMP[10].xxxx 112: ENDIF 113: MOV TEMP[0].z, TEMP[11].xxxx 114: USNE TEMP[11].x, TEMP[2].yyyy, IMM[5].xxxx 115: UIF TEMP[11].xxxx :0 116: MOV TEMP[11].x, TEMP[10].yyyy 117: ELSE :0 118: MOV TEMP[11].x, -TEMP[10].yyyy 119: ENDIF 120: MOV TEMP[0].w, TEMP[11].xxxx 121: MUL TEMP[10].xy, TEMP[0].zwww, IMM[3].yyyy 122: SIN TEMP[11].x, TEMP[10].xxxx 123: SIN TEMP[11].y, TEMP[10].yyyy 124: ADD TEMP[10].x, TEMP[11].yyyy, TEMP[11].xxxx 125: ADD TEMP[10].x, TEMP[10].xxxx, IMM[3].wwww 126: MUL TEMP[11].x, TEMP[1].xxxx, CONST[1][49].zzzz 127: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx 128: MAD TEMP[9].x, TEMP[10].xxxx, IMM[6].zzzz, TEMP[9].xxxx 129: MOV TEMP[2].z, TEMP[9].xxxx 130: MAD TEMP[9].x, CONST[1][46].zzzz, TEMP[1].xxxx, -TEMP[0].xxxx 131: MOV TEMP[0].y, TEMP[9].xxxx 132: MAD TEMP[9].xy, CONST[1][48].ywww, TEMP[1].xxxx, -TEMP[0].xxxx 133: MOV TEMP[0].xz, TEMP[9].xxyx 134: MUL TEMP[0].xyz, TEMP[0].xyzz, IMM[8].xyzz 135: FSGE TEMP[9].x, TEMP[0].yyyy, -TEMP[0].yyyy 136: UIF TEMP[9].xxxx :0 137: MOV TEMP[9].x, IMM[4].zzzz 138: ELSE :0 139: MOV TEMP[9].x, IMM[1].xxxx 140: ENDIF 141: MOV TEMP[0].w, TEMP[9].xxxx 142: ABS TEMP[9].x, TEMP[0].yyyy 143: FRC TEMP[9].x, TEMP[9].xxxx 144: USNE TEMP[10].x, TEMP[0].wwww, IMM[5].xxxx 145: UIF TEMP[10].xxxx :0 146: MOV TEMP[10].x, TEMP[9].xxxx 147: ELSE :0 148: MOV TEMP[10].x, -TEMP[9].xxxx 149: ENDIF 150: MUL TEMP[9].x, TEMP[10].xxxx, IMM[3].yyyy 151: FSGE TEMP[10].xy, TEMP[0].xzxx, -TEMP[0].xzxx 152: AND TEMP[10].xy, TEMP[10].xyyy, IMM[5].yyyy 153: INEG TEMP[10].xy, TEMP[10].xyyy 154: MOV TEMP[3].xy, TEMP[10].xyxx 155: ABS TEMP[10].xy, TEMP[0].xzzz 156: FRC TEMP[10].xy, TEMP[10].xyyy 157: USNE TEMP[11].x, TEMP[3].xxxx, IMM[5].xxxx 158: UIF TEMP[11].xxxx :0 159: MOV TEMP[11].x, TEMP[10].xxxx 160: ELSE :0 161: MOV TEMP[11].x, -TEMP[10].xxxx 162: ENDIF 163: MOV TEMP[0].x, TEMP[11].xxxx 164: USNE TEMP[3].x, TEMP[3].yyyy, IMM[5].xxxx 165: UIF TEMP[3].xxxx :0 166: MOV TEMP[3].x, TEMP[10].yyyy 167: ELSE :0 168: MOV TEMP[3].x, -TEMP[10].yyyy 169: ENDIF 170: MOV TEMP[0].z, TEMP[3].xxxx 171: MUL TEMP[3].xy, TEMP[0].xzzz, IMM[3].yyyy 172: SIN TEMP[10].x, TEMP[3].xxxx 173: SIN TEMP[10].y, TEMP[9].xxxx 174: SIN TEMP[10].z, TEMP[3].yyyy 175: ADD TEMP[0].x, TEMP[10].xxxx, TEMP[10].yyyy 176: MUL TEMP[3].x, TEMP[0].xxxx, CONST[1][49].yyyy 177: ABS TEMP[9].x, TEMP[0].xxxx 178: MUL TEMP[0].x, TEMP[9].xxxx, CONST[1][48].zzzz 179: MAD TEMP[3].x, CONST[1][49].xxxx, TEMP[10].zzzz, TEMP[3].xxxx 180: MOV TEMP[2].y, TEMP[3].xxxx 181: MAD TEMP[3].x, CONST[1][46].yyyy, IMM[6].wwww, IMM[8].wwww 182: MUL TEMP[2].x, TEMP[0].xxxx, TEMP[3].xxxx 183: MUL TEMP[0].xyz, TEMP[2].xyzz, IN[5].xxxx 184: MOV TEMP[1].w, IN[0].wwww 185: MOV TEMP[0].w, IMM[0].zzzz 186: ADD TEMP[0], TEMP[0], TEMP[1] 187: MUL TEMP[1], TEMP[0].yyyy, CONST[1][1] 188: MAD TEMP[1], CONST[1][0], TEMP[0].xxxx, TEMP[1] 189: MAD TEMP[1], CONST[1][2], TEMP[0].zzzz, TEMP[1] 190: MAD TEMP[0], CONST[1][3], TEMP[0].wwww, TEMP[1] 191: MUL TEMP[1], TEMP[0].yyyy, CONST[2][1] 192: MAD TEMP[1], CONST[2][0], TEMP[0].xxxx, TEMP[1] 193: MAD TEMP[1], CONST[2][2], TEMP[0].zzzz, TEMP[1] 194: MAD TEMP[0], CONST[2][3], TEMP[0].wwww, TEMP[1] 195: MOV TEMP[1].xw, TEMP[0].xxxw 196: MOV TEMP[1].y, -TEMP[0].yyyy 197: MAD TEMP[0].x, TEMP[0].zzzz, IMM[0].xxxx, -TEMP[0].wwww 198: MOV TEMP[1].z, TEMP[0].xxxx 199: MOV OUT[1], TEMP[4] 200: MOV OUT[2], TEMP[5] 201: MOV OUT[3], TEMP[6] 202: MOV OUT[4], TEMP[7] 203: MOV OUT[5], TEMP[8] 204: MOV OUT[0], TEMP[1] 205: END radeonsi: Compiling shader 49 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_vs <{ float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) { main_body: %20 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1, !amdgpu.uniform !0 %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20, align 16, !tbaa !1 %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 0) %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 4) %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 8) %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 12) %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16) %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 20) %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 24) %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 28) %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32) %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36) %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 40) %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 44) %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 48) %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 52) %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 56) %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 60) %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 300) %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 740) %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 744) %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 748) %42 = call float @llvm.SI.load.const(<16 x i8> %21, i32 768) %43 = call float @llvm.SI.load.const(<16 x i8> %21, i32 772) %44 = call float @llvm.SI.load.const(<16 x i8> %21, i32 776) %45 = call float @llvm.SI.load.const(<16 x i8> %21, i32 780) %46 = call float @llvm.SI.load.const(<16 x i8> %21, i32 784) %47 = call float @llvm.SI.load.const(<16 x i8> %21, i32 788) %48 = call float @llvm.SI.load.const(<16 x i8> %21, i32 792) %49 = call float @llvm.SI.load.const(<16 x i8> %21, i32 796) %50 = call float @llvm.SI.load.const(<16 x i8> %21, i32 800) %51 = call float @llvm.SI.load.const(<16 x i8> %21, i32 804) %52 = call float @llvm.SI.load.const(<16 x i8> %21, i32 816) %53 = call float @llvm.SI.load.const(<16 x i8> %21, i32 832) %54 = call float @llvm.SI.load.const(<16 x i8> %21, i32 836) %55 = call float @llvm.SI.load.const(<16 x i8> %21, i32 840) %56 = call float @llvm.SI.load.const(<16 x i8> %21, i32 844) %57 = call float @llvm.SI.load.const(<16 x i8> %21, i32 896) %58 = call float @llvm.SI.load.const(<16 x i8> %21, i32 900) %59 = call float @llvm.SI.load.const(<16 x i8> %21, i32 904) %60 = call float @llvm.SI.load.const(<16 x i8> %21, i32 912) %61 = call float @llvm.SI.load.const(<16 x i8> %21, i32 916) %62 = call float @llvm.SI.load.const(<16 x i8> %21, i32 920) %63 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2, !amdgpu.uniform !0 %64 = load <16 x i8>, <16 x i8> addrspace(2)* %63, align 16, !tbaa !1 %65 = call float @llvm.SI.load.const(<16 x i8> %64, i32 0) %66 = call float @llvm.SI.load.const(<16 x i8> %64, i32 4) %67 = call float @llvm.SI.load.const(<16 x i8> %64, i32 8) %68 = call float @llvm.SI.load.const(<16 x i8> %64, i32 12) %69 = call float @llvm.SI.load.const(<16 x i8> %64, i32 16) %70 = call float @llvm.SI.load.const(<16 x i8> %64, i32 20) %71 = call float @llvm.SI.load.const(<16 x i8> %64, i32 24) %72 = call float @llvm.SI.load.const(<16 x i8> %64, i32 28) %73 = call float @llvm.SI.load.const(<16 x i8> %64, i32 32) %74 = call float @llvm.SI.load.const(<16 x i8> %64, i32 36) %75 = call float @llvm.SI.load.const(<16 x i8> %64, i32 40) %76 = call float @llvm.SI.load.const(<16 x i8> %64, i32 44) %77 = call float @llvm.SI.load.const(<16 x i8> %64, i32 48) %78 = call float @llvm.SI.load.const(<16 x i8> %64, i32 52) %79 = call float @llvm.SI.load.const(<16 x i8> %64, i32 56) %80 = call float @llvm.SI.load.const(<16 x i8> %64, i32 60) %81 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0, !amdgpu.uniform !0 %82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !1 %83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %82, i32 0, i32 %13) %84 = extractelement <4 x float> %83, i32 0 %85 = extractelement <4 x float> %83, i32 1 %86 = extractelement <4 x float> %83, i32 2 %87 = extractelement <4 x float> %83, i32 3 %88 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1, !amdgpu.uniform !0 %89 = load <16 x i8>, <16 x i8> addrspace(2)* %88, align 16, !tbaa !1 %90 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %89, i32 0, i32 %14) %91 = extractelement <4 x float> %90, i32 0 %92 = extractelement <4 x float> %90, i32 1 %93 = extractelement <4 x float> %90, i32 2 %94 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2, !amdgpu.uniform !0 %95 = load <16 x i8>, <16 x i8> addrspace(2)* %94, align 16, !tbaa !1 %96 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %95, i32 0, i32 %15) %97 = extractelement <4 x float> %96, i32 0 %98 = extractelement <4 x float> %96, i32 1 %99 = extractelement <4 x float> %96, i32 2 %100 = extractelement <4 x float> %96, i32 3 %101 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3, !amdgpu.uniform !0 %102 = load <16 x i8>, <16 x i8> addrspace(2)* %101, align 16, !tbaa !1 %103 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %102, i32 0, i32 %16) %104 = extractelement <4 x float> %103, i32 0 %105 = extractelement <4 x float> %103, i32 1 %106 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4, !amdgpu.uniform !0 %107 = load <16 x i8>, <16 x i8> addrspace(2)* %106, align 16, !tbaa !1 %108 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %107, i32 0, i32 %17) %109 = extractelement <4 x float> %108, i32 0 %110 = extractelement <4 x float> %108, i32 1 %111 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 5, !amdgpu.uniform !0 %112 = load <16 x i8>, <16 x i8> addrspace(2)* %111, align 16, !tbaa !1 %113 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %112, i32 0, i32 %18) %114 = extractelement <4 x float> %113, i32 0 %115 = extractelement <4 x float> %113, i32 1 %116 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 6, !amdgpu.uniform !0 %117 = load <16 x i8>, <16 x i8> addrspace(2)* %116, align 16, !tbaa !1 %118 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %117, i32 0, i32 %19) %119 = extractelement <4 x float> %118, i32 0 %120 = extractelement <4 x float> %118, i32 1 %121 = fmul float %91, 2.000000e+00 %122 = fadd float %121, -1.000000e+00 %123 = fmul float %92, 2.000000e+00 %124 = fadd float %123, -1.000000e+00 %125 = fmul float %93, 2.000000e+00 %126 = fadd float %125, -1.000000e+00 %127 = fmul float %97, 2.000000e+00 %128 = fadd float %127, -1.000000e+00 %129 = fmul float %98, 2.000000e+00 %130 = fadd float %129, -1.000000e+00 %131 = fmul float %99, 2.000000e+00 %132 = fadd float %131, -1.000000e+00 %133 = fmul float %100, 2.000000e+00 %134 = fadd float %133, -1.000000e+00 %135 = fmul float %122, %130 %136 = fmul float %124, %132 %137 = fmul float %126, %128 %138 = fmul float %128, %124 %139 = fsub float %138, %135 %140 = fmul float %130, %126 %141 = fsub float %140, %136 %142 = fmul float %132, %122 %143 = fsub float %142, %137 %144 = fmul float %134, %139 %145 = fmul float %134, %141 %146 = fmul float %134, %143 %147 = fmul float %130, %144 %148 = fmul float %132, %145 %149 = fmul float %128, %146 %150 = fmul float %146, %132 %151 = fsub float %150, %147 %152 = fmul float %144, %128 %153 = fsub float %152, %148 %154 = fmul float %145, %130 %155 = fsub float %154, %149 %156 = fmul float %134, %151 %157 = fmul float %134, %153 %158 = fmul float %134, %155 %159 = fmul float %157, %26 %160 = fmul float %157, %28 %161 = fmul float %22, %156 %162 = fadd float %161, %159 %163 = fmul float %24, %156 %164 = fadd float %163, %160 %165 = fmul float %30, %158 %166 = fadd float %165, %162 %167 = fmul float %32, %158 %168 = fadd float %167, %164 %169 = fmul float %146, %26 %170 = fmul float %146, %28 %171 = fmul float %22, %145 %172 = fadd float %171, %169 %173 = fmul float %24, %145 %174 = fadd float %173, %170 %175 = fmul float %30, %144 %176 = fadd float %175, %172 %177 = fmul float %32, %144 %178 = fadd float %177, %174 %179 = fmul float %130, %26 %180 = fmul float %130, %28 %181 = fmul float %22, %128 %182 = fadd float %181, %179 %183 = fmul float %24, %128 %184 = fadd float %183, %180 %185 = fmul float %30, %132 %186 = fadd float %185, %182 %187 = fmul float %32, %132 %188 = fadd float %187, %184 %189 = fmul float %134, %52 %190 = fmul float %166, %166 %191 = fmul float %176, %176 %192 = fadd float %191, %190 %193 = fmul float %186, %186 %194 = fadd float %192, %193 br i1 false, label %ENDIF, label %ELSE ELSE: ; preds = %main_body %195 = fcmp ogt float %194, 0.000000e+00 br i1 %195, label %IF49, label %ELSE50 ENDIF: ; preds = %IF49, %ELSE50, %main_body %temp16.0 = phi float [ 0.000000e+00, %main_body ], [ %205, %IF49 ], [ %209, %ELSE50 ] %196 = fmul float %temp16.0, %166 %197 = fmul float %temp16.0, %176 %198 = fmul float %temp16.0, %186 %199 = fmul float %178, %178 %200 = fmul float %168, %168 %201 = fadd float %200, %199 %202 = fmul float %188, %188 %203 = fadd float %201, %202 br i1 false, label %ENDIF51, label %ELSE53 IF49: ; preds = %ELSE %204 = call float @llvm.sqrt.f32(float %194) %205 = fdiv float 1.000000e+00, %204 br label %ENDIF ELSE50: ; preds = %ELSE %206 = fcmp ogt float %194, 0.000000e+00 %207 = select i1 %206, float 1.000000e+00, float %194 %208 = fcmp oge float %207, 0.000000e+00 %.op = fmul float %207, 0x47EFFFFFE0000000 %209 = select i1 %208, float %.op, float 0xC7EFFFFFE0000000 br label %ENDIF ELSE53: ; preds = %ENDIF %210 = fcmp ogt float %203, 0.000000e+00 br i1 %210, label %IF55, label %ELSE56 ENDIF51: ; preds = %IF55, %ELSE56, %ENDIF %temp24.1 = phi float [ 0.000000e+00, %ENDIF ], [ %383, %IF55 ], [ %387, %ELSE56 ] %211 = fmul float %temp24.1, %168 %212 = fmul float %temp24.1, %178 %213 = fmul float %temp24.1, %188 %214 = fmul float %119, %53 %215 = fadd float %214, %56 %216 = fmul float %120, %54 %217 = fadd float %216, %55 %218 = fmul float %38, 0x3FF35C2900000000 %219 = fadd float %218, %41 %220 = fmul float %84, %60 %221 = fadd float %220, %57 %222 = fmul float %85, %61 %223 = fadd float %222, %58 %224 = fmul float %86, %62 %225 = fadd float %224, %59 %226 = fmul float %49, %221 %227 = fadd float %226, %219 %228 = fsub float -0.000000e+00, %227 %229 = fcmp oge float %227, %228 %230 = call float @llvm.fabs.f32(float %227) %231 = call float @llvm.floor.f32(float %230) %232 = fsub float %230, %231 %233 = fsub float -0.000000e+00, %232 %temp40.1 = select i1 %229, float %232, float %233 %234 = fmul float %temp40.1, 0x401921FB60000000 %235 = fmul float %50, %221 %236 = fadd float %235, %219 %237 = fmul float %236, 5.000000e-01 %238 = fsub float -0.000000e+00, %237 %239 = fcmp oge float %237, %238 %240 = call float @llvm.fabs.f32(float %237) %241 = call float @llvm.floor.f32(float %240) %242 = fsub float %240, %241 %243 = fsub float -0.000000e+00, %242 %temp44.1 = select i1 %239, float %242, float %243 %244 = fmul float %temp44.1, 0x401921FB60000000 %245 = call float @llvm.sin.f32(float %234) %246 = call float @llvm.sin.f32(float %244) %247 = fadd float %246, %245 %248 = fmul float %247, %51 %249 = fmul float %219, 0x3FE89D8A00000000 %250 = fadd float %219, %42 %251 = fsub float -0.000000e+00, %219 %252 = fcmp oge float %219, %251 %253 = fsub float -0.000000e+00, %249 %254 = fcmp oge float %249, %253 %255 = call float @llvm.fabs.f32(float %219) %256 = call float @llvm.fabs.f32(float %249) %257 = call float @llvm.floor.f32(float %255) %258 = fsub float %255, %257 %259 = call float @llvm.floor.f32(float %256) %260 = fsub float %256, %259 %261 = fsub float -0.000000e+00, %258 %temp44.2 = select i1 %252, float %258, float %261 %262 = fsub float -0.000000e+00, %260 %temp44.3 = select i1 %254, float %260, float %262 %263 = fmul float %temp44.2, 0x401921FB60000000 %264 = fmul float %temp44.3, 0x401921FB60000000 %265 = call float @llvm.sin.f32(float %263) %266 = call float @llvm.sin.f32(float %264) %267 = fadd float %266, %265 %268 = fadd float %267, 1.000000e+00 %269 = fmul float %221, %48 %270 = fmul float %268, %269 %271 = fmul float %270, 0x3FC99999A0000000 %272 = fadd float %271, %248 %273 = fmul float %40, %221 %274 = fsub float %273, %250 %275 = fmul float %43, %221 %276 = fsub float %275, %250 %277 = fmul float %45, %221 %278 = fsub float %277, %250 %279 = fmul float %276, 0x3FCA1F58C0000000 %280 = fmul float %274, 0x3FDD1745C0000000 %281 = fsub float -0.000000e+00, %280 %282 = fcmp oge float %280, %281 %283 = call float @llvm.fabs.f32(float %280) %284 = call float @llvm.floor.f32(float %283) %285 = fsub float %283, %284 %286 = fsub float -0.000000e+00, %285 %temp40.2 = select i1 %282, float %285, float %286 %287 = fmul float %temp40.2, 0x401921FB60000000 %288 = fsub float -0.000000e+00, %279 %289 = fcmp oge float %279, %288 %290 = fsub float -0.000000e+00, %278 %291 = fcmp oge float %278, %290 %292 = call float @llvm.fabs.f32(float %279) %293 = call float @llvm.fabs.f32(float %278) %294 = call float @llvm.floor.f32(float %292) %295 = fsub float %292, %294 %296 = call float @llvm.floor.f32(float %293) %297 = fsub float %293, %296 %298 = fsub float -0.000000e+00, %295 %temp44.4 = select i1 %289, float %295, float %298 %299 = fsub float -0.000000e+00, %297 %temp12.0 = select i1 %291, float %297, float %299 %300 = fmul float %temp44.4, 0x401921FB60000000 %301 = fmul float %temp12.0, 0x401921FB60000000 %302 = call float @llvm.sin.f32(float %300) %303 = call float @llvm.sin.f32(float %287) %304 = call float @llvm.sin.f32(float %301) %305 = fadd float %302, %303 %306 = fmul float %305, %47 %307 = call float @llvm.fabs.f32(float %305) %308 = fmul float %307, %44 %309 = fmul float %46, %304 %310 = fadd float %309, %306 %311 = fmul float %39, -4.000000e+01 %312 = fadd float %311, -1.000000e+01 %313 = fmul float %308, %312 %314 = fmul float %313, %114 %315 = fmul float %310, %114 %316 = fmul float %272, %114 %317 = fadd float %314, %221 %318 = fadd float %315, %223 %319 = fadd float %316, %225 %320 = fadd float %87, 0.000000e+00 %321 = fmul float %318, %26 %322 = fmul float %318, %27 %323 = fmul float %318, %28 %324 = fmul float %318, %29 %325 = fmul float %22, %317 %326 = fadd float %325, %321 %327 = fmul float %23, %317 %328 = fadd float %327, %322 %329 = fmul float %24, %317 %330 = fadd float %329, %323 %331 = fmul float %25, %317 %332 = fadd float %331, %324 %333 = fmul float %30, %319 %334 = fadd float %333, %326 %335 = fmul float %31, %319 %336 = fadd float %335, %328 %337 = fmul float %32, %319 %338 = fadd float %337, %330 %339 = fmul float %33, %319 %340 = fadd float %339, %332 %341 = fmul float %34, %320 %342 = fadd float %341, %334 %343 = fmul float %35, %320 %344 = fadd float %343, %336 %345 = fmul float %36, %320 %346 = fadd float %345, %338 %347 = fmul float %37, %320 %348 = fadd float %347, %340 %349 = fmul float %344, %69 %350 = fmul float %344, %70 %351 = fmul float %344, %71 %352 = fmul float %344, %72 %353 = fmul float %65, %342 %354 = fadd float %353, %349 %355 = fmul float %66, %342 %356 = fadd float %355, %350 %357 = fmul float %67, %342 %358 = fadd float %357, %351 %359 = fmul float %68, %342 %360 = fadd float %359, %352 %361 = fmul float %73, %346 %362 = fadd float %361, %354 %363 = fmul float %74, %346 %364 = fadd float %363, %356 %365 = fmul float %75, %346 %366 = fadd float %365, %358 %367 = fmul float %76, %346 %368 = fadd float %367, %360 %369 = fmul float %77, %348 %370 = fadd float %369, %362 %371 = fmul float %78, %348 %372 = fadd float %371, %364 %373 = fmul float %79, %348 %374 = fadd float %373, %366 %375 = fmul float %80, %348 %376 = fadd float %375, %368 %377 = fsub float -0.000000e+00, %372 %378 = fmul float %374, 2.000000e+00 %379 = fsub float %378, %376 %380 = bitcast i32 %11 to float %381 = insertvalue <{ float, float, float }> undef, float %380, 2 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %196, float %197, float %198, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %211, float %212, float %213, float %189) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %215, float %217, float undef, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %104, float %105, float %110, float %109) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %114, float %115, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %370, float %377, float %379, float %376) ret <{ float, float, float }> %381 IF55: ; preds = %ELSE53 %382 = call float @llvm.sqrt.f32(float %203) %383 = fdiv float 1.000000e+00, %382 br label %ENDIF51 ELSE56: ; preds = %ELSE53 %384 = fcmp ogt float %203, 0.000000e+00 %385 = select i1 %384, float 1.000000e+00, float %203 %386 = fcmp oge float %385, 0.000000e+00 %.op89 = fmul float %385, 0x47EFFFFFE0000000 %387 = select i1 %386, float %.op89, float 0xC7EFFFFFE0000000 br label %ENDIF51 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #0 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #0 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #0 ; Function Attrs: nounwind readnone declare float @llvm.floor.f32(float) #0 ; Function Attrs: nounwind readnone declare float @llvm.sin.f32(float) #0 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { nounwind readnone } !0 = !{} !1 = !{!"const", null, i32 1} [traceshaders] glDetachShader(program=74, shader=73) [traceshaders] glDeleteShader(shader=73) [...] [traceshaders] glCreateShader(GL_FRAGMENT_SHADER) → shader=89 [traceshaders] glShaderSource(shader=89) [traceshaders] ================================================================================ #version 410 #extension GL_ARB_explicit_attrib_location : require #ifdef GL_ARB_separate_shader_objects #extension GL_ARB_separate_shader_objects : enable #endif subroutine void SubroutineType(); struct vec1 { float x; }; struct uvec1 { uint x; }; struct ivec1 { int x; }; vec4 InstrHelper; layout(std140) uniform; uniform cbuffer_0 { // $Globals vec4 Const0[63]; }; uniform sampler2D tex0_X_smp0; uniform sampler2D tex0; layout(location = 0) in vec4 VtxGeoOutput0; vec4 Input0; layout(location = 1) in vec4 VtxGeoOutput1; vec4 Input1; layout(location = 2) in vec4 VtxGeoOutput2; vec4 Input2; layout(location = 0) out vec4 PixOutput0; #define Output0 PixOutput0 layout(location = 1) out vec4 PixOutput1; #define Output1 PixOutput1 vec4 Temp[2]; ivec4 Temp_int[2]; uvec4 Temp_uint[2]; void main() { Input0 = VtxGeoOutput0; Input1 = VtxGeoOutput1; Input2 = VtxGeoOutput2; Temp[0].x = Input0.x * Input1.y; Temp[0].x = Input1.x * Input0.y + (-Temp[0].x); Temp[0].y = Temp[0].x * Input1.w; Temp[0].x = Input0.z; Temp[0].z = Input1.z; Temp[0].w = dot(Temp[0].xyz, Temp[0].xyz); Temp[0].w = ( ( Temp[0].w < 0.0 ) ? 0.0 : ( ( Temp[0].w > 0.0 ) ? inversesqrt( Temp[0].w ) : ( 3.4028235E+38 * sign( Temp[0].w ) ) ) ); Temp[0].xyz = Temp[0].www * Temp[0].xyz; Output0.xyz = Temp[0].xyz * vec3(uintBitsToFloat(1056964608u), uintBitsToFloat(1056964608u), uintBitsToFloat(1056964608u)) + vec3(uintBitsToFloat(1056964608u), uintBitsToFloat(1056964608u), uintBitsToFloat(1056964608u)); Output0.xyz = clamp(Output0.xyz, 0.0, 1.0); Temp[0].x = Const0[45].x + uintBitsToFloat(3225419776u); Temp[0].x = Temp[0].x * uintBitsToFloat(989855744u); Temp[0].x = clamp(Temp[0].x, 0.0, 1.0); Output0.w = ( ( ( Temp[0].x ) < 0.0 ) ? 0.0 : sqrt( Temp[0].x ) ); Temp[0] = texture(tex0_X_smp0, Input2.xy); Temp[1] = Temp[0].yyyy * Const0[59]; Temp[1] = Const0[58] * Temp[0].xxxx + Temp[1]; Temp[1] = Const0[60] * Temp[0].zzzz + Temp[1]; Temp[0] = Const0[61] * Temp[0].wwww + Temp[1]; Output1 = Temp[0] + Const0[62]; return; } [traceshaders] ================================================================================ [traceshaders] glCompileShader(shader=89) [traceshaders] glCreateProgram() = 90 [traceshaders] glAttachShader(program=90, shader=89) [traceshaders] glLinkProgram(program=90) SHADER KEY prolog.color_two_side = 0 prolog.poly_stipple = 0 prolog.force_persample_interp = 0 epilog.spi_shader_col_format = 0x0 epilog.color_is_int8 = 0x0 epilog.last_cbuf = 0 epilog.alpha_func = 0 epilog.alpha_to_one = 0 epilog.poly_line_smoothing = 0 epilog.clamp_color = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0..62] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.0000, 340282346638528859811704183484516925440.0000, 0.5000, -3.0000} IMM[1] UINT32 {0, 720, 944, 928} IMM[2] FLT32 { 0.0020, 0.0000, 0.0000, 0.0000} IMM[3] UINT32 {960, 976, 992, 0} 0: MUL TEMP[0].x, IN[0].xxxx, IN[1].yyyy 1: MAD TEMP[0].x, IN[1].xxxx, IN[0].yyyy, -TEMP[0].xxxx 2: MUL TEMP[1].x, TEMP[0].xxxx, IN[1].wwww 3: MOV TEMP[0].y, TEMP[1].xxxx 4: MOV TEMP[0].x, IN[0].zzzz 5: MOV TEMP[0].z, IN[1].zzzz 6: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[0].xyzz 7: FSLT TEMP[2].x, TEMP[1].xxxx, IMM[0].xxxx 8: UIF TEMP[2].xxxx :0 9: MOV TEMP[2].x, IMM[0].xxxx 10: ELSE :0 11: FSLT TEMP[3].x, IMM[0].xxxx, TEMP[1].xxxx 12: UIF TEMP[3].xxxx :0 13: RSQ TEMP[3].x, TEMP[1].xxxx 14: ELSE :0 15: SSG TEMP[1].x, TEMP[1].xxxx 16: MUL TEMP[3].x, IMM[0].yyyy, TEMP[1].xxxx 17: ENDIF 18: MOV TEMP[2].x, TEMP[3].xxxx 19: ENDIF 20: MUL TEMP[0].xyz, TEMP[2].xxxx, TEMP[0].xyzz 21: MAD TEMP[1].xyz, TEMP[0].xyzz, IMM[0].zzzz, IMM[0].zzzz 22: MOV_SAT TEMP[1].xyz, TEMP[1].xyzz 23: ADD TEMP[0].x, CONST[1][45].xxxx, IMM[0].wwww 24: MUL TEMP[0].x, TEMP[0].xxxx, IMM[2].xxxx 25: MOV_SAT TEMP[0].x, TEMP[0].xxxx 26: FSLT TEMP[2].x, TEMP[0].xxxx, IMM[0].xxxx 27: UIF TEMP[2].xxxx :0 28: MOV TEMP[2].x, IMM[0].xxxx 29: ELSE :0 30: SQRT TEMP[2].x, TEMP[0].xxxx 31: ENDIF 32: MOV TEMP[1].w, TEMP[2].xxxx 33: MOV TEMP[2].xy, IN[2].xyyy 34: TEX TEMP[2], TEMP[2], SAMP[0], 2D 35: MUL TEMP[3], TEMP[2].yyyy, CONST[1][59] 36: MAD TEMP[3], CONST[1][58], TEMP[2].xxxx, TEMP[3] 37: MAD TEMP[3], CONST[1][60], TEMP[2].zzzz, TEMP[3] 38: MAD TEMP[0], CONST[1][61], TEMP[2].wwww, TEMP[3] 39: ADD TEMP[0], TEMP[0], CONST[1][62] 40: MOV OUT[0], TEMP[1] 41: MOV OUT[1], TEMP[0] 42: END radeonsi: Compiling shader 57 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1, !amdgpu.uniform !0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !1 %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 720) %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 928) %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 932) %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 936) %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 940) %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 944) %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 948) %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 952) %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 956) %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 960) %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 964) %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 968) %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 972) %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 976) %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 980) %40 = call float @llvm.SI.load.const(<16 x i8> %24, i32 984) %41 = call float @llvm.SI.load.const(<16 x i8> %24, i32 988) %42 = call float @llvm.SI.load.const(<16 x i8> %24, i32 992) %43 = call float @llvm.SI.load.const(<16 x i8> %24, i32 996) %44 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1000) %45 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1004) %46 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0, !amdgpu.uniform !0 %47 = load <8 x i32>, <8 x i32> addrspace(2)* %46, align 32, !tbaa !1 %48 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %49 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %48, i64 0, i64 3, !amdgpu.uniform !0 %50 = load <4 x i32>, <4 x i32> addrspace(2)* %49, align 16, !tbaa !1 %51 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %52 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %53 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %54 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %55 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %56 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) %57 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) %58 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) %59 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) %60 = fmul float %51, %55 %61 = fmul float %54, %52 %62 = fsub float %61, %60 %63 = fmul float %62, %57 %64 = fmul float %53, %53 %65 = fmul float %63, %63 %66 = fadd float %65, %64 %67 = fmul float %56, %56 %68 = fadd float %66, %67 br i1 false, label %ENDIF, label %ELSE ELSE: ; preds = %main_body %69 = fcmp ogt float %68, 0.000000e+00 br i1 %69, label %IF17, label %ELSE18 ENDIF: ; preds = %IF17, %ELSE18, %main_body %temp8.0 = phi float [ 0.000000e+00, %main_body ], [ %140, %IF17 ], [ %144, %ELSE18 ] %70 = fmul float %temp8.0, %53 %71 = fmul float %temp8.0, %63 %72 = fmul float %temp8.0, %56 %73 = fmul float %70, 5.000000e-01 %74 = fadd float %73, 5.000000e-01 %75 = fmul float %71, 5.000000e-01 %76 = fadd float %75, 5.000000e-01 %77 = fmul float %72, 5.000000e-01 %78 = fadd float %77, 5.000000e-01 %79 = call float @llvm.AMDGPU.clamp.(float %74, float 0.000000e+00, float 1.000000e+00) %80 = call float @llvm.AMDGPU.clamp.(float %76, float 0.000000e+00, float 1.000000e+00) %81 = call float @llvm.AMDGPU.clamp.(float %78, float 0.000000e+00, float 1.000000e+00) %82 = fadd float %25, -3.000000e+00 %83 = fmul float %82, 1.953125e-03 %84 = call float @llvm.AMDGPU.clamp.(float %83, float 0.000000e+00, float 1.000000e+00) %85 = fcmp olt float %84, 0.000000e+00 %86 = call float @llvm.sqrt.f32(float %84) %temp8.1 = select i1 %85, float 0.000000e+00, float %86 %87 = bitcast float %58 to i32 %88 = bitcast float %59 to i32 %89 = insertelement <2 x i32> undef, i32 %87, i32 0 %90 = insertelement <2 x i32> %89, i32 %88, i32 1 %91 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %90, <8 x i32> %47, <4 x i32> %50, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %92 = extractelement <4 x float> %91, i32 0 %93 = extractelement <4 x float> %91, i32 1 %94 = extractelement <4 x float> %91, i32 2 %95 = extractelement <4 x float> %91, i32 3 %96 = fmul float %93, %30 %97 = fmul float %93, %31 %98 = fmul float %93, %32 %99 = fmul float %93, %33 %100 = fmul float %26, %92 %101 = fadd float %100, %96 %102 = fmul float %27, %92 %103 = fadd float %102, %97 %104 = fmul float %28, %92 %105 = fadd float %104, %98 %106 = fmul float %29, %92 %107 = fadd float %106, %99 %108 = fmul float %34, %94 %109 = fadd float %108, %101 %110 = fmul float %35, %94 %111 = fadd float %110, %103 %112 = fmul float %36, %94 %113 = fadd float %112, %105 %114 = fmul float %37, %94 %115 = fadd float %114, %107 %116 = fmul float %38, %95 %117 = fadd float %116, %109 %118 = fmul float %39, %95 %119 = fadd float %118, %111 %120 = fmul float %40, %95 %121 = fadd float %120, %113 %122 = fmul float %41, %95 %123 = fadd float %122, %115 %124 = fadd float %117, %42 %125 = fadd float %119, %43 %126 = fadd float %121, %44 %127 = fadd float %123, %45 %128 = bitcast float %5 to i32 %129 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %128, 10 %130 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %129, float %79, 11 %131 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %130, float %80, 12 %132 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %131, float %81, 13 %133 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %132, float %temp8.1, 14 %134 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %133, float %124, 15 %135 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %134, float %125, 16 %136 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %135, float %126, 17 %137 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %136, float %127, 18 %138 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %137, float %21, 24 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %138 IF17: ; preds = %ELSE %139 = call float @llvm.sqrt.f32(float %68) %140 = fdiv float 1.000000e+00, %139 br label %ENDIF ELSE18: ; preds = %ELSE %141 = fcmp ogt float %68, 0.000000e+00 %142 = select i1 %141, float 1.000000e+00, float %68 %143 = fcmp oge float %142, 0.000000e+00 %.op = fmul float %142, 0x47EFFFFFE0000000 %144 = select i1 %143, float %.op, float 0xC7EFFFFFE0000000 br label %ENDIF } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 attributes #0 = { "InitialPSInputAddr"="36983" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{} !1 = !{!"const", null, i32 1} [traceshaders] glDetachShader(program=90, shader=89) [traceshaders] glDeleteShader(shader=89) [...] [traceshaders] glCreateShader(GL_FRAGMENT_SHADER) → shader=381 [traceshaders] glShaderSource(shader=381) [traceshaders] ================================================================================ #version 410 #extension GL_ARB_explicit_attrib_location : require #ifdef GL_ARB_separate_shader_objects #extension GL_ARB_separate_shader_objects : enable #endif subroutine void SubroutineType(); struct vec1 { float x; }; struct uvec1 { uint x; }; struct ivec1 { int x; }; vec4 InstrHelper; layout(std140) uniform; uniform cbuffer_0 { // $Globals vec4 Const0[46]; }; uniform cbuffer_2 { // PSOffsetConstants vec4 Const2[3]; }; uniform sampler2D tex0_X_smp0; uniform sampler2D tex0; uniform sampler2D tex1_X_smp1; uniform sampler2D tex1; uniform sampler2D tex2_X_smp2; uniform sampler2D tex2; uniform sampler2D tex3_X_smp3; uniform sampler2D tex3; uniform sampler2D tex4_X_smp4; uniform sampler2D tex4; layout(location = 0) in vec4 VtxGeoOutput0; vec4 Input0; layout(location = 1) in vec4 VtxGeoOutput1; vec4 Input1; layout(location = 3) in vec4 VtxGeoOutput3; vec4 Input3; layout(location = 4) in vec4 VtxGeoOutput4; vec4 Input4; layout(location = 0) out vec4 PixOutput0; #define Output0 PixOutput0 vec4 Temp[5]; ivec4 Temp_int[5]; uvec4 Temp_uint[5]; void main() { Input0 = VtxGeoOutput0; Input1 = VtxGeoOutput1; Input3 = VtxGeoOutput3; Input4 = VtxGeoOutput4; Temp[0].x = dot(Input4.xyz, Input4.xyz); Temp[0].x = ( ( Temp[0].x < 0.0 ) ? 0.0 : ( ( Temp[0].x > 0.0 ) ? inversesqrt( Temp[0].x ) : ( 3.4028235E+38 * sign( Temp[0].x ) ) ) ); Temp[0].xyz = Temp[0].xxx * Input4.xyz; Temp[0].xyz = Temp[0].zzz * vec3(uintBitsToFloat(0u), uintBitsToFloat(0u), uintBitsToFloat(1073741824u)) + (-Temp[0].xyz); Temp[1].x = dot(Temp[0].yz, vec2(uintBitsToFloat(1062274540u), uintBitsToFloat(1058262330u))); Temp[1].x = clamp(Temp[1].x, 0.0, 1.0); Temp[1].y = dot(Temp[0].zxy, vec3(uintBitsToFloat(1058262330u), uintBitsToFloat(3207922931u), uintBitsToFloat(3201369580u))); Temp[1].y = clamp(Temp[1].y, 0.0, 1.0); Temp[1].z = dot(Temp[0].xyz, vec3(uintBitsToFloat(1060439283u), uintBitsToFloat(3201369580u), uintBitsToFloat(1058262330u))); Temp[1].z = clamp(Temp[1].z, 0.0, 1.0); Temp[0].xyz = max(Temp[1].xyz, vec3(uintBitsToFloat(897988541u), uintBitsToFloat(897988541u), uintBitsToFloat(897988541u))); InstrHelper.x = ( ( Temp[0].x ) <= 0.0 ) ? -3.4028235E+38 : log2( Temp[0].x ); InstrHelper.y = ( ( Temp[0].y ) <= 0.0 ) ? -3.4028235E+38 : log2( Temp[0].y ); InstrHelper.z = ( ( Temp[0].z ) <= 0.0 ) ? -3.4028235E+38 : log2( Temp[0].z ); Temp[0].xyz = vec3(InstrHelper.xyz); Temp[0].w = Const0[29].x + uintBitsToFloat(1065353216u); Temp[0].xyz = Temp[0].xyz * Temp[0].www; Temp[0].xyz = exp2(Temp[0].xyz); Temp[1].xyz = texture(tex1_X_smp1, Input0.xy).xyz; Temp[1].xyz = Temp[1].xyz * Const0[44].xyz; Temp[0].x = dot(Temp[1].xyz, Temp[0].xyz); Temp[0].y = dot(Temp[1].xyz, vec3(uintBitsToFloat(1051372203u), uintBitsToFloat(1051372203u), uintBitsToFloat(1051372203u))); InstrHelper.z = ( Input3.x ) / ( ( Input3.w == 0.0 ) ? 3.0E-37 : ( Input3.w ) ); InstrHelper.w = ( Input3.y ) / ( ( Input3.w == 0.0 ) ? 3.0E-37 : ( Input3.w ) ); Temp[0].zw = vec2(InstrHelper.zw); Temp[0].zw = Temp[0].zw * Const2[0].xy + Const2[0].wz; Temp[1].xyz = texture(tex4_X_smp4, Temp[0].zw).xyz; Temp[2].xyz = texture(tex3_X_smp3, Temp[0].zw).xyz; Temp[3].xyz = texture(tex0_X_smp0, Input0.xy).xyz; Temp[3] = Temp[3].xyzz * Const0[45].xyzz; Temp[1] = Temp[3] * Temp[0].xxxx + Temp[1].xyzz; Temp[0] = Temp[3].xyww * Temp[0].yyyy + Temp[2].xyzz; Temp[2].xy = Input1.xy * Const0[28].xy; Temp[2].xyz = texture(tex2_X_smp2, Temp[2].xy).xyz; Temp[3] = Temp[2].xyzz * Temp[2].xyzz; Temp[4] = Temp[2].xyzz * Temp[3]; Temp[2].w = dot(Temp[4].xyw, vec3(uintBitsToFloat(1065353216u), uintBitsToFloat(1065353216u), uintBitsToFloat(1065353216u))); Temp[3] = (-Temp[3].xyww) * Temp[2].xyzz + Temp[2].wwww; Temp[2] = Temp[2].xyzz * Const0[22].xyzz; Temp[3] = Const0[28].zzzz * Temp[3] + Temp[4]; Temp[3] = Temp[3] * Const0[24].xyzz; Temp[3] = Temp[3] * Const0[28].wwww; Temp[1] = Temp[1] * Temp[3]; Temp[0] = Temp[2] * Temp[0] + Temp[1]; Output0 = Temp[0] + Const0[21].xyzz; return; } [traceshaders] ================================================================================ [traceshaders] glCompileShader(shader=381) [traceshaders] glCreateProgram() = 382 [traceshaders] glAttachShader(program=382, shader=381) [traceshaders] glLinkProgram(program=382) SHADER KEY prolog.color_two_side = 0 prolog.poly_stipple = 0 prolog.force_persample_interp = 0 epilog.spi_shader_col_format = 0x0 epilog.color_is_int8 = 0x0 epilog.last_cbuf = 0 epilog.alpha_func = 0 epilog.alpha_to_one = 0 epilog.poly_line_smoothing = 0 epilog.clamp_color = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[3], PERSPECTIVE DCL IN[3], GENERIC[4], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL CONST[1][0..2] DCL CONST[2][0..45] DCL TEMP[0..6], LOCAL IMM[0] FLT32 { 0.0000, 340282346638528859811704183484516925440.0000, 2.0000, 0.0000} IMM[1] FLT32 { 0.8165, 0.5774, -0.7071, -0.4082} IMM[2] FLT32 { 0.7071, -0.4082, 0.5774, -340282346638528859811704183484516925440.0000} IMM[3] UINT32 {1, 464, 704, 0} IMM[4] FLT32 { 1.0000, 0.3333, 0.0000, 0.0000} IMM[5] UINT32 {720, 448, 352, 384} IMM[6] UINT32 {336, 0, 0, 0} 0: DP3 TEMP[0].x, IN[3].xyzz, IN[3].xyzz 1: FSLT TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx 2: UIF TEMP[1].xxxx :0 3: MOV TEMP[1].x, IMM[0].xxxx 4: ELSE :0 5: FSLT TEMP[2].x, IMM[0].xxxx, TEMP[0].xxxx 6: UIF TEMP[2].xxxx :0 7: RSQ TEMP[2].x, TEMP[0].xxxx 8: ELSE :0 9: SSG TEMP[3].x, TEMP[0].xxxx 10: MUL TEMP[2].x, IMM[0].yyyy, TEMP[3].xxxx 11: ENDIF 12: MOV TEMP[1].x, TEMP[2].xxxx 13: ENDIF 14: MUL TEMP[0].xyz, TEMP[1].xxxx, IN[3].xyzz 15: MAD TEMP[0].xyz, TEMP[0].zzzz, IMM[0].xxzz, -TEMP[0].xyzz 16: DP2 TEMP[1].x, TEMP[0].yzzz, IMM[1].xyyy 17: MOV_SAT TEMP[1].x, TEMP[1].xxxx 18: DP3 TEMP[2].x, TEMP[0].zxyy, IMM[1].yzww 19: MOV_SAT TEMP[2].x, TEMP[2].xxxx 20: MOV TEMP[1].y, TEMP[2].xxxx 21: DP3 TEMP[2].x, TEMP[0].xyzz, IMM[2].xyzz 22: MOV_SAT TEMP[2].x, TEMP[2].xxxx 23: MOV TEMP[1].z, TEMP[2].xxxx 24: MAX TEMP[0].xyz, TEMP[1].xyzz, IMM[0].wwww 25: FSGE TEMP[2].x, IMM[0].xxxx, TEMP[0].xxxx 26: UIF TEMP[2].xxxx :0 27: MOV TEMP[2].x, IMM[2].wwww 28: ELSE :0 29: LG2 TEMP[2].x, TEMP[0].xxxx 30: ENDIF 31: MOV TEMP[2].x, TEMP[2].xxxx 32: FSGE TEMP[3].x, IMM[0].xxxx, TEMP[0].yyyy 33: UIF TEMP[3].xxxx :0 34: MOV TEMP[3].x, IMM[2].wwww 35: ELSE :0 36: LG2 TEMP[3].x, TEMP[0].yyyy 37: ENDIF 38: MOV TEMP[2].y, TEMP[3].xxxx 39: FSGE TEMP[3].x, IMM[0].xxxx, TEMP[0].zzzz 40: UIF TEMP[3].xxxx :0 41: MOV TEMP[3].x, IMM[2].wwww 42: ELSE :0 43: LG2 TEMP[3].x, TEMP[0].zzzz 44: ENDIF 45: MOV TEMP[2].z, TEMP[3].xxxx 46: ADD TEMP[3].x, CONST[2][29].xxxx, IMM[4].xxxx 47: MUL TEMP[0].xyz, TEMP[2].xyzz, TEMP[3].xxxx 48: EX2 TEMP[3].x, TEMP[0].xxxx 49: EX2 TEMP[3].y, TEMP[0].yyyy 50: EX2 TEMP[3].z, TEMP[0].zzzz 51: MOV TEMP[4].xy, IN[0].xyyy 52: TEX TEMP[4].xyz, TEMP[4], SAMP[1], 2D 53: MUL TEMP[1].xyz, TEMP[4].xyzz, CONST[2][44].xyzz 54: DP3 TEMP[0].x, TEMP[1].xyzz, TEMP[3].xyzz 55: DP3 TEMP[3].x, TEMP[1].xyzz, IMM[4].yyyy 56: FSEQ TEMP[4].x, IN[2].wwww, IMM[0].xxxx 57: UIF TEMP[4].xxxx :0 58: MOV TEMP[4].x, IMM[4].zzzz 59: ELSE :0 60: MOV TEMP[4].x, IN[2].wwww 61: ENDIF 62: RCP TEMP[4].x, TEMP[4].xxxx 63: MUL TEMP[4].x, IN[2].xxxx, TEMP[4].xxxx 64: MOV TEMP[2].z, TEMP[4].xxxx 65: FSEQ TEMP[4].x, IN[2].wwww, IMM[0].xxxx 66: UIF TEMP[4].xxxx :0 67: MOV TEMP[4].x, IMM[4].zzzz 68: ELSE :0 69: MOV TEMP[4].x, IN[2].wwww 70: ENDIF 71: RCP TEMP[4].x, TEMP[4].xxxx 72: MUL TEMP[4].x, IN[2].yyyy, TEMP[4].xxxx 73: MOV TEMP[2].w, TEMP[4].xxxx 74: MAD TEMP[2].xy, TEMP[2].zwww, CONST[1][0].xyyy, CONST[1][0].wzzz 75: MOV TEMP[4].xy, TEMP[2].xyyy 76: TEX TEMP[4].xyz, TEMP[4], SAMP[4], 2D 77: MOV TEMP[2].xy, TEMP[2].xyyy 78: TEX TEMP[2].xyz, TEMP[2], SAMP[3], 2D 79: MOV TEMP[5].xy, IN[0].xyyy 80: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D 81: MUL TEMP[5], TEMP[5].xyzz, CONST[2][45].xyzz 82: MAD TEMP[1], TEMP[5], TEMP[0].xxxx, TEMP[4].xyzz 83: MAD TEMP[0], TEMP[5].xyww, TEMP[3].xxxx, TEMP[2].xyzz 84: MUL TEMP[2].xy, IN[1].xyyy, CONST[2][28].xyyy 85: MOV TEMP[3].xy, TEMP[2].xyyy 86: TEX TEMP[3].xyz, TEMP[3], SAMP[2], 2D 87: MUL TEMP[5], TEMP[3].xyzz, TEMP[3].xyzz 88: MUL TEMP[4], TEMP[3].xyzz, TEMP[5] 89: DP3 TEMP[6].x, TEMP[4].xyww, IMM[4].xxxx 90: MAD TEMP[5], -TEMP[5].xyww, TEMP[3].xyzz, TEMP[6].xxxx 91: MUL TEMP[2], TEMP[3].xyzz, CONST[2][22].xyzz 92: MAD TEMP[5], CONST[2][28].zzzz, TEMP[5], TEMP[4] 93: MUL TEMP[5], TEMP[5], CONST[2][24].xyzz 94: MUL TEMP[5], TEMP[5], CONST[2][28].wwww 95: MUL TEMP[1], TEMP[1], TEMP[5] 96: MAD TEMP[0], TEMP[2], TEMP[0], TEMP[1] 97: ADD TEMP[0], TEMP[0], CONST[2][21].xyzz 98: MOV OUT[0], TEMP[0] 99: END radeonsi: Compiling shader 208 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1, !amdgpu.uniform !0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !1 %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0) %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 4) %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 8) %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 12) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2, !amdgpu.uniform !0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !1 %31 = call float @llvm.SI.load.const(<16 x i8> %30, i32 336) %32 = call float @llvm.SI.load.const(<16 x i8> %30, i32 340) %33 = call float @llvm.SI.load.const(<16 x i8> %30, i32 344) %34 = call float @llvm.SI.load.const(<16 x i8> %30, i32 352) %35 = call float @llvm.SI.load.const(<16 x i8> %30, i32 356) %36 = call float @llvm.SI.load.const(<16 x i8> %30, i32 360) %37 = call float @llvm.SI.load.const(<16 x i8> %30, i32 384) %38 = call float @llvm.SI.load.const(<16 x i8> %30, i32 388) %39 = call float @llvm.SI.load.const(<16 x i8> %30, i32 392) %40 = call float @llvm.SI.load.const(<16 x i8> %30, i32 448) %41 = call float @llvm.SI.load.const(<16 x i8> %30, i32 452) %42 = call float @llvm.SI.load.const(<16 x i8> %30, i32 456) %43 = call float @llvm.SI.load.const(<16 x i8> %30, i32 460) %44 = call float @llvm.SI.load.const(<16 x i8> %30, i32 464) %45 = call float @llvm.SI.load.const(<16 x i8> %30, i32 704) %46 = call float @llvm.SI.load.const(<16 x i8> %30, i32 708) %47 = call float @llvm.SI.load.const(<16 x i8> %30, i32 712) %48 = call float @llvm.SI.load.const(<16 x i8> %30, i32 720) %49 = call float @llvm.SI.load.const(<16 x i8> %30, i32 724) %50 = call float @llvm.SI.load.const(<16 x i8> %30, i32 728) %51 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0, !amdgpu.uniform !0 %52 = load <8 x i32>, <8 x i32> addrspace(2)* %51, align 32, !tbaa !1 %53 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %54 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %53, i64 0, i64 3, !amdgpu.uniform !0 %55 = load <4 x i32>, <4 x i32> addrspace(2)* %54, align 16, !tbaa !1 %56 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2, !amdgpu.uniform !0 %57 = load <8 x i32>, <8 x i32> addrspace(2)* %56, align 32, !tbaa !1 %58 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %59 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %58, i64 0, i64 7, !amdgpu.uniform !0 %60 = load <4 x i32>, <4 x i32> addrspace(2)* %59, align 16, !tbaa !1 %61 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4, !amdgpu.uniform !0 %62 = load <8 x i32>, <8 x i32> addrspace(2)* %61, align 32, !tbaa !1 %63 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %64 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %63, i64 0, i64 11, !amdgpu.uniform !0 %65 = load <4 x i32>, <4 x i32> addrspace(2)* %64, align 16, !tbaa !1 %66 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6, !amdgpu.uniform !0 %67 = load <8 x i32>, <8 x i32> addrspace(2)* %66, align 32, !tbaa !1 %68 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %69 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %68, i64 0, i64 15, !amdgpu.uniform !0 %70 = load <4 x i32>, <4 x i32> addrspace(2)* %69, align 16, !tbaa !1 %71 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 8, !amdgpu.uniform !0 %72 = load <8 x i32>, <8 x i32> addrspace(2)* %71, align 32, !tbaa !1 %73 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %74 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %73, i64 0, i64 19, !amdgpu.uniform !0 %75 = load <4 x i32>, <4 x i32> addrspace(2)* %74, align 16, !tbaa !1 %76 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %77 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %78 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %79 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %80 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) %81 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) %82 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) %83 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) %84 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) %85 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) %86 = fmul float %83, %83 %87 = fmul float %84, %84 %88 = fadd float %87, %86 %89 = fmul float %85, %85 %90 = fadd float %88, %89 br i1 false, label %ENDIF, label %ELSE ELSE: ; preds = %main_body %91 = fcmp ogt float %90, 0.000000e+00 br i1 %91, label %IF29, label %ELSE30 ENDIF: ; preds = %IF29, %ELSE30, %main_body %temp4.0 = phi float [ 0.000000e+00, %main_body ], [ %122, %IF29 ], [ %126, %ELSE30 ] %92 = fmul float %temp4.0, %83 %93 = fmul float %temp4.0, %84 %94 = fmul float %temp4.0, %85 %95 = fmul float %94, 0.000000e+00 %96 = fsub float %95, %92 %97 = fmul float %94, 0.000000e+00 %98 = fsub float %97, %93 %99 = fmul float %94, 2.000000e+00 %100 = fsub float %99, %94 %101 = fmul float %98, 0x3FEA20BD80000000 %102 = fmul float %100, 0x3FE279A740000000 %103 = fadd float %101, %102 %104 = call float @llvm.AMDGPU.clamp.(float %103, float 0.000000e+00, float 1.000000e+00) %105 = fmul float %100, 0x3FE279A740000000 %106 = fmul float %96, 0xBFE6A09E60000000 %107 = fadd float %106, %105 %108 = fmul float %98, 0xBFDA20BD80000000 %109 = fadd float %107, %108 %110 = call float @llvm.AMDGPU.clamp.(float %109, float 0.000000e+00, float 1.000000e+00) %111 = fmul float %96, 0x3FE6A09E60000000 %112 = fmul float %98, 0xBFDA20BD80000000 %113 = fadd float %112, %111 %114 = fmul float %100, 0x3FE279A740000000 %115 = fadd float %113, %114 %116 = call float @llvm.AMDGPU.clamp.(float %115, float 0.000000e+00, float 1.000000e+00) %117 = call float @llvm.maxnum.f32(float %104, float 0x3EB0C6F7A0000000) %118 = call float @llvm.maxnum.f32(float %110, float 0x3EB0C6F7A0000000) %119 = call float @llvm.maxnum.f32(float %116, float 0x3EB0C6F7A0000000) %120 = fcmp ugt float %117, 0.000000e+00 br i1 %120, label %ELSE33, label %ENDIF31 IF29: ; preds = %ELSE %121 = call float @llvm.sqrt.f32(float %90) %122 = fdiv float 1.000000e+00, %121 br label %ENDIF ELSE30: ; preds = %ELSE %123 = fcmp ogt float %90, 0.000000e+00 %124 = select i1 %123, float 1.000000e+00, float %90 %125 = fcmp oge float %124, 0.000000e+00 %.op = fmul float %124, 0x47EFFFFFE0000000 %126 = select i1 %125, float %.op, float 0xC7EFFFFFE0000000 br label %ENDIF ELSE33: ; preds = %ENDIF %127 = call float @llvm.log2.f32(float %117) br label %ENDIF31 ENDIF31: ; preds = %ENDIF, %ELSE33 %temp8.1 = phi float [ %127, %ELSE33 ], [ 0xC7EFFFFFE0000000, %ENDIF ] %128 = fcmp ugt float %118, 0.000000e+00 br i1 %128, label %ELSE36, label %ENDIF34 ELSE36: ; preds = %ENDIF31 %129 = call float @llvm.log2.f32(float %118) br label %ENDIF34 ENDIF34: ; preds = %ENDIF31, %ELSE36 %temp12.0 = phi float [ %129, %ELSE36 ], [ 0xC7EFFFFFE0000000, %ENDIF31 ] %130 = fcmp ugt float %119, 0.000000e+00 br i1 %130, label %ELSE39, label %ENDIF37 ELSE39: ; preds = %ENDIF34 %131 = call float @llvm.log2.f32(float %119) br label %ENDIF37 ENDIF37: ; preds = %ENDIF34, %ELSE39 %temp12.1 = phi float [ %131, %ELSE39 ], [ 0xC7EFFFFFE0000000, %ENDIF34 ] %132 = fadd float %44, 1.000000e+00 %133 = fmul float %temp8.1, %132 %134 = fmul float %temp12.0, %132 %135 = fmul float %temp12.1, %132 %136 = call float @llvm.exp2.f32(float %133) %137 = call float @llvm.exp2.f32(float %134) %138 = call float @llvm.exp2.f32(float %135) %139 = bitcast float %76 to i32 %140 = bitcast float %77 to i32 %141 = insertelement <2 x i32> undef, i32 %139, i32 0 %142 = insertelement <2 x i32> %141, i32 %140, i32 1 %143 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %142, <8 x i32> %57, <4 x i32> %60, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %144 = extractelement <4 x float> %143, i32 0 %145 = extractelement <4 x float> %143, i32 1 %146 = extractelement <4 x float> %143, i32 2 %147 = fmul float %144, %45 %148 = fmul float %145, %46 %149 = fmul float %146, %47 %150 = fmul float %147, %136 %151 = fmul float %148, %137 %152 = fadd float %151, %150 %153 = fmul float %149, %138 %154 = fadd float %152, %153 %155 = fmul float %147, 0x3FD5555560000000 %156 = fmul float %148, 0x3FD5555560000000 %157 = fadd float %156, %155 %158 = fmul float %149, 0x3FD5555560000000 %159 = fadd float %157, %158 %160 = fcmp oeq float %82, 0.000000e+00 %.op46 = fdiv float 1.000000e+00, %82 %161 = select i1 %160, float 0x47840FD020000000, float %.op46 %162 = fmul float %80, %161 %163 = fcmp oeq float %82, 0.000000e+00 %.op47 = fdiv float 1.000000e+00, %82 %164 = select i1 %163, float 0x47840FD020000000, float %.op47 %165 = fmul float %81, %164 %166 = fmul float %162, %25 %167 = fadd float %166, %28 %168 = fmul float %165, %26 %169 = fadd float %168, %27 %170 = bitcast float %167 to i32 %171 = bitcast float %169 to i32 %172 = insertelement <2 x i32> undef, i32 %170, i32 0 %173 = insertelement <2 x i32> %172, i32 %171, i32 1 %174 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %173, <8 x i32> %72, <4 x i32> %75, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %175 = extractelement <4 x float> %174, i32 0 %176 = extractelement <4 x float> %174, i32 1 %177 = extractelement <4 x float> %174, i32 2 %178 = bitcast float %167 to i32 %179 = bitcast float %169 to i32 %180 = insertelement <2 x i32> undef, i32 %178, i32 0 %181 = insertelement <2 x i32> %180, i32 %179, i32 1 %182 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %181, <8 x i32> %67, <4 x i32> %70, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %183 = extractelement <4 x float> %182, i32 0 %184 = extractelement <4 x float> %182, i32 1 %185 = extractelement <4 x float> %182, i32 2 %186 = bitcast float %76 to i32 %187 = bitcast float %77 to i32 %188 = insertelement <2 x i32> undef, i32 %186, i32 0 %189 = insertelement <2 x i32> %188, i32 %187, i32 1 %190 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %189, <8 x i32> %52, <4 x i32> %55, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %191 = extractelement <4 x float> %190, i32 0 %192 = extractelement <4 x float> %190, i32 1 %193 = extractelement <4 x float> %190, i32 2 %194 = fmul float %191, %48 %195 = fmul float %192, %49 %196 = fmul float %193, %50 %197 = fmul float %193, %50 %198 = fmul float %194, %154 %199 = fadd float %198, %175 %200 = fmul float %195, %154 %201 = fadd float %200, %176 %202 = fmul float %196, %154 %203 = fadd float %202, %177 %204 = fmul float %197, %154 %205 = fadd float %204, %177 %206 = fmul float %194, %159 %207 = fadd float %206, %183 %208 = fmul float %195, %159 %209 = fadd float %208, %184 %210 = fmul float %197, %159 %211 = fadd float %210, %185 %212 = fmul float %197, %159 %213 = fadd float %212, %185 %214 = fmul float %78, %40 %215 = fmul float %79, %41 %216 = bitcast float %214 to i32 %217 = bitcast float %215 to i32 %218 = insertelement <2 x i32> undef, i32 %216, i32 0 %219 = insertelement <2 x i32> %218, i32 %217, i32 1 %220 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %219, <8 x i32> %62, <4 x i32> %65, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %221 = extractelement <4 x float> %220, i32 0 %222 = extractelement <4 x float> %220, i32 1 %223 = extractelement <4 x float> %220, i32 2 %224 = fmul float %221, %221 %225 = fmul float %222, %222 %226 = fmul float %223, %223 %227 = fmul float %223, %223 %228 = fmul float %221, %224 %229 = fmul float %222, %225 %230 = fmul float %223, %226 %231 = fmul float %223, %227 %232 = fadd float %229, %228 %233 = fadd float %232, %231 %234 = fmul float %224, %221 %235 = fsub float %233, %234 %236 = fmul float %225, %222 %237 = fsub float %233, %236 %238 = fmul float %227, %223 %239 = fsub float %233, %238 %240 = fmul float %227, %223 %241 = fsub float %233, %240 %242 = fmul float %221, %34 %243 = fmul float %222, %35 %244 = fmul float %223, %36 %245 = fmul float %223, %36 %246 = fmul float %42, %235 %247 = fadd float %246, %228 %248 = fmul float %42, %237 %249 = fadd float %248, %229 %250 = fmul float %42, %239 %251 = fadd float %250, %230 %252 = fmul float %42, %241 %253 = fadd float %252, %231 %254 = fmul float %247, %37 %255 = fmul float %249, %38 %256 = fmul float %251, %39 %257 = fmul float %253, %39 %258 = fmul float %254, %43 %259 = fmul float %255, %43 %260 = fmul float %256, %43 %261 = fmul float %257, %43 %262 = fmul float %199, %258 %263 = fmul float %201, %259 %264 = fmul float %203, %260 %265 = fmul float %205, %261 %266 = fmul float %242, %207 %267 = fadd float %266, %262 %268 = fmul float %243, %209 %269 = fadd float %268, %263 %270 = fmul float %244, %211 %271 = fadd float %270, %264 %272 = fmul float %245, %213 %273 = fadd float %272, %265 %274 = fadd float %267, %31 %275 = fadd float %269, %32 %276 = fadd float %271, %33 %277 = fadd float %273, %33 %278 = bitcast float %5 to i32 %279 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %278, 10 %280 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %279, float %274, 11 %281 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %280, float %275, 12 %282 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %281, float %276, 13 %283 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %282, float %277, 14 %284 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %283, float %21, 24 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %284 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.exp2.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 attributes #0 = { "InitialPSInputAddr"="36983" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{} !1 = !{!"const", null, i32 1} [traceshaders] glDetachShader(program=382, shader=381) [traceshaders] glDeleteShader(shader=381) [...] [traceshaders] glCreateShader(GL_VERTEX_SHADER) → shader=383 [traceshaders] glShaderSource(shader=383) [traceshaders] ================================================================================ #version 410 #extension GL_ARB_explicit_attrib_location : require #ifdef GL_ARB_separate_shader_objects #extension GL_ARB_separate_shader_objects : enable #endif subroutine void SubroutineType(); struct vec1 { float x; }; struct uvec1 { uint x; }; struct ivec1 { int x; }; vec4 InstrHelper; out gl_PerVertex { vec4 gl_Position; float gl_PointSize; float gl_ClipDistance[];}; layout(std140) uniform; uniform cbuffer_0 { // $Globals vec4 Const0[46]; }; uniform cbuffer_1 { // VSOffsetConstants vec4 Const1[5]; }; layout(location = 0) in vec4 dcl_Input0; vec4 Input0; layout(location = 1) in vec4 dcl_Input1; vec4 Input1; layout(location = 2) in vec4 dcl_Input2; vec4 Input2; layout(location = 4) in vec4 dcl_Input4; vec4 Input4; layout(location = 5) in vec4 dcl_Input5; vec4 Input5; layout(location = 6) in vec4 dcl_Input6; vec4 Input6; layout(location = 7) in vec4 dcl_Input7; vec4 Input7; layout(location = 0) out vec4 VtxGeoOutput0; #define Output0 VtxGeoOutput0 layout(location = 1) out vec4 VtxGeoOutput1; #define Output1 VtxGeoOutput1 layout(location = 2) out vec4 VtxGeoOutput2; #define Output2 VtxGeoOutput2 layout(location = 3) out vec4 VtxGeoOutput3; #define Output3 VtxGeoOutput3 layout(location = 4) out vec4 VtxGeoOutput4; #define Output4 VtxGeoOutput4 #undef Output5 #define Output5 phase0_Output5 vec4 phase0_Output5; vec4 Temp[4]; ivec4 Temp_int[4]; uvec4 Temp_uint[4]; void main() { Input0 = dcl_Input0; Input1 = dcl_Input1; Input2 = dcl_Input2; Input4 = dcl_Input4; Input5 = dcl_Input5; Input6 = dcl_Input6; Input7 = dcl_Input7; Output0.xy = Input7.xy * Const0[36].xy + Const0[36].wz; Output1.xy = Input4.xy; Output1.zw = Input5.yx; Output2.xy = Input6.xy; Output2.zw = vec2(uintBitsToFloat(0u), uintBitsToFloat(0u)); Temp[0].x = Const0[18].w * uintBitsToFloat(1067114824u) + Const0[30].w; Temp[0].y = uintBitsToFloat((Temp[0].x>=(-Temp[0].x)) ? 0xFFFFFFFFu : 0u); Temp[0].z = fract(abs(Temp[0].x)); Temp[0].y = (floatBitsToInt(Temp[0]).y != 0) ? Temp[0].z : (-Temp[0].z); Temp[0].yz = Temp[0].yx * vec2(uintBitsToFloat(1086918619u), uintBitsToFloat(1061481552u)); Temp[0].w = uintBitsToFloat((Temp[0].z>=(-Temp[0].z)) ? 0xFFFFFFFFu : 0u); Temp[0].z = fract(abs(Temp[0].z)); Temp[0].z = (floatBitsToInt(Temp[0]).w != 0) ? Temp[0].z : (-Temp[0].z); Temp[0].z = Temp[0].z * uintBitsToFloat(1086918619u); Temp[0].yz = sin(Temp[0].yz); Temp[0].y = Temp[0].z + Temp[0].y; Temp[0].y = Temp[0].y + uintBitsToFloat(1065353216u); Temp[1].xyz = Input0.xyz * Const0[41].xyz + Const0[40].xyz; Temp[0].z = Temp[1].x * Const0[33].z; Temp[0].y = Temp[0].y * Temp[0].z; Temp[0].z = Const0[34].x * Temp[1].x + Temp[0].x; Temp[0].z = Temp[0].z * uintBitsToFloat(1056964608u); Temp[0].w = uintBitsToFloat((Temp[0].z>=(-Temp[0].z)) ? 0xFFFFFFFFu : 0u); Temp[0].z = fract(abs(Temp[0].z)); Temp[0].z = (floatBitsToInt(Temp[0]).w != 0) ? Temp[0].z : (-Temp[0].z); Temp[0].z = Temp[0].z * uintBitsToFloat(1086918619u); Temp[0].w = Const0[33].w * Temp[1].x + Temp[0].x; Temp[0].x = Temp[0].x + Const0[32].x; Temp[2].x = uintBitsToFloat((Temp[0].w>=(-Temp[0].w)) ? 0xFFFFFFFFu : 0u); Temp[0].w = fract(abs(Temp[0].w)); Temp[0].w = (floatBitsToInt(Temp[2]).x != 0) ? Temp[0].w : (-Temp[0].w); Temp[0].w = Temp[0].w * uintBitsToFloat(1086918619u); Temp[0].zw = sin(Temp[0].zw); Temp[0].z = Temp[0].z + Temp[0].w; Temp[0].z = Temp[0].z * Const0[34].y; Temp[2].z = Temp[0].y * uintBitsToFloat(1045220557u) + Temp[0].z; Temp[0].y = Const0[30].z * Temp[1].x + (-Temp[0].x); Temp[0].xz = Const0[32].yw * Temp[1].xx + (-Temp[0].xx); Temp[0].xy = Temp[0].xy * vec2(uintBitsToFloat(1045494470u), uintBitsToFloat(1055439406u)); Temp[0].w = uintBitsToFloat((Temp[0].y>=(-Temp[0].y)) ? 0xFFFFFFFFu : 0u); Temp[0].y = fract(abs(Temp[0].y)); Temp[0].y = (floatBitsToInt(Temp[0]).w != 0) ? Temp[0].y : (-Temp[0].y); Temp[0].w = uintBitsToFloat((Temp[0].x>=(-Temp[0].x)) ? 0xFFFFFFFFu : 0u); Temp[0].x = fract(abs(Temp[0].x)); Temp[0].x = (floatBitsToInt(Temp[0]).w != 0) ? Temp[0].x : (-Temp[0].x); Temp[0].xy = Temp[0].xy * vec2(uintBitsToFloat(1086918619u), uintBitsToFloat(1086918619u)); Temp[0].xy = sin(Temp[0].xy); Temp[0].x = Temp[0].x + Temp[0].y; Temp[0].y = Temp[0].x * Const0[33].y; Temp[0].x = abs(Temp[0].x) * Const0[32].z; Temp[0].w = uintBitsToFloat((Temp[0].z>=(-Temp[0].z)) ? 0xFFFFFFFFu : 0u); Temp[0].z = fract(abs(Temp[0].z)); Temp[0].z = (floatBitsToInt(Temp[0]).w != 0) ? Temp[0].z : (-Temp[0].z); Temp[0].z = Temp[0].z * uintBitsToFloat(1086918619u); Temp[0].z = sin(Temp[0].z); Temp[2].y = Const0[33].x * Temp[0].z + Temp[0].y; Temp[0].y = Const0[30].y * uintBitsToFloat(3256877056u) + uintBitsToFloat(3240099840u); Temp[2].x = Temp[0].x * Temp[0].y; Temp[0].xyz = Temp[2].xyz * Input6.xxx; Temp[1].w = Input0.w; Temp[0].w = uintBitsToFloat(0u); Temp[0] = Temp[0] + Temp[1]; Temp[1] = Temp[0].yyyy * Const0[1]; Temp[1] = Const0[0] * Temp[0].xxxx + Temp[1]; Temp[1] = Const0[2] * Temp[0].zzzz + Temp[1]; Temp[0] = Const0[3] * Temp[0].wwww + Temp[1]; Temp[1] = Temp[0].yyyy * Const1[1]; Temp[1] = Const1[0] * Temp[0].xxxx + Temp[1]; Temp[1] = Const1[2] * Temp[0].zzzz + Temp[1]; Temp[1] = Const1[3] * Temp[0].wwww + Temp[1]; Temp[0].xyz = Temp[0].xyz * Const1[4].www; Output3 = Temp[1]; Output5 = Temp[1]; Temp[1].xyz = (-Temp[0].yyy) * Const0[38].xyz; Temp[0].xyw = Const0[37].xyz * (-Temp[0].xxx) + Temp[1].xyz; Temp[0].xyz = Const0[39].xyz * (-Temp[0].zzz) + Temp[0].xyw; Temp[1].xyz = Input1.yzx * vec3(uintBitsToFloat(1073741824u), uintBitsToFloat(1073741824u), uintBitsToFloat(1073741824u)) + vec3(uintBitsToFloat(3212836864u), uintBitsToFloat(3212836864u), uintBitsToFloat(3212836864u)); Temp[2] = Input2 * vec4(uintBitsToFloat(1073741824u), uintBitsToFloat(1073741824u), uintBitsToFloat(1073741824u), uintBitsToFloat(1073741824u)) + vec4(uintBitsToFloat(3212836864u), uintBitsToFloat(3212836864u), uintBitsToFloat(3212836864u), uintBitsToFloat(3212836864u)); Temp[3].xyz = Temp[1].xyz * Temp[2].zxy; Temp[1].xyz = Temp[2].yzx * Temp[1].yzx + (-Temp[3].xyz); Temp[1].xyz = Temp[2].www * Temp[1].xyz; Temp[3].xyz = Temp[2].yzx * Temp[1].zxy; Temp[3].xyz = Temp[1].yzx * Temp[2].zxy + (-Temp[3].xyz); Output4.y = dot(Temp[1].xyz, Temp[0].xyz); Temp[1].xyz = Temp[2].www * Temp[3].xyz; Output4.z = dot(Temp[2].xyz, Temp[0].xyz); Output4.x = dot(Temp[1].xyz, Temp[0].xyz); Output4.w = uintBitsToFloat(1065353216u); gl_Position = vec4(phase0_Output5); gl_Position.y = -gl_Position.y; gl_Position.z = gl_Position.z * 2.0 - gl_Position.w; return; } [traceshaders] ================================================================================ [traceshaders] glCompileShader(shader=383) [traceshaders] glCreateProgram() = 384 [traceshaders] glAttachShader(program=384, shader=383) [traceshaders] glLinkProgram(program=384) SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT PROPERTY NEXT_SHADER FRAG DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL IN[5] DCL IN[6] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL OUT[5], GENERIC[4] DCL CONST[1][0..45] DCL CONST[2][0..4] DCL TEMP[0..9], LOCAL IMM[0] UINT32 {0, 576, 288, 480} IMM[1] FLT32 { 0.0000, 1.2100, 6.2832, 0.7692} IMM[2] UINT32 {4294967295, 656, 640, 528} IMM[3] INT32 {0, 0, 0, 0} IMM[4] FLT32 { 1.0000, 0.5000, 0.2000, -40.0000} IMM[5] UINT32 {544, 512, 16, 32} IMM[6] FLT32 { 0.2041, 0.4545, -10.0000, 2.0000} IMM[7] UINT32 {48, 1, 64, 608} IMM[8] UINT32 {592, 624, 0, 0} IMM[9] FLT32 { -1.0000, 0.0000, 0.0000, 0.0000} 0: MAD TEMP[0].xy, IN[6].xyyy, CONST[1][36].xyyy, CONST[1][36].wzzz 1: MOV TEMP[1].xy, IN[3].xyxx 2: MOV TEMP[1].zw, IN[4].xxyx 3: MOV TEMP[2].xy, IN[5].xyxx 4: MOV TEMP[2].zw, IMM[1].xxxx 5: MAD TEMP[3].x, CONST[1][18].wwww, IMM[1].yyyy, CONST[1][30].wwww 6: FSGE TEMP[4].x, TEMP[3].xxxx, -TEMP[3].xxxx 7: UIF TEMP[4].xxxx :0 8: MOV TEMP[4].x, IMM[2].xxxx 9: ELSE :0 10: MOV TEMP[4].x, IMM[0].xxxx 11: ENDIF 12: MOV TEMP[3].y, TEMP[4].xxxx 13: ABS TEMP[4].x, TEMP[3].xxxx 14: FRC TEMP[4].x, TEMP[4].xxxx 15: USNE TEMP[5].x, TEMP[3].yyyy, IMM[3].xxxx 16: UIF TEMP[5].xxxx :0 17: MOV TEMP[5].x, TEMP[4].xxxx 18: ELSE :0 19: MOV TEMP[5].x, -TEMP[4].xxxx 20: ENDIF 21: MOV TEMP[3].y, TEMP[5].xxxx 22: MUL TEMP[4].xy, TEMP[3].yxxx, IMM[1].zwww 23: FSGE TEMP[5].x, TEMP[4].yyyy, -TEMP[4].yyyy 24: UIF TEMP[5].xxxx :0 25: MOV TEMP[5].x, IMM[2].xxxx 26: ELSE :0 27: MOV TEMP[5].x, IMM[0].xxxx 28: ENDIF 29: MOV TEMP[3].w, TEMP[5].xxxx 30: ABS TEMP[5].x, TEMP[4].yyyy 31: FRC TEMP[5].x, TEMP[5].xxxx 32: USNE TEMP[6].x, TEMP[3].wwww, IMM[3].xxxx 33: UIF TEMP[6].xxxx :0 34: MOV TEMP[6].x, TEMP[5].xxxx 35: ELSE :0 36: MOV TEMP[6].x, -TEMP[5].xxxx 37: ENDIF 38: MUL TEMP[5].x, TEMP[6].xxxx, IMM[1].zzzz 39: SIN TEMP[4].x, TEMP[4].xxxx 40: SIN TEMP[4].y, TEMP[5].xxxx 41: ADD TEMP[4].x, TEMP[4].yyyy, TEMP[4].xxxx 42: ADD TEMP[4].x, TEMP[4].xxxx, IMM[4].xxxx 43: MAD TEMP[5].xyz, IN[0].xyzz, CONST[1][41].xyzz, CONST[1][40].xyzz 44: MUL TEMP[6].x, TEMP[5].xxxx, CONST[1][33].zzzz 45: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[6].xxxx 46: MAD TEMP[6].x, CONST[1][34].xxxx, TEMP[5].xxxx, TEMP[3].xxxx 47: MUL TEMP[6].x, TEMP[6].xxxx, IMM[4].yyyy 48: FSGE TEMP[7].x, TEMP[6].xxxx, -TEMP[6].xxxx 49: UIF TEMP[7].xxxx :0 50: MOV TEMP[7].x, IMM[2].xxxx 51: ELSE :0 52: MOV TEMP[7].x, IMM[0].xxxx 53: ENDIF 54: MOV TEMP[3].w, TEMP[7].xxxx 55: ABS TEMP[6].x, TEMP[6].xxxx 56: FRC TEMP[6].x, TEMP[6].xxxx 57: USNE TEMP[7].x, TEMP[3].wwww, IMM[3].xxxx 58: UIF TEMP[7].xxxx :0 59: MOV TEMP[7].x, TEMP[6].xxxx 60: ELSE :0 61: MOV TEMP[7].x, -TEMP[6].xxxx 62: ENDIF 63: MUL TEMP[6].x, TEMP[7].xxxx, IMM[1].zzzz 64: MAD TEMP[7].x, CONST[1][33].wwww, TEMP[5].xxxx, TEMP[3].xxxx 65: ADD TEMP[3].x, TEMP[3].xxxx, CONST[1][32].xxxx 66: FSGE TEMP[8].x, TEMP[7].xxxx, -TEMP[7].xxxx 67: UIF TEMP[8].xxxx :0 68: MOV TEMP[8].x, IMM[2].xxxx 69: ELSE :0 70: MOV TEMP[8].x, IMM[0].xxxx 71: ENDIF 72: MOV TEMP[8].x, TEMP[8].xxxx 73: ABS TEMP[7].x, TEMP[7].xxxx 74: FRC TEMP[7].x, TEMP[7].xxxx 75: USNE TEMP[9].x, TEMP[8].xxxx, IMM[3].xxxx 76: UIF TEMP[9].xxxx :0 77: MOV TEMP[9].x, TEMP[7].xxxx 78: ELSE :0 79: MOV TEMP[9].x, -TEMP[7].xxxx 80: ENDIF 81: MUL TEMP[7].x, TEMP[9].xxxx, IMM[1].zzzz 82: SIN TEMP[6].x, TEMP[6].xxxx 83: SIN TEMP[6].y, TEMP[7].xxxx 84: ADD TEMP[6].x, TEMP[6].xxxx, TEMP[6].yyyy 85: MUL TEMP[6].x, TEMP[6].xxxx, CONST[1][34].yyyy 86: MAD TEMP[4].x, TEMP[4].xxxx, IMM[4].zzzz, TEMP[6].xxxx 87: MOV TEMP[8].z, TEMP[4].xxxx 88: MAD TEMP[4].x, CONST[1][30].zzzz, TEMP[5].xxxx, -TEMP[3].xxxx 89: MOV TEMP[3].y, TEMP[4].xxxx 90: MAD TEMP[4].xy, CONST[1][32].ywww, TEMP[5].xxxx, -TEMP[3].xxxx 91: MOV TEMP[3].x, TEMP[4].xxyx 92: MUL TEMP[3].xy, TEMP[3].xyyy, IMM[6].xyyy 93: FSGE TEMP[6].x, TEMP[3].yyyy, -TEMP[3].yyyy 94: UIF TEMP[6].xxxx :0 95: MOV TEMP[6].x, IMM[2].xxxx 96: ELSE :0 97: MOV TEMP[6].x, IMM[0].xxxx 98: ENDIF 99: MOV TEMP[3].w, TEMP[6].xxxx 100: ABS TEMP[6].x, TEMP[3].yyyy 101: FRC TEMP[6].x, TEMP[6].xxxx 102: USNE TEMP[7].x, TEMP[3].wwww, IMM[3].xxxx 103: UIF TEMP[7].xxxx :0 104: MOV TEMP[7].x, TEMP[6].xxxx 105: ELSE :0 106: MOV TEMP[7].x, -TEMP[6].xxxx 107: ENDIF 108: MOV TEMP[3].y, TEMP[7].xxxx 109: FSGE TEMP[6].x, TEMP[3].xxxx, -TEMP[3].xxxx 110: UIF TEMP[6].xxxx :0 111: MOV TEMP[6].x, IMM[2].xxxx 112: ELSE :0 113: MOV TEMP[6].x, IMM[0].xxxx 114: ENDIF 115: MOV TEMP[3].w, TEMP[6].xxxx 116: ABS TEMP[6].x, TEMP[3].xxxx 117: FRC TEMP[3].x, TEMP[6].xxxx 118: USNE TEMP[6].x, TEMP[3].wwww, IMM[3].xxxx 119: UIF TEMP[6].xxxx :0 120: MOV TEMP[6].x, TEMP[3].xxxx 121: ELSE :0 122: MOV TEMP[6].x, -TEMP[3].xxxx 123: ENDIF 124: MOV TEMP[3].x, TEMP[6].xxxx 125: MUL TEMP[3].xy, TEMP[3].xyyy, IMM[1].zzzz 126: SIN TEMP[6].x, TEMP[3].xxxx 127: SIN TEMP[6].y, TEMP[3].yyyy 128: ADD TEMP[3].x, TEMP[6].xxxx, TEMP[6].yyyy 129: MUL TEMP[6].x, TEMP[3].xxxx, CONST[1][33].yyyy 130: ABS TEMP[7].x, TEMP[3].xxxx 131: MUL TEMP[3].x, TEMP[7].xxxx, CONST[1][32].zzzz 132: FSGE TEMP[7].x, TEMP[4].yyyy, -TEMP[4].yyyy 133: UIF TEMP[7].xxxx :0 134: MOV TEMP[7].x, IMM[2].xxxx 135: ELSE :0 136: MOV TEMP[7].x, IMM[0].xxxx 137: ENDIF 138: MOV TEMP[3].w, TEMP[7].xxxx 139: ABS TEMP[4].x, TEMP[4].yyyy 140: FRC TEMP[4].x, TEMP[4].xxxx 141: USNE TEMP[7].x, TEMP[3].wwww, IMM[3].xxxx 142: UIF TEMP[7].xxxx :0 143: MOV TEMP[7].x, TEMP[4].xxxx 144: ELSE :0 145: MOV TEMP[7].x, -TEMP[4].xxxx 146: ENDIF 147: MUL TEMP[4].x, TEMP[7].xxxx, IMM[1].zzzz 148: SIN TEMP[4].x, TEMP[4].xxxx 149: MAD TEMP[4].x, CONST[1][33].xxxx, TEMP[4].xxxx, TEMP[6].xxxx 150: MOV TEMP[8].y, TEMP[4].xxxx 151: MAD TEMP[4].x, CONST[1][30].yyyy, IMM[4].wwww, IMM[6].zzzz 152: MUL TEMP[8].x, TEMP[3].xxxx, TEMP[4].xxxx 153: MUL TEMP[3].xyz, TEMP[8].xyzz, IN[5].xxxx 154: MOV TEMP[5].w, IN[0].wwww 155: MOV TEMP[3].w, IMM[1].xxxx 156: ADD TEMP[3], TEMP[3], TEMP[5] 157: MUL TEMP[5], TEMP[3].yyyy, CONST[1][1] 158: MAD TEMP[5], CONST[1][0], TEMP[3].xxxx, TEMP[5] 159: MAD TEMP[5], CONST[1][2], TEMP[3].zzzz, TEMP[5] 160: MAD TEMP[3], CONST[1][3], TEMP[3].wwww, TEMP[5] 161: MUL TEMP[5], TEMP[3].yyyy, CONST[2][1] 162: MAD TEMP[5], CONST[2][0], TEMP[3].xxxx, TEMP[5] 163: MAD TEMP[5], CONST[2][2], TEMP[3].zzzz, TEMP[5] 164: MAD TEMP[5], CONST[2][3], TEMP[3].wwww, TEMP[5] 165: MUL TEMP[3].xyz, TEMP[3].xyzz, CONST[2][4].wwww 166: MOV TEMP[4], TEMP[5] 167: MOV TEMP[6], TEMP[5] 168: MUL TEMP[5].xyz, -TEMP[3].yyyy, CONST[1][38].xyzz 169: MAD TEMP[7].xyz, CONST[1][37].xyzz, -TEMP[3].xxxx, TEMP[5].xyzz 170: MAD TEMP[3].xyz, CONST[1][39].xyzz, -TEMP[3].zzzz, TEMP[7].xyzz 171: MAD TEMP[5].xyz, IN[1].yzxx, IMM[6].wwww, IMM[9].xxxx 172: MAD TEMP[8], IN[2], IMM[6].wwww, IMM[9].xxxx 173: MUL TEMP[7].xyz, TEMP[5].xyzz, TEMP[8].zxyy 174: MAD TEMP[5].xyz, TEMP[8].yzxx, TEMP[5].yzxx, -TEMP[7].xyzz 175: MUL TEMP[5].xyz, TEMP[8].wwww, TEMP[5].xyzz 176: MUL TEMP[7].xyz, TEMP[8].yzxx, TEMP[5].zxyy 177: MAD TEMP[7].xyz, TEMP[5].yzxx, TEMP[8].zxyy, -TEMP[7].xyzz 178: DP3 TEMP[9].x, TEMP[5].xyzz, TEMP[3].xyzz 179: MOV TEMP[9].y, TEMP[9].xxxx 180: MUL TEMP[5].xyz, TEMP[8].wwww, TEMP[7].xyzz 181: DP3 TEMP[7].x, TEMP[8].xyzz, TEMP[3].xyzz 182: MOV TEMP[9].z, TEMP[7].xxxx 183: DP3 TEMP[9].x, TEMP[5].xyzz, TEMP[3].xyzz 184: MOV TEMP[9].w, IMM[4].xxxx 185: MOV TEMP[3].xw, TEMP[6].xxxw 186: MOV TEMP[3].y, -TEMP[6].yyyy 187: MAD TEMP[5].x, TEMP[6].zzzz, IMM[6].wwww, -TEMP[5].wwww 188: MOV TEMP[3].z, TEMP[5].xxxx 189: MOV OUT[1], TEMP[0] 190: MOV OUT[2], TEMP[1] 191: MOV OUT[3], TEMP[2] 192: MOV OUT[4], TEMP[4] 193: MOV OUT[5], TEMP[9] 194: MOV OUT[0], TEMP[3] 195: END radeonsi: Compiling shader 209 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_vs <{ float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) { main_body: %20 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1, !amdgpu.uniform !0 %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20, align 16, !tbaa !1 %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 0) %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 4) %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 8) %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 12) %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16) %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 20) %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 24) %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 28) %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32) %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36) %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 40) %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 44) %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 48) %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 52) %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 56) %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 60) %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 300) %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 484) %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 488) %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 492) %42 = call float @llvm.SI.load.const(<16 x i8> %21, i32 512) %43 = call float @llvm.SI.load.const(<16 x i8> %21, i32 516) %44 = call float @llvm.SI.load.const(<16 x i8> %21, i32 520) %45 = call float @llvm.SI.load.const(<16 x i8> %21, i32 524) %46 = call float @llvm.SI.load.const(<16 x i8> %21, i32 528) %47 = call float @llvm.SI.load.const(<16 x i8> %21, i32 532) %48 = call float @llvm.SI.load.const(<16 x i8> %21, i32 536) %49 = call float @llvm.SI.load.const(<16 x i8> %21, i32 540) %50 = call float @llvm.SI.load.const(<16 x i8> %21, i32 544) %51 = call float @llvm.SI.load.const(<16 x i8> %21, i32 548) %52 = call float @llvm.SI.load.const(<16 x i8> %21, i32 576) %53 = call float @llvm.SI.load.const(<16 x i8> %21, i32 580) %54 = call float @llvm.SI.load.const(<16 x i8> %21, i32 584) %55 = call float @llvm.SI.load.const(<16 x i8> %21, i32 588) %56 = call float @llvm.SI.load.const(<16 x i8> %21, i32 592) %57 = call float @llvm.SI.load.const(<16 x i8> %21, i32 596) %58 = call float @llvm.SI.load.const(<16 x i8> %21, i32 600) %59 = call float @llvm.SI.load.const(<16 x i8> %21, i32 608) %60 = call float @llvm.SI.load.const(<16 x i8> %21, i32 612) %61 = call float @llvm.SI.load.const(<16 x i8> %21, i32 616) %62 = call float @llvm.SI.load.const(<16 x i8> %21, i32 624) %63 = call float @llvm.SI.load.const(<16 x i8> %21, i32 628) %64 = call float @llvm.SI.load.const(<16 x i8> %21, i32 632) %65 = call float @llvm.SI.load.const(<16 x i8> %21, i32 640) %66 = call float @llvm.SI.load.const(<16 x i8> %21, i32 644) %67 = call float @llvm.SI.load.const(<16 x i8> %21, i32 648) %68 = call float @llvm.SI.load.const(<16 x i8> %21, i32 656) %69 = call float @llvm.SI.load.const(<16 x i8> %21, i32 660) %70 = call float @llvm.SI.load.const(<16 x i8> %21, i32 664) %71 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2, !amdgpu.uniform !0 %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !1 %73 = call float @llvm.SI.load.const(<16 x i8> %72, i32 0) %74 = call float @llvm.SI.load.const(<16 x i8> %72, i32 4) %75 = call float @llvm.SI.load.const(<16 x i8> %72, i32 8) %76 = call float @llvm.SI.load.const(<16 x i8> %72, i32 12) %77 = call float @llvm.SI.load.const(<16 x i8> %72, i32 16) %78 = call float @llvm.SI.load.const(<16 x i8> %72, i32 20) %79 = call float @llvm.SI.load.const(<16 x i8> %72, i32 24) %80 = call float @llvm.SI.load.const(<16 x i8> %72, i32 28) %81 = call float @llvm.SI.load.const(<16 x i8> %72, i32 32) %82 = call float @llvm.SI.load.const(<16 x i8> %72, i32 36) %83 = call float @llvm.SI.load.const(<16 x i8> %72, i32 40) %84 = call float @llvm.SI.load.const(<16 x i8> %72, i32 44) %85 = call float @llvm.SI.load.const(<16 x i8> %72, i32 48) %86 = call float @llvm.SI.load.const(<16 x i8> %72, i32 52) %87 = call float @llvm.SI.load.const(<16 x i8> %72, i32 56) %88 = call float @llvm.SI.load.const(<16 x i8> %72, i32 60) %89 = call float @llvm.SI.load.const(<16 x i8> %72, i32 76) %90 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0, !amdgpu.uniform !0 %91 = load <16 x i8>, <16 x i8> addrspace(2)* %90, align 16, !tbaa !1 %92 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %91, i32 0, i32 %13) %93 = extractelement <4 x float> %92, i32 0 %94 = extractelement <4 x float> %92, i32 1 %95 = extractelement <4 x float> %92, i32 2 %96 = extractelement <4 x float> %92, i32 3 %97 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1, !amdgpu.uniform !0 %98 = load <16 x i8>, <16 x i8> addrspace(2)* %97, align 16, !tbaa !1 %99 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %98, i32 0, i32 %14) %100 = extractelement <4 x float> %99, i32 0 %101 = extractelement <4 x float> %99, i32 1 %102 = extractelement <4 x float> %99, i32 2 %103 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2, !amdgpu.uniform !0 %104 = load <16 x i8>, <16 x i8> addrspace(2)* %103, align 16, !tbaa !1 %105 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %104, i32 0, i32 %15) %106 = extractelement <4 x float> %105, i32 0 %107 = extractelement <4 x float> %105, i32 1 %108 = extractelement <4 x float> %105, i32 2 %109 = extractelement <4 x float> %105, i32 3 %110 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3, !amdgpu.uniform !0 %111 = load <16 x i8>, <16 x i8> addrspace(2)* %110, align 16, !tbaa !1 %112 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %111, i32 0, i32 %16) %113 = extractelement <4 x float> %112, i32 0 %114 = extractelement <4 x float> %112, i32 1 %115 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4, !amdgpu.uniform !0 %116 = load <16 x i8>, <16 x i8> addrspace(2)* %115, align 16, !tbaa !1 %117 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %116, i32 0, i32 %17) %118 = extractelement <4 x float> %117, i32 0 %119 = extractelement <4 x float> %117, i32 1 %120 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 5, !amdgpu.uniform !0 %121 = load <16 x i8>, <16 x i8> addrspace(2)* %120, align 16, !tbaa !1 %122 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %121, i32 0, i32 %18) %123 = extractelement <4 x float> %122, i32 0 %124 = extractelement <4 x float> %122, i32 1 %125 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 6, !amdgpu.uniform !0 %126 = load <16 x i8>, <16 x i8> addrspace(2)* %125, align 16, !tbaa !1 %127 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %126, i32 0, i32 %19) %128 = extractelement <4 x float> %127, i32 0 %129 = extractelement <4 x float> %127, i32 1 %130 = fmul float %128, %52 %131 = fadd float %130, %55 %132 = fmul float %129, %53 %133 = fadd float %132, %54 %134 = fmul float %38, 0x3FF35C2900000000 %135 = fadd float %134, %41 %136 = fsub float -0.000000e+00, %135 %137 = fcmp oge float %135, %136 %138 = call float @llvm.fabs.f32(float %135) %139 = call float @llvm.floor.f32(float %138) %140 = fsub float %138, %139 %141 = fsub float -0.000000e+00, %140 %temp20.0 = select i1 %137, float %140, float %141 %142 = fmul float %temp20.0, 0x401921FB60000000 %143 = fmul float %135, 0x3FE89D8A00000000 %144 = fsub float -0.000000e+00, %143 %145 = fcmp oge float %143, %144 %146 = call float @llvm.fabs.f32(float %143) %147 = call float @llvm.floor.f32(float %146) %148 = fsub float %146, %147 %149 = fsub float -0.000000e+00, %148 %temp24.0 = select i1 %145, float %148, float %149 %150 = fmul float %temp24.0, 0x401921FB60000000 %151 = call float @llvm.sin.f32(float %142) %152 = call float @llvm.sin.f32(float %150) %153 = fadd float %152, %151 %154 = fadd float %153, 1.000000e+00 %155 = fmul float %93, %68 %156 = fadd float %155, %65 %157 = fmul float %94, %69 %158 = fadd float %157, %66 %159 = fmul float %95, %70 %160 = fadd float %159, %67 %161 = fmul float %156, %48 %162 = fmul float %154, %161 %163 = fmul float %50, %156 %164 = fadd float %163, %135 %165 = fmul float %164, 5.000000e-01 %166 = fsub float -0.000000e+00, %165 %167 = fcmp oge float %165, %166 %168 = call float @llvm.fabs.f32(float %165) %169 = call float @llvm.floor.f32(float %168) %170 = fsub float %168, %169 %171 = fsub float -0.000000e+00, %170 %temp28.1 = select i1 %167, float %170, float %171 %172 = fmul float %temp28.1, 0x401921FB60000000 %173 = fmul float %49, %156 %174 = fadd float %173, %135 %175 = fadd float %135, %42 %176 = fsub float -0.000000e+00, %174 %177 = fcmp oge float %174, %176 %178 = call float @llvm.fabs.f32(float %174) %179 = call float @llvm.floor.f32(float %178) %180 = fsub float %178, %179 %181 = fsub float -0.000000e+00, %180 %temp36.0 = select i1 %177, float %180, float %181 %182 = fmul float %temp36.0, 0x401921FB60000000 %183 = call float @llvm.sin.f32(float %172) %184 = call float @llvm.sin.f32(float %182) %185 = fadd float %183, %184 %186 = fmul float %185, %51 %187 = fmul float %162, 0x3FC99999A0000000 %188 = fadd float %187, %186 %189 = fmul float %40, %156 %190 = fsub float %189, %175 %191 = fmul float %43, %156 %192 = fsub float %191, %175 %193 = fmul float %45, %156 %194 = fsub float %193, %175 %195 = fmul float %192, 0x3FCA1F58C0000000 %196 = fmul float %190, 0x3FDD1745C0000000 %197 = fsub float -0.000000e+00, %196 %198 = fcmp oge float %196, %197 %199 = call float @llvm.fabs.f32(float %196) %200 = call float @llvm.floor.f32(float %199) %201 = fsub float %199, %200 %202 = fsub float -0.000000e+00, %201 %temp28.2 = select i1 %198, float %201, float %202 %203 = fsub float -0.000000e+00, %195 %204 = fcmp oge float %195, %203 %205 = call float @llvm.fabs.f32(float %195) %206 = call float @llvm.floor.f32(float %205) %207 = fsub float %205, %206 %208 = fsub float -0.000000e+00, %207 %temp24.3 = select i1 %204, float %207, float %208 %209 = fmul float %temp24.3, 0x401921FB60000000 %210 = fmul float %temp28.2, 0x401921FB60000000 %211 = call float @llvm.sin.f32(float %209) %212 = call float @llvm.sin.f32(float %210) %213 = fadd float %211, %212 %214 = fmul float %213, %47 %215 = call float @llvm.fabs.f32(float %213) %216 = fmul float %215, %44 %217 = fsub float -0.000000e+00, %194 %218 = fcmp oge float %194, %217 %219 = call float @llvm.fabs.f32(float %194) %220 = call float @llvm.floor.f32(float %219) %221 = fsub float %219, %220 %222 = fsub float -0.000000e+00, %221 %temp28.4 = select i1 %218, float %221, float %222 %223 = fmul float %temp28.4, 0x401921FB60000000 %224 = call float @llvm.sin.f32(float %223) %225 = fmul float %46, %224 %226 = fadd float %225, %214 %227 = fmul float %39, -4.000000e+01 %228 = fadd float %227, -1.000000e+01 %229 = fmul float %216, %228 %230 = fmul float %229, %123 %231 = fmul float %226, %123 %232 = fmul float %188, %123 %233 = fadd float %230, %156 %234 = fadd float %231, %158 %235 = fadd float %232, %160 %236 = fadd float %96, 0.000000e+00 %237 = fmul float %234, %26 %238 = fmul float %234, %27 %239 = fmul float %234, %28 %240 = fmul float %234, %29 %241 = fmul float %22, %233 %242 = fadd float %241, %237 %243 = fmul float %23, %233 %244 = fadd float %243, %238 %245 = fmul float %24, %233 %246 = fadd float %245, %239 %247 = fmul float %25, %233 %248 = fadd float %247, %240 %249 = fmul float %30, %235 %250 = fadd float %249, %242 %251 = fmul float %31, %235 %252 = fadd float %251, %244 %253 = fmul float %32, %235 %254 = fadd float %253, %246 %255 = fmul float %33, %235 %256 = fadd float %255, %248 %257 = fmul float %34, %236 %258 = fadd float %257, %250 %259 = fmul float %35, %236 %260 = fadd float %259, %252 %261 = fmul float %36, %236 %262 = fadd float %261, %254 %263 = fmul float %37, %236 %264 = fadd float %263, %256 %265 = fmul float %260, %77 %266 = fmul float %260, %78 %267 = fmul float %260, %79 %268 = fmul float %260, %80 %269 = fmul float %73, %258 %270 = fadd float %269, %265 %271 = fmul float %74, %258 %272 = fadd float %271, %266 %273 = fmul float %75, %258 %274 = fadd float %273, %267 %275 = fmul float %76, %258 %276 = fadd float %275, %268 %277 = fmul float %81, %262 %278 = fadd float %277, %270 %279 = fmul float %82, %262 %280 = fadd float %279, %272 %281 = fmul float %83, %262 %282 = fadd float %281, %274 %283 = fmul float %84, %262 %284 = fadd float %283, %276 %285 = fmul float %85, %264 %286 = fadd float %285, %278 %287 = fmul float %86, %264 %288 = fadd float %287, %280 %289 = fmul float %87, %264 %290 = fadd float %289, %282 %291 = fmul float %88, %264 %292 = fadd float %291, %284 %293 = fmul float %258, %89 %294 = fmul float %260, %89 %295 = fmul float %262, %89 %296 = fmul float %294, %59 %297 = fsub float -0.000000e+00, %296 %298 = fmul float %294, %60 %299 = fsub float -0.000000e+00, %298 %300 = fmul float %294, %61 %301 = fsub float -0.000000e+00, %300 %302 = fmul float %293, %56 %303 = fsub float %297, %302 %304 = fmul float %293, %57 %305 = fsub float %299, %304 %306 = fmul float %293, %58 %307 = fsub float %301, %306 %308 = fmul float %295, %62 %309 = fsub float %303, %308 %310 = fmul float %295, %63 %311 = fsub float %305, %310 %312 = fmul float %295, %64 %313 = fsub float %307, %312 %314 = fmul float %101, 2.000000e+00 %315 = fadd float %314, -1.000000e+00 %316 = fmul float %102, 2.000000e+00 %317 = fadd float %316, -1.000000e+00 %318 = fmul float %100, 2.000000e+00 %319 = fadd float %318, -1.000000e+00 %320 = fmul float %106, 2.000000e+00 %321 = fadd float %320, -1.000000e+00 %322 = fmul float %107, 2.000000e+00 %323 = fadd float %322, -1.000000e+00 %324 = fmul float %108, 2.000000e+00 %325 = fadd float %324, -1.000000e+00 %326 = fmul float %109, 2.000000e+00 %327 = fadd float %326, -1.000000e+00 %328 = fmul float %315, %325 %329 = fmul float %317, %321 %330 = fmul float %319, %323 %331 = fmul float %323, %317 %332 = fsub float %331, %328 %333 = fmul float %325, %319 %334 = fsub float %333, %329 %335 = fmul float %321, %315 %336 = fsub float %335, %330 %337 = fmul float %327, %332 %338 = fmul float %327, %334 %339 = fmul float %327, %336 %340 = fmul float %323, %339 %341 = fmul float %325, %337 %342 = fmul float %321, %338 %343 = fmul float %338, %325 %344 = fsub float %343, %340 %345 = fmul float %339, %321 %346 = fsub float %345, %341 %347 = fmul float %337, %323 %348 = fsub float %347, %342 %349 = fmul float %337, %309 %350 = fmul float %338, %311 %351 = fadd float %350, %349 %352 = fmul float %339, %313 %353 = fadd float %351, %352 %354 = fmul float %327, %344 %355 = fmul float %327, %346 %356 = fmul float %327, %348 %357 = fmul float %321, %309 %358 = fmul float %323, %311 %359 = fadd float %358, %357 %360 = fmul float %325, %313 %361 = fadd float %359, %360 %362 = fmul float %354, %309 %363 = fmul float %355, %311 %364 = fadd float %363, %362 %365 = fmul float %356, %313 %366 = fadd float %364, %365 %367 = fsub float -0.000000e+00, %288 %368 = fmul float %290, 2.000000e+00 %369 = fsub float %368, %292 %370 = bitcast i32 %11 to float %371 = insertvalue <{ float, float, float }> undef, float %370, 2 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %131, float %133, float undef, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %113, float %114, float %119, float %118) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %123, float %124, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %286, float %288, float %290, float %292) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %366, float %353, float %361, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %286, float %367, float %369, float %292) ret <{ float, float, float }> %371 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #0 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #0 ; Function Attrs: nounwind readnone declare float @llvm.floor.f32(float) #0 ; Function Attrs: nounwind readnone declare float @llvm.sin.f32(float) #0 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { nounwind readnone } !0 = !{} !1 = !{!"const", null, i32 1} [traceshaders] glDetachShader(program=384, shader=383) [traceshaders] glDeleteShader(shader=383) [...] [traceshaders] glGenProgramPipelines(): [traceshaders] - pipeline=15 [traceshaders] glUseProgramStages(pipeline=15, stages=vert, program=74) [...] [traceshaders] glBindProgramPipeline(pipeline=15) radeonsi: Compiling shader 389 Vertex Shader Prolog LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> @main(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) { main_body: %19 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %0, 0 %20 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %19, i32 %1, 1 %21 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %20, i32 %2, 2 %22 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %21, i32 %3, 3 %23 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %22, i32 %4, 4 %24 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %23, i32 %5, 5 %25 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %24, i32 %6, 6 %26 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %25, i32 %7, 7 %27 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %26, i32 %8, 8 %28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %27, i32 %9, 9 %29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %28, i32 %10, 10 %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %29, i32 %11, 11 %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %30, i32 %12, 12 %32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %31, i32 %13, 13 %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %32, i32 %14, 14 %34 = bitcast i32 %15 to float %35 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %33, float %34, 15 %36 = bitcast i32 %16 to float %37 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %35, float %36, 16 %38 = bitcast i32 %17 to float %39 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %37, float %38, 17 %40 = bitcast i32 %18 to float %41 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %39, float %40, 18 %42 = add i32 %15, %12 %43 = bitcast i32 %42 to float %44 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %41, float %43, 19 %45 = add i32 %15, %12 %46 = bitcast i32 %45 to float %47 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %44, float %46, 20 %48 = add i32 %15, %12 %49 = bitcast i32 %48 to float %50 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %47, float %49, 21 %51 = add i32 %15, %12 %52 = bitcast i32 %51 to float %53 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %50, float %52, 22 %54 = add i32 %15, %12 %55 = bitcast i32 %54 to float %56 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %53, float %55, 23 %57 = add i32 %15, %12 %58 = bitcast i32 %57 to float %59 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %56, float %58, 24 %60 = add i32 %15, %12 %61 = bitcast i32 %60 to float %62 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %59, float %61, 25 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %62 } Vertex Shader as VS: Shader prolog disassembly: v_add_i32_e32 v4, vcc, s12, v0 ; 3208000C v_mov_b32_e32 v5, v4 ; 7E0A0304 v_mov_b32_e32 v6, v4 ; 7E0C0304 v_mov_b32_e32 v7, v4 ; 7E0E0304 v_mov_b32_e32 v8, v4 ; 7E100304 v_mov_b32_e32 v9, v4 ; 7E120304 v_mov_b32_e32 v10, v4 ; 7E140304 Shader main disassembly: s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C00A0105 00000000 s_load_dwordx4 s[12:15], s[10:11], 0x10 ; C00A0305 00000010 s_load_dwordx4 s[16:19], s[10:11], 0x50 ; C00A0405 00000050 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[17:20], v4, s[4:7], 0 idxen ; E00C2000 80011104 s_load_dwordx4 s[4:7], s[10:11], 0x20 ; C00A0105 00000020 buffer_load_format_xyzw v[25:28], v5, s[12:15], 0 idxen ; E00C2000 80031905 s_load_dwordx4 s[12:15], s[10:11], 0x30 ; C00A0305 00000030 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[28:31], v6, s[4:7], 0 idxen ; E00C2000 80011C06 s_load_dwordx4 s[4:7], s[10:11], 0x40 ; C00A0105 00000040 s_load_dwordx4 s[8:11], s[10:11], 0x60 ; C00A0205 00000060 buffer_load_format_xyzw v[3:6], v7, s[12:15], 0 idxen ; E00C2000 80030307 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[5:8], v8, s[4:7], 0 idxen ; E00C2000 80010508 s_nop 0 ; BF800000 buffer_load_format_xyzw v[11:14], v9, s[16:19], 0 idxen ; E00C2000 80040B09 s_nop 0 ; BF800000 buffer_load_format_xyzw v[21:24], v10, s[8:11], 0 idxen ; E00C2000 8002150A s_load_dwordx4 s[16:19], s[2:3], 0x10 ; C00A0401 00000010 s_nop 0 ; BF800000 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s5, s[16:19], 0x10 ; C0220148 00000010 s_buffer_load_dword s4, s[16:19], 0x0 ; C0220108 00000000 s_buffer_load_dword s0, s[16:19], 0x20 ; C0220008 00000020 s_waitcnt vmcnt(2) ; BF8C0F72 v_add_f32_e32 v7, v26, v26 ; 020E351A v_add_f32_e32 v0, v25, v25 ; 02003319 s_waitcnt vmcnt(1) ; BF8C0F71 v_mad_f32 v13, 2.0, v27, -1.0 ; D1C1000D 03CE36F4 v_mad_f32 v1, 2.0, v25, -1.0 ; D1C10001 03CE32F4 v_mad_f32 v15, 2.0, v30, -1.0 ; D1C1000F 03CE3CF4 v_mad_f32 v8, 2.0, v29, -1.0 ; D1C10008 03CE3AF4 v_mad_f32 v7, v7, v15, -v15 ; D1C10007 843E1F07 v_mad_f32 v9, 2.0, v26, -1.0 ; D1C10009 03CE34F4 v_add_f32_e32 v10, v27, v27 ; 0214371B v_mad_f32 v7, v8, v13, -v7 ; D1C10007 841E1B08 v_add_f32_e32 v25, v28, v28 ; 0232391C v_mad_f32 v14, 2.0, v28, -1.0 ; D1C1000E 03CE38F4 v_mad_f32 v0, v0, v8, -v8 ; D1C10000 84221100 v_add_f32_e32 v28, v31, v31 ; 02383F1F v_mad_f32 v0, v14, v9, -v0 ; D1C10000 8402130E v_mad_f32 v9, v10, v14, -v14 ; D1C10009 843A1D0A v_add_f32_e32 v26, v30, v30 ; 02343D1E s_waitcnt vmcnt(0) ; BF8C0F70 v_mad_f32 v24, v28, v7, -v7 ; D1C10018 841E0F1C v_mad_f32 v1, v15, v1, -v9 ; D1C10001 8426030F v_mad_f32 v16, v28, v0, -v0 ; D1C10010 8402011C v_mad_f32 v0, v26, v24, -v24 ; D1C10000 8462311A v_add_f32_e32 v27, v29, v29 ; 02363B1D v_mad_f32 v0, v16, v14, -v0 ; D1C10000 84021D10 v_mad_f32 v1, v28, v1, -v1 ; D1C10001 8406031C v_mad_f32 v9, v27, v16, -v16 ; D1C10009 8442211B v_mad_f32 v7, v25, v1, -v1 ; D1C10007 84060319 v_mad_f32 v9, v1, v15, -v9 ; D1C10009 84261F01 v_mad_f32 v0, v28, v0, -v0 ; D1C10000 8402011C v_mad_f32 v7, v24, v8, -v7 ; D1C10007 841E1118 v_mad_f32 v25, v28, v9, -v9 ; D1C10019 8426131C s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v10, s5, v0 ; 0A140005 v_mul_f32_e32 v9, s5, v1 ; 0A120205 v_mad_f32 v26, v28, v7, -v7 ; D1C1001A 841E0F1C v_mac_f32_e32 v10, s4, v25 ; 2C143204 v_mad_f32 v7, v27, s5, -s5 ; D1C10007 80140B1B v_mac_f32_e32 v9, s4, v24 ; 2C123004 v_mac_f32_e32 v10, s0, v26 ; 2C143400 v_mac_f32_e32 v7, s4, v14 ; 2C0E1C04 v_mac_f32_e32 v9, s0, v16 ; 2C122000 v_mul_f32_e32 v27, v10, v10 ; 0A36150A v_mac_f32_e32 v7, s0, v15 ; 2C0E1E00 v_mac_f32_e32 v27, v9, v9 ; 2C361309 v_mac_f32_e32 v27, v7, v7 ; 2C360F07 v_mad_f32 v23, 2.0, v31, -1.0 ; D1C10017 03CE3EF4 v_cmp_nlt_f32_e32 vcc, 0, v27 ; 7C9C3680 s_and_saveexec_b64 s[6:7], vcc ; BE86206A s_xor_b64 s[6:7], exec, s[6:7] ; 8886067E v_cmp_le_f32_e32 vcc, 0, v27 ; 7C863680 v_mul_f32_e32 v13, 0x7f7fffff, v27 ; 0A1A36FF 7F7FFFFF v_mov_b32_e32 v28, 0xff7fffff ; 7E3802FF FF7FFFFF v_cndmask_b32_e32 v13, v28, v13, vcc ; 001A1B1C s_or_saveexec_b64 s[6:7], s[6:7] ; BE862106 s_buffer_load_dword s11, s[16:19], 0x8 ; C02202C8 00000008 s_buffer_load_dword s12, s[16:19], 0x18 ; C0220308 00000018 s_buffer_load_dword s1, s[16:19], 0x28 ; C0220048 00000028 s_waitcnt lgkmcnt(0) ; BF8C007F s_xor_b64 exec, exec, s[6:7] ; 88FE067E v_rsq_f32_e32 v13, v27 ; 7E1A491B s_or_b64 exec, exec, s[6:7] ; 87FE067E s_load_dwordx4 s[44:47], s[2:3], 0x20 ; C00A0B01 00000020 s_buffer_load_dword s41, s[16:19], 0x2ec ; C0220A48 000002EC s_buffer_load_dword s42, s[16:19], 0x348 ; C0220A88 00000348 s_buffer_load_dword s43, s[16:19], 0x34c ; C0220AC8 0000034C s_buffer_load_dword s48, s[16:19], 0x390 ; C0220C08 00000390 s_buffer_load_dword s49, s[16:19], 0x394 ; C0220C48 00000394 s_buffer_load_dword s50, s[16:19], 0x398 ; C0220C88 00000398 v_mul_f32_e32 v1, s12, v1 ; 0A02020C v_mul_f32_e32 v0, s12, v0 ; 0A00000C v_mac_f32_e32 v1, s11, v24 ; 2C02300B v_mac_f32_e32 v0, s11, v25 ; 2C00320B v_mul_f32_e32 v8, s12, v8 ; 0A10100C v_mac_f32_e32 v1, s1, v16 ; 2C022001 v_mac_f32_e32 v8, s11, v14 ; 2C101C0B v_mac_f32_e32 v0, s1, v26 ; 2C003401 v_mul_f32_e32 v25, v1, v1 ; 0A320301 v_mac_f32_e32 v8, s1, v15 ; 2C101E01 v_mac_f32_e32 v25, v0, v0 ; 2C320100 v_mac_f32_e32 v25, v8, v8 ; 2C321108 v_cmp_nlt_f32_e32 vcc, 0, v25 ; 7C9C3280 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[52:53], vcc ; BEB4206A s_xor_b64 s[52:53], exec, s[52:53] ; 88B4347E v_cmp_le_f32_e32 vcc, 0, v25 ; 7C863280 v_mul_f32_e32 v14, 0x7f7fffff, v25 ; 0A1C32FF 7F7FFFFF v_mov_b32_e32 v15, 0xff7fffff ; 7E1E02FF FF7FFFFF v_cndmask_b32_e32 v14, v15, v14, vcc ; 001C1D0F s_or_saveexec_b64 s[52:53], s[52:53] ; BEB42134 s_buffer_load_dword s29, s[16:19], 0x4 ; C0220748 00000004 s_buffer_load_dword s2, s[16:19], 0xc ; C0220088 0000000C s_buffer_load_dword s31, s[16:19], 0x14 ; C02207C8 00000014 s_buffer_load_dword s30, s[16:19], 0x1c ; C0220788 0000001C s_buffer_load_dword s10, s[16:19], 0x24 ; C0220288 00000024 s_buffer_load_dword s3, s[16:19], 0x2c ; C02200C8 0000002C s_buffer_load_dword s6, s[16:19], 0x30 ; C0220188 00000030 s_buffer_load_dword s7, s[16:19], 0x34 ; C02201C8 00000034 s_buffer_load_dword s8, s[16:19], 0x38 ; C0220208 00000038 s_buffer_load_dword s9, s[16:19], 0x3c ; C0220248 0000003C s_buffer_load_dword s54, s[16:19], 0x12c ; C0220D88 0000012C s_buffer_load_dword s32, s[16:19], 0x2e4 ; C0220808 000002E4 s_buffer_load_dword s39, s[16:19], 0x2e8 ; C02209C8 000002E8 s_buffer_load_dword s40, s[16:19], 0x300 ; C0220A08 00000300 s_buffer_load_dword s38, s[16:19], 0x304 ; C0220988 00000304 s_buffer_load_dword s34, s[16:19], 0x308 ; C0220888 00000308 s_buffer_load_dword s37, s[16:19], 0x30c ; C0220948 0000030C s_buffer_load_dword s33, s[16:19], 0x310 ; C0220848 00000310 s_buffer_load_dword s35, s[16:19], 0x314 ; C02208C8 00000314 s_buffer_load_dword s56, s[16:19], 0x318 ; C0220E08 00000318 s_buffer_load_dword s55, s[16:19], 0x31c ; C0220DC8 0000031C s_buffer_load_dword s51, s[16:19], 0x320 ; C0220CC8 00000320 s_buffer_load_dword s36, s[16:19], 0x324 ; C0220908 00000324 s_buffer_load_dword s57, s[16:19], 0x330 ; C0220E48 00000330 s_buffer_load_dword s58, s[16:19], 0x340 ; C0220E88 00000340 s_buffer_load_dword s59, s[16:19], 0x344 ; C0220EC8 00000344 s_buffer_load_dword s60, s[16:19], 0x380 ; C0220F08 00000380 s_buffer_load_dword s61, s[16:19], 0x384 ; C0220F48 00000384 s_buffer_load_dword s62, s[16:19], 0x388 ; C0220F88 00000388 s_buffer_load_dword s13, s[44:47], 0x0 ; C0220356 00000000 s_buffer_load_dword s14, s[44:47], 0x4 ; C0220396 00000004 s_buffer_load_dword s15, s[44:47], 0x8 ; C02203D6 00000008 s_buffer_load_dword s16, s[44:47], 0xc ; C0220416 0000000C s_buffer_load_dword s17, s[44:47], 0x10 ; C0220456 00000010 s_buffer_load_dword s19, s[44:47], 0x14 ; C02204D6 00000014 s_buffer_load_dword s21, s[44:47], 0x18 ; C0220556 00000018 s_buffer_load_dword s23, s[44:47], 0x1c ; C02205D6 0000001C s_buffer_load_dword s18, s[44:47], 0x20 ; C0220496 00000020 s_buffer_load_dword s20, s[44:47], 0x24 ; C0220516 00000024 s_buffer_load_dword s22, s[44:47], 0x28 ; C0220596 00000028 s_buffer_load_dword s24, s[44:47], 0x2c ; C0220616 0000002C s_buffer_load_dword s25, s[44:47], 0x30 ; C0220656 00000030 s_buffer_load_dword s26, s[44:47], 0x34 ; C0220696 00000034 s_buffer_load_dword s27, s[44:47], 0x38 ; C02206D6 00000038 s_buffer_load_dword s28, s[44:47], 0x3c ; C0220716 0000003C v_mov_b32_e32 v24, s41 ; 7E300229 v_mov_b32_e32 v15, s42 ; 7E1E022A v_mov_b32_e32 v16, s43 ; 7E20022B v_mov_b32_e32 v26, s48 ; 7E340230 v_mov_b32_e32 v27, s49 ; 7E360231 v_mov_b32_e32 v28, s50 ; 7E380232 s_waitcnt lgkmcnt(0) ; BF8C007F s_xor_b64 exec, exec, s[52:53] ; 88FE347E v_rsq_f32_e32 v14, v25 ; 7E1C4919 s_or_b64 exec, exec, s[52:53] ; 87FE347E v_mac_f32_e32 v15, s59, v22 ; 2C1E2C3B v_mov_b32_e32 v22, 0x3f9ae148 ; 7E2C02FF 3F9AE148 v_mad_f32 v17, v26, v17, s60 ; D1C10011 00F2231A v_mac_f32_e32 v24, s54, v22 ; 2C302C36 v_mad_f32 v22, s55, v17, v24 ; D1C10016 04622237 v_floor_f32_e64 v25, |v22| ; D15F0119 00000116 v_sub_f32_e64 v25, |v22|, v25 ; D1020119 00023316 v_bfrev_b32_e32 v26, 1 ; 7E345881 v_mad_f32 v18, v27, v18, s61 ; D1C10012 00F6251B v_xor_b32_e32 v27, v25, v26 ; 2A363519 v_cmp_ge_f32_e64 vcc, v22, -v22 ; D046006A 40022D16 v_cndmask_b32_e32 v22, v27, v25, vcc ; 002C331B v_mad_f32 v25, s51, v17, v24 ; D1C10019 04622233 v_mul_f32_e32 v25, 0.5, v25 ; 0A3232F0 v_floor_f32_e64 v27, |v25| ; D15F011B 00000119 v_sub_f32_e64 v27, |v25|, v27 ; D102011B 00023719 v_mad_f32 v19, v28, v19, s62 ; D1C10013 00FA271C v_xor_b32_e32 v28, v27, v26 ; 2A38351B v_cmp_ge_f32_e64 vcc, v25, -v25 ; D046006A 40023319 v_cndmask_b32_e32 v25, v28, v27, vcc ; 0032371C v_floor_f32_e64 v27, |v24| ; D15F011B 00000118 v_sub_f32_e64 v27, |v24|, v27 ; D102011B 00023718 v_xor_b32_e32 v28, v27, v26 ; 2A38351B v_cmp_ge_f32_e64 vcc, v24, -v24 ; D046006A 40023118 v_cndmask_b32_e32 v27, v28, v27, vcc ; 0036371C v_mul_f32_e32 v28, 0x3f44ec50, v24 ; 0A3830FF 3F44EC50 v_floor_f32_e64 v29, |v28| ; D15F011D 0000011C v_sub_f32_e64 v29, |v28|, v29 ; D102011D 00023B1C v_xor_b32_e32 v30, v29, v26 ; 2A3C351D v_cmp_ge_f32_e64 vcc, v28, -v28 ; D046006A 4002391C v_add_f32_e32 v24, s40, v24 ; 02303028 v_cndmask_b32_e32 v28, v30, v29, vcc ; 00383B1E v_mad_f32 v29, s39, v17, -v24 ; D1C1001D 84622227 v_mul_f32_e32 v29, 0x3ee8ba2e, v29 ; 0A3A3AFF 3EE8BA2E v_floor_f32_e64 v30, |v29| ; D15F011E 0000011D v_sub_f32_e64 v30, |v29|, v30 ; D102011E 00023D1D v_xor_b32_e32 v31, v30, v26 ; 2A3E351E v_cmp_ge_f32_e64 vcc, v29, -v29 ; D046006A 40023B1D v_cndmask_b32_e32 v29, v31, v30, vcc ; 003A3D1F v_mad_f32 v30, s38, v17, -v24 ; D1C1001E 84622226 v_mul_f32_e32 v30, 0x3e50fac6, v30 ; 0A3C3CFF 3E50FAC6 v_floor_f32_e64 v31, |v30| ; D15F011F 0000011E v_sub_f32_e64 v31, |v30|, v31 ; D102011F 00023F1E v_xor_b32_e32 v32, v31, v26 ; 2A40351F v_cmp_ge_f32_e64 vcc, v30, -v30 ; D046006A 40023D1E v_cndmask_b32_e32 v30, v32, v31, vcc ; 003C3F20 v_mov_b32_e32 v31, 0x40c90fdb ; 7E3E02FF 40C90FDB v_mul_f32_e32 v22, v31, v22 ; 0A2C2D1F v_mov_b32_e32 v32, 0x3e22f983 ; 7E4002FF 3E22F983 v_mul_f32_e32 v25, v31, v25 ; 0A32331F v_mul_f32_e32 v22, v32, v22 ; 0A2C2D20 v_mul_f32_e32 v25, v32, v25 ; 0A323320 v_fract_f32_e32 v22, v22 ; 7E2C3716 v_fract_f32_e32 v25, v25 ; 7E323719 v_sin_f32_e32 v22, v22 ; 7E2C5316 v_sin_f32_e32 v25, v25 ; 7E325319 v_mad_f32 v24, s37, v17, -v24 ; D1C10018 84622225 v_floor_f32_e64 v33, |v24| ; D15F0121 00000118 v_sub_f32_e64 v33, |v24|, v33 ; D1020121 00024318 v_add_f32_e32 v22, v22, v25 ; 022C3316 v_mul_f32_e32 v25, v31, v29 ; 0A323B1F v_mul_f32_e32 v29, v31, v30 ; 0A3A3D1F v_xor_b32_e32 v34, v33, v26 ; 2A443521 v_cmp_ge_f32_e64 vcc, v24, -v24 ; D046006A 40023118 v_cndmask_b32_e32 v24, v34, v33, vcc ; 00304322 v_mul_f32_e32 v29, v32, v29 ; 0A3A3B20 v_mul_f32_e32 v25, v32, v25 ; 0A323320 v_mul_f32_e32 v24, v31, v24 ; 0A30311F v_fract_f32_e32 v29, v29 ; 7E3A371D v_fract_f32_e32 v25, v25 ; 7E323719 v_mul_f32_e32 v10, v10, v13 ; 0A141B0A v_mul_f32_e32 v9, v9, v13 ; 0A121B09 v_mul_f32_e32 v7, v7, v13 ; 0A0E1B07 exp 15, 32, 0, 0, 0, v10, v9, v7, v0 ; C400020F 0007090A v_mul_f32_e32 v24, v32, v24 ; 0A303120 s_waitcnt expcnt(0) ; BF8C0F0F v_mul_f32_e32 v7, v31, v27 ; 0A0E371F v_mul_f32_e32 v9, v31, v28 ; 0A12391F v_sin_f32_e32 v29, v29 ; 7E3A531D v_sin_f32_e32 v25, v25 ; 7E325319 v_fract_f32_e32 v24, v24 ; 7E303718 v_mul_f32_e32 v7, v32, v7 ; 0A0E0F20 v_mul_f32_e32 v9, v32, v9 ; 0A121320 v_fract_f32_e32 v7, v7 ; 7E0E3707 v_fract_f32_e32 v9, v9 ; 7E123709 v_sin_f32_e32 v24, v24 ; 7E305318 v_sin_f32_e32 v7, v7 ; 7E0E5307 v_sin_f32_e32 v9, v9 ; 7E125309 v_add_f32_e32 v25, v25, v29 ; 02323B19 v_mul_f32_e32 v29, s35, v25 ; 0A3A3223 v_mac_f32_e32 v29, s33, v24 ; 2C3A3021 v_mov_b32_e32 v24, 0xc1200000 ; 7E3002FF C1200000 v_mov_b32_e32 v30, 0xc2200000 ; 7E3C02FF C2200000 v_mac_f32_e32 v16, s58, v21 ; 2C202A3A v_mul_f32_e32 v21, s56, v17 ; 0A2A2238 v_add_f32_e32 v7, v7, v9 ; 020E1307 v_mul_f32_e64 v25, |v25|, s34 ; D1050119 00004519 v_mac_f32_e32 v24, s32, v30 ; 2C303C20 v_mac_f32_e32 v18, v11, v29 ; 2C243B0B v_mul_f32_e32 v24, v24, v25 ; 0A303318 v_mul_f32_e32 v22, s36, v22 ; 0A2C2C24 v_mac_f32_e32 v21, v21, v7 ; 2C2A0F15 v_madmk_f32_e32 v7, v21, 0x3e4ccccd, v22 ; 2E0E2D15 3E4CCCCD v_mul_f32_e32 v30, s31, v18 ; 0A3C241F v_mac_f32_e32 v17, v11, v24 ; 2C22310B v_mul_f32_e32 v29, s5, v18 ; 0A3A2405 v_mac_f32_e32 v30, s29, v17 ; 2C3C221D v_mac_f32_e32 v19, v11, v7 ; 2C260F0B v_mul_f32_e32 v33, s12, v18 ; 0A42240C v_mac_f32_e32 v29, s4, v17 ; 2C3A2204 v_add_f32_e32 v20, 0, v20 ; 02282880 v_mac_f32_e32 v30, s10, v19 ; 2C3C260A v_mac_f32_e32 v30, s7, v20 ; 2C3C2807 v_mul_f32_e32 v18, s30, v18 ; 0A24241E v_mac_f32_e32 v33, s11, v17 ; 2C42220B v_mac_f32_e32 v29, s0, v19 ; 2C3A2600 v_mac_f32_e32 v18, s2, v17 ; 2C242202 v_mac_f32_e32 v33, s1, v19 ; 2C422601 v_mul_f32_e32 v23, s57, v23 ; 0A2E2E39 v_mul_f32_e32 v0, v0, v14 ; 0A001D00 v_mul_f32_e32 v1, v1, v14 ; 0A021D01 v_mul_f32_e32 v8, v8, v14 ; 0A101D08 v_mac_f32_e32 v29, s6, v20 ; 2C3A2806 v_mul_f32_e32 v9, s19, v30 ; 0A123C13 v_mul_f32_e32 v10, s21, v30 ; 0A143C15 v_mul_f32_e32 v13, s23, v30 ; 0A1A3C17 v_mac_f32_e32 v18, s3, v19 ; 2C242603 v_mul_f32_e32 v7, s17, v30 ; 0A0E3C11 exp 15, 33, 0, 0, 0, v0, v1, v8, v23 ; C400021F 17080100 v_mac_f32_e32 v33, s8, v20 ; 2C422808 v_mac_f32_e32 v9, s14, v29 ; 2C123A0E v_mac_f32_e32 v10, s15, v29 ; 2C143A0F v_mac_f32_e32 v13, s16, v29 ; 2C1A3A10 v_mac_f32_e32 v7, s13, v29 ; 2C0E3A0D exp 15, 34, 0, 0, 0, v16, v15, v0, v0 ; C400022F 00000F10 v_mac_f32_e32 v18, s9, v20 ; 2C242809 v_mac_f32_e32 v9, s20, v33 ; 2C124214 v_mac_f32_e32 v10, s22, v33 ; 2C144216 v_mac_f32_e32 v13, s24, v33 ; 2C1A4218 v_mac_f32_e32 v7, s18, v33 ; 2C0E4212 v_mac_f32_e32 v9, s26, v18 ; 2C12241A v_mac_f32_e32 v10, s27, v18 ; 2C14241B v_mac_f32_e32 v13, s28, v18 ; 2C1A241C v_mov_b32_e32 v14, 0 ; 7E1C0280 exp 15, 35, 0, 0, 0, v3, v4, v6, v5 ; C400023F 05060403 v_mac_f32_e32 v7, s25, v18 ; 2C0E2419 v_xor_b32_e32 v9, v9, v26 ; 2A123509 exp 15, 36, 0, 0, 0, v11, v12, v14, v14 ; C400024F 0E0E0C0B s_waitcnt expcnt(0) ; BF8C0F0F v_mad_f32 v0, 2.0, v10, -v13 ; D1C10000 843614F4 exp 15, 12, 0, 1, 0, v7, v9, v0, v13 ; C40008CF 0D000907 s_waitcnt expcnt(0) ; BF8C0F0F Shader epilog disassembly: s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 80 VGPRS: 36 Spilled VGPRs: 0 Code Size: 1920 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 7 ******************** [...] [traceshaders] glGenProgramPipelines(): [traceshaders] - pipeline=20 [traceshaders] glUseProgramStages(pipeline=20, stages=vert, program=74) [traceshaders] glUseProgramStages(pipeline=20, stages=frag, program=90) [traceshaders] glBindProgramPipeline(pipeline=20) Pixel Shader: Shader main disassembly: s_wqm_b64 exec, exec ; BEFE077E s_mov_b32 m0, s11 ; BEFC000B v_interp_p1_f32 v6, v2, 0, 0, [m0] ; D4180002 v_interp_p2_f32 v6, [v6], v3, 0, 0, [m0] ; D4190003 v_interp_p1_f32 v7, v2, 1, 0, [m0] ; D41C0102 v_interp_p2_f32 v7, [v7], v3, 1, 0, [m0] ; D41D0103 v_interp_p1_f32 v0, v2, 2, 0, [m0] ; D4000202 v_interp_p2_f32 v0, [v0], v3, 2, 0, [m0] ; D4010203 v_interp_p1_f32 v8, v2, 0, 1, [m0] ; D4200402 v_interp_p2_f32 v8, [v8], v3, 0, 1, [m0] ; D4210403 v_interp_p1_f32 v9, v2, 1, 1, [m0] ; D4240502 v_interp_p2_f32 v9, [v9], v3, 1, 1, [m0] ; D4250503 v_interp_p1_f32 v1, v2, 2, 1, [m0] ; D4040602 v_interp_p2_f32 v1, [v1], v3, 2, 1, [m0] ; D4050603 v_interp_p1_f32 v10, v2, 3, 1, [m0] ; D4280702 v_interp_p2_f32 v10, [v10], v3, 3, 1, [m0] ; D4290703 v_interp_p1_f32 v4, v2, 0, 2, [m0] ; D4100802 v_interp_p2_f32 v4, [v4], v3, 0, 2, [m0] ; D4110803 v_interp_p1_f32 v5, v2, 1, 2, [m0] ; D4140902 v_mul_f32_e32 v2, v9, v6 ; 0A040D09 s_load_dwordx4 s[24:27], s[2:3], 0x10 ; C00A0601 00000010 v_mad_f32 v2, v8, v7, -v2 ; D1C10002 840A0F08 v_interp_p2_f32 v5, [v5], v3, 1, 2, [m0] ; D4150903 v_mul_f32_e32 v2, v10, v2 ; 0A04050A v_mul_f32_e32 v3, v0, v0 ; 0A060100 v_mac_f32_e32 v3, v2, v2 ; 2C060502 v_mac_f32_e32 v3, v1, v1 ; 2C060301 v_cmp_nlt_f32_e32 vcc, 0, v3 ; 7C9C0680 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[28:29], vcc ; BE9C206A s_xor_b64 s[28:29], exec, s[28:29] ; 889C1C7E v_cmp_le_f32_e32 vcc, 0, v3 ; 7C860680 v_mul_f32_e32 v6, 0x7f7fffff, v3 ; 0A0C06FF 7F7FFFFF v_mov_b32_e32 v7, 0xff7fffff ; 7E0E02FF FF7FFFFF v_cndmask_b32_e32 v6, v7, v6, vcc ; 000C0D07 s_or_saveexec_b64 s[28:29], s[28:29] ; BE9C211C s_buffer_load_dword s23, s[24:27], 0x2d0 ; C02205CC 000002D0 s_buffer_load_dword s17, s[24:27], 0x3a0 ; C022044C 000003A0 s_buffer_load_dword s15, s[24:27], 0x3a4 ; C02203CC 000003A4 s_buffer_load_dword s13, s[24:27], 0x3a8 ; C022034C 000003A8 s_buffer_load_dword s11, s[24:27], 0x3ac ; C02202CC 000003AC s_buffer_load_dword s19, s[24:27], 0x3b0 ; C02204CC 000003B0 s_buffer_load_dword s20, s[24:27], 0x3b4 ; C022050C 000003B4 s_buffer_load_dword s21, s[24:27], 0x3b8 ; C022054C 000003B8 s_buffer_load_dword s22, s[24:27], 0x3bc ; C022058C 000003BC s_buffer_load_dword s18, s[24:27], 0x3c0 ; C022048C 000003C0 s_buffer_load_dword s16, s[24:27], 0x3c4 ; C022040C 000003C4 s_buffer_load_dword s14, s[24:27], 0x3c8 ; C022038C 000003C8 s_buffer_load_dword s12, s[24:27], 0x3cc ; C022030C 000003CC s_buffer_load_dword s9, s[24:27], 0x3d0 ; C022024C 000003D0 s_buffer_load_dword s8, s[24:27], 0x3d4 ; C022020C 000003D4 s_buffer_load_dword s7, s[24:27], 0x3d8 ; C02201CC 000003D8 s_buffer_load_dword s0, s[24:27], 0x3dc ; C022000C 000003DC s_buffer_load_dword s1, s[24:27], 0x3e0 ; C022004C 000003E0 s_buffer_load_dword s2, s[24:27], 0x3e4 ; C022008C 000003E4 s_buffer_load_dword s3, s[24:27], 0x3e8 ; C02200CC 000003E8 s_buffer_load_dword s6, s[24:27], 0x3ec ; C022018C 000003EC s_load_dwordx8 s[32:39], s[4:5], 0x0 ; C00E0802 00000000 s_nop 0 ; BF800000 s_load_dwordx4 s[24:27], s[4:5], 0x30 ; C00A0602 00000030 s_waitcnt lgkmcnt(0) ; BF8C007F s_xor_b64 exec, exec, s[28:29] ; 88FE1C7E v_rsq_f32_e32 v6, v3 ; 7E0C4903 s_or_b64 exec, exec, s[28:29] ; 87FE1C7E v_mul_f32_e32 v2, v2, v6 ; 0A040D02 v_mul_f32_e32 v1, v1, v6 ; 0A020D01 v_mad_f32 v2, 0.5, v2, 0.5 ; D1C10002 03C204F0 v_mad_f32 v11, 0.5, v1, 0.5 ; D1C1000B 03C202F0 v_add_f32_e64 v1, 0, v2 clamp ; D1018001 00020480 v_mov_b32_e32 v2, 0xc0400000 ; 7E0402FF C0400000 v_add_f32_e32 v2, s23, v2 ; 02040417 v_mul_f32_e32 v2, 0x3b000000, v2 ; 0A0404FF 3B000000 v_add_f32_e64 v2, 0, v2 clamp ; D1018002 00020480 image_sample v[7:10], v[4:5], s[32:39], s[24:27] dmask:0xf ; F0800F00 00C80704 v_cmp_gt_f32_e32 vcc, 0, v2 ; 7C880480 v_sqrt_f32_e32 v2, v2 ; 7E044F02 v_mul_f32_e32 v0, v0, v6 ; 0A000D00 s_waitcnt vmcnt(0) ; BF8C0F70 v_mul_f32_e32 v5, s20, v8 ; 0A0A1014 v_mul_f32_e32 v6, s21, v8 ; 0A0C1015 v_cndmask_b32_e64 v3, v2, 0, vcc ; D1000003 01A90102 v_mul_f32_e32 v2, s19, v8 ; 0A041013 v_mul_f32_e32 v8, s22, v8 ; 0A101016 v_mac_f32_e32 v2, s17, v7 ; 2C040E11 v_mac_f32_e32 v5, s15, v7 ; 2C0A0E0F v_mac_f32_e32 v6, s13, v7 ; 2C0C0E0D v_mac_f32_e32 v8, s11, v7 ; 2C100E0B v_mac_f32_e32 v2, s18, v9 ; 2C041212 v_mac_f32_e32 v5, s16, v9 ; 2C0A1210 v_mac_f32_e32 v6, s14, v9 ; 2C0C120E v_mac_f32_e32 v8, s12, v9 ; 2C10120C v_mad_f32 v0, 0.5, v0, 0.5 ; D1C10000 03C200F0 v_mac_f32_e32 v5, s8, v10 ; 2C0A1408 v_mac_f32_e32 v6, s7, v10 ; 2C0C1407 v_mac_f32_e32 v2, s9, v10 ; 2C041409 v_mac_f32_e32 v8, s0, v10 ; 2C101400 v_add_f32_e32 v4, s1, v2 ; 02080401 v_add_f32_e64 v0, 0, v0 clamp ; D1018000 00020080 v_add_f32_e32 v5, s2, v5 ; 020A0A02 v_add_f32_e32 v6, s3, v6 ; 020C0C03 v_add_f32_e32 v7, s6, v8 ; 020E1006 v_add_f32_e64 v2, 0, v11 clamp ; D1018002 00021680 Shader epilog disassembly: v_cvt_pkrtz_f16_f32_e64 v0, v0, v1 ; D2960000 00020300 v_cvt_pkrtz_f16_f32_e64 v1, v2, v3 ; D2960001 00020702 exp 15, 0, 1, 0, 0, v0, v1, v0, v0 ; C400040F 00000100 s_waitcnt expcnt(0) ; BF8C0F0F v_cvt_pkrtz_f16_f32_e64 v0, v4, v5 ; D2960000 00020B04 v_cvt_pkrtz_f16_f32_e64 v1, v6, v7 ; D2960001 00020F06 exp 15, 1, 1, 1, 1, v0, v1, v0, v0 ; C4001C1F 00000100 s_endpgm ; BF810000 *** SHADER CONFIG *** SPI_PS_INPUT_ADDR = 0xd077 SPI_PS_INPUT_ENA = 0x0002 *** SHADER STATS *** SGPRS: 80 VGPRS: 16 Spilled VGPRs: 0 Code Size: 616 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** [...] [traceshaders] glGenProgramPipelines(): [traceshaders] - pipeline=105 [traceshaders] glUseProgramStages(pipeline=105, stages=vert, program=384) [traceshaders] glUseProgramStages(pipeline=105, stages=frag, program=382) [traceshaders] glBindProgramPipeline(pipeline=105) Vertex Shader as VS: Shader prolog disassembly: v_add_i32_e32 v4, vcc, s12, v0 ; 3208000C v_mov_b32_e32 v5, v4 ; 7E0A0304 v_mov_b32_e32 v6, v4 ; 7E0C0304 v_mov_b32_e32 v7, v4 ; 7E0E0304 v_mov_b32_e32 v8, v4 ; 7E100304 v_mov_b32_e32 v9, v4 ; 7E120304 v_mov_b32_e32 v10, v4 ; 7E140304 Shader main disassembly: s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C00A0105 00000000 s_load_dwordx4 s[12:15], s[10:11], 0x10 ; C00A0305 00000010 s_load_dwordx4 s[16:19], s[10:11], 0x20 ; C00A0405 00000020 v_mov_b32_e32 v1, 0x3f9ae148 ; 7E0202FF 3F9AE148 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[11:14], v4, s[4:7], 0 idxen ; E00C2000 80010B04 s_nop 0 ; BF800000 buffer_load_format_xyzw v[15:18], v5, s[12:15], 0 idxen ; E00C2000 80030F05 s_load_dwordx4 s[12:15], s[10:11], 0x30 ; C00A0305 00000030 buffer_load_format_xyzw v[3:6], v6, s[16:19], 0 idxen ; E00C2000 80040306 s_load_dwordx4 s[16:19], s[10:11], 0x40 ; C00A0405 00000040 s_load_dwordx4 s[4:7], s[2:3], 0x10 ; C00A0101 00000010 s_load_dwordx4 s[0:3], s[2:3], 0x20 ; C00A0001 00000020 s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 buffer_load_format_xyzw v[18:21], v7, s[12:15], 0 idxen ; E00C2000 80031207 s_load_dwordx4 s[12:15], s[10:11], 0x50 ; C00A0305 00000050 s_load_dwordx4 s[8:11], s[10:11], 0x60 ; C00A0205 00000060 s_waitcnt vmcnt(0) ; BF8C0F70 buffer_load_format_xyzw v[20:23], v8, s[16:19], 0 idxen ; E00C2000 80041408 s_buffer_load_dword s27, s[4:7], 0x1ec ; C02206C2 000001EC s_buffer_load_dword s51, s[4:7], 0x280 ; C0220CC2 00000280 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[22:25], v9, s[12:15], 0 idxen ; E00C2000 80031609 s_nop 0 ; BF800000 buffer_load_format_xyzw v[7:10], v10, s[8:11], 0 idxen ; E00C2000 8002070A s_buffer_load_dword s11, s[4:7], 0x12c ; C02202C2 0000012C v_mov_b32_e32 v0, s27 ; 7E00021B s_waitcnt vmcnt(0) ; BF8C0F70 v_bfrev_b32_e32 v9, 1 ; 7E125881 s_buffer_load_dword s54, s[4:7], 0x290 ; C0220D82 00000290 s_buffer_load_dword s36, s[4:7], 0x220 ; C0220902 00000220 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v0, s11, v1 ; 2C00020B v_floor_f32_e64 v1, |v0| ; D15F0101 00000100 v_sub_f32_e64 v1, |v0|, v1 ; D1020101 00020300 v_xor_b32_e32 v10, v1, v9 ; 2A141301 v_cmp_ge_f32_e64 vcc, v0, -v0 ; D046006A 40020100 v_cndmask_b32_e32 v1, v10, v1, vcc ; 0002030A v_mul_f32_e32 v10, 0x3f44ec50, v0 ; 0A1400FF 3F44EC50 v_floor_f32_e64 v24, |v10| ; D15F0118 0000010A v_sub_f32_e64 v24, |v10|, v24 ; D1020118 0002310A v_xor_b32_e32 v25, v24, v9 ; 2A321318 v_cmp_ge_f32_e64 vcc, v10, -v10 ; D046006A 4002150A v_cndmask_b32_e32 v10, v25, v24, vcc ; 00143119 v_mov_b32_e32 v24, s51 ; 7E300233 s_buffer_load_dword s28, s[4:7], 0x200 ; C0220702 00000200 s_buffer_load_dword s35, s[4:7], 0x21c ; C02208C2 0000021C s_buffer_load_dword s26, s[4:7], 0x1e8 ; C0220682 000001E8 s_buffer_load_dword s29, s[4:7], 0x204 ; C0220742 00000204 s_buffer_load_dword s31, s[4:7], 0x20c ; C02207C2 0000020C s_buffer_load_dword s41, s[4:7], 0x24c ; C0220A42 0000024C s_buffer_load_dword s38, s[4:7], 0x240 ; C0220982 00000240 s_buffer_load_dword s40, s[4:7], 0x248 ; C0220A02 00000248 s_buffer_load_dword s39, s[4:7], 0x244 ; C02209C2 00000244 s_buffer_load_dword s52, s[4:7], 0x284 ; C0220D02 00000284 s_buffer_load_dword s55, s[4:7], 0x294 ; C0220DC2 00000294 s_buffer_load_dword s33, s[4:7], 0x214 ; C0220842 00000214 s_buffer_load_dword s53, s[4:7], 0x288 ; C0220D42 00000288 s_buffer_load_dword s32, s[4:7], 0x210 ; C0220802 00000210 s_buffer_load_dword s20, s[4:7], 0x0 ; C0220502 00000000 s_buffer_load_dword s21, s[4:7], 0x4 ; C0220542 00000004 s_buffer_load_dword s22, s[4:7], 0x8 ; C0220582 00000008 s_buffer_load_dword s23, s[4:7], 0xc ; C02205C2 0000000C s_buffer_load_dword s24, s[4:7], 0x10 ; C0220602 00000010 s_buffer_load_dword s16, s[4:7], 0x14 ; C0220402 00000014 s_buffer_load_dword s17, s[4:7], 0x18 ; C0220442 00000018 s_buffer_load_dword s18, s[4:7], 0x1c ; C0220482 0000001C s_buffer_load_dword s19, s[4:7], 0x20 ; C02204C2 00000020 s_buffer_load_dword s12, s[4:7], 0x24 ; C0220302 00000024 s_buffer_load_dword s13, s[4:7], 0x28 ; C0220342 00000028 s_buffer_load_dword s14, s[4:7], 0x2c ; C0220382 0000002C s_buffer_load_dword s15, s[4:7], 0x30 ; C02203C2 00000030 s_buffer_load_dword s8, s[4:7], 0x34 ; C0220202 00000034 s_buffer_load_dword s9, s[4:7], 0x38 ; C0220242 00000038 s_buffer_load_dword s10, s[4:7], 0x3c ; C0220282 0000003C s_buffer_load_dword s25, s[4:7], 0x1e4 ; C0220642 000001E4 s_buffer_load_dword s30, s[4:7], 0x208 ; C0220782 00000208 s_buffer_load_dword s34, s[4:7], 0x218 ; C0220882 00000218 s_buffer_load_dword s37, s[4:7], 0x224 ; C0220942 00000224 s_buffer_load_dword s42, s[4:7], 0x250 ; C0220A82 00000250 s_buffer_load_dword s43, s[4:7], 0x254 ; C0220AC2 00000254 s_buffer_load_dword s44, s[4:7], 0x258 ; C0220B02 00000258 s_buffer_load_dword s45, s[4:7], 0x260 ; C0220B42 00000260 s_buffer_load_dword s46, s[4:7], 0x264 ; C0220B82 00000264 s_buffer_load_dword s47, s[4:7], 0x268 ; C0220BC2 00000268 s_buffer_load_dword s48, s[4:7], 0x270 ; C0220C02 00000270 s_buffer_load_dword s49, s[4:7], 0x274 ; C0220C42 00000274 s_buffer_load_dword s50, s[4:7], 0x278 ; C0220C82 00000278 s_buffer_load_dword s4, s[4:7], 0x298 ; C0220102 00000298 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v30, s53 ; 7E3C0235 s_buffer_load_dword s57, s[0:3], 0x10 ; C0220E40 00000010 s_buffer_load_dword s58, s[0:3], 0x14 ; C0220E80 00000014 v_mac_f32_e32 v24, s54, v11 ; 2C301636 v_mad_f32 v11, s36, v24, v0 ; D1C1000B 04023024 v_mul_f32_e32 v11, 0.5, v11 ; 0A1616F0 v_floor_f32_e64 v25, |v11| ; D15F0119 0000010B v_sub_f32_e64 v25, |v11|, v25 ; D1020119 0002330B v_xor_b32_e32 v26, v25, v9 ; 2A341319 v_cmp_ge_f32_e64 vcc, v11, -v11 ; D046006A 4002170B v_cndmask_b32_e32 v11, v26, v25, vcc ; 0016331A v_add_f32_e32 v25, s28, v0 ; 0232001C v_mac_f32_e32 v0, s35, v24 ; 2C003023 v_floor_f32_e64 v26, |v0| ; D15F011A 00000100 v_sub_f32_e64 v26, |v0|, v26 ; D102011A 00023500 v_xor_b32_e32 v27, v26, v9 ; 2A36131A v_cmp_ge_f32_e64 vcc, v0, -v0 ; D046006A 40020100 v_cndmask_b32_e32 v0, v27, v26, vcc ; 0000351B v_mad_f32 v26, s26, v24, -v25 ; D1C1001A 8466301A v_mul_f32_e32 v26, 0x3ee8ba2e, v26 ; 0A3434FF 3EE8BA2E v_floor_f32_e64 v27, |v26| ; D15F011B 0000011A v_sub_f32_e64 v27, |v26|, v27 ; D102011B 0002371A v_xor_b32_e32 v28, v27, v9 ; 2A38131B v_cmp_ge_f32_e64 vcc, v26, -v26 ; D046006A 4002351A v_cndmask_b32_e32 v26, v28, v27, vcc ; 0034371C v_mad_f32 v27, s29, v24, -v25 ; D1C1001B 8466301D v_mul_f32_e32 v27, 0x3e50fac6, v27 ; 0A3636FF 3E50FAC6 v_floor_f32_e64 v28, |v27| ; D15F011C 0000011B v_sub_f32_e64 v28, |v27|, v28 ; D102011C 0002391B v_xor_b32_e32 v29, v28, v9 ; 2A3A131C v_cmp_ge_f32_e64 vcc, v27, -v27 ; D046006A 4002371B v_mad_f32 v25, s31, v24, -v25 ; D1C10019 8466301F v_cndmask_b32_e32 v27, v29, v28, vcc ; 0036391D v_floor_f32_e64 v28, |v25| ; D15F011C 00000119 v_sub_f32_e64 v28, |v25|, v28 ; D102011C 00023919 v_xor_b32_e32 v29, v28, v9 ; 2A3A131C v_cmp_ge_f32_e64 vcc, v25, -v25 ; D046006A 40023319 v_cndmask_b32_e32 v25, v29, v28, vcc ; 0032391D v_mov_b32_e32 v28, s41 ; 7E380229 v_mac_f32_e32 v28, s38, v7 ; 2C380E26 v_mov_b32_e32 v7, s40 ; 7E0E0228 v_mac_f32_e32 v7, s39, v8 ; 2C0E1027 v_mov_b32_e32 v8, s52 ; 7E100234 v_mac_f32_e32 v8, s55, v12 ; 2C101837 v_mov_b32_e32 v12, 0x40c90fdb ; 7E1802FF 40C90FDB v_mul_f32_e32 v11, v12, v11 ; 0A16170C v_mov_b32_e32 v29, 0x3e22f983 ; 7E3A02FF 3E22F983 v_mul_f32_e32 v0, v12, v0 ; 0A00010C v_mul_f32_e32 v11, v29, v11 ; 0A16171D v_mul_f32_e32 v0, v29, v0 ; 0A00011D v_fract_f32_e32 v11, v11 ; 7E16370B v_fract_f32_e32 v0, v0 ; 7E003700 v_sin_f32_e32 v11, v11 ; 7E16530B v_sin_f32_e32 v0, v0 ; 7E005300 v_mul_f32_e32 v26, v12, v26 ; 0A34350C v_mul_f32_e32 v26, v29, v26 ; 0A34351D v_fract_f32_e32 v26, v26 ; 7E34371A v_add_f32_e32 v0, v0, v11 ; 02001700 v_mul_f32_e32 v11, v12, v27 ; 0A16370C v_mul_f32_e32 v11, v29, v11 ; 0A16171D v_fract_f32_e32 v11, v11 ; 7E16370B v_mul_f32_e32 v25, v12, v25 ; 0A32330C v_mul_f32_e32 v25, v29, v25 ; 0A32331D v_mul_f32_e32 v1, v12, v1 ; 0A02030C v_mul_f32_e32 v10, v12, v10 ; 0A14150C v_sin_f32_e32 v11, v11 ; 7E16530B v_sin_f32_e32 v26, v26 ; 7E34531A v_fract_f32_e32 v25, v25 ; 7E323719 v_mul_f32_e32 v1, v29, v1 ; 0A02031D v_mul_f32_e32 v10, v29, v10 ; 0A14151D v_fract_f32_e32 v1, v1 ; 7E023701 v_fract_f32_e32 v10, v10 ; 7E14370A v_sin_f32_e32 v25, v25 ; 7E325319 v_sin_f32_e32 v1, v1 ; 7E025301 v_sin_f32_e32 v10, v10 ; 7E14530A v_add_f32_e32 v11, v26, v11 ; 0216171A v_mul_f32_e32 v26, s33, v11 ; 0A341621 v_mac_f32_e32 v26, s32, v25 ; 2C343220 v_mov_b32_e32 v25, 0xc1200000 ; 7E3202FF C1200000 v_mov_b32_e32 v27, 0xc2200000 ; 7E3602FF C2200000 v_mac_f32_e32 v30, s4, v13 ; 2C3C1A04 v_add_f32_e32 v1, v1, v10 ; 02021501 v_mul_f32_e32 v13, s34, v24 ; 0A1A3022 v_mul_f32_e64 v11, |v11|, s30 ; D105010B 00003D0B v_mac_f32_e32 v25, s25, v27 ; 2C323619 v_mac_f32_e32 v8, v22, v26 ; 2C103516 v_mul_f32_e32 v11, v25, v11 ; 0A161719 v_mac_f32_e32 v13, v13, v1 ; 2C1A030D v_mul_f32_e32 v0, s37, v0 ; 0A000025 v_madmk_f32_e32 v0, v13, 0x3e4ccccd, v0 ; 2E00010D 3E4CCCCD v_mac_f32_e32 v24, v22, v11 ; 2C301716 v_mul_f32_e32 v27, s16, v8 ; 0A361010 v_mul_f32_e32 v26, s24, v8 ; 0A341018 v_mul_f32_e32 v31, s17, v8 ; 0A3E1011 v_mul_f32_e32 v8, s18, v8 ; 0A101012 v_mac_f32_e32 v30, v22, v0 ; 2C3C0116 v_mac_f32_e32 v27, s21, v24 ; 2C363015 s_buffer_load_dword s59, s[0:3], 0x18 ; C0220EC0 00000018 s_buffer_load_dword s60, s[0:3], 0x1c ; C0220F00 0000001C v_mac_f32_e32 v26, s20, v24 ; 2C343014 v_mac_f32_e32 v31, s22, v24 ; 2C3E3016 v_mac_f32_e32 v8, s23, v24 ; 2C103017 s_buffer_load_dword s5, s[0:3], 0x0 ; C0220140 00000000 s_buffer_load_dword s6, s[0:3], 0x4 ; C0220180 00000004 s_buffer_load_dword s7, s[0:3], 0x8 ; C02201C0 00000008 s_buffer_load_dword s56, s[0:3], 0xc ; C0220E00 0000000C v_mac_f32_e32 v27, s12, v30 ; 2C363C0C v_add_f32_e32 v0, 0, v14 ; 02001C80 v_mac_f32_e32 v26, s19, v30 ; 2C343C13 v_mac_f32_e32 v31, s13, v30 ; 2C3E3C0D v_mac_f32_e32 v8, s14, v30 ; 2C103C0E v_mac_f32_e32 v27, s8, v0 ; 2C360008 s_buffer_load_dword s61, s[0:3], 0x20 ; C0220F40 00000020 s_buffer_load_dword s62, s[0:3], 0x24 ; C0220F80 00000024 s_buffer_load_dword s63, s[0:3], 0x28 ; C0220FC0 00000028 s_buffer_load_dword s64, s[0:3], 0x2c ; C0221000 0000002C s_buffer_load_dword s65, s[0:3], 0x30 ; C0221040 00000030 s_buffer_load_dword s66, s[0:3], 0x34 ; C0221080 00000034 s_buffer_load_dword s67, s[0:3], 0x38 ; C02210C0 00000038 s_buffer_load_dword s72, s[0:3], 0x3c ; C0221200 0000003C s_buffer_load_dword s0, s[0:3], 0x4c ; C0220000 0000004C v_mac_f32_e32 v26, s15, v0 ; 2C34000F v_mac_f32_e32 v31, s9, v0 ; 2C3E0009 v_mac_f32_e32 v8, s10, v0 ; 2C10000A s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s57, v27 ; 0A003639 exp 15, 32, 0, 0, 0, v28, v7, v0, v0 ; C400020F 0000071C exp 15, 33, 0, 0, 0, v18, v19, v21, v20 ; C400021F 14151312 v_mul_f32_e32 v14, s58, v27 ; 0A1C363A s_waitcnt expcnt(0) ; BF8C0F0F v_mul_f32_e32 v18, s59, v27 ; 0A24363B v_mul_f32_e32 v19, s60, v27 ; 0A26363C v_add_f32_e32 v1, v16, v16 ; 02022110 v_add_f32_e32 v10, v17, v17 ; 02142311 v_mad_f32 v11, 2.0, v17, -1.0 ; D1C1000B 03CE22F4 v_add_f32_e32 v17, v5, v5 ; 02220B05 v_mad_f32 v5, 2.0, v5, -1.0 ; D1C10005 03CE0AF4 v_add_f32_e32 v12, v15, v15 ; 02181F0F v_mad_f32 v13, 2.0, v15, -1.0 ; D1C1000D 03CE1EF4 v_mov_b32_e32 v15, 0 ; 7E1E0280 v_mac_f32_e32 v0, s5, v26 ; 2C003405 v_mac_f32_e32 v14, s6, v26 ; 2C1C3406 v_mac_f32_e32 v18, s7, v26 ; 2C243407 v_mac_f32_e32 v19, s56, v26 ; 2C263438 v_mad_f32 v7, 2.0, v16, -1.0 ; D1C10007 03CE20F4 v_add_f32_e32 v16, v3, v3 ; 02200703 exp 15, 34, 0, 0, 0, v22, v23, v15, v15 ; C400022F 0F0F1716 v_mad_f32 v3, 2.0, v3, -1.0 ; D1C10003 03CE06F4 s_waitcnt expcnt(0) ; BF8C0F0F v_add_f32_e32 v15, v4, v4 ; 021E0904 v_mad_f32 v4, 2.0, v4, -1.0 ; D1C10004 03CE08F4 v_mad_f32 v1, v1, v5, -v5 ; D1C10001 84160B01 v_mac_f32_e32 v0, s61, v31 ; 2C003E3D v_mac_f32_e32 v14, s62, v31 ; 2C1C3E3E v_mac_f32_e32 v18, s63, v31 ; 2C243E3F v_mac_f32_e32 v19, s64, v31 ; 2C263E40 v_mul_f32_e32 v20, s0, v27 ; 0A283600 v_mad_f32 v10, v10, v3, -v3 ; D1C1000A 840E070A v_mad_f32 v1, v4, v11, -v1 ; D1C10001 84061704 v_add_f32_e32 v6, v6, v6 ; 020C0D06 v_mad_f32 v11, v12, v4, -v4 ; D1C1000B 8412090C v_mad_f32 v10, v5, v13, -v10 ; D1C1000A 842A1B05 v_mad_f32 v7, v3, v7, -v11 ; D1C10007 842E0F03 v_mad_f32 v1, v6, v1, -v1 ; D1C10001 84060306 v_mac_f32_e32 v0, s65, v8 ; 2C001041 v_mac_f32_e32 v14, s66, v8 ; 2C1C1042 v_mac_f32_e32 v18, s67, v8 ; 2C241043 v_mac_f32_e32 v19, s72, v8 ; 2C261048 v_mul_f32_e32 v8, s0, v26 ; 0A103400 v_mul_f32_e32 v22, s45, v20 ; 0A2C282D v_mul_f32_e32 v23, s46, v20 ; 0A2E282E v_mad_f32 v7, v6, v7, -v7 ; D1C10007 841E0F06 v_mad_f32 v12, v17, v1, -v1 ; D1C1000C 84060311 v_mul_f32_e32 v21, s0, v31 ; 0A2A3E00 v_mul_f32_e32 v20, s47, v20 ; 0A28282F v_mad_f32 v22, -v8, s42, -v22 ; D1C10016 A4585508 v_mad_f32 v10, v6, v10, -v10 ; D1C1000A 842A1506 v_mad_f32 v3, v7, v3, -v12 ; D1C10003 84320707 v_mad_f32 v12, v16, v10, -v10 ; D1C1000C 842A1510 v_mad_f32 v23, -v8, s43, -v23 ; D1C10017 A45C5708 v_mad_f32 v8, -v8, s44, -v20 ; D1C10008 A4505908 v_mad_f32 v20, -v21, s48, v22 ; D1C10014 24586115 v_mad_f32 v11, v15, v7, -v7 ; D1C1000B 841E0F0F v_mad_f32 v12, v1, v4, -v12 ; D1C1000C 84320901 v_mad_f32 v22, -v21, s49, v23 ; D1C10016 245C6315 v_mul_f32_e32 v1, v20, v1 ; 0A020314 v_mad_f32 v11, v10, v5, -v11 ; D1C1000B 842E0B0A v_mac_f32_e32 v1, v22, v10 ; 2C021516 v_mad_f32 v10, v16, v20, -v20 ; D1C1000A 84522910 v_mac_f32_e32 v10, v22, v4 ; 2C140916 v_mad_f32 v4, v6, v11, -v11 ; D1C10004 842E1706 v_mul_f32_e32 v4, v20, v4 ; 0A080914 v_mad_f32 v3, v6, v3, -v3 ; D1C10003 840E0706 v_mad_f32 v8, -v21, s50, v8 ; D1C10008 24206515 v_mac_f32_e32 v4, v22, v3 ; 2C080716 v_mad_f32 v3, v6, v12, -v12 ; D1C10003 84321906 v_mac_f32_e32 v4, v8, v3 ; 2C080708 v_mac_f32_e32 v1, v8, v7 ; 2C020F08 v_mac_f32_e32 v10, v8, v5 ; 2C140B08 exp 15, 35, 0, 0, 0, v0, v14, v18, v19 ; C400023F 13120E00 v_mov_b32_e32 v3, 1.0 ; 7E0602F2 exp 15, 36, 0, 0, 0, v4, v1, v10, v3 ; C400024F 030A0104 s_waitcnt expcnt(0) ; BF8C0F0F v_xor_b32_e32 v1, v14, v9 ; 2A02130E v_mad_f32 v3, 2.0, v18, -v19 ; D1C10003 844E24F4 exp 15, 12, 0, 1, 0, v0, v1, v3, v19 ; C40008CF 13030100 s_waitcnt expcnt(0) ; BF8C0F0F Shader epilog disassembly: s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 80 VGPRS: 32 Spilled VGPRs: 0 Code Size: 1840 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 8 ******************** Pixel Shader: Shader main disassembly: s_wqm_b64 exec, exec ; BEFE077E s_mov_b32 m0, s11 ; BEFC000B v_interp_p1_f32 v0, v2, 0, 0, [m0] ; D4000002 v_interp_p2_f32 v0, [v0], v3, 0, 0, [m0] ; D4010003 v_interp_p1_f32 v1, v2, 1, 0, [m0] ; D4040102 v_interp_p2_f32 v1, [v1], v3, 1, 0, [m0] ; D4050103 v_interp_p1_f32 v7, v2, 0, 1, [m0] ; D41C0402 v_interp_p2_f32 v7, [v7], v3, 0, 1, [m0] ; D41D0403 v_interp_p1_f32 v8, v2, 1, 1, [m0] ; D4200502 v_interp_p2_f32 v8, [v8], v3, 1, 1, [m0] ; D4210503 v_interp_p1_f32 v5, v2, 0, 2, [m0] ; D4140802 v_interp_p2_f32 v5, [v5], v3, 0, 2, [m0] ; D4150803 v_interp_p1_f32 v4, v2, 1, 2, [m0] ; D4100902 v_interp_p2_f32 v4, [v4], v3, 1, 2, [m0] ; D4110903 v_interp_p1_f32 v6, v2, 3, 2, [m0] ; D4180B02 v_interp_p2_f32 v6, [v6], v3, 3, 2, [m0] ; D4190B03 v_interp_p1_f32 v9, v2, 0, 3, [m0] ; D4240C02 v_interp_p2_f32 v9, [v9], v3, 0, 3, [m0] ; D4250C03 v_interp_p1_f32 v10, v2, 1, 3, [m0] ; D4280D02 v_interp_p2_f32 v10, [v10], v3, 1, 3, [m0] ; D4290D03 v_interp_p1_f32 v2, v2, 2, 3, [m0] ; D4080E02 v_mul_f32_e32 v11, v9, v9 ; 0A161309 v_interp_p2_f32 v2, [v2], v3, 2, 3, [m0] ; D4090E03 v_mac_f32_e32 v11, v10, v10 ; 2C16150A v_mac_f32_e32 v11, v2, v2 ; 2C160502 v_writelane_b32 v22, s10, 0 ; D28A0016 0001000A v_cmp_nlt_f32_e32 vcc, 0, v11 ; 7C9C1680 s_and_saveexec_b64 s[0:1], vcc ; BE80206A s_xor_b64 s[0:1], exec, s[0:1] ; 8880007E v_cmp_le_f32_e32 vcc, 0, v11 ; 7C861680 v_mul_f32_e32 v3, 0x7f7fffff, v11 ; 0A0616FF 7F7FFFFF v_mov_b32_e32 v12, 0xff7fffff ; 7E1802FF FF7FFFFF v_cndmask_b32_e32 v3, v12, v3, vcc ; 0006070C s_or_saveexec_b64 s[0:1], s[0:1] ; BE802100 s_xor_b64 exec, exec, s[0:1] ; 88FE007E v_rsq_f32_e32 v3, v11 ; 7E06490B s_or_b64 exec, exec, s[0:1] ; 87FE007E s_load_dwordx4 s[16:19], s[2:3], 0x10 ; C00A0401 00000010 v_mul_f32_e32 v9, v9, v3 ; 0A120709 v_mul_f32_e32 v11, v2, v3 ; 0A160702 v_mul_f32_e32 v10, v10, v3 ; 0A14070A v_mad_f32 v9, 0, v11, -v9 ; D1C10009 84261680 v_mad_f32 v10, 0, v11, -v10 ; D1C1000A 842A1680 v_mac_f32_e32 v11, v2, v3 ; 2C160702 v_mad_f32 v2, -v3, v2, v11 ; D1C10002 242E0503 v_mov_b32_e32 v11, 0x3f13cd3a ; 7E1602FF 3F13CD3A v_mul_f32_e32 v3, v11, v2 ; 0A06050B v_madmk_f32_e32 v12, v10, 0x3f5105ec, v3 ; 2E18070A 3F5105EC v_madmk_f32_e32 v3, v9, 0xbf3504f3, v3 ; 2E060709 BF3504F3 v_mov_b32_e32 v14, 0xbed105ec ; 7E1C02FF BED105EC v_mul_f32_e32 v9, 0x3f3504f3, v9 ; 0A1212FF 3F3504F3 v_mac_f32_e32 v9, v14, v10 ; 2C12150E v_mac_f32_e32 v9, v11, v2 ; 2C12050B v_mac_f32_e32 v3, v14, v10 ; 2C06150E v_add_f32_e64 v12, 0, v12 clamp ; D101800C 00021880 v_max_f32_e32 v10, 0x358637bd, v12 ; 161418FF 358637BD v_add_f32_e64 v3, 0, v3 clamp ; D1018003 00020680 v_add_f32_e64 v9, 0, v9 clamp ; D1018009 00021280 v_mov_b32_e32 v2, 0xff7fffff ; 7E0402FF FF7FFFFF v_cmp_nge_f32_e32 vcc, 0, v10 ; 7C921480 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[6:7], vcc ; BE86206A s_xor_b64 s[6:7], exec, s[6:7] ; 8886067E v_log_f32_e32 v2, v10 ; 7E04430A s_or_b64 exec, exec, s[6:7] ; 87FE067E s_load_dwordx4 s[0:3], s[2:3], 0x20 ; C00A0001 00000020 s_buffer_load_dword s23, s[16:19], 0x8 ; C02205C8 00000008 s_buffer_load_dword s52, s[16:19], 0xc ; C0220D08 0000000C v_max_f32_e32 v10, 0x358637bd, v3 ; 161406FF 358637BD v_cmp_nge_f32_e32 vcc, 0, v10 ; 7C921480 v_mov_b32_e32 v3, 0xff7fffff ; 7E0602FF FF7FFFFF s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 vcc, vcc ; BEEA206A s_xor_b64 vcc, exec, vcc ; 88EA6A7E v_log_f32_e32 v3, v10 ; 7E06430A s_or_b64 exec, exec, vcc ; 87FE6A7E s_buffer_load_dword s6, s[0:3], 0x150 ; C0220180 00000150 s_buffer_load_dword s9, s[0:3], 0x160 ; C0220240 00000160 s_buffer_load_dword s11, s[0:3], 0x164 ; C02202C0 00000164 s_buffer_load_dword s12, s[0:3], 0x168 ; C0220300 00000168 s_buffer_load_dword s64, s[0:3], 0x180 ; C0221000 00000180 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v22, s6, 3 ; D28A0016 00010606 s_buffer_load_dword s6, s[0:3], 0x154 ; C0220180 00000154 s_buffer_load_dword s65, s[0:3], 0x184 ; C0221040 00000184 s_buffer_load_dword s66, s[0:3], 0x188 ; C0221080 00000188 s_buffer_load_dword s67, s[0:3], 0x1c0 ; C02210C0 000001C0 s_buffer_load_dword s53, s[0:3], 0x1c4 ; C0220D40 000001C4 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v22, s6, 2 ; D28A0016 00010406 s_buffer_load_dword s6, s[0:3], 0x158 ; C0220180 00000158 s_buffer_load_dword s13, s[0:3], 0x1c8 ; C0220340 000001C8 s_buffer_load_dword s8, s[0:3], 0x1cc ; C0220200 000001CC s_buffer_load_dword s72, s[0:3], 0x1d0 ; C0221200 000001D0 s_buffer_load_dword s10, s[0:3], 0x2c0 ; C0220280 000002C0 s_buffer_load_dword s54, s[0:3], 0x2c4 ; C0220D80 000002C4 s_buffer_load_dword s55, s[0:3], 0x2c8 ; C0220DC0 000002C8 s_buffer_load_dword s21, s[0:3], 0x2d0 ; C0220540 000002D0 s_buffer_load_dword s22, s[0:3], 0x2d4 ; C0220580 000002D4 s_buffer_load_dword s20, s[0:3], 0x2d8 ; C0220500 000002D8 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v22, s6, 1 ; D28A0016 00010206 s_load_dwordx4 s[0:3], s[4:5], 0xb0 ; C00A0002 000000B0 s_load_dwordx8 s[56:63], s[4:5], 0x80 ; C00E0E02 00000080 s_buffer_load_dword s15, s[16:19], 0x0 ; C02203C8 00000000 s_buffer_load_dword s14, s[16:19], 0x4 ; C0220388 00000004 s_load_dwordx4 s[32:35], s[4:5], 0xf0 ; C00A0802 000000F0 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v22, s0, 4 ; D28A0016 00010800 v_writelane_b32 v22, s1, 5 ; D28A0016 00010A01 v_writelane_b32 v22, s2, 6 ; D28A0016 00010C02 v_writelane_b32 v22, s3, 7 ; D28A0016 00010E03 v_writelane_b32 v22, s56, 8 ; D28A0016 00011038 v_writelane_b32 v22, s57, 9 ; D28A0016 00011239 v_writelane_b32 v22, s58, 10 ; D28A0016 0001143A v_writelane_b32 v22, s59, 11 ; D28A0016 0001163B v_writelane_b32 v22, s60, 12 ; D28A0016 0001183C v_writelane_b32 v22, s61, 13 ; D28A0016 00011A3D v_writelane_b32 v22, s62, 14 ; D28A0016 00011C3E v_writelane_b32 v22, s63, 15 ; D28A0016 00011E3F s_load_dwordx4 s[48:51], s[4:5], 0x130 ; C00A0C02 00000130 s_load_dwordx4 s[36:39], s[4:5], 0x30 ; C00A0902 00000030 s_load_dwordx8 s[40:47], s[4:5], 0x0 ; C00E0A02 00000000 s_load_dwordx8 s[24:31], s[4:5], 0x40 ; C00E0602 00000040 s_load_dwordx4 s[16:19], s[4:5], 0x70 ; C00A0402 00000070 s_load_dwordx8 s[56:63], s[4:5], 0xc0 ; C00E0E02 000000C0 s_load_dwordx8 s[0:7], s[4:5], 0x100 ; C00E0002 00000100 v_mov_b32_e32 v11, s23 ; 7E160217 v_mov_b32_e32 v10, s52 ; 7E140234 v_max_f32_e32 v12, 0x358637bd, v9 ; 161812FF 358637BD v_cmp_nge_f32_e32 vcc, 0, v12 ; 7C921880 v_mov_b32_e32 v9, 0xff7fffff ; 7E1202FF FF7FFFFF s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 vcc, vcc ; BEEA206A s_xor_b64 vcc, exec, vcc ; 88EA6A7E v_log_f32_e32 v9, v12 ; 7E12430C s_or_b64 exec, exec, vcc ; 87FE6A7E v_mul_f32_e32 v17, s67, v7 ; 0A220E43 v_rcp_f32_e32 v7, v6 ; 7E0E4506 image_sample v[14:16], v[0:1], s[24:31], s[16:19] dmask:0x7 ; F0800700 00860E00 v_mac_f32_e32 v2, s72, v2 ; 2C040448 v_mac_f32_e32 v3, s72, v3 ; 2C060648 s_waitcnt vmcnt(0) ; BF8C0F70 v_mul_f32_e32 v19, s10, v14 ; 0A261C0A v_exp_f32_e32 v2, v2 ; 7E044102 v_mul_f32_e32 v18, s53, v8 ; 0A241035 v_mov_b32_e32 v8, 0x7c207e81 ; 7E1002FF 7C207E81 v_cmp_eq_f32_e32 vcc, 0, v6 ; 7C840C80 v_cndmask_b32_e32 v6, v7, v8, vcc ; 000C1107 v_mac_f32_e32 v9, s72, v9 ; 2C121248 v_mul_f32_e32 v20, s54, v15 ; 0A281E36 v_exp_f32_e32 v3, v3 ; 7E064103 v_mul_f32_e32 v2, v2, v19 ; 0A042702 v_mul_f32_e32 v5, v6, v5 ; 0A0A0B06 v_mul_f32_e32 v4, v6, v4 ; 0A080906 v_mac_f32_e32 v2, v3, v20 ; 2C042903 v_mul_f32_e32 v21, s55, v16 ; 0A2A2037 v_exp_f32_e32 v7, v9 ; 7E0E4109 v_mov_b32_e32 v3, 0x3eaaaaab ; 7E0602FF 3EAAAAAB v_mac_f32_e32 v2, v7, v21 ; 2C042B07 v_mul_f32_e32 v7, v3, v19 ; 0A0E2703 v_mac_f32_e32 v10, s15, v5 ; 2C140A0F v_mac_f32_e32 v11, s14, v4 ; 2C16080E image_sample v[4:6], v[10:11], s[0:7], s[48:51] dmask:0x7 ; F0800700 0180040A v_mac_f32_e32 v7, v3, v20 ; 2C0E2903 image_sample v[10:12], v[10:11], s[56:63], s[32:35] dmask:0x7 ; F0800700 010E0A0A s_nop 0 ; BF800000 image_sample v[14:16], v[0:1], s[40:47], s[36:39] dmask:0x7 ; F0800700 012A0E00 s_waitcnt vmcnt(0) ; BF8C0F70 v_mul_f32_e32 v0, s21, v14 ; 0A001C15 v_mac_f32_e32 v7, v3, v21 ; 2C0E2B03 v_mul_f32_e32 v1, s22, v15 ; 0A021E16 v_mul_f32_e32 v3, s20, v16 ; 0A062014 v_readlane_b32 s16, v22, 8 ; D2890010 00011116 v_readlane_b32 s17, v22, 9 ; D2890011 00011316 v_readlane_b32 s18, v22, 10 ; D2890012 00011516 v_readlane_b32 s19, v22, 11 ; D2890013 00011716 v_readlane_b32 s0, v22, 4 ; D2890000 00010916 v_readlane_b32 s20, v22, 12 ; D2890014 00011916 v_readlane_b32 s1, v22, 5 ; D2890001 00010B16 v_readlane_b32 s21, v22, 13 ; D2890015 00011B16 v_readlane_b32 s2, v22, 6 ; D2890002 00010D16 v_readlane_b32 s22, v22, 14 ; D2890016 00011D16 v_readlane_b32 s3, v22, 7 ; D2890003 00010F16 v_readlane_b32 s23, v22, 15 ; D2890017 00011F16 v_mad_f32 v4, v2, v0, v4 ; D1C10004 04120102 v_mad_f32 v5, v2, v1, v5 ; D1C10005 04160302 v_mac_f32_e32 v6, v2, v3 ; 2C0C0702 v_mad_f32 v8, v7, v0, v10 ; D1C10008 042A0107 v_mad_f32 v9, v7, v1, v11 ; D1C10009 042E0307 image_sample v[0:2], v[17:18], s[16:23], s[0:3] dmask:0x7 ; F0800700 00040011 v_mac_f32_e32 v12, v7, v3 ; 2C180707 s_waitcnt vmcnt(0) ; BF8C0F70 v_mul_f32_e32 v3, v0, v0 ; 0A060100 v_mul_f32_e32 v7, v1, v1 ; 0A0E0301 v_mul_f32_e32 v11, v3, v0 ; 0A160103 v_mul_f32_e32 v10, v2, v2 ; 0A140502 v_mad_f32 v16, v7, v1, v11 ; D1C10010 042E0307 v_mac_f32_e32 v16, v10, v2 ; 2C20050A v_mad_f32 v3, -v0, v3, v16 ; D1C10003 24420700 v_mul_f32_e32 v14, v7, v1 ; 0A1C0307 v_mad_f32 v7, -v1, v7, v16 ; D1C10007 24420F01 v_mac_f32_e32 v11, s13, v3 ; 2C16060D v_mul_f32_e32 v15, v10, v2 ; 0A1E050A v_mad_f32 v10, -v2, v10, v16 ; D1C1000A 24421502 v_mul_f32_e32 v16, s9, v0 ; 0A200009 v_mul_f32_e32 v0, s64, v11 ; 0A001640 v_mac_f32_e32 v14, s13, v7 ; 2C1C0E0D v_mul_f32_e32 v17, s11, v1 ; 0A22020B v_mul_f32_e32 v1, s65, v14 ; 0A021C41 v_mac_f32_e32 v15, s13, v10 ; 2C1E140D v_mul_f32_e32 v0, s8, v0 ; 0A000008 v_readlane_b32 s0, v22, 3 ; D2890000 00010716 v_mad_f32 v0, v4, v0, s0 ; D1C10000 00020104 v_mul_f32_e32 v18, s12, v2 ; 0A24040C v_mul_f32_e32 v2, s66, v15 ; 0A041E42 v_mul_f32_e32 v1, s8, v1 ; 0A020208 v_readlane_b32 s0, v22, 2 ; D2890000 00010516 v_mad_f32 v1, v5, v1, s0 ; D1C10001 00020305 v_mul_f32_e32 v2, s8, v2 ; 0A040408 v_readlane_b32 s0, v22, 1 ; D2890000 00010316 v_mad_f32 v2, v6, v2, s0 ; D1C10002 00020506 v_mac_f32_e32 v2, v12, v18 ; 2C04250C v_mac_f32_e32 v0, v8, v16 ; 2C002108 v_mac_f32_e32 v1, v9, v17 ; 2C022309 v_readlane_b32 s10, v22, 0 ; D289000A 00010116 v_mov_b32_e32 v3, v2 ; 7E060302 Shader epilog disassembly: v_cvt_pkrtz_f16_f32_e64 v0, v0, v1 ; D2960000 00020300 v_cvt_pkrtz_f16_f32_e64 v1, v2, v3 ; D2960001 00020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; C4001C0F 00000100 s_endpgm ; BF810000 *** SHADER CONFIG *** SPI_PS_INPUT_ADDR = 0xd077 SPI_PS_INPUT_ENA = 0x0002 *** SHADER STATS *** SGPRS: 80 VGPRS: 24 Spilled VGPRs: 0 Code Size: 1356 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** [...]