[...] [traceshaders] glCreateShader(GL_VERTEX_SHADER) → shader=73 [traceshaders] glShaderSource(shader=73) [traceshaders] ================================================================================ #version 410 #extension GL_ARB_explicit_attrib_location : require #ifdef GL_ARB_separate_shader_objects #extension GL_ARB_separate_shader_objects : enable #endif subroutine void SubroutineType(); struct vec1 { float x; }; struct uvec1 { uint x; }; struct ivec1 { int x; }; vec4 InstrHelper; out gl_PerVertex { vec4 gl_Position; float gl_PointSize; float gl_ClipDistance[];}; layout(std140) uniform; uniform cbuffer_0 { // $Globals vec4 Const0[63]; }; uniform cbuffer_1 { // VSOffsetConstants vec4 Const1[5]; }; layout(location = 0) in vec4 dcl_Input0; vec4 Input0; layout(location = 1) in vec4 dcl_Input1; vec4 Input1; layout(location = 2) in vec4 dcl_Input2; vec4 Input2; layout(location = 4) in vec4 dcl_Input4; vec4 Input4; layout(location = 5) in vec4 dcl_Input5; vec4 Input5; layout(location = 6) in vec4 dcl_Input6; vec4 Input6; layout(location = 7) in vec4 dcl_Input7; vec4 Input7; layout(location = 0) out vec4 VtxGeoOutput0; #define Output0 VtxGeoOutput0 layout(location = 1) out vec4 VtxGeoOutput1; #define Output1 VtxGeoOutput1 layout(location = 2) out vec4 VtxGeoOutput2; #define Output2 VtxGeoOutput2 layout(location = 3) out vec4 VtxGeoOutput3; #define Output3 VtxGeoOutput3 layout(location = 4) out vec4 VtxGeoOutput4; #define Output4 VtxGeoOutput4 #undef Output5 #define Output5 phase0_Output5 vec4 phase0_Output5; vec4 Temp[4]; ivec4 Temp_int[4]; uvec4 Temp_uint[4]; void main() { Input0 = dcl_Input0; Input1 = dcl_Input1; Input2 = dcl_Input2; Input4 = dcl_Input4; Input5 = dcl_Input5; Input6 = dcl_Input6; Input7 = dcl_Input7; Temp[0].xyz = Input1.xyz * vec3(uintBitsToFloat(1073741824u), uintBitsToFloat(1073741824u), uintBitsToFloat(1073741824u)) + vec3(uintBitsToFloat(3212836864u), uintBitsToFloat(3212836864u), uintBitsToFloat(3212836864u)); Temp[1] = Input2 * vec4(uintBitsToFloat(1073741824u), uintBitsToFloat(1073741824u), uintBitsToFloat(1073741824u), uintBitsToFloat(1073741824u)) + vec4(uintBitsToFloat(3212836864u), uintBitsToFloat(3212836864u), uintBitsToFloat(3212836864u), uintBitsToFloat(3212836864u)); Temp[2].xyz = Temp[0].xyz * Temp[1].yzx; Temp[0].xyz = Temp[1].xyz * Temp[0].yzx + (-Temp[2].xyz); Temp[0].xyz = Temp[1].www * Temp[0].xyz; Temp[2].xyz = Temp[1].yzx * Temp[0].xyz; Temp[2].xyz = Temp[0].zxy * Temp[1].zxy + (-Temp[2].xyz); Temp[2].xyz = Temp[1].www * Temp[2].xyz; Temp[2].yw = Temp[2].yy * Const0[1].xz; Temp[2].xy = Const0[0].xz * Temp[2].xx + Temp[2].yw; Temp[2].xy = Const0[2].xz * Temp[2].zz + Temp[2].xy; Temp[0].zw = Temp[0].zz * Const0[1].xz; Temp[0].yz = Const0[0].xz * Temp[0].yy + Temp[0].zw; Temp[0].xy = Const0[2].xz * Temp[0].xx + Temp[0].yz; Temp[2].z = Temp[0].x; Temp[3].xy = Temp[1].yy * Const0[1].xz; Temp[1].xy = Const0[0].xz * Temp[1].xx + Temp[3].xy; Temp[0].xw = Const0[2].xz * Temp[1].zz + Temp[1].xy; Output1.w = Temp[1].w * Const0[51].x; Temp[2].w = Temp[0].x; Temp[0].x = dot(Temp[2].xzw, Temp[2].xzw); Temp[0].x = ( ( Temp[0].x < 0.0 ) ? 0.0 : ( ( Temp[0].x > 0.0 ) ? inversesqrt( Temp[0].x ) : ( 3.4028235E+38 * sign( Temp[0].x ) ) ) ); Output0.xyz = Temp[0].xxx * Temp[2].xzw; Temp[0].z = Temp[2].y; Temp[0].x = dot(Temp[0].yzw, Temp[0].yzw); Temp[0].x = ( ( Temp[0].x < 0.0 ) ? 0.0 : ( ( Temp[0].x > 0.0 ) ? inversesqrt( Temp[0].x ) : ( 3.4028235E+38 * sign( Temp[0].x ) ) ) ); Output1.xyz = Temp[0].xxx * Temp[0].zyw; Output2.xy = Input7.xy * Const0[52].xy + Const0[52].wz; Output3.xy = Input4.xy; Output3.zw = Input5.yx; Output4.xy = Input6.xy; Output4.zw = vec2(uintBitsToFloat(0u), uintBitsToFloat(0u)); Temp[0].x = Const0[18].w * uintBitsToFloat(1067114824u) + Const0[46].w; Temp[1].xyz = Input0.xyz * Const0[57].xyz + Const0[56].xyz; Temp[0].y = Const0[49].w * Temp[1].x + Temp[0].x; Temp[0].y = Temp[0].y * uintBitsToFloat(1065353216u); Temp[0].z = uintBitsToFloat((Temp[0].y>=(-Temp[0].y)) ? 0xFFFFFFFFu : 0u); Temp[0].y = fract(abs(Temp[0].y)); Temp[0].y = (floatBitsToInt(Temp[0]).z != 0) ? Temp[0].y : (-Temp[0].y); Temp[0].y = Temp[0].y * uintBitsToFloat(1086918619u); Temp[0].z = Const0[50].x * Temp[1].x + Temp[0].x; Temp[0].z = Temp[0].z * uintBitsToFloat(1056964608u); Temp[0].w = uintBitsToFloat((Temp[0].z>=(-Temp[0].z)) ? 0xFFFFFFFFu : 0u); Temp[0].z = fract(abs(Temp[0].z)); Temp[0].z = (floatBitsToInt(Temp[0]).w != 0) ? Temp[0].z : (-Temp[0].z); Temp[0].z = Temp[0].z * uintBitsToFloat(1086918619u); Temp[0].yz = sin(Temp[0].yz); Temp[0].y = Temp[0].z + Temp[0].y; Temp[0].y = Temp[0].y * Const0[50].y; Temp[0].zw = Temp[0].xx * vec2(uintBitsToFloat(1065353216u), uintBitsToFloat(1061481552u)); Temp[0].x = Temp[0].x + Const0[48].x; Temp[2].xy = uintBitsToFloat(uvec2(greaterThanEqual(Temp[0].zwzz, (-Temp[0].zwzz)).xy) * 0xFFFFFFFFu); Temp[0].zw = fract(abs(Temp[0].zw)); Temp[0].z = (floatBitsToInt(Temp[2]).x != 0) ? Temp[0].z : (-Temp[0].z); Temp[0].w = (floatBitsToInt(Temp[2]).y != 0) ? Temp[0].w : (-Temp[0].w); Temp[0].zw = Temp[0].zw * vec2(uintBitsToFloat(1086918619u), uintBitsToFloat(1086918619u)); Temp[0].zw = sin(Temp[0].zw); Temp[0].z = Temp[0].w + Temp[0].z; Temp[0].z = Temp[0].z + uintBitsToFloat(1065353216u); Temp[0].w = Temp[1].x * Const0[49].z; Temp[0].z = Temp[0].z * Temp[0].w; Temp[2].z = Temp[0].z * uintBitsToFloat(1045220557u) + Temp[0].y; Temp[0].y = Const0[46].z * Temp[1].x + (-Temp[0].x); Temp[0].xz = Const0[48].yw * Temp[1].xx + (-Temp[0].xx); Temp[0].xyz = Temp[0].xyz * vec3(uintBitsToFloat(1045494470u), uintBitsToFloat(1055439406u), uintBitsToFloat(1065353216u)); Temp[0].w = uintBitsToFloat((Temp[0].y>=(-Temp[0].y)) ? 0xFFFFFFFFu : 0u); Temp[0].y = fract(abs(Temp[0].y)); Temp[0].y = (floatBitsToInt(Temp[0]).w != 0) ? Temp[0].y : (-Temp[0].y); Temp[0].y = Temp[0].y * uintBitsToFloat(1086918619u); Temp[3].xy = uintBitsToFloat(uvec2(greaterThanEqual(Temp[0].xzxx, (-Temp[0].xzxx)).xy) * 0xFFFFFFFFu); Temp[0].xz = fract(abs(Temp[0].xz)); Temp[0].x = (floatBitsToInt(Temp[3]).x != 0) ? Temp[0].x : (-Temp[0].x); Temp[0].z = (floatBitsToInt(Temp[3]).y != 0) ? Temp[0].z : (-Temp[0].z); Temp[0].xz = Temp[0].xz * vec2(uintBitsToFloat(1086918619u), uintBitsToFloat(1086918619u)); Temp[0].xyz = sin(Temp[0].xyz); Temp[0].x = Temp[0].x + Temp[0].y; Temp[0].y = Temp[0].x * Const0[49].y; Temp[0].x = abs(Temp[0].x) * Const0[48].z; Temp[2].y = Const0[49].x * Temp[0].z + Temp[0].y; Temp[0].y = Const0[46].y * uintBitsToFloat(3256877056u) + uintBitsToFloat(3240099840u); Temp[2].x = Temp[0].x * Temp[0].y; Temp[0].xyz = Temp[2].xyz * Input6.xxx; Temp[1].w = Input0.w; Temp[0].w = uintBitsToFloat(0u); Temp[0] = Temp[0] + Temp[1]; Temp[1] = Temp[0].yyyy * Const0[1]; Temp[1] = Const0[0] * Temp[0].xxxx + Temp[1]; Temp[1] = Const0[2] * Temp[0].zzzz + Temp[1]; Temp[0] = Const0[3] * Temp[0].wwww + Temp[1]; Temp[1] = Temp[0].yyyy * Const1[1]; Temp[1] = Const1[0] * Temp[0].xxxx + Temp[1]; Temp[1] = Const1[2] * Temp[0].zzzz + Temp[1]; Output5 = Const1[3] * Temp[0].wwww + Temp[1]; gl_Position = vec4(phase0_Output5); gl_Position.y = -gl_Position.y; gl_Position.z = gl_Position.z * 2.0 - gl_Position.w; return; } [traceshaders] ================================================================================ [traceshaders] glCompileShader(shader=73) [traceshaders] glCreateProgram() = 74 [traceshaders] glAttachShader(program=74, shader=73) [traceshaders] glLinkProgram(program=74) SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT PROPERTY NEXT_SHADER FRAG DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL IN[5] DCL IN[6] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL OUT[5], GENERIC[4] DCL CONST[1][0..62] DCL CONST[2][0..4] DCL TEMP[0..11], LOCAL IMM[0] FLT32 { 2.0000, -1.0000, 0.0000, 340282346638528859811704183484516925440.0000} IMM[1] UINT32 {0, 16, 32, 816} IMM[2] UINT32 {832, 288, 736, 912} IMM[3] FLT32 { 1.2100, 6.2832, 0.5000, 1.0000} IMM[4] UINT32 {896, 784, 4294967295, 800} IMM[5] INT32 {0, 1, 0, 0} IMM[6] FLT32 { 1.0000, 0.7692, 0.2000, -40.0000} IMM[7] UINT32 {768, 48, 1, 0} IMM[8] FLT32 { 0.2041, 0.4545, 1.0000, -10.0000} 0: MAD TEMP[0].xyz, IN[1].xyzz, IMM[0].xxxx, IMM[0].yyyy 1: MAD TEMP[1], IN[2], IMM[0].xxxx, IMM[0].yyyy 2: MUL TEMP[2].xyz, TEMP[0].xyzz, TEMP[1].yzxx 3: MAD TEMP[0].xyz, TEMP[1].xyzz, TEMP[0].yzxx, -TEMP[2].xyzz 4: MUL TEMP[0].xyz, TEMP[1].wwww, TEMP[0].xyzz 5: MUL TEMP[2].xyz, TEMP[1].yzxx, TEMP[0].xyzz 6: MAD TEMP[2].xyz, TEMP[0].zxyy, TEMP[1].zxyy, -TEMP[2].xyzz 7: MUL TEMP[2].xyz, TEMP[1].wwww, TEMP[2].xyzz 8: MUL TEMP[3].xy, TEMP[2].yyyy, CONST[1][1].xzzz 9: MAD TEMP[2].xy, CONST[1][0].xzzz, TEMP[2].xxxx, TEMP[3].xyyy 10: MAD TEMP[2].xy, CONST[1][2].xzzz, TEMP[2].zzzz, TEMP[2].xyyy 11: MUL TEMP[3].xy, TEMP[0].zzzz, CONST[1][1].xzzz 12: MAD TEMP[3].xy, CONST[1][0].xzzz, TEMP[0].yyyy, TEMP[3].xyyy 13: MAD TEMP[0].xy, CONST[1][2].xzzz, TEMP[0].xxxx, TEMP[3].xyyy 14: MOV TEMP[2].z, TEMP[0].xxxx 15: MUL TEMP[3].xy, TEMP[1].yyyy, CONST[1][1].xzzz 16: MAD TEMP[1].xy, CONST[1][0].xzzz, TEMP[1].xxxx, TEMP[3].xyyy 17: MAD TEMP[4].xy, CONST[1][2].xzzz, TEMP[1].zzzz, TEMP[1].xyyy 18: MOV TEMP[0].w, TEMP[4].xxxy 19: MUL TEMP[5].x, TEMP[1].wwww, CONST[1][51].xxxx 20: MOV TEMP[5].w, TEMP[5].xxxx 21: MOV TEMP[2].w, TEMP[4].xxxx 22: DP3 TEMP[0].x, TEMP[2].xzww, TEMP[2].xzww 23: FSLT TEMP[4].x, TEMP[0].xxxx, IMM[0].zzzz 24: UIF TEMP[4].xxxx :0 25: MOV TEMP[4].x, IMM[0].zzzz 26: ELSE :0 27: FSLT TEMP[6].x, IMM[0].zzzz, TEMP[0].xxxx 28: UIF TEMP[6].xxxx :0 29: RSQ TEMP[6].x, TEMP[0].xxxx 30: ELSE :0 31: SSG TEMP[7].x, TEMP[0].xxxx 32: MUL TEMP[6].x, IMM[0].wwww, TEMP[7].xxxx 33: ENDIF 34: MOV TEMP[4].x, TEMP[6].xxxx 35: ENDIF 36: MUL TEMP[4].xyz, TEMP[4].xxxx, TEMP[2].xzww 37: MOV TEMP[0].z, TEMP[2].yyyy 38: DP3 TEMP[0].x, TEMP[0].yzww, TEMP[0].yzww 39: FSLT TEMP[6].x, TEMP[0].xxxx, IMM[0].zzzz 40: UIF TEMP[6].xxxx :0 41: MOV TEMP[6].x, IMM[0].zzzz 42: ELSE :0 43: FSLT TEMP[7].x, IMM[0].zzzz, TEMP[0].xxxx 44: UIF TEMP[7].xxxx :0 45: RSQ TEMP[7].x, TEMP[0].xxxx 46: ELSE :0 47: SSG TEMP[8].x, TEMP[0].xxxx 48: MUL TEMP[7].x, IMM[0].wwww, TEMP[8].xxxx 49: ENDIF 50: MOV TEMP[6].x, TEMP[7].xxxx 51: ENDIF 52: MUL TEMP[5].xyz, TEMP[6].xxxx, TEMP[0].zyww 53: MAD TEMP[6].xy, IN[6].xyyy, CONST[1][52].xyyy, CONST[1][52].wzzz 54: MOV TEMP[7].xy, IN[3].xyxx 55: MOV TEMP[7].zw, IN[4].xxyx 56: MOV TEMP[8].xy, IN[5].xyxx 57: MOV TEMP[8].zw, IMM[0].zzzz 58: MAD TEMP[0].x, CONST[1][18].wwww, IMM[3].xxxx, CONST[1][46].wwww 59: MAD TEMP[1].xyz, IN[0].xyzz, CONST[1][57].xyzz, CONST[1][56].xyzz 60: MAD TEMP[9].x, CONST[1][49].wwww, TEMP[1].xxxx, TEMP[0].xxxx 61: FSGE TEMP[10].x, TEMP[9].xxxx, -TEMP[9].xxxx 62: UIF TEMP[10].xxxx :0 63: MOV TEMP[10].x, IMM[4].zzzz 64: ELSE :0 65: MOV TEMP[10].x, IMM[1].xxxx 66: ENDIF 67: MOV TEMP[0].z, TEMP[10].xxxx 68: ABS TEMP[9].x, TEMP[9].xxxx 69: FRC TEMP[9].x, TEMP[9].xxxx 70: USNE TEMP[10].x, TEMP[0].zzzz, IMM[5].xxxx 71: UIF TEMP[10].xxxx :0 72: MOV TEMP[10].x, TEMP[9].xxxx 73: ELSE :0 74: MOV TEMP[10].x, -TEMP[9].xxxx 75: ENDIF 76: MUL TEMP[9].x, TEMP[10].xxxx, IMM[3].yyyy 77: MAD TEMP[10].x, CONST[1][50].xxxx, TEMP[1].xxxx, TEMP[0].xxxx 78: MUL TEMP[10].x, TEMP[10].xxxx, IMM[3].zzzz 79: FSGE TEMP[11].x, TEMP[10].xxxx, -TEMP[10].xxxx 80: UIF TEMP[11].xxxx :0 81: MOV TEMP[11].x, IMM[4].zzzz 82: ELSE :0 83: MOV TEMP[11].x, IMM[1].xxxx 84: ENDIF 85: MOV TEMP[0].w, TEMP[11].xxxx 86: ABS TEMP[10].x, TEMP[10].xxxx 87: FRC TEMP[10].x, TEMP[10].xxxx 88: USNE TEMP[11].x, TEMP[0].wwww, IMM[5].xxxx 89: UIF TEMP[11].xxxx :0 90: MOV TEMP[11].x, TEMP[10].xxxx 91: ELSE :0 92: MOV TEMP[11].x, -TEMP[10].xxxx 93: ENDIF 94: MUL TEMP[10].x, TEMP[11].xxxx, IMM[3].yyyy 95: SIN TEMP[9].x, TEMP[9].xxxx 96: SIN TEMP[9].y, TEMP[10].xxxx 97: ADD TEMP[9].x, TEMP[9].yyyy, TEMP[9].xxxx 98: MUL TEMP[9].x, TEMP[9].xxxx, CONST[1][50].yyyy 99: MUL TEMP[10].xy, TEMP[0].xxxx, IMM[6].xyyy 100: ADD TEMP[0].x, TEMP[0].xxxx, CONST[1][48].xxxx 101: FSGE TEMP[11].xy, TEMP[10].xyxx, -TEMP[10].xyxx 102: AND TEMP[11].xy, TEMP[11].xyyy, IMM[5].yyyy 103: INEG TEMP[11].xy, TEMP[11].xyyy 104: MOV TEMP[2].xy, TEMP[11].xyxx 105: ABS TEMP[10].xy, TEMP[10].xyyy 106: FRC TEMP[10].xy, TEMP[10].xyyy 107: USNE TEMP[11].x, TEMP[2].xxxx, IMM[5].xxxx 108: UIF TEMP[11].xxxx :0 109: MOV TEMP[11].x, TEMP[10].xxxx 110: ELSE :0 111: MOV TEMP[11].x, -TEMP[10].xxxx 112: ENDIF 113: MOV TEMP[0].z, TEMP[11].xxxx 114: USNE TEMP[11].x, TEMP[2].yyyy, IMM[5].xxxx 115: UIF TEMP[11].xxxx :0 116: MOV TEMP[11].x, TEMP[10].yyyy 117: ELSE :0 118: MOV TEMP[11].x, -TEMP[10].yyyy 119: ENDIF 120: MOV TEMP[0].w, TEMP[11].xxxx 121: MUL TEMP[10].xy, TEMP[0].zwww, IMM[3].yyyy 122: SIN TEMP[11].x, TEMP[10].xxxx 123: SIN TEMP[11].y, TEMP[10].yyyy 124: ADD TEMP[10].x, TEMP[11].yyyy, TEMP[11].xxxx 125: ADD TEMP[10].x, TEMP[10].xxxx, IMM[3].wwww 126: MUL TEMP[11].x, TEMP[1].xxxx, CONST[1][49].zzzz 127: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx 128: MAD TEMP[9].x, TEMP[10].xxxx, IMM[6].zzzz, TEMP[9].xxxx 129: MOV TEMP[2].z, TEMP[9].xxxx 130: MAD TEMP[9].x, CONST[1][46].zzzz, TEMP[1].xxxx, -TEMP[0].xxxx 131: MOV TEMP[0].y, TEMP[9].xxxx 132: MAD TEMP[9].xy, CONST[1][48].ywww, TEMP[1].xxxx, -TEMP[0].xxxx 133: MOV TEMP[0].xz, TEMP[9].xxyx 134: MUL TEMP[0].xyz, TEMP[0].xyzz, IMM[8].xyzz 135: FSGE TEMP[9].x, TEMP[0].yyyy, -TEMP[0].yyyy 136: UIF TEMP[9].xxxx :0 137: MOV TEMP[9].x, IMM[4].zzzz 138: ELSE :0 139: MOV TEMP[9].x, IMM[1].xxxx 140: ENDIF 141: MOV TEMP[0].w, TEMP[9].xxxx 142: ABS TEMP[9].x, TEMP[0].yyyy 143: FRC TEMP[9].x, TEMP[9].xxxx 144: USNE TEMP[10].x, TEMP[0].wwww, IMM[5].xxxx 145: UIF TEMP[10].xxxx :0 146: MOV TEMP[10].x, TEMP[9].xxxx 147: ELSE :0 148: MOV TEMP[10].x, -TEMP[9].xxxx 149: ENDIF 150: MUL TEMP[9].x, TEMP[10].xxxx, IMM[3].yyyy 151: FSGE TEMP[10].xy, TEMP[0].xzxx, -TEMP[0].xzxx 152: AND TEMP[10].xy, TEMP[10].xyyy, IMM[5].yyyy 153: INEG TEMP[10].xy, TEMP[10].xyyy 154: MOV TEMP[3].xy, TEMP[10].xyxx 155: ABS TEMP[10].xy, TEMP[0].xzzz 156: FRC TEMP[10].xy, TEMP[10].xyyy 157: USNE TEMP[11].x, TEMP[3].xxxx, IMM[5].xxxx 158: UIF TEMP[11].xxxx :0 159: MOV TEMP[11].x, TEMP[10].xxxx 160: ELSE :0 161: MOV TEMP[11].x, -TEMP[10].xxxx 162: ENDIF 163: MOV TEMP[0].x, TEMP[11].xxxx 164: USNE TEMP[3].x, TEMP[3].yyyy, IMM[5].xxxx 165: UIF TEMP[3].xxxx :0 166: MOV TEMP[3].x, TEMP[10].yyyy 167: ELSE :0 168: MOV TEMP[3].x, -TEMP[10].yyyy 169: ENDIF 170: MOV TEMP[0].z, TEMP[3].xxxx 171: MUL TEMP[3].xy, TEMP[0].xzzz, IMM[3].yyyy 172: SIN TEMP[10].x, TEMP[3].xxxx 173: SIN TEMP[10].y, TEMP[9].xxxx 174: SIN TEMP[10].z, TEMP[3].yyyy 175: ADD TEMP[0].x, TEMP[10].xxxx, TEMP[10].yyyy 176: MUL TEMP[3].x, TEMP[0].xxxx, CONST[1][49].yyyy 177: ABS TEMP[9].x, TEMP[0].xxxx 178: MUL TEMP[0].x, TEMP[9].xxxx, CONST[1][48].zzzz 179: MAD TEMP[3].x, CONST[1][49].xxxx, TEMP[10].zzzz, TEMP[3].xxxx 180: MOV TEMP[2].y, TEMP[3].xxxx 181: MAD TEMP[3].x, CONST[1][46].yyyy, IMM[6].wwww, IMM[8].wwww 182: MUL TEMP[2].x, TEMP[0].xxxx, TEMP[3].xxxx 183: MUL TEMP[0].xyz, TEMP[2].xyzz, IN[5].xxxx 184: MOV TEMP[1].w, IN[0].wwww 185: MOV TEMP[0].w, IMM[0].zzzz 186: ADD TEMP[0], TEMP[0], TEMP[1] 187: MUL TEMP[1], TEMP[0].yyyy, CONST[1][1] 188: MAD TEMP[1], CONST[1][0], TEMP[0].xxxx, TEMP[1] 189: MAD TEMP[1], CONST[1][2], TEMP[0].zzzz, TEMP[1] 190: MAD TEMP[0], CONST[1][3], TEMP[0].wwww, TEMP[1] 191: MUL TEMP[1], TEMP[0].yyyy, CONST[2][1] 192: MAD TEMP[1], CONST[2][0], TEMP[0].xxxx, TEMP[1] 193: MAD TEMP[1], CONST[2][2], TEMP[0].zzzz, TEMP[1] 194: MAD TEMP[0], CONST[2][3], TEMP[0].wwww, TEMP[1] 195: MOV TEMP[1].xw, TEMP[0].xxxw 196: MOV TEMP[1].y, -TEMP[0].yyyy 197: MAD TEMP[0].x, TEMP[0].zzzz, IMM[0].xxxx, -TEMP[0].wwww 198: MOV TEMP[1].z, TEMP[0].xxxx 199: MOV OUT[1], TEMP[4] 200: MOV OUT[2], TEMP[5] 201: MOV OUT[3], TEMP[6] 202: MOV OUT[4], TEMP[7] 203: MOV OUT[5], TEMP[8] 204: MOV OUT[0], TEMP[1] 205: END radeonsi: Compiling shader 49 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_vs <{ float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) { main_body: %20 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1, !amdgpu.uniform !0 %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20, align 16, !tbaa !1 %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 0) %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 4) %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 8) %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 12) %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16) %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 20) %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 24) %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 28) %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32) %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36) %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 40) %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 44) %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 48) %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 52) %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 56) %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 60) %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 300) %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 740) %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 744) %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 748) %42 = call float @llvm.SI.load.const(<16 x i8> %21, i32 768) %43 = call float @llvm.SI.load.const(<16 x i8> %21, i32 772) %44 = call float @llvm.SI.load.const(<16 x i8> %21, i32 776) %45 = call float @llvm.SI.load.const(<16 x i8> %21, i32 780) %46 = call float @llvm.SI.load.const(<16 x i8> %21, i32 784) %47 = call float @llvm.SI.load.const(<16 x i8> %21, i32 788) %48 = call float @llvm.SI.load.const(<16 x i8> %21, i32 792) %49 = call float @llvm.SI.load.const(<16 x i8> %21, i32 796) %50 = call float @llvm.SI.load.const(<16 x i8> %21, i32 800) %51 = call float @llvm.SI.load.const(<16 x i8> %21, i32 804) %52 = call float @llvm.SI.load.const(<16 x i8> %21, i32 816) %53 = call float @llvm.SI.load.const(<16 x i8> %21, i32 832) %54 = call float @llvm.SI.load.const(<16 x i8> %21, i32 836) %55 = call float @llvm.SI.load.const(<16 x i8> %21, i32 840) %56 = call float @llvm.SI.load.const(<16 x i8> %21, i32 844) %57 = call float @llvm.SI.load.const(<16 x i8> %21, i32 896) %58 = call float @llvm.SI.load.const(<16 x i8> %21, i32 900) %59 = call float @llvm.SI.load.const(<16 x i8> %21, i32 904) %60 = call float @llvm.SI.load.const(<16 x i8> %21, i32 912) %61 = call float @llvm.SI.load.const(<16 x i8> %21, i32 916) %62 = call float @llvm.SI.load.const(<16 x i8> %21, i32 920) %63 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2, !amdgpu.uniform !0 %64 = load <16 x i8>, <16 x i8> addrspace(2)* %63, align 16, !tbaa !1 %65 = call float @llvm.SI.load.const(<16 x i8> %64, i32 0) %66 = call float @llvm.SI.load.const(<16 x i8> %64, i32 4) %67 = call float @llvm.SI.load.const(<16 x i8> %64, i32 8) %68 = call float @llvm.SI.load.const(<16 x i8> %64, i32 12) %69 = call float @llvm.SI.load.const(<16 x i8> %64, i32 16) %70 = call float @llvm.SI.load.const(<16 x i8> %64, i32 20) %71 = call float @llvm.SI.load.const(<16 x i8> %64, i32 24) %72 = call float @llvm.SI.load.const(<16 x i8> %64, i32 28) %73 = call float @llvm.SI.load.const(<16 x i8> %64, i32 32) %74 = call float @llvm.SI.load.const(<16 x i8> %64, i32 36) %75 = call float @llvm.SI.load.const(<16 x i8> %64, i32 40) %76 = call float @llvm.SI.load.const(<16 x i8> %64, i32 44) %77 = call float @llvm.SI.load.const(<16 x i8> %64, i32 48) %78 = call float @llvm.SI.load.const(<16 x i8> %64, i32 52) %79 = call float @llvm.SI.load.const(<16 x i8> %64, i32 56) %80 = call float @llvm.SI.load.const(<16 x i8> %64, i32 60) %81 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0, !amdgpu.uniform !0 %82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !1 %83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %82, i32 0, i32 %13) %84 = extractelement <4 x float> %83, i32 0 %85 = extractelement <4 x float> %83, i32 1 %86 = extractelement <4 x float> %83, i32 2 %87 = extractelement <4 x float> %83, i32 3 %88 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1, !amdgpu.uniform !0 %89 = load <16 x i8>, <16 x i8> addrspace(2)* %88, align 16, !tbaa !1 %90 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %89, i32 0, i32 %14) %91 = extractelement <4 x float> %90, i32 0 %92 = extractelement <4 x float> %90, i32 1 %93 = extractelement <4 x float> %90, i32 2 %94 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2, !amdgpu.uniform !0 %95 = load <16 x i8>, <16 x i8> addrspace(2)* %94, align 16, !tbaa !1 %96 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %95, i32 0, i32 %15) %97 = extractelement <4 x float> %96, i32 0 %98 = extractelement <4 x float> %96, i32 1 %99 = extractelement <4 x float> %96, i32 2 %100 = extractelement <4 x float> %96, i32 3 %101 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3, !amdgpu.uniform !0 %102 = load <16 x i8>, <16 x i8> addrspace(2)* %101, align 16, !tbaa !1 %103 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %102, i32 0, i32 %16) %104 = extractelement <4 x float> %103, i32 0 %105 = extractelement <4 x float> %103, i32 1 %106 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4, !amdgpu.uniform !0 %107 = load <16 x i8>, <16 x i8> addrspace(2)* %106, align 16, !tbaa !1 %108 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %107, i32 0, i32 %17) %109 = extractelement <4 x float> %108, i32 0 %110 = extractelement <4 x float> %108, i32 1 %111 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 5, !amdgpu.uniform !0 %112 = load <16 x i8>, <16 x i8> addrspace(2)* %111, align 16, !tbaa !1 %113 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %112, i32 0, i32 %18) %114 = extractelement <4 x float> %113, i32 0 %115 = extractelement <4 x float> %113, i32 1 %116 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 6, !amdgpu.uniform !0 %117 = load <16 x i8>, <16 x i8> addrspace(2)* %116, align 16, !tbaa !1 %118 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %117, i32 0, i32 %19) %119 = extractelement <4 x float> %118, i32 0 %120 = extractelement <4 x float> %118, i32 1 %121 = fmul float %91, 2.000000e+00 %122 = fadd float %121, -1.000000e+00 %123 = fmul float %92, 2.000000e+00 %124 = fadd float %123, -1.000000e+00 %125 = fmul float %93, 2.000000e+00 %126 = fadd float %125, -1.000000e+00 %127 = fmul float %97, 2.000000e+00 %128 = fadd float %127, -1.000000e+00 %129 = fmul float %98, 2.000000e+00 %130 = fadd float %129, -1.000000e+00 %131 = fmul float %99, 2.000000e+00 %132 = fadd float %131, -1.000000e+00 %133 = fmul float %100, 2.000000e+00 %134 = fadd float %133, -1.000000e+00 %135 = fmul float %122, %130 %136 = fmul float %124, %132 %137 = fmul float %126, %128 %138 = fmul float %128, %124 %139 = fsub float %138, %135 %140 = fmul float %130, %126 %141 = fsub float %140, %136 %142 = fmul float %132, %122 %143 = fsub float %142, %137 %144 = fmul float %134, %139 %145 = fmul float %134, %141 %146 = fmul float %134, %143 %147 = fmul float %130, %144 %148 = fmul float %132, %145 %149 = fmul float %128, %146 %150 = fmul float %146, %132 %151 = fsub float %150, %147 %152 = fmul float %144, %128 %153 = fsub float %152, %148 %154 = fmul float %145, %130 %155 = fsub float %154, %149 %156 = fmul float %134, %151 %157 = fmul float %134, %153 %158 = fmul float %134, %155 %159 = fmul float %157, %26 %160 = fmul float %157, %28 %161 = fmul float %22, %156 %162 = fadd float %161, %159 %163 = fmul float %24, %156 %164 = fadd float %163, %160 %165 = fmul float %30, %158 %166 = fadd float %165, %162 %167 = fmul float %32, %158 %168 = fadd float %167, %164 %169 = fmul float %146, %26 %170 = fmul float %146, %28 %171 = fmul float %22, %145 %172 = fadd float %171, %169 %173 = fmul float %24, %145 %174 = fadd float %173, %170 %175 = fmul float %30, %144 %176 = fadd float %175, %172 %177 = fmul float %32, %144 %178 = fadd float %177, %174 %179 = fmul float %130, %26 %180 = fmul float %130, %28 %181 = fmul float %22, %128 %182 = fadd float %181, %179 %183 = fmul float %24, %128 %184 = fadd float %183, %180 %185 = fmul float %30, %132 %186 = fadd float %185, %182 %187 = fmul float %32, %132 %188 = fadd float %187, %184 %189 = fmul float %134, %52 %190 = fmul float %166, %166 %191 = fmul float %176, %176 %192 = fadd float %191, %190 %193 = fmul float %186, %186 %194 = fadd float %192, %193 br i1 false, label %ENDIF, label %ELSE ELSE: ; preds = %main_body %195 = fcmp ogt float %194, 0.000000e+00 br i1 %195, label %IF49, label %ELSE50 ENDIF: ; preds = %IF49, %ELSE50, %main_body %temp16.0 = phi float [ 0.000000e+00, %main_body ], [ %205, %IF49 ], [ %209, %ELSE50 ] %196 = fmul float %temp16.0, %166 %197 = fmul float %temp16.0, %176 %198 = fmul float %temp16.0, %186 %199 = fmul float %178, %178 %200 = fmul float %168, %168 %201 = fadd float %200, %199 %202 = fmul float %188, %188 %203 = fadd float %201, %202 br i1 false, label %ENDIF51, label %ELSE53 IF49: ; preds = %ELSE %204 = call float @llvm.sqrt.f32(float %194) %205 = fdiv float 1.000000e+00, %204 br label %ENDIF ELSE50: ; preds = %ELSE %206 = fcmp ogt float %194, 0.000000e+00 %207 = select i1 %206, float 1.000000e+00, float %194 %208 = fcmp oge float %207, 0.000000e+00 %.op = fmul float %207, 0x47EFFFFFE0000000 %209 = select i1 %208, float %.op, float 0xC7EFFFFFE0000000 br label %ENDIF ELSE53: ; preds = %ENDIF %210 = fcmp ogt float %203, 0.000000e+00 br i1 %210, label %IF55, label %ELSE56 ENDIF51: ; preds = %IF55, %ELSE56, %ENDIF %temp24.1 = phi float [ 0.000000e+00, %ENDIF ], [ %383, %IF55 ], [ %387, %ELSE56 ] %211 = fmul float %temp24.1, %168 %212 = fmul float %temp24.1, %178 %213 = fmul float %temp24.1, %188 %214 = fmul float %119, %53 %215 = fadd float %214, %56 %216 = fmul float %120, %54 %217 = fadd float %216, %55 %218 = fmul float %38, 0x3FF35C2900000000 %219 = fadd float %218, %41 %220 = fmul float %84, %60 %221 = fadd float %220, %57 %222 = fmul float %85, %61 %223 = fadd float %222, %58 %224 = fmul float %86, %62 %225 = fadd float %224, %59 %226 = fmul float %49, %221 %227 = fadd float %226, %219 %228 = fsub float -0.000000e+00, %227 %229 = fcmp oge float %227, %228 %230 = call float @llvm.fabs.f32(float %227) %231 = call float @llvm.floor.f32(float %230) %232 = fsub float %230, %231 %233 = fsub float -0.000000e+00, %232 %temp40.1 = select i1 %229, float %232, float %233 %234 = fmul float %temp40.1, 0x401921FB60000000 %235 = fmul float %50, %221 %236 = fadd float %235, %219 %237 = fmul float %236, 5.000000e-01 %238 = fsub float -0.000000e+00, %237 %239 = fcmp oge float %237, %238 %240 = call float @llvm.fabs.f32(float %237) %241 = call float @llvm.floor.f32(float %240) %242 = fsub float %240, %241 %243 = fsub float -0.000000e+00, %242 %temp44.1 = select i1 %239, float %242, float %243 %244 = fmul float %temp44.1, 0x401921FB60000000 %245 = call float @llvm.sin.f32(float %234) %246 = call float @llvm.sin.f32(float %244) %247 = fadd float %246, %245 %248 = fmul float %247, %51 %249 = fmul float %219, 0x3FE89D8A00000000 %250 = fadd float %219, %42 %251 = fsub float -0.000000e+00, %219 %252 = fcmp oge float %219, %251 %253 = fsub float -0.000000e+00, %249 %254 = fcmp oge float %249, %253 %255 = call float @llvm.fabs.f32(float %219) %256 = call float @llvm.fabs.f32(float %249) %257 = call float @llvm.floor.f32(float %255) %258 = fsub float %255, %257 %259 = call float @llvm.floor.f32(float %256) %260 = fsub float %256, %259 %261 = fsub float -0.000000e+00, %258 %temp44.2 = select i1 %252, float %258, float %261 %262 = fsub float -0.000000e+00, %260 %temp44.3 = select i1 %254, float %260, float %262 %263 = fmul float %temp44.2, 0x401921FB60000000 %264 = fmul float %temp44.3, 0x401921FB60000000 %265 = call float @llvm.sin.f32(float %263) %266 = call float @llvm.sin.f32(float %264) %267 = fadd float %266, %265 %268 = fadd float %267, 1.000000e+00 %269 = fmul float %221, %48 %270 = fmul float %268, %269 %271 = fmul float %270, 0x3FC99999A0000000 %272 = fadd float %271, %248 %273 = fmul float %40, %221 %274 = fsub float %273, %250 %275 = fmul float %43, %221 %276 = fsub float %275, %250 %277 = fmul float %45, %221 %278 = fsub float %277, %250 %279 = fmul float %276, 0x3FCA1F58C0000000 %280 = fmul float %274, 0x3FDD1745C0000000 %281 = fsub float -0.000000e+00, %280 %282 = fcmp oge float %280, %281 %283 = call float @llvm.fabs.f32(float %280) %284 = call float @llvm.floor.f32(float %283) %285 = fsub float %283, %284 %286 = fsub float -0.000000e+00, %285 %temp40.2 = select i1 %282, float %285, float %286 %287 = fmul float %temp40.2, 0x401921FB60000000 %288 = fsub float -0.000000e+00, %279 %289 = fcmp oge float %279, %288 %290 = fsub float -0.000000e+00, %278 %291 = fcmp oge float %278, %290 %292 = call float @llvm.fabs.f32(float %279) %293 = call float @llvm.fabs.f32(float %278) %294 = call float @llvm.floor.f32(float %292) %295 = fsub float %292, %294 %296 = call float @llvm.floor.f32(float %293) %297 = fsub float %293, %296 %298 = fsub float -0.000000e+00, %295 %temp44.4 = select i1 %289, float %295, float %298 %299 = fsub float -0.000000e+00, %297 %temp12.0 = select i1 %291, float %297, float %299 %300 = fmul float %temp44.4, 0x401921FB60000000 %301 = fmul float %temp12.0, 0x401921FB60000000 %302 = call float @llvm.sin.f32(float %300) %303 = call float @llvm.sin.f32(float %287) %304 = call float @llvm.sin.f32(float %301) %305 = fadd float %302, %303 %306 = fmul float %305, %47 %307 = call float @llvm.fabs.f32(float %305) %308 = fmul float %307, %44 %309 = fmul float %46, %304 %310 = fadd float %309, %306 %311 = fmul float %39, -4.000000e+01 %312 = fadd float %311, -1.000000e+01 %313 = fmul float %308, %312 %314 = fmul float %313, %114 %315 = fmul float %310, %114 %316 = fmul float %272, %114 %317 = fadd float %314, %221 %318 = fadd float %315, %223 %319 = fadd float %316, %225 %320 = fadd float %87, 0.000000e+00 %321 = fmul float %318, %26 %322 = fmul float %318, %27 %323 = fmul float %318, %28 %324 = fmul float %318, %29 %325 = fmul float %22, %317 %326 = fadd float %325, %321 %327 = fmul float %23, %317 %328 = fadd float %327, %322 %329 = fmul float %24, %317 %330 = fadd float %329, %323 %331 = fmul float %25, %317 %332 = fadd float %331, %324 %333 = fmul float %30, %319 %334 = fadd float %333, %326 %335 = fmul float %31, %319 %336 = fadd float %335, %328 %337 = fmul float %32, %319 %338 = fadd float %337, %330 %339 = fmul float %33, %319 %340 = fadd float %339, %332 %341 = fmul float %34, %320 %342 = fadd float %341, %334 %343 = fmul float %35, %320 %344 = fadd float %343, %336 %345 = fmul float %36, %320 %346 = fadd float %345, %338 %347 = fmul float %37, %320 %348 = fadd float %347, %340 %349 = fmul float %344, %69 %350 = fmul float %344, %70 %351 = fmul float %344, %71 %352 = fmul float %344, %72 %353 = fmul float %65, %342 %354 = fadd float %353, %349 %355 = fmul float %66, %342 %356 = fadd float %355, %350 %357 = fmul float %67, %342 %358 = fadd float %357, %351 %359 = fmul float %68, %342 %360 = fadd float %359, %352 %361 = fmul float %73, %346 %362 = fadd float %361, %354 %363 = fmul float %74, %346 %364 = fadd float %363, %356 %365 = fmul float %75, %346 %366 = fadd float %365, %358 %367 = fmul float %76, %346 %368 = fadd float %367, %360 %369 = fmul float %77, %348 %370 = fadd float %369, %362 %371 = fmul float %78, %348 %372 = fadd float %371, %364 %373 = fmul float %79, %348 %374 = fadd float %373, %366 %375 = fmul float %80, %348 %376 = fadd float %375, %368 %377 = fsub float -0.000000e+00, %372 %378 = fmul float %374, 2.000000e+00 %379 = fsub float %378, %376 %380 = bitcast i32 %11 to float %381 = insertvalue <{ float, float, float }> undef, float %380, 2 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %196, float %197, float %198, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %211, float %212, float %213, float %189) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %215, float %217, float undef, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %104, float %105, float %110, float %109) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %114, float %115, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %370, float %377, float %379, float %376) ret <{ float, float, float }> %381 IF55: ; preds = %ELSE53 %382 = call float @llvm.sqrt.f32(float %203) %383 = fdiv float 1.000000e+00, %382 br label %ENDIF51 ELSE56: ; preds = %ELSE53 %384 = fcmp ogt float %203, 0.000000e+00 %385 = select i1 %384, float 1.000000e+00, float %203 %386 = fcmp oge float %385, 0.000000e+00 %.op89 = fmul float %385, 0x47EFFFFFE0000000 %387 = select i1 %386, float %.op89, float 0xC7EFFFFFE0000000 br label %ENDIF51 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #0 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #0 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #0 ; Function Attrs: nounwind readnone declare float @llvm.floor.f32(float) #0 ; Function Attrs: nounwind readnone declare float @llvm.sin.f32(float) #0 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { nounwind readnone } !0 = !{} !1 = !{!"const", null, i32 1} [traceshaders] glDetachShader(program=74, shader=73) [traceshaders] glDeleteShader(shader=73) [...] [traceshaders] glCreateShader(GL_FRAGMENT_SHADER) → shader=89 [traceshaders] glShaderSource(shader=89) [traceshaders] ================================================================================ #version 410 #extension GL_ARB_explicit_attrib_location : require #ifdef GL_ARB_separate_shader_objects #extension GL_ARB_separate_shader_objects : enable #endif subroutine void SubroutineType(); struct vec1 { float x; }; struct uvec1 { uint x; }; struct ivec1 { int x; }; vec4 InstrHelper; layout(std140) uniform; uniform cbuffer_0 { // $Globals vec4 Const0[63]; }; uniform sampler2D tex0_X_smp0; uniform sampler2D tex0; layout(location = 0) in vec4 VtxGeoOutput0; vec4 Input0; layout(location = 1) in vec4 VtxGeoOutput1; vec4 Input1; layout(location = 2) in vec4 VtxGeoOutput2; vec4 Input2; layout(location = 0) out vec4 PixOutput0; #define Output0 PixOutput0 layout(location = 1) out vec4 PixOutput1; #define Output1 PixOutput1 vec4 Temp[2]; ivec4 Temp_int[2]; uvec4 Temp_uint[2]; void main() { Input0 = VtxGeoOutput0; Input1 = VtxGeoOutput1; Input2 = VtxGeoOutput2; Temp[0].x = Input0.x * Input1.y; Temp[0].x = Input1.x * Input0.y + (-Temp[0].x); Temp[0].y = Temp[0].x * Input1.w; Temp[0].x = Input0.z; Temp[0].z = Input1.z; Temp[0].w = dot(Temp[0].xyz, Temp[0].xyz); Temp[0].w = ( ( Temp[0].w < 0.0 ) ? 0.0 : ( ( Temp[0].w > 0.0 ) ? inversesqrt( Temp[0].w ) : ( 3.4028235E+38 * sign( Temp[0].w ) ) ) ); Temp[0].xyz = Temp[0].www * Temp[0].xyz; Output0.xyz = Temp[0].xyz * vec3(uintBitsToFloat(1056964608u), uintBitsToFloat(1056964608u), uintBitsToFloat(1056964608u)) + vec3(uintBitsToFloat(1056964608u), uintBitsToFloat(1056964608u), uintBitsToFloat(1056964608u)); Output0.xyz = clamp(Output0.xyz, 0.0, 1.0); Temp[0].x = Const0[45].x + uintBitsToFloat(3225419776u); Temp[0].x = Temp[0].x * uintBitsToFloat(989855744u); Temp[0].x = clamp(Temp[0].x, 0.0, 1.0); Output0.w = ( ( ( Temp[0].x ) < 0.0 ) ? 0.0 : sqrt( Temp[0].x ) ); Temp[0] = texture(tex0_X_smp0, Input2.xy); Temp[1] = Temp[0].yyyy * Const0[59]; Temp[1] = Const0[58] * Temp[0].xxxx + Temp[1]; Temp[1] = Const0[60] * Temp[0].zzzz + Temp[1]; Temp[0] = Const0[61] * Temp[0].wwww + Temp[1]; Output1 = Temp[0] + Const0[62]; return; } [traceshaders] ================================================================================ [traceshaders] glCompileShader(shader=89) [traceshaders] glCreateProgram() = 90 [traceshaders] glAttachShader(program=90, shader=89) [traceshaders] glLinkProgram(program=90) SHADER KEY prolog.color_two_side = 0 prolog.poly_stipple = 0 prolog.force_persample_interp = 0 epilog.spi_shader_col_format = 0x0 epilog.color_is_int8 = 0x0 epilog.last_cbuf = 0 epilog.alpha_func = 0 epilog.alpha_to_one = 0 epilog.poly_line_smoothing = 0 epilog.clamp_color = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL SAMP[0] DCL SVIEW[0], 2D, FLOAT DCL CONST[1][0..62] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.0000, 340282346638528859811704183484516925440.0000, 0.5000, -3.0000} IMM[1] UINT32 {0, 720, 944, 928} IMM[2] FLT32 { 0.0020, 0.0000, 0.0000, 0.0000} IMM[3] UINT32 {960, 976, 992, 0} 0: MUL TEMP[0].x, IN[0].xxxx, IN[1].yyyy 1: MAD TEMP[0].x, IN[1].xxxx, IN[0].yyyy, -TEMP[0].xxxx 2: MUL TEMP[1].x, TEMP[0].xxxx, IN[1].wwww 3: MOV TEMP[0].y, TEMP[1].xxxx 4: MOV TEMP[0].x, IN[0].zzzz 5: MOV TEMP[0].z, IN[1].zzzz 6: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[0].xyzz 7: FSLT TEMP[2].x, TEMP[1].xxxx, IMM[0].xxxx 8: UIF TEMP[2].xxxx :0 9: MOV TEMP[2].x, IMM[0].xxxx 10: ELSE :0 11: FSLT TEMP[3].x, IMM[0].xxxx, TEMP[1].xxxx 12: UIF TEMP[3].xxxx :0 13: RSQ TEMP[3].x, TEMP[1].xxxx 14: ELSE :0 15: SSG TEMP[1].x, TEMP[1].xxxx 16: MUL TEMP[3].x, IMM[0].yyyy, TEMP[1].xxxx 17: ENDIF 18: MOV TEMP[2].x, TEMP[3].xxxx 19: ENDIF 20: MUL TEMP[0].xyz, TEMP[2].xxxx, TEMP[0].xyzz 21: MAD TEMP[1].xyz, TEMP[0].xyzz, IMM[0].zzzz, IMM[0].zzzz 22: MOV_SAT TEMP[1].xyz, TEMP[1].xyzz 23: ADD TEMP[0].x, CONST[1][45].xxxx, IMM[0].wwww 24: MUL TEMP[0].x, TEMP[0].xxxx, IMM[2].xxxx 25: MOV_SAT TEMP[0].x, TEMP[0].xxxx 26: FSLT TEMP[2].x, TEMP[0].xxxx, IMM[0].xxxx 27: UIF TEMP[2].xxxx :0 28: MOV TEMP[2].x, IMM[0].xxxx 29: ELSE :0 30: SQRT TEMP[2].x, TEMP[0].xxxx 31: ENDIF 32: MOV TEMP[1].w, TEMP[2].xxxx 33: MOV TEMP[2].xy, IN[2].xyyy 34: TEX TEMP[2], TEMP[2], SAMP[0], 2D 35: MUL TEMP[3], TEMP[2].yyyy, CONST[1][59] 36: MAD TEMP[3], CONST[1][58], TEMP[2].xxxx, TEMP[3] 37: MAD TEMP[3], CONST[1][60], TEMP[2].zzzz, TEMP[3] 38: MAD TEMP[0], CONST[1][61], TEMP[2].wwww, TEMP[3] 39: ADD TEMP[0], TEMP[0], CONST[1][62] 40: MOV OUT[0], TEMP[1] 41: MOV OUT[1], TEMP[0] 42: END radeonsi: Compiling shader 57 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1, !amdgpu.uniform !0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !1 %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 720) %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 928) %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 932) %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 936) %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 940) %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 944) %31 = call float @llvm.SI.load.const(<16 x i8> %24, i32 948) %32 = call float @llvm.SI.load.const(<16 x i8> %24, i32 952) %33 = call float @llvm.SI.load.const(<16 x i8> %24, i32 956) %34 = call float @llvm.SI.load.const(<16 x i8> %24, i32 960) %35 = call float @llvm.SI.load.const(<16 x i8> %24, i32 964) %36 = call float @llvm.SI.load.const(<16 x i8> %24, i32 968) %37 = call float @llvm.SI.load.const(<16 x i8> %24, i32 972) %38 = call float @llvm.SI.load.const(<16 x i8> %24, i32 976) %39 = call float @llvm.SI.load.const(<16 x i8> %24, i32 980) %40 = call float @llvm.SI.load.const(<16 x i8> %24, i32 984) %41 = call float @llvm.SI.load.const(<16 x i8> %24, i32 988) %42 = call float @llvm.SI.load.const(<16 x i8> %24, i32 992) %43 = call float @llvm.SI.load.const(<16 x i8> %24, i32 996) %44 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1000) %45 = call float @llvm.SI.load.const(<16 x i8> %24, i32 1004) %46 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0, !amdgpu.uniform !0 %47 = load <8 x i32>, <8 x i32> addrspace(2)* %46, align 32, !tbaa !1 %48 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %49 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %48, i64 0, i64 3, !amdgpu.uniform !0 %50 = load <4 x i32>, <4 x i32> addrspace(2)* %49, align 16, !tbaa !1 %51 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %52 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %53 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %6, <2 x i32> %8) %54 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %55 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %56 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %6, <2 x i32> %8) %57 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %6, <2 x i32> %8) %58 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) %59 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) %60 = fmul float %51, %55 %61 = fmul float %54, %52 %62 = fsub float %61, %60 %63 = fmul float %62, %57 %64 = fmul float %53, %53 %65 = fmul float %63, %63 %66 = fadd float %65, %64 %67 = fmul float %56, %56 %68 = fadd float %66, %67 br i1 false, label %ENDIF, label %ELSE ELSE: ; preds = %main_body %69 = fcmp ogt float %68, 0.000000e+00 br i1 %69, label %IF17, label %ELSE18 ENDIF: ; preds = %IF17, %ELSE18, %main_body %temp8.0 = phi float [ 0.000000e+00, %main_body ], [ %140, %IF17 ], [ %144, %ELSE18 ] %70 = fmul float %temp8.0, %53 %71 = fmul float %temp8.0, %63 %72 = fmul float %temp8.0, %56 %73 = fmul float %70, 5.000000e-01 %74 = fadd float %73, 5.000000e-01 %75 = fmul float %71, 5.000000e-01 %76 = fadd float %75, 5.000000e-01 %77 = fmul float %72, 5.000000e-01 %78 = fadd float %77, 5.000000e-01 %79 = call float @llvm.AMDGPU.clamp.(float %74, float 0.000000e+00, float 1.000000e+00) %80 = call float @llvm.AMDGPU.clamp.(float %76, float 0.000000e+00, float 1.000000e+00) %81 = call float @llvm.AMDGPU.clamp.(float %78, float 0.000000e+00, float 1.000000e+00) %82 = fadd float %25, -3.000000e+00 %83 = fmul float %82, 1.953125e-03 %84 = call float @llvm.AMDGPU.clamp.(float %83, float 0.000000e+00, float 1.000000e+00) %85 = fcmp olt float %84, 0.000000e+00 %86 = call float @llvm.sqrt.f32(float %84) %temp8.1 = select i1 %85, float 0.000000e+00, float %86 %87 = bitcast float %58 to i32 %88 = bitcast float %59 to i32 %89 = insertelement <2 x i32> undef, i32 %87, i32 0 %90 = insertelement <2 x i32> %89, i32 %88, i32 1 %91 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %90, <8 x i32> %47, <4 x i32> %50, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %92 = extractelement <4 x float> %91, i32 0 %93 = extractelement <4 x float> %91, i32 1 %94 = extractelement <4 x float> %91, i32 2 %95 = extractelement <4 x float> %91, i32 3 %96 = fmul float %93, %30 %97 = fmul float %93, %31 %98 = fmul float %93, %32 %99 = fmul float %93, %33 %100 = fmul float %26, %92 %101 = fadd float %100, %96 %102 = fmul float %27, %92 %103 = fadd float %102, %97 %104 = fmul float %28, %92 %105 = fadd float %104, %98 %106 = fmul float %29, %92 %107 = fadd float %106, %99 %108 = fmul float %34, %94 %109 = fadd float %108, %101 %110 = fmul float %35, %94 %111 = fadd float %110, %103 %112 = fmul float %36, %94 %113 = fadd float %112, %105 %114 = fmul float %37, %94 %115 = fadd float %114, %107 %116 = fmul float %38, %95 %117 = fadd float %116, %109 %118 = fmul float %39, %95 %119 = fadd float %118, %111 %120 = fmul float %40, %95 %121 = fadd float %120, %113 %122 = fmul float %41, %95 %123 = fadd float %122, %115 %124 = fadd float %117, %42 %125 = fadd float %119, %43 %126 = fadd float %121, %44 %127 = fadd float %123, %45 %128 = bitcast float %5 to i32 %129 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %128, 10 %130 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %129, float %79, 11 %131 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %130, float %80, 12 %132 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %131, float %81, 13 %133 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %132, float %temp8.1, 14 %134 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %133, float %124, 15 %135 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %134, float %125, 16 %136 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %135, float %126, 17 %137 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %136, float %127, 18 %138 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %137, float %21, 24 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %138 IF17: ; preds = %ELSE %139 = call float @llvm.sqrt.f32(float %68) %140 = fdiv float 1.000000e+00, %139 br label %ENDIF ELSE18: ; preds = %ELSE %141 = fcmp ogt float %68, 0.000000e+00 %142 = select i1 %141, float 1.000000e+00, float %68 %143 = fcmp oge float %142, 0.000000e+00 %.op = fmul float %142, 0x47EFFFFFE0000000 %144 = select i1 %143, float %.op, float 0xC7EFFFFFE0000000 br label %ENDIF } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 attributes #0 = { "InitialPSInputAddr"="36983" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{} !1 = !{!"const", null, i32 1} [traceshaders] glDetachShader(program=90, shader=89) [traceshaders] glDeleteShader(shader=89) [...] [traceshaders] glCreateShader(GL_FRAGMENT_SHADER) → shader=381 [traceshaders] glShaderSource(shader=381) [traceshaders] ================================================================================ #version 410 #extension GL_ARB_explicit_attrib_location : require #ifdef GL_ARB_separate_shader_objects #extension GL_ARB_separate_shader_objects : enable #endif subroutine void SubroutineType(); struct vec1 { float x; }; struct uvec1 { uint x; }; struct ivec1 { int x; }; vec4 InstrHelper; layout(std140) uniform; uniform cbuffer_0 { // $Globals vec4 Const0[46]; }; uniform cbuffer_2 { // PSOffsetConstants vec4 Const2[3]; }; uniform sampler2D tex0_X_smp0; uniform sampler2D tex0; uniform sampler2D tex1_X_smp1; uniform sampler2D tex1; uniform sampler2D tex2_X_smp2; uniform sampler2D tex2; uniform sampler2D tex3_X_smp3; uniform sampler2D tex3; uniform sampler2D tex4_X_smp4; uniform sampler2D tex4; layout(location = 0) in vec4 VtxGeoOutput0; vec4 Input0; layout(location = 1) in vec4 VtxGeoOutput1; vec4 Input1; layout(location = 3) in vec4 VtxGeoOutput3; vec4 Input3; layout(location = 4) in vec4 VtxGeoOutput4; vec4 Input4; layout(location = 0) out vec4 PixOutput0; #define Output0 PixOutput0 vec4 Temp[5]; ivec4 Temp_int[5]; uvec4 Temp_uint[5]; void main() { Input0 = VtxGeoOutput0; Input1 = VtxGeoOutput1; Input3 = VtxGeoOutput3; Input4 = VtxGeoOutput4; Temp[0].x = dot(Input4.xyz, Input4.xyz); Temp[0].x = ( ( Temp[0].x < 0.0 ) ? 0.0 : ( ( Temp[0].x > 0.0 ) ? inversesqrt( Temp[0].x ) : ( 3.4028235E+38 * sign( Temp[0].x ) ) ) ); Temp[0].xyz = Temp[0].xxx * Input4.xyz; Temp[0].xyz = Temp[0].zzz * vec3(uintBitsToFloat(0u), uintBitsToFloat(0u), uintBitsToFloat(1073741824u)) + (-Temp[0].xyz); Temp[1].x = dot(Temp[0].yz, vec2(uintBitsToFloat(1062274540u), uintBitsToFloat(1058262330u))); Temp[1].x = clamp(Temp[1].x, 0.0, 1.0); Temp[1].y = dot(Temp[0].zxy, vec3(uintBitsToFloat(1058262330u), uintBitsToFloat(3207922931u), uintBitsToFloat(3201369580u))); Temp[1].y = clamp(Temp[1].y, 0.0, 1.0); Temp[1].z = dot(Temp[0].xyz, vec3(uintBitsToFloat(1060439283u), uintBitsToFloat(3201369580u), uintBitsToFloat(1058262330u))); Temp[1].z = clamp(Temp[1].z, 0.0, 1.0); Temp[0].xyz = max(Temp[1].xyz, vec3(uintBitsToFloat(897988541u), uintBitsToFloat(897988541u), uintBitsToFloat(897988541u))); InstrHelper.x = ( ( Temp[0].x ) <= 0.0 ) ? -3.4028235E+38 : log2( Temp[0].x ); InstrHelper.y = ( ( Temp[0].y ) <= 0.0 ) ? -3.4028235E+38 : log2( Temp[0].y ); InstrHelper.z = ( ( Temp[0].z ) <= 0.0 ) ? -3.4028235E+38 : log2( Temp[0].z ); Temp[0].xyz = vec3(InstrHelper.xyz); Temp[0].w = Const0[29].x + uintBitsToFloat(1065353216u); Temp[0].xyz = Temp[0].xyz * Temp[0].www; Temp[0].xyz = exp2(Temp[0].xyz); Temp[1].xyz = texture(tex1_X_smp1, Input0.xy).xyz; Temp[1].xyz = Temp[1].xyz * Const0[44].xyz; Temp[0].x = dot(Temp[1].xyz, Temp[0].xyz); Temp[0].y = dot(Temp[1].xyz, vec3(uintBitsToFloat(1051372203u), uintBitsToFloat(1051372203u), uintBitsToFloat(1051372203u))); InstrHelper.z = ( Input3.x ) / ( ( Input3.w == 0.0 ) ? 3.0E-37 : ( Input3.w ) ); InstrHelper.w = ( Input3.y ) / ( ( Input3.w == 0.0 ) ? 3.0E-37 : ( Input3.w ) ); Temp[0].zw = vec2(InstrHelper.zw); Temp[0].zw = Temp[0].zw * Const2[0].xy + Const2[0].wz; Temp[1].xyz = texture(tex4_X_smp4, Temp[0].zw).xyz; Temp[2].xyz = texture(tex3_X_smp3, Temp[0].zw).xyz; Temp[3].xyz = texture(tex0_X_smp0, Input0.xy).xyz; Temp[3] = Temp[3].xyzz * Const0[45].xyzz; Temp[1] = Temp[3] * Temp[0].xxxx + Temp[1].xyzz; Temp[0] = Temp[3].xyww * Temp[0].yyyy + Temp[2].xyzz; Temp[2].xy = Input1.xy * Const0[28].xy; Temp[2].xyz = texture(tex2_X_smp2, Temp[2].xy).xyz; Temp[3] = Temp[2].xyzz * Temp[2].xyzz; Temp[4] = Temp[2].xyzz * Temp[3]; Temp[2].w = dot(Temp[4].xyw, vec3(uintBitsToFloat(1065353216u), uintBitsToFloat(1065353216u), uintBitsToFloat(1065353216u))); Temp[3] = (-Temp[3].xyww) * Temp[2].xyzz + Temp[2].wwww; Temp[2] = Temp[2].xyzz * Const0[22].xyzz; Temp[3] = Const0[28].zzzz * Temp[3] + Temp[4]; Temp[3] = Temp[3] * Const0[24].xyzz; Temp[3] = Temp[3] * Const0[28].wwww; Temp[1] = Temp[1] * Temp[3]; Temp[0] = Temp[2] * Temp[0] + Temp[1]; Output0 = Temp[0] + Const0[21].xyzz; return; } [traceshaders] ================================================================================ [traceshaders] glCompileShader(shader=381) [traceshaders] glCreateProgram() = 382 [traceshaders] glAttachShader(program=382, shader=381) [traceshaders] glLinkProgram(program=382) SHADER KEY prolog.color_two_side = 0 prolog.poly_stipple = 0 prolog.force_persample_interp = 0 epilog.spi_shader_col_format = 0x0 epilog.color_is_int8 = 0x0 epilog.last_cbuf = 0 epilog.alpha_func = 0 epilog.alpha_to_one = 0 epilog.poly_line_smoothing = 0 epilog.clamp_color = 0 FRAG DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[3], PERSPECTIVE DCL IN[3], GENERIC[4], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SVIEW[0], 2D, FLOAT DCL SVIEW[1], 2D, FLOAT DCL SVIEW[2], 2D, FLOAT DCL SVIEW[3], 2D, FLOAT DCL SVIEW[4], 2D, FLOAT DCL CONST[1][0..2] DCL CONST[2][0..45] DCL TEMP[0..6], LOCAL IMM[0] FLT32 { 0.0000, 340282346638528859811704183484516925440.0000, 2.0000, 0.0000} IMM[1] FLT32 { 0.8165, 0.5774, -0.7071, -0.4082} IMM[2] FLT32 { 0.7071, -0.4082, 0.5774, -340282346638528859811704183484516925440.0000} IMM[3] UINT32 {1, 464, 704, 0} IMM[4] FLT32 { 1.0000, 0.3333, 0.0000, 0.0000} IMM[5] UINT32 {720, 448, 352, 384} IMM[6] UINT32 {336, 0, 0, 0} 0: DP3 TEMP[0].x, IN[3].xyzz, IN[3].xyzz 1: FSLT TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx 2: UIF TEMP[1].xxxx :0 3: MOV TEMP[1].x, IMM[0].xxxx 4: ELSE :0 5: FSLT TEMP[2].x, IMM[0].xxxx, TEMP[0].xxxx 6: UIF TEMP[2].xxxx :0 7: RSQ TEMP[2].x, TEMP[0].xxxx 8: ELSE :0 9: SSG TEMP[3].x, TEMP[0].xxxx 10: MUL TEMP[2].x, IMM[0].yyyy, TEMP[3].xxxx 11: ENDIF 12: MOV TEMP[1].x, TEMP[2].xxxx 13: ENDIF 14: MUL TEMP[0].xyz, TEMP[1].xxxx, IN[3].xyzz 15: MAD TEMP[0].xyz, TEMP[0].zzzz, IMM[0].xxzz, -TEMP[0].xyzz 16: DP2 TEMP[1].x, TEMP[0].yzzz, IMM[1].xyyy 17: MOV_SAT TEMP[1].x, TEMP[1].xxxx 18: DP3 TEMP[2].x, TEMP[0].zxyy, IMM[1].yzww 19: MOV_SAT TEMP[2].x, TEMP[2].xxxx 20: MOV TEMP[1].y, TEMP[2].xxxx 21: DP3 TEMP[2].x, TEMP[0].xyzz, IMM[2].xyzz 22: MOV_SAT TEMP[2].x, TEMP[2].xxxx 23: MOV TEMP[1].z, TEMP[2].xxxx 24: MAX TEMP[0].xyz, TEMP[1].xyzz, IMM[0].wwww 25: FSGE TEMP[2].x, IMM[0].xxxx, TEMP[0].xxxx 26: UIF TEMP[2].xxxx :0 27: MOV TEMP[2].x, IMM[2].wwww 28: ELSE :0 29: LG2 TEMP[2].x, TEMP[0].xxxx 30: ENDIF 31: MOV TEMP[2].x, TEMP[2].xxxx 32: FSGE TEMP[3].x, IMM[0].xxxx, TEMP[0].yyyy 33: UIF TEMP[3].xxxx :0 34: MOV TEMP[3].x, IMM[2].wwww 35: ELSE :0 36: LG2 TEMP[3].x, TEMP[0].yyyy 37: ENDIF 38: MOV TEMP[2].y, TEMP[3].xxxx 39: FSGE TEMP[3].x, IMM[0].xxxx, TEMP[0].zzzz 40: UIF TEMP[3].xxxx :0 41: MOV TEMP[3].x, IMM[2].wwww 42: ELSE :0 43: LG2 TEMP[3].x, TEMP[0].zzzz 44: ENDIF 45: MOV TEMP[2].z, TEMP[3].xxxx 46: ADD TEMP[3].x, CONST[2][29].xxxx, IMM[4].xxxx 47: MUL TEMP[0].xyz, TEMP[2].xyzz, TEMP[3].xxxx 48: EX2 TEMP[3].x, TEMP[0].xxxx 49: EX2 TEMP[3].y, TEMP[0].yyyy 50: EX2 TEMP[3].z, TEMP[0].zzzz 51: MOV TEMP[4].xy, IN[0].xyyy 52: TEX TEMP[4].xyz, TEMP[4], SAMP[1], 2D 53: MUL TEMP[1].xyz, TEMP[4].xyzz, CONST[2][44].xyzz 54: DP3 TEMP[0].x, TEMP[1].xyzz, TEMP[3].xyzz 55: DP3 TEMP[3].x, TEMP[1].xyzz, IMM[4].yyyy 56: FSEQ TEMP[4].x, IN[2].wwww, IMM[0].xxxx 57: UIF TEMP[4].xxxx :0 58: MOV TEMP[4].x, IMM[4].zzzz 59: ELSE :0 60: MOV TEMP[4].x, IN[2].wwww 61: ENDIF 62: RCP TEMP[4].x, TEMP[4].xxxx 63: MUL TEMP[4].x, IN[2].xxxx, TEMP[4].xxxx 64: MOV TEMP[2].z, TEMP[4].xxxx 65: FSEQ TEMP[4].x, IN[2].wwww, IMM[0].xxxx 66: UIF TEMP[4].xxxx :0 67: MOV TEMP[4].x, IMM[4].zzzz 68: ELSE :0 69: MOV TEMP[4].x, IN[2].wwww 70: ENDIF 71: RCP TEMP[4].x, TEMP[4].xxxx 72: MUL TEMP[4].x, IN[2].yyyy, TEMP[4].xxxx 73: MOV TEMP[2].w, TEMP[4].xxxx 74: MAD TEMP[2].xy, TEMP[2].zwww, CONST[1][0].xyyy, CONST[1][0].wzzz 75: MOV TEMP[4].xy, TEMP[2].xyyy 76: TEX TEMP[4].xyz, TEMP[4], SAMP[4], 2D 77: MOV TEMP[2].xy, TEMP[2].xyyy 78: TEX TEMP[2].xyz, TEMP[2], SAMP[3], 2D 79: MOV TEMP[5].xy, IN[0].xyyy 80: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D 81: MUL TEMP[5], TEMP[5].xyzz, CONST[2][45].xyzz 82: MAD TEMP[1], TEMP[5], TEMP[0].xxxx, TEMP[4].xyzz 83: MAD TEMP[0], TEMP[5].xyww, TEMP[3].xxxx, TEMP[2].xyzz 84: MUL TEMP[2].xy, IN[1].xyyy, CONST[2][28].xyyy 85: MOV TEMP[3].xy, TEMP[2].xyyy 86: TEX TEMP[3].xyz, TEMP[3], SAMP[2], 2D 87: MUL TEMP[5], TEMP[3].xyzz, TEMP[3].xyzz 88: MUL TEMP[4], TEMP[3].xyzz, TEMP[5] 89: DP3 TEMP[6].x, TEMP[4].xyww, IMM[4].xxxx 90: MAD TEMP[5], -TEMP[5].xyww, TEMP[3].xyzz, TEMP[6].xxxx 91: MUL TEMP[2], TEMP[3].xyzz, CONST[2][22].xyzz 92: MAD TEMP[5], CONST[2][28].zzzz, TEMP[5], TEMP[4] 93: MUL TEMP[5], TEMP[5], CONST[2][24].xyzz 94: MUL TEMP[5], TEMP[5], CONST[2][28].wwww 95: MUL TEMP[1], TEMP[1], TEMP[5] 96: MAD TEMP[0], TEMP[2], TEMP[0], TEMP[1] 97: ADD TEMP[0], TEMP[0], CONST[2][21].xyzz 98: MOV OUT[0], TEMP[0] 99: END radeonsi: Compiling shader 208 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 { main_body: %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1, !amdgpu.uniform !0 %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !tbaa !1 %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0) %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 4) %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 8) %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 12) %29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2, !amdgpu.uniform !0 %30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !1 %31 = call float @llvm.SI.load.const(<16 x i8> %30, i32 336) %32 = call float @llvm.SI.load.const(<16 x i8> %30, i32 340) %33 = call float @llvm.SI.load.const(<16 x i8> %30, i32 344) %34 = call float @llvm.SI.load.const(<16 x i8> %30, i32 352) %35 = call float @llvm.SI.load.const(<16 x i8> %30, i32 356) %36 = call float @llvm.SI.load.const(<16 x i8> %30, i32 360) %37 = call float @llvm.SI.load.const(<16 x i8> %30, i32 384) %38 = call float @llvm.SI.load.const(<16 x i8> %30, i32 388) %39 = call float @llvm.SI.load.const(<16 x i8> %30, i32 392) %40 = call float @llvm.SI.load.const(<16 x i8> %30, i32 448) %41 = call float @llvm.SI.load.const(<16 x i8> %30, i32 452) %42 = call float @llvm.SI.load.const(<16 x i8> %30, i32 456) %43 = call float @llvm.SI.load.const(<16 x i8> %30, i32 460) %44 = call float @llvm.SI.load.const(<16 x i8> %30, i32 464) %45 = call float @llvm.SI.load.const(<16 x i8> %30, i32 704) %46 = call float @llvm.SI.load.const(<16 x i8> %30, i32 708) %47 = call float @llvm.SI.load.const(<16 x i8> %30, i32 712) %48 = call float @llvm.SI.load.const(<16 x i8> %30, i32 720) %49 = call float @llvm.SI.load.const(<16 x i8> %30, i32 724) %50 = call float @llvm.SI.load.const(<16 x i8> %30, i32 728) %51 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0, !amdgpu.uniform !0 %52 = load <8 x i32>, <8 x i32> addrspace(2)* %51, align 32, !tbaa !1 %53 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %54 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %53, i64 0, i64 3, !amdgpu.uniform !0 %55 = load <4 x i32>, <4 x i32> addrspace(2)* %54, align 16, !tbaa !1 %56 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 2, !amdgpu.uniform !0 %57 = load <8 x i32>, <8 x i32> addrspace(2)* %56, align 32, !tbaa !1 %58 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %59 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %58, i64 0, i64 7, !amdgpu.uniform !0 %60 = load <4 x i32>, <4 x i32> addrspace(2)* %59, align 16, !tbaa !1 %61 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 4, !amdgpu.uniform !0 %62 = load <8 x i32>, <8 x i32> addrspace(2)* %61, align 32, !tbaa !1 %63 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %64 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %63, i64 0, i64 11, !amdgpu.uniform !0 %65 = load <4 x i32>, <4 x i32> addrspace(2)* %64, align 16, !tbaa !1 %66 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 6, !amdgpu.uniform !0 %67 = load <8 x i32>, <8 x i32> addrspace(2)* %66, align 32, !tbaa !1 %68 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %69 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %68, i64 0, i64 15, !amdgpu.uniform !0 %70 = load <4 x i32>, <4 x i32> addrspace(2)* %69, align 16, !tbaa !1 %71 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 8, !amdgpu.uniform !0 %72 = load <8 x i32>, <8 x i32> addrspace(2)* %71, align 32, !tbaa !1 %73 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)* %74 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %73, i64 0, i64 19, !amdgpu.uniform !0 %75 = load <4 x i32>, <4 x i32> addrspace(2)* %74, align 16, !tbaa !1 %76 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %8) %77 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %8) %78 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %6, <2 x i32> %8) %79 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %6, <2 x i32> %8) %80 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %6, <2 x i32> %8) %81 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %6, <2 x i32> %8) %82 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %6, <2 x i32> %8) %83 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %6, <2 x i32> %8) %84 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %6, <2 x i32> %8) %85 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %6, <2 x i32> %8) %86 = fmul float %83, %83 %87 = fmul float %84, %84 %88 = fadd float %87, %86 %89 = fmul float %85, %85 %90 = fadd float %88, %89 br i1 false, label %ENDIF, label %ELSE ELSE: ; preds = %main_body %91 = fcmp ogt float %90, 0.000000e+00 br i1 %91, label %IF29, label %ELSE30 ENDIF: ; preds = %IF29, %ELSE30, %main_body %temp4.0 = phi float [ 0.000000e+00, %main_body ], [ %122, %IF29 ], [ %126, %ELSE30 ] %92 = fmul float %temp4.0, %83 %93 = fmul float %temp4.0, %84 %94 = fmul float %temp4.0, %85 %95 = fmul float %94, 0.000000e+00 %96 = fsub float %95, %92 %97 = fmul float %94, 0.000000e+00 %98 = fsub float %97, %93 %99 = fmul float %94, 2.000000e+00 %100 = fsub float %99, %94 %101 = fmul float %98, 0x3FEA20BD80000000 %102 = fmul float %100, 0x3FE279A740000000 %103 = fadd float %101, %102 %104 = call float @llvm.AMDGPU.clamp.(float %103, float 0.000000e+00, float 1.000000e+00) %105 = fmul float %100, 0x3FE279A740000000 %106 = fmul float %96, 0xBFE6A09E60000000 %107 = fadd float %106, %105 %108 = fmul float %98, 0xBFDA20BD80000000 %109 = fadd float %107, %108 %110 = call float @llvm.AMDGPU.clamp.(float %109, float 0.000000e+00, float 1.000000e+00) %111 = fmul float %96, 0x3FE6A09E60000000 %112 = fmul float %98, 0xBFDA20BD80000000 %113 = fadd float %112, %111 %114 = fmul float %100, 0x3FE279A740000000 %115 = fadd float %113, %114 %116 = call float @llvm.AMDGPU.clamp.(float %115, float 0.000000e+00, float 1.000000e+00) %117 = call float @llvm.maxnum.f32(float %104, float 0x3EB0C6F7A0000000) %118 = call float @llvm.maxnum.f32(float %110, float 0x3EB0C6F7A0000000) %119 = call float @llvm.maxnum.f32(float %116, float 0x3EB0C6F7A0000000) %120 = fcmp ugt float %117, 0.000000e+00 br i1 %120, label %ELSE33, label %ENDIF31 IF29: ; preds = %ELSE %121 = call float @llvm.sqrt.f32(float %90) %122 = fdiv float 1.000000e+00, %121 br label %ENDIF ELSE30: ; preds = %ELSE %123 = fcmp ogt float %90, 0.000000e+00 %124 = select i1 %123, float 1.000000e+00, float %90 %125 = fcmp oge float %124, 0.000000e+00 %.op = fmul float %124, 0x47EFFFFFE0000000 %126 = select i1 %125, float %.op, float 0xC7EFFFFFE0000000 br label %ENDIF ELSE33: ; preds = %ENDIF %127 = call float @llvm.log2.f32(float %117) br label %ENDIF31 ENDIF31: ; preds = %ENDIF, %ELSE33 %temp8.1 = phi float [ %127, %ELSE33 ], [ 0xC7EFFFFFE0000000, %ENDIF ] %128 = fcmp ugt float %118, 0.000000e+00 br i1 %128, label %ELSE36, label %ENDIF34 ELSE36: ; preds = %ENDIF31 %129 = call float @llvm.log2.f32(float %118) br label %ENDIF34 ENDIF34: ; preds = %ENDIF31, %ELSE36 %temp12.0 = phi float [ %129, %ELSE36 ], [ 0xC7EFFFFFE0000000, %ENDIF31 ] %130 = fcmp ugt float %119, 0.000000e+00 br i1 %130, label %ELSE39, label %ENDIF37 ELSE39: ; preds = %ENDIF34 %131 = call float @llvm.log2.f32(float %119) br label %ENDIF37 ENDIF37: ; preds = %ENDIF34, %ELSE39 %temp12.1 = phi float [ %131, %ELSE39 ], [ 0xC7EFFFFFE0000000, %ENDIF34 ] %132 = fadd float %44, 1.000000e+00 %133 = fmul float %temp8.1, %132 %134 = fmul float %temp12.0, %132 %135 = fmul float %temp12.1, %132 %136 = call float @llvm.exp2.f32(float %133) %137 = call float @llvm.exp2.f32(float %134) %138 = call float @llvm.exp2.f32(float %135) %139 = bitcast float %76 to i32 %140 = bitcast float %77 to i32 %141 = insertelement <2 x i32> undef, i32 %139, i32 0 %142 = insertelement <2 x i32> %141, i32 %140, i32 1 %143 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %142, <8 x i32> %57, <4 x i32> %60, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %144 = extractelement <4 x float> %143, i32 0 %145 = extractelement <4 x float> %143, i32 1 %146 = extractelement <4 x float> %143, i32 2 %147 = fmul float %144, %45 %148 = fmul float %145, %46 %149 = fmul float %146, %47 %150 = fmul float %147, %136 %151 = fmul float %148, %137 %152 = fadd float %151, %150 %153 = fmul float %149, %138 %154 = fadd float %152, %153 %155 = fmul float %147, 0x3FD5555560000000 %156 = fmul float %148, 0x3FD5555560000000 %157 = fadd float %156, %155 %158 = fmul float %149, 0x3FD5555560000000 %159 = fadd float %157, %158 %160 = fcmp oeq float %82, 0.000000e+00 %.op46 = fdiv float 1.000000e+00, %82 %161 = select i1 %160, float 0x47840FD020000000, float %.op46 %162 = fmul float %80, %161 %163 = fcmp oeq float %82, 0.000000e+00 %.op47 = fdiv float 1.000000e+00, %82 %164 = select i1 %163, float 0x47840FD020000000, float %.op47 %165 = fmul float %81, %164 %166 = fmul float %162, %25 %167 = fadd float %166, %28 %168 = fmul float %165, %26 %169 = fadd float %168, %27 %170 = bitcast float %167 to i32 %171 = bitcast float %169 to i32 %172 = insertelement <2 x i32> undef, i32 %170, i32 0 %173 = insertelement <2 x i32> %172, i32 %171, i32 1 %174 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %173, <8 x i32> %72, <4 x i32> %75, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %175 = extractelement <4 x float> %174, i32 0 %176 = extractelement <4 x float> %174, i32 1 %177 = extractelement <4 x float> %174, i32 2 %178 = bitcast float %167 to i32 %179 = bitcast float %169 to i32 %180 = insertelement <2 x i32> undef, i32 %178, i32 0 %181 = insertelement <2 x i32> %180, i32 %179, i32 1 %182 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %181, <8 x i32> %67, <4 x i32> %70, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %183 = extractelement <4 x float> %182, i32 0 %184 = extractelement <4 x float> %182, i32 1 %185 = extractelement <4 x float> %182, i32 2 %186 = bitcast float %76 to i32 %187 = bitcast float %77 to i32 %188 = insertelement <2 x i32> undef, i32 %186, i32 0 %189 = insertelement <2 x i32> %188, i32 %187, i32 1 %190 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %189, <8 x i32> %52, <4 x i32> %55, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %191 = extractelement <4 x float> %190, i32 0 %192 = extractelement <4 x float> %190, i32 1 %193 = extractelement <4 x float> %190, i32 2 %194 = fmul float %191, %48 %195 = fmul float %192, %49 %196 = fmul float %193, %50 %197 = fmul float %193, %50 %198 = fmul float %194, %154 %199 = fadd float %198, %175 %200 = fmul float %195, %154 %201 = fadd float %200, %176 %202 = fmul float %196, %154 %203 = fadd float %202, %177 %204 = fmul float %197, %154 %205 = fadd float %204, %177 %206 = fmul float %194, %159 %207 = fadd float %206, %183 %208 = fmul float %195, %159 %209 = fadd float %208, %184 %210 = fmul float %197, %159 %211 = fadd float %210, %185 %212 = fmul float %197, %159 %213 = fadd float %212, %185 %214 = fmul float %78, %40 %215 = fmul float %79, %41 %216 = bitcast float %214 to i32 %217 = bitcast float %215 to i32 %218 = insertelement <2 x i32> undef, i32 %216, i32 0 %219 = insertelement <2 x i32> %218, i32 %217, i32 1 %220 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %219, <8 x i32> %62, <4 x i32> %65, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) %221 = extractelement <4 x float> %220, i32 0 %222 = extractelement <4 x float> %220, i32 1 %223 = extractelement <4 x float> %220, i32 2 %224 = fmul float %221, %221 %225 = fmul float %222, %222 %226 = fmul float %223, %223 %227 = fmul float %223, %223 %228 = fmul float %221, %224 %229 = fmul float %222, %225 %230 = fmul float %223, %226 %231 = fmul float %223, %227 %232 = fadd float %229, %228 %233 = fadd float %232, %231 %234 = fmul float %224, %221 %235 = fsub float %233, %234 %236 = fmul float %225, %222 %237 = fsub float %233, %236 %238 = fmul float %227, %223 %239 = fsub float %233, %238 %240 = fmul float %227, %223 %241 = fsub float %233, %240 %242 = fmul float %221, %34 %243 = fmul float %222, %35 %244 = fmul float %223, %36 %245 = fmul float %223, %36 %246 = fmul float %42, %235 %247 = fadd float %246, %228 %248 = fmul float %42, %237 %249 = fadd float %248, %229 %250 = fmul float %42, %239 %251 = fadd float %250, %230 %252 = fmul float %42, %241 %253 = fadd float %252, %231 %254 = fmul float %247, %37 %255 = fmul float %249, %38 %256 = fmul float %251, %39 %257 = fmul float %253, %39 %258 = fmul float %254, %43 %259 = fmul float %255, %43 %260 = fmul float %256, %43 %261 = fmul float %257, %43 %262 = fmul float %199, %258 %263 = fmul float %201, %259 %264 = fmul float %203, %260 %265 = fmul float %205, %261 %266 = fmul float %242, %207 %267 = fadd float %266, %262 %268 = fmul float %243, %209 %269 = fadd float %268, %263 %270 = fmul float %244, %211 %271 = fadd float %270, %264 %272 = fmul float %245, %213 %273 = fadd float %272, %265 %274 = fadd float %267, %31 %275 = fadd float %269, %32 %276 = fadd float %271, %33 %277 = fadd float %273, %33 %278 = bitcast float %5 to i32 %279 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %278, 10 %280 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %279, float %274, 11 %281 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %280, float %275, 12 %282 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %281, float %276, 13 %283 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %282, float %277, 14 %284 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %283, float %21, 24 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %284 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.sqrt.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.log2.f32(float) #1 ; Function Attrs: nounwind readnone declare float @llvm.exp2.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1 attributes #0 = { "InitialPSInputAddr"="36983" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{} !1 = !{!"const", null, i32 1} [traceshaders] glDetachShader(program=382, shader=381) [traceshaders] glDeleteShader(shader=381) [...] [traceshaders] glCreateShader(GL_VERTEX_SHADER) → shader=383 [traceshaders] glShaderSource(shader=383) [traceshaders] ================================================================================ #version 410 #extension GL_ARB_explicit_attrib_location : require #ifdef GL_ARB_separate_shader_objects #extension GL_ARB_separate_shader_objects : enable #endif subroutine void SubroutineType(); struct vec1 { float x; }; struct uvec1 { uint x; }; struct ivec1 { int x; }; vec4 InstrHelper; out gl_PerVertex { vec4 gl_Position; float gl_PointSize; float gl_ClipDistance[];}; layout(std140) uniform; uniform cbuffer_0 { // $Globals vec4 Const0[46]; }; uniform cbuffer_1 { // VSOffsetConstants vec4 Const1[5]; }; layout(location = 0) in vec4 dcl_Input0; vec4 Input0; layout(location = 1) in vec4 dcl_Input1; vec4 Input1; layout(location = 2) in vec4 dcl_Input2; vec4 Input2; layout(location = 4) in vec4 dcl_Input4; vec4 Input4; layout(location = 5) in vec4 dcl_Input5; vec4 Input5; layout(location = 6) in vec4 dcl_Input6; vec4 Input6; layout(location = 7) in vec4 dcl_Input7; vec4 Input7; layout(location = 0) out vec4 VtxGeoOutput0; #define Output0 VtxGeoOutput0 layout(location = 1) out vec4 VtxGeoOutput1; #define Output1 VtxGeoOutput1 layout(location = 2) out vec4 VtxGeoOutput2; #define Output2 VtxGeoOutput2 layout(location = 3) out vec4 VtxGeoOutput3; #define Output3 VtxGeoOutput3 layout(location = 4) out vec4 VtxGeoOutput4; #define Output4 VtxGeoOutput4 #undef Output5 #define Output5 phase0_Output5 vec4 phase0_Output5; vec4 Temp[4]; ivec4 Temp_int[4]; uvec4 Temp_uint[4]; void main() { Input0 = dcl_Input0; Input1 = dcl_Input1; Input2 = dcl_Input2; Input4 = dcl_Input4; Input5 = dcl_Input5; Input6 = dcl_Input6; Input7 = dcl_Input7; Output0.xy = Input7.xy * Const0[36].xy + Const0[36].wz; Output1.xy = Input4.xy; Output1.zw = Input5.yx; Output2.xy = Input6.xy; Output2.zw = vec2(uintBitsToFloat(0u), uintBitsToFloat(0u)); Temp[0].x = Const0[18].w * uintBitsToFloat(1067114824u) + Const0[30].w; Temp[0].y = uintBitsToFloat((Temp[0].x>=(-Temp[0].x)) ? 0xFFFFFFFFu : 0u); Temp[0].z = fract(abs(Temp[0].x)); Temp[0].y = (floatBitsToInt(Temp[0]).y != 0) ? Temp[0].z : (-Temp[0].z); Temp[0].yz = Temp[0].yx * vec2(uintBitsToFloat(1086918619u), uintBitsToFloat(1061481552u)); Temp[0].w = uintBitsToFloat((Temp[0].z>=(-Temp[0].z)) ? 0xFFFFFFFFu : 0u); Temp[0].z = fract(abs(Temp[0].z)); Temp[0].z = (floatBitsToInt(Temp[0]).w != 0) ? Temp[0].z : (-Temp[0].z); Temp[0].z = Temp[0].z * uintBitsToFloat(1086918619u); Temp[0].yz = sin(Temp[0].yz); Temp[0].y = Temp[0].z + Temp[0].y; Temp[0].y = Temp[0].y + uintBitsToFloat(1065353216u); Temp[1].xyz = Input0.xyz * Const0[41].xyz + Const0[40].xyz; Temp[0].z = Temp[1].x * Const0[33].z; Temp[0].y = Temp[0].y * Temp[0].z; Temp[0].z = Const0[34].x * Temp[1].x + Temp[0].x; Temp[0].z = Temp[0].z * uintBitsToFloat(1056964608u); Temp[0].w = uintBitsToFloat((Temp[0].z>=(-Temp[0].z)) ? 0xFFFFFFFFu : 0u); Temp[0].z = fract(abs(Temp[0].z)); Temp[0].z = (floatBitsToInt(Temp[0]).w != 0) ? Temp[0].z : (-Temp[0].z); Temp[0].z = Temp[0].z * uintBitsToFloat(1086918619u); Temp[0].w = Const0[33].w * Temp[1].x + Temp[0].x; Temp[0].x = Temp[0].x + Const0[32].x; Temp[2].x = uintBitsToFloat((Temp[0].w>=(-Temp[0].w)) ? 0xFFFFFFFFu : 0u); Temp[0].w = fract(abs(Temp[0].w)); Temp[0].w = (floatBitsToInt(Temp[2]).x != 0) ? Temp[0].w : (-Temp[0].w); Temp[0].w = Temp[0].w * uintBitsToFloat(1086918619u); Temp[0].zw = sin(Temp[0].zw); Temp[0].z = Temp[0].z + Temp[0].w; Temp[0].z = Temp[0].z * Const0[34].y; Temp[2].z = Temp[0].y * uintBitsToFloat(1045220557u) + Temp[0].z; Temp[0].y = Const0[30].z * Temp[1].x + (-Temp[0].x); Temp[0].xz = Const0[32].yw * Temp[1].xx + (-Temp[0].xx); Temp[0].xy = Temp[0].xy * vec2(uintBitsToFloat(1045494470u), uintBitsToFloat(1055439406u)); Temp[0].w = uintBitsToFloat((Temp[0].y>=(-Temp[0].y)) ? 0xFFFFFFFFu : 0u); Temp[0].y = fract(abs(Temp[0].y)); Temp[0].y = (floatBitsToInt(Temp[0]).w != 0) ? Temp[0].y : (-Temp[0].y); Temp[0].w = uintBitsToFloat((Temp[0].x>=(-Temp[0].x)) ? 0xFFFFFFFFu : 0u); Temp[0].x = fract(abs(Temp[0].x)); Temp[0].x = (floatBitsToInt(Temp[0]).w != 0) ? Temp[0].x : (-Temp[0].x); Temp[0].xy = Temp[0].xy * vec2(uintBitsToFloat(1086918619u), uintBitsToFloat(1086918619u)); Temp[0].xy = sin(Temp[0].xy); Temp[0].x = Temp[0].x + Temp[0].y; Temp[0].y = Temp[0].x * Const0[33].y; Temp[0].x = abs(Temp[0].x) * Const0[32].z; Temp[0].w = uintBitsToFloat((Temp[0].z>=(-Temp[0].z)) ? 0xFFFFFFFFu : 0u); Temp[0].z = fract(abs(Temp[0].z)); Temp[0].z = (floatBitsToInt(Temp[0]).w != 0) ? Temp[0].z : (-Temp[0].z); Temp[0].z = Temp[0].z * uintBitsToFloat(1086918619u); Temp[0].z = sin(Temp[0].z); Temp[2].y = Const0[33].x * Temp[0].z + Temp[0].y; Temp[0].y = Const0[30].y * uintBitsToFloat(3256877056u) + uintBitsToFloat(3240099840u); Temp[2].x = Temp[0].x * Temp[0].y; Temp[0].xyz = Temp[2].xyz * Input6.xxx; Temp[1].w = Input0.w; Temp[0].w = uintBitsToFloat(0u); Temp[0] = Temp[0] + Temp[1]; Temp[1] = Temp[0].yyyy * Const0[1]; Temp[1] = Const0[0] * Temp[0].xxxx + Temp[1]; Temp[1] = Const0[2] * Temp[0].zzzz + Temp[1]; Temp[0] = Const0[3] * Temp[0].wwww + Temp[1]; Temp[1] = Temp[0].yyyy * Const1[1]; Temp[1] = Const1[0] * Temp[0].xxxx + Temp[1]; Temp[1] = Const1[2] * Temp[0].zzzz + Temp[1]; Temp[1] = Const1[3] * Temp[0].wwww + Temp[1]; Temp[0].xyz = Temp[0].xyz * Const1[4].www; Output3 = Temp[1]; Output5 = Temp[1]; Temp[1].xyz = (-Temp[0].yyy) * Const0[38].xyz; Temp[0].xyw = Const0[37].xyz * (-Temp[0].xxx) + Temp[1].xyz; Temp[0].xyz = Const0[39].xyz * (-Temp[0].zzz) + Temp[0].xyw; Temp[1].xyz = Input1.yzx * vec3(uintBitsToFloat(1073741824u), uintBitsToFloat(1073741824u), uintBitsToFloat(1073741824u)) + vec3(uintBitsToFloat(3212836864u), uintBitsToFloat(3212836864u), uintBitsToFloat(3212836864u)); Temp[2] = Input2 * vec4(uintBitsToFloat(1073741824u), uintBitsToFloat(1073741824u), uintBitsToFloat(1073741824u), uintBitsToFloat(1073741824u)) + vec4(uintBitsToFloat(3212836864u), uintBitsToFloat(3212836864u), uintBitsToFloat(3212836864u), uintBitsToFloat(3212836864u)); Temp[3].xyz = Temp[1].xyz * Temp[2].zxy; Temp[1].xyz = Temp[2].yzx * Temp[1].yzx + (-Temp[3].xyz); Temp[1].xyz = Temp[2].www * Temp[1].xyz; Temp[3].xyz = Temp[2].yzx * Temp[1].zxy; Temp[3].xyz = Temp[1].yzx * Temp[2].zxy + (-Temp[3].xyz); Output4.y = dot(Temp[1].xyz, Temp[0].xyz); Temp[1].xyz = Temp[2].www * Temp[3].xyz; Output4.z = dot(Temp[2].xyz, Temp[0].xyz); Output4.x = dot(Temp[1].xyz, Temp[0].xyz); Output4.w = uintBitsToFloat(1065353216u); gl_Position = vec4(phase0_Output5); gl_Position.y = -gl_Position.y; gl_Position.z = gl_Position.z * 2.0 - gl_Position.w; return; } [traceshaders] ================================================================================ [traceshaders] glCompileShader(shader=383) [traceshaders] glCreateProgram() = 384 [traceshaders] glAttachShader(program=384, shader=383) [traceshaders] glLinkProgram(program=384) SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 as_ls = 0 export_prim_id = 0 VERT PROPERTY NEXT_SHADER FRAG DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL IN[5] DCL IN[6] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL OUT[5], GENERIC[4] DCL CONST[1][0..45] DCL CONST[2][0..4] DCL TEMP[0..9], LOCAL IMM[0] UINT32 {0, 576, 288, 480} IMM[1] FLT32 { 0.0000, 1.2100, 6.2832, 0.7692} IMM[2] UINT32 {4294967295, 656, 640, 528} IMM[3] INT32 {0, 0, 0, 0} IMM[4] FLT32 { 1.0000, 0.5000, 0.2000, -40.0000} IMM[5] UINT32 {544, 512, 16, 32} IMM[6] FLT32 { 0.2041, 0.4545, -10.0000, 2.0000} IMM[7] UINT32 {48, 1, 64, 608} IMM[8] UINT32 {592, 624, 0, 0} IMM[9] FLT32 { -1.0000, 0.0000, 0.0000, 0.0000} 0: MAD TEMP[0].xy, IN[6].xyyy, CONST[1][36].xyyy, CONST[1][36].wzzz 1: MOV TEMP[1].xy, IN[3].xyxx 2: MOV TEMP[1].zw, IN[4].xxyx 3: MOV TEMP[2].xy, IN[5].xyxx 4: MOV TEMP[2].zw, IMM[1].xxxx 5: MAD TEMP[3].x, CONST[1][18].wwww, IMM[1].yyyy, CONST[1][30].wwww 6: FSGE TEMP[4].x, TEMP[3].xxxx, -TEMP[3].xxxx 7: UIF TEMP[4].xxxx :0 8: MOV TEMP[4].x, IMM[2].xxxx 9: ELSE :0 10: MOV TEMP[4].x, IMM[0].xxxx 11: ENDIF 12: MOV TEMP[3].y, TEMP[4].xxxx 13: ABS TEMP[4].x, TEMP[3].xxxx 14: FRC TEMP[4].x, TEMP[4].xxxx 15: USNE TEMP[5].x, TEMP[3].yyyy, IMM[3].xxxx 16: UIF TEMP[5].xxxx :0 17: MOV TEMP[5].x, TEMP[4].xxxx 18: ELSE :0 19: MOV TEMP[5].x, -TEMP[4].xxxx 20: ENDIF 21: MOV TEMP[3].y, TEMP[5].xxxx 22: MUL TEMP[4].xy, TEMP[3].yxxx, IMM[1].zwww 23: FSGE TEMP[5].x, TEMP[4].yyyy, -TEMP[4].yyyy 24: UIF TEMP[5].xxxx :0 25: MOV TEMP[5].x, IMM[2].xxxx 26: ELSE :0 27: MOV TEMP[5].x, IMM[0].xxxx 28: ENDIF 29: MOV TEMP[3].w, TEMP[5].xxxx 30: ABS TEMP[5].x, TEMP[4].yyyy 31: FRC TEMP[5].x, TEMP[5].xxxx 32: USNE TEMP[6].x, TEMP[3].wwww, IMM[3].xxxx 33: UIF TEMP[6].xxxx :0 34: MOV TEMP[6].x, TEMP[5].xxxx 35: ELSE :0 36: MOV TEMP[6].x, -TEMP[5].xxxx 37: ENDIF 38: MUL TEMP[5].x, TEMP[6].xxxx, IMM[1].zzzz 39: SIN TEMP[4].x, TEMP[4].xxxx 40: SIN TEMP[4].y, TEMP[5].xxxx 41: ADD TEMP[4].x, TEMP[4].yyyy, TEMP[4].xxxx 42: ADD TEMP[4].x, TEMP[4].xxxx, IMM[4].xxxx 43: MAD TEMP[5].xyz, IN[0].xyzz, CONST[1][41].xyzz, CONST[1][40].xyzz 44: MUL TEMP[6].x, TEMP[5].xxxx, CONST[1][33].zzzz 45: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[6].xxxx 46: MAD TEMP[6].x, CONST[1][34].xxxx, TEMP[5].xxxx, TEMP[3].xxxx 47: MUL TEMP[6].x, TEMP[6].xxxx, IMM[4].yyyy 48: FSGE TEMP[7].x, TEMP[6].xxxx, -TEMP[6].xxxx 49: UIF TEMP[7].xxxx :0 50: MOV TEMP[7].x, IMM[2].xxxx 51: ELSE :0 52: MOV TEMP[7].x, IMM[0].xxxx 53: ENDIF 54: MOV TEMP[3].w, TEMP[7].xxxx 55: ABS TEMP[6].x, TEMP[6].xxxx 56: FRC TEMP[6].x, TEMP[6].xxxx 57: USNE TEMP[7].x, TEMP[3].wwww, IMM[3].xxxx 58: UIF TEMP[7].xxxx :0 59: MOV TEMP[7].x, TEMP[6].xxxx 60: ELSE :0 61: MOV TEMP[7].x, -TEMP[6].xxxx 62: ENDIF 63: MUL TEMP[6].x, TEMP[7].xxxx, IMM[1].zzzz 64: MAD TEMP[7].x, CONST[1][33].wwww, TEMP[5].xxxx, TEMP[3].xxxx 65: ADD TEMP[3].x, TEMP[3].xxxx, CONST[1][32].xxxx 66: FSGE TEMP[8].x, TEMP[7].xxxx, -TEMP[7].xxxx 67: UIF TEMP[8].xxxx :0 68: MOV TEMP[8].x, IMM[2].xxxx 69: ELSE :0 70: MOV TEMP[8].x, IMM[0].xxxx 71: ENDIF 72: MOV TEMP[8].x, TEMP[8].xxxx 73: ABS TEMP[7].x, TEMP[7].xxxx 74: FRC TEMP[7].x, TEMP[7].xxxx 75: USNE TEMP[9].x, TEMP[8].xxxx, IMM[3].xxxx 76: UIF TEMP[9].xxxx :0 77: MOV TEMP[9].x, TEMP[7].xxxx 78: ELSE :0 79: MOV TEMP[9].x, -TEMP[7].xxxx 80: ENDIF 81: MUL TEMP[7].x, TEMP[9].xxxx, IMM[1].zzzz 82: SIN TEMP[6].x, TEMP[6].xxxx 83: SIN TEMP[6].y, TEMP[7].xxxx 84: ADD TEMP[6].x, TEMP[6].xxxx, TEMP[6].yyyy 85: MUL TEMP[6].x, TEMP[6].xxxx, CONST[1][34].yyyy 86: MAD TEMP[4].x, TEMP[4].xxxx, IMM[4].zzzz, TEMP[6].xxxx 87: MOV TEMP[8].z, TEMP[4].xxxx 88: MAD TEMP[4].x, CONST[1][30].zzzz, TEMP[5].xxxx, -TEMP[3].xxxx 89: MOV TEMP[3].y, TEMP[4].xxxx 90: MAD TEMP[4].xy, CONST[1][32].ywww, TEMP[5].xxxx, -TEMP[3].xxxx 91: MOV TEMP[3].x, TEMP[4].xxyx 92: MUL TEMP[3].xy, TEMP[3].xyyy, IMM[6].xyyy 93: FSGE TEMP[6].x, TEMP[3].yyyy, -TEMP[3].yyyy 94: UIF TEMP[6].xxxx :0 95: MOV TEMP[6].x, IMM[2].xxxx 96: ELSE :0 97: MOV TEMP[6].x, IMM[0].xxxx 98: ENDIF 99: MOV TEMP[3].w, TEMP[6].xxxx 100: ABS TEMP[6].x, TEMP[3].yyyy 101: FRC TEMP[6].x, TEMP[6].xxxx 102: USNE TEMP[7].x, TEMP[3].wwww, IMM[3].xxxx 103: UIF TEMP[7].xxxx :0 104: MOV TEMP[7].x, TEMP[6].xxxx 105: ELSE :0 106: MOV TEMP[7].x, -TEMP[6].xxxx 107: ENDIF 108: MOV TEMP[3].y, TEMP[7].xxxx 109: FSGE TEMP[6].x, TEMP[3].xxxx, -TEMP[3].xxxx 110: UIF TEMP[6].xxxx :0 111: MOV TEMP[6].x, IMM[2].xxxx 112: ELSE :0 113: MOV TEMP[6].x, IMM[0].xxxx 114: ENDIF 115: MOV TEMP[3].w, TEMP[6].xxxx 116: ABS TEMP[6].x, TEMP[3].xxxx 117: FRC TEMP[3].x, TEMP[6].xxxx 118: USNE TEMP[6].x, TEMP[3].wwww, IMM[3].xxxx 119: UIF TEMP[6].xxxx :0 120: MOV TEMP[6].x, TEMP[3].xxxx 121: ELSE :0 122: MOV TEMP[6].x, -TEMP[3].xxxx 123: ENDIF 124: MOV TEMP[3].x, TEMP[6].xxxx 125: MUL TEMP[3].xy, TEMP[3].xyyy, IMM[1].zzzz 126: SIN TEMP[6].x, TEMP[3].xxxx 127: SIN TEMP[6].y, TEMP[3].yyyy 128: ADD TEMP[3].x, TEMP[6].xxxx, TEMP[6].yyyy 129: MUL TEMP[6].x, TEMP[3].xxxx, CONST[1][33].yyyy 130: ABS TEMP[7].x, TEMP[3].xxxx 131: MUL TEMP[3].x, TEMP[7].xxxx, CONST[1][32].zzzz 132: FSGE TEMP[7].x, TEMP[4].yyyy, -TEMP[4].yyyy 133: UIF TEMP[7].xxxx :0 134: MOV TEMP[7].x, IMM[2].xxxx 135: ELSE :0 136: MOV TEMP[7].x, IMM[0].xxxx 137: ENDIF 138: MOV TEMP[3].w, TEMP[7].xxxx 139: ABS TEMP[4].x, TEMP[4].yyyy 140: FRC TEMP[4].x, TEMP[4].xxxx 141: USNE TEMP[7].x, TEMP[3].wwww, IMM[3].xxxx 142: UIF TEMP[7].xxxx :0 143: MOV TEMP[7].x, TEMP[4].xxxx 144: ELSE :0 145: MOV TEMP[7].x, -TEMP[4].xxxx 146: ENDIF 147: MUL TEMP[4].x, TEMP[7].xxxx, IMM[1].zzzz 148: SIN TEMP[4].x, TEMP[4].xxxx 149: MAD TEMP[4].x, CONST[1][33].xxxx, TEMP[4].xxxx, TEMP[6].xxxx 150: MOV TEMP[8].y, TEMP[4].xxxx 151: MAD TEMP[4].x, CONST[1][30].yyyy, IMM[4].wwww, IMM[6].zzzz 152: MUL TEMP[8].x, TEMP[3].xxxx, TEMP[4].xxxx 153: MUL TEMP[3].xyz, TEMP[8].xyzz, IN[5].xxxx 154: MOV TEMP[5].w, IN[0].wwww 155: MOV TEMP[3].w, IMM[1].xxxx 156: ADD TEMP[3], TEMP[3], TEMP[5] 157: MUL TEMP[5], TEMP[3].yyyy, CONST[1][1] 158: MAD TEMP[5], CONST[1][0], TEMP[3].xxxx, TEMP[5] 159: MAD TEMP[5], CONST[1][2], TEMP[3].zzzz, TEMP[5] 160: MAD TEMP[3], CONST[1][3], TEMP[3].wwww, TEMP[5] 161: MUL TEMP[5], TEMP[3].yyyy, CONST[2][1] 162: MAD TEMP[5], CONST[2][0], TEMP[3].xxxx, TEMP[5] 163: MAD TEMP[5], CONST[2][2], TEMP[3].zzzz, TEMP[5] 164: MAD TEMP[5], CONST[2][3], TEMP[3].wwww, TEMP[5] 165: MUL TEMP[3].xyz, TEMP[3].xyzz, CONST[2][4].wwww 166: MOV TEMP[4], TEMP[5] 167: MOV TEMP[6], TEMP[5] 168: MUL TEMP[5].xyz, -TEMP[3].yyyy, CONST[1][38].xyzz 169: MAD TEMP[7].xyz, CONST[1][37].xyzz, -TEMP[3].xxxx, TEMP[5].xyzz 170: MAD TEMP[3].xyz, CONST[1][39].xyzz, -TEMP[3].zzzz, TEMP[7].xyzz 171: MAD TEMP[5].xyz, IN[1].yzxx, IMM[6].wwww, IMM[9].xxxx 172: MAD TEMP[8], IN[2], IMM[6].wwww, IMM[9].xxxx 173: MUL TEMP[7].xyz, TEMP[5].xyzz, TEMP[8].zxyy 174: MAD TEMP[5].xyz, TEMP[8].yzxx, TEMP[5].yzxx, -TEMP[7].xyzz 175: MUL TEMP[5].xyz, TEMP[8].wwww, TEMP[5].xyzz 176: MUL TEMP[7].xyz, TEMP[8].yzxx, TEMP[5].zxyy 177: MAD TEMP[7].xyz, TEMP[5].yzxx, TEMP[8].zxyy, -TEMP[7].xyzz 178: DP3 TEMP[9].x, TEMP[5].xyzz, TEMP[3].xyzz 179: MOV TEMP[9].y, TEMP[9].xxxx 180: MUL TEMP[5].xyz, TEMP[8].wwww, TEMP[7].xyzz 181: DP3 TEMP[7].x, TEMP[8].xyzz, TEMP[3].xyzz 182: MOV TEMP[9].z, TEMP[7].xxxx 183: DP3 TEMP[9].x, TEMP[5].xyzz, TEMP[3].xyzz 184: MOV TEMP[9].w, IMM[4].xxxx 185: MOV TEMP[3].xw, TEMP[6].xxxw 186: MOV TEMP[3].y, -TEMP[6].yyyy 187: MAD TEMP[5].x, TEMP[6].zzzz, IMM[6].wwww, -TEMP[5].wwww 188: MOV TEMP[3].z, TEMP[5].xxxx 189: MOV OUT[1], TEMP[0] 190: MOV OUT[2], TEMP[1] 191: MOV OUT[3], TEMP[2] 192: MOV OUT[4], TEMP[4] 193: MOV OUT[5], TEMP[9] 194: MOV OUT[0], TEMP[3] 195: END radeonsi: Compiling shader 209 TGSI shader LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_vs <{ float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) { main_body: %20 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 1, !amdgpu.uniform !0 %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20, align 16, !tbaa !1 %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 0) %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 4) %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 8) %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 12) %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16) %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 20) %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 24) %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 28) %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32) %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36) %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 40) %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 44) %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 48) %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 52) %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 56) %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 60) %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 300) %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 484) %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 488) %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 492) %42 = call float @llvm.SI.load.const(<16 x i8> %21, i32 512) %43 = call float @llvm.SI.load.const(<16 x i8> %21, i32 516) %44 = call float @llvm.SI.load.const(<16 x i8> %21, i32 520) %45 = call float @llvm.SI.load.const(<16 x i8> %21, i32 524) %46 = call float @llvm.SI.load.const(<16 x i8> %21, i32 528) %47 = call float @llvm.SI.load.const(<16 x i8> %21, i32 532) %48 = call float @llvm.SI.load.const(<16 x i8> %21, i32 536) %49 = call float @llvm.SI.load.const(<16 x i8> %21, i32 540) %50 = call float @llvm.SI.load.const(<16 x i8> %21, i32 544) %51 = call float @llvm.SI.load.const(<16 x i8> %21, i32 548) %52 = call float @llvm.SI.load.const(<16 x i8> %21, i32 576) %53 = call float @llvm.SI.load.const(<16 x i8> %21, i32 580) %54 = call float @llvm.SI.load.const(<16 x i8> %21, i32 584) %55 = call float @llvm.SI.load.const(<16 x i8> %21, i32 588) %56 = call float @llvm.SI.load.const(<16 x i8> %21, i32 592) %57 = call float @llvm.SI.load.const(<16 x i8> %21, i32 596) %58 = call float @llvm.SI.load.const(<16 x i8> %21, i32 600) %59 = call float @llvm.SI.load.const(<16 x i8> %21, i32 608) %60 = call float @llvm.SI.load.const(<16 x i8> %21, i32 612) %61 = call float @llvm.SI.load.const(<16 x i8> %21, i32 616) %62 = call float @llvm.SI.load.const(<16 x i8> %21, i32 624) %63 = call float @llvm.SI.load.const(<16 x i8> %21, i32 628) %64 = call float @llvm.SI.load.const(<16 x i8> %21, i32 632) %65 = call float @llvm.SI.load.const(<16 x i8> %21, i32 640) %66 = call float @llvm.SI.load.const(<16 x i8> %21, i32 644) %67 = call float @llvm.SI.load.const(<16 x i8> %21, i32 648) %68 = call float @llvm.SI.load.const(<16 x i8> %21, i32 656) %69 = call float @llvm.SI.load.const(<16 x i8> %21, i32 660) %70 = call float @llvm.SI.load.const(<16 x i8> %21, i32 664) %71 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 2, !amdgpu.uniform !0 %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !1 %73 = call float @llvm.SI.load.const(<16 x i8> %72, i32 0) %74 = call float @llvm.SI.load.const(<16 x i8> %72, i32 4) %75 = call float @llvm.SI.load.const(<16 x i8> %72, i32 8) %76 = call float @llvm.SI.load.const(<16 x i8> %72, i32 12) %77 = call float @llvm.SI.load.const(<16 x i8> %72, i32 16) %78 = call float @llvm.SI.load.const(<16 x i8> %72, i32 20) %79 = call float @llvm.SI.load.const(<16 x i8> %72, i32 24) %80 = call float @llvm.SI.load.const(<16 x i8> %72, i32 28) %81 = call float @llvm.SI.load.const(<16 x i8> %72, i32 32) %82 = call float @llvm.SI.load.const(<16 x i8> %72, i32 36) %83 = call float @llvm.SI.load.const(<16 x i8> %72, i32 40) %84 = call float @llvm.SI.load.const(<16 x i8> %72, i32 44) %85 = call float @llvm.SI.load.const(<16 x i8> %72, i32 48) %86 = call float @llvm.SI.load.const(<16 x i8> %72, i32 52) %87 = call float @llvm.SI.load.const(<16 x i8> %72, i32 56) %88 = call float @llvm.SI.load.const(<16 x i8> %72, i32 60) %89 = call float @llvm.SI.load.const(<16 x i8> %72, i32 76) %90 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0, !amdgpu.uniform !0 %91 = load <16 x i8>, <16 x i8> addrspace(2)* %90, align 16, !tbaa !1 %92 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %91, i32 0, i32 %13) %93 = extractelement <4 x float> %92, i32 0 %94 = extractelement <4 x float> %92, i32 1 %95 = extractelement <4 x float> %92, i32 2 %96 = extractelement <4 x float> %92, i32 3 %97 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1, !amdgpu.uniform !0 %98 = load <16 x i8>, <16 x i8> addrspace(2)* %97, align 16, !tbaa !1 %99 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %98, i32 0, i32 %14) %100 = extractelement <4 x float> %99, i32 0 %101 = extractelement <4 x float> %99, i32 1 %102 = extractelement <4 x float> %99, i32 2 %103 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 2, !amdgpu.uniform !0 %104 = load <16 x i8>, <16 x i8> addrspace(2)* %103, align 16, !tbaa !1 %105 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %104, i32 0, i32 %15) %106 = extractelement <4 x float> %105, i32 0 %107 = extractelement <4 x float> %105, i32 1 %108 = extractelement <4 x float> %105, i32 2 %109 = extractelement <4 x float> %105, i32 3 %110 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 3, !amdgpu.uniform !0 %111 = load <16 x i8>, <16 x i8> addrspace(2)* %110, align 16, !tbaa !1 %112 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %111, i32 0, i32 %16) %113 = extractelement <4 x float> %112, i32 0 %114 = extractelement <4 x float> %112, i32 1 %115 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 4, !amdgpu.uniform !0 %116 = load <16 x i8>, <16 x i8> addrspace(2)* %115, align 16, !tbaa !1 %117 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %116, i32 0, i32 %17) %118 = extractelement <4 x float> %117, i32 0 %119 = extractelement <4 x float> %117, i32 1 %120 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 5, !amdgpu.uniform !0 %121 = load <16 x i8>, <16 x i8> addrspace(2)* %120, align 16, !tbaa !1 %122 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %121, i32 0, i32 %18) %123 = extractelement <4 x float> %122, i32 0 %124 = extractelement <4 x float> %122, i32 1 %125 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 6, !amdgpu.uniform !0 %126 = load <16 x i8>, <16 x i8> addrspace(2)* %125, align 16, !tbaa !1 %127 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %126, i32 0, i32 %19) %128 = extractelement <4 x float> %127, i32 0 %129 = extractelement <4 x float> %127, i32 1 %130 = fmul float %128, %52 %131 = fadd float %130, %55 %132 = fmul float %129, %53 %133 = fadd float %132, %54 %134 = fmul float %38, 0x3FF35C2900000000 %135 = fadd float %134, %41 %136 = fsub float -0.000000e+00, %135 %137 = fcmp oge float %135, %136 %138 = call float @llvm.fabs.f32(float %135) %139 = call float @llvm.floor.f32(float %138) %140 = fsub float %138, %139 %141 = fsub float -0.000000e+00, %140 %temp20.0 = select i1 %137, float %140, float %141 %142 = fmul float %temp20.0, 0x401921FB60000000 %143 = fmul float %135, 0x3FE89D8A00000000 %144 = fsub float -0.000000e+00, %143 %145 = fcmp oge float %143, %144 %146 = call float @llvm.fabs.f32(float %143) %147 = call float @llvm.floor.f32(float %146) %148 = fsub float %146, %147 %149 = fsub float -0.000000e+00, %148 %temp24.0 = select i1 %145, float %148, float %149 %150 = fmul float %temp24.0, 0x401921FB60000000 %151 = call float @llvm.sin.f32(float %142) %152 = call float @llvm.sin.f32(float %150) %153 = fadd float %152, %151 %154 = fadd float %153, 1.000000e+00 %155 = fmul float %93, %68 %156 = fadd float %155, %65 %157 = fmul float %94, %69 %158 = fadd float %157, %66 %159 = fmul float %95, %70 %160 = fadd float %159, %67 %161 = fmul float %156, %48 %162 = fmul float %154, %161 %163 = fmul float %50, %156 %164 = fadd float %163, %135 %165 = fmul float %164, 5.000000e-01 %166 = fsub float -0.000000e+00, %165 %167 = fcmp oge float %165, %166 %168 = call float @llvm.fabs.f32(float %165) %169 = call float @llvm.floor.f32(float %168) %170 = fsub float %168, %169 %171 = fsub float -0.000000e+00, %170 %temp28.1 = select i1 %167, float %170, float %171 %172 = fmul float %temp28.1, 0x401921FB60000000 %173 = fmul float %49, %156 %174 = fadd float %173, %135 %175 = fadd float %135, %42 %176 = fsub float -0.000000e+00, %174 %177 = fcmp oge float %174, %176 %178 = call float @llvm.fabs.f32(float %174) %179 = call float @llvm.floor.f32(float %178) %180 = fsub float %178, %179 %181 = fsub float -0.000000e+00, %180 %temp36.0 = select i1 %177, float %180, float %181 %182 = fmul float %temp36.0, 0x401921FB60000000 %183 = call float @llvm.sin.f32(float %172) %184 = call float @llvm.sin.f32(float %182) %185 = fadd float %183, %184 %186 = fmul float %185, %51 %187 = fmul float %162, 0x3FC99999A0000000 %188 = fadd float %187, %186 %189 = fmul float %40, %156 %190 = fsub float %189, %175 %191 = fmul float %43, %156 %192 = fsub float %191, %175 %193 = fmul float %45, %156 %194 = fsub float %193, %175 %195 = fmul float %192, 0x3FCA1F58C0000000 %196 = fmul float %190, 0x3FDD1745C0000000 %197 = fsub float -0.000000e+00, %196 %198 = fcmp oge float %196, %197 %199 = call float @llvm.fabs.f32(float %196) %200 = call float @llvm.floor.f32(float %199) %201 = fsub float %199, %200 %202 = fsub float -0.000000e+00, %201 %temp28.2 = select i1 %198, float %201, float %202 %203 = fsub float -0.000000e+00, %195 %204 = fcmp oge float %195, %203 %205 = call float @llvm.fabs.f32(float %195) %206 = call float @llvm.floor.f32(float %205) %207 = fsub float %205, %206 %208 = fsub float -0.000000e+00, %207 %temp24.3 = select i1 %204, float %207, float %208 %209 = fmul float %temp24.3, 0x401921FB60000000 %210 = fmul float %temp28.2, 0x401921FB60000000 %211 = call float @llvm.sin.f32(float %209) %212 = call float @llvm.sin.f32(float %210) %213 = fadd float %211, %212 %214 = fmul float %213, %47 %215 = call float @llvm.fabs.f32(float %213) %216 = fmul float %215, %44 %217 = fsub float -0.000000e+00, %194 %218 = fcmp oge float %194, %217 %219 = call float @llvm.fabs.f32(float %194) %220 = call float @llvm.floor.f32(float %219) %221 = fsub float %219, %220 %222 = fsub float -0.000000e+00, %221 %temp28.4 = select i1 %218, float %221, float %222 %223 = fmul float %temp28.4, 0x401921FB60000000 %224 = call float @llvm.sin.f32(float %223) %225 = fmul float %46, %224 %226 = fadd float %225, %214 %227 = fmul float %39, -4.000000e+01 %228 = fadd float %227, -1.000000e+01 %229 = fmul float %216, %228 %230 = fmul float %229, %123 %231 = fmul float %226, %123 %232 = fmul float %188, %123 %233 = fadd float %230, %156 %234 = fadd float %231, %158 %235 = fadd float %232, %160 %236 = fadd float %96, 0.000000e+00 %237 = fmul float %234, %26 %238 = fmul float %234, %27 %239 = fmul float %234, %28 %240 = fmul float %234, %29 %241 = fmul float %22, %233 %242 = fadd float %241, %237 %243 = fmul float %23, %233 %244 = fadd float %243, %238 %245 = fmul float %24, %233 %246 = fadd float %245, %239 %247 = fmul float %25, %233 %248 = fadd float %247, %240 %249 = fmul float %30, %235 %250 = fadd float %249, %242 %251 = fmul float %31, %235 %252 = fadd float %251, %244 %253 = fmul float %32, %235 %254 = fadd float %253, %246 %255 = fmul float %33, %235 %256 = fadd float %255, %248 %257 = fmul float %34, %236 %258 = fadd float %257, %250 %259 = fmul float %35, %236 %260 = fadd float %259, %252 %261 = fmul float %36, %236 %262 = fadd float %261, %254 %263 = fmul float %37, %236 %264 = fadd float %263, %256 %265 = fmul float %260, %77 %266 = fmul float %260, %78 %267 = fmul float %260, %79 %268 = fmul float %260, %80 %269 = fmul float %73, %258 %270 = fadd float %269, %265 %271 = fmul float %74, %258 %272 = fadd float %271, %266 %273 = fmul float %75, %258 %274 = fadd float %273, %267 %275 = fmul float %76, %258 %276 = fadd float %275, %268 %277 = fmul float %81, %262 %278 = fadd float %277, %270 %279 = fmul float %82, %262 %280 = fadd float %279, %272 %281 = fmul float %83, %262 %282 = fadd float %281, %274 %283 = fmul float %84, %262 %284 = fadd float %283, %276 %285 = fmul float %85, %264 %286 = fadd float %285, %278 %287 = fmul float %86, %264 %288 = fadd float %287, %280 %289 = fmul float %87, %264 %290 = fadd float %289, %282 %291 = fmul float %88, %264 %292 = fadd float %291, %284 %293 = fmul float %258, %89 %294 = fmul float %260, %89 %295 = fmul float %262, %89 %296 = fmul float %294, %59 %297 = fsub float -0.000000e+00, %296 %298 = fmul float %294, %60 %299 = fsub float -0.000000e+00, %298 %300 = fmul float %294, %61 %301 = fsub float -0.000000e+00, %300 %302 = fmul float %293, %56 %303 = fsub float %297, %302 %304 = fmul float %293, %57 %305 = fsub float %299, %304 %306 = fmul float %293, %58 %307 = fsub float %301, %306 %308 = fmul float %295, %62 %309 = fsub float %303, %308 %310 = fmul float %295, %63 %311 = fsub float %305, %310 %312 = fmul float %295, %64 %313 = fsub float %307, %312 %314 = fmul float %101, 2.000000e+00 %315 = fadd float %314, -1.000000e+00 %316 = fmul float %102, 2.000000e+00 %317 = fadd float %316, -1.000000e+00 %318 = fmul float %100, 2.000000e+00 %319 = fadd float %318, -1.000000e+00 %320 = fmul float %106, 2.000000e+00 %321 = fadd float %320, -1.000000e+00 %322 = fmul float %107, 2.000000e+00 %323 = fadd float %322, -1.000000e+00 %324 = fmul float %108, 2.000000e+00 %325 = fadd float %324, -1.000000e+00 %326 = fmul float %109, 2.000000e+00 %327 = fadd float %326, -1.000000e+00 %328 = fmul float %315, %325 %329 = fmul float %317, %321 %330 = fmul float %319, %323 %331 = fmul float %323, %317 %332 = fsub float %331, %328 %333 = fmul float %325, %319 %334 = fsub float %333, %329 %335 = fmul float %321, %315 %336 = fsub float %335, %330 %337 = fmul float %327, %332 %338 = fmul float %327, %334 %339 = fmul float %327, %336 %340 = fmul float %323, %339 %341 = fmul float %325, %337 %342 = fmul float %321, %338 %343 = fmul float %338, %325 %344 = fsub float %343, %340 %345 = fmul float %339, %321 %346 = fsub float %345, %341 %347 = fmul float %337, %323 %348 = fsub float %347, %342 %349 = fmul float %337, %309 %350 = fmul float %338, %311 %351 = fadd float %350, %349 %352 = fmul float %339, %313 %353 = fadd float %351, %352 %354 = fmul float %327, %344 %355 = fmul float %327, %346 %356 = fmul float %327, %348 %357 = fmul float %321, %309 %358 = fmul float %323, %311 %359 = fadd float %358, %357 %360 = fmul float %325, %313 %361 = fadd float %359, %360 %362 = fmul float %354, %309 %363 = fmul float %355, %311 %364 = fadd float %363, %362 %365 = fmul float %356, %313 %366 = fadd float %364, %365 %367 = fsub float -0.000000e+00, %288 %368 = fmul float %290, 2.000000e+00 %369 = fsub float %368, %292 %370 = bitcast i32 %11 to float %371 = insertvalue <{ float, float, float }> undef, float %370, 2 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %131, float %133, float undef, float undef) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %113, float %114, float %119, float %118) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %123, float %124, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %286, float %288, float %290, float %292) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %366, float %353, float %361, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %286, float %367, float %369, float %292) ret <{ float, float, float }> %371 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #0 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #0 ; Function Attrs: nounwind readnone declare float @llvm.floor.f32(float) #0 ; Function Attrs: nounwind readnone declare float @llvm.sin.f32(float) #0 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { nounwind readnone } !0 = !{} !1 = !{!"const", null, i32 1} [traceshaders] glDetachShader(program=384, shader=383) [traceshaders] glDeleteShader(shader=383) [...] [traceshaders] glGenProgramPipelines(): [traceshaders] - pipeline=15 [traceshaders] glUseProgramStages(pipeline=15, stages=vert, program=74) [...] [traceshaders] glBindProgramPipeline(pipeline=15) radeonsi: Compiling shader 389 Vertex Shader Prolog LLVM IR: ; ModuleID = 'tgsi' source_filename = "tgsi" target triple = "amdgcn--" define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> @main(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) { main_body: %19 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %0, 0 %20 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %19, i32 %1, 1 %21 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %20, i32 %2, 2 %22 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %21, i32 %3, 3 %23 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %22, i32 %4, 4 %24 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %23, i32 %5, 5 %25 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %24, i32 %6, 6 %26 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %25, i32 %7, 7 %27 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %26, i32 %8, 8 %28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %27, i32 %9, 9 %29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %28, i32 %10, 10 %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %29, i32 %11, 11 %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %30, i32 %12, 12 %32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %31, i32 %13, 13 %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %32, i32 %14, 14 %34 = bitcast i32 %15 to float %35 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %33, float %34, 15 %36 = bitcast i32 %16 to float %37 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %35, float %36, 16 %38 = bitcast i32 %17 to float %39 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %37, float %38, 17 %40 = bitcast i32 %18 to float %41 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %39, float %40, 18 %42 = add i32 %15, %12 %43 = bitcast i32 %42 to float %44 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %41, float %43, 19 %45 = add i32 %15, %12 %46 = bitcast i32 %45 to float %47 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %44, float %46, 20 %48 = add i32 %15, %12 %49 = bitcast i32 %48 to float %50 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %47, float %49, 21 %51 = add i32 %15, %12 %52 = bitcast i32 %51 to float %53 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %50, float %52, 22 %54 = add i32 %15, %12 %55 = bitcast i32 %54 to float %56 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %53, float %55, 23 %57 = add i32 %15, %12 %58 = bitcast i32 %57 to float %59 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %56, float %58, 24 %60 = add i32 %15, %12 %61 = bitcast i32 %60 to float %62 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %59, float %61, 25 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float }> %62 } Vertex Shader as VS: Shader prolog disassembly: v_add_i32_e32 v4, vcc, s12, v0 ; 3208000C v_mov_b32_e32 v5, v4 ; 7E0A0304 v_mov_b32_e32 v6, v4 ; 7E0C0304 v_mov_b32_e32 v7, v4 ; 7E0E0304 v_mov_b32_e32 v8, v4 ; 7E100304 v_mov_b32_e32 v9, v4 ; 7E120304 v_mov_b32_e32 v10, v4 ; 7E140304 Shader main disassembly: s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C00A0105 00000000 s_load_dwordx4 s[12:15], s[10:11], 0x10 ; C00A0305 00000010 s_load_dwordx4 s[16:19], s[10:11], 0x50 ; C00A0405 00000050 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[17:20], v4, s[4:7], 0 idxen ; E00C2000 80011104 s_load_dwordx4 s[4:7], s[10:11], 0x20 ; C00A0105 00000020 buffer_load_format_xyzw v[25:28], v5, s[12:15], 0 idxen ; E00C2000 80031905 s_load_dwordx4 s[12:15], s[10:11], 0x30 ; C00A0305 00000030 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[28:31], v6, s[4:7], 0 idxen ; E00C2000 80011C06 s_load_dwordx4 s[4:7], s[10:11], 0x40 ; C00A0105 00000040 s_load_dwordx4 s[8:11], s[10:11], 0x60 ; C00A0205 00000060 buffer_load_format_xyzw v[3:6], v7, s[12:15], 0 idxen ; E00C2000 80030307 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[5:8], v8, s[4:7], 0 idxen ; E00C2000 80010508 s_nop 0 ; BF800000 buffer_load_format_xyzw v[11:14], v9, s[16:19], 0 idxen ; E00C2000 80040B09 s_nop 0 ; BF800000 buffer_load_format_xyzw v[21:24], v10, s[8:11], 0 idxen ; E00C2000 8002150A s_load_dwordx4 s[16:19], s[2:3], 0x10 ; C00A0401 00000010 s_nop 0 ; BF800000 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s12, s[16:19], 0x10 ; C0220308 00000010 s_buffer_load_dword s10, s[16:19], 0x0 ; C0220288 00000000 s_buffer_load_dword s1, s[16:19], 0x20 ; C0220048 00000020 s_buffer_load_dword s9, s[16:19], 0x8 ; C0220248 00000008 s_buffer_load_dword s11, s[16:19], 0x18 ; C02202C8 00000018 s_buffer_load_dword s0, s[16:19], 0x28 ; C0220008 00000028 s_waitcnt vmcnt(2) ; BF8C0F72 v_add_f32_e32 v7, v26, v26 ; 020E351A v_add_f32_e32 v0, v25, v25 ; 02003319 v_mad_f32 v10, 2.0, v27, -1.0 ; D1C1000A 03CE36F4 v_mad_f32 v1, 2.0, v25, -1.0 ; D1C10001 03CE32F4 v_mad_f32 v15, 2.0, v30, -1.0 ; D1C1000F 03CE3CF4 s_waitcnt vmcnt(1) ; BF8C0F71 v_mad_f32 v13, 2.0, v29, -1.0 ; D1C1000D 03CE3AF4 v_mad_f32 v7, v7, v15, -v15 ; D1C10007 843E1F07 v_mad_f32 v8, 2.0, v26, -1.0 ; D1C10008 03CE34F4 v_add_f32_e32 v9, v27, v27 ; 0212371B v_add_f32_e32 v25, v28, v28 ; 0232391C v_mad_f32 v14, 2.0, v28, -1.0 ; D1C1000E 03CE38F4 v_mad_f32 v0, v0, v13, -v13 ; D1C10000 84361B00 v_add_f32_e32 v28, v31, v31 ; 02383F1F v_mad_f32 v7, v13, v10, -v7 ; D1C10007 841E150D v_mad_f32 v0, v14, v8, -v0 ; D1C10000 8402110E v_mad_f32 v8, v9, v14, -v14 ; D1C10008 843A1D09 v_add_f32_e32 v26, v30, v30 ; 02343D1E s_waitcnt vmcnt(0) ; BF8C0F70 v_mad_f32 v24, v28, v7, -v7 ; D1C10018 841E0F1C v_mad_f32 v1, v15, v1, -v8 ; D1C10001 8422030F v_mad_f32 v16, v28, v0, -v0 ; D1C10010 8402011C v_mad_f32 v0, v26, v24, -v24 ; D1C10000 8462311A v_add_f32_e32 v27, v29, v29 ; 02363B1D v_mad_f32 v10, v28, v1, -v1 ; D1C1000A 8406031C v_mad_f32 v7, v27, v16, -v16 ; D1C10007 8442211B v_mad_f32 v0, v16, v14, -v0 ; D1C10000 84021D10 v_mad_f32 v1, v25, v10, -v10 ; D1C10001 842A1519 v_mad_f32 v7, v10, v15, -v7 ; D1C10007 841E1F0A v_mad_f32 v9, v28, v0, -v0 ; D1C10009 8402011C v_mad_f32 v1, v24, v13, -v1 ; D1C10001 84061B18 v_mad_f32 v25, v28, v7, -v7 ; D1C10019 841E0F1C s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s12, v9 ; 0A0E120C v_mad_f32 v26, v28, v1, -v1 ; D1C1001A 8406031C v_mul_f32_e32 v1, s12, v10 ; 0A02140C v_mac_f32_e32 v7, s10, v25 ; 2C0E320A v_mad_f32 v0, v27, s12, -s12 ; D1C10000 8030191B v_mac_f32_e32 v1, s10, v24 ; 2C02300A v_mac_f32_e32 v7, s1, v26 ; 2C0E3401 v_mac_f32_e32 v0, s10, v14 ; 2C001C0A v_mac_f32_e32 v1, s1, v16 ; 2C022001 v_mul_f32_e32 v27, v7, v7 ; 0A360F07 v_mac_f32_e32 v0, s1, v15 ; 2C001E01 v_mac_f32_e32 v27, v1, v1 ; 2C360301 v_mac_f32_e32 v27, v0, v0 ; 2C360100 v_mad_f32 v23, 2.0, v31, -1.0 ; D1C10017 03CE3EF4 v_cmp_nlt_f32_e32 vcc, 0, v27 ; 7C9C3680 s_and_saveexec_b64 s[44:45], vcc ; BEAC206A s_xor_b64 s[44:45], exec, s[44:45] ; 88AC2C7E v_cmp_le_f32_e32 vcc, 0, v27 ; 7C863680 v_mul_f32_e32 v8, 0x7f7fffff, v27 ; 0A1036FF 7F7FFFFF v_mov_b32_e32 v28, 0xff7fffff ; 7E3802FF FF7FFFFF v_cndmask_b32_e32 v8, v28, v8, vcc ; 0010111C s_or_saveexec_b64 s[44:45], s[44:45] ; BEAC212C s_load_dwordx4 s[56:59], s[2:3], 0x20 ; C00A0E01 00000020 s_waitcnt lgkmcnt(0) ; BF8C007F s_xor_b64 exec, exec, s[44:45] ; 88FE2C7E v_rsq_f32_e32 v8, v27 ; 7E10491B s_or_b64 exec, exec, s[44:45] ; 87FE2C7E s_buffer_load_dword s29, s[16:19], 0x4 ; C0220748 00000004 s_buffer_load_dword s2, s[16:19], 0xc ; C0220088 0000000C s_buffer_load_dword s31, s[16:19], 0x14 ; C02207C8 00000014 s_buffer_load_dword s30, s[16:19], 0x1c ; C0220788 0000001C s_buffer_load_dword s3, s[16:19], 0x24 ; C02200C8 00000024 s_buffer_load_dword s4, s[16:19], 0x2c ; C0220108 0000002C s_buffer_load_dword s5, s[16:19], 0x30 ; C0220148 00000030 s_buffer_load_dword s6, s[16:19], 0x34 ; C0220188 00000034 s_buffer_load_dword s7, s[16:19], 0x38 ; C02201C8 00000038 s_buffer_load_dword s8, s[16:19], 0x3c ; C0220208 0000003C s_buffer_load_dword s42, s[16:19], 0x12c ; C0220A88 0000012C s_buffer_load_dword s32, s[16:19], 0x2e4 ; C0220808 000002E4 s_buffer_load_dword s39, s[16:19], 0x2e8 ; C02209C8 000002E8 s_buffer_load_dword s47, s[16:19], 0x2ec ; C0220BC8 000002EC s_buffer_load_dword s40, s[16:19], 0x300 ; C0220A08 00000300 s_buffer_load_dword s38, s[16:19], 0x304 ; C0220988 00000304 s_buffer_load_dword s33, s[16:19], 0x308 ; C0220848 00000308 s_buffer_load_dword s37, s[16:19], 0x30c ; C0220948 0000030C s_buffer_load_dword s34, s[16:19], 0x310 ; C0220888 00000310 s_buffer_load_dword s35, s[16:19], 0x314 ; C02208C8 00000314 s_buffer_load_dword s46, s[16:19], 0x318 ; C0220B88 00000318 s_buffer_load_dword s43, s[16:19], 0x31c ; C0220AC8 0000031C s_buffer_load_dword s41, s[16:19], 0x320 ; C0220A48 00000320 s_buffer_load_dword s36, s[16:19], 0x324 ; C0220908 00000324 s_buffer_load_dword s48, s[16:19], 0x330 ; C0220C08 00000330 s_buffer_load_dword s49, s[16:19], 0x340 ; C0220C48 00000340 s_buffer_load_dword s50, s[16:19], 0x344 ; C0220C88 00000344 s_buffer_load_dword s54, s[16:19], 0x348 ; C0220D88 00000348 s_buffer_load_dword s55, s[16:19], 0x34c ; C0220DC8 0000034C s_buffer_load_dword s51, s[16:19], 0x380 ; C0220CC8 00000380 s_buffer_load_dword s52, s[16:19], 0x384 ; C0220D08 00000384 s_buffer_load_dword s53, s[16:19], 0x388 ; C0220D48 00000388 s_buffer_load_dword s60, s[16:19], 0x390 ; C0220F08 00000390 s_buffer_load_dword s61, s[16:19], 0x394 ; C0220F48 00000394 s_buffer_load_dword s62, s[16:19], 0x398 ; C0220F88 00000398 s_buffer_load_dword s13, s[56:59], 0x0 ; C022035C 00000000 s_buffer_load_dword s14, s[56:59], 0x4 ; C022039C 00000004 s_buffer_load_dword s15, s[56:59], 0x8 ; C02203DC 00000008 s_buffer_load_dword s16, s[56:59], 0xc ; C022041C 0000000C s_buffer_load_dword s17, s[56:59], 0x10 ; C022045C 00000010 s_buffer_load_dword s19, s[56:59], 0x14 ; C02204DC 00000014 s_buffer_load_dword s21, s[56:59], 0x18 ; C022055C 00000018 s_buffer_load_dword s23, s[56:59], 0x1c ; C02205DC 0000001C s_buffer_load_dword s18, s[56:59], 0x20 ; C022049C 00000020 s_buffer_load_dword s20, s[56:59], 0x24 ; C022051C 00000024 s_buffer_load_dword s22, s[56:59], 0x28 ; C022059C 00000028 s_buffer_load_dword s24, s[56:59], 0x2c ; C022061C 0000002C s_buffer_load_dword s25, s[56:59], 0x30 ; C022065C 00000030 s_buffer_load_dword s26, s[56:59], 0x34 ; C022069C 00000034 s_buffer_load_dword s27, s[56:59], 0x38 ; C02206DC 00000038 s_buffer_load_dword s28, s[56:59], 0x3c ; C022071C 0000003C v_mul_f32_e32 v10, s11, v10 ; 0A14140B v_mul_f32_e32 v9, s11, v9 ; 0A12120B v_mac_f32_e32 v10, s9, v24 ; 2C143009 v_mac_f32_e32 v9, s9, v25 ; 2C123209 v_mac_f32_e32 v10, s0, v16 ; 2C142000 v_mul_f32_e32 v13, s11, v13 ; 0A1A1A0B v_mac_f32_e32 v13, s9, v14 ; 2C1A1C09 v_mac_f32_e32 v9, s0, v26 ; 2C123400 v_mul_f32_e32 v25, v10, v10 ; 0A32150A v_mac_f32_e32 v13, s0, v15 ; 2C1A1E00 v_mac_f32_e32 v25, v9, v9 ; 2C321309 v_mac_f32_e32 v25, v13, v13 ; 2C321B0D v_cmp_nlt_f32_e32 vcc, 0, v25 ; 7C9C3280 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[44:45], vcc ; BEAC206A s_xor_b64 s[44:45], exec, s[44:45] ; 88AC2C7E v_cmp_le_f32_e32 vcc, 0, v25 ; 7C863280 v_mul_f32_e32 v14, 0x7f7fffff, v25 ; 0A1C32FF 7F7FFFFF v_mov_b32_e32 v15, 0xff7fffff ; 7E1E02FF FF7FFFFF v_cndmask_b32_e32 v16, v15, v14, vcc ; 00201D0F s_or_saveexec_b64 s[44:45], s[44:45] ; BEAC212C v_mov_b32_e32 v24, s47 ; 7E30022F v_mov_b32_e32 v14, s54 ; 7E1C0236 v_mov_b32_e32 v15, s55 ; 7E1E0237 v_mov_b32_e32 v26, s60 ; 7E34023C v_mov_b32_e32 v27, s61 ; 7E36023D v_mov_b32_e32 v28, s62 ; 7E38023E s_xor_b64 exec, exec, s[44:45] ; 88FE2C7E v_rsq_f32_e32 v16, v25 ; 7E204919 s_or_b64 exec, exec, s[44:45] ; 87FE2C7E v_mac_f32_e32 v14, s50, v22 ; 2C1C2C32 v_mov_b32_e32 v22, 0x3f9ae148 ; 7E2C02FF 3F9AE148 v_mad_f32 v17, v26, v17, s51 ; D1C10011 00CE231A v_mac_f32_e32 v24, s42, v22 ; 2C302C2A v_mad_f32 v22, s43, v17, v24 ; D1C10016 0462222B v_floor_f32_e64 v25, |v22| ; D15F0119 00000116 v_sub_f32_e64 v25, |v22|, v25 ; D1020119 00023316 v_bfrev_b32_e32 v26, 1 ; 7E345881 v_mad_f32 v18, v27, v18, s52 ; D1C10012 00D2251B v_xor_b32_e32 v27, v25, v26 ; 2A363519 v_cmp_ge_f32_e64 vcc, v22, -v22 ; D046006A 40022D16 v_cndmask_b32_e32 v22, v27, v25, vcc ; 002C331B v_mad_f32 v25, s41, v17, v24 ; D1C10019 04622229 v_mul_f32_e32 v25, 0.5, v25 ; 0A3232F0 v_floor_f32_e64 v27, |v25| ; D15F011B 00000119 v_sub_f32_e64 v27, |v25|, v27 ; D102011B 00023719 v_mad_f32 v19, v28, v19, s53 ; D1C10013 00D6271C v_xor_b32_e32 v28, v27, v26 ; 2A38351B v_cmp_ge_f32_e64 vcc, v25, -v25 ; D046006A 40023319 v_cndmask_b32_e32 v25, v28, v27, vcc ; 0032371C v_floor_f32_e64 v27, |v24| ; D15F011B 00000118 v_sub_f32_e64 v27, |v24|, v27 ; D102011B 00023718 v_xor_b32_e32 v28, v27, v26 ; 2A38351B v_cmp_ge_f32_e64 vcc, v24, -v24 ; D046006A 40023118 v_cndmask_b32_e32 v27, v28, v27, vcc ; 0036371C v_mul_f32_e32 v28, 0x3f44ec50, v24 ; 0A3830FF 3F44EC50 v_floor_f32_e64 v29, |v28| ; D15F011D 0000011C v_sub_f32_e64 v29, |v28|, v29 ; D102011D 00023B1C v_xor_b32_e32 v30, v29, v26 ; 2A3C351D v_cmp_ge_f32_e64 vcc, v28, -v28 ; D046006A 4002391C v_add_f32_e32 v24, s40, v24 ; 02303028 v_cndmask_b32_e32 v28, v30, v29, vcc ; 00383B1E v_mad_f32 v29, s39, v17, -v24 ; D1C1001D 84622227 v_mul_f32_e32 v29, 0x3ee8ba2e, v29 ; 0A3A3AFF 3EE8BA2E v_floor_f32_e64 v30, |v29| ; D15F011E 0000011D v_sub_f32_e64 v30, |v29|, v30 ; D102011E 00023D1D v_xor_b32_e32 v31, v30, v26 ; 2A3E351E v_cmp_ge_f32_e64 vcc, v29, -v29 ; D046006A 40023B1D v_cndmask_b32_e32 v29, v31, v30, vcc ; 003A3D1F v_mad_f32 v30, s38, v17, -v24 ; D1C1001E 84622226 v_mul_f32_e32 v30, 0x3e50fac6, v30 ; 0A3C3CFF 3E50FAC6 v_floor_f32_e64 v31, |v30| ; D15F011F 0000011E v_sub_f32_e64 v31, |v30|, v31 ; D102011F 00023F1E v_xor_b32_e32 v32, v31, v26 ; 2A40351F v_cmp_ge_f32_e64 vcc, v30, -v30 ; D046006A 40023D1E v_cndmask_b32_e32 v30, v32, v31, vcc ; 003C3F20 v_mov_b32_e32 v31, 0x40c90fdb ; 7E3E02FF 40C90FDB v_mul_f32_e32 v22, v31, v22 ; 0A2C2D1F v_mov_b32_e32 v32, 0x3e22f983 ; 7E4002FF 3E22F983 v_mul_f32_e32 v25, v31, v25 ; 0A32331F v_mul_f32_e32 v22, v32, v22 ; 0A2C2D20 v_mul_f32_e32 v25, v32, v25 ; 0A323320 v_fract_f32_e32 v22, v22 ; 7E2C3716 v_fract_f32_e32 v25, v25 ; 7E323719 v_sin_f32_e32 v22, v22 ; 7E2C5316 v_sin_f32_e32 v25, v25 ; 7E325319 v_mad_f32 v24, s37, v17, -v24 ; D1C10018 84622225 v_floor_f32_e64 v33, |v24| ; D15F0121 00000118 v_sub_f32_e64 v33, |v24|, v33 ; D1020121 00024318 v_add_f32_e32 v22, v22, v25 ; 022C3316 v_mul_f32_e32 v25, v31, v29 ; 0A323B1F v_mul_f32_e32 v29, v31, v30 ; 0A3A3D1F v_xor_b32_e32 v34, v33, v26 ; 2A443521 v_cmp_ge_f32_e64 vcc, v24, -v24 ; D046006A 40023118 v_cndmask_b32_e32 v24, v34, v33, vcc ; 00304322 v_mul_f32_e32 v29, v32, v29 ; 0A3A3B20 v_mul_f32_e32 v25, v32, v25 ; 0A323320 v_mul_f32_e32 v24, v31, v24 ; 0A30311F v_fract_f32_e32 v29, v29 ; 7E3A371D v_fract_f32_e32 v25, v25 ; 7E323719 v_mul_f32_e32 v7, v7, v8 ; 0A0E1107 v_mul_f32_e32 v1, v1, v8 ; 0A021101 v_mul_f32_e32 v0, v0, v8 ; 0A001100 exp 15, 32, 0, 0, 0, v7, v1, v0, v0 ; C400020F 00000107 v_mul_f32_e32 v24, v32, v24 ; 0A303120 s_waitcnt expcnt(0) ; BF8C0F0F v_mul_f32_e32 v0, v31, v27 ; 0A00371F v_mul_f32_e32 v1, v31, v28 ; 0A02391F v_sin_f32_e32 v29, v29 ; 7E3A531D v_sin_f32_e32 v25, v25 ; 7E325319 v_fract_f32_e32 v24, v24 ; 7E303718 v_mul_f32_e32 v0, v32, v0 ; 0A000120 v_mul_f32_e32 v1, v32, v1 ; 0A020320 v_fract_f32_e32 v0, v0 ; 7E003700 v_fract_f32_e32 v1, v1 ; 7E023701 v_sin_f32_e32 v24, v24 ; 7E305318 v_sin_f32_e32 v0, v0 ; 7E005300 v_sin_f32_e32 v1, v1 ; 7E025301 v_add_f32_e32 v25, v25, v29 ; 02323B19 v_mul_f32_e32 v29, s35, v25 ; 0A3A3223 v_mac_f32_e32 v29, s34, v24 ; 2C3A3022 v_mov_b32_e32 v24, 0xc2200000 ; 7E3002FF C2200000 v_mac_f32_e32 v15, s49, v21 ; 2C1E2A31 v_mul_f32_e32 v21, s46, v17 ; 0A2A222E v_add_f32_e32 v0, v0, v1 ; 02000300 v_mul_f32_e64 v25, |v25|, s33 ; D1050119 00004319 v_madak_f32_e32 v24, s32, v24, 0xc1200000 ; 30303020 C1200000 v_mac_f32_e32 v18, v11, v29 ; 2C243B0B v_mul_f32_e32 v24, v24, v25 ; 0A303318 v_mul_f32_e32 v22, s36, v22 ; 0A2C2C24 v_mac_f32_e32 v21, v21, v0 ; 2C2A0115 v_mac_f32_e32 v22, 0x3e4ccccd, v21 ; 2C2C2AFF 3E4CCCCD v_mul_f32_e32 v30, s31, v18 ; 0A3C241F v_mac_f32_e32 v17, v11, v24 ; 2C22310B v_mul_f32_e32 v29, s12, v18 ; 0A3A240C v_mac_f32_e32 v30, s29, v17 ; 2C3C221D v_mac_f32_e32 v19, v11, v22 ; 2C262D0B v_mul_f32_e32 v33, s11, v18 ; 0A42240B v_mac_f32_e32 v29, s10, v17 ; 2C3A220A v_add_f32_e32 v20, 0, v20 ; 02282880 v_mac_f32_e32 v30, s3, v19 ; 2C3C2603 v_mul_f32_e32 v18, s30, v18 ; 0A24241E v_mac_f32_e32 v33, s9, v17 ; 2C422209 v_mac_f32_e32 v29, s1, v19 ; 2C3A2601 v_mac_f32_e32 v30, s6, v20 ; 2C3C2806 v_mac_f32_e32 v18, s2, v17 ; 2C242202 v_mac_f32_e32 v33, s0, v19 ; 2C422600 v_mac_f32_e32 v29, s5, v20 ; 2C3A2805 v_mul_f32_e32 v0, s17, v30 ; 0A003C11 v_mac_f32_e32 v18, s4, v19 ; 2C242604 v_mac_f32_e32 v33, s7, v20 ; 2C422807 v_mac_f32_e32 v0, s13, v29 ; 2C003A0D v_mul_f32_e32 v1, s19, v30 ; 0A023C13 v_mul_f32_e32 v7, v9, v16 ; 0A0E2109 v_mul_f32_e32 v8, v10, v16 ; 0A10210A v_mul_f32_e32 v9, v13, v16 ; 0A12210D v_mul_f32_e32 v10, s21, v30 ; 0A143C15 v_mul_f32_e32 v13, s23, v30 ; 0A1A3C17 v_mul_f32_e32 v23, s48, v23 ; 0A2E2E30 v_mac_f32_e32 v18, s8, v20 ; 2C242808 v_mac_f32_e32 v0, s18, v33 ; 2C004212 v_mac_f32_e32 v1, s14, v29 ; 2C023A0E v_mac_f32_e32 v10, s15, v29 ; 2C143A0F v_mac_f32_e32 v13, s16, v29 ; 2C1A3A10 v_mac_f32_e32 v0, s25, v18 ; 2C002419 exp 15, 33, 0, 0, 0, v7, v8, v9, v23 ; C400021F 17090807 v_mac_f32_e32 v1, s20, v33 ; 2C024214 v_mac_f32_e32 v10, s22, v33 ; 2C144216 v_mac_f32_e32 v13, s24, v33 ; 2C1A4218 exp 15, 34, 0, 0, 0, v15, v14, v0, v0 ; C400022F 00000E0F v_mac_f32_e32 v1, s26, v18 ; 2C02241A v_mac_f32_e32 v10, s27, v18 ; 2C14241B v_mac_f32_e32 v13, s28, v18 ; 2C1A241C exp 15, 35, 0, 0, 0, v3, v4, v6, v5 ; C400023F 05060403 v_mov_b32_e32 v16, 0 ; 7E200280 v_xor_b32_e32 v1, v1, v26 ; 2A023501 exp 15, 36, 0, 0, 0, v11, v12, v16, v16 ; C400024F 10100C0B s_waitcnt expcnt(0) ; BF8C0F0F v_mad_f32 v3, 2.0, v10, -v13 ; D1C10003 843614F4 exp 15, 12, 0, 1, 0, v0, v1, v3, v13 ; C40008CF 0D030100 s_waitcnt expcnt(0) ; BF8C0F0F Shader epilog disassembly: s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 80 VGPRS: 36 Spilled VGPRs: 0 Code Size: 1912 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 7 ******************** [...] [traceshaders] glGenProgramPipelines(): [traceshaders] - pipeline=20 [traceshaders] glUseProgramStages(pipeline=20, stages=vert, program=74) [traceshaders] glUseProgramStages(pipeline=20, stages=frag, program=90) [traceshaders] glBindProgramPipeline(pipeline=20) Pixel Shader: Shader main disassembly: s_wqm_b64 exec, exec ; BEFE077E s_mov_b32 m0, s11 ; BEFC000B v_interp_p1_f32 v6, v2, 0, 0, [m0] ; D4180002 v_interp_p2_f32 v6, [v6], v3, 0, 0, [m0] ; D4190003 v_interp_p1_f32 v7, v2, 1, 0, [m0] ; D41C0102 v_interp_p2_f32 v7, [v7], v3, 1, 0, [m0] ; D41D0103 v_interp_p1_f32 v0, v2, 2, 0, [m0] ; D4000202 s_load_dwordx4 s[28:31], s[2:3], 0x10 ; C00A0701 00000010 v_interp_p2_f32 v0, [v0], v3, 2, 0, [m0] ; D4010203 v_interp_p1_f32 v8, v2, 0, 1, [m0] ; D4200402 v_interp_p2_f32 v8, [v8], v3, 0, 1, [m0] ; D4210403 v_interp_p1_f32 v9, v2, 1, 1, [m0] ; D4240502 v_interp_p2_f32 v9, [v9], v3, 1, 1, [m0] ; D4250503 v_interp_p1_f32 v1, v2, 2, 1, [m0] ; D4040602 v_interp_p2_f32 v1, [v1], v3, 2, 1, [m0] ; D4050603 v_interp_p1_f32 v10, v2, 3, 1, [m0] ; D4280702 v_interp_p2_f32 v10, [v10], v3, 3, 1, [m0] ; D4290703 v_interp_p1_f32 v4, v2, 0, 2, [m0] ; D4100802 v_interp_p2_f32 v4, [v4], v3, 0, 2, [m0] ; D4110803 v_interp_p1_f32 v5, v2, 1, 2, [m0] ; D4140902 v_mul_f32_e32 v2, v9, v6 ; 0A040D09 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s24, s[28:31], 0x2d0 ; C022060E 000002D0 s_buffer_load_dword s18, s[28:31], 0x3a0 ; C022048E 000003A0 s_buffer_load_dword s16, s[28:31], 0x3a4 ; C022040E 000003A4 s_buffer_load_dword s14, s[28:31], 0x3a8 ; C022038E 000003A8 s_buffer_load_dword s12, s[28:31], 0x3ac ; C022030E 000003AC s_buffer_load_dword s20, s[28:31], 0x3b0 ; C022050E 000003B0 s_buffer_load_dword s21, s[28:31], 0x3b4 ; C022054E 000003B4 s_buffer_load_dword s22, s[28:31], 0x3b8 ; C022058E 000003B8 s_buffer_load_dword s23, s[28:31], 0x3bc ; C02205CE 000003BC s_buffer_load_dword s19, s[28:31], 0x3c0 ; C02204CE 000003C0 s_buffer_load_dword s17, s[28:31], 0x3c4 ; C022044E 000003C4 s_buffer_load_dword s15, s[28:31], 0x3c8 ; C02203CE 000003C8 s_buffer_load_dword s13, s[28:31], 0x3cc ; C022034E 000003CC s_buffer_load_dword s9, s[28:31], 0x3d0 ; C022024E 000003D0 s_buffer_load_dword s8, s[28:31], 0x3d4 ; C022020E 000003D4 s_buffer_load_dword s7, s[28:31], 0x3d8 ; C02201CE 000003D8 s_buffer_load_dword s0, s[28:31], 0x3dc ; C022000E 000003DC s_buffer_load_dword s1, s[28:31], 0x3e0 ; C022004E 000003E0 s_buffer_load_dword s2, s[28:31], 0x3e4 ; C022008E 000003E4 s_buffer_load_dword s3, s[28:31], 0x3e8 ; C02200CE 000003E8 s_buffer_load_dword s6, s[28:31], 0x3ec ; C022018E 000003EC v_mad_f32 v2, v8, v7, -v2 ; D1C10002 840A0F08 v_interp_p2_f32 v5, [v5], v3, 1, 2, [m0] ; D4150903 v_mul_f32_e32 v2, v10, v2 ; 0A04050A v_mul_f32_e32 v3, v0, v0 ; 0A060100 v_mac_f32_e32 v3, v2, v2 ; 2C060502 v_mac_f32_e32 v3, v1, v1 ; 2C060301 v_cmp_nlt_f32_e32 vcc, 0, v3 ; 7C9C0680 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[26:27], vcc ; BE9A206A s_xor_b64 s[26:27], exec, s[26:27] ; 889A1A7E v_cmp_le_f32_e32 vcc, 0, v3 ; 7C860680 v_mul_f32_e32 v6, 0x7f7fffff, v3 ; 0A0C06FF 7F7FFFFF v_mov_b32_e32 v7, 0xff7fffff ; 7E0E02FF FF7FFFFF v_cndmask_b32_e32 v6, v7, v6, vcc ; 000C0D07 s_or_saveexec_b64 s[26:27], s[26:27] ; BE9A211A s_load_dwordx8 s[28:35], s[4:5], 0x0 ; C00E0702 00000000 s_load_dwordx4 s[36:39], s[4:5], 0x30 ; C00A0902 00000030 s_waitcnt lgkmcnt(0) ; BF8C007F s_xor_b64 exec, exec, s[26:27] ; 88FE1A7E v_rsq_f32_e32 v6, v3 ; 7E0C4903 s_or_b64 exec, exec, s[26:27] ; 87FE1A7E v_mul_f32_e32 v2, v2, v6 ; 0A040D02 v_mul_f32_e32 v1, v1, v6 ; 0A020D01 v_mad_f32 v2, v2, 0.5, 0.5 ; D1C10002 03C1E102 v_mad_f32 v11, v1, 0.5, 0.5 ; D1C1000B 03C1E101 v_add_f32_e64 v1, 0, v2 clamp ; D1018001 00020480 v_mov_b32_e32 v2, 0xc0400000 ; 7E0402FF C0400000 v_add_f32_e32 v2, s24, v2 ; 02040418 v_mul_f32_e32 v2, 0x3b000000, v2 ; 0A0404FF 3B000000 v_add_f32_e64 v2, 0, v2 clamp ; D1018002 00020480 image_sample v[7:10], v[4:5], s[28:35], s[36:39] dmask:0xf ; F0800F00 01270704 v_cmp_gt_f32_e32 vcc, 0, v2 ; 7C880480 v_sqrt_f32_e32 v2, v2 ; 7E044F02 v_mul_f32_e32 v0, v0, v6 ; 0A000D00 s_waitcnt vmcnt(0) ; BF8C0F70 v_mul_f32_e32 v5, s21, v8 ; 0A0A1015 v_mul_f32_e32 v6, s22, v8 ; 0A0C1016 v_cndmask_b32_e64 v3, v2, 0, vcc ; D1000003 01A90102 v_mul_f32_e32 v2, s20, v8 ; 0A041014 v_mul_f32_e32 v8, s23, v8 ; 0A101017 v_mac_f32_e32 v2, s18, v7 ; 2C040E12 v_mac_f32_e32 v5, s16, v7 ; 2C0A0E10 v_mac_f32_e32 v6, s14, v7 ; 2C0C0E0E v_mac_f32_e32 v8, s12, v7 ; 2C100E0C v_mac_f32_e32 v2, s19, v9 ; 2C041213 v_mac_f32_e32 v5, s17, v9 ; 2C0A1211 v_mac_f32_e32 v6, s15, v9 ; 2C0C120F v_mac_f32_e32 v8, s13, v9 ; 2C10120D v_mad_f32 v0, v0, 0.5, 0.5 ; D1C10000 03C1E100 v_mac_f32_e32 v5, s8, v10 ; 2C0A1408 v_mac_f32_e32 v6, s7, v10 ; 2C0C1407 v_mac_f32_e32 v2, s9, v10 ; 2C041409 v_mac_f32_e32 v8, s0, v10 ; 2C101400 v_add_f32_e32 v4, s1, v2 ; 02080401 v_add_f32_e64 v0, 0, v0 clamp ; D1018000 00020080 v_add_f32_e32 v5, s2, v5 ; 020A0A02 v_add_f32_e32 v6, s3, v6 ; 020C0C03 v_add_f32_e32 v7, s6, v8 ; 020E1006 v_add_f32_e64 v2, 0, v11 clamp ; D1018002 00021680 Shader epilog disassembly: v_cvt_pkrtz_f16_f32_e64 v0, v0, v1 ; D2960000 00020300 v_cvt_pkrtz_f16_f32_e64 v1, v2, v3 ; D2960001 00020702 exp 15, 0, 1, 0, 0, v0, v1, v0, v0 ; C400040F 00000100 s_waitcnt expcnt(0) ; BF8C0F0F v_cvt_pkrtz_f16_f32_e64 v0, v4, v5 ; D2960000 00020B04 v_cvt_pkrtz_f16_f32_e64 v1, v6, v7 ; D2960001 00020F06 exp 15, 1, 1, 1, 1, v0, v1, v0, v0 ; C4001C1F 00000100 s_endpgm ; BF810000 *** SHADER CONFIG *** SPI_PS_INPUT_ADDR = 0xd077 SPI_PS_INPUT_ENA = 0x0002 *** SHADER STATS *** SGPRS: 80 VGPRS: 16 Spilled VGPRs: 0 Code Size: 616 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** [...] [traceshaders] glGenProgramPipelines(): [traceshaders] - pipeline=105 [traceshaders] glUseProgramStages(pipeline=105, stages=vert, program=384) [traceshaders] glUseProgramStages(pipeline=105, stages=frag, program=382) [traceshaders] glBindProgramPipeline(pipeline=105) Vertex Shader as VS: Shader prolog disassembly: v_add_i32_e32 v4, vcc, s12, v0 ; 3208000C v_mov_b32_e32 v5, v4 ; 7E0A0304 v_mov_b32_e32 v6, v4 ; 7E0C0304 v_mov_b32_e32 v7, v4 ; 7E0E0304 v_mov_b32_e32 v8, v4 ; 7E100304 v_mov_b32_e32 v9, v4 ; 7E120304 v_mov_b32_e32 v10, v4 ; 7E140304 Shader main disassembly: s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C00A0105 00000000 s_load_dwordx4 s[12:15], s[10:11], 0x10 ; C00A0305 00000010 s_load_dwordx4 s[16:19], s[10:11], 0x20 ; C00A0405 00000020 v_mov_b32_e32 v1, 0x3f9ae148 ; 7E0202FF 3F9AE148 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[11:14], v4, s[4:7], 0 idxen ; E00C2000 80010B04 s_nop 0 ; BF800000 buffer_load_format_xyzw v[15:18], v5, s[12:15], 0 idxen ; E00C2000 80030F05 s_load_dwordx4 s[12:15], s[10:11], 0x30 ; C00A0305 00000030 buffer_load_format_xyzw v[3:6], v6, s[16:19], 0 idxen ; E00C2000 80040306 s_load_dwordx4 s[16:19], s[10:11], 0x40 ; C00A0405 00000040 s_load_dwordx4 s[4:7], s[2:3], 0x10 ; C00A0101 00000010 s_load_dwordx4 s[0:3], s[2:3], 0x20 ; C00A0001 00000020 s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071 buffer_load_format_xyzw v[18:21], v7, s[12:15], 0 idxen ; E00C2000 80031207 s_load_dwordx4 s[12:15], s[10:11], 0x50 ; C00A0305 00000050 s_load_dwordx4 s[8:11], s[10:11], 0x60 ; C00A0205 00000060 s_waitcnt vmcnt(0) ; BF8C0F70 buffer_load_format_xyzw v[20:23], v8, s[16:19], 0 idxen ; E00C2000 80041408 s_buffer_load_dword s27, s[4:7], 0x1ec ; C02206C2 000001EC s_buffer_load_dword s51, s[4:7], 0x280 ; C0220CC2 00000280 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 buffer_load_format_xyzw v[22:25], v9, s[12:15], 0 idxen ; E00C2000 80031609 s_nop 0 ; BF800000 buffer_load_format_xyzw v[7:10], v10, s[8:11], 0 idxen ; E00C2000 8002070A s_buffer_load_dword s11, s[4:7], 0x12c ; C02202C2 0000012C v_mov_b32_e32 v0, s27 ; 7E00021B s_waitcnt vmcnt(0) ; BF8C0F70 v_bfrev_b32_e32 v9, 1 ; 7E125881 s_buffer_load_dword s54, s[4:7], 0x290 ; C0220D82 00000290 s_buffer_load_dword s36, s[4:7], 0x220 ; C0220902 00000220 s_waitcnt lgkmcnt(0) ; BF8C007F v_mac_f32_e32 v0, s11, v1 ; 2C00020B v_floor_f32_e64 v1, |v0| ; D15F0101 00000100 v_sub_f32_e64 v1, |v0|, v1 ; D1020101 00020300 v_xor_b32_e32 v10, v1, v9 ; 2A141301 v_cmp_ge_f32_e64 vcc, v0, -v0 ; D046006A 40020100 v_cndmask_b32_e32 v1, v10, v1, vcc ; 0002030A v_mul_f32_e32 v10, 0x3f44ec50, v0 ; 0A1400FF 3F44EC50 v_floor_f32_e64 v24, |v10| ; D15F0118 0000010A v_sub_f32_e64 v24, |v10|, v24 ; D1020118 0002310A v_xor_b32_e32 v25, v24, v9 ; 2A321318 v_cmp_ge_f32_e64 vcc, v10, -v10 ; D046006A 4002150A v_cndmask_b32_e32 v10, v25, v24, vcc ; 00143119 v_mov_b32_e32 v24, s51 ; 7E300233 s_buffer_load_dword s28, s[4:7], 0x200 ; C0220702 00000200 s_buffer_load_dword s35, s[4:7], 0x21c ; C02208C2 0000021C s_buffer_load_dword s26, s[4:7], 0x1e8 ; C0220682 000001E8 s_buffer_load_dword s29, s[4:7], 0x204 ; C0220742 00000204 s_buffer_load_dword s31, s[4:7], 0x20c ; C02207C2 0000020C s_buffer_load_dword s41, s[4:7], 0x24c ; C0220A42 0000024C s_buffer_load_dword s38, s[4:7], 0x240 ; C0220982 00000240 s_buffer_load_dword s40, s[4:7], 0x248 ; C0220A02 00000248 s_buffer_load_dword s39, s[4:7], 0x244 ; C02209C2 00000244 s_buffer_load_dword s52, s[4:7], 0x284 ; C0220D02 00000284 s_buffer_load_dword s55, s[4:7], 0x294 ; C0220DC2 00000294 s_buffer_load_dword s33, s[4:7], 0x214 ; C0220842 00000214 s_buffer_load_dword s53, s[4:7], 0x288 ; C0220D42 00000288 s_buffer_load_dword s32, s[4:7], 0x210 ; C0220802 00000210 s_buffer_load_dword s20, s[4:7], 0x0 ; C0220502 00000000 s_buffer_load_dword s21, s[4:7], 0x4 ; C0220542 00000004 s_buffer_load_dword s22, s[4:7], 0x8 ; C0220582 00000008 s_buffer_load_dword s23, s[4:7], 0xc ; C02205C2 0000000C s_buffer_load_dword s24, s[4:7], 0x10 ; C0220602 00000010 s_buffer_load_dword s16, s[4:7], 0x14 ; C0220402 00000014 s_buffer_load_dword s17, s[4:7], 0x18 ; C0220442 00000018 s_buffer_load_dword s18, s[4:7], 0x1c ; C0220482 0000001C s_buffer_load_dword s19, s[4:7], 0x20 ; C02204C2 00000020 s_buffer_load_dword s12, s[4:7], 0x24 ; C0220302 00000024 s_buffer_load_dword s13, s[4:7], 0x28 ; C0220342 00000028 s_buffer_load_dword s14, s[4:7], 0x2c ; C0220382 0000002C s_buffer_load_dword s15, s[4:7], 0x30 ; C02203C2 00000030 s_buffer_load_dword s8, s[4:7], 0x34 ; C0220202 00000034 s_buffer_load_dword s9, s[4:7], 0x38 ; C0220242 00000038 s_buffer_load_dword s10, s[4:7], 0x3c ; C0220282 0000003C s_buffer_load_dword s25, s[4:7], 0x1e4 ; C0220642 000001E4 s_buffer_load_dword s30, s[4:7], 0x208 ; C0220782 00000208 s_buffer_load_dword s34, s[4:7], 0x218 ; C0220882 00000218 s_buffer_load_dword s37, s[4:7], 0x224 ; C0220942 00000224 s_buffer_load_dword s42, s[4:7], 0x250 ; C0220A82 00000250 s_buffer_load_dword s43, s[4:7], 0x254 ; C0220AC2 00000254 s_buffer_load_dword s44, s[4:7], 0x258 ; C0220B02 00000258 s_buffer_load_dword s45, s[4:7], 0x260 ; C0220B42 00000260 s_buffer_load_dword s46, s[4:7], 0x264 ; C0220B82 00000264 s_buffer_load_dword s47, s[4:7], 0x268 ; C0220BC2 00000268 s_buffer_load_dword s48, s[4:7], 0x270 ; C0220C02 00000270 s_buffer_load_dword s49, s[4:7], 0x274 ; C0220C42 00000274 s_buffer_load_dword s50, s[4:7], 0x278 ; C0220C82 00000278 s_buffer_load_dword s4, s[4:7], 0x298 ; C0220102 00000298 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v30, s53 ; 7E3C0235 s_buffer_load_dword s57, s[0:3], 0x10 ; C0220E40 00000010 s_buffer_load_dword s58, s[0:3], 0x14 ; C0220E80 00000014 v_mac_f32_e32 v24, s54, v11 ; 2C301636 v_mad_f32 v11, s36, v24, v0 ; D1C1000B 04023024 v_mul_f32_e32 v11, 0.5, v11 ; 0A1616F0 v_floor_f32_e64 v25, |v11| ; D15F0119 0000010B v_sub_f32_e64 v25, |v11|, v25 ; D1020119 0002330B v_xor_b32_e32 v26, v25, v9 ; 2A341319 v_cmp_ge_f32_e64 vcc, v11, -v11 ; D046006A 4002170B v_cndmask_b32_e32 v11, v26, v25, vcc ; 0016331A v_add_f32_e32 v25, s28, v0 ; 0232001C v_mac_f32_e32 v0, s35, v24 ; 2C003023 v_floor_f32_e64 v26, |v0| ; D15F011A 00000100 v_sub_f32_e64 v26, |v0|, v26 ; D102011A 00023500 v_xor_b32_e32 v27, v26, v9 ; 2A36131A v_cmp_ge_f32_e64 vcc, v0, -v0 ; D046006A 40020100 v_cndmask_b32_e32 v0, v27, v26, vcc ; 0000351B v_mad_f32 v26, s26, v24, -v25 ; D1C1001A 8466301A v_mul_f32_e32 v26, 0x3ee8ba2e, v26 ; 0A3434FF 3EE8BA2E v_floor_f32_e64 v27, |v26| ; D15F011B 0000011A v_sub_f32_e64 v27, |v26|, v27 ; D102011B 0002371A v_xor_b32_e32 v28, v27, v9 ; 2A38131B v_cmp_ge_f32_e64 vcc, v26, -v26 ; D046006A 4002351A v_cndmask_b32_e32 v26, v28, v27, vcc ; 0034371C v_mad_f32 v27, s29, v24, -v25 ; D1C1001B 8466301D v_mul_f32_e32 v27, 0x3e50fac6, v27 ; 0A3636FF 3E50FAC6 v_floor_f32_e64 v28, |v27| ; D15F011C 0000011B v_sub_f32_e64 v28, |v27|, v28 ; D102011C 0002391B v_xor_b32_e32 v29, v28, v9 ; 2A3A131C v_cmp_ge_f32_e64 vcc, v27, -v27 ; D046006A 4002371B v_mad_f32 v25, s31, v24, -v25 ; D1C10019 8466301F v_cndmask_b32_e32 v27, v29, v28, vcc ; 0036391D v_floor_f32_e64 v28, |v25| ; D15F011C 00000119 v_sub_f32_e64 v28, |v25|, v28 ; D102011C 00023919 v_xor_b32_e32 v29, v28, v9 ; 2A3A131C v_cmp_ge_f32_e64 vcc, v25, -v25 ; D046006A 40023319 v_cndmask_b32_e32 v25, v29, v28, vcc ; 0032391D v_mov_b32_e32 v28, s41 ; 7E380229 v_mac_f32_e32 v28, s38, v7 ; 2C380E26 v_mov_b32_e32 v7, s40 ; 7E0E0228 v_mac_f32_e32 v7, s39, v8 ; 2C0E1027 v_mov_b32_e32 v8, s52 ; 7E100234 v_mac_f32_e32 v8, s55, v12 ; 2C101837 v_mov_b32_e32 v12, 0x40c90fdb ; 7E1802FF 40C90FDB v_mul_f32_e32 v11, v12, v11 ; 0A16170C v_mov_b32_e32 v29, 0x3e22f983 ; 7E3A02FF 3E22F983 v_mul_f32_e32 v0, v12, v0 ; 0A00010C v_mul_f32_e32 v11, v29, v11 ; 0A16171D v_mul_f32_e32 v0, v29, v0 ; 0A00011D v_fract_f32_e32 v11, v11 ; 7E16370B v_fract_f32_e32 v0, v0 ; 7E003700 v_sin_f32_e32 v11, v11 ; 7E16530B v_sin_f32_e32 v0, v0 ; 7E005300 v_mul_f32_e32 v26, v12, v26 ; 0A34350C v_mul_f32_e32 v26, v29, v26 ; 0A34351D v_fract_f32_e32 v26, v26 ; 7E34371A v_add_f32_e32 v0, v0, v11 ; 02001700 v_mul_f32_e32 v11, v12, v27 ; 0A16370C v_mul_f32_e32 v11, v29, v11 ; 0A16171D v_fract_f32_e32 v11, v11 ; 7E16370B v_mul_f32_e32 v25, v12, v25 ; 0A32330C v_mul_f32_e32 v25, v29, v25 ; 0A32331D v_mul_f32_e32 v1, v12, v1 ; 0A02030C v_mul_f32_e32 v10, v12, v10 ; 0A14150C v_sin_f32_e32 v11, v11 ; 7E16530B v_sin_f32_e32 v26, v26 ; 7E34531A v_fract_f32_e32 v25, v25 ; 7E323719 v_mul_f32_e32 v1, v29, v1 ; 0A02031D v_mul_f32_e32 v10, v29, v10 ; 0A14151D v_fract_f32_e32 v1, v1 ; 7E023701 v_fract_f32_e32 v10, v10 ; 7E14370A v_sin_f32_e32 v25, v25 ; 7E325319 v_sin_f32_e32 v1, v1 ; 7E025301 v_sin_f32_e32 v10, v10 ; 7E14530A v_add_f32_e32 v11, v26, v11 ; 0216171A v_mul_f32_e32 v26, s33, v11 ; 0A341621 v_mac_f32_e32 v26, s32, v25 ; 2C343220 v_mov_b32_e32 v25, 0xc1200000 ; 7E3202FF C1200000 v_mov_b32_e32 v27, 0xc2200000 ; 7E3602FF C2200000 v_mac_f32_e32 v30, s4, v13 ; 2C3C1A04 v_add_f32_e32 v1, v1, v10 ; 02021501 v_mul_f32_e32 v13, s34, v24 ; 0A1A3022 v_mul_f32_e64 v11, |v11|, s30 ; D105010B 00003D0B v_mac_f32_e32 v25, s25, v27 ; 2C323619 v_mac_f32_e32 v8, v22, v26 ; 2C103516 v_mul_f32_e32 v11, v25, v11 ; 0A161719 v_mac_f32_e32 v13, v13, v1 ; 2C1A030D v_mul_f32_e32 v0, s37, v0 ; 0A000025 v_mac_f32_e32 v0, 0x3e4ccccd, v13 ; 2C001AFF 3E4CCCCD v_mac_f32_e32 v24, v22, v11 ; 2C301716 v_mul_f32_e32 v27, s16, v8 ; 0A361010 v_mul_f32_e32 v26, s24, v8 ; 0A341018 v_mul_f32_e32 v31, s17, v8 ; 0A3E1011 v_mul_f32_e32 v8, s18, v8 ; 0A101012 v_mac_f32_e32 v30, v22, v0 ; 2C3C0116 v_mac_f32_e32 v27, s21, v24 ; 2C363015 s_buffer_load_dword s59, s[0:3], 0x18 ; C0220EC0 00000018 s_buffer_load_dword s60, s[0:3], 0x1c ; C0220F00 0000001C v_mac_f32_e32 v26, s20, v24 ; 2C343014 v_mac_f32_e32 v31, s22, v24 ; 2C3E3016 v_mac_f32_e32 v8, s23, v24 ; 2C103017 s_buffer_load_dword s5, s[0:3], 0x0 ; C0220140 00000000 s_buffer_load_dword s6, s[0:3], 0x4 ; C0220180 00000004 s_buffer_load_dword s7, s[0:3], 0x8 ; C02201C0 00000008 s_buffer_load_dword s56, s[0:3], 0xc ; C0220E00 0000000C v_mac_f32_e32 v27, s12, v30 ; 2C363C0C v_add_f32_e32 v0, 0, v14 ; 02001C80 v_mac_f32_e32 v26, s19, v30 ; 2C343C13 v_mac_f32_e32 v31, s13, v30 ; 2C3E3C0D v_mac_f32_e32 v8, s14, v30 ; 2C103C0E v_mac_f32_e32 v27, s8, v0 ; 2C360008 s_buffer_load_dword s61, s[0:3], 0x20 ; C0220F40 00000020 s_buffer_load_dword s62, s[0:3], 0x24 ; C0220F80 00000024 s_buffer_load_dword s63, s[0:3], 0x28 ; C0220FC0 00000028 s_buffer_load_dword s64, s[0:3], 0x2c ; C0221000 0000002C s_buffer_load_dword s65, s[0:3], 0x30 ; C0221040 00000030 s_buffer_load_dword s66, s[0:3], 0x34 ; C0221080 00000034 s_buffer_load_dword s67, s[0:3], 0x38 ; C02210C0 00000038 s_buffer_load_dword s72, s[0:3], 0x3c ; C0221200 0000003C s_buffer_load_dword s0, s[0:3], 0x4c ; C0220000 0000004C v_mac_f32_e32 v26, s15, v0 ; 2C34000F v_mac_f32_e32 v31, s9, v0 ; 2C3E0009 v_mac_f32_e32 v8, s10, v0 ; 2C10000A s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s57, v27 ; 0A003639 exp 15, 32, 0, 0, 0, v28, v7, v0, v0 ; C400020F 0000071C exp 15, 33, 0, 0, 0, v18, v19, v21, v20 ; C400021F 14151312 v_mul_f32_e32 v14, s58, v27 ; 0A1C363A s_waitcnt expcnt(0) ; BF8C0F0F v_mul_f32_e32 v18, s59, v27 ; 0A24363B v_mul_f32_e32 v19, s60, v27 ; 0A26363C v_add_f32_e32 v1, v16, v16 ; 02022110 v_add_f32_e32 v10, v17, v17 ; 02142311 v_mad_f32 v11, 2.0, v17, -1.0 ; D1C1000B 03CE22F4 v_add_f32_e32 v17, v5, v5 ; 02220B05 v_mad_f32 v5, 2.0, v5, -1.0 ; D1C10005 03CE0AF4 v_add_f32_e32 v12, v15, v15 ; 02181F0F v_mad_f32 v13, 2.0, v15, -1.0 ; D1C1000D 03CE1EF4 v_mov_b32_e32 v15, 0 ; 7E1E0280 v_mac_f32_e32 v0, s5, v26 ; 2C003405 v_mac_f32_e32 v14, s6, v26 ; 2C1C3406 v_mac_f32_e32 v18, s7, v26 ; 2C243407 v_mac_f32_e32 v19, s56, v26 ; 2C263438 v_mad_f32 v7, 2.0, v16, -1.0 ; D1C10007 03CE20F4 v_add_f32_e32 v16, v3, v3 ; 02200703 exp 15, 34, 0, 0, 0, v22, v23, v15, v15 ; C400022F 0F0F1716 v_mad_f32 v3, 2.0, v3, -1.0 ; D1C10003 03CE06F4 s_waitcnt expcnt(0) ; BF8C0F0F v_add_f32_e32 v15, v4, v4 ; 021E0904 v_mad_f32 v4, 2.0, v4, -1.0 ; D1C10004 03CE08F4 v_mad_f32 v1, v1, v5, -v5 ; D1C10001 84160B01 v_mac_f32_e32 v0, s61, v31 ; 2C003E3D v_mac_f32_e32 v14, s62, v31 ; 2C1C3E3E v_mac_f32_e32 v18, s63, v31 ; 2C243E3F v_mac_f32_e32 v19, s64, v31 ; 2C263E40 v_mul_f32_e32 v20, s0, v27 ; 0A283600 v_mad_f32 v10, v10, v3, -v3 ; D1C1000A 840E070A v_mad_f32 v1, v4, v11, -v1 ; D1C10001 84061704 v_add_f32_e32 v6, v6, v6 ; 020C0D06 v_mad_f32 v11, v12, v4, -v4 ; D1C1000B 8412090C v_mad_f32 v10, v5, v13, -v10 ; D1C1000A 842A1B05 v_mad_f32 v7, v3, v7, -v11 ; D1C10007 842E0F03 v_mad_f32 v1, v6, v1, -v1 ; D1C10001 84060306 v_mac_f32_e32 v0, s65, v8 ; 2C001041 v_mac_f32_e32 v14, s66, v8 ; 2C1C1042 v_mac_f32_e32 v18, s67, v8 ; 2C241043 v_mac_f32_e32 v19, s72, v8 ; 2C261048 v_mul_f32_e32 v8, s0, v26 ; 0A103400 v_mul_f32_e32 v22, s45, v20 ; 0A2C282D v_mul_f32_e32 v23, s46, v20 ; 0A2E282E v_mad_f32 v7, v6, v7, -v7 ; D1C10007 841E0F06 v_mad_f32 v12, v17, v1, -v1 ; D1C1000C 84060311 v_mul_f32_e32 v21, s0, v31 ; 0A2A3E00 v_mul_f32_e32 v20, s47, v20 ; 0A28282F v_mad_f32 v22, -v8, s42, -v22 ; D1C10016 A4585508 v_mad_f32 v10, v6, v10, -v10 ; D1C1000A 842A1506 v_mad_f32 v3, v7, v3, -v12 ; D1C10003 84320707 v_mad_f32 v12, v16, v10, -v10 ; D1C1000C 842A1510 v_mad_f32 v23, -v8, s43, -v23 ; D1C10017 A45C5708 v_mad_f32 v8, -v8, s44, -v20 ; D1C10008 A4505908 v_mad_f32 v20, -v21, s48, v22 ; D1C10014 24586115 v_mad_f32 v11, v15, v7, -v7 ; D1C1000B 841E0F0F v_mad_f32 v12, v1, v4, -v12 ; D1C1000C 84320901 v_mad_f32 v22, -v21, s49, v23 ; D1C10016 245C6315 v_mul_f32_e32 v1, v20, v1 ; 0A020314 v_mad_f32 v11, v10, v5, -v11 ; D1C1000B 842E0B0A v_mac_f32_e32 v1, v22, v10 ; 2C021516 v_mad_f32 v10, v16, v20, -v20 ; D1C1000A 84522910 v_mac_f32_e32 v10, v22, v4 ; 2C140916 v_mad_f32 v4, v6, v11, -v11 ; D1C10004 842E1706 v_mul_f32_e32 v4, v20, v4 ; 0A080914 v_mad_f32 v3, v6, v3, -v3 ; D1C10003 840E0706 v_mad_f32 v8, -v21, s50, v8 ; D1C10008 24206515 v_mac_f32_e32 v4, v22, v3 ; 2C080716 v_mad_f32 v3, v6, v12, -v12 ; D1C10003 84321906 v_mac_f32_e32 v4, v8, v3 ; 2C080708 v_mac_f32_e32 v1, v8, v7 ; 2C020F08 v_mac_f32_e32 v10, v8, v5 ; 2C140B08 exp 15, 35, 0, 0, 0, v0, v14, v18, v19 ; C400023F 13120E00 v_mov_b32_e32 v3, 1.0 ; 7E0602F2 exp 15, 36, 0, 0, 0, v4, v1, v10, v3 ; C400024F 030A0104 s_waitcnt expcnt(0) ; BF8C0F0F v_xor_b32_e32 v1, v14, v9 ; 2A02130E v_mad_f32 v3, 2.0, v18, -v19 ; D1C10003 844E24F4 exp 15, 12, 0, 1, 0, v0, v1, v3, v19 ; C40008CF 13030100 s_waitcnt expcnt(0) ; BF8C0F0F Shader epilog disassembly: s_endpgm ; BF810000 *** SHADER STATS *** SGPRS: 80 VGPRS: 32 Spilled VGPRs: 0 Code Size: 1840 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 8 ******************** Pixel Shader: Shader main disassembly: s_wqm_b64 exec, exec ; BEFE077E s_mov_b32 m0, s11 ; BEFC000B v_interp_p1_f32 v0, v2, 0, 0, [m0] ; D4000002 v_interp_p2_f32 v0, [v0], v3, 0, 0, [m0] ; D4010003 v_interp_p1_f32 v1, v2, 1, 0, [m0] ; D4040102 v_interp_p2_f32 v1, [v1], v3, 1, 0, [m0] ; D4050103 v_interp_p1_f32 v7, v2, 0, 1, [m0] ; D41C0402 v_interp_p2_f32 v7, [v7], v3, 0, 1, [m0] ; D41D0403 v_interp_p1_f32 v8, v2, 1, 1, [m0] ; D4200502 v_interp_p2_f32 v8, [v8], v3, 1, 1, [m0] ; D4210503 v_interp_p1_f32 v5, v2, 0, 2, [m0] ; D4140802 v_interp_p2_f32 v5, [v5], v3, 0, 2, [m0] ; D4150803 v_interp_p1_f32 v4, v2, 1, 2, [m0] ; D4100902 v_interp_p2_f32 v4, [v4], v3, 1, 2, [m0] ; D4110903 v_interp_p1_f32 v6, v2, 3, 2, [m0] ; D4180B02 v_interp_p2_f32 v6, [v6], v3, 3, 2, [m0] ; D4190B03 v_interp_p1_f32 v9, v2, 0, 3, [m0] ; D4240C02 v_interp_p2_f32 v9, [v9], v3, 0, 3, [m0] ; D4250C03 v_interp_p1_f32 v10, v2, 1, 3, [m0] ; D4280D02 v_interp_p2_f32 v10, [v10], v3, 1, 3, [m0] ; D4290D03 v_interp_p1_f32 v2, v2, 2, 3, [m0] ; D4080E02 v_mul_f32_e32 v11, v9, v9 ; 0A161309 v_interp_p2_f32 v2, [v2], v3, 2, 3, [m0] ; D4090E03 v_mac_f32_e32 v11, v10, v10 ; 2C16150A v_mac_f32_e32 v11, v2, v2 ; 2C160502 v_writelane_b32 v22, s10, 0 ; D28A0016 0001000A v_cmp_nlt_f32_e32 vcc, 0, v11 ; 7C9C1680 s_and_saveexec_b64 s[0:1], vcc ; BE80206A s_xor_b64 s[0:1], exec, s[0:1] ; 8880007E v_cmp_le_f32_e32 vcc, 0, v11 ; 7C861680 v_mul_f32_e32 v3, 0x7f7fffff, v11 ; 0A0616FF 7F7FFFFF v_mov_b32_e32 v12, 0xff7fffff ; 7E1802FF FF7FFFFF v_cndmask_b32_e32 v3, v12, v3, vcc ; 0006070C s_or_saveexec_b64 s[0:1], s[0:1] ; BE802100 s_xor_b64 exec, exec, s[0:1] ; 88FE007E v_rsq_f32_e32 v3, v11 ; 7E06490B s_or_b64 exec, exec, s[0:1] ; 87FE007E s_load_dwordx4 s[8:11], s[2:3], 0x10 ; C00A0201 00000010 s_load_dwordx4 s[20:23], s[2:3], 0x20 ; C00A0501 00000020 v_mul_f32_e32 v9, v9, v3 ; 0A120709 v_mul_f32_e32 v11, v2, v3 ; 0A160702 v_mul_f32_e32 v10, v10, v3 ; 0A14070A v_mad_f32 v9, v11, 0, -v9 ; D1C10009 8425010B v_mad_f32 v10, v11, 0, -v10 ; D1C1000A 8429010B v_mac_f32_e32 v11, v2, v3 ; 2C160702 v_mad_f32 v2, -v3, v2, v11 ; D1C10002 242E0503 v_mov_b32_e32 v11, 0x3f13cd3a ; 7E1602FF 3F13CD3A v_mul_f32_e32 v3, v11, v2 ; 0A06050B v_mov_b32_e32 v14, 0xbf3504f3 ; 7E1C02FF BF3504F3 v_mov_b32_e32 v12, v3 ; 7E180303 v_mac_f32_e32 v3, v9, v14 ; 2C061D09 v_mov_b32_e32 v14, 0xbed105ec ; 7E1C02FF BED105EC v_mul_f32_e32 v9, 0x3f3504f3, v9 ; 0A1212FF 3F3504F3 v_mac_f32_e32 v12, 0x3f5105ec, v10 ; 2C1814FF 3F5105EC v_mac_f32_e32 v9, v14, v10 ; 2C12150E v_mac_f32_e32 v9, v11, v2 ; 2C12050B v_mac_f32_e32 v3, v14, v10 ; 2C06150E v_add_f32_e64 v12, 0, v12 clamp ; D101800C 00021880 v_max_f32_e32 v10, 0x358637bd, v12 ; 161418FF 358637BD v_add_f32_e64 v3, 0, v3 clamp ; D1018003 00020680 v_add_f32_e64 v9, 0, v9 clamp ; D1018009 00021280 v_mov_b32_e32 v2, 0xff7fffff ; 7E0402FF FF7FFFFF v_cmp_nge_f32_e32 vcc, 0, v10 ; 7C921480 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[0:1], vcc ; BE80206A s_xor_b64 s[0:1], exec, s[0:1] ; 8880007E v_log_f32_e32 v2, v10 ; 7E04430A s_or_b64 exec, exec, s[0:1] ; 87FE007E s_buffer_load_dword s2, s[20:23], 0x150 ; C022008A 00000150 s_buffer_load_dword s15, s[8:11], 0x0 ; C02203C4 00000000 s_buffer_load_dword s14, s[8:11], 0x4 ; C0220384 00000004 s_buffer_load_dword s19, s[8:11], 0x8 ; C02204C4 00000008 s_buffer_load_dword s72, s[8:11], 0xc ; C0221204 0000000C s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v22, s2, 3 ; D28A0016 00010602 s_buffer_load_dword s2, s[20:23], 0x154 ; C022008A 00000154 s_buffer_load_dword s9, s[20:23], 0x160 ; C022024A 00000160 s_buffer_load_dword s11, s[20:23], 0x164 ; C02202CA 00000164 s_buffer_load_dword s12, s[20:23], 0x168 ; C022030A 00000168 s_buffer_load_dword s65, s[20:23], 0x180 ; C022104A 00000180 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v22, s2, 2 ; D28A0016 00010402 s_buffer_load_dword s2, s[20:23], 0x158 ; C022008A 00000158 s_buffer_load_dword s66, s[20:23], 0x184 ; C022108A 00000184 s_buffer_load_dword s67, s[20:23], 0x188 ; C02210CA 00000188 s_buffer_load_dword s60, s[20:23], 0x1c0 ; C0220F0A 000001C0 s_buffer_load_dword s62, s[20:23], 0x1c4 ; C0220F8A 000001C4 s_buffer_load_dword s13, s[20:23], 0x1c8 ; C022034A 000001C8 s_buffer_load_dword s8, s[20:23], 0x1cc ; C022020A 000001CC s_buffer_load_dword s61, s[20:23], 0x1d0 ; C0220F4A 000001D0 s_buffer_load_dword s10, s[20:23], 0x2c0 ; C022028A 000002C0 s_buffer_load_dword s63, s[20:23], 0x2c4 ; C0220FCA 000002C4 s_buffer_load_dword s64, s[20:23], 0x2c8 ; C022100A 000002C8 s_buffer_load_dword s17, s[20:23], 0x2d0 ; C022044A 000002D0 s_buffer_load_dword s18, s[20:23], 0x2d4 ; C022048A 000002D4 s_buffer_load_dword s16, s[20:23], 0x2d8 ; C022040A 000002D8 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v22, s2, 1 ; D28A0016 00010202 v_max_f32_e32 v10, 0x358637bd, v3 ; 161406FF 358637BD v_cmp_nge_f32_e32 vcc, 0, v10 ; 7C921480 v_mov_b32_e32 v3, 0xff7fffff ; 7E0602FF FF7FFFFF s_and_saveexec_b64 vcc, vcc ; BEEA206A s_xor_b64 vcc, exec, vcc ; 88EA6A7E v_log_f32_e32 v3, v10 ; 7E06430A s_or_b64 exec, exec, vcc ; 87FE6A7E s_load_dwordx4 s[0:3], s[4:5], 0xb0 ; C00A0002 000000B0 s_load_dwordx8 s[52:59], s[4:5], 0x80 ; C00E0D02 00000080 s_load_dwordx4 s[32:35], s[4:5], 0xf0 ; C00A0802 000000F0 s_load_dwordx4 s[48:51], s[4:5], 0x130 ; C00A0C02 00000130 s_load_dwordx4 s[36:39], s[4:5], 0x30 ; C00A0902 00000030 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v22, s0, 4 ; D28A0016 00010800 v_writelane_b32 v22, s1, 5 ; D28A0016 00010A01 v_writelane_b32 v22, s2, 6 ; D28A0016 00010C02 v_writelane_b32 v22, s3, 7 ; D28A0016 00010E03 v_writelane_b32 v22, s52, 8 ; D28A0016 00011034 v_writelane_b32 v22, s53, 9 ; D28A0016 00011235 v_writelane_b32 v22, s54, 10 ; D28A0016 00011436 v_writelane_b32 v22, s55, 11 ; D28A0016 00011637 v_writelane_b32 v22, s56, 12 ; D28A0016 00011838 v_writelane_b32 v22, s57, 13 ; D28A0016 00011A39 v_writelane_b32 v22, s58, 14 ; D28A0016 00011C3A v_writelane_b32 v22, s59, 15 ; D28A0016 00011E3B s_load_dwordx8 s[40:47], s[4:5], 0x0 ; C00E0A02 00000000 s_load_dwordx8 s[20:27], s[4:5], 0x40 ; C00E0502 00000040 s_load_dwordx4 s[28:31], s[4:5], 0x70 ; C00A0702 00000070 s_load_dwordx8 s[52:59], s[4:5], 0xc0 ; C00E0D02 000000C0 s_load_dwordx8 s[0:7], s[4:5], 0x100 ; C00E0002 00000100 v_mov_b32_e32 v11, s19 ; 7E160213 v_mov_b32_e32 v10, s72 ; 7E140248 v_max_f32_e32 v12, 0x358637bd, v9 ; 161812FF 358637BD v_cmp_nge_f32_e32 vcc, 0, v12 ; 7C921880 v_mov_b32_e32 v9, 0xff7fffff ; 7E1202FF FF7FFFFF s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 vcc, vcc ; BEEA206A s_xor_b64 vcc, exec, vcc ; 88EA6A7E v_log_f32_e32 v9, v12 ; 7E12430C s_or_b64 exec, exec, vcc ; 87FE6A7E v_mul_f32_e32 v17, s60, v7 ; 0A220E3C v_rcp_f32_e32 v7, v6 ; 7E0E4506 image_sample v[14:16], v[0:1], s[20:27], s[28:31] dmask:0x7 ; F0800700 00E50E00 v_mac_f32_e32 v2, s61, v2 ; 2C04043D v_mac_f32_e32 v3, s61, v3 ; 2C06063D s_waitcnt vmcnt(0) ; BF8C0F70 v_mul_f32_e32 v19, s10, v14 ; 0A261C0A v_exp_f32_e32 v2, v2 ; 7E044102 v_mul_f32_e32 v18, s62, v8 ; 0A24103E v_mov_b32_e32 v8, 0x7c207e81 ; 7E1002FF 7C207E81 v_cmp_eq_f32_e32 vcc, 0, v6 ; 7C840C80 v_cndmask_b32_e32 v6, v7, v8, vcc ; 000C1107 v_mac_f32_e32 v9, s61, v9 ; 2C12123D v_mul_f32_e32 v20, s63, v15 ; 0A281E3F v_exp_f32_e32 v3, v3 ; 7E064103 v_mul_f32_e32 v2, v2, v19 ; 0A042702 v_mul_f32_e32 v5, v6, v5 ; 0A0A0B06 v_mul_f32_e32 v4, v6, v4 ; 0A080906 v_mac_f32_e32 v2, v3, v20 ; 2C042903 v_mul_f32_e32 v21, s64, v16 ; 0A2A2040 v_exp_f32_e32 v7, v9 ; 7E0E4109 v_mov_b32_e32 v3, 0x3eaaaaab ; 7E0602FF 3EAAAAAB v_mac_f32_e32 v2, v7, v21 ; 2C042B07 v_mul_f32_e32 v7, v3, v19 ; 0A0E2703 v_mac_f32_e32 v10, s15, v5 ; 2C140A0F v_mac_f32_e32 v11, s14, v4 ; 2C16080E image_sample v[4:6], v[10:11], s[0:7], s[48:51] dmask:0x7 ; F0800700 0180040A v_mac_f32_e32 v7, v3, v20 ; 2C0E2903 image_sample v[10:12], v[10:11], s[52:59], s[32:35] dmask:0x7 ; F0800700 010D0A0A s_nop 0 ; BF800000 image_sample v[14:16], v[0:1], s[40:47], s[36:39] dmask:0x7 ; F0800700 012A0E00 s_waitcnt vmcnt(0) ; BF8C0F70 v_mul_f32_e32 v0, s17, v14 ; 0A001C11 v_mac_f32_e32 v7, v3, v21 ; 2C0E2B03 v_mul_f32_e32 v1, s18, v15 ; 0A021E12 v_mul_f32_e32 v3, s16, v16 ; 0A062010 v_readlane_b32 s16, v22, 8 ; D2890010 00011116 v_readlane_b32 s17, v22, 9 ; D2890011 00011316 v_readlane_b32 s18, v22, 10 ; D2890012 00011516 v_readlane_b32 s19, v22, 11 ; D2890013 00011716 v_readlane_b32 s0, v22, 4 ; D2890000 00010916 v_readlane_b32 s20, v22, 12 ; D2890014 00011916 v_readlane_b32 s1, v22, 5 ; D2890001 00010B16 v_readlane_b32 s21, v22, 13 ; D2890015 00011B16 v_readlane_b32 s2, v22, 6 ; D2890002 00010D16 v_readlane_b32 s22, v22, 14 ; D2890016 00011D16 v_readlane_b32 s3, v22, 7 ; D2890003 00010F16 v_readlane_b32 s23, v22, 15 ; D2890017 00011F16 v_mad_f32 v4, v2, v0, v4 ; D1C10004 04120102 v_mad_f32 v5, v2, v1, v5 ; D1C10005 04160302 v_mac_f32_e32 v6, v2, v3 ; 2C0C0702 v_mad_f32 v8, v7, v0, v10 ; D1C10008 042A0107 v_mad_f32 v9, v7, v1, v11 ; D1C10009 042E0307 image_sample v[0:2], v[17:18], s[16:23], s[0:3] dmask:0x7 ; F0800700 00040011 v_mac_f32_e32 v12, v7, v3 ; 2C180707 s_waitcnt vmcnt(0) ; BF8C0F70 v_mul_f32_e32 v3, v0, v0 ; 0A060100 v_mul_f32_e32 v7, v1, v1 ; 0A0E0301 v_mul_f32_e32 v11, v3, v0 ; 0A160103 v_mul_f32_e32 v10, v2, v2 ; 0A140502 v_mad_f32 v16, v7, v1, v11 ; D1C10010 042E0307 v_mac_f32_e32 v16, v10, v2 ; 2C20050A v_mad_f32 v3, -v0, v3, v16 ; D1C10003 24420700 v_mul_f32_e32 v14, v7, v1 ; 0A1C0307 v_mad_f32 v7, -v1, v7, v16 ; D1C10007 24420F01 v_mac_f32_e32 v11, s13, v3 ; 2C16060D v_mul_f32_e32 v15, v10, v2 ; 0A1E050A v_mad_f32 v10, -v2, v10, v16 ; D1C1000A 24421502 v_mul_f32_e32 v16, s9, v0 ; 0A200009 v_mul_f32_e32 v0, s65, v11 ; 0A001641 v_mac_f32_e32 v14, s13, v7 ; 2C1C0E0D v_mul_f32_e32 v17, s11, v1 ; 0A22020B v_mul_f32_e32 v1, s66, v14 ; 0A021C42 v_mac_f32_e32 v15, s13, v10 ; 2C1E140D v_mul_f32_e32 v0, s8, v0 ; 0A000008 v_readlane_b32 s0, v22, 3 ; D2890000 00010716 v_mad_f32 v0, v4, v0, s0 ; D1C10000 00020104 v_mul_f32_e32 v18, s12, v2 ; 0A24040C v_mul_f32_e32 v2, s67, v15 ; 0A041E43 v_mul_f32_e32 v1, s8, v1 ; 0A020208 v_readlane_b32 s0, v22, 2 ; D2890000 00010516 v_mad_f32 v1, v5, v1, s0 ; D1C10001 00020305 v_mul_f32_e32 v2, s8, v2 ; 0A040408 v_readlane_b32 s0, v22, 1 ; D2890000 00010316 v_mad_f32 v2, v6, v2, s0 ; D1C10002 00020506 v_mac_f32_e32 v2, v12, v18 ; 2C04250C v_mac_f32_e32 v0, v8, v16 ; 2C002108 v_mac_f32_e32 v1, v9, v17 ; 2C022309 v_readlane_b32 s10, v22, 0 ; D289000A 00010116 v_mov_b32_e32 v3, v2 ; 7E060302 Shader epilog disassembly: v_cvt_pkrtz_f16_f32_e64 v0, v0, v1 ; D2960000 00020300 v_cvt_pkrtz_f16_f32_e64 v1, v2, v3 ; D2960001 00020702 exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; C4001C0F 00000100 s_endpgm ; BF810000 *** SHADER CONFIG *** SPI_PS_INPUT_ADDR = 0xd077 SPI_PS_INPUT_ENA = 0x0002 *** SHADER STATS *** SGPRS: 80 VGPRS: 24 Spilled VGPRs: 0 Code Size: 1360 bytes LDS: 0 blocks Scratch: 0 bytes per wave Max Waves: 10 ******************** [...]