$ INTEL_DEBUG=cs MESA_GLES_VERSION_OVERRIDE=3.1 LD_LIBRARY_PATH=/home/ilia/install/lib ./deqp-gles31 --deqp-visibility=hidden --deqp-case='dEQP-GLES31.functional.compute.shared_var.atomic.add.highp_int'
dEQP Core git-0c2b7cdc3a0455e0486f88574021d3ccd22f28a8 (0x0c2b7cdc) starting..
  target implementation = 'X11 EGL/GLX'

Test case 'dEQP-GLES31.functional.compute.shared_var.atomic.add.highp_int'..

GLSL IR for native compute shader 1:
(
(declare (location=0 shader_storage ) InOut sb_inout)
(declare (location=0 ) uint gl_LocalInvocationIndex)
(declare (location=20 sys ) uvec3 gl_NumWorkGroups)
(declare (location=19 sys ) uvec3 gl_WorkGroupID)
(declare (location=18 sys ) uvec3 gl_LocalInvocationID)
( function main
  (signature void
    (parameters
    )
    (
      (assign (x) (var_ref gl_LocalInvocationIndex)
        (expression uint +
          (expression uint +
            (expression uint * (swiz y (var_ref gl_LocalInvocationID) )(constant uint (3)) )
            (expression uint * (constant uint (6)) (swiz z (var_ref gl_LocalInvocationID) )) )
          (swiz x (var_ref gl_LocalInvocationID) )) )
      (declare () uint offset)
      (declare () uint globalNdx)
      (declare (temporary ) uint assignment_tmp)
      (assign (x) (var_ref assignment_tmp)
        (expression uint +
          (expression uint +
            (expression uint * (expression uint * (swiz x (var_ref gl_NumWorkGroups) )(swiz y (var_ref gl_NumWorkGroups) )) (swiz z (var_ref gl_WorkGroupID) ))
            (expression uint * (swiz x (var_ref gl_NumWorkGroups) )(swiz y (var_ref gl_WorkGroupID) )) )
          (swiz x (var_ref gl_WorkGroupID) )) )
      (assign (x) (var_ref globalNdx) (var_ref assignment_tmp) )
      (assign (x) (var_ref offset) (expression uint + (expression uint * (constant uint (6)) (var_ref assignment_tmp) ) (var_ref gl_LocalInvocationIndex) ) )
      (if (expression bool == (var_ref gl_LocalInvocationIndex) (constant uint (0)) ) (
        (call __intrinsic_store_shared ((constant uint (0)) (constant int (1)) (constant uint (1)) ))
      )
      ())
      (barrier)
      (declare (temporary ) int atomic_retval)
      (declare (temporary ) int ssbo_load_result)
      (call __intrinsic_load_ssbo (var_ref ssbo_load_result) ((constant uint (0)) (expression uint * (var_ref offset) (constant uint (16)) ) (constant uint (0)) ))
      (call __intrinsic_atomic_add_shared (var_ref atomic_retval) ((constant uint (0)) (var_ref ssbo_load_result) ))
      (call __intrinsic_store_ssbo ((constant uint (0)) (expression uint + (expression uint * (var_ref offset) (constant uint (16)) ) (constant uint (6144)) ) (expression uint i2u (var_ref atomic_retval) ) (constant uint (1)) (constant uint (0)) ))
      (barrier)
      (if (expression bool == (var_ref gl_LocalInvocationIndex) (constant uint (0)) ) (
        (declare (temporary ) uint ssbo_store_temp_offset)
        (assign (x) (var_ref ssbo_store_temp_offset) (expression uint * (var_ref globalNdx) (constant uint (16)) ) )
        (declare (temporary ) int shared_load_result)
        (call __intrinsic_load_shared (var_ref shared_load_result) ((constant uint (0)) ))
        (call __intrinsic_store_ssbo ((constant uint (0)) (expression uint + (var_ref ssbo_store_temp_offset) (constant uint (12288)) ) (expression uint i2u (var_ref shared_load_result) ) (constant uint (1)) (constant uint (0)) ))
      )
      ())
    ))
)
)

NIR (SSA form) for compute shader:
shader: MESA_SHADER_COMPUTE
name: GLSL1
inputs: 0
outputs: 0
uniforms: 0
decl_var shader_storageINTERP_QUALIFIER_NONE InOut sb_inout (0, 0)
decl_function main returning void

impl main {
    block block_0:
    /* preds: */
    vec1 ssa_0 = load_const (0x00003000 /* 0.000000 */)
    vec1 ssa_1 = load_const (0x00000000 /* 0.000000 */)
    vec1 ssa_2 = load_const (0x00000010 /* 0.000000 */)
    vec1 ssa_3 = load_const (0x00001800 /* 0.000000 */)
    vec1 ssa_4 = load_const (0x00000001 /* 0.000000 */)
    vec1 ssa_5 = load_const (0x00000006 /* 0.000000 */)
    vec1 ssa_6 = load_const (0x00000003 /* 0.000000 */)
    vec3 ssa_7 = intrinsic load_local_invocation_id () () ()
    vec1 ssa_8 = imul ssa_7.y, ssa_6
    vec1 ssa_9 = imul ssa_5, ssa_7.z
    vec1 ssa_10 = iadd ssa_8, ssa_9
    vec1 ssa_11 = iadd ssa_10, ssa_7
    vec3 ssa_12 = intrinsic load_num_work_groups () () ()
    vec1 ssa_13 = imul ssa_12, ssa_12.y
    vec3 ssa_14 = intrinsic load_work_group_id () () ()
    vec1 ssa_15 = imul ssa_13, ssa_14.z
    vec1 ssa_16 = imul ssa_12, ssa_14.y
    vec1 ssa_17 = iadd ssa_15, ssa_16
    vec1 ssa_18 = iadd ssa_17, ssa_14
    vec1 ssa_19 = imul ssa_5, ssa_18
    vec1 ssa_20 = iadd ssa_19, ssa_11
    vec1 ssa_21 = ieq ssa_11, ssa_1
    /* succs: block_1 block_2 */
    if ssa_21 {
        block block_1:
        /* preds: block_0 */
        intrinsic store_shared (ssa_4, ssa_1) () (0, 1)
        /* succs: block_3 */
    } else {
        block block_2:
        /* preds: block_0 */
        /* succs: block_3 */
    }
    block block_3:
    /* preds: block_1 block_2 */
    intrinsic barrier () () ()
    vec1 ssa_22 = imul ssa_20, ssa_2
    vec1 ssa_23 = intrinsic load_ssbo (ssa_1, ssa_22) () ()
    vec1 ssa_24 = intrinsic shared_atomic_add (ssa_1, ssa_23) () ()
    vec1 ssa_25 = iadd ssa_22, ssa_3
    intrinsic store_ssbo (ssa_24, ssa_1, ssa_25) () (1)
    intrinsic barrier () () ()
    /* succs: block_4 block_5 */
    if ssa_21 {
        block block_4:
        /* preds: block_3 */
        vec1 ssa_26 = imul ssa_18, ssa_2
        vec1 ssa_27 = intrinsic load_shared (ssa_1) () (0)
        vec1 ssa_28 = iadd ssa_26, ssa_0
        intrinsic store_ssbo (ssa_27, ssa_1, ssa_28) () (1)
        /* succs: block_6 */
    } else {
        block block_5:
        /* preds: block_3 */
        /* succs: block_6 */
    }
    block block_6:
    /* preds: block_4 block_5 */
    /* succs: block_7 */
    block block_7:
}

NIR (final form) for compute shader:
shader: MESA_SHADER_COMPUTE
name: GLSL1
inputs: 0
outputs: 0
uniforms: 0
decl_var shader_storageINTERP_QUALIFIER_NONE InOut sb_inout (0, 0)
decl_function main returning void

impl main {
    block block_0:
    /* preds: */
    vec1 ssa_0 = load_const (0x00003000 /* 0.000000 */)
    vec1 ssa_1 = load_const (0x00000000 /* 0.000000 */)
    vec1 ssa_2 = load_const (0x00000010 /* 0.000000 */)
    vec1 ssa_3 = load_const (0x00001800 /* 0.000000 */)
    vec1 ssa_4 = load_const (0x00000001 /* 0.000000 */)
    vec1 ssa_5 = load_const (0x00000006 /* 0.000000 */)
    vec1 ssa_6 = load_const (0x00000003 /* 0.000000 */)
    vec3 ssa_7 = intrinsic load_local_invocation_id () () ()
    vec1 ssa_8 = imul ssa_7.y, ssa_6
    vec1 ssa_9 = imul ssa_5, ssa_7.z
    vec1 ssa_10 = iadd ssa_8, ssa_9
    vec1 ssa_11 = iadd ssa_10, ssa_7
    vec3 ssa_12 = intrinsic load_num_work_groups () () ()
    vec1 ssa_13 = imul ssa_12, ssa_12.y
    vec3 ssa_14 = intrinsic load_work_group_id () () ()
    vec1 ssa_15 = imul ssa_13, ssa_14.z
    vec1 ssa_16 = imul ssa_12, ssa_14.y
    vec1 ssa_17 = iadd ssa_15, ssa_16
    vec1 ssa_18 = iadd ssa_17, ssa_14
    vec1 ssa_19 = imul ssa_5, ssa_18
    vec1 ssa_20 = iadd ssa_19, ssa_11
    vec1 ssa_21 = ieq ssa_11, ssa_1
    /* succs: block_1 block_2 */
    if ssa_21 {
        block block_1:
        /* preds: block_0 */
        intrinsic store_shared (ssa_4, ssa_1) () (0, 1)
        /* succs: block_3 */
    } else {
        block block_2:
        /* preds: block_0 */
        /* succs: block_3 */
    }
    block block_3:
    /* preds: block_1 block_2 */
    intrinsic barrier () () ()
    vec1 ssa_22 = imul ssa_20, ssa_2
    vec1 ssa_23 = intrinsic load_ssbo (ssa_1, ssa_22) () ()
    vec1 ssa_24 = intrinsic shared_atomic_add (ssa_1, ssa_23) () ()
    vec1 ssa_25 = iadd ssa_22, ssa_3
    intrinsic store_ssbo (ssa_24, ssa_1, ssa_25) () (1)
    intrinsic barrier () () ()
    /* succs: block_4 block_5 */
    if ssa_21 {
        block block_4:
        /* preds: block_3 */
        vec1 ssa_26 = imul ssa_18, ssa_2
        vec1 ssa_27 = intrinsic load_shared (ssa_1) () (0)
        vec1 ssa_28 = iadd ssa_26, ssa_0
        intrinsic store_ssbo (ssa_27, ssa_1, ssa_28) () (1)
        /* succs: block_6 */
    } else {
        block block_5:
        /* preds: block_3 */
        /* succs: block_6 */
    }
    block block_6:
    /* preds: block_4 block_5 */
    /* succs: block_7 */
    block block_7:
}

CS8 estimated execution time: 18 cycles
CS16 estimated execution time: 20 cycles

Native code for unnamed compute shader GLSL1
SIMD16 shader: 65 instructions. 0 loops. 15282 cycles. 0:0 spills:fills. Promoted 0 constants. Compacted 1040 to 848 bytes (18%)
   START B0
mov(16) g18<1>UD g1<8,8,1>UD { align1 1H compacted };
mov(16) g20<1>UD g3<8,8,1>UD { align1 1H compacted };
mov(16) g26<1>UD g0.1<0,1,0>UD { align1 1H compacted };
mov(16) g28<1>UD g0.6<0,1,0>UD { align1 1H };
mov(16) g30<1>UD 0x00000000UD { align1 1H compacted };
mov(16) g32<1>UD 0x00000004UD { align1 1H compacted };
mov(16) g1<1>UD g5<8,8,1>UD { align1 1H compacted };
mov(16) g3<1>UD g0.7<0,1,0>UD { align1 1H };
mul(16) g5<1>D g20<8,8,1>D 3D { align1 1H compacted };
send(16) g12<1>UD g30<8,8,1>UD dp data 1 ( untyped surface read, Surface = 0, SIMD16, Mask = 0xe) mlen 2 rlen 2 { align1 1H };
send(16) g14<1>UD g32<8,8,1>UD dp data 1 ( untyped surface read, Surface = 0, SIMD16, Mask = 0xe) mlen 2 rlen 2 { align1 1H };
mul(16) g7<1>D g1<8,8,1>D 6D { align1 1H compacted };
add(16) g9<1>D g5<8,8,1>D g7<8,8,1>D { align1 1H compacted };
mul(16) g6<1>D g12<8,8,1>D g28.1<16,8,2>UW { align1 1H };
mul(16) g20<1>D g12<8,8,1>D g28<16,8,2>UW { align1 1H };
mul(16) g1<1>D g12<8,8,1>D g14.1<16,8,2>UW { align1 1H };
mul(16) g16<1>D g12<8,8,1>D g14<16,8,2>UW { align1 1H };
add(16) g11<1>D g9<8,8,1>D g18<8,8,1>D { align1 1H compacted };
add(16) g20.1<2>UW g20.1<16,8,2>UW g6<16,8,2>UW { align1 1H };
add(16) g16.1<2>UW g16.1<16,8,2>UW g1<16,8,2>UW { align1 1H };
mul(16) g18<1>D g16<8,8,1>D g3<16,8,2>UW { align1 1H };
mul(16) g5<1>D g16<8,8,1>D g3.1<16,8,2>UW { align1 1H };
add(16) g18.1<2>UW g18.1<16,8,2>UW g5<16,8,2>UW { align1 1H };
add(16) g22<1>D g18<8,8,1>D g20<8,8,1>D { align1 1H compacted };
add(16) g24<1>D g22<8,8,1>D g26<8,8,1>D { align1 1H compacted };
mul(16) g26<1>D g24<8,8,1>D 6D { align1 1H compacted };
add(16) g28<1>D g26<8,8,1>D g11<8,8,1>D { align1 1H compacted };
cmp.z.f0(16) g30<1>D g11<8,8,1>D 0D { align1 1H compacted };
(+f0) if(16) JIP: 10 UIP: 10 { align1 1H };
   END B0 ->B1 ->B2
   START B1 <-B0
mov(8) g1<1>UD 0D { align1 WE_all 1Q };
mov(16) g2<1>UD 0x00000000UD { align1 1H compacted };
mov(16) g4<1>UD 0x00000001UD { align1 1H compacted };
mov(1) g1.7<1>UD 65535D { align1 WE_all };
send(16) null<1>F g1<8,8,1>UD dp data 1 ( DC untyped surface write, Surface = 254, SIMD16, Mask = 0xe) mlen 5 rlen 0 { align1 1H };
   END B1 ->B2
   START B2 <-B0 <-B1
endif(16) JIP: 2 { align1 1H };
mov(8) g1<1>UD 0x00000000UD { align1 WE_all 1Q compacted };
and(1) g1.2<1>UD g0.2<0,1,0>UD 0x0f000000UD { align1 WE_all };
send(16) null<1>F g1<8,8,1>UD gateway (barrier msg) mlen 1 rlen 0 { align1 WE_all 1H };
wait(1) n0<0,1,0>UD { align1 WE_all };
mul(16) g1<1>D g28<8,8,1>D 16D { align1 1H compacted };
mov(8) g3<1>UD 0D { align1 WE_all 1Q };
mov(16) g4<1>UD 0x00000000UD { align1 1H compacted };
send(16) g6<1>UD g1<8,8,1>UD dp data 1 ( untyped surface read, Surface = 1, SIMD16, Mask = 0xe) mlen 2 rlen 2 { align1 1H };
mov(1) g3.7<1>UD 65535D { align1 WE_all };
send(16) g11<1>UD g3<8,8,1>UD dp data 1 ( DC untyped atomic op, Surface = 254, SIMD16, add) mlen 5 rlen 2 { align1 1H };
add(16) g9<1>D g1<8,8,1>D 6144D { align1 1H };
mov(8) g8<1>UD 0D { align1 WE_all 1Q };
mov(1) g8.7<1>UD 65535D { align1 WE_all };
send(16) null<1>F g8<8,8,1>UD dp data 1 ( DC untyped surface write, Surface = 1, SIMD16, Mask = 0xe) mlen 5 rlen 0 { align1 1H };
mov(8) g1<1>UD 0x00000000UD { align1 WE_all 1Q compacted };
and(1) g1.2<1>UD g0.2<0,1,0>UD 0x0f000000UD { align1 WE_all };
send(16) null<1>F g1<8,8,1>UD gateway (barrier msg) mlen 1 rlen 0 { align1 WE_all 1H };
wait(1) n0<0,1,0>UD { align1 WE_all };
mov.nz.f0(16) null<1>D g30<8,8,1>D { align1 1H };
(+f0) if(16) JIP: 14 UIP: 14 { align1 1H };
   END B2 ->B3 ->B4
   START B3 <-B2
mul(16) g1<1>D g24<8,8,1>D 16D { align1 1H compacted };
mov(16) g3<1>UD 0x00000000UD { align1 1H compacted };
mov(8) g9<1>UD 0D { align1 WE_all 1Q };
add(16) g10<1>D g1<8,8,1>D 12288D { align1 1H };
send(16) g12<1>UD g3<8,8,1>UD dp data 1 ( untyped surface read, Surface = 254, SIMD16, Mask = 0xe) mlen 2 rlen 2 { align1 1H };
mov(1) g9.7<1>UD 65535D { align1 WE_all };
send(16) null<1>F g9<8,8,1>UD dp data 1 ( DC untyped surface write, Surface = 1, SIMD16, Mask = 0xe) mlen 5 rlen 0 { align1 1H };
   END B3 ->B4
   START B4 <-B2 <-B3
endif(16) JIP: 2 { align1 1H };
mov(8) g127<1>UD g0<8,8,1>UD { align1 WE_all 1Q compacted };
send(16) null<1>F g127<8,8,1>UD thread_spawner mlen 1 rlen 0 { align1 WE_all 1H EOT };
   END B4

Compute shader compile time = 2.563000 ms
Link time = 9.525000 ms

  Test case duration in microseconds = 15762 us
  Fail (Comparison failed)

DONE!

Test run totals:
  Passed:        0/1 (0.0%)
  Failed:        1/1 (100.0%)
  Not supported: 0/1 (0.0%)
  Warnings:      0/1 (0.0%)
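For orientation, the GLSL IR and NIR above correspond to a compute shader of roughly the following shape. This is a sketch reconstructed from the dump, not the literal dEQP test source: the 3x2x1 local size, the std140-style 16-byte array stride, the array sizes (384 invocations across 64 work groups, which is where the 6144 and 12288 byte offsets come from), and the member names s_var/inputValues/returnValues/groupValues are all inferred from the index arithmetic in the IR and may differ from what dEQP actually generates.

#version 310 es

// Reconstruction only: local size, block layout, array sizes and names are
// guesses based on the IR dump, not the generated dEQP shader text.
layout (local_size_x = 3, local_size_y = 2, local_size_z = 1) in;

layout (binding = 0, std140) buffer InOut
{
    highp int inputValues[384];   // read at  offset*16            (base 0)
    highp int returnValues[384];  // written at offset*16 + 6144
    highp int groupValues[64];    // written at globalNdx*16 + 12288
} sb_inout;

shared highp int s_var;

void main (void)
{
    highp uint localSize = gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z;
    highp uint globalNdx = gl_NumWorkGroups.x * gl_NumWorkGroups.y * gl_WorkGroupID.z
                         + gl_NumWorkGroups.x * gl_WorkGroupID.y
                         + gl_WorkGroupID.x;
    highp uint offset    = localSize * globalNdx + gl_LocalInvocationIndex;

    // One invocation per work group seeds the shared accumulator.
    if (gl_LocalInvocationIndex == 0u)
        s_var = 1;
    barrier();

    // Every invocation atomically adds its input and records the old value.
    highp int old = atomicAdd(s_var, sb_inout.inputValues[offset]);
    sb_inout.returnValues[offset] = old;
    barrier();

    // One invocation writes the final per-group sum back to the SSBO.
    if (gl_LocalInvocationIndex == 0u)
        sb_inout.groupValues[globalNdx] = s_var;
}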