diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 2af42e0..566ef7a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -2088,7 +2088,9 @@ set_push_pull_constant_loc(unsigned uniform, int *chunk_start, bool contiguous, * maximum number of fragment shader uniform components (64). If * there are too many of these, they'd fill up all of register space. * So, this will push some of them out to the pull constant buffer and - * update the program to load them. + * update the program to load them. We also use pull constants for all + * indirect constant loads because we don't support indirect accesses in + * registers yet. */ void fs_visitor::assign_constant_locations() @@ -2102,12 +2104,8 @@ fs_visitor::assign_constant_locations() bool is_live_64bit[uniforms]; memset(is_live_64bit, 0, sizeof(is_live_64bit)); - /* For each uniform slot, a value of true indicates that the given slot and - * the next slot must remain contiguous. This is used to keep us from - * splitting arrays apart. - */ - bool contiguous[uniforms]; - memset(contiguous, 0, sizeof(contiguous)); + bool needs_pull[uniforms]; + memset(needs_pull, 0, sizeof(needs_pull)); int thread_local_id_index = (stage == MESA_SHADER_COMPUTE) ? @@ -2117,7 +2115,8 @@ fs_visitor::assign_constant_locations() * * 1) Figure out which uniforms are live. * - * 2) Mark any indirectly used ranges of registers as contiguous. + * 2) Find all indirect access of uniform arrays and flag them as needing + * to go into the pull constant buffer. * * Note that we don't move constant-indexed accesses to arrays. No * testing has been done of the performance impact of this choice. 
@@ -2130,29 +2129,14 @@ fs_visitor::assign_constant_locations() int constant_nr = inst->src[i].nr + inst->src[i].reg_offset; if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && i == 0) { - assert(inst->src[2].ud % 4 == 0); - unsigned last = constant_nr + (inst->src[2].ud / 4) - 1; - assert(last < uniforms); - - for (unsigned j = constant_nr; j < last; j++) { - is_live[j] = true; - contiguous[j] = true; - if (type_sz(inst->src[i].type) == 8) { - is_live_64bit[j] = true; - } + for (unsigned j = 0; j < inst->src[2].ud / 4; j++) { + is_live[constant_nr + j] = true; + needs_pull[constant_nr + j] = true; } - is_live[last] = true; } else { - if (constant_nr >= 0 && constant_nr < (int) uniforms) { - int regs_read = inst->components_read(i) * - type_sz(inst->src[i].type) / 4; - for (int j = 0; j < regs_read; j++) { - is_live[constant_nr + j] = true; - if (type_sz(inst->src[i].type) == 8) { - is_live_64bit[constant_nr + j] = true; - } - } - } + /* Mark the one accessed uniform as live */ + if (constant_nr >= 0 && constant_nr < (int) uniforms) + is_live[constant_nr] = true; } } } @@ -2184,39 +2168,20 @@ fs_visitor::assign_constant_locations() push_constant_loc = ralloc_array(mem_ctx, int, uniforms); pull_constant_loc = ralloc_array(mem_ctx, int, uniforms); - /* Default to -1 meaning no location */ - memset(push_constant_loc, -1, uniforms * sizeof(*push_constant_loc)); - memset(pull_constant_loc, -1, uniforms * sizeof(*pull_constant_loc)); - - int chunk_start = -1; - - /* First push 64-bit uniforms to ensure they are properly aligned */ - for (unsigned u = 0; u < uniforms; u++) { - if (!is_live[u] || !is_live_64bit[u]) - continue; - - set_push_pull_constant_loc(u, &chunk_start, contiguous[u], - push_constant_loc, pull_constant_loc, - &num_push_constants, &num_pull_constants, - max_push_components, max_chunk_size, - stage_prog_data); - - } - - /* Then push the rest of uniforms */ - for (unsigned u = 0; u < uniforms; u++) { - if (!is_live[u] || is_live_64bit[u]) - continue; + 
for (unsigned int i = 0; i < uniforms; i++) { + push_constant_loc[i] = -1; + pull_constant_loc[i] = -1; - /* Skip thread_local_id_index to put it in the last push register. */ - if (thread_local_id_index == (int)u) + if (!is_live[i]) continue; - set_push_pull_constant_loc(u, &chunk_start, contiguous[u], - push_constant_loc, pull_constant_loc, - &num_push_constants, &num_pull_constants, - max_push_components, max_chunk_size, - stage_prog_data); + if (!needs_pull[i] && num_push_constants < max_push_components) { + /* Retain as a push constant */ + push_constant_loc[i] = num_push_constants++; + } else { + /* We have to pull it */ + pull_constant_loc[i] = num_pull_constants++; + } } /* Add the CS local thread ID uniform at the end of the push constants */ @@ -2323,9 +2288,7 @@ fs_visitor::lower_constant_loads() continue; /* Out of bounds access */ int pull_index = pull_constant_loc[location]; - - if (pull_index == -1) - continue; + assert(pull_index >= 0); /* This had better be pull */ VARYING_PULL_CONSTANT_LOAD(ibld, inst->dst, brw_imm_ud(index),