From c60be7444f2f84ea2a3676ae2722788bb2702dbe Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Tue, 30 May 2017 12:46:19 +0200 Subject: [PATCH 1/1] r600g: work around shaders allocating too many superfluous temporaries Related bugs: https://bugs.freedesktop.org/show_bug.cgi?id=99349 https://bugs.freedesktop.org/show_bug.cgi?id=50338 1. Allocate ctx.temp_reg and a limited number of registers (R600_TEMP_REG_RESERVED=10) that are given out via r600_get_temp() before the temporaries of the TGSI are allocated. That makes it possible for tgsi_split_constants() to allocate registers inside the proper GPR range, so that r600_asm.c:check_and_set_bank_swizzle doesn't fail. 2. Move the test for the register use limit (124) to after the optimization in r600_pipe_shader_create(). Add a test for a hard limit of 191 in r600_shader_from_tgsi() though to avoid interference with reserved values. --- src/gallium/drivers/r600/r600_shader.c | 52 +++++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index bdaf28ced2..d550f4cd7f 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -83,6 +83,13 @@ The compiler must issue the source argument to slots z, y, and x face_gpr.w = SampleID */ #define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16) + +/* Number of GPRs reserved before the temporaries in order to work around + problems with shaders that request too many temporaries that can be + optimized away in the sb pass. 
+*/ +#define R600_TEMP_REG_RESERVED 10 + static int r600_shader_from_tgsi(struct r600_context *rctx, struct r600_pipe_shader *pipeshader, union r600_shader_key key); @@ -216,6 +223,13 @@ int r600_pipe_shader_create(struct pipe_context *ctx, } } + if (shader->shader.bc.ngpr > 124) { + r = -ENOMEM; + R600_ERR("Shader GPR limit exceeded - shader requires %d registers.\n", + shader->shader.bc.ngpr); + goto error; + } + if (shader->gs_copy_shader) { if (dump) { // dump copy shader @@ -322,6 +336,7 @@ struct r600_shader_ctx { unsigned type; unsigned file_offset[TGSI_FILE_COUNT]; unsigned temp_reg; + unsigned temp_reg_highmem; const struct r600_shader_tgsi_instruction *inst_info; struct r600_bytecode *bc; struct r600_shader *shader; @@ -814,7 +829,11 @@ static inline int get_address_file_reg(struct r600_shader_ctx *ctx, int index) static int r600_get_temp(struct r600_shader_ctx *ctx) { - return ctx->temp_reg + ctx->max_driver_temp_used++; + if (ctx->max_driver_temp_used < R600_TEMP_REG_RESERVED) + return ctx->temp_reg + ctx->max_driver_temp_used++; + else + return ctx->temp_reg_highmem + ctx->max_driver_temp_used++ - + R600_TEMP_REG_RESERVED; } static int vs_add_primid_output(struct r600_shader_ctx *ctx, int prim_id_sid) @@ -2213,6 +2232,8 @@ static int generate_gs_copy_shader(struct r600_context *rctx, r600_bytecode_add_vtx(ctx.bc, &vtx); } ctx.temp_reg = i + 1; + ctx.temp_reg_highmem = ctx.temp_reg + R600_TEMP_REG_RESERVED; + for (ring = 3; ring >= 0; --ring) { bool enabled = false; for (i = 0; i < so->num_outputs; i++) { @@ -3065,8 +3086,11 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + ctx.info.file_max[TGSI_FILE_INPUT] + 1; - ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + - ctx.info.file_max[TGSI_FILE_OUTPUT] + 1; + + ctx.temp_reg = ctx.file_offset[TGSI_FILE_OUTPUT] + + ctx.info.file_max[TGSI_FILE_OUTPUT] + 1; + + ctx.file_offset[TGSI_FILE_TEMPORARY] = 
ctx.temp_reg + R600_TEMP_REG_RESERVED; /* Outside the GPR range. This will be translated to one of the * kcache banks later. */ @@ -3081,19 +3105,19 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, if (ctx.type == PIPE_SHADER_TESS_CTRL) { ctx.tess_input_info = ctx.bc->ar_reg + 3; ctx.tess_output_info = ctx.bc->ar_reg + 4; - ctx.temp_reg = ctx.bc->ar_reg + 5; + ctx.temp_reg_highmem = ctx.bc->ar_reg + 5; } else if (ctx.type == PIPE_SHADER_TESS_EVAL) { ctx.tess_input_info = 0; ctx.tess_output_info = ctx.bc->ar_reg + 3; - ctx.temp_reg = ctx.bc->ar_reg + 4; + ctx.temp_reg_highmem = ctx.bc->ar_reg + 4; } else if (ctx.type == PIPE_SHADER_GEOMETRY) { ctx.gs_export_gpr_tregs[0] = ctx.bc->ar_reg + 3; ctx.gs_export_gpr_tregs[1] = ctx.bc->ar_reg + 4; ctx.gs_export_gpr_tregs[2] = ctx.bc->ar_reg + 5; ctx.gs_export_gpr_tregs[3] = ctx.bc->ar_reg + 6; - ctx.temp_reg = ctx.bc->ar_reg + 7; + ctx.temp_reg_highmem = ctx.bc->ar_reg + 7; } else { - ctx.temp_reg = ctx.bc->ar_reg + 3; + ctx.temp_reg_highmem = ctx.bc->ar_reg + 3; } shader->max_arrays = 0; @@ -3656,9 +3680,17 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, } /* check GPR limit - we have 124 = 128 - 4 - * (4 are reserved as alu clause temporary registers) */ - if (ctx.bc->ngpr > 124) { - R600_ERR("GPR limit exceeded - shader requires %d registers\n", ctx.bc->ngpr); + * (4 are reserved as alu clause temporary registers) + * Use this as a soft limit since the sb optimization pass + * might reduce this number. */ + if (ctx.bc->ngpr > 124) + fprintf(stderr, "Warning: GPR limit exceeded prior to optimization" + " - shader requires %d registers\n", ctx.bc->ngpr); + + /* Set a hard limit for register usage */ + if (ctx.bc->ngpr > 191) { + R600_ERR("GPR limit exceeded - shader requires %d registers\n", + ctx.bc->ngpr); r = -ENOMEM; goto out_err; } -- 2.13.0