From 1741adbc43cd8f6beb8b8786b116859dcd14daff Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 10 Jun 2011 15:11:31 -0400 Subject: [PATCH] r600g: eg+ support for FS_COLOR0_WRITES_ALL_CBUFS Evergreen+ don't support multi-writes so we need to emulate it in the shader. Signed-off-by: Alex Deucher --- src/gallium/drivers/r600/evergreen_state.c | 5 ++- src/gallium/drivers/r600/r600_asm.c | 2 + src/gallium/drivers/r600/r600_shader.c | 75 +++++++++++++++++---------- 3 files changed, 53 insertions(+), 29 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 9ebfe54..d76c053 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -1616,7 +1616,10 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader rshader->output[i].name == TGSI_SEMANTIC_STENCIL) exports_ps |= 1; else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { - num_cout++; + if (rshader->fs_write_all) + num_cout = 8; + else + num_cout++; } } exports_ps |= S_02884C_EXPORT_COLORS(num_cout); diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 3196d97..1fd7b89 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -1860,6 +1860,8 @@ void r600_bc_dump(struct r600_bc *bc) break; case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: + case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: + case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: fprintf(stderr, "%04d %08X EXPORT ", id, bc->bytecode[id]); fprintf(stderr, "GPR:%X ", cf->output.gpr); fprintf(stderr, "ELEM_SIZE:%X ", cf->output.elem_size); diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 39e6d85..e5feb96 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -605,7 +605,7 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh struct r600_bc_output output[32]; unsigned output_done, noutput; unsigned opcode; - int i, r = 0, pos0; + int i, j, r = 0, pos0; ctx.bc = &shader->bc; ctx.shader = shader; @@ -730,50 +730,68 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh } /* export output */ noutput = shader->noutput; + j = 0; for (i = 0, pos0 = 0; i < noutput; i++) { memset(&output[i], 0, sizeof(struct r600_bc_output)); - output[i].gpr = shader->output[i].gpr; - output[i].elem_size = 3; - output[i].swizzle_x = 0; - output[i].swizzle_y = 1; - output[i].swizzle_z = 2; - output[i].swizzle_w = 3; - output[i].burst_count = 1; - output[i].barrier = 1; - output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; - output[i].array_base = i - pos0; - output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); + output[i + j].gpr = shader->output[i].gpr; + output[i + j].elem_size = 3; + output[i + j].swizzle_x = 0; + output[i + j].swizzle_y = 1; + output[i + j].swizzle_z = 2; + output[i + j].swizzle_w = 3; + output[i + j].burst_count = 1; + output[i + j].barrier = 1; + output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; + output[i + j].array_base = i - pos0; + output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); switch (ctx.type) { case TGSI_PROCESSOR_VERTEX: if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { - output[i].array_base = 60; - output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; + output[i + j].array_base = 60; + output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; /* position doesn't count in array_base */ pos0++; } if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) { - output[i].array_base = 61; - output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; + output[i + j].array_base = 61; + output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; /* position doesn't count in array_base */ pos0++; } break; case TGSI_PROCESSOR_FRAGMENT: if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { - output[i].array_base = shader->output[i].sid; - output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + output[i + j].array_base = shader->output[i].sid; + output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + if (shader->fs_write_all && (shader->family >= CHIP_CEDAR)) { + for (j = 1; j < 8; j++) { + memset(&output[i + j], 0, sizeof(struct r600_bc_output)); + output[i + j].gpr = shader->output[i].gpr; + output[i + j].elem_size = 3; + output[i + j].swizzle_x = 0; + output[i + j].swizzle_y = 1; + output[i + j].swizzle_z = 2; + output[i + j].swizzle_w = 3; + output[i + j].burst_count = 1; + output[i + j].barrier = 1; + output[i + j].array_base = shader->output[i].sid + j; + output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); + output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + } + j--; + } } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { - output[i].array_base = 61; - output[i].swizzle_x = 2; - output[i].swizzle_y = 7; - output[i].swizzle_z = output[i].swizzle_w = 7; - output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + output[i + j].array_base = 61; + output[i + j].swizzle_x = 2; + output[i + j].swizzle_y = 7; + output[i + j].swizzle_z = output[i + j].swizzle_w = 7; + output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { - output[i].array_base = 61; - output[i].swizzle_x = 7; - output[i].swizzle_y = 1; - output[i].swizzle_z = output[i].swizzle_w = 7; - output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + output[i + j].array_base = 61; + output[i + j].swizzle_x = 7; + output[i + j].swizzle_y = 1; + output[i + j].swizzle_z = output[i + j].swizzle_w = 7; + output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; } else { R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); r = -EINVAL; @@ -786,6 +804,7 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh goto out_err; } } + noutput += j; /* add fake param output for vertex shader if no param is exported */ if (ctx.type == TGSI_PROCESSOR_VERTEX) { for (i = 0, pos0 = 0; i < noutput; i++) { -- 1.7.1.1