diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c index dbd2337..a668777 100644 --- a/src/mesa/drivers/dri/r600/r600_context.c +++ b/src/mesa/drivers/dri/r600/r600_context.c @@ -158,9 +158,13 @@ static const struct dri_extension gl_20_extension[] = { {"GL_VERSION_2_0", GL_VERSION_2_0_functions }, }; +/* Declared in r700_render.c */ +extern const struct tnl_pipeline_stage _r700_tcl_stage; + static const struct tnl_pipeline_stage *r600_pipeline[] = { /* Catch any t&l fallbacks */ + &_r700_tcl_stage, &_tnl_vertex_transform_stage, &_tnl_normal_transform_stage, &_tnl_lighting_stage, diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c index 02c56b9..7b00990 100644 --- a/src/mesa/drivers/dri/r600/r700_chip.c +++ b/src/mesa/drivers/dri/r600/r700_chip.c @@ -147,8 +147,11 @@ static void r700SendTexBorderColorState(GLcontext *ctx, struct radeon_state_atom extern int getTypeSize(GLenum type); static void r700SetupVTXConstants(GLcontext * ctx, + unsigned int nStreamID, void * pAos, - StreamDesc * pStreamDesc) + unsigned int size, /* number of elements in vector */ + unsigned int stride, + unsigned int count) /* number of vectors in stream */ { context_t *context = R700_CONTEXT(ctx); struct radeon_aos * paos = (struct radeon_aos *)pAos; @@ -173,40 +176,19 @@ static void r700SetupVTXConstants(GLcontext * ctx, else r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, VC_ACTION_ENA_bit); - if(0 == pStreamDesc->stride) - { - nVBsize = paos->count * pStreamDesc->size * getTypeSize(pStreamDesc->type); + nVBsize = count * size * 4; - } - else - { - nVBsize = paos->count * pStreamDesc->stride; - } - uSQ_VTX_CONSTANT_WORD0_0 = paos->offset; uSQ_VTX_CONSTANT_WORD1_0 = nVBsize - 1; SETfield(uSQ_VTX_CONSTANT_WORD2_0, 0, BASE_ADDRESS_HI_shift, BASE_ADDRESS_HI_mask); /* TODO */ - SETfield(uSQ_VTX_CONSTANT_WORD2_0, pStreamDesc->stride, SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift, + 
SETfield(uSQ_VTX_CONSTANT_WORD2_0, stride, SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift, SQ_VTX_CONSTANT_WORD2_0__STRIDE_mask); - SETfield(uSQ_VTX_CONSTANT_WORD2_0, GetSurfaceFormat(pStreamDesc->type, pStreamDesc->size, NULL), + SETfield(uSQ_VTX_CONSTANT_WORD2_0, GetSurfaceFormat(GL_FLOAT, size, NULL), SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift, SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask); /* TODO : trace back api for initial data type, not only GL_FLOAT */ - - if(GL_TRUE == pStreamDesc->normalize) - { - SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_NORM, + SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_SCALED, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask); - } - //else - //{ - // SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_INT, - // SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask); - //} - - if(1 == pStreamDesc->_signed) - { SETbit(uSQ_VTX_CONSTANT_WORD2_0, SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit); - } SETfield(uSQ_VTX_CONSTANT_WORD3_0, 1, MEM_REQUEST_SIZE_shift, MEM_REQUEST_SIZE_mask); SETfield(uSQ_VTX_CONSTANT_WORD6_0, SQ_TEX_VTX_VALID_BUFFER, @@ -215,7 +197,7 @@ static void r700SetupVTXConstants(GLcontext * ctx, BEGIN_BATCH_NO_AUTOSTATE(9 + 2); R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); - R600_OUT_BATCH((pStreamDesc->element + SQ_FETCH_RESOURCE_VS_OFFSET) * FETCH_RESOURCE_STRIDE); + R600_OUT_BATCH((nStreamID + SQ_FETCH_RESOURCE_VS_OFFSET) * FETCH_RESOURCE_STRIDE); R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD0_0); R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD1_0); R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD2_0); @@ -258,8 +240,11 @@ static void r700SendVTXState(GLcontext *ctx, struct radeon_state_atom *atom) if(vp->mesa_program->Base.InputsRead & (1 << i)) { r700SetupVTXConstants(ctx, + i, (void*)(&context->radeon.tcl.aos[j]), - &(context->stream_desc[j])); + (unsigned int)context->radeon.tcl.aos[j].components, + (unsigned int)context->radeon.tcl.aos[j].stride * 4, + (unsigned 
int)context->radeon.tcl.aos[j].count); j++; } } diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c index 47f89c9..436013f 100644 --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -60,6 +60,7 @@ void r700WaitForIdle(context_t *context); void r700WaitForIdleClean(context_t *context); static unsigned int r700PrimitiveType(int prim); +void r600UpdateTextureState(GLcontext * ctx); GLboolean r700SyncSurf(context_t *context, struct radeon_bo *pbo, uint32_t read_domain, @@ -321,6 +322,8 @@ static void r700RunRenderPrimitiveImmediate(GLcontext * ctx, int start, int end, uint32_t vgt_index_type = 0; uint32_t vgt_primitive_type = 0; uint32_t vgt_num_indices = 0; + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; type = r700PrimitiveType(prim); num_indices = r700NumVerts(end - start, prim); @@ -335,34 +338,13 @@ static void r700RunRenderPrimitiveImmediate(GLcontext * ctx, int start, int end, SETfield(vgt_primitive_type, type, VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask); - if (num_indices > 0xffff) - { SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask); - } - else - { - SETfield(vgt_index_type, DI_INDEX_SIZE_16_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask); - } vgt_num_indices = num_indices; SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask); - if (start == 0) - { - SETfield(vgt_draw_initiator, DI_SRC_SEL_AUTO_INDEX, SOURCE_SELECT_shift, SOURCE_SELECT_mask); - } - else - { - if (num_indices > 0xffff) - { total_emit += num_indices; - } - else - { - total_emit += (num_indices + 1) / 2; - } SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask); - } total_emit += 3 /* VGT_PRIMITIVE_TYPE */ + 2 /* VGT_INDEX_TYPE */ @@ -380,42 +362,20 @@ static void r700RunRenderPrimitiveImmediate(GLcontext * ctx, int start, int end, 
R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0)); R600_OUT_BATCH(1); // draw packet - if(start == 0) - { - R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1)); - R600_OUT_BATCH(vgt_num_indices); - R600_OUT_BATCH(vgt_draw_initiator); - } - else - { - if (num_indices > 0xffff) - { R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1))); R600_OUT_BATCH(vgt_num_indices); R600_OUT_BATCH(vgt_draw_initiator); for (i = start; i < (start + num_indices); i++) { - R600_OUT_BATCH(i); + if(vb->Elts) + { + R600_OUT_BATCH(vb->Elts[i]); + } + else + { + R600_OUT_BATCH(i); + } } - } - else - { - R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (((num_indices + 1) / 2) + 1))); - R600_OUT_BATCH(vgt_num_indices); - R600_OUT_BATCH(vgt_draw_initiator); - for (i = start; i < (start + num_indices); i += 2) - { - if ((i + 1) == (start + num_indices)) - { - R600_OUT_BATCH(i); - } - else - { - R600_OUT_BATCH(((i + 1) << 16) | (i)); - } - } - } - } END_BATCH(); COMMIT_BATCH(); @@ -437,17 +397,19 @@ static GLuint r700PredictRenderSize(GLcontext* ctx, dwords = PRE_EMIT_STATE_BUFSZ; if (ib) dwords += nr_prims * 14; - else { + else if (prim) { for (i = 0; i < nr_prims; ++i) { - if (prim[i].start == 0) - dwords += 10; - else if (prim[i].count > 0xffff) dwords += prim[i].count + 10; - else - dwords += ((prim[i].count + 1) / 2) + 10; } } + else { + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; + + for (i = 0; i < vb->PrimitiveCount; i++) + dwords += vb->Primitive[i].count + 10; + } state_size = radeonCountStateEmitSize(&context->radeon); flushed = rcommonEnsureCmdBufSpace(&context->radeon, @@ -463,6 +425,110 @@ static GLuint r700PredictRenderSize(GLcontext* ctx, } +static GLboolean r700RunRender(GLcontext * ctx, + struct tnl_pipeline_stage *stage) +{ + context_t *context = R700_CONTEXT(ctx); + radeonContextPtr radeon = &context->radeon; + unsigned int i, id = 0; + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; + struct 
radeon_renderbuffer *rrb; + + radeon_print(RADEON_RENDER, RADEON_NORMAL, "%s: cs begin at %d\n", + __func__, context->radeon.cmdbuf.cs->cdw); + + /* always emit CB base to prevent + * lock ups on some chips. + */ + R600_STATECHANGE(context, cb_target); + /* mark vtx as dirty since it changes per-draw */ + R600_STATECHANGE(context, vtx); + + r700SetScissor(context); + r700SetupVertexProgram(ctx); + r700SetupFragmentProgram(ctx); + r600UpdateTextureState(ctx); + + GLuint emit_end = r700PredictRenderSize(ctx, 0, 0, 0) + + context->radeon.cmdbuf.cs->cdw; + r700SetupStreams(ctx); + + radeonEmitState(radeon); + + radeon_debug_add_indent(); + /* richard test code */ + for (i = 0; i < vb->PrimitiveCount; i++) { + GLuint prim = _tnl_translate_prim(&vb->Primitive[i]); + GLuint start = vb->Primitive[i].start; + GLuint end = vb->Primitive[i].start + vb->Primitive[i].count; + if (context->ind_buf.bo) { + r700RunRenderPrimitive(ctx, start, end, prim); + } else { + r700RunRenderPrimitiveImmediate(ctx, start, end, prim); + } + } + radeon_debug_remove_indent(); + + /* Flush render op cached for last several quads. */ + r700WaitForIdleClean(context); + + rrb = radeon_get_colorbuffer(&context->radeon); + if (rrb && rrb->bo) + r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM, + CB_ACTION_ENA_bit | (1 << (id + 6))); + + rrb = radeon_get_depthbuffer(&context->radeon); + if (rrb && rrb->bo) + r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM, + DB_ACTION_ENA_bit | DB_DEST_BASE_ENA_bit); + + radeonReleaseArrays(ctx, ~0); + + radeon_print(RADEON_RENDER, RADEON_TRACE, "%s: cs end at %d\n", + __func__, context->radeon.cmdbuf.cs->cdw); + + if ( emit_end < context->radeon.cmdbuf.cs->cdw ) + WARN_ONCE("Rendering was %d commands larger than predicted size." 
+ " We might overflow command buffer.\n", context->radeon.cmdbuf.cs->cdw - emit_end); + + return GL_FALSE; +} + +static GLboolean r700RunTCLRender(GLcontext * ctx, /*----------------------*/ + struct tnl_pipeline_stage *stage) +{ + GLboolean bRet = GL_FALSE; + + /* TODO : sw fallback */ + + /* Need shader bo's setup before bo check */ + r700UpdateShaders(ctx); + /** + + * Ensure all enabled and complete textures are uploaded along with any buffers being used. + */ + if(!r600ValidateBuffers(ctx)) + { + return GL_TRUE; + } + + bRet = r700RunRender(ctx, stage); + + return bRet; + //GL_FALSE will stop to do other pipe stage in _tnl_run_pipeline + //The render here DOES finish the whole pipe, so GL_FALSE should be returned for success. +} + +const struct tnl_pipeline_stage _r700_tcl_stage = { + "r700 Hardware Transform, Clipping and Lighting", + NULL, + NULL, + NULL, + NULL, + r700RunTCLRender +}; + #define CONVERT( TYPE, MACRO ) do { \ GLuint i, j, sz; \ sz = input->Size; \ @@ -604,7 +670,32 @@ static void r700AlignDataToDword(GLcontext *ctx, attr->stride = dst_stride; } +void r700SetupStreams(GLcontext *ctx) +{ + context_t *context = R700_CONTEXT(ctx); + struct r700_vertex_program *vp = context->selected_vp; + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; + unsigned int i, j = 0; + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + + R600_STATECHANGE(context, vtx); + + for(i=0; i<VERT_ATTRIB_MAX; i++) { + if(vp->mesa_program->Base.InputsRead & (1 << i)) { + rcommon_emit_vector(ctx, + &context->radeon.tcl.aos[j], + vb->AttribPtr[i]->data, + vb->AttribPtr[i]->size, + vb->AttribPtr[i]->stride, + vb->Count); + j++; + } + } + context->radeon.tcl.aos_count = j; +} + -static void r700SetupStreams(GLcontext *ctx, const struct gl_client_array *input[], int count) +static void r700SetupStreams2(GLcontext *ctx, const struct gl_client_array *input[], int count) { context_t *context = R700_CONTEXT(ctx); GLuint stride; @@ -895,7 +986,7 @@ static GLboolean 
r700TryDrawPrims(GLcontext *ctx, + context->radeon.cmdbuf.cs->cdw; r700SetupIndexBuffer(ctx, ib); - r700SetupStreams(ctx, arrays, max_index + 1); + r700SetupStreams2(ctx, arrays, max_index + 1); radeonEmitState(radeon); @@ -975,7 +1066,9 @@ void r700InitDraw(GLcontext *ctx) struct vbo_context *vbo = vbo_context(ctx); /* to be enabled */ + /* vbo->draw_prims = r700DrawPrims; + */ } diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c index 16b05d5..f6bece7 100644 --- a/src/mesa/drivers/dri/r600/r700_state.c +++ b/src/mesa/drivers/dri/r600/r700_state.c @@ -1732,10 +1732,24 @@ void r700InitState(GLcontext * ctx) //------------------- r700Enable(ctx, GL_DEPTH_TEST, ctx->Depth.Test); r700DepthMask(ctx, ctx->Depth.Mask); r700DepthFunc(ctx, ctx->Depth.Func); + SETbit(r700->DB_SHADER_CONTROL.u32All, DUAL_EXPORT_ENABLE_bit); + r700->DB_DEPTH_CLEAR.u32All = 0x3F800000; + + r700->DB_RENDER_CONTROL.u32All = 0; SETbit(r700->DB_RENDER_CONTROL.u32All, STENCIL_COMPRESS_DISABLE_bit); SETbit(r700->DB_RENDER_CONTROL.u32All, DEPTH_COMPRESS_DISABLE_bit); - r700SetDBRenderState(ctx); + r700->DB_RENDER_OVERRIDE.u32All = 0; + if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) + SETbit(r700->DB_RENDER_OVERRIDE.u32All, FORCE_SHADER_Z_ORDER_bit); + SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIZ_ENABLE_shift, FORCE_HIZ_ENABLE_mask); + SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE0_shift, FORCE_HIS_ENABLE0_mask); + SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE1_shift, FORCE_HIS_ENABLE1_mask); + SETbit(r700->DB_RENDER_OVERRIDE.u32All, NOOP_CULL_DISABLE_bit); + if (context->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV770) + { + CLEARbit(r700->DB_RENDER_CONTROL.u32All, PERFECT_ZPASS_COUNTS_bit); + } r700->DB_ALPHA_TO_MASK.u32All = 0; SETfield(r700->DB_ALPHA_TO_MASK.u32All, 2, ALPHA_TO_MASK_OFFSET0_shift, ALPHA_TO_MASK_OFFSET0_mask); diff --git 
a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c index ffc6068..6375e6c 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.c +++ b/src/mesa/drivers/dri/r600/r700_vertprog.c @@ -203,9 +203,9 @@ void Map_Vertex_Program(GLcontext *ctx, pAsm->number_used_registers += num_inputs; // Create VFETCH instructions for inputs - if (GL_TRUE != Process_Vertex_Program_Vfetch_Instructions2(ctx, vp, mesa_vp) ) + if (GL_TRUE != Process_Vertex_Program_Vfetch_Instructions(vp, mesa_vp) ) { - radeon_error("Calling Process_Vertex_Program_Vfetch_Instructions2 return error. \n"); + radeon_error("Calling Process_Vertex_Program_Vfetch_Instructions return error. \n"); return; } @@ -293,6 +293,9 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, { context_t *context = R700_CONTEXT(ctx); struct r700_vertex_program *vp; + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; + unsigned int unBit; unsigned int i; vp = _mesa_calloc(sizeof(*vp)); @@ -303,11 +306,15 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, _mesa_insert_mvp_code(ctx, vp->mesa_program); } - for(i=0; i<context->nNumActiveAos; i++) + for(i=0; i<VERT_ATTRIB_MAX; i++) { + unBit = 1 << i; + if(vp->mesa_program->Base.InputsRead & unBit) /* ctx->Array.ArrayObj->xxxxxxx */ + { - vp->aos_desc[i].size = context->stream_desc[i].size; - vp->aos_desc[i].stride = context->stream_desc[i].stride; - vp->aos_desc[i].type = context->stream_desc[i].type; + vp->aos_desc[i].size = vb->AttribPtr[i]->size; + vp->aos_desc[i].stride = vb->AttribPtr[i]->size * sizeof(GL_FLOAT);/* when emit array, data is packed. 
vb->AttribPtr[i]->stride;*/ + vp->aos_desc[i].type = GL_FLOAT; + } } if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) @@ -351,6 +358,9 @@ void r700SelectVertexShader(GLcontext *ctx) context_t *context = R700_CONTEXT(ctx); struct r700_vertex_program_cont *vpc; struct r700_vertex_program *vp; + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; + unsigned int unBit; unsigned int i; GLboolean match; GLbitfield InputsRead; @@ -366,13 +376,17 @@ void r700SelectVertexShader(GLcontext *ctx) for (vp = vpc->progs; vp; vp = vp->next) { match = GL_TRUE; - for(i=0; i<context->nNumActiveAos; i++) + for(i=0; i<VERT_ATTRIB_MAX; i++) { + unBit = 1 << i; + if(InputsRead & unBit) + { - if (vp->aos_desc[i].size != context->stream_desc[i].size) + if (vp->aos_desc[i].size != vb->AttribPtr[i]->size) { match = GL_FALSE; break; } + } } if (match) {