diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c --- a/src/mesa/drivers/dri/r600/r600_context.c +++ b/src/mesa/drivers/dri/r600/r600_context.c @@ -164,9 +164,13 @@ static const struct dri_extension gl_20_extension[] = { {NULL, NULL} }; +/* Declared in r700_render.c */ +extern const struct tnl_pipeline_stage _r700_tcl_stage; + static const struct tnl_pipeline_stage *r600_pipeline[] = { /* Catch any t&l fallbacks */ + &_r700_tcl_stage, &_tnl_vertex_transform_stage, &_tnl_normal_transform_stage, &_tnl_lighting_stage, diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c --- a/src/mesa/drivers/dri/r600/r700_chip.c +++ b/src/mesa/drivers/dri/r600/r700_chip.c @@ -169,7 +169,7 @@ static void r700SendTexBorderColorState(GLcontext *ctx, struct radeon_state_atom extern int getTypeSize(GLenum type); static void r700SetupVTXConstants(GLcontext * ctx, void * pAos, - StreamDesc * pStreamDesc) + unsigned int nStreamID) { context_t *context = R700_CONTEXT(ctx); struct radeon_aos * paos = (struct radeon_aos *)pAos; @@ -194,41 +194,28 @@ static void r700SetupVTXConstants(GLcontext * ctx, else r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, VC_ACTION_ENA_bit); - if(0 == pStreamDesc->stride) + if(0 == paos->stride) { - nVBsize = paos->count * pStreamDesc->size * getTypeSize(pStreamDesc->type); + nVBsize = paos->count * paos->components * 4; } else { - nVBsize = (paos->count - 1) * pStreamDesc->stride - + pStreamDesc->size * getTypeSize(pStreamDesc->type); + nVBsize = (paos->count - 1) * paos->stride * 4 + + paos->components * 4; } uSQ_VTX_CONSTANT_WORD0_0 = paos->offset; uSQ_VTX_CONSTANT_WORD1_0 = nVBsize - 1; SETfield(uSQ_VTX_CONSTANT_WORD2_0, 0, BASE_ADDRESS_HI_shift, BASE_ADDRESS_HI_mask); /* TODO */ - SETfield(uSQ_VTX_CONSTANT_WORD2_0, pStreamDesc->stride, SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift, + SETfield(uSQ_VTX_CONSTANT_WORD2_0, paos->stride * 4, SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift, SQ_VTX_CONSTANT_WORD2_0__STRIDE_mask); - SETfield(uSQ_VTX_CONSTANT_WORD2_0, GetSurfaceFormat(pStreamDesc->type, pStreamDesc->size, NULL), + SETfield(uSQ_VTX_CONSTANT_WORD2_0, GetSurfaceFormat(GL_FLOAT, paos->components, NULL), SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift, SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask); /* TODO : trace back api for initial data type, not only GL_FLOAT */ - - if(GL_TRUE == pStreamDesc->normalize) - { - SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_NORM, - SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask); - } - else - { SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_SCALED, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask); - } - - if(1 == pStreamDesc->_signed) - { SETbit(uSQ_VTX_CONSTANT_WORD2_0, SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit); - } SETfield(uSQ_VTX_CONSTANT_WORD3_0, 1, MEM_REQUEST_SIZE_shift, MEM_REQUEST_SIZE_mask); SETfield(uSQ_VTX_CONSTANT_WORD6_0, SQ_TEX_VTX_VALID_BUFFER, @@ -237,7 +224,7 @@ static void r700SetupVTXConstants(GLcontext * ctx, BEGIN_BATCH_NO_AUTOSTATE(9 + 2); R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); - R600_OUT_BATCH((pStreamDesc->element + SQ_FETCH_RESOURCE_VS_OFFSET) * FETCH_RESOURCE_STRIDE); + R600_OUT_BATCH((nStreamID + SQ_FETCH_RESOURCE_VS_OFFSET) * FETCH_RESOURCE_STRIDE); R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD0_0); R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD1_0); R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD2_0); @@ -281,7 +268,7 @@ static void r700SendVTXState(GLcontext *ctx, struct radeon_state_atom *atom) { r700SetupVTXConstants(ctx, (void*)(&context->radeon.tcl.aos[j]), - &(context->stream_desc[j])); + i); j++; } } diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -318,6 +318,8 @@ static void r700RunRenderPrimitiveImmediate(GLcontext * ctx, int start, int end, uint32_t vgt_index_type = 0; uint32_t vgt_primitive_type = 0; uint32_t vgt_num_indices = 0; + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; type = r700PrimitiveType(prim); num_indices = r700NumVerts(end - start, prim); @@ -332,34 +334,13 @@ static void r700RunRenderPrimitiveImmediate(GLcontext * ctx, int start, int end, SETfield(vgt_primitive_type, type, VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask); - if (num_indices > 0xffff) - { SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask); - } - else - { - SETfield(vgt_index_type, DI_INDEX_SIZE_16_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask); - } vgt_num_indices = num_indices; SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask); - if (start == 0) - { - SETfield(vgt_draw_initiator, DI_SRC_SEL_AUTO_INDEX, SOURCE_SELECT_shift, SOURCE_SELECT_mask); - } - else - { - if (num_indices > 0xffff) - { total_emit += num_indices; - } - else - { - total_emit += (num_indices + 1) / 2; - } SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask); - } total_emit += 3 /* VGT_PRIMITIVE_TYPE */ + 2 /* VGT_INDEX_TYPE */ @@ -377,42 +358,16 @@ static void r700RunRenderPrimitiveImmediate(GLcontext * ctx, int start, int end, R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0)); R600_OUT_BATCH(1); // draw packet - if(start == 0) - { - R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1)); - R600_OUT_BATCH(vgt_num_indices); - R600_OUT_BATCH(vgt_draw_initiator); - } - else - { - if (num_indices > 0xffff) - { R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1))); R600_OUT_BATCH(vgt_num_indices); R600_OUT_BATCH(vgt_draw_initiator); for (i = start; i < (start + num_indices); i++) { - R600_OUT_BATCH(i); + if(vb->Elts) + R600_OUT_BATCH(vb->Elts[i]); + else + R600_OUT_BATCH(i); } - } - else - { - R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (((num_indices + 1) / 2) + 1))); - R600_OUT_BATCH(vgt_num_indices); - R600_OUT_BATCH(vgt_draw_initiator); - for (i = start; i < (start + num_indices); i += 2) - { - if ((i + 1) == (start + num_indices)) - { - R600_OUT_BATCH(i); - } - else - { - R600_OUT_BATCH(((i + 1) << 16) | (i)); - } - } - } - } END_BATCH(); COMMIT_BATCH(); @@ -437,13 +392,9 @@ static GLuint r700PredictRenderSize(GLcontext* ctx, else { for (i = 0; i < nr_prims; ++i) { - if (prim[i].start == 0) - dwords += 10; - else if (prim[i].count > 0xffff) dwords += prim[i].count + 10; - else - dwords += ((prim[i].count + 1) / 2) + 10; } + dwords += 42; } state_size = radeonCountStateEmitSize(&context->radeon); @@ -609,6 +560,30 @@ static void r700AlignDataToDword(GLcontext *ctx, attr->stride = dst_stride; } +void r700SetupStreams1(GLcontext *ctx) +{ + context_t *context = R700_CONTEXT(ctx); + struct r700_vertex_program *vp = context->selected_vp; + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; + unsigned int i, j = 0; + + R600_STATECHANGE(context, vtx); + + for(i=0; imesa_program->Base.InputsRead & (1 << i)) { + rcommon_emit_vector(ctx, + &context->radeon.tcl.aos[j], + vb->AttribPtr[i]->data, + vb->AttribPtr[i]->size, + vb->AttribPtr[i]->stride, + vb->Count); + j++; + } + } + context->radeon.tcl.aos_count = j; +} + static void r700SetupStreams(GLcontext *ctx, const struct gl_client_array *input[], int count) { context_t *context = R700_CONTEXT(ctx); @@ -728,11 +703,11 @@ static void r700FreeData(GLcontext *ctx) int i; - for (i = 0; i < context->nNumActiveAos; i++) + for (i = 0; i < context->radeon.tcl.aos_count; i++) { - if (!context->stream_desc[i].is_named_bo) + if (context->radeon.tcl.aos[i].bo) { - radeon_bo_unref(context->stream_desc[i].bo); + radeon_bo_unref(context->radeon.tcl.aos[i].bo); } context->radeon.tcl.aos[i].bo = NULL; } @@ -899,7 +874,6 @@ static GLboolean r700TryDrawPrims(GLcontext *ctx, return GL_FALSE; _tnl_UpdateFixedFunctionProgram(ctx); - r700SetVertexFormat(ctx, arrays, max_index + 1); /* shaders need to be updated before buffers are validated */ r700UpdateShaders(ctx); if (!r600ValidateBuffers(ctx)) @@ -921,7 +895,7 @@ static GLboolean r700TryDrawPrims(GLcontext *ctx, + context->radeon.cmdbuf.cs->cdw; r700SetupIndexBuffer(ctx, ib); - r700SetupStreams(ctx, arrays, max_index + 1); + r700SetupStreams1(ctx); radeonEmitState(radeon); @@ -989,14 +963,8 @@ static void r700DrawPrims(GLcontext *ctx, return; } - /* Make an attempt at drawing */ - retval = r700TryDrawPrims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); - - /* If failed run tnl pipeline - it should take care of fallbacks */ - if (!retval) { _swsetup_Wakeup(ctx); _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); - } } void r700InitDraw(GLcontext *ctx) @@ -1007,4 +975,25 @@ void r700InitDraw(GLcontext *ctx) vbo->draw_prims = r700DrawPrims; } +static GLboolean r700RunTCLRender(GLcontext * ctx, + struct tnl_pipeline_stage *stage) +{ + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; + GLboolean bRet = GL_FALSE; + bRet = r700TryDrawPrims(ctx, 0, vb->Primitive, vb->PrimitiveCount, 0, 0, 0); + if (bRet == GL_TRUE) + return GL_FALSE; + return GL_TRUE; + //GL_FALSE will stop to do other pipe stage in _tnl_run_pipeline + //The render here DOES finish the whole pipe, so GL_FALSE should be returned for success. +} +const struct tnl_pipeline_stage _r700_tcl_stage = { + "r700 Hardware Transform, Clipping and Lighting", + NULL, + NULL, + NULL, + NULL, + r700RunTCLRender +}; diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c --- a/src/mesa/drivers/dri/r600/r700_vertprog.c +++ b/src/mesa/drivers/dri/r600/r700_vertprog.c @@ -213,9 +213,9 @@ void Map_Vertex_Program(GLcontext *ctx, pAsm->number_used_registers += num_inputs; // Create VFETCH instructions for inputs - if (GL_TRUE != Process_Vertex_Program_Vfetch_Instructions2(ctx, vp, mesa_vp) ) + if (GL_TRUE != Process_Vertex_Program_Vfetch_Instructions(vp, mesa_vp) ) { - radeon_error("Calling Process_Vertex_Program_Vfetch_Instructions2 return error. \n"); + radeon_error("Calling Process_Vertex_Program_Vfetch_Instructions return error. \n"); return; } @@ -305,6 +305,8 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, { context_t *context = R700_CONTEXT(ctx); struct r700_vertex_program *vp; + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; unsigned int i; vp = calloc(1, sizeof(*vp)); @@ -315,12 +317,11 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, _mesa_insert_mvp_code(ctx, vp->mesa_program); } - for(i=0; inNumActiveAos; i++) + for(i=0; iaos_desc[i].size = context->stream_desc[i].size; - vp->aos_desc[i].stride = context->stream_desc[i].stride; - vp->aos_desc[i].type = context->stream_desc[i].type; - vp->aos_desc[i].format = context->stream_desc[i].format; + vp->aos_desc[i].size = vb->AttribPtr[i]->size; + vp->aos_desc[i].stride = vb->AttribPtr[i]->size * sizeof(GL_FLOAT);/* when emit array, data is packed. vb->AttribPtr[i]->stride;*/ + vp->aos_desc[i].type = GL_FLOAT; } if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) @@ -380,6 +381,8 @@ void r700SelectVertexShader(GLcontext *ctx) context_t *context = R700_CONTEXT(ctx); struct r700_vertex_program_cont *vpc; struct r700_vertex_program *vp; + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; unsigned int i; GLboolean match; GLbitfield InputsRead; @@ -395,10 +398,9 @@ void r700SelectVertexShader(GLcontext *ctx) for (vp = vpc->progs; vp; vp = vp->next) { match = GL_TRUE; - for(i=0; inNumActiveAos; i++) + for(i=0; iaos_desc[i].size != context->stream_desc[i].size || - vp->aos_desc[i].format != context->stream_desc[i].format) + if (vp->aos_desc[i].size != vb->AttribPtr[i]->size) { match = GL_FALSE; break;