Common subdirectories: mach64/CVS and mach64-vtx/CVS diff -aup -x '.*' -x 'depend*' -x '*o' mach64/mach64_context.c mach64-vtx/mach64_context.c --- mach64/mach64_context.c 2005-10-19 05:46:42.000000000 +0300 +++ mach64-vtx/mach64_context.c 2006-04-12 18:41:59.000000000 +0300 @@ -221,7 +221,9 @@ GLboolean mach64CreateContext( const __G driInitExtensions( ctx, card_extensions, GL_TRUE ); +#if MACH64_NATIVE_VTXFMT mach64InitVB( ctx ); +#endif mach64InitTriFuncs( ctx ); mach64DDInitStateFuncs( ctx ); mach64DDInitSpanFuncs( ctx ); @@ -275,7 +277,9 @@ void mach64DestroyContext( __DRIcontextP _ac_DestroyContext( mmesa->glCtx ); _swrast_DestroyContext( mmesa->glCtx ); +#if MACH64_NATIVE_VTXFMT mach64FreeVB( mmesa->glCtx ); +#endif /* Free the vertex buffer */ if ( mmesa->vert_buf ) diff -aup -x '.*' -x 'depend*' -x '*o' mach64/mach64_context.h mach64-vtx/mach64_context.h --- mach64/mach64_context.h 2006-04-04 18:51:58.000000000 +0300 +++ mach64-vtx/mach64_context.h 2006-05-23 18:32:48.000000000 +0300 @@ -37,6 +37,7 @@ #include "mach64_drm.h" #include "mtypes.h" +#include "tnl/t_vertex.h" #include "mach64_reg.h" @@ -58,6 +59,9 @@ typedef struct mach64_context *mach64Con /* Native vertex format */ #define MACH64_NATIVE_VTXFMT 1 +/* premultiply texture coordinates by homogeneous coordinate */ +#define MACH64_PREMULT_TEXCOORDS + /* Flags for what context state needs to be updated: */ #define MACH64_NEW_ALPHA 0x0001 @@ -83,7 +87,8 @@ typedef struct mach64_context *mach64Con #define MACH64_FALLBACK_SEP_SPECULAR 0x0040 #define MACH64_FALLBACK_BLEND_EQ 0x0080 #define MACH64_FALLBACK_BLEND_FUNC 0x0100 -#define MACH64_FALLBACK_DISABLE 0x0200 +#define MACH64_FALLBACK_PROJTEX 0x0200 +#define MACH64_FALLBACK_DISABLE 0x0400 #define CARD32 GLuint /* KW: For building in mesa tree */ @@ -203,6 +208,15 @@ struct mach64_context { GLuint dirty; /* Hardware state to be updated */ drm_mach64_context_regs_t setup; + GLuint vertex_size; + GLuint vertex_format; + struct tnl_attr_map 
vertex_attrs[VERT_ATTRIB_MAX]; + GLuint vertex_attr_count; + GLubyte *verts; /* points to tnl->clipspace.vertex_buf */ + GLuint num_verts; + int coloroffset, specoffset; + DECLARE_RENDERINPUTS(tnl_state_bitset); /* tnl->render_inputs for this _tnl_install_attrs */ + GLuint NewGLState; GLuint Fallback; GLuint SetupIndex; @@ -210,11 +224,6 @@ struct mach64_context { GLuint RenderIndex; GLfloat hw_viewport[16]; GLfloat depth_scale; - GLuint vertex_size; - GLuint vertex_stride_shift; - GLuint vertex_format; - GLuint num_verts; - GLubyte *verts; CARD32 Color; /* Current draw color */ CARD32 ClearColor; /* Color used to clear color buffer */ diff -aup -x '.*' -x 'depend*' -x '*o' mach64/mach64_tris.c mach64-vtx/mach64_tris.c --- mach64/mach64_tris.c 2006-05-23 21:09:23.000000000 +0300 +++ mach64-vtx/mach64_tris.c 2006-05-23 18:24:20.000000000 +0300 @@ -70,6 +70,16 @@ static void mach64RenderPrimitive( GLcon * Emit primitives as inline vertices * ***********************************************************************/ +/* mach64 has two implementations of the vertex buffer, one that uses the + * template vertex format (from mesa/tnl) and one that uses the native vertex + * format (from mesa/tnl_dd). 
+ * + * native template + * glxgears 228 228 + * teapot 13.6 12.7 + */ +#if MACH64_NATIVE_VTXFMT + #if defined(USE_X86_ASM) #define DO_COPY_VERTEX( vb, vertsize, v, n, m ) \ do { \ @@ -112,6 +122,101 @@ do { \ #define COPY_VERTEX( vb, vertsize, v, n ) DO_COPY_VERTEX( vb, vertsize, v, n, 0 ) #define COPY_VERTEX_OOA( vb, vertsize, v, n ) DO_COPY_VERTEX( vb, vertsize, v, n, 1 ) +#else /* !MACH64_NATIVE_VTXFMT */ + +#define DO_SET_W( vn, n ) \ +do { \ + int __t = vertsize + 4; \ + if ( vertsize > 7 ) \ + vn->f[__t - 8] = vn->v.w; \ + \ + if ( vertsize > 4 ) \ + vn->f[__t - 5] = vn->v.w; \ +} while (0) + +#define DO_SET_XYZ( vn, n ) \ +do { \ + int __t = vertsize + 4; \ + vn->ui[__t - 3] = (GLint)(vn->v.z) << 15; \ + vn->ui[__t - 1] = (xx[n-1] << 16) | (yy[n-1] & 0xffff); \ +} while (0) + +#ifdef MACH64_PREMULT_TEXCOORDS +#define DO_COPY_VERTEX( vn, n, m ) \ +do { \ + CARD32 *__p = (CARD32 *)vn + 4; \ + int __s = vertsize; \ + GLfloat w = vn->v.w; \ + \ + DO_SET_XYZ( vn, n ); \ + \ + if ( vertsize > 7 ) { \ + LE32_OUT( vb++, (2 << 16) | \ + ADRINDEX( MACH64_VERTEX_##n##_SECONDARY_S ) ); \ + LE32_OUT_FLOAT( vb++, w * *(GLfloat *)__p++ ); \ + LE32_OUT_FLOAT( vb++, w * *(GLfloat *)__p++ ); \ + LE32_OUT_FLOAT( vb++, w ); __p++; \ + __s -= 3; \ + } \ + \ + LE32_OUT( vb++, ((__s - 1 + m) << 16) | \ + (ADRINDEX( MACH64_VERTEX_##n##_X_Y ) - (__s - 1)) ); \ + \ + if ( vertsize > 4 ) { \ + LE32_OUT_FLOAT( vb++, w * *(GLfloat *)__p++ ); \ + LE32_OUT_FLOAT( vb++, w * *(GLfloat *)__p++ ); \ + LE32_OUT_FLOAT( vb++, w ); __p++; \ + __s -= 3; \ + } \ + \ + if ( vertsize > 3 ) \ + *vb++ = *__p++; \ + \ + LE32_OUT( vb++, *__p++ ); \ + *vb++ = *__p++; \ + LE32_OUT( vb++, *__p++ ); \ +} while (0) +#else /* !MACH64_PREMULT_TEXCOORDS */ +#define DO_COPY_VERTEX( vn, n, m ) \ +do { \ + CARD32 *__p = (CARD32 *)vn + 4; \ + int __s = vertsize; \ + \ + DO_SET_XYZ( vn, n ); \ + DO_SET_W( vn, n ); \ + \ + if ( vertsize > 7 ) { \ + LE32_OUT( vb++, (2 << 16) | \ + ADRINDEX( 
MACH64_VERTEX_##n##_SECONDARY_S ) ); \ + LE32_OUT( vb++, *__p++ ); \ + LE32_OUT( vb++, *__p++ ); \ + LE32_OUT( vb++, *__p++ ); \ + __s -= 3; \ + } \ + \ + LE32_OUT( vb++, ((__s - 1 + m) << 16) | \ + (ADRINDEX( MACH64_VERTEX_##n##_X_Y ) - (__s - 1)) ); \ + \ + if ( vertsize > 4 ) { \ + LE32_OUT( vb++, *__p++ ); \ + LE32_OUT( vb++, *__p++ ); \ + LE32_OUT( vb++, *__p++ ); \ + __s -= 3; \ + } \ + \ + if ( vertsize > 3 ) \ + *vb++ = *__p++; \ + \ + LE32_OUT( vb++, *__p++ ); \ + *vb++ = *__p++; \ + LE32_OUT( vb++, *__p++ ); \ +} while (0) +#endif /* MACH64_PREMULT_TEXCOORDS */ + +#define COPY_VERTEX( v, n ) DO_COPY_VERTEX( v, n, 0 ) +#define COPY_VERTEX_OOA( v, n ) DO_COPY_VERTEX( v, n, 1 ) + +#endif /* MACH64_NATIVE_VTXFMT */ static __inline void mach64_draw_quad( mach64ContextPtr mmesa, mach64VertexPtr v0, @@ -200,20 +305,11 @@ static __inline void mach64_draw_quad( m } #endif #else - GLuint vertsize = mmesa->vertex_size; - GLint coloridx; + GLuint vertsize = mmesa->vertex_size - 4; GLfloat ooa; GLint xx[3], yy[3]; /* 2 fractional bits for hardware */ - unsigned vbsiz = - (( - 1 + - (vertsize > 6 ? 2 : 0) + - (vertsize > 4 ? 2 : 0) + - 3 + - (mmesa->multitex ? 4 : 0) - ) * 4 + 4); - CARD32 *vb; - unsigned vbidx = 0; + unsigned vbsiz = (vertsize + (vertsize > 7 ? 2 : 1)) * 4 + 2; + CARD32 *vb, *vbchk; if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS ) { fprintf(stderr, "%s:\n", __FUNCTION__); @@ -259,109 +355,14 @@ static __inline void mach64_draw_quad( m } vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * 4 ); + vbchk = vb + vbsiz; ooa = 1.0 / ooa; - coloridx = (vertsize > 4) ? 
4: 3; - - /* setup for 3,5, or 7 sequential reg writes based on vertex format */ - switch (vertsize) { - case 6: - LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_1_W) ); - break; - case 4: - LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_Z) ); - break; - default: /* vertsize >= 8 */ - LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_1_S) ); - break; - } - if (vertsize > 6) { - LE32_OUT( &vb[vbidx++], v0->ui[6] ); /* MACH64_VERTEX_1_S */ - LE32_OUT( &vb[vbidx++], v0->ui[7] ); /* MACH64_VERTEX_1_T */ - } - if (vertsize > 4) { - LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_1_W */ - LE32_OUT( &vb[vbidx++], v0->ui[5] ); /* MACH64_VERTEX_1_SPEC_ARGB */ - } - LE32_OUT( &vb[vbidx++], ((GLint)(v0->v.z) << 15) ); /* MACH64_VERTEX_1_Z */ - vb[vbidx++] = v0->ui[coloridx]; /* MACH64_VERTEX_1_ARGB */ - LE32_OUT( &vb[vbidx++], (xx[0] << 16) | (yy[0] & 0xffff) ); /* MACH64_VERTEX_1_X_Y */ - - if (mmesa->multitex) { - /* setup for 3 sequential reg writes */ - LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_SECONDARY_S) ); - LE32_OUT( &vb[vbidx++], v0->ui[8] ); /* MACH64_VERTEX_1_SECONDARY_S */ - LE32_OUT( &vb[vbidx++], v0->ui[9] ); /* MACH64_VERTEX_1_SECONDARY_T */ - LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_1_SECONDARY_W */ - } - - /* setup for 3,5, or 7 sequential reg writes based on vertex format */ - switch (vertsize) { - case 6: - LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_2_W) ); - break; - case 4: - LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_2_Z) ); - break; - default: /* vertsize >= 8 */ - LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_2_S) ); - break; - } - if (vertsize > 6) { - LE32_OUT( &vb[vbidx++], v1->ui[6] ); /* MACH64_VERTEX_2_S */ - LE32_OUT( &vb[vbidx++], v1->ui[7] ); /* MACH64_VERTEX_2_T */ - } - if (vertsize > 4) { - LE32_OUT( &vb[vbidx++], v1->ui[3] ); /* MACH64_VERTEX_2_W */ - LE32_OUT( &vb[vbidx++], v1->ui[5] ); /* MACH64_VERTEX_2_SPEC_ARGB */ - } 
- LE32_OUT( &vb[vbidx++], ((GLint)(v1->v.z) << 15) ); /* MACH64_VERTEX_2_Z */ - vb[vbidx++] = v1->ui[coloridx]; /* MACH64_VERTEX_2_ARGB */ - LE32_OUT( &vb[vbidx++], (xx[1] << 16) | (yy[1] & 0xffff) ); /* MACH64_VERTEX_2_X_Y */ - - if (mmesa->multitex) { - /* setup for 3 sequential reg writes */ - LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_2_SECONDARY_S) ); - LE32_OUT( &vb[vbidx++], v1->ui[8] ); /* MACH64_VERTEX_2_SECONDARY_S */ - LE32_OUT( &vb[vbidx++], v1->ui[9] ); /* MACH64_VERTEX_2_SECONDARY_T */ - LE32_OUT( &vb[vbidx++], v1->ui[3] ); /* MACH64_VERTEX_2_SECONDARY_W */ - } - - /* setup for 3,5, or 7 sequential reg writes based on vertex format */ - switch (vertsize) { - case 6: - LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_3_W) ); - break; - case 4: - LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_3_Z) ); - break; - default: /* vertsize >= 8 */ - LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_3_S) ); - break; - } - if (vertsize > 6) { - LE32_OUT( &vb[vbidx++], v3->ui[6] ); /* MACH64_VERTEX_3_S */ - LE32_OUT( &vb[vbidx++], v3->ui[7] ); /* MACH64_VERTEX_3_T */ - } - if (vertsize > 4) { - LE32_OUT( &vb[vbidx++], v3->ui[3] ); /* MACH64_VERTEX_3_W */ - LE32_OUT( &vb[vbidx++], v3->ui[5] ); /* MACH64_VERTEX_3_SPEC_ARGB */ - } - LE32_OUT( &vb[vbidx++], ((GLint)(v3->v.z) << 15) ); /* MACH64_VERTEX_3_Z */ - vb[vbidx++] = v3->ui[coloridx]; /* MACH64_VERTEX_3_ARGB */ - LE32_OUT( &vb[vbidx++], (xx[2] << 16) | (yy[2] & 0xffff) ); /* MACH64_VERTEX_3_X_Y */ - - if (mmesa->multitex) { - /* setup for 3 sequential reg writes */ - LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_3_SECONDARY_S) ); - LE32_OUT( &vb[vbidx++], v3->ui[8] ); /* MACH64_VERTEX_3_SECONDARY_S */ - LE32_OUT( &vb[vbidx++], v3->ui[9] ); /* MACH64_VERTEX_3_SECONDARY_T */ - LE32_OUT( &vb[vbidx++], v3->ui[3] ); /* MACH64_VERTEX_3_SECONDARY_W */ - } - - LE32_OUT( &vb[vbidx++], ADRINDEX(MACH64_ONE_OVER_AREA_UC) ); - LE32_OUT( &vb[vbidx++], *(GLuint 
*)&ooa ); + COPY_VERTEX( v0, 1 ); + COPY_VERTEX( v1, 2 ); + COPY_VERTEX_OOA( v3, 3 ); + LE32_OUT( vb++, *(GLuint *)&ooa ); xx[0] = (GLint)(v2->v.x * 4); yy[0] = (GLint)(v2->v.y * 4); @@ -370,42 +371,10 @@ static __inline void mach64_draw_quad( m (yy[0] - yy[2]) * (xx[1] - xx[2])); ooa = 1.0 / ooa; - /* setup for 3,5, or 7 sequential reg writes based on vertex format */ - switch (vertsize) { - case 6: - LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_1_W) ); - break; - case 4: - LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_Z) ); - break; - default: /* vertsize >= 8 */ - LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_1_S) ); - break; - } - if (vertsize > 6) { - LE32_OUT( &vb[vbidx++], v2->ui[6] ); /* MACH64_VERTEX_1_S */ - LE32_OUT( &vb[vbidx++], v2->ui[7] ); /* MACH64_VERTEX_1_T */ - } - if (vertsize > 4) { - LE32_OUT( &vb[vbidx++], v2->ui[3] ); /* MACH64_VERTEX_1_W */ - LE32_OUT( &vb[vbidx++], v2->ui[5] ); /* MACH64_VERTEX_1_SPEC_ARGB */ - } - LE32_OUT( &vb[vbidx++], ((GLint)(v2->v.z) << 15) ); /* MACH64_VERTEX_1_Z */ - vb[vbidx++] = v2->ui[coloridx]; /* MACH64_VERTEX_1_ARGB */ - LE32_OUT( &vb[vbidx++], (xx[0] << 16) | (yy[0] & 0xffff) ); /* MACH64_VERTEX_1_X_Y */ - - if (mmesa->multitex) { - /* setup for 3 sequential reg writes */ - LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_SECONDARY_S) ); - LE32_OUT( &vb[vbidx++], v2->ui[8] ); /* MACH64_VERTEX_1_SECONDARY_S */ - LE32_OUT( &vb[vbidx++], v2->ui[9] ); /* MACH64_VERTEX_1_SECONDARY_T */ - LE32_OUT( &vb[vbidx++], v2->ui[3] ); /* MACH64_VERTEX_1_SECONDARY_W */ - } - - LE32_OUT( &vb[vbidx++], ADRINDEX(MACH64_ONE_OVER_AREA_UC) ); - LE32_OUT( &vb[vbidx++], *(GLuint *)&ooa ); + COPY_VERTEX_OOA( v2, 1 ); + LE32_OUT( vb++, *(GLuint *)&ooa ); - assert(vbsiz == vbidx); + assert( vb == vbchk ); #if MACH64_PRINT_BUFFER { @@ -491,20 +460,11 @@ static __inline void mach64_draw_triangl } #endif #else - GLuint vertsize = mmesa->vertex_size; - GLint coloridx; + GLuint vertsize 
= mmesa->vertex_size - 4; GLfloat ooa; GLint xx[3], yy[3]; /* 2 fractional bits for hardware */ - unsigned vbsiz = - (( - 1 + - (vertsize > 6 ? 2 : 0) + - (vertsize > 4 ? 2 : 0) + - 3 + - (mmesa->multitex ? 4 : 0) - ) * 3 + 2); - CARD32 *vb; - unsigned vbidx = 0; + unsigned vbsiz = (vertsize + (vertsize > 7 ? 2 : 1)) * 3 + 1; + CARD32 *vb, *vbchk; if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS ) { fprintf(stderr, "%s:\n", __FUNCTION__); @@ -548,111 +508,16 @@ static __inline void mach64_draw_triangl } vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * 4 ); + vbchk = vb + vbsiz; ooa = 1.0 / ooa; - coloridx = (vertsize > 4) ? 4: 3; - - /* setup for 3,5, or 7 sequential reg writes based on vertex format */ - switch (vertsize) { - case 6: - LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_1_W) ); - break; - case 4: - LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_Z) ); - break; - default: /* vertsize >= 8 */ - LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_1_S) ); - break; - } - if (vertsize > 6) { - LE32_OUT( &vb[vbidx++], v0->ui[6] ); /* MACH64_VERTEX_1_S */ - LE32_OUT( &vb[vbidx++], v0->ui[7] ); /* MACH64_VERTEX_1_T */ - } - if (vertsize > 4) { - LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_1_W */ - LE32_OUT( &vb[vbidx++], v0->ui[5] ); /* MACH64_VERTEX_1_SPEC_ARGB */ - } - LE32_OUT( &vb[vbidx++], ((GLint)(v0->v.z) << 15) ); /* MACH64_VERTEX_1_Z */ - vb[vbidx++] = v0->ui[coloridx]; /* MACH64_VERTEX_1_ARGB */ - LE32_OUT( &vb[vbidx++], (xx[0] << 16) | (yy[0] & 0xffff) ); /* MACH64_VERTEX_1_X_Y */ - - if (mmesa->multitex) { - /* setup for 3 sequential reg writes */ - LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_SECONDARY_S) ); - LE32_OUT( &vb[vbidx++], v0->ui[8] ); /* MACH64_VERTEX_1_SECONDARY_S */ - LE32_OUT( &vb[vbidx++], v0->ui[9] ); /* MACH64_VERTEX_1_SECONDARY_T */ - LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_1_SECONDARY_W */ - } - - /* setup for 3,5, or 7 sequential reg writes based on vertex format */ 
- switch (vertsize) { - case 6: - LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_2_W) ); - break; - case 4: - LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_2_Z) ); - break; - default: /* vertsize >= 8 */ - LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_2_S) ); - break; - } - if (vertsize > 6) { - LE32_OUT( &vb[vbidx++], v1->ui[6] ); /* MACH64_VERTEX_2_S */ - LE32_OUT( &vb[vbidx++], v1->ui[7] ); /* MACH64_VERTEX_2_T */ - } - if (vertsize > 4) { - LE32_OUT( &vb[vbidx++], v1->ui[3] ); /* MACH64_VERTEX_2_W */ - LE32_OUT( &vb[vbidx++], v1->ui[5] ); /* MACH64_VERTEX_2_SPEC_ARGB */ - } - LE32_OUT( &vb[vbidx++], ((GLint)(v1->v.z) << 15) ); /* MACH64_VERTEX_2_Z */ - vb[vbidx++] = v1->ui[coloridx]; /* MACH64_VERTEX_2_ARGB */ - LE32_OUT( &vb[vbidx++], (xx[1] << 16) | (yy[1] & 0xffff) ); /* MACH64_VERTEX_2_X_Y */ - - if (mmesa->multitex) { - /* setup for 3 sequential reg writes */ - LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_2_SECONDARY_S) ); - LE32_OUT( &vb[vbidx++], v1->ui[8] ); /* MACH64_VERTEX_2_SECONDARY_S */ - LE32_OUT( &vb[vbidx++], v1->ui[9] ); /* MACH64_VERTEX_2_SECONDARY_T */ - LE32_OUT( &vb[vbidx++], v1->ui[3] ); /* MACH64_VERTEX_2_SECONDARY_W */ - } - - /* setup for 3,5, or 7 sequential reg writes based on vertex format */ - switch (vertsize) { - case 6: - LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_3_W) ); - break; - case 4: - LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_3_Z) ); - break; - default: /* vertsize >= 8 */ - LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_3_S) ); - break; - } - if (vertsize > 6) { - LE32_OUT( &vb[vbidx++], v2->ui[6] ); /* MACH64_VERTEX_3_S */ - LE32_OUT( &vb[vbidx++], v2->ui[7] ); /* MACH64_VERTEX_3_T */ - } - if (vertsize > 4) { - LE32_OUT( &vb[vbidx++], v2->ui[3] ); /* MACH64_VERTEX_3_W */ - LE32_OUT( &vb[vbidx++], v2->ui[5] ); /* MACH64_VERTEX_3_SPEC_ARGB */ - } - LE32_OUT( &vb[vbidx++], ((GLint)(v2->v.z) << 15) ); /* MACH64_VERTEX_3_Z */ - 
vb[vbidx++] = v2->ui[coloridx]; /* MACH64_VERTEX_3_ARGB */ - LE32_OUT( &vb[vbidx++], (xx[2] << 16) | (yy[2] & 0xffff) ); /* MACH64_VERTEX_3_X_Y */ - - if (mmesa->multitex) { - /* setup for 3 sequential reg writes */ - LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_3_SECONDARY_S) ); - LE32_OUT( &vb[vbidx++], v2->ui[8] ); /* MACH64_VERTEX_3_SECONDARY_S */ - LE32_OUT( &vb[vbidx++], v2->ui[9] ); /* MACH64_VERTEX_3_SECONDARY_T */ - LE32_OUT( &vb[vbidx++], v2->ui[3] ); /* MACH64_VERTEX_3_SECONDARY_W */ - } - - LE32_OUT( &vb[vbidx++], ADRINDEX(MACH64_ONE_OVER_AREA_UC) ); - LE32_OUT( &vb[vbidx++], *(GLuint *)&ooa ); + COPY_VERTEX( v0, 1 ); + COPY_VERTEX( v1, 2 ); + COPY_VERTEX_OOA( v2, 3 ); + LE32_OUT( vb++, *(GLuint *)&ooa ); - assert(vbsiz == vbidx); + assert( vb == vbchk ); #if MACH64_PRINT_BUFFER { @@ -744,21 +609,12 @@ static __inline void mach64_draw_line( m *pxy0 = xy0old; *pxy1 = xy1old; #else /* !MACH64_NATIVE_VTXFMT */ - GLuint vertsize = mmesa->vertex_size; - GLint coloridx; + GLuint vertsize = mmesa->vertex_size - 4; float width = 1.0; /* Only support 1 pix lines now */ GLfloat ooa; GLint xx[3], yy[3]; /* 2 fractional bits for hardware */ - unsigned vbsiz = - (( - 1 + - (vertsize > 6 ? 2 : 0) + - (vertsize > 4 ? 2 : 0) + - 3 + - (mmesa->multitex ? 4 : 0) - ) * 4 + 4); - CARD32 *vb; - unsigned vbidx = 0; + unsigned vbsiz = (vertsize + (vertsize > 7 ? 2 : 1)) * 4 + 2; + CARD32 *vb, *vbchk; GLfloat hw, dx, dy, ix, iy; GLfloat x0 = v0->v.x; @@ -837,85 +693,14 @@ static __inline void mach64_draw_line( m } vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * 4 ); + vbchk = vb + vbsiz; ooa = 1.0 / ooa; - coloridx = (vertsize > 4) ? 
4: 3; - - /* setup for 3,5, or 7 sequential reg writes based on vertex format */ - switch (vertsize) { - case 6: - LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_1_W) ); - break; - case 4: - LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_Z) ); - break; - default: /* vertsize >= 8 */ - LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_1_S) ); - break; - } - if (vertsize > 6) { - LE32_OUT( &vb[vbidx++], v0->ui[6] ); /* MACH64_VERTEX_1_S */ - LE32_OUT( &vb[vbidx++], v0->ui[7] ); /* MACH64_VERTEX_1_T */ - } - if (vertsize > 4) { - LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_1_W */ - LE32_OUT( &vb[vbidx++], v0->ui[5] ); /* MACH64_VERTEX_1_SPEC_ARGB */ - } - LE32_OUT( &vb[vbidx++], ((GLint)(v0->v.z) << 15) ); /* MACH64_VERTEX_1_Z */ - vb[vbidx++] = v0->ui[coloridx]; /* MACH64_VERTEX_1_ARGB */ - LE32_OUT( &vb[vbidx++], (xx[0] << 16) | (yy[0] & 0xffff) ); /* MACH64_VERTEX_1_X_Y */ - - /* setup for 3,5, or 7 sequential reg writes based on vertex format */ - switch (vertsize) { - case 6: - LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_2_W) ); - break; - case 4: - LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_2_Z) ); - break; - default: /* vertsize >= 8 */ - LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_2_S) ); - break; - } - if (vertsize > 6) { - LE32_OUT( &vb[vbidx++], v1->ui[6] ); /* MACH64_VERTEX_2_S */ - LE32_OUT( &vb[vbidx++], v1->ui[7] ); /* MACH64_VERTEX_2_T */ - } - if (vertsize > 4) { - LE32_OUT( &vb[vbidx++], v1->ui[3] ); /* MACH64_VERTEX_2_W */ - LE32_OUT( &vb[vbidx++], v1->ui[5] ); /* MACH64_VERTEX_2_SPEC_ARGB */ - } - LE32_OUT( &vb[vbidx++], ((GLint)(v1->v.z) << 15) ); /* MACH64_VERTEX_2_Z */ - vb[vbidx++] = v1->ui[coloridx]; /* MACH64_VERTEX_2_ARGB */ - LE32_OUT( &vb[vbidx++], (xx[1] << 16) | (yy[1] & 0xffff) ); /* MACH64_VERTEX_2_X_Y */ - - /* setup for 3,5, or 7 sequential reg writes based on vertex format */ - switch (vertsize) { - case 6: - LE32_OUT( &vb[vbidx++], (4 << 16) 
| ADRINDEX(MACH64_VERTEX_3_W) ); - break; - case 4: - LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_3_Z) ); - break; - default: /* vertsize >= 8 */ - LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_3_S) ); - break; - } - if (vertsize > 6) { - LE32_OUT( &vb[vbidx++], v0->ui[6] ); /* MACH64_VERTEX_3_S */ - LE32_OUT( &vb[vbidx++], v0->ui[7] ); /* MACH64_VERTEX_3_T */ - } - if (vertsize > 4) { - LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_3_W */ - LE32_OUT( &vb[vbidx++], v0->ui[5] ); /* MACH64_VERTEX_3_SPEC_ARGB */ - } - LE32_OUT( &vb[vbidx++], ((GLint)(v0->v.z) << 15) ); /* MACH64_VERTEX_3_Z */ - vb[vbidx++] = v0->ui[coloridx]; /* MACH64_VERTEX_3_ARGB */ - LE32_OUT( &vb[vbidx++], (xx[2] << 16) | (yy[2] & 0xffff) ); /* MACH64_VERTEX_3_X_Y */ - - LE32_OUT( &vb[vbidx++], ADRINDEX(MACH64_ONE_OVER_AREA_UC) ); - LE32_OUT( &vb[vbidx++], *(GLuint *)&ooa ); + COPY_VERTEX( v0, 1 ); + COPY_VERTEX( v1, 2 ); + COPY_VERTEX_OOA( v0, 3 ); + LE32_OUT( vb++, *(GLuint *)&ooa ); xx[0] = (GLint)((x1 + ix) * 4); yy[0] = (GLint)((y1 + iy) * 4); @@ -924,34 +709,10 @@ static __inline void mach64_draw_line( m (yy[0] - yy[2]) * (xx[1] - xx[2])); ooa = 1.0 / ooa; - /* setup for 3,5, or 7 sequential reg writes based on vertex format */ - switch (vertsize) { - case 6: - LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_1_W) ); - break; - case 4: - LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_Z) ); - break; - default: /* vertsize >= 8 */ - LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_1_S) ); - break; - } - if (vertsize > 6) { - LE32_OUT( &vb[vbidx++], v1->ui[6] ); /* MACH64_VERTEX_1_S */ - LE32_OUT( &vb[vbidx++], v1->ui[7] ); /* MACH64_VERTEX_1_T */ - } - if (vertsize > 4) { - LE32_OUT( &vb[vbidx++], v1->ui[3] ); /* MACH64_VERTEX_1_W */ - LE32_OUT( &vb[vbidx++], v1->ui[5] ); /* MACH64_VERTEX_1_SPEC_ARGB */ - } - LE32_OUT( &vb[vbidx++], ((GLint)(v1->v.z) << 15) ); /* MACH64_VERTEX_1_Z */ - vb[vbidx++] = v1->ui[coloridx]; /* 
MACH64_VERTEX_1_ARGB */ - LE32_OUT( &vb[vbidx++], (xx[0] << 16) | (yy[0] & 0xffff) ); /* MACH64_VERTEX_1_X_Y */ - - LE32_OUT( &vb[vbidx++], ADRINDEX(MACH64_ONE_OVER_AREA_UC) ); - LE32_OUT( &vb[vbidx++], *(GLuint *)&ooa ); + COPY_VERTEX_OOA( v1, 1 ); + LE32_OUT( vb++, *(GLuint *)&ooa ); - assert(vbsiz == vbidx); + assert( vb == vbchk ); #endif } @@ -1006,21 +767,12 @@ static __inline void mach64_draw_point( *pxy = xyold; #else /* !MACH64_NATIVE_VTXFMT */ - GLuint vertsize = mmesa->vertex_size; - GLint coloridx; + GLuint vertsize = mmesa->vertex_size - 4; float sz = 1.0; /* Only support 1 pix points now */ GLfloat ooa; GLint xx[3], yy[3]; /* 2 fractional bits for hardware */ - unsigned vbsiz = - (( - 1 + - (vertsize > 6 ? 2 : 0) + - (vertsize > 4 ? 2 : 0) + - 3 + - (mmesa->multitex ? 4 : 0) - ) * 4 + 4); - CARD32 *vb; - unsigned vbidx = 0; + unsigned vbsiz = (vertsize + (vertsize > 7 ? 2 : 1)) * 4 + 2; + CARD32 *vb, *vbchk; if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS ) { fprintf(stderr, "%s:\n", __FUNCTION__); @@ -1060,85 +812,14 @@ static __inline void mach64_draw_point( } vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * 4 ); + vbchk = vb + vbsiz; ooa = 1.0 / ooa; - coloridx = (vertsize > 4) ? 
4: 3; - - /* setup for 3,5, or 7 sequential reg writes based on vertex format */ - switch (vertsize) { - case 6: - LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_1_W) ); - break; - case 4: - LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_Z) ); - break; - default: /* vertsize >= 8 */ - LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_1_S) ); - break; - } - if (vertsize > 6) { - LE32_OUT( &vb[vbidx++], v0->ui[6] ); /* MACH64_VERTEX_1_S */ - LE32_OUT( &vb[vbidx++], v0->ui[7] ); /* MACH64_VERTEX_1_T */ - } - if (vertsize > 4) { - LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_1_W */ - LE32_OUT( &vb[vbidx++], v0->ui[5] ); /* MACH64_VERTEX_1_SPEC_ARGB */ - } - LE32_OUT( &vb[vbidx++], ((GLint)(v0->v.z) << 15) ); /* MACH64_VERTEX_1_Z */ - vb[vbidx++] = v0->ui[coloridx]; /* MACH64_VERTEX_1_ARGB */ - LE32_OUT( &vb[vbidx++], (xx[0] << 16) | (yy[0] & 0xffff) ); /* MACH64_VERTEX_1_X_Y */ - - /* setup for 3,5, or 7 sequential reg writes based on vertex format */ - switch (vertsize) { - case 6: - LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_2_W) ); - break; - case 4: - LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_2_Z) ); - break; - default: /* vertsize >= 8 */ - LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_2_S) ); - break; - } - if (vertsize > 6) { - LE32_OUT( &vb[vbidx++], v0->ui[6] ); /* MACH64_VERTEX_2_S */ - LE32_OUT( &vb[vbidx++], v0->ui[7] ); /* MACH64_VERTEX_2_T */ - } - if (vertsize > 4) { - LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_2_W */ - LE32_OUT( &vb[vbidx++], v0->ui[5] ); /* MACH64_VERTEX_2_SPEC_ARGB */ - } - LE32_OUT( &vb[vbidx++], ((GLint)(v0->v.z) << 15) ); /* MACH64_VERTEX_2_Z */ - vb[vbidx++] = v0->ui[coloridx]; /* MACH64_VERTEX_2_ARGB */ - LE32_OUT( &vb[vbidx++], (xx[1] << 16) | (yy[1] & 0xffff) ); /* MACH64_VERTEX_2_X_Y */ - - /* setup for 3,5, or 7 sequential reg writes based on vertex format */ - switch (vertsize) { - case 6: - LE32_OUT( &vb[vbidx++], (4 << 16) 
| ADRINDEX(MACH64_VERTEX_3_W) ); - break; - case 4: - LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_3_Z) ); - break; - default: /* vertsize >= 8 */ - LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_3_S) ); - break; - } - if (vertsize > 6) { - LE32_OUT( &vb[vbidx++], v0->ui[6] ); /* MACH64_VERTEX_3_S */ - LE32_OUT( &vb[vbidx++], v0->ui[7] ); /* MACH64_VERTEX_3_T */ - } - if (vertsize > 4) { - LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_3_W */ - LE32_OUT( &vb[vbidx++], v0->ui[5] ); /* MACH64_VERTEX_3_SPEC_ARGB */ - } - LE32_OUT( &vb[vbidx++], ((GLint)(v0->v.z) << 15) ); /* MACH64_VERTEX_3_Z */ - vb[vbidx++] = v0->ui[coloridx]; /* MACH64_VERTEX_3_ARGB */ - LE32_OUT( &vb[vbidx++], (xx[2] << 16) | (yy[2] & 0xffff) ); /* MACH64_VERTEX_3_X_Y */ - - LE32_OUT( &vb[vbidx++], ADRINDEX(MACH64_ONE_OVER_AREA_UC) ); - LE32_OUT( &vb[vbidx++], *(GLuint *)&ooa ); + COPY_VERTEX( v0, 1 ); + COPY_VERTEX( v0, 2 ); + COPY_VERTEX_OOA( v0, 3 ); + LE32_OUT( vb++, *(GLuint *)&ooa ); xx[0] = (GLint)((v0->v.x + sz) * 4); yy[0] = (GLint)((v0->v.y + sz) * 4); @@ -1147,34 +828,10 @@ static __inline void mach64_draw_point( (yy[0] - yy[2]) * (xx[1] - xx[2])); ooa = 1.0 / ooa; - /* setup for 3,5, or 7 sequential reg writes based on vertex format */ - switch (vertsize) { - case 6: - LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_1_W) ); - break; - case 4: - LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_Z) ); - break; - default: /* vertsize >= 8 */ - LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_1_S) ); - break; - } - if (vertsize > 6) { - LE32_OUT( &vb[vbidx++], v0->ui[6] ); /* MACH64_VERTEX_1_S */ - LE32_OUT( &vb[vbidx++], v0->ui[7] ); /* MACH64_VERTEX_1_T */ - } - if (vertsize > 4) { - LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_1_W */ - LE32_OUT( &vb[vbidx++], v0->ui[5] ); /* MACH64_VERTEX_1_SPEC_ARGB */ - } - LE32_OUT( &vb[vbidx++], ((GLint)(v0->v.z) << 15) ); /* MACH64_VERTEX_1_Z */ - vb[vbidx++] = 
v0->ui[coloridx]; /* MACH64_VERTEX_1_ARGB */ - LE32_OUT( &vb[vbidx++], (xx[0] << 16) | (yy[0] & 0xffff) ); /* MACH64_VERTEX_1_X_Y */ - - LE32_OUT( &vb[vbidx++], ADRINDEX(MACH64_ONE_OVER_AREA_UC) ); - LE32_OUT( &vb[vbidx++], *(GLuint *)&ooa ); + COPY_VERTEX_OOA( v0, 1 ); + LE32_OUT( vb++, *(GLuint *)&ooa ); - assert(vbsiz == vbidx); + assert( vb == vbchk ); #endif } @@ -1336,17 +993,19 @@ do { \ #define VERT_SAVE_RGBA( idx ) color[idx] = v[idx]->ui[coloroffset] #define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = color[idx] -#define VERT_SET_SPEC( v, c ) if (havespec) MACH64_SPEC( v->ub4[5], c ) -#define VERT_COPY_SPEC( v0, v1 ) if (havespec) COPY_3V(v0->ub4[5], v1->ub4[5]) -#define VERT_SAVE_SPEC( idx ) if (havespec) spec[idx] = v[idx]->ui[5] -#define VERT_RESTORE_SPEC( idx ) if (havespec) v[idx]->ui[5] = spec[idx] +#define VERT_SET_SPEC( v, c ) if (havespec) MACH64_SPEC( v->ub4[specoffset], c ) +#define VERT_COPY_SPEC( v0, v1 ) if (havespec) COPY_3V(v0->ub4[specoffset], v1->ub4[specoffset]) +#define VERT_SAVE_SPEC( idx ) if (havespec) spec[idx] = v[idx]->ui[specoffset] +#define VERT_RESTORE_SPEC( idx ) if (havespec) v[idx]->ui[specoffset] = spec[idx] #define LOCAL_VARS(n) \ mach64ContextPtr mmesa = MACH64_CONTEXT(ctx); \ GLuint color[n], spec[n]; \ - GLuint coloroffset = (mmesa->vertex_size == 4 ? 3 : 4); \ - GLboolean havespec = (mmesa->vertex_size == 4 ? 
0 : 1); \ - (void) color; (void) spec; (void) coloroffset; (void) havespec; + GLuint coloroffset = mmesa->coloroffset; \ + GLuint specoffset = mmesa->specoffset; \ + GLboolean havespec = (mmesa->specoffset != 0); \ + (void) color; (void) spec; (void) specoffset; \ + (void) coloroffset; (void) havespec; #endif @@ -1472,9 +1131,15 @@ mach64_fallback_tri( mach64ContextPtr mm { GLcontext *ctx = mmesa->glCtx; SWvertex v[3]; +#if MACH64_NATIVE_VTXFMT mach64_translate_vertex( ctx, v0, &v[0] ); mach64_translate_vertex( ctx, v1, &v[1] ); mach64_translate_vertex( ctx, v2, &v[2] ); +#else + _swsetup_Translate( ctx, v0, &v[0] ); + _swsetup_Translate( ctx, v1, &v[1] ); + _swsetup_Translate( ctx, v2, &v[2] ); +#endif _swrast_Triangle( ctx, &v[0], &v[1], &v[2] ); } @@ -1486,8 +1151,13 @@ mach64_fallback_line( mach64ContextPtr m { GLcontext *ctx = mmesa->glCtx; SWvertex v[2]; +#if MACH64_NATIVE_VTXFMT mach64_translate_vertex( ctx, v0, &v[0] ); mach64_translate_vertex( ctx, v1, &v[1] ); +#else + _swsetup_Translate( ctx, v0, &v[0] ); + _swsetup_Translate( ctx, v1, &v[1] ); +#endif _swrast_Line( ctx, &v[0], &v[1] ); } @@ -1498,7 +1168,11 @@ mach64_fallback_point( mach64ContextPtr { GLcontext *ctx = mmesa->glCtx; SWvertex v[1]; +#if MACH64_NATIVE_VTXFMT mach64_translate_vertex( ctx, v0, &v[0] ); +#else + _swsetup_Translate( ctx, v0, &v[0] ); +#endif _swrast_Point( ctx, &v[0] ); } @@ -1577,6 +1251,15 @@ static void mach64RenderClippedLine( GLc } #if MACH64_NATIVE_VTXFMT +/* + * Optimized version of mach64FastRenderClippedPoly(), it submits (n) vertices + * instead of (3 * n). Does not seem to improve performance over the simple one. + * + * FIXME: + * Further optimization is possible by consolidating the mach64AllocDmaLow() + * calls. However, doing so produces irrelevant colors/textures for the + * triangles of the polygon at the periphery of the window. 
+ */ static void mach64FastRenderClippedPoly( GLcontext *ctx, const GLuint *elts, GLuint n ) { @@ -1587,7 +1270,7 @@ static void mach64FastRenderClippedPoly( GLuint xy; const GLuint xyoffset = 9; GLint xx[3], yy[3]; /* 2 fractional bits for hardware */ - unsigned vbsiz = (vertsize + (vertsize > 7 ? 2 : 1)) * n + (n-2); + unsigned vbsiz = (vertsize + (vertsize > 7 ? 2 : 1)) * 3 + 1; CARD32 *vb, *vbchk; GLubyte *mach64verts = (GLubyte *)mmesa->verts; mach64VertexPtr v0, v1, v2; @@ -1631,42 +1314,38 @@ static void mach64FastRenderClippedPoly( COPY_VERTEX_OOA( vb, vertsize, v2, 3 ); LE32_OUT( vb++, *(CARD32 *)&ooa ); - i = 3; - while (1) { - if (i >= n) - break; - v0 = (mach64VertexPtr)VERT(elts[i]); - i++; + assert( vb == vbchk ); - xy = LE32_IN( &v0->ui[xyoffset] ); - xx[0] = (GLshort)( xy >> 16 ); - yy[0] = (GLshort)( xy & 0xffff ); - - a = (xx[0] - xx[2]) * (yy[1] - yy[2]) - - (yy[0] - yy[2]) * (xx[1] - xx[2]); - ooa = 16.0 / a; - - COPY_VERTEX_OOA( vb, vertsize, v0, 1 ); - LE32_OUT( vb++, *(CARD32 *)&ooa ); + /* draw triangles with vertices elts[0], elts[i-1], elts[i] */ + for (i = 3 ; i < n ; i++) { + unsigned vbsiz = (vertsize + (vertsize > 7 ? 
2 : 1)) * 1 + 1; + mach64VertexPtr vk; + int k; + + /* odd elements use v0, even elements use v1 */ + k = 1 - (i & 0x1); + + vk = (mach64VertexPtr)VERT(elts[i]); + + xy = LE32_IN( &vk->ui[xyoffset] ); + xx[k] = (GLshort)( xy >> 16 ); + yy[k] = (GLshort)( xy & 0xffff ); - if (i >= n) - break; - v1 = (mach64VertexPtr)VERT(elts[i]); - i++; - - xy = LE32_IN( &v1->ui[xyoffset] ); - xx[1] = (GLshort)( xy >> 16 ); - yy[1] = (GLshort)( xy & 0xffff ); - a = (xx[0] - xx[2]) * (yy[1] - yy[2]) - (yy[0] - yy[2]) * (xx[1] - xx[2]); ooa = 16.0 / a; + + vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * sizeof(CARD32) ); + vbchk = vb + vbsiz; - COPY_VERTEX_OOA( vb, vertsize, v1, 2 ); + if (k == 0) + COPY_VERTEX_OOA( vb, vertsize, vk, 1 ); + else + COPY_VERTEX_OOA( vb, vertsize, vk, 2 ); LE32_OUT( vb++, *(CARD32 *)&ooa ); + + assert( vb == vbchk ); } - - assert( vb == vbchk ); } #else static void mach64FastRenderClippedPoly( GLcontext *ctx, const GLuint *elts, @@ -1740,13 +1419,13 @@ static void mach64ChooseRenderState(GLco TNLcontext *tnl = TNL_CONTEXT(ctx); tnl->Driver.Render.Points = rast_tab[index].points; tnl->Driver.Render.Line = rast_tab[index].line; + tnl->Driver.Render.ClippedLine = rast_tab[index].line; tnl->Driver.Render.Triangle = rast_tab[index].triangle; tnl->Driver.Render.Quad = rast_tab[index].quad; if (index == 0) { tnl->Driver.Render.PrimTabVerts = mach64_render_tab_verts; tnl->Driver.Render.PrimTabElts = mach64_render_tab_elts; - tnl->Driver.Render.ClippedLine = rast_tab[index].line; tnl->Driver.Render.ClippedPolygon = mach64FastRenderClippedPoly; } else { tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts; @@ -1771,8 +1450,10 @@ static void mach64RunPipeline( GLcontext mach64DDUpdateHWState( ctx ); if (!mmesa->Fallback && mmesa->NewGLState) { +#if MACH64_NATIVE_VTXFMT if (mmesa->NewGLState & _MACH64_NEW_VERTEX_STATE) mach64ChooseVertexState( ctx ); +#endif if (mmesa->NewGLState & _MACH64_NEW_RENDER_STATE) mach64ChooseRenderState( ctx ); @@ -1819,6 +1500,7 @@ 
static void mach64RenderPrimitive( GLcon } +#if MACH64_NATIVE_VTXFMT static void mach64RenderStart( GLcontext *ctx ) { /* Check for projective texturing. Make sure all texcoord @@ -1827,6 +1509,159 @@ static void mach64RenderStart( GLcontext mach64CheckTexSizes( ctx ); } +#else /* !MACH64_NATIVE_VTXFMT */ +/* EMIT_ATTR: append attribute ATTR with emit style STYLE to mmesa->vertex_attrs and advance the running byte `offset` by SIZE; expects `mmesa` and `offset` to be in scope. */ +#define EMIT_ATTR( ATTR, STYLE, SIZE ) \ +do { \ + mmesa->vertex_attrs[mmesa->vertex_attr_count].attrib = (ATTR); \ + mmesa->vertex_attrs[mmesa->vertex_attr_count].format = (STYLE); \ + mmesa->vertex_attr_count++; \ + offset += (SIZE); \ +} while (0) +/* EMIT_PAD: append a padding entry (format EMIT_PAD, pad size stored in .offset) to mmesa->vertex_attrs and advance `offset` by SIZE bytes; expects `mmesa` and `offset` to be in scope. */ +#define EMIT_PAD( SIZE ) \ +do { \ + mmesa->vertex_attrs[mmesa->vertex_attr_count].attrib = 0; \ + mmesa->vertex_attrs[mmesa->vertex_attr_count].format = EMIT_PAD; \ + mmesa->vertex_attrs[mmesa->vertex_attr_count].offset = (SIZE); \ + mmesa->vertex_attr_count++; \ + offset += (SIZE); \ +} while (0) +/* Vertex formats; mach64RenderStart compares them with >=, so the numeric order matters. */ +#define TINY_VERTEX_FORMAT 1 +#define NOTEX_VERTEX_FORMAT 2 +#define TEX0_VERTEX_FORMAT 3 +#define TEX1_VERTEX_FORMAT 4 +/* Render.Start hook for the non-native path: picks a vertex format from the TNL render inputs, rebuilds mmesa->vertex_attrs to match, reports the projective-texture condition through FALLBACK(), and reinstalls the attributes with _tnl_install_attrs() when the render inputs differ from mmesa->tnl_state_bitset. */ +static void mach64RenderStart( GLcontext *ctx ) +{ + mach64ContextPtr mmesa = MACH64_CONTEXT(ctx); + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; + DECLARE_RENDERINPUTS(index_bitset); + GLuint vc_frmt = 0; + GLuint vc_size = 0; + GLboolean fallback_projtex = GL_FALSE; + GLuint offset = 0; + const GLuint t1 = mmesa->tmu_source[1]; + const GLuint t0 = mmesa->tmu_source[0]; + + RENDERINPUTS_COPY( index_bitset, tnl->render_inputs_bitset ); + + /* Important: use the NDC coordinates as the position attribute */ + VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr; + mmesa->vertex_attr_count = 0; + mmesa->specoffset = 0; + + if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(t1) )) { + vc_frmt = TEX1_VERTEX_FORMAT; + vc_size = 10; /* in dwords, not counting the position slot added to vc_size below */ + } else if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(t0) )) { + vc_frmt = TEX0_VERTEX_FORMAT; + vc_size = 7; + } else if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 ) || + RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG )) { + vc_frmt = NOTEX_VERTEX_FORMAT; + vc_size = 4; + } else { + vc_frmt =
TINY_VERTEX_FORMAT; + vc_size = 3; + } + vc_size += 4; /* account for the 16-byte (4 dword) position slot emitted first */ + + /* The TNL module requires that the first attribute is _TNL_ATTRIB_POS; + * at the same time, the mach64 native vertex format has the XYZW coords + * at non-consecutive positions spread inside the vertex. + * + * We reserve space for the XYZW coords with EMIT_PADs and, when the vertex + * is copied to the vertex buffer for submission to the card, we use the + * _TNL_ATTRIB_POS values to fill in the pads reserved for XYZW and discard + * the leading position slot of the vertex (16 bytes as emitted below; presumably the 4 dwords added to vc_size above - TODO confirm against the copy code). + */ + if ( vc_frmt >= TEX0_VERTEX_FORMAT ) + EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, 16 ); + else { + EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, 12 ); + EMIT_PAD( 4 ); /* keeps the position slot at 16 bytes in both branches */ + } + + /* EMIT_ATTRs must be in order, as they tell t_vertex.c how to + * build up a hardware vertex. + */ + if ( vc_frmt >= TEX1_VERTEX_FORMAT ) { + if ( VB->TexCoordPtr[t1]->size > 2 ) + fallback_projtex = GL_TRUE; + EMIT_ATTR( _TNL_ATTRIB_TEX1, EMIT_2F, 8 ); /* VERTEX_?_SEC_S, VERTEX_?_SEC_T; NOTE(review): the format was chosen by testing _TNL_ATTRIB_TEX(t1)/_TNL_ATTRIB_TEX(t0) but the fixed _TNL_ATTRIB_TEX1/_TNL_ATTRIB_TEX0 attributes are emitted - confirm this is intended when tmu_source[] is not the identity mapping */ + EMIT_PAD( 4 ); /* VERTEX_?_SEC_W */ + } + if ( vc_frmt >= TEX0_VERTEX_FORMAT ) { + if ( VB->TexCoordPtr[t0]->size > 2 ) + fallback_projtex = GL_TRUE; + EMIT_ATTR( _TNL_ATTRIB_TEX0, EMIT_2F, 8 ); /* VERTEX_?_S, VERTEX_?_T */ + EMIT_PAD( 4 ); /* VERTEX_?_W */ + } + + if ( vc_frmt >= NOTEX_VERTEX_FORMAT) { + mmesa->specoffset = offset >> 2; /* byte offset -> dword index of the specular/fog word */ +#if MESA_LITTLE_ENDIAN + if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 )) { + EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_BGR, 3 ); + } else + EMIT_PAD( 3 ); + + if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG )) + EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1UB_1F, 1 ); + else + EMIT_PAD( 1 ); +#else /* big endian: fog byte first, then the three specular bytes */ + if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG )) + EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1UB_1F, 1 ); + else + EMIT_PAD( 1 ); + + if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 )) { + EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_RGB, 3 ); + } else + EMIT_PAD( 3 ); +#endif + } + + EMIT_PAD( 4 ); /* VERTEX_?_Z */ + +
mmesa->coloroffset = offset >> 2; /* byte offset -> dword index of the primary color */ +#if MESA_LITTLE_ENDIAN + EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, 4 ); +#else + EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_ARGB, 4 ); +#endif + + EMIT_PAD( 4 ); /* VERTEX_?_X_Y */ + + /* projective textures are not supported by the hardware */ + FALLBACK( mmesa, MACH64_FALLBACK_PROJTEX, fallback_projtex ); + + /* Only need to change the vertex emit code if there has been a + * state change to a TNL index. + */ + if (!RENDERINPUTS_EQUAL( index_bitset, mmesa->tnl_state_bitset )) { /* NOTE(review): nothing in this function copies index_bitset into mmesa->tnl_state_bitset after installing - confirm it is refreshed elsewhere, otherwise this reinstall presumably runs on every call */ + int vertex_size; + FLUSH_BATCH( mmesa ); + mmesa->dirty |= MACH64_UPLOAD_CONTEXT; + + vertex_size = + _tnl_install_attrs( ctx, + mmesa->vertex_attrs, + mmesa->vertex_attr_count, + mmesa->hw_viewport, 0 ); + vertex_size >>= 2; /* bytes -> dwords, to compare with vc_size */ + assert(vertex_size == vc_size); + + mmesa->vertex_format = vc_frmt; + mmesa->vertex_size = vc_size; + } +} +#endif /* MACH64_NATIVE_VTXFMT */ + static void mach64RenderFinish( GLcontext *ctx ) { if (MACH64_CONTEXT(ctx)->RenderIndex & MACH64_FALLBACK_BIT) @@ -1848,6 +1683,7 @@ static const char * const fallbackString "GL_SEPARATE_SPECULAR_COLOR", "glBlendEquation (mode != ADD)", "glBlendFunc", + "Projective texture", "Rasterization disable", }; @@ -1887,9 +1723,25 @@ void mach64Fallback( GLcontext *ctx, GLu tnl->Driver.Render.Start = mach64RenderStart; tnl->Driver.Render.PrimitiveNotify = mach64RenderPrimitive; tnl->Driver.Render.Finish = mach64RenderFinish; + +#if MACH64_NATIVE_VTXFMT tnl->Driver.Render.BuildVertices = mach64BuildVertices; mmesa->NewGLState |= (_MACH64_NEW_RENDER_STATE| _MACH64_NEW_VERTEX_STATE); +#else + tnl->Driver.Render.BuildVertices = _tnl_build_vertices; + tnl->Driver.Render.CopyPV = _tnl_copy_pv; + tnl->Driver.Render.Interp = _tnl_interp; + + _tnl_invalidate_vertex_state( ctx, ~0 ); + _tnl_invalidate_vertices( ctx, ~0 ); + _tnl_install_attrs( ctx, + mmesa->vertex_attrs, + mmesa->vertex_attr_count, + mmesa->hw_viewport, 0 ); + + mmesa->NewGLState |= _MACH64_NEW_RENDER_STATE; +#endif if (MACH64_DEBUG &
DEBUG_VERBOSE_FALLBACK) { fprintf(stderr, "Mach64 end rasterization fallback: 0x%x %s\n", bit, getFallbackString(bit)); @@ -1904,6 +1756,7 @@ void mach64Fallback( GLcontext *ctx, GLu void mach64InitTriFuncs( GLcontext *ctx ) { + mach64ContextPtr mmesa = MACH64_CONTEXT(ctx); TNLcontext *tnl = TNL_CONTEXT(ctx); static int firsttime = 1; @@ -1917,5 +1770,18 @@ void mach64InitTriFuncs( GLcontext *ctx tnl->Driver.Render.Finish = mach64RenderFinish; tnl->Driver.Render.PrimitiveNotify = mach64RenderPrimitive; tnl->Driver.Render.ResetLineStipple = _swrast_ResetLineStipple; +#if MACH64_NATIVE_VTXFMT tnl->Driver.Render.BuildVertices = mach64BuildVertices; +#else /* !MACH64_NATIVE_VTXFMT: use the generic tnl vertex build/copy/interp hooks */ + tnl->Driver.Render.BuildVertices = _tnl_build_vertices; + tnl->Driver.Render.CopyPV = _tnl_copy_pv; + tnl->Driver.Render.Interp = _tnl_interp; + /* Set up the tnl vertex storage via _tnl_init_vertices() and keep a pointer to its buffer in mmesa->verts. */ + _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, + (6 + 2 * ctx->Const.MaxTextureUnits) * sizeof(GLfloat) ); + mmesa->verts = (GLubyte *)tnl->clipspace.vertex_buf; /* verts is declared GLubyte * in mach64_context.h, so cast to that type rather than char * */ + RENDERINPUTS_ONES( mmesa->tnl_state_bitset ); /* presumably so the first comparison in mach64RenderStart() does not match - confirm */ + + mmesa->NewGLState |= _MACH64_NEW_RENDER_STATE; +#endif } diff -aup -x '.*' -x 'depend*' -x '*o' mach64/mach64_vb.c mach64-vtx/mach64_vb.c --- mach64/mach64_vb.c 2004-07-01 16:14:05.000000000 +0300 +++ mach64-vtx/mach64_vb.c 2006-04-05 19:35:48.000000000 +0300 @@ -45,6 +45,7 @@ #include "mach64_state.h" +#if MACH64_NATIVE_VTXFMT #define MACH64_TEX1_BIT 0x1 #define MACH64_TEX0_BIT 0x2 #define MACH64_RGBA_BIT 0x4 @@ -107,20 +108,10 @@ static struct { (GLfloat)mmesa->drawY + SUBPIXEL_Y); \ const GLfloat sz = 1.0 / mmesa->depth_scale -#if MACH64_NATIVE_VTXFMT - #define UNVIEWPORT_X(x) ((GLfloat)(x) / 4.0) + dx #define UNVIEWPORT_Y(y) - ((GLfloat)(y) / 4.0) + dy #define UNVIEWPORT_Z(z) (GLfloat)((z) >> 15) * sz -#else - -#define UNVIEWPORT_X(x) x + dx; -#define UNVIEWPORT_Y(y) - y + dy; -#define UNVIEWPORT_Z(z) z * sz; - -#endif - #define PTEX_FALLBACK() FALLBACK(MACH64_CONTEXT(ctx), MACH64_FALLBACK_TEXTURE, 1) #define IMPORT_FLOAT_COLORS
mach64_import_float_colors @@ -133,25 +124,14 @@ static struct { * Generate pv-copying and translation functions * ***********************************************************************/ -#if MACH64_NATIVE_VTXFMT - #define TAG(x) mach64_##x #include "mach64_native_vb.c" -#else - -#define TAG(x) mach64_##x -#include "tnl_dd/t_dd_vb.c" - -#endif - /*********************************************************************** * Generate vertex emit and interp functions * ***********************************************************************/ -#if MACH64_NATIVE_VTXFMT - #define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT) #define TAG(x) x##_wg #include "mach64_native_vbtmp.h" @@ -292,150 +272,6 @@ static struct { #define TAG(x) x##_gfst0t1 #include "mach64_native_vbtmp.h" -#else - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT) -#define TAG(x) x##_wg -#include "mach64_vbtmp.h" - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_SPEC_BIT) -#define TAG(x) x##_wgs -#include "mach64_vbtmp.h" - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_TEX0_BIT) -#define TAG(x) x##_wgt0 -#include "mach64_vbtmp.h" - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT) -#define TAG(x) x##_wgt0t1 -#include "mach64_vbtmp.h" - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_TEX0_BIT|MACH64_PTEX_BIT) -#define TAG(x) x##_wgpt0 -#include "mach64_vbtmp.h" - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT) -#define TAG(x) x##_wgst0 -#include "mach64_vbtmp.h" - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT|\ - MACH64_TEX1_BIT) -#define TAG(x) x##_wgst0t1 -#include "mach64_vbtmp.h" - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT|\ - MACH64_PTEX_BIT) -#define TAG(x) x##_wgspt0 -#include "mach64_vbtmp.h" - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT) -#define TAG(x) x##_wgf -#include "mach64_vbtmp.h" - -#define IND 
(MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT) -#define TAG(x) x##_wgfs -#include "mach64_vbtmp.h" - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT) -#define TAG(x) x##_wgft0 -#include "mach64_vbtmp.h" - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT|\ - MACH64_TEX1_BIT) -#define TAG(x) x##_wgft0t1 -#include "mach64_vbtmp.h" - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT|\ - MACH64_PTEX_BIT) -#define TAG(x) x##_wgfpt0 -#include "mach64_vbtmp.h" - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|\ - MACH64_TEX0_BIT) -#define TAG(x) x##_wgfst0 -#include "mach64_vbtmp.h" - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|\ - MACH64_TEX0_BIT|MACH64_TEX1_BIT) -#define TAG(x) x##_wgfst0t1 -#include "mach64_vbtmp.h" - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|\ - MACH64_TEX0_BIT|MACH64_PTEX_BIT) -#define TAG(x) x##_wgfspt0 -#include "mach64_vbtmp.h" - -#define IND (MACH64_TEX0_BIT) -#define TAG(x) x##_t0 -#include "mach64_vbtmp.h" - -#define IND (MACH64_TEX0_BIT|MACH64_TEX1_BIT) -#define TAG(x) x##_t0t1 -#include "mach64_vbtmp.h" - -#define IND (MACH64_FOG_BIT) -#define TAG(x) x##_f -#include "mach64_vbtmp.h" - -#define IND (MACH64_FOG_BIT|MACH64_TEX0_BIT) -#define TAG(x) x##_ft0 -#include "mach64_vbtmp.h" - -#define IND (MACH64_FOG_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT) -#define TAG(x) x##_ft0t1 -#include "mach64_vbtmp.h" - -#define IND (MACH64_RGBA_BIT) -#define TAG(x) x##_g -#include "mach64_vbtmp.h" - -#define IND (MACH64_RGBA_BIT|MACH64_SPEC_BIT) -#define TAG(x) x##_gs -#include "mach64_vbtmp.h" - -#define IND (MACH64_RGBA_BIT|MACH64_TEX0_BIT) -#define TAG(x) x##_gt0 -#include "mach64_vbtmp.h" - -#define IND (MACH64_RGBA_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT) -#define TAG(x) x##_gt0t1 -#include "mach64_vbtmp.h" - -#define IND 
(MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT) -#define TAG(x) x##_gst0 -#include "mach64_vbtmp.h" - -#define IND (MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT) -#define TAG(x) x##_gst0t1 -#include "mach64_vbtmp.h" - -#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT) -#define TAG(x) x##_gf -#include "mach64_vbtmp.h" - -#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT) -#define TAG(x) x##_gfs -#include "mach64_vbtmp.h" - -#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT) -#define TAG(x) x##_gft0 -#include "mach64_vbtmp.h" - -#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT) -#define TAG(x) x##_gft0t1 -#include "mach64_vbtmp.h" - -#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT) -#define TAG(x) x##_gfst0 -#include "mach64_vbtmp.h" - -#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT|\ - MACH64_TEX1_BIT) -#define TAG(x) x##_gfst0t1 -#include "mach64_vbtmp.h" - -#endif - static void init_setup_tab( void ) { init_wg(); @@ -640,3 +476,4 @@ void mach64FreeVB( GLcontext *ctx ) mmesa->verts = 0; } } +#endif diff -aup -x '.*' -x 'depend*' -x '*o' mach64/mach64_vb.h mach64-vtx/mach64_vb.h --- mach64/mach64_vb.h 2006-04-07 00:29:37.000000000 +0300 +++ mach64-vtx/mach64_vb.h 2006-04-05 01:13:58.000000000 +0300 @@ -36,9 +36,6 @@ #include "swrast/swrast.h" #include "mach64_context.h" -/* premultiply texture coordinates by homogenous coordinate */ -#define MACH64_PREMULT_TEXCOORDS - #define _MACH64_NEW_VERTEX_STATE (_DD_NEW_SEPARATE_SPECULAR | \ _DD_NEW_TRI_LIGHT_TWOSIDE | \ _DD_NEW_TRI_UNFILLED | \ Common subdirectories: mach64/server and mach64-vtx/server