diff -x 'depend*' -x '*o' -au mach64-vtx/mach64_context.h mach64-vtx-1/mach64_context.h --- mach64-vtx/mach64_context.h 2006-03-12 00:02:32.000000000 +0200 +++ mach64-vtx-1/mach64_context.h 2006-03-27 23:55:07.000000000 +0300 @@ -94,14 +94,45 @@ /* The size of this union is not of relevence: */ -union mach64_vertex_t { +#define TAG(x) mach64##x +typedef struct { + GLubyte blue, green, red, alpha; +} TAG(_color_t); + +typedef union { + struct { + GLfloat u1, v1, q1; + GLfloat u0, v0, q0; + TAG(_color_t) specular; + GLuint z; + TAG(_color_t) color; + GLshort y; + GLshort x; + } pv; /* unused */ + struct { + GLfloat u1, v1; /* tex1 */ + GLfloat w1; + GLfloat u0, v0; /* tex0 */ + GLfloat w; + TAG(_color_t) specular; /* notex */ + GLuint z; /* tiny */ + TAG(_color_t) color; + GLshort y; + GLshort x; + } v; + struct { + GLfloat f[7]; /* pad to sizeof(struct v) */ + GLuint z; /* tiny */ + TAG(_color_t) color; + GLshort y; + GLshort x; + } tv; GLfloat f[16]; GLuint ui[16]; GLushort us2[16][2]; GLubyte ub4[16][4]; -}; - -typedef union mach64_vertex_t mach64Vertex, *mach64VertexPtr; +} TAG(Vertex), *TAG(VertexPtr); +#undef TAG #else diff -x 'depend*' -x '*o' -au mach64-vtx/mach64_tris.c mach64-vtx-1/mach64_tris.c --- mach64-vtx/mach64_tris.c 2006-03-27 04:50:58.000000000 +0300 +++ mach64-vtx-1/mach64_tris.c 2006-03-28 01:24:35.000000000 +0300 @@ -70,7 +70,22 @@ * Emit primitives as inline vertices * ***********************************************************************/ +/* + * TODO: add comments in draw_triangle about vertex format (mach64 + * TODO: does not have VF_CNTL, VERTEX_FMT registers ...), and + * TODO: about vbsize, ooa, and other mystic calculations + * + * TODO: cull the xy variables, use symbolic names v->v.x, v->v.y + * + * TODO: pull the LE32_ defines here, drop LE32_IN, document that + * TODO: non-x86 is untested + * + * TODO: order the fields of mach64Vertex from xy to w to sec_w and + * TODO: submit them in reverse order (asm ? perf regression ?) 
+ */ + #if defined(USE_X86_ASM) +/* does not need LE32_OUT() because x86 is LE */ #define DO_COPY_VERTEX( vb, vertsize, v, n, m ) \ do { \ register const CARD32 *__p __asm__( "esi" ) = (CARD32 *)v + 10 - vertsize; \ @@ -89,6 +104,7 @@ : "0" (__s), "1" (vb), "2" (__p) ); \ } while (0) #else +/* mach64 requires that values are in LE format */ #define DO_COPY_VERTEX( vb, vertsize, v, n, m ) \ do { \ CARD32 *__p = (CARD32 *)v + 10 - vertsize; \ @@ -96,9 +112,9 @@ if ( vertsize > 7 ) { \ LE32_OUT( vb++, (2 << 16) | \ ADRINDEX( MACH64_VERTEX_##n##_SECONDARY_S ) ); \ - *vb++ = *__p++; \ - *vb++ = *__p++; \ - *vb++ = *__p++; \ + LE32_OUT( vb++ , *__p++ ); \ + LE32_OUT( vb++ , *__p++ ); \ + LE32_OUT( vb++ , *__p++ ); \ __s -= 3; \ } \ LE32_OUT( vb++, ((__s - 1 + m) << 16) | \ @@ -142,15 +158,15 @@ mach64_print_vertex( ctx, v3 ); } - xy = LE32_IN( &v0->ui[xyoffset] ); + xy = v0->ui[xyoffset]; xx[0] = (GLshort)( xy >> 16 ); yy[0] = (GLshort)( xy & 0xffff ); - xy = LE32_IN( &v1->ui[xyoffset] ); + xy = v1->ui[xyoffset]; xx[1] = (GLshort)( xy >> 16 ); yy[1] = (GLshort)( xy & 0xffff ); - xy = LE32_IN( &v3->ui[xyoffset] ); + xy = v3->ui[xyoffset]; xx[2] = (GLshort)( xy >> 16 ); yy[2] = (GLshort)( xy & 0xffff ); @@ -176,7 +192,7 @@ COPY_VERTEX_OOA( vb, vertsize, v3, 3 ); LE32_OUT( vb++, *(CARD32 *)&ooa ); - xy = LE32_IN( &v2->ui[xyoffset] ); + xy = v2->ui[xyoffset]; xx[0] = (GLshort)( xy >> 16 ); yy[0] = (GLshort)( xy & 0xffff ); @@ -445,15 +461,15 @@ mach64_print_vertex( ctx, v2 ); } - xy = LE32_IN( &v0->ui[xyoffset] ); + xy = v0->ui[xyoffset]; xx[0] = (GLshort)( xy >> 16 ); yy[0] = (GLshort)( xy & 0xffff ); - xy = LE32_IN( &v1->ui[xyoffset] ); + xy = v1->ui[xyoffset]; xx[1] = (GLshort)( xy >> 16 ); yy[1] = (GLshort)( xy & 0xffff ); - xy = LE32_IN( &v2->ui[xyoffset] ); + xy = v2->ui[xyoffset]; xx[2] = (GLshort)( xy >> 16 ); yy[2] = (GLshort)( xy & 0xffff ); @@ -696,13 +712,13 @@ pxy0 = &v0->ui[xyoffset]; xy0old = *pxy0; - xy0 = LE32_IN( &xy0old ); + xy0 = xy0old; x0 = (GLshort)( 
xy0 >> 16 ); y0 = (GLshort)( xy0 & 0xffff ); pxy1 = &v1->ui[xyoffset]; xy1old = *pxy1; - xy1 = LE32_IN( &xy1old ); + xy1 = xy1old; x1 = (GLshort)( xy1 >> 16 ); y1 = (GLshort)( xy1 & 0xffff ); @@ -981,7 +997,7 @@ pxy = &v0->ui[xyoffset]; xyold = *pxy; - xy = LE32_IN( &xyold ); + xy = xyold; x = (GLshort)( xy >> 16 ); y = (GLshort)( xy & 0xffff ); @@ -1254,8 +1270,7 @@ #if MACH64_NATIVE_VTXFMT -/* #define DEPTH_SCALE 65536.0 */ -#define DEPTH_SCALE 1 +#define DEPTH_SCALE 1.0 #define UNFILLED_TRI unfilled_tri #define UNFILLED_QUAD unfilled_quad #define VERT_X(_v) ((GLfloat)(GLshort)(LE32_IN( &(_v)->ui[xyoffset] ) & 0xffff) / 4.0) @@ -1576,7 +1591,16 @@ tnl->Driver.Render.Line( ctx, ii, jj ); } -#if MACH64_NATIVE_VTXFMT +#if (0) && MACH64_NATIVE_VTXFMT +/* + * Optimized version of mach64FastRenderClippedPoly(), it submits (n) vertices + * instead of (3 * n). Does not seem to improve performance over the simple one. + * + * FIXME: + * Further optimization is possible by consolidating the mach64AllocDmaLow() + * calls. However, doing so results in clipping whole polygons if they only + * have one vertex outside of the cliprect. + */ static void mach64FastRenderClippedPoly( GLcontext *ctx, const GLuint *elts, GLuint n ) { @@ -1587,25 +1611,26 @@ GLuint xy; const GLuint xyoffset = 9; GLint xx[3], yy[3]; /* 2 fractional bits for hardware */ - unsigned vbsiz = (vertsize + (vertsize > 7 ? 2 : 1)) * n + (n-2); + unsigned vbsiz = (vertsize + (vertsize > 7 ? 
2 : 1)) * 3 + 1; CARD32 *vb, *vbchk; GLubyte *mach64verts = (GLubyte *)mmesa->verts; mach64VertexPtr v0, v1, v2; int i; + /* draw triangle with vertices elts[0], elts[1], elts[2] */ v0 = (mach64VertexPtr)VERT(elts[1]); v1 = (mach64VertexPtr)VERT(elts[2]); v2 = (mach64VertexPtr)VERT(elts[0]); - xy = LE32_IN( &v0->ui[xyoffset] ); + xy = v0->ui[xyoffset]; xx[0] = (GLshort)( xy >> 16 ); yy[0] = (GLshort)( xy & 0xffff ); - xy = LE32_IN( &v1->ui[xyoffset] ); + xy = v1->ui[xyoffset]; xx[1] = (GLshort)( xy >> 16 ); yy[1] = (GLshort)( xy & 0xffff ); - xy = LE32_IN( &v2->ui[xyoffset] ); + xy = v2->ui[xyoffset]; xx[2] = (GLshort)( xy >> 16 ); yy[2] = (GLshort)( xy & 0xffff ); @@ -1631,42 +1656,39 @@ COPY_VERTEX_OOA( vb, vertsize, v2, 3 ); LE32_OUT( vb++, *(CARD32 *)&ooa ); - i = 3; - while (1) { - if (i >= n) - break; - v0 = (mach64VertexPtr)VERT(elts[i]); - i++; + assert( vb == vbchk ); - xy = LE32_IN( &v0->ui[xyoffset] ); - xx[0] = (GLshort)( xy >> 16 ); - yy[0] = (GLshort)( xy & 0xffff ); - - a = (xx[0] - xx[2]) * (yy[1] - yy[2]) - - (yy[0] - yy[2]) * (xx[1] - xx[2]); - ooa = 16.0 / a; - - COPY_VERTEX_OOA( vb, vertsize, v0, 1 ); - LE32_OUT( vb++, *(CARD32 *)&ooa ); + /* draw triangles with vertices elts[0], elts[i-1], elts[i] */ + for (i = 3 ; i < n ; i++) { + unsigned vbsiz = (vertsize + (vertsize > 7 ? 
2 : 1)) * 1 + 1; + mach64VertexPtr vk; + int k; + + /* odd elements use v0, even elements use v1 */ + k = 1 - (i & 0x1); + + vk = VERT(elts[i]); + + xy = vk->ui[xyoffset]; + xx[k] = (GLshort)( xy >> 16 ); + yy[k] = (GLshort)( xy & 0xffff ); - if (i >= n) - break; - v1 = (mach64VertexPtr)VERT(elts[i]); - i++; - - xy = LE32_IN( &v1->ui[xyoffset] ); - xx[1] = (GLshort)( xy >> 16 ); - yy[1] = (GLshort)( xy & 0xffff ); - a = (xx[0] - xx[2]) * (yy[1] - yy[2]) - (yy[0] - yy[2]) * (xx[1] - xx[2]); ooa = 16.0 / a; + + vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * sizeof(CARD32) ); + vbchk = vb + vbsiz; - COPY_VERTEX_OOA( vb, vertsize, v1, 2 ); + if (k == 0) + COPY_VERTEX_OOA( vb, vertsize, vk, 1 ); + else + COPY_VERTEX_OOA( vb, vertsize, vk, 2 ); LE32_OUT( vb++, *(CARD32 *)&ooa ); - } + + assert( vb == vbchk ); - assert( vb == vbchk ); + } } #else static void mach64FastRenderClippedPoly( GLcontext *ctx, const GLuint *elts, diff -x 'depend*' -x '*o' -au mach64-vtx/mach64_vb.c mach64-vtx-1/mach64_vb.c --- mach64-vtx/mach64_vb.c 2006-03-26 22:53:19.000000000 +0300 +++ mach64-vtx-1/mach64_vb.c 2006-03-28 00:13:42.000000000 +0300 @@ -82,8 +82,9 @@ #define DO_TEX3 0 #define DO_PTEX (IND & MACH64_PTEX_BIT) -#define VERTEX mach64Vertex #define LOCALVARS mach64ContextPtr mmesa = MACH64_CONTEXT(ctx); +#define VERTEX mach64Vertex +#define VERTEX_COLOR mach64_color_t #define GET_VIEWPORT_MAT() mmesa->hw_viewport #define GET_TEXSOURCE(n) mmesa->tmu_source[n] #define GET_VERTEX_FORMAT() mmesa->vertex_format @@ -123,9 +124,6 @@ #define PTEX_FALLBACK() FALLBACK(MACH64_CONTEXT(ctx), MACH64_FALLBACK_TEXTURE, 1) -#define IMPORT_FLOAT_COLORS mach64_import_float_colors -#define IMPORT_FLOAT_SPEC_COLORS mach64_import_float_spec_colors - #define INTERP_VERTEX setup_tab[mmesa->SetupIndex].interp #define COPY_PV_VERTEX setup_tab[mmesa->SetupIndex].copy_pv @@ -136,7 +134,7 @@ #if MACH64_NATIVE_VTXFMT #define TAG(x) mach64_##x -#include "mach64_native_vb.c" +#include "mach64_dd_vb.c" #else @@ 
-154,143 +152,143 @@ #define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT) #define TAG(x) x##_wg -#include "mach64_native_vbtmp.h" +#include "mach64_dd_vbtmp.h" #define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_SPEC_BIT) #define TAG(x) x##_wgs -#include "mach64_native_vbtmp.h" +#include "mach64_dd_vbtmp.h" #define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_TEX0_BIT) #define TAG(x) x##_wgt0 -#include "mach64_native_vbtmp.h" +#include "mach64_dd_vbtmp.h" #define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT) #define TAG(x) x##_wgt0t1 -#include "mach64_native_vbtmp.h" +#include "mach64_dd_vbtmp.h" #define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_TEX0_BIT|MACH64_PTEX_BIT) #define TAG(x) x##_wgpt0 -#include "mach64_native_vbtmp.h" +#include "mach64_dd_vbtmp.h" #define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT) #define TAG(x) x##_wgst0 -#include "mach64_native_vbtmp.h" +#include "mach64_dd_vbtmp.h" #define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT|\ MACH64_TEX1_BIT) #define TAG(x) x##_wgst0t1 -#include "mach64_native_vbtmp.h" +#include "mach64_dd_vbtmp.h" #define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT|\ MACH64_PTEX_BIT) #define TAG(x) x##_wgspt0 -#include "mach64_native_vbtmp.h" +#include "mach64_dd_vbtmp.h" #define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT) #define TAG(x) x##_wgf -#include "mach64_native_vbtmp.h" +#include "mach64_dd_vbtmp.h" #define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT) #define TAG(x) x##_wgfs -#include "mach64_native_vbtmp.h" +#include "mach64_dd_vbtmp.h" #define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT) #define TAG(x) x##_wgft0 -#include "mach64_native_vbtmp.h" +#include "mach64_dd_vbtmp.h" #define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT|\ MACH64_TEX1_BIT) #define TAG(x) x##_wgft0t1 -#include "mach64_native_vbtmp.h" +#include "mach64_dd_vbtmp.h" #define 
IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT|\ MACH64_PTEX_BIT) #define TAG(x) x##_wgfpt0 -#include "mach64_native_vbtmp.h" +#include "mach64_dd_vbtmp.h" #define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|\ MACH64_TEX0_BIT) #define TAG(x) x##_wgfst0 -#include "mach64_native_vbtmp.h" +#include "mach64_dd_vbtmp.h" #define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|\ MACH64_TEX0_BIT|MACH64_TEX1_BIT) #define TAG(x) x##_wgfst0t1 -#include "mach64_native_vbtmp.h" +#include "mach64_dd_vbtmp.h" #define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|\ MACH64_TEX0_BIT|MACH64_PTEX_BIT) #define TAG(x) x##_wgfspt0 -#include "mach64_native_vbtmp.h" +#include "mach64_dd_vbtmp.h" #define IND (MACH64_TEX0_BIT) #define TAG(x) x##_t0 -#include "mach64_native_vbtmp.h" +#include "mach64_dd_vbtmp.h" #define IND (MACH64_TEX0_BIT|MACH64_TEX1_BIT) #define TAG(x) x##_t0t1 -#include "mach64_native_vbtmp.h" +#include "mach64_dd_vbtmp.h" #define IND (MACH64_FOG_BIT) #define TAG(x) x##_f -#include "mach64_native_vbtmp.h" +#include "mach64_dd_vbtmp.h" #define IND (MACH64_FOG_BIT|MACH64_TEX0_BIT) #define TAG(x) x##_ft0 -#include "mach64_native_vbtmp.h" +#include "mach64_dd_vbtmp.h" #define IND (MACH64_FOG_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT) #define TAG(x) x##_ft0t1 -#include "mach64_native_vbtmp.h" +#include "mach64_dd_vbtmp.h" #define IND (MACH64_RGBA_BIT) #define TAG(x) x##_g -#include "mach64_native_vbtmp.h" +#include "mach64_dd_vbtmp.h" #define IND (MACH64_RGBA_BIT|MACH64_SPEC_BIT) #define TAG(x) x##_gs -#include "mach64_native_vbtmp.h" +#include "mach64_dd_vbtmp.h" #define IND (MACH64_RGBA_BIT|MACH64_TEX0_BIT) #define TAG(x) x##_gt0 -#include "mach64_native_vbtmp.h" +#include "mach64_dd_vbtmp.h" #define IND (MACH64_RGBA_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT) #define TAG(x) x##_gt0t1 -#include "mach64_native_vbtmp.h" +#include "mach64_dd_vbtmp.h" #define IND 
(MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT) #define TAG(x) x##_gst0 -#include "mach64_native_vbtmp.h" +#include "mach64_dd_vbtmp.h" #define IND (MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT) #define TAG(x) x##_gst0t1 -#include "mach64_native_vbtmp.h" +#include "mach64_dd_vbtmp.h" #define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT) #define TAG(x) x##_gf -#include "mach64_native_vbtmp.h" +#include "mach64_dd_vbtmp.h" #define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT) #define TAG(x) x##_gfs -#include "mach64_native_vbtmp.h" +#include "mach64_dd_vbtmp.h" #define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT) #define TAG(x) x##_gft0 -#include "mach64_native_vbtmp.h" +#include "mach64_dd_vbtmp.h" #define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT) #define TAG(x) x##_gft0t1 -#include "mach64_native_vbtmp.h" +#include "mach64_dd_vbtmp.h" #define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT) #define TAG(x) x##_gfst0 -#include "mach64_native_vbtmp.h" +#include "mach64_dd_vbtmp.h" #define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT|\ MACH64_TEX1_BIT) #define TAG(x) x##_gfst0t1 -#include "mach64_native_vbtmp.h" +#include "mach64_dd_vbtmp.h" #else diff -x 'depend*' -x '*o' -au mach64-vtx/mach64_vb.h mach64-vtx-1/mach64_vb.h --- mach64-vtx/mach64_vb.h 2006-03-27 19:30:45.000000000 +0300 +++ mach64-vtx-1/mach64_vb.h 2006-03-25 22:07:17.000000000 +0200 @@ -36,9 +36,6 @@ #include "swrast/swrast.h" #include "mach64_context.h" -/* premultiply texture coordinates by homogenous coordinate */ -#define MACH64_PREMULT_TEXCOORDS - #define _MACH64_NEW_VERTEX_STATE (_DD_NEW_SEPARATE_SPECULAR | \ _DD_NEW_TRI_LIGHT_TWOSIDE | \ _DD_NEW_TRI_UNFILLED | \