diff -au -x 'depend*' -x '*o' mach64-vtx/mach64_context.h mach64-vtx-2/mach64_context.h --- mach64-vtx/mach64_context.h 2006-03-12 00:02:32.000000000 +0200 +++ mach64-vtx-2/mach64_context.h 2006-03-30 03:06:04.000000000 +0300 @@ -94,14 +94,49 @@ /* The size of this union is not of relevence: */ -union mach64_vertex_t { +#define TAG(x) mach64##x +typedef struct { + GLubyte blue, green, red, alpha; +} TAG(_color_t); + +typedef union { + struct { + GLfloat u3, v3, q3; + GLfloat u2, v2, q2; + GLfloat u1, v1, q1; + GLfloat u0, v0, q0; + TAG(_color_t) specular; + GLuint z; + TAG(_color_t) color; + GLshort y; + GLshort x; + } pv; /* unused */ + struct { + GLfloat u3, v3, q3; /* unused */ + GLfloat u2, v2, q2; /* unused */ + GLfloat u1, v1; /* tex1 */ + GLfloat w1; + GLfloat u0, v0; /* tex0 */ + GLfloat w; + TAG(_color_t) specular; /* notex */ + GLuint z; /* tiny */ + TAG(_color_t) color; + GLshort y; + GLshort x; + } v; + struct { + GLfloat f[13]; /* pad to sizeof(struct v) */ + GLuint z; /* tiny */ + TAG(_color_t) color; + GLshort y; + GLshort x; + } tv; GLfloat f[16]; GLuint ui[16]; GLushort us2[16][2]; GLubyte ub4[16][4]; -}; - -typedef union mach64_vertex_t mach64Vertex, *mach64VertexPtr; +} TAG(Vertex), *TAG(VertexPtr); +#undef TAG #else @@ -330,18 +365,10 @@ * Byte ordering */ #if MESA_LITTLE_ENDIAN == 1 -#define LE32_IN( x ) ( *(GLuint *)(x) ) -#define LE32_IN_FLOAT( x ) ( *(GLfloat *)(x) ) #define LE32_OUT( x, y ) do { *(GLuint *)(x) = (y); } while (0) #define LE32_OUT_FLOAT( x, y ) do { *(GLfloat *)(x) = (y); } while (0) #else #include -#define LE32_IN( x ) bswap_32( *(GLuint *)(x) ) -#define LE32_IN_FLOAT( x ) \ -({ \ - GLuint __tmp = bswap_32( *(GLuint *)(x) ); \ - *(GLfloat *)&__tmp; \ -}) #define LE32_OUT( x, y ) do { *(GLuint *)(x) = bswap_32( y ); } while (0) #define LE32_OUT_FLOAT( x, y ) \ do { \ diff -au -x 'depend*' -x '*o' mach64-vtx/mach64_tris.c mach64-vtx-2/mach64_tris.c --- mach64-vtx/mach64_tris.c 2006-03-27 04:50:58.000000000 +0300 +++ 
mach64-vtx-2/mach64_tris.c 2006-03-28 20:49:27.000000000 +0300 @@ -70,10 +70,22 @@ * Emit primitives as inline vertices * ***********************************************************************/ +/* + * TODO: LE32 with non-x86 is untested + * + * TODO: add comments in draw_triangle about vertex format (mach64 + * TODO: does not have VF_CNTL, VERTEX_FMT registers ...), and + * TODO: about vbsize, ooa, and other mystic calculations + * + * TODO: order the fields of mach64Vertex from xy to w to sec_w and + * TODO: submit them in reverse order (asm ? perf regression ?) + */ + #if defined(USE_X86_ASM) +/* does not need LE32_OUT() because x86 is LE */ #define DO_COPY_VERTEX( vb, vertsize, v, n, m ) \ do { \ - register const CARD32 *__p __asm__( "esi" ) = (CARD32 *)v + 10 - vertsize; \ + register const CARD32 *__p __asm__( "esi" ) = (CARD32 *)v + 16 - vertsize; \ register int __s __asm__( "ecx" ) = vertsize; \ if ( vertsize > 7 ) { \ *vb++ = (2 << 16) | ADRINDEX( MACH64_VERTEX_##n##_SECONDARY_S ); \ @@ -89,22 +101,31 @@ : "0" (__s), "1" (vb), "2" (__p) ); \ } while (0) #else +/* mach64 requires that values are in LE format */ #define DO_COPY_VERTEX( vb, vertsize, v, n, m ) \ do { \ - CARD32 *__p = (CARD32 *)v + 10 - vertsize; \ + CARD32 *__p = (CARD32 *)v + 16 - vertsize; \ int __s = vertsize; \ if ( vertsize > 7 ) { \ LE32_OUT( vb++, (2 << 16) | \ ADRINDEX( MACH64_VERTEX_##n##_SECONDARY_S ) ); \ - *vb++ = *__p++; \ - *vb++ = *__p++; \ - *vb++ = *__p++; \ + LE32_OUT_FLOAT( vb++ , *__p++ ); \ + LE32_OUT_FLOAT( vb++ , *__p++ ); \ + LE32_OUT_FLOAT( vb++ , *__p++ ); \ + __s -= 3; \ + } \ + if ( vertsize > 4 ) { \ + LE32_OUT( vb++, (2 << 16) | \ + ADRINDEX( MACH64_VERTEX_##n##_S ) ); \ + LE32_OUT_FLOAT( vb++ , *__p++ ); \ + LE32_OUT_FLOAT( vb++ , *__p++ ); \ + LE32_OUT_FLOAT( vb++ , *__p++ ); \ __s -= 3; \ } \ LE32_OUT( vb++, ((__s - 1 + m) << 16) | \ (ADRINDEX( MACH64_VERTEX_##n##_X_Y ) - (__s - 1)) ); \ while ( __s-- ) { \ - *vb++ = *__p++; \ + LE32_OUT( vb++, *__p++ ); \ } 
\ } while (0) #endif @@ -124,8 +145,6 @@ const GLuint vertsize = mmesa->vertex_size; GLint a; GLfloat ooa; - GLuint xy; - const GLuint xyoffset = 9; GLint xx[3], yy[3]; /* 2 fractional bits for hardware */ unsigned vbsiz = (vertsize + (vertsize > 7 ? 2 : 1)) * 4 + 2; CARD32 *vb, *vbchk; @@ -142,17 +161,14 @@ mach64_print_vertex( ctx, v3 ); } - xy = LE32_IN( &v0->ui[xyoffset] ); - xx[0] = (GLshort)( xy >> 16 ); - yy[0] = (GLshort)( xy & 0xffff ); - - xy = LE32_IN( &v1->ui[xyoffset] ); - xx[1] = (GLshort)( xy >> 16 ); - yy[1] = (GLshort)( xy & 0xffff ); - - xy = LE32_IN( &v3->ui[xyoffset] ); - xx[2] = (GLshort)( xy >> 16 ); - yy[2] = (GLshort)( xy & 0xffff ); + xx[0] = v0->v.x; + yy[0] = v0->v.y; + + xx[1] = v1->v.x; + yy[1] = v1->v.y; + + xx[2] = v3->v.x; + yy[2] = v3->v.y; a = (xx[0] - xx[2]) * (yy[1] - yy[2]) - (yy[0] - yy[2]) * (xx[1] - xx[2]); @@ -176,9 +192,8 @@ COPY_VERTEX_OOA( vb, vertsize, v3, 3 ); LE32_OUT( vb++, *(CARD32 *)&ooa ); - xy = LE32_IN( &v2->ui[xyoffset] ); - xx[0] = (GLshort)( xy >> 16 ); - yy[0] = (GLshort)( xy & 0xffff ); + xx[0] = v2->v.x; + yy[0] = v2->v.y; a = (xx[0] - xx[2]) * (yy[1] - yy[2]) - (yy[0] - yy[2]) * (xx[1] - xx[2]); @@ -429,8 +444,6 @@ GLuint vertsize = mmesa->vertex_size; GLint a; GLfloat ooa; - GLuint xy; - const GLuint xyoffset = 9; GLint xx[3], yy[3]; /* 2 fractional bits for hardware */ unsigned vbsiz = (vertsize + (vertsize > 7 ? 
2 : 1)) * 3 + 1; CARD32 *vb, *vbchk; @@ -445,17 +458,14 @@ mach64_print_vertex( ctx, v2 ); } - xy = LE32_IN( &v0->ui[xyoffset] ); - xx[0] = (GLshort)( xy >> 16 ); - yy[0] = (GLshort)( xy & 0xffff ); - - xy = LE32_IN( &v1->ui[xyoffset] ); - xx[1] = (GLshort)( xy >> 16 ); - yy[1] = (GLshort)( xy & 0xffff ); - - xy = LE32_IN( &v2->ui[xyoffset] ); - xx[2] = (GLshort)( xy >> 16 ); - yy[2] = (GLshort)( xy & 0xffff ); + xx[0] = v0->v.x; + yy[0] = v0->v.y; + + xx[1] = v1->v.x; + yy[1] = v1->v.y; + + xx[2] = v2->v.x; + yy[2] = v2->v.y; a = (xx[0] - xx[2]) * (yy[1] - yy[2]) - (yy[0] - yy[2]) * (xx[1] - xx[2]); @@ -675,9 +685,7 @@ const GLuint vertsize = mmesa->vertex_size; GLint width = (GLint)(mmesa->glCtx->Line._Width * 2.0); /* 2 fractional bits for hardware */ GLfloat ooa; - GLuint *pxy0, *pxy1; - GLuint xy0old, xy0, xy1old, xy1; - const GLuint xyoffset = 9; + GLshort x0old, y0old, x1old, y1old; GLint x0, y0, x1, y1; GLint dx, dy, ix, iy; unsigned vbsiz = (vertsize + (vertsize > 7 ? 2 : 1)) * 4 + 2; @@ -694,17 +702,11 @@ if( !width ) width = 1; /* round to the nearest supported width */ - pxy0 = &v0->ui[xyoffset]; - xy0old = *pxy0; - xy0 = LE32_IN( &xy0old ); - x0 = (GLshort)( xy0 >> 16 ); - y0 = (GLshort)( xy0 & 0xffff ); - - pxy1 = &v1->ui[xyoffset]; - xy1old = *pxy1; - xy1 = LE32_IN( &xy1old ); - x1 = (GLshort)( xy1 >> 16 ); - y1 = (GLshort)( xy1 & 0xffff ); + x0 = x0old = v0->v.x; + y0 = y0old = v0->v.y; + + x1 = x1old = v1->v.x; + y1 = y1old = v1->v.y; if ( (dx = x1 - x0) < 0 ) { dx = -dx; @@ -727,22 +729,31 @@ vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * sizeof(CARD32) ); vbchk = vb + vbsiz; - LE32_OUT( pxy0, (( x0 - ix ) << 16) | (( y0 - iy ) & 0xffff) ); + v0->v.x = x0 - ix; + v0->v.y = y0 - iy; COPY_VERTEX( vb, vertsize, v0, 1 ); - LE32_OUT( pxy1, (( x1 - ix ) << 16) | (( y1 - iy ) & 0xffff) ); + + v1->v.x = x1 - ix; + v1->v.y = y1 - iy; COPY_VERTEX( vb, vertsize, v1, 2 ); - LE32_OUT( pxy0, (( x0 + ix ) << 16) | (( y0 + iy ) & 0xffff) ); + + v0->v.x = x0 + 
ix; + v0->v.y = y0 + iy; COPY_VERTEX_OOA( vb, vertsize, v0, 3 ); LE32_OUT( vb++, *(CARD32 *)&ooa ); ooa = -ooa; - LE32_OUT( pxy1, (( x1 + ix ) << 16) | (( y1 + iy ) & 0xffff) ); + v1->v.x = x1 + ix; + v1->v.y = y1 + iy; COPY_VERTEX_OOA( vb, vertsize, v1, 1 ); LE32_OUT( vb++, *(CARD32 *)&ooa ); - *pxy0 = xy0old; - *pxy1 = xy1old; + v0->v.x = x0old; + v0->v.y = y0old; + + v1->v.x = x1old; + v1->v.y = y1old; #else /* !MACH64_NATIVE_VTXFMT */ GLuint vertsize = mmesa->vertex_size; GLint coloridx; @@ -963,9 +974,7 @@ const GLuint vertsize = mmesa->vertex_size; GLint sz = (GLint)(mmesa->glCtx->Point._Size * 2.0); /* 2 fractional bits for hardware */ GLfloat ooa; - GLuint *pxy; - GLuint xyold, xy; - const GLuint xyoffset = 9; + GLshort xold, yold; GLint x, y; unsigned vbsiz = (vertsize + (vertsize > 7 ? 2 : 1)) * 4 + 2; CARD32 *vb, *vbchk; @@ -979,32 +988,36 @@ if( !sz ) sz = 1; /* round to the nearest supported size */ - pxy = &v0->ui[xyoffset]; - xyold = *pxy; - xy = LE32_IN( &xyold ); - x = (GLshort)( xy >> 16 ); - y = (GLshort)( xy & 0xffff ); + x = xold = v0->v.x; + y = yold = v0->v.y; ooa = 4.0 / (sz * sz); vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * sizeof(CARD32) ); vbchk = vb + vbsiz; - LE32_OUT( pxy, (( x - sz ) << 16) | (( y - sz ) & 0xffff) ); + v0->v.x = x - sz; + v0->v.y = y - sz; COPY_VERTEX( vb, vertsize, v0, 1 ); - LE32_OUT( pxy, (( x + sz ) << 16) | (( y - sz ) & 0xffff) ); + + v0->v.x = x + sz; + v0->v.y = y - sz; COPY_VERTEX( vb, vertsize, v0, 2 ); - LE32_OUT( pxy, (( x - sz ) << 16) | (( y + sz ) & 0xffff) ); + + v0->v.x = x - sz; + v0->v.y = y + sz; COPY_VERTEX_OOA( vb, vertsize, v0, 3 ); LE32_OUT( vb++, *(CARD32 *)&ooa ); ooa = -ooa; - LE32_OUT( pxy, (( x + sz ) << 16) | (( y + sz ) & 0xffff) ); + v0->v.x = x + sz; + v0->v.y = y + sz; COPY_VERTEX_OOA( vb, vertsize, v0, 1 ); LE32_OUT( vb++, *(CARD32 *)&ooa ); - *pxy = xyold; + v0->v.x = xold; + v0->v.y = yold; #else /* !MACH64_NATIVE_VTXFMT */ GLuint vertsize = mmesa->vertex_size; GLint coloridx; 
@@ -1254,57 +1267,18 @@ #if MACH64_NATIVE_VTXFMT -/* #define DEPTH_SCALE 65536.0 */ -#define DEPTH_SCALE 1 +#define DEPTH_SCALE 1.0 #define UNFILLED_TRI unfilled_tri #define UNFILLED_QUAD unfilled_quad -#define VERT_X(_v) ((GLfloat)(GLshort)(LE32_IN( &(_v)->ui[xyoffset] ) & 0xffff) / 4.0) -#define VERT_Y(_v) ((GLfloat)(GLshort)(LE32_IN( &(_v)->ui[xyoffset] ) >> 16) / 4.0) -#define VERT_Z(_v) ((GLfloat) LE32_IN( &(_v)->ui[zoffset] )) +#define VERT_X(_v) ((GLfloat)_v->v.x / 4.0) +#define VERT_Y(_v) ((GLfloat)_v->v.y / 4.0) +#define VERT_Z(_v) ((GLfloat)_v->v.z) #define INSANE_VERTICES -#define VERT_SET_Z(_v,val) LE32_OUT( &(_v)->ui[zoffset], (GLuint)(val) ) -#define VERT_Z_ADD(_v,val) LE32_OUT( &(_v)->ui[zoffset], LE32_IN( &(_v)->ui[zoffset] ) + (GLuint)(val) ) +#define VERT_SET_Z(_v,val) do { _v->v.z = (GLuint)(val); } while (0) +#define VERT_Z_ADD(_v,val) do { _v->v.z += (GLuint)(val); } while (0) #define AREA_IS_CCW( a ) ((a) < 0) #define GET_VERTEX(e) (mmesa->verts + ((e) * mmesa->vertex_size * sizeof(int))) -#define MACH64_COLOR( dst, src ) \ -do { \ - UNCLAMPED_FLOAT_TO_UBYTE(dst[0], src[2]); \ - UNCLAMPED_FLOAT_TO_UBYTE(dst[1], src[1]); \ - UNCLAMPED_FLOAT_TO_UBYTE(dst[2], src[0]); \ - UNCLAMPED_FLOAT_TO_UBYTE(dst[3], src[3]); \ -} while (0) - -#define MACH64_SPEC( dst, src ) \ -do { \ - UNCLAMPED_FLOAT_TO_UBYTE(dst[0], src[2]); \ - UNCLAMPED_FLOAT_TO_UBYTE(dst[1], src[1]); \ - UNCLAMPED_FLOAT_TO_UBYTE(dst[2], src[0]); \ -} while (0) - -#define VERT_SET_RGBA( v, c ) MACH64_COLOR( v->ub4[coloroffset], c ) -#define VERT_COPY_RGBA( v0, v1 ) v0->ui[coloroffset] = v1->ui[coloroffset] -#define VERT_SAVE_RGBA( idx ) color[idx] = v[idx]->ui[coloroffset] -#define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = color[idx] - -#define VERT_SET_SPEC( v, c ) if (havespec) MACH64_SPEC( v->ub4[specoffset], c ) -#define VERT_COPY_SPEC( v0, v1 ) if (havespec) COPY_3V( v0->ub4[specoffset], v1->ub4[specoffset] ) -#define VERT_SAVE_SPEC( idx ) if (havespec) spec[idx] = 
v[idx]->ui[specoffset] -#define VERT_RESTORE_SPEC( idx ) if (havespec) v[idx]->ui[specoffset] = spec[idx] - -#define LOCAL_VARS(n) \ - mach64ContextPtr mmesa = MACH64_CONTEXT(ctx); \ - GLuint color[n], spec[n]; \ - GLuint vertex_size = mmesa->vertex_size; \ - const GLuint xyoffset = 9; \ - const GLuint coloroffset = 8; \ - const GLuint zoffset = 7; \ - const GLuint specoffset = 6; \ - GLboolean havespec = vertex_size >= 4 ? 1 : 0; \ - (void) color; (void) spec; (void) vertex_size; \ - (void) xyoffset; (void) coloroffset; (void) zoffset; \ - (void) specoffset; (void) havespec; - #else #define DEPTH_SCALE 1.0 @@ -1316,6 +1290,8 @@ #define AREA_IS_CCW( a ) (a > 0) #define GET_VERTEX(e) (mmesa->verts + ((e) * mmesa->vertex_size * sizeof(int))) +#endif + #define MACH64_COLOR( dst, src ) \ do { \ UNCLAMPED_FLOAT_TO_UBYTE(dst[0], src[2]); \ @@ -1336,17 +1312,32 @@ #define VERT_SAVE_RGBA( idx ) color[idx] = v[idx]->ui[coloroffset] #define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = color[idx] -#define VERT_SET_SPEC( v, c ) if (havespec) MACH64_SPEC( v->ub4[5], c ) -#define VERT_COPY_SPEC( v0, v1 ) if (havespec) COPY_3V(v0->ub4[5], v1->ub4[5]) -#define VERT_SAVE_SPEC( idx ) if (havespec) spec[idx] = v[idx]->ui[5] -#define VERT_RESTORE_SPEC( idx ) if (havespec) v[idx]->ui[5] = spec[idx] +#define VERT_SET_SPEC( v, c ) if (havespec) MACH64_SPEC( v->ub4[specoffset], c ) +#define VERT_COPY_SPEC( v0, v1 ) if (havespec) COPY_3V(v0->ub4[specoffset], v1->ub4[specoffset]) +#define VERT_SAVE_SPEC( idx ) if (havespec) spec[idx] = v[idx]->ui[specoffset] +#define VERT_RESTORE_SPEC( idx ) if (havespec) v[idx]->ui[specoffset] = spec[idx] + +#if MACH64_NATIVE_VTXFMT + +#define LOCAL_VARS(n) \ + mach64ContextPtr mmesa = MACH64_CONTEXT(ctx); \ + GLuint color[n], spec[n]; \ + const GLuint coloroffset = 14; \ + const GLuint specoffset = 12; \ + GLboolean havespec = mmesa->vertex_size >= 4 ? 
1 : 0; \ + (void) color; (void) spec; \ + (void) coloroffset; (void) specoffset; (void) havespec; + +#else #define LOCAL_VARS(n) \ mach64ContextPtr mmesa = MACH64_CONTEXT(ctx); \ GLuint color[n], spec[n]; \ GLuint coloroffset = (mmesa->vertex_size == 4 ? 3 : 4); \ + const GLuint specoffset = 5; \ GLboolean havespec = (mmesa->vertex_size == 4 ? 0 : 1); \ - (void) color; (void) spec; (void) coloroffset; (void) havespec; + (void) color; (void) spec; \ + (void) coloroffset; (void) specoffset; (void) havespec; #endif @@ -1576,7 +1567,16 @@ tnl->Driver.Render.Line( ctx, ii, jj ); } -#if MACH64_NATIVE_VTXFMT +#if (0) && MACH64_NATIVE_VTXFMT +/* + * Optimized version of mach64FastRenderClippedPoly(); it submits (n) vertices + * instead of (3 * n). Does not seem to improve performance over the simple one. + * + * FIXME: + * Further optimization is possible by consolidating the mach64AllocDmaLow() + * calls. However, doing so results in clipping whole polygons if they only + * have one vertex outside of the cliprect. + */ static void mach64FastRenderClippedPoly( GLcontext *ctx, const GLuint *elts, GLuint n ) { @@ -1584,30 +1584,26 @@ const GLuint vertsize = mmesa->vertex_size; GLint a; GLfloat ooa; - GLuint xy; - const GLuint xyoffset = 9; GLint xx[3], yy[3]; /* 2 fractional bits for hardware */ - unsigned vbsiz = (vertsize + (vertsize > 7 ? 2 : 1)) * n + (n-2); + unsigned vbsiz = (vertsize + (vertsize > 7 ? 
2 : 1)) * 3 + 1; CARD32 *vb, *vbchk; GLubyte *mach64verts = (GLubyte *)mmesa->verts; mach64VertexPtr v0, v1, v2; int i; + /* draw triangle with vertices elts[0], elts[1], elts[2] */ v0 = (mach64VertexPtr)VERT(elts[1]); v1 = (mach64VertexPtr)VERT(elts[2]); v2 = (mach64VertexPtr)VERT(elts[0]); - xy = LE32_IN( &v0->ui[xyoffset] ); - xx[0] = (GLshort)( xy >> 16 ); - yy[0] = (GLshort)( xy & 0xffff ); - - xy = LE32_IN( &v1->ui[xyoffset] ); - xx[1] = (GLshort)( xy >> 16 ); - yy[1] = (GLshort)( xy & 0xffff ); - - xy = LE32_IN( &v2->ui[xyoffset] ); - xx[2] = (GLshort)( xy >> 16 ); - yy[2] = (GLshort)( xy & 0xffff ); + xx[0] = v0->v.x; + yy[0] = v0->v.y; + + xx[1] = v1->v.x; + yy[1] = v1->v.y; + + xx[2] = v2->v.x; + yy[2] = v2->v.y; a = (xx[0] - xx[2]) * (yy[1] - yy[2]) - (yy[0] - yy[2]) * (xx[1] - xx[2]); @@ -1631,42 +1627,38 @@ COPY_VERTEX_OOA( vb, vertsize, v2, 3 ); LE32_OUT( vb++, *(CARD32 *)&ooa ); - i = 3; - while (1) { - if (i >= n) - break; - v0 = (mach64VertexPtr)VERT(elts[i]); - i++; + assert( vb == vbchk ); - xy = LE32_IN( &v0->ui[xyoffset] ); - xx[0] = (GLshort)( xy >> 16 ); - yy[0] = (GLshort)( xy & 0xffff ); - - a = (xx[0] - xx[2]) * (yy[1] - yy[2]) - - (yy[0] - yy[2]) * (xx[1] - xx[2]); - ooa = 16.0 / a; + /* draw triangles with vertices elts[0], elts[i-1], elts[i] */ + for (i = 3 ; i < n ; i++) { + unsigned vbsiz = (vertsize + (vertsize > 7 ? 
2 : 1)) * 1 + 1; + mach64VertexPtr vk; + int k; - COPY_VERTEX_OOA( vb, vertsize, v0, 1 ); - LE32_OUT( vb++, *(CARD32 *)&ooa ); + /* odd elements use v0, even elements use v1 */ + k = 1 - (i & 0x1); + + vk = VERT(elts[i]); + + xx[k] = vk->v.x; + yy[k] = vk->v.y; - if (i >= n) - break; - v1 = (mach64VertexPtr)VERT(elts[i]); - i++; - - xy = LE32_IN( &v1->ui[xyoffset] ); - xx[1] = (GLshort)( xy >> 16 ); - yy[1] = (GLshort)( xy & 0xffff ); - a = (xx[0] - xx[2]) * (yy[1] - yy[2]) - (yy[0] - yy[2]) * (xx[1] - xx[2]); ooa = 16.0 / a; + + vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * sizeof(CARD32) ); + vbchk = vb + vbsiz; - COPY_VERTEX_OOA( vb, vertsize, v1, 2 ); + if (k == 0) + COPY_VERTEX_OOA( vb, vertsize, vk, 1 ); + else + COPY_VERTEX_OOA( vb, vertsize, vk, 2 ); LE32_OUT( vb++, *(CARD32 *)&ooa ); - } + + assert( vb == vbchk ); - assert( vb == vbchk ); + } } #else static void mach64FastRenderClippedPoly( GLcontext *ctx, const GLuint *elts, diff -au -x 'depend*' -x '*o' mach64-vtx/mach64_vb.c mach64-vtx-2/mach64_vb.c --- mach64-vtx/mach64_vb.c 2006-03-26 22:53:19.000000000 +0300 +++ mach64-vtx-2/mach64_vb.c 2006-03-28 18:35:56.000000000 +0300 @@ -82,8 +82,9 @@ #define DO_TEX3 0 #define DO_PTEX (IND & MACH64_PTEX_BIT) -#define VERTEX mach64Vertex #define LOCALVARS mach64ContextPtr mmesa = MACH64_CONTEXT(ctx); +#define VERTEX mach64Vertex +#define VERTEX_COLOR mach64_color_t #define GET_VIEWPORT_MAT() mmesa->hw_viewport #define GET_TEXSOURCE(n) mmesa->tmu_source[n] #define GET_VERTEX_FORMAT() mmesa->vertex_format @@ -123,9 +124,6 @@ #define PTEX_FALLBACK() FALLBACK(MACH64_CONTEXT(ctx), MACH64_FALLBACK_TEXTURE, 1) -#define IMPORT_FLOAT_COLORS mach64_import_float_colors -#define IMPORT_FLOAT_SPEC_COLORS mach64_import_float_spec_colors - #define INTERP_VERTEX setup_tab[mmesa->SetupIndex].interp #define COPY_PV_VERTEX setup_tab[mmesa->SetupIndex].copy_pv @@ -133,167 +131,14 @@ * Generate pv-copying and translation functions * 
***********************************************************************/ -#if MACH64_NATIVE_VTXFMT - -#define TAG(x) mach64_##x -#include "mach64_native_vb.c" - -#else - #define TAG(x) mach64_##x #include "tnl_dd/t_dd_vb.c" -#endif - /*********************************************************************** * Generate vertex emit and interp functions * ***********************************************************************/ -#if MACH64_NATIVE_VTXFMT - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT) -#define TAG(x) x##_wg -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_SPEC_BIT) -#define TAG(x) x##_wgs -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_TEX0_BIT) -#define TAG(x) x##_wgt0 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT) -#define TAG(x) x##_wgt0t1 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_TEX0_BIT|MACH64_PTEX_BIT) -#define TAG(x) x##_wgpt0 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT) -#define TAG(x) x##_wgst0 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT|\ - MACH64_TEX1_BIT) -#define TAG(x) x##_wgst0t1 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT|\ - MACH64_PTEX_BIT) -#define TAG(x) x##_wgspt0 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT) -#define TAG(x) x##_wgf -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT) -#define TAG(x) x##_wgfs -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT) -#define TAG(x) x##_wgft0 -#include "mach64_native_vbtmp.h" - -#define IND 
(MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT|\ - MACH64_TEX1_BIT) -#define TAG(x) x##_wgft0t1 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT|\ - MACH64_PTEX_BIT) -#define TAG(x) x##_wgfpt0 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|\ - MACH64_TEX0_BIT) -#define TAG(x) x##_wgfst0 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|\ - MACH64_TEX0_BIT|MACH64_TEX1_BIT) -#define TAG(x) x##_wgfst0t1 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|\ - MACH64_TEX0_BIT|MACH64_PTEX_BIT) -#define TAG(x) x##_wgfspt0 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_TEX0_BIT) -#define TAG(x) x##_t0 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_TEX0_BIT|MACH64_TEX1_BIT) -#define TAG(x) x##_t0t1 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_FOG_BIT) -#define TAG(x) x##_f -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_FOG_BIT|MACH64_TEX0_BIT) -#define TAG(x) x##_ft0 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_FOG_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT) -#define TAG(x) x##_ft0t1 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_RGBA_BIT) -#define TAG(x) x##_g -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_RGBA_BIT|MACH64_SPEC_BIT) -#define TAG(x) x##_gs -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_RGBA_BIT|MACH64_TEX0_BIT) -#define TAG(x) x##_gt0 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_RGBA_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT) -#define TAG(x) x##_gt0t1 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT) -#define TAG(x) x##_gst0 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT) -#define TAG(x) 
x##_gst0t1 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT) -#define TAG(x) x##_gf -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT) -#define TAG(x) x##_gfs -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT) -#define TAG(x) x##_gft0 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT) -#define TAG(x) x##_gft0t1 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT) -#define TAG(x) x##_gfst0 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT|\ - MACH64_TEX1_BIT) -#define TAG(x) x##_gfst0t1 -#include "mach64_native_vbtmp.h" - -#else - #define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT) #define TAG(x) x##_wg #include "mach64_vbtmp.h" @@ -434,8 +279,6 @@ #define TAG(x) x##_gfst0t1 #include "mach64_vbtmp.h" -#endif - static void init_setup_tab( void ) { init_wg(); diff -au -x 'depend*' -x '*o' mach64-vtx/mach64_vb.h mach64-vtx-2/mach64_vb.h --- mach64-vtx/mach64_vb.h 2006-03-27 19:30:45.000000000 +0300 +++ mach64-vtx-2/mach64_vb.h 2006-03-28 18:35:56.000000000 +0300 @@ -36,9 +36,6 @@ #include "swrast/swrast.h" #include "mach64_context.h" -/* premultiply texture coordinates by homogenous coordinate */ -#define MACH64_PREMULT_TEXCOORDS - #define _MACH64_NEW_VERTEX_STATE (_DD_NEW_SEPARATE_SPECULAR | \ _DD_NEW_TRI_LIGHT_TWOSIDE | \ _DD_NEW_TRI_UNFILLED | \ diff -au -x 'depend*' -x '*o' mach64-vtx/mach64_vbtmp.h mach64-vtx-2/mach64_vbtmp.h --- mach64-vtx/mach64_vbtmp.h 2004-06-09 08:04:46.000000000 +0300 +++ mach64-vtx-2/mach64_vbtmp.h 2006-03-30 03:04:34.000000000 +0300 @@ -1,9 +1,9 @@ -/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */ + /* * Mesa 3-D graphics library - * Version: 3.5 + * Version: 5.0.1 * - * Copyright (C) 1999-2001 Brian Paul All Rights Reserved. 
+ * Copyright (C) 1999-2003 Brian Paul All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -23,11 +23,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * Authors: - * Keith Whitwell - * - * Modified for mach64 by: - * Leif Delgass - * José Fonseca + * Keith Whitwell */ @@ -64,7 +60,10 @@ * unsigned char ub4[4][16]; * } * - + + * VERTEX: hw vertex type as above + * VERTEX_COLOR: hw color struct type in VERTEX + * * DO_XYZW: Emit xyz and maybe w coordinates. * DO_RGBA: Emit color. * DO_SPEC: Emit specular color. @@ -99,10 +98,16 @@ #define VIEWPORT_Y(dst,y) dst = y #define VIEWPORT_Z(dst,z) dst = z #else +#if MACH64_NATIVE_VTXFMT +#define VIEWPORT_X(dst,x) dst = ((GLshort)((s[0] * x + s[12]) * 4.0)) +#define VIEWPORT_Y(dst,y) dst = ((GLshort)((s[5] * y + s[13]) * 4.0)) +#define VIEWPORT_Z(dst,z) dst = (((GLuint) (s[10] * z + s[14])) << 15) +#else #define VIEWPORT_X(dst,x) dst = s[0] * x + s[12] #define VIEWPORT_Y(dst,y) dst = s[5] * y + s[13] #define VIEWPORT_Z(dst,z) dst = s[10] * z + s[14] #endif +#endif #if (HAVE_HW_DIVIDE && !HAVE_PTEX_VERTICES) #error "can't cope with this combination" @@ -124,15 +129,13 @@ GLuint stride ) { LOCALVARS - struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb; + struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb; GLfloat (*tc0)[4], (*tc1)[4], (*fog)[4]; GLfloat (*tc2)[4], (*tc3)[4]; - GLfloat (*spec)[4]; - GLfloat (*col)[4]; - GLuint col_stride; - GLuint tc0_stride, tc1_stride, spec_stride, fog_stride; + GLfloat (*col)[4], (*spec)[4]; + GLuint tc0_stride, tc1_stride, col_stride, spec_stride, fog_stride; GLuint tc2_stride, tc3_stride; - GLuint tc0_size, tc1_size; + GLuint tc0_size, tc1_size, col_size; GLuint tc2_size, tc3_size; GLfloat (*coord)[4]; GLuint coord_stride; @@ -187,23 +190,27 @@ } if (DO_RGBA) { - col = VB->ColorPtr[0]->data; col_stride = VB->ColorPtr[0]->stride; + col = 
VB->ColorPtr[0]->data; + col_size = VB->ColorPtr[0]->size; } if (DO_SPEC) { - spec = VB->SecondaryColorPtr[0]->data; - spec_stride = VB->SecondaryColorPtr[0]->stride; - } else { - spec = (GLfloat (*)[4])ctx->Current.Attrib[VERT_ATTRIB_COLOR1]; - spec_stride = 0; + if (VB->SecondaryColorPtr[0]) { + spec_stride = VB->SecondaryColorPtr[0]->stride; + spec = VB->SecondaryColorPtr[0]->data; + } else { + spec = (GLfloat (*)[4])ctx->Current.Attrib[VERT_ATTRIB_COLOR1]; + spec_stride = 0; + } } if (DO_FOG) { if (VB->FogCoordPtr) { fog = VB->FogCoordPtr->data; fog_stride = VB->FogCoordPtr->stride; - } else { + } + else { static GLfloat tmp[4] = {0, 0, 0, 0}; fog = &tmp; fog_stride = 0; @@ -213,76 +220,66 @@ /* May have nonstandard strides: */ if (start) { - coord = (GLfloat (*)[4])((GLubyte *)coord + start * coord_stride); + STRIDE_4F(coord, start * coord_stride); if (DO_TEX0) - tc0 = (GLfloat (*)[4])((GLubyte *)tc0 + start * tc0_stride); + STRIDE_4F(tc0, start * tc0_stride); if (DO_TEX1) - tc1 = (GLfloat (*)[4])((GLubyte *)tc1 + start * tc1_stride); + STRIDE_4F(tc1, start * tc1_stride); if (DO_TEX2) - tc2 = (GLfloat (*)[4])((GLubyte *)tc2 + start * tc2_stride); + STRIDE_4F(tc2, start * tc2_stride); if (DO_TEX3) - tc3 = (GLfloat (*)[4])((GLubyte *)tc3 + start * tc3_stride); + STRIDE_4F(tc3, start * tc3_stride); if (DO_RGBA) STRIDE_4F(col, start * col_stride); if (DO_SPEC) STRIDE_4F(spec, start * spec_stride); if (DO_FOG) STRIDE_4F(fog, start * fog_stride); - // fog = (GLfloat (*)[4])((GLubyte *)fog + start * fog_stride); - /* STRIDE_F(fog, start * fog_stride); */ } - + for (i=start; i < end; i++, v = (VERTEX *)((GLubyte *)v + stride)) { + if (DO_TEX0 || DO_TEX1) { + if (mask[i] == 0) { + v->v.w = coord[0][3]; + } + else { + v->v.w = 1.0; + } + } if (DO_XYZW) { if (HAVE_HW_VIEWPORT || mask[i] == 0) { - /* unclipped */ VIEWPORT_X(v->v.x, coord[0][0]); VIEWPORT_Y(v->v.y, coord[0][1]); VIEWPORT_Z(v->v.z, coord[0][2]); - v->v.w = coord[0][3]; - } else { - /* clipped */ - v->v.w = 
1.0; - } - if (MACH64_DEBUG & DEBUG_VERBOSE_PRIMS) { - fprintf(stderr, "%s: vert (importable) %d: %.2f %.2f %.2f %f\n", - __FUNCTION__, i, v->v.x, v->v.y, v->v.z, v->v.w); } - coord = (GLfloat (*)[4])((GLubyte *)coord + coord_stride); + STRIDE_4F(coord, coord_stride); } if (DO_RGBA) { - if (HAVE_RGBA_COLOR) { - *(GLuint *)&v->v.color = *(GLuint *)&col[0]; - STRIDE_4F(col, col_stride); + UNCLAMPED_FLOAT_TO_UBYTE(v->v.color.red, col[0][0]); + UNCLAMPED_FLOAT_TO_UBYTE(v->v.color.green, col[0][1]); + UNCLAMPED_FLOAT_TO_UBYTE(v->v.color.blue, col[0][2]); + if (col_size == 4) { + UNCLAMPED_FLOAT_TO_UBYTE(v->v.color.alpha, col[0][3]); } else { - v->v.color.blue = col[0][2]; - v->v.color.green = col[0][1]; - v->v.color.red = col[0][0]; - v->v.color.alpha = col[0][3]; - STRIDE_4F(col, col_stride); + v->v.color.alpha = CHAN_MAX; } + STRIDE_4F(col, col_stride); } if (DO_SPEC) { - v->v.specular.red = spec[0][0]; - v->v.specular.green = spec[0][1]; - v->v.specular.blue = spec[0][2]; + UNCLAMPED_FLOAT_TO_UBYTE(v->v.specular.red, spec[0][0]); + UNCLAMPED_FLOAT_TO_UBYTE(v->v.specular.green, spec[0][1]); + UNCLAMPED_FLOAT_TO_UBYTE(v->v.specular.blue, spec[0][2]); STRIDE_4F(spec, spec_stride); } if (DO_FOG) { - v->v.specular.alpha = fog[0][0] * 255.0; - /* STRIDE_F(fog, fog_stride); */ - fog = (GLfloat (*)[4])((GLubyte *)fog + fog_stride); + UNCLAMPED_FLOAT_TO_UBYTE(v->v.specular.alpha, fog[0][0]); + STRIDE_4F(fog, fog_stride); } if (DO_TEX0) { v->v.u0 = tc0[0][0]; v->v.v0 = tc0[0][1]; - if (MACH64_DEBUG & DEBUG_VERBOSE_PRIMS) { - fprintf(stderr, "%s: vert (importable) %d: u0: %.2f, v0: %.2f, w: %f\n", - __FUNCTION__, i, v->v.u0, v->v.v0, v->v.w); - } #ifdef MACH64_PREMULT_TEXCOORDS - v->v.u0 *= v->v.w; - v->v.v0 *= v->v.w; + /* dropped */ #endif if (DO_PTEX) { if (HAVE_PTEX_VERTICES) { @@ -293,7 +290,7 @@ } else if (tc0_size == 4) { #ifdef MACH64_PREMULT_TEXCOORDS - v->v.w *= tc0[0][3]; + /* dropped */ #else float rhw = 1.0 / tc0[0][3]; v->v.w *= tc0[0][3]; @@ -302,7 +299,7 @@ 
#endif } } - tc0 = (GLfloat (*)[4])((GLubyte *)tc0 + tc0_stride); + STRIDE_4F(tc0, tc0_stride); } if (DO_TEX1) { if (DO_PTEX) { @@ -318,10 +315,9 @@ v->v.v1 = tc1[0][1]; } #ifdef MACH64_PREMULT_TEXCOORDS - v->v.u1 *= v->v.w; - v->v.v1 *= v->v.w; + /* dropped */ #endif - tc1 = (GLfloat (*)[4])((GLubyte *)tc1 + tc1_stride); + STRIDE_4F(tc1, tc1_stride); } else if (DO_PTEX) { *(GLuint *)&v->pv.q1 = 0; /* avoid culling on radeon */ @@ -339,7 +335,7 @@ v->v.u2 = tc2[0][0]; v->v.v2 = tc2[0][1]; } - tc2 = (GLfloat (*)[4])((GLubyte *)tc2 + tc2_stride); + STRIDE_4F(tc2, tc2_stride); } if (DO_TEX3) { if (DO_PTEX) { @@ -349,18 +345,17 @@ v->pv.q3 = tc3[0][3]; else v->pv.q3 = 1.0; - } + } else { v->v.u3 = tc3[0][0]; v->v.v3 = tc3[0][1]; } - tc3 = (GLfloat (*)[4])((GLubyte *)tc3 + tc3_stride); + STRIDE_4F(tc3, tc3_stride); } } -} +} #else -#if DO_XYZW #if HAVE_HW_DIVIDE #error "cannot use tiny vertices with hw perspective divide" @@ -370,12 +365,12 @@ void *dest, GLuint stride ) { LOCALVARS - struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb; + struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb; GLfloat (*col)[4]; - GLuint col_stride; + GLuint col_stride, col_size; GLfloat (*coord)[4] = VB->NdcPtr->data; GLuint coord_stride = VB->NdcPtr->stride; - GLfloat *v = (GLfloat *)dest; + VERTEX *v = (VERTEX *)dest; const GLubyte *mask = VB->ClipMask; const GLfloat *s = GET_VIEWPORT_MAT(); int i; @@ -386,80 +381,36 @@ col = VB->ColorPtr[0]->data; col_stride = VB->ColorPtr[0]->stride; + col_size = VB->ColorPtr[0]->size; - /* Pack what's left into a 4-dword vertex. Color is in a different - * place, and there is no 'w' coordinate. 
- */ if (start) { - coord = (GLfloat (*)[4])((GLubyte *)coord + start * coord_stride); + STRIDE_4F(coord, start * coord_stride); STRIDE_4F(col, start * col_stride); } - - for (i=start; i < end; i++, v+=4) { - if (HAVE_HW_VIEWPORT || mask[i] == 0) { - VIEWPORT_X(v[0], coord[0][0]); - VIEWPORT_Y(v[1], coord[0][1]); - VIEWPORT_Z(v[2], coord[0][2]); + + for (i=start; i < end; i++, v = (VERTEX *)((GLubyte *)v + stride)) { + if (DO_XYZW) { + if (HAVE_HW_VIEWPORT || mask[i] == 0) { + VIEWPORT_X(v->tv.x, coord[0][0]); + VIEWPORT_Y(v->tv.y, coord[0][1]); + VIEWPORT_Z(v->tv.z, coord[0][2]); + } + STRIDE_4F( coord, coord_stride ); } - coord = (GLfloat (*)[4])((GLubyte *)coord + coord_stride); if (DO_RGBA) { - if (HAVE_RGBA_COLOR) { - *(GLuint *)&v[3] = *(GLuint *)col; - } - else { - GLubyte *b = (GLubyte *)&v[3]; - UNCLAMPED_FLOAT_TO_UBYTE(b[0], col[0][2]); - UNCLAMPED_FLOAT_TO_UBYTE(b[1], col[0][1]); - UNCLAMPED_FLOAT_TO_UBYTE(b[2], col[0][0]); - UNCLAMPED_FLOAT_TO_UBYTE(b[3], col[0][3]); + UNCLAMPED_FLOAT_TO_UBYTE(v->tv.color.red, col[0][0]); + UNCLAMPED_FLOAT_TO_UBYTE(v->tv.color.green, col[0][1]); + UNCLAMPED_FLOAT_TO_UBYTE(v->tv.color.blue, col[0][2]); + if (col_size == 4) { + UNCLAMPED_FLOAT_TO_UBYTE(v->tv.color.alpha, col[0][3]); + } else { + v->tv.color.alpha = CHAN_MAX; } STRIDE_4F( col, col_stride ); } - if (MACH64_DEBUG & DEBUG_VERBOSE_PRIMS) { - fprintf(stderr, "vert (importable) %d: %.2f %.2f %.2f %x\n", - i, v[0], v[1], v[2], *(int *)&v[3]); - } } } -#else -static void TAG(emit)( GLcontext *ctx, GLuint start, GLuint end, - void *dest, GLuint stride ) -{ - LOCALVARS - struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb; - GLfloat (*col)[4]; - GLuint col_stride; - GLfloat *v = (GLfloat *)dest; - int i; - col = VB->ColorPtr[0]->data; - col_stride = VB->ColorPtr[0]->stride; - - if (start) - STRIDE_4F(col, col_stride * start); - - /* Need to figure out where color is: - */ - if (GET_VERTEX_FORMAT() == TINY_VERTEX_FORMAT) - v += 3; - else - v += 4; - - for (i=start; i < 
end; i++, STRIDE_F(v, stride)) { - if (HAVE_RGBA_COLOR) { - *(GLuint *)v = *(GLuint *)col[0]; - } - else { - GLubyte *b = (GLubyte *)v; - UNCLAMPED_FLOAT_TO_UBYTE(b[0], col[0][2]); - UNCLAMPED_FLOAT_TO_UBYTE(b[1], col[0][1]); - UNCLAMPED_FLOAT_TO_UBYTE(b[2], col[0][0]); - UNCLAMPED_FLOAT_TO_UBYTE(b[3], col[0][3]); - } - STRIDE_4F( col, col_stride ); - } -} -#endif /* emit */ #endif /* emit */ #if (DO_XYZW) && (DO_RGBA) @@ -561,39 +512,32 @@ w = dstclip[3]; } else { - w = (dstclip[3] == 0.0F) ? 1.0 : (1.0 / dstclip[3]); + w = 1.0 / dstclip[3]; VIEWPORT_X( dst->v.x, dstclip[0] * w ); VIEWPORT_Y( dst->v.y, dstclip[1] * w ); VIEWPORT_Z( dst->v.z, dstclip[2] * w ); } - if (MACH64_DEBUG & DEBUG_VERBOSE_PRIMS) { - fprintf( stderr, "%s: dst vert: %.2f %.2f %.2f %f\n", - __FUNCTION__, - dst->v.x, - dst->v.y, - dst->v.z, - w ); - } - if ((HAVE_HW_DIVIDE && CHECK_HW_DIVIDE) || DO_FOG || DO_SPEC || DO_TEX0 || DO_TEX1 || DO_TEX2 || DO_TEX3 || !HAVE_TINY_VERTICES) { - dst->v.w = w; + if (DO_TEX0 || DO_TEX1) { + dst->v.w = w; + } - INTERP_UB( t, dst->ub4[4][0], out->ub4[4][0], in->ub4[4][0] ); - INTERP_UB( t, dst->ub4[4][1], out->ub4[4][1], in->ub4[4][1] ); - INTERP_UB( t, dst->ub4[4][2], out->ub4[4][2], in->ub4[4][2] ); - INTERP_UB( t, dst->ub4[4][3], out->ub4[4][3], in->ub4[4][3] ); + INTERP_UB( t, dst->v.color.red, out->v.color.red, in->v.color.red ); + INTERP_UB( t, dst->v.color.green, out->v.color.green, in->v.color.green ); + INTERP_UB( t, dst->v.color.blue, out->v.color.blue, in->v.color.blue ); + INTERP_UB( t, dst->v.color.alpha, out->v.color.alpha, in->v.color.alpha ); if (DO_SPEC) { - INTERP_UB( t, dst->ub4[5][0], out->ub4[5][0], in->ub4[5][0] ); - INTERP_UB( t, dst->ub4[5][1], out->ub4[5][1], in->ub4[5][1] ); - INTERP_UB( t, dst->ub4[5][2], out->ub4[5][2], in->ub4[5][2] ); + INTERP_UB( t, dst->v.specular.red, out->v.specular.red, in->v.specular.red ); + INTERP_UB( t, dst->v.specular.green, out->v.specular.green, in->v.specular.green ); + INTERP_UB( t, 
dst->v.specular.blue, out->v.specular.blue, in->v.specular.blue ); } if (DO_FOG) { - INTERP_UB( t, dst->ub4[5][3], out->ub4[5][3], in->ub4[5][3] ); + INTERP_UB( t, dst->v.specular.alpha, out->v.specular.alpha, in->v.specular.alpha ); } if (DO_TEX0) { if (DO_PTEX) { @@ -604,8 +548,8 @@ } else { GLfloat wout = VB->NdcPtr->data[eout][3]; GLfloat win = VB->NdcPtr->data[ein][3]; - GLfloat qout = out->pv.w / wout; - GLfloat qin = in->pv.w / win; + GLfloat qout = out->v.w / wout; + GLfloat qin = in->v.w / win; GLfloat qdst, rqdst; ASSERT( !HAVE_HW_DIVIDE ); @@ -622,14 +566,7 @@ } else { #ifdef MACH64_PREMULT_TEXCOORDS - GLfloat qout = 1 / out->v.w; - GLfloat qin = 1 / in->v.w; - - INTERP_F( t, dst->v.u0, out->v.u0 * qout, in->v.u0 * qin); - INTERP_F( t, dst->v.v0, out->v.v0 * qout, in->v.v0 * qin); - - dst->v.u0 *= w; - dst->v.v0 *= w; + /* dropped */ #else INTERP_F( t, dst->v.u0, out->v.u0, in->v.u0 ); INTERP_F( t, dst->v.v0, out->v.v0, in->v.v0 ); @@ -643,14 +580,7 @@ INTERP_F( t, dst->pv.q1, out->pv.q1, in->pv.q1 ); } else { #ifdef MACH64_PREMULT_TEXCOORDS - GLfloat qout = 1 / out->v.w; - GLfloat qin = 1 / in->v.w; - - INTERP_F( t, dst->v.u1, out->v.u1 * qout, in->v.u1 * qin ); - INTERP_F( t, dst->v.v1, out->v.v1 * qout, in->v.v1 * qin ); - - dst->v.u1 *= w; - dst->v.v1 *= w; + /* dropped */ #else INTERP_F( t, dst->v.u1, out->v.u1, in->v.u1 ); INTERP_F( t, dst->v.v1, out->v.v1, in->v.v1 ); @@ -658,7 +588,7 @@ } } else if (DO_PTEX) { - dst->pv.q0 = 0.0; /* must be a valid float on radeon */ + dst->pv.q1 = 0.0; /* must be a valid float on radeon */ } if (DO_TEX2) { if (DO_PTEX) { @@ -683,16 +613,47 @@ } else { /* 4-dword vertex. Color is in v[3] and there is no oow coordinate. 
*/ - INTERP_UB( t, dst->ub4[3][0], out->ub4[3][0], in->ub4[3][0] ); - INTERP_UB( t, dst->ub4[3][1], out->ub4[3][1], in->ub4[3][1] ); - INTERP_UB( t, dst->ub4[3][2], out->ub4[3][2], in->ub4[3][2] ); - INTERP_UB( t, dst->ub4[3][3], out->ub4[3][3], in->ub4[3][3] ); + INTERP_UB( t, dst->tv.color.red, out->tv.color.red, in->tv.color.red ); + INTERP_UB( t, dst->tv.color.green, out->tv.color.green, in->tv.color.green ); + INTERP_UB( t, dst->tv.color.blue, out->tv.color.blue, in->tv.color.blue ); + INTERP_UB( t, dst->tv.color.alpha, out->tv.color.alpha, in->tv.color.alpha ); } } #endif /* rgba && xyzw */ +#if MACH64_NATIVE_VTXFMT +static void TAG(copy_pv)( GLcontext *ctx, GLuint edst, GLuint esrc ) +{ +#if DO_SPEC || DO_FOG || DO_RGBA + LOCALVARS + GLubyte *verts = GET_VERTEX_STORE(); + GLuint size = GET_VERTEX_SIZE(); + GLuint *dst = (GLuint *)(verts + (edst * size)); + GLuint *src = (GLuint *)(verts + (esrc * size)); +#endif + +#if DO_SPEC || DO_FOG + dst[12] = src[12]; /* VERTEX_?_SPEC_ARGB */ +#endif + +#if DO_RGBA + dst[14] = src[14]; /* VERTEX_?_ARGB */ +#endif +} + +#define TEX1_VERTEX_SIZE 10 +#define TEX0_VERTEX_SIZE 7 +#define NOTEX_VERTEX_SIZE 4 +#define TINY_VERTEX_SIZE 3 +#else +#define TEX1_VERTEX_SIZE 10 +#define TEX0_VERTEX_SIZE 8 +#define NOTEX_VERTEX_SIZE 6 +#define TINY_VERTEX_SIZE 4 +#endif + static void TAG(init)( void ) { setup_tab[IND].emit = TAG(emit); @@ -702,6 +663,9 @@ setup_tab[IND].interp = TAG(interp); #endif +#if MACH64_NATIVE_VTXFMT + setup_tab[IND].copy_pv = TAG(copy_pv); +#else if (DO_SPEC) setup_tab[IND].copy_pv = copy_pv_rgba4_spec5; else if (HAVE_HW_DIVIDE || DO_SPEC || DO_FOG || DO_TEX0 || DO_TEX1 || @@ -709,6 +673,7 @@ setup_tab[IND].copy_pv = copy_pv_rgba4; else setup_tab[IND].copy_pv = copy_pv_rgba3; +#endif if (DO_TEX3) { if (DO_PTEX) { @@ -740,7 +705,7 @@ } else { setup_tab[IND].vertex_format = TEX1_VERTEX_FORMAT; - setup_tab[IND].vertex_size = 10; + setup_tab[IND].vertex_size = TEX1_VERTEX_SIZE; } } else if (DO_TEX0) { @@ -749,20 
+714,19 @@ setup_tab[IND].vertex_size = 12; } else { setup_tab[IND].vertex_format = TEX0_VERTEX_FORMAT; - setup_tab[IND].vertex_size = 8; + setup_tab[IND].vertex_size = TEX0_VERTEX_SIZE; } } else if (!HAVE_HW_DIVIDE && !DO_SPEC && !DO_FOG && HAVE_TINY_VERTICES) { setup_tab[IND].vertex_format = TINY_VERTEX_FORMAT; - setup_tab[IND].vertex_size = 4; + setup_tab[IND].vertex_size = TINY_VERTEX_SIZE; } else if (HAVE_NOTEX_VERTICES) { setup_tab[IND].vertex_format = NOTEX_VERTEX_FORMAT; - setup_tab[IND].vertex_size = 6; + setup_tab[IND].vertex_size = NOTEX_VERTEX_SIZE; } else { setup_tab[IND].vertex_format = TEX0_VERTEX_FORMAT; - setup_tab[IND].vertex_size = 8; + setup_tab[IND].vertex_size = TEX0_VERTEX_SIZE; } - }