Common subdirectories: mach64-vtx/CVS and mach64-vtx-1/CVS diff -u -x 'depend*' -x '*o' mach64-vtx/mach64_context.h mach64-vtx-1/mach64_context.h --- mach64-vtx/mach64_context.h 2006-03-12 00:02:32.000000000 +0200 +++ mach64-vtx-1/mach64_context.h 2006-03-28 04:47:36.000000000 +0300 @@ -94,14 +94,49 @@ /* The size of this union is not of relevence: */ -union mach64_vertex_t { +#define TAG(x) mach64##x +typedef struct { + GLubyte blue, green, red, alpha; +} TAG(_color_t); + +typedef union { + struct { + GLfloat u1, v1, q1; + GLfloat u0, v0, q0; + TAG(_color_t) specular; + GLuint z; + TAG(_color_t) color; + GLshort y; + GLshort x; /* struct starts here */ + GLfloat u3, v3, q3; + GLfloat u2, v2, q2; + } pv; /* unused */ + struct { + GLfloat u1, v1; /* tex1 */ + GLfloat w1; + GLfloat u0, v0; /* tex0 */ + GLfloat w; + TAG(_color_t) specular; /* notex */ + GLuint z; /* tiny */ + TAG(_color_t) color; + GLshort y; + GLshort x; /* struct starts here */ + GLfloat u3, v3; /* unused */ + GLfloat u2, v2; /* unused */ + } v; + struct { + GLfloat f[7]; /* pad to sizeof(struct v) */ + GLuint z; /* tiny */ + TAG(_color_t) color; + GLshort y; + GLshort x; + } tv; GLfloat f[16]; GLuint ui[16]; GLushort us2[16][2]; GLubyte ub4[16][4]; -}; - -typedef union mach64_vertex_t mach64Vertex, *mach64VertexPtr; +} TAG(Vertex), *TAG(VertexPtr); +#undef TAG #else Only in mach64-vtx: mach64_native_vb.c Only in mach64-vtx: mach64_native_vbtmp.h diff -u -x 'depend*' -x '*o' mach64-vtx/mach64_tris.c mach64-vtx-1/mach64_tris.c --- mach64-vtx/mach64_tris.c 2006-03-27 04:50:58.000000000 +0300 +++ mach64-vtx-1/mach64_tris.c 2006-03-28 03:30:22.000000000 +0300 @@ -70,7 +70,22 @@ * Emit primitives as inline vertices * ***********************************************************************/ +/* + * TODO: add comments in draw_triangle about vertex format (mach64 + * TODO: does not have VF_CNTL, VERTEX_FMT registers ...), and + * TODO: about vbsize, ooa, and other mystic calculations + * + * TODO: cull the 
xy variables, use symbolic names v->v.x, v->v.y + * + * TODO: pull the LE32_ defines here, drop LE32_IN, document that + * TODO: non-x86 is untested + * + * TODO: order the fields of mach64Vertex from xy to w to sec_w and + * TODO: submit them in reverse order (asm ? perf regression ?) + */ + #if defined(USE_X86_ASM) +/* does not need LE32_OUT() because x86 is LE */ #define DO_COPY_VERTEX( vb, vertsize, v, n, m ) \ do { \ register const CARD32 *__p __asm__( "esi" ) = (CARD32 *)v + 10 - vertsize; \ @@ -89,6 +104,7 @@ : "0" (__s), "1" (vb), "2" (__p) ); \ } while (0) #else +/* mach64 requires that values are in LE format */ #define DO_COPY_VERTEX( vb, vertsize, v, n, m ) \ do { \ CARD32 *__p = (CARD32 *)v + 10 - vertsize; \ @@ -96,9 +112,9 @@ if ( vertsize > 7 ) { \ LE32_OUT( vb++, (2 << 16) | \ ADRINDEX( MACH64_VERTEX_##n##_SECONDARY_S ) ); \ - *vb++ = *__p++; \ - *vb++ = *__p++; \ - *vb++ = *__p++; \ + LE32_OUT( vb++ , *__p++ ); \ + LE32_OUT( vb++ , *__p++ ); \ + LE32_OUT( vb++ , *__p++ ); \ __s -= 3; \ } \ LE32_OUT( vb++, ((__s - 1 + m) << 16) | \ @@ -142,15 +158,15 @@ mach64_print_vertex( ctx, v3 ); } - xy = LE32_IN( &v0->ui[xyoffset] ); + xy = v0->ui[xyoffset]; xx[0] = (GLshort)( xy >> 16 ); yy[0] = (GLshort)( xy & 0xffff ); - xy = LE32_IN( &v1->ui[xyoffset] ); + xy = v1->ui[xyoffset]; xx[1] = (GLshort)( xy >> 16 ); yy[1] = (GLshort)( xy & 0xffff ); - xy = LE32_IN( &v3->ui[xyoffset] ); + xy = v3->ui[xyoffset]; xx[2] = (GLshort)( xy >> 16 ); yy[2] = (GLshort)( xy & 0xffff ); @@ -176,7 +192,7 @@ COPY_VERTEX_OOA( vb, vertsize, v3, 3 ); LE32_OUT( vb++, *(CARD32 *)&ooa ); - xy = LE32_IN( &v2->ui[xyoffset] ); + xy = v2->ui[xyoffset]; xx[0] = (GLshort)( xy >> 16 ); yy[0] = (GLshort)( xy & 0xffff ); @@ -445,15 +461,15 @@ mach64_print_vertex( ctx, v2 ); } - xy = LE32_IN( &v0->ui[xyoffset] ); + xy = v0->ui[xyoffset]; xx[0] = (GLshort)( xy >> 16 ); yy[0] = (GLshort)( xy & 0xffff ); - xy = LE32_IN( &v1->ui[xyoffset] ); + xy = v1->ui[xyoffset]; xx[1] = (GLshort)( xy >> 16 ); 
yy[1] = (GLshort)( xy & 0xffff ); - xy = LE32_IN( &v2->ui[xyoffset] ); + xy = v2->ui[xyoffset]; xx[2] = (GLshort)( xy >> 16 ); yy[2] = (GLshort)( xy & 0xffff ); @@ -696,13 +712,13 @@ pxy0 = &v0->ui[xyoffset]; xy0old = *pxy0; - xy0 = LE32_IN( &xy0old ); + xy0 = xy0old; x0 = (GLshort)( xy0 >> 16 ); y0 = (GLshort)( xy0 & 0xffff ); pxy1 = &v1->ui[xyoffset]; xy1old = *pxy1; - xy1 = LE32_IN( &xy1old ); + xy1 = xy1old; x1 = (GLshort)( xy1 >> 16 ); y1 = (GLshort)( xy1 & 0xffff ); @@ -981,7 +997,7 @@ pxy = &v0->ui[xyoffset]; xyold = *pxy; - xy = LE32_IN( &xyold ); + xy = xyold; x = (GLshort)( xy >> 16 ); y = (GLshort)( xy & 0xffff ); @@ -1254,8 +1270,7 @@ #if MACH64_NATIVE_VTXFMT -/* #define DEPTH_SCALE 65536.0 */ -#define DEPTH_SCALE 1 +#define DEPTH_SCALE 1.0 #define UNFILLED_TRI unfilled_tri #define UNFILLED_QUAD unfilled_quad #define VERT_X(_v) ((GLfloat)(GLshort)(LE32_IN( &(_v)->ui[xyoffset] ) & 0xffff) / 4.0) @@ -1576,7 +1591,16 @@ tnl->Driver.Render.Line( ctx, ii, jj ); } -#if MACH64_NATIVE_VTXFMT +#if (0) && MACH64_NATIVE_VTXFMT +/* + * Optimized version of mach64FastRenderClippedPoly(), it submits (n) vertices + * instead of (3 * n). Does not seem to improve performance over the simple one. + * + * FIXME: + * Further optimization is possible by consolidating the mach64AllocDmaLow() + * calls. However, doing so results in clipping whole polygons if they only + * have one vertex outside of the cliprect. + */ static void mach64FastRenderClippedPoly( GLcontext *ctx, const GLuint *elts, GLuint n ) { @@ -1587,25 +1611,26 @@ GLuint xy; const GLuint xyoffset = 9; GLint xx[3], yy[3]; /* 2 fractional bits for hardware */ - unsigned vbsiz = (vertsize + (vertsize > 7 ? 2 : 1)) * n + (n-2); + unsigned vbsiz = (vertsize + (vertsize > 7 ? 
2 : 1)) * 3 + 1; CARD32 *vb, *vbchk; GLubyte *mach64verts = (GLubyte *)mmesa->verts; mach64VertexPtr v0, v1, v2; int i; + /* draw triangle with vertices elts[0], elts[1], elts[2] */ v0 = (mach64VertexPtr)VERT(elts[1]); v1 = (mach64VertexPtr)VERT(elts[2]); v2 = (mach64VertexPtr)VERT(elts[0]); - xy = LE32_IN( &v0->ui[xyoffset] ); + xy = v0->ui[xyoffset]; xx[0] = (GLshort)( xy >> 16 ); yy[0] = (GLshort)( xy & 0xffff ); - xy = LE32_IN( &v1->ui[xyoffset] ); + xy = v1->ui[xyoffset]; xx[1] = (GLshort)( xy >> 16 ); yy[1] = (GLshort)( xy & 0xffff ); - xy = LE32_IN( &v2->ui[xyoffset] ); + xy = v2->ui[xyoffset]; xx[2] = (GLshort)( xy >> 16 ); yy[2] = (GLshort)( xy & 0xffff ); @@ -1631,42 +1656,39 @@ COPY_VERTEX_OOA( vb, vertsize, v2, 3 ); LE32_OUT( vb++, *(CARD32 *)&ooa ); - i = 3; - while (1) { - if (i >= n) - break; - v0 = (mach64VertexPtr)VERT(elts[i]); - i++; + assert( vb == vbchk ); - xy = LE32_IN( &v0->ui[xyoffset] ); - xx[0] = (GLshort)( xy >> 16 ); - yy[0] = (GLshort)( xy & 0xffff ); - - a = (xx[0] - xx[2]) * (yy[1] - yy[2]) - - (yy[0] - yy[2]) * (xx[1] - xx[2]); - ooa = 16.0 / a; - - COPY_VERTEX_OOA( vb, vertsize, v0, 1 ); - LE32_OUT( vb++, *(CARD32 *)&ooa ); + /* draw triangles with vertices elts[0], elts[i-1], elts[i] */ + for (i = 3 ; i < n ; i++) { + unsigned vbsiz = (vertsize + (vertsize > 7 ? 
2 : 1)) * 1 + 1; + mach64VertexPtr vk; + int k; + + /* odd elements use v0, even elements use v1 */ + k = 1 - (i & 0x1); + + vk = VERT(elts[i]); + + xy = vk->ui[xyoffset]; + xx[k] = (GLshort)( xy >> 16 ); + yy[k] = (GLshort)( xy & 0xffff ); - if (i >= n) - break; - v1 = (mach64VertexPtr)VERT(elts[i]); - i++; - - xy = LE32_IN( &v1->ui[xyoffset] ); - xx[1] = (GLshort)( xy >> 16 ); - yy[1] = (GLshort)( xy & 0xffff ); - a = (xx[0] - xx[2]) * (yy[1] - yy[2]) - (yy[0] - yy[2]) * (xx[1] - xx[2]); ooa = 16.0 / a; + + vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * sizeof(CARD32) ); + vbchk = vb + vbsiz; - COPY_VERTEX_OOA( vb, vertsize, v1, 2 ); + if (k == 0) + COPY_VERTEX_OOA( vb, vertsize, vk, 1 ); + else + COPY_VERTEX_OOA( vb, vertsize, vk, 2 ); LE32_OUT( vb++, *(CARD32 *)&ooa ); - } + + assert( vb == vbchk ); - assert( vb == vbchk ); + } } #else static void mach64FastRenderClippedPoly( GLcontext *ctx, const GLuint *elts, diff -u -x 'depend*' -x '*o' mach64-vtx/mach64_vb.c mach64-vtx-1/mach64_vb.c --- mach64-vtx/mach64_vb.c 2006-03-26 22:53:19.000000000 +0300 +++ mach64-vtx-1/mach64_vb.c 2006-03-28 03:58:37.000000000 +0300 @@ -82,8 +82,9 @@ #define DO_TEX3 0 #define DO_PTEX (IND & MACH64_PTEX_BIT) -#define VERTEX mach64Vertex #define LOCALVARS mach64ContextPtr mmesa = MACH64_CONTEXT(ctx); +#define VERTEX mach64Vertex +#define VERTEX_COLOR mach64_color_t #define GET_VIEWPORT_MAT() mmesa->hw_viewport #define GET_TEXSOURCE(n) mmesa->tmu_source[n] #define GET_VERTEX_FORMAT() mmesa->vertex_format @@ -123,9 +124,6 @@ #define PTEX_FALLBACK() FALLBACK(MACH64_CONTEXT(ctx), MACH64_FALLBACK_TEXTURE, 1) -#define IMPORT_FLOAT_COLORS mach64_import_float_colors -#define IMPORT_FLOAT_SPEC_COLORS mach64_import_float_spec_colors - #define INTERP_VERTEX setup_tab[mmesa->SetupIndex].interp #define COPY_PV_VERTEX setup_tab[mmesa->SetupIndex].copy_pv @@ -133,167 +131,14 @@ * Generate pv-copying and translation functions * 
***********************************************************************/ -#if MACH64_NATIVE_VTXFMT - -#define TAG(x) mach64_##x -#include "mach64_native_vb.c" - -#else - #define TAG(x) mach64_##x #include "tnl_dd/t_dd_vb.c" -#endif - /*********************************************************************** * Generate vertex emit and interp functions * ***********************************************************************/ -#if MACH64_NATIVE_VTXFMT - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT) -#define TAG(x) x##_wg -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_SPEC_BIT) -#define TAG(x) x##_wgs -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_TEX0_BIT) -#define TAG(x) x##_wgt0 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT) -#define TAG(x) x##_wgt0t1 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_TEX0_BIT|MACH64_PTEX_BIT) -#define TAG(x) x##_wgpt0 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT) -#define TAG(x) x##_wgst0 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT|\ - MACH64_TEX1_BIT) -#define TAG(x) x##_wgst0t1 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT|\ - MACH64_PTEX_BIT) -#define TAG(x) x##_wgspt0 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT) -#define TAG(x) x##_wgf -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT) -#define TAG(x) x##_wgfs -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT) -#define TAG(x) x##_wgft0 -#include "mach64_native_vbtmp.h" - -#define IND 
(MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT|\ - MACH64_TEX1_BIT) -#define TAG(x) x##_wgft0t1 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT|\ - MACH64_PTEX_BIT) -#define TAG(x) x##_wgfpt0 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|\ - MACH64_TEX0_BIT) -#define TAG(x) x##_wgfst0 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|\ - MACH64_TEX0_BIT|MACH64_TEX1_BIT) -#define TAG(x) x##_wgfst0t1 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|\ - MACH64_TEX0_BIT|MACH64_PTEX_BIT) -#define TAG(x) x##_wgfspt0 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_TEX0_BIT) -#define TAG(x) x##_t0 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_TEX0_BIT|MACH64_TEX1_BIT) -#define TAG(x) x##_t0t1 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_FOG_BIT) -#define TAG(x) x##_f -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_FOG_BIT|MACH64_TEX0_BIT) -#define TAG(x) x##_ft0 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_FOG_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT) -#define TAG(x) x##_ft0t1 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_RGBA_BIT) -#define TAG(x) x##_g -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_RGBA_BIT|MACH64_SPEC_BIT) -#define TAG(x) x##_gs -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_RGBA_BIT|MACH64_TEX0_BIT) -#define TAG(x) x##_gt0 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_RGBA_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT) -#define TAG(x) x##_gt0t1 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT) -#define TAG(x) x##_gst0 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT) -#define TAG(x) 
x##_gst0t1 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT) -#define TAG(x) x##_gf -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT) -#define TAG(x) x##_gfs -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT) -#define TAG(x) x##_gft0 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT) -#define TAG(x) x##_gft0t1 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT) -#define TAG(x) x##_gfst0 -#include "mach64_native_vbtmp.h" - -#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT|\ - MACH64_TEX1_BIT) -#define TAG(x) x##_gfst0t1 -#include "mach64_native_vbtmp.h" - -#else - #define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT) #define TAG(x) x##_wg #include "mach64_vbtmp.h" @@ -434,8 +279,6 @@ #define TAG(x) x##_gfst0t1 #include "mach64_vbtmp.h" -#endif - static void init_setup_tab( void ) { init_wg(); diff -u -x 'depend*' -x '*o' mach64-vtx/mach64_vb.h mach64-vtx-1/mach64_vb.h --- mach64-vtx/mach64_vb.h 2006-03-27 19:30:45.000000000 +0300 +++ mach64-vtx-1/mach64_vb.h 2006-03-28 03:30:22.000000000 +0300 @@ -36,9 +36,6 @@ #include "swrast/swrast.h" #include "mach64_context.h" -/* premultiply texture coordinates by homogenous coordinate */ -#define MACH64_PREMULT_TEXCOORDS - #define _MACH64_NEW_VERTEX_STATE (_DD_NEW_SEPARATE_SPECULAR | \ _DD_NEW_TRI_LIGHT_TWOSIDE | \ _DD_NEW_TRI_UNFILLED | \ diff -u -x 'depend*' -x '*o' mach64-vtx/mach64_vbtmp.h mach64-vtx-1/mach64_vbtmp.h --- mach64-vtx/mach64_vbtmp.h 2004-06-09 08:04:46.000000000 +0300 +++ mach64-vtx-1/mach64_vbtmp.h 2006-03-28 04:32:19.000000000 +0300 @@ -1,9 +1,9 @@ -/* $XFree86$ */ /* -*- mode: c; c-basic-offset: 3 -*- */ + /* * Mesa 3-D graphics library - * Version: 3.5 + * Version: 5.0.1 * - * Copyright (C) 1999-2001 Brian Paul All Rights Reserved. 
+ * Copyright (C) 1999-2003 Brian Paul All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -23,11 +23,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * * Authors: - * Keith Whitwell - * - * Modified for mach64 by: - * Leif Delgass - * José Fonseca + * Keith Whitwell */ @@ -64,7 +60,10 @@ * unsigned char ub4[4][16]; * } * - + + * VERTEX: hw vertex type as above + * VERTEX_COLOR: hw color struct type in VERTEX + * * DO_XYZW: Emit xyz and maybe w coordinates. * DO_RGBA: Emit color. * DO_SPEC: Emit specular color. @@ -99,10 +98,16 @@ #define VIEWPORT_Y(dst,y) dst = y #define VIEWPORT_Z(dst,z) dst = z #else +#if MACH64_NATIVE_VTXFMT +#define VIEWPORT_X(dst,x) dst = ((GLshort)((s[0] * x + s[12]) * 4.0)) +#define VIEWPORT_Y(dst,y) dst = ((GLshort)((s[5] * y + s[13]) * 4.0)) +#define VIEWPORT_Z(dst,z) dst = (((GLuint) (s[10] * z + s[14])) << 15) +#else #define VIEWPORT_X(dst,x) dst = s[0] * x + s[12] #define VIEWPORT_Y(dst,y) dst = s[5] * y + s[13] #define VIEWPORT_Z(dst,z) dst = s[10] * z + s[14] #endif +#endif #if (HAVE_HW_DIVIDE && !HAVE_PTEX_VERTICES) #error "can't cope with this combination" @@ -124,15 +129,13 @@ GLuint stride ) { LOCALVARS - struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb; + struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb; GLfloat (*tc0)[4], (*tc1)[4], (*fog)[4]; GLfloat (*tc2)[4], (*tc3)[4]; - GLfloat (*spec)[4]; - GLfloat (*col)[4]; - GLuint col_stride; - GLuint tc0_stride, tc1_stride, spec_stride, fog_stride; + GLfloat (*col)[4], (*spec)[4]; + GLuint tc0_stride, tc1_stride, col_stride, spec_stride, fog_stride; GLuint tc2_stride, tc3_stride; - GLuint tc0_size, tc1_size; + GLuint tc0_size, tc1_size, col_size; GLuint tc2_size, tc3_size; GLfloat (*coord)[4]; GLuint coord_stride; @@ -187,23 +190,27 @@ } if (DO_RGBA) { - col = VB->ColorPtr[0]->data; col_stride = VB->ColorPtr[0]->stride; + col = 
VB->ColorPtr[0]->data; + col_size = VB->ColorPtr[0]->size; } if (DO_SPEC) { - spec = VB->SecondaryColorPtr[0]->data; - spec_stride = VB->SecondaryColorPtr[0]->stride; - } else { - spec = (GLfloat (*)[4])ctx->Current.Attrib[VERT_ATTRIB_COLOR1]; - spec_stride = 0; + if (VB->SecondaryColorPtr[0]) { + spec_stride = VB->SecondaryColorPtr[0]->stride; + spec = VB->SecondaryColorPtr[0]->data; + } else { + spec = (GLfloat (*)[4])ctx->Current.Attrib[VERT_ATTRIB_COLOR1]; + spec_stride = 0; + } } if (DO_FOG) { if (VB->FogCoordPtr) { fog = VB->FogCoordPtr->data; fog_stride = VB->FogCoordPtr->stride; - } else { + } + else { static GLfloat tmp[4] = {0, 0, 0, 0}; fog = &tmp; fog_stride = 0; @@ -213,76 +220,66 @@ /* May have nonstandard strides: */ if (start) { - coord = (GLfloat (*)[4])((GLubyte *)coord + start * coord_stride); + STRIDE_4F(coord, start * coord_stride); if (DO_TEX0) - tc0 = (GLfloat (*)[4])((GLubyte *)tc0 + start * tc0_stride); + STRIDE_4F(tc0, start * tc0_stride); if (DO_TEX1) - tc1 = (GLfloat (*)[4])((GLubyte *)tc1 + start * tc1_stride); + STRIDE_4F(tc1, start * tc1_stride); if (DO_TEX2) - tc2 = (GLfloat (*)[4])((GLubyte *)tc2 + start * tc2_stride); + STRIDE_4F(tc2, start * tc2_stride); if (DO_TEX3) - tc3 = (GLfloat (*)[4])((GLubyte *)tc3 + start * tc3_stride); + STRIDE_4F(tc3, start * tc3_stride); if (DO_RGBA) STRIDE_4F(col, start * col_stride); if (DO_SPEC) STRIDE_4F(spec, start * spec_stride); if (DO_FOG) STRIDE_4F(fog, start * fog_stride); - // fog = (GLfloat (*)[4])((GLubyte *)fog + start * fog_stride); - /* STRIDE_F(fog, start * fog_stride); */ } - + for (i=start; i < end; i++, v = (VERTEX *)((GLubyte *)v + stride)) { + if (DO_TEX0 || DO_TEX1) { + if (mask[i] == 0) { + v->v.w = coord[0][3]; + } + else { + v->v.w = 1.0; + } + } if (DO_XYZW) { if (HAVE_HW_VIEWPORT || mask[i] == 0) { - /* unclipped */ VIEWPORT_X(v->v.x, coord[0][0]); VIEWPORT_Y(v->v.y, coord[0][1]); VIEWPORT_Z(v->v.z, coord[0][2]); - v->v.w = coord[0][3]; - } else { - /* clipped */ - v->v.w = 
1.0; - } - if (MACH64_DEBUG & DEBUG_VERBOSE_PRIMS) { - fprintf(stderr, "%s: vert (importable) %d: %.2f %.2f %.2f %f\n", - __FUNCTION__, i, v->v.x, v->v.y, v->v.z, v->v.w); } - coord = (GLfloat (*)[4])((GLubyte *)coord + coord_stride); + STRIDE_4F(coord, coord_stride); } if (DO_RGBA) { - if (HAVE_RGBA_COLOR) { - *(GLuint *)&v->v.color = *(GLuint *)&col[0]; - STRIDE_4F(col, col_stride); + UNCLAMPED_FLOAT_TO_UBYTE(v->v.color.red, col[0][0]); + UNCLAMPED_FLOAT_TO_UBYTE(v->v.color.green, col[0][1]); + UNCLAMPED_FLOAT_TO_UBYTE(v->v.color.blue, col[0][2]); + if (col_size == 4) { + UNCLAMPED_FLOAT_TO_UBYTE(v->v.color.alpha, col[0][3]); } else { - v->v.color.blue = col[0][2]; - v->v.color.green = col[0][1]; - v->v.color.red = col[0][0]; - v->v.color.alpha = col[0][3]; - STRIDE_4F(col, col_stride); + v->v.color.alpha = CHAN_MAX; } + STRIDE_4F(col, col_stride); } if (DO_SPEC) { - v->v.specular.red = spec[0][0]; - v->v.specular.green = spec[0][1]; - v->v.specular.blue = spec[0][2]; + UNCLAMPED_FLOAT_TO_UBYTE(v->v.specular.red, spec[0][0]); + UNCLAMPED_FLOAT_TO_UBYTE(v->v.specular.green, spec[0][1]); + UNCLAMPED_FLOAT_TO_UBYTE(v->v.specular.blue, spec[0][2]); STRIDE_4F(spec, spec_stride); } if (DO_FOG) { - v->v.specular.alpha = fog[0][0] * 255.0; - /* STRIDE_F(fog, fog_stride); */ - fog = (GLfloat (*)[4])((GLubyte *)fog + fog_stride); + UNCLAMPED_FLOAT_TO_UBYTE(v->v.specular.alpha, fog[0][0]); + STRIDE_4F(fog, fog_stride); } if (DO_TEX0) { v->v.u0 = tc0[0][0]; v->v.v0 = tc0[0][1]; - if (MACH64_DEBUG & DEBUG_VERBOSE_PRIMS) { - fprintf(stderr, "%s: vert (importable) %d: u0: %.2f, v0: %.2f, w: %f\n", - __FUNCTION__, i, v->v.u0, v->v.v0, v->v.w); - } #ifdef MACH64_PREMULT_TEXCOORDS - v->v.u0 *= v->v.w; - v->v.v0 *= v->v.w; + /* dropped */ #endif if (DO_PTEX) { if (HAVE_PTEX_VERTICES) { @@ -293,7 +290,7 @@ } else if (tc0_size == 4) { #ifdef MACH64_PREMULT_TEXCOORDS - v->v.w *= tc0[0][3]; + /* dropped */ #else float rhw = 1.0 / tc0[0][3]; v->v.w *= tc0[0][3]; @@ -302,7 +299,7 @@ 
#endif } } - tc0 = (GLfloat (*)[4])((GLubyte *)tc0 + tc0_stride); + STRIDE_4F(tc0, tc0_stride); } if (DO_TEX1) { if (DO_PTEX) { @@ -318,14 +315,14 @@ v->v.v1 = tc1[0][1]; } #ifdef MACH64_PREMULT_TEXCOORDS - v->v.u1 *= v->v.w; - v->v.v1 *= v->v.w; + /* dropped */ #endif - tc1 = (GLfloat (*)[4])((GLubyte *)tc1 + tc1_stride); + STRIDE_4F(tc1, tc1_stride); } else if (DO_PTEX) { *(GLuint *)&v->pv.q1 = 0; /* avoid culling on radeon */ } +#if 0 if (DO_TEX2) { if (DO_PTEX) { v->pv.u2 = tc2[0][0]; @@ -339,7 +336,7 @@ v->v.u2 = tc2[0][0]; v->v.v2 = tc2[0][1]; } - tc2 = (GLfloat (*)[4])((GLubyte *)tc2 + tc2_stride); + STRIDE_4F(tc2, tc2_stride); } if (DO_TEX3) { if (DO_PTEX) { @@ -349,30 +346,31 @@ v->pv.q3 = tc3[0][3]; else v->pv.q3 = 1.0; - } + } else { v->v.u3 = tc3[0][0]; v->v.v3 = tc3[0][1]; } - tc3 = (GLfloat (*)[4])((GLubyte *)tc3 + tc3_stride); + STRIDE_4F(tc3, tc3_stride); } +#endif } -} +} #else -#if DO_XYZW #if HAVE_HW_DIVIDE #error "cannot use tiny vertices with hw perspective divide" #endif +#if (!MACH64_NATIVE_VTXFMT) static void TAG(emit)( GLcontext *ctx, GLuint start, GLuint end, void *dest, GLuint stride ) { LOCALVARS - struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb; + struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb; GLfloat (*col)[4]; - GLuint col_stride; + GLuint col_stride, col_size; GLfloat (*coord)[4] = VB->NdcPtr->data; GLuint coord_stride = VB->NdcPtr->stride; GLfloat *v = (GLfloat *)dest; @@ -386,39 +384,42 @@ col = VB->ColorPtr[0]->data; col_stride = VB->ColorPtr[0]->stride; + col_size = VB->ColorPtr[0]->size; + +/* fprintf(stderr, "%s(small) importable %x\n", */ +/* __FUNCTION__, VB->importable_data); */ /* Pack what's left into a 4-dword vertex. Color is in a different * place, and there is no 'w' coordinate. 
*/ if (start) { - coord = (GLfloat (*)[4])((GLubyte *)coord + start * coord_stride); + STRIDE_4F(coord, start * coord_stride); STRIDE_4F(col, start * col_stride); } - + for (i=start; i < end; i++, v+=4) { - if (HAVE_HW_VIEWPORT || mask[i] == 0) { - VIEWPORT_X(v[0], coord[0][0]); - VIEWPORT_Y(v[1], coord[0][1]); - VIEWPORT_Z(v[2], coord[0][2]); + if (DO_XYZW) { + if (HAVE_HW_VIEWPORT || mask[i] == 0) { + VIEWPORT_X(v[0], coord[0][0]); + VIEWPORT_Y(v[1], coord[0][1]); + VIEWPORT_Z(v[2], coord[0][2]); + } + STRIDE_4F( coord, coord_stride ); } - coord = (GLfloat (*)[4])((GLubyte *)coord + coord_stride); if (DO_RGBA) { - if (HAVE_RGBA_COLOR) { - *(GLuint *)&v[3] = *(GLuint *)col; - } - else { - GLubyte *b = (GLubyte *)&v[3]; - UNCLAMPED_FLOAT_TO_UBYTE(b[0], col[0][2]); - UNCLAMPED_FLOAT_TO_UBYTE(b[1], col[0][1]); - UNCLAMPED_FLOAT_TO_UBYTE(b[2], col[0][0]); - UNCLAMPED_FLOAT_TO_UBYTE(b[3], col[0][3]); + VERTEX_COLOR *c = (VERTEX_COLOR *)&v[3]; + UNCLAMPED_FLOAT_TO_UBYTE(c->red, col[0][0]); + UNCLAMPED_FLOAT_TO_UBYTE(c->green, col[0][1]); + UNCLAMPED_FLOAT_TO_UBYTE(c->blue, col[0][2]); + if (col_size == 4) { + UNCLAMPED_FLOAT_TO_UBYTE(c->alpha, col[0][3]); + } else { + c->alpha = CHAN_MAX; } STRIDE_4F( col, col_stride ); } - if (MACH64_DEBUG & DEBUG_VERBOSE_PRIMS) { - fprintf(stderr, "vert (importable) %d: %.2f %.2f %.2f %x\n", - i, v[0], v[1], v[2], *(int *)&v[3]); - } +/* fprintf(stderr, "vert %d: %.2f %.2f %.2f %x\n", */ +/* i, v[0], v[1], v[2], *(int *)&v[3]); */ } } #else @@ -426,40 +427,53 @@ void *dest, GLuint stride ) { LOCALVARS - struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb; + struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb; GLfloat (*col)[4]; - GLuint col_stride; - GLfloat *v = (GLfloat *)dest; + GLuint col_stride, col_size; + GLfloat (*coord)[4] = VB->NdcPtr->data; + GLuint coord_stride = VB->NdcPtr->stride; + VERTEX *v = (VERTEX *)dest; + const GLubyte *mask = VB->ClipMask; + const GLfloat *s = GET_VIEWPORT_MAT(); int i; + (void) s; + + ASSERT(stride == 
4); + col = VB->ColorPtr[0]->data; col_stride = VB->ColorPtr[0]->stride; + col_size = VB->ColorPtr[0]->size; - if (start) - STRIDE_4F(col, col_stride * start); - - /* Need to figure out where color is: - */ - if (GET_VERTEX_FORMAT() == TINY_VERTEX_FORMAT) - v += 3; - else - v += 4; + if (start) { + STRIDE_4F(coord, start * coord_stride); + STRIDE_4F(col, start * col_stride); + } - for (i=start; i < end; i++, STRIDE_F(v, stride)) { - if (HAVE_RGBA_COLOR) { - *(GLuint *)v = *(GLuint *)col[0]; + for (i=start; i < end; i++, v = (VERTEX *)((GLubyte *)v + stride)) { + if (DO_XYZW) { + if (HAVE_HW_VIEWPORT || mask[i] == 0) { + VIEWPORT_X(v->tv.x, coord[0][0]); + VIEWPORT_Y(v->tv.y, coord[0][1]); + VIEWPORT_Z(v->tv.z, coord[0][2]); + } + STRIDE_4F( coord, coord_stride ); } - else { - GLubyte *b = (GLubyte *)v; - UNCLAMPED_FLOAT_TO_UBYTE(b[0], col[0][2]); - UNCLAMPED_FLOAT_TO_UBYTE(b[1], col[0][1]); - UNCLAMPED_FLOAT_TO_UBYTE(b[2], col[0][0]); - UNCLAMPED_FLOAT_TO_UBYTE(b[3], col[0][3]); + if (DO_RGBA) { + UNCLAMPED_FLOAT_TO_UBYTE(v->tv.color.red, col[0][0]); + UNCLAMPED_FLOAT_TO_UBYTE(v->tv.color.green, col[0][1]); + UNCLAMPED_FLOAT_TO_UBYTE(v->tv.color.blue, col[0][2]); + if (col_size == 4) { + UNCLAMPED_FLOAT_TO_UBYTE(v->tv.color.alpha, col[0][3]); + } else { + v->tv.color.alpha = CHAN_MAX; + } + STRIDE_4F( col, col_stride ); } - STRIDE_4F( col, col_stride ); } } -#endif /* emit */ +#endif + #endif /* emit */ #if (DO_XYZW) && (DO_RGBA) @@ -561,39 +575,39 @@ w = dstclip[3]; } else { - w = (dstclip[3] == 0.0F) ? 
1.0 : (1.0 / dstclip[3]); + w = 1.0 / dstclip[3]; VIEWPORT_X( dst->v.x, dstclip[0] * w ); VIEWPORT_Y( dst->v.y, dstclip[1] * w ); VIEWPORT_Z( dst->v.z, dstclip[2] * w ); } - if (MACH64_DEBUG & DEBUG_VERBOSE_PRIMS) { - fprintf( stderr, "%s: dst vert: %.2f %.2f %.2f %f\n", - __FUNCTION__, - dst->v.x, - dst->v.y, - dst->v.z, - w ); - } - if ((HAVE_HW_DIVIDE && CHECK_HW_DIVIDE) || DO_FOG || DO_SPEC || DO_TEX0 || DO_TEX1 || DO_TEX2 || DO_TEX3 || !HAVE_TINY_VERTICES) { - dst->v.w = w; + if (DO_TEX0 || DO_TEX1) { + dst->v.w = w; + } +#if MACH64_NATIVE_VTXFMT + INTERP_UB( t, dst->ub4[8][0], out->ub4[8][0], in->ub4[8][0] ); + INTERP_UB( t, dst->ub4[8][1], out->ub4[8][1], in->ub4[8][1] ); + INTERP_UB( t, dst->ub4[8][2], out->ub4[8][2], in->ub4[8][2] ); + INTERP_UB( t, dst->ub4[8][3], out->ub4[8][3], in->ub4[8][3] ); +#else INTERP_UB( t, dst->ub4[4][0], out->ub4[4][0], in->ub4[4][0] ); INTERP_UB( t, dst->ub4[4][1], out->ub4[4][1], in->ub4[4][1] ); INTERP_UB( t, dst->ub4[4][2], out->ub4[4][2], in->ub4[4][2] ); INTERP_UB( t, dst->ub4[4][3], out->ub4[4][3], in->ub4[4][3] ); +#endif if (DO_SPEC) { - INTERP_UB( t, dst->ub4[5][0], out->ub4[5][0], in->ub4[5][0] ); - INTERP_UB( t, dst->ub4[5][1], out->ub4[5][1], in->ub4[5][1] ); - INTERP_UB( t, dst->ub4[5][2], out->ub4[5][2], in->ub4[5][2] ); + INTERP_UB( t, dst->v.specular.red, out->v.specular.red, in->v.specular.red ); + INTERP_UB( t, dst->v.specular.green, out->v.specular.green, in->v.specular.green ); + INTERP_UB( t, dst->v.specular.blue, out->v.specular.blue, in->v.specular.blue ); } if (DO_FOG) { - INTERP_UB( t, dst->ub4[5][3], out->ub4[5][3], in->ub4[5][3] ); + INTERP_UB( t, dst->v.specular.alpha, out->v.specular.alpha, in->v.specular.alpha ); } if (DO_TEX0) { if (DO_PTEX) { @@ -604,8 +618,8 @@ } else { GLfloat wout = VB->NdcPtr->data[eout][3]; GLfloat win = VB->NdcPtr->data[ein][3]; - GLfloat qout = out->pv.w / wout; - GLfloat qin = in->pv.w / win; + GLfloat qout = out->v.w / wout; + GLfloat qin = in->v.w / win; GLfloat qdst, 
rqdst; ASSERT( !HAVE_HW_DIVIDE ); @@ -622,14 +636,7 @@ } else { #ifdef MACH64_PREMULT_TEXCOORDS - GLfloat qout = 1 / out->v.w; - GLfloat qin = 1 / in->v.w; - - INTERP_F( t, dst->v.u0, out->v.u0 * qout, in->v.u0 * qin); - INTERP_F( t, dst->v.v0, out->v.v0 * qout, in->v.v0 * qin); - - dst->v.u0 *= w; - dst->v.v0 *= w; + /* dropped */ #else INTERP_F( t, dst->v.u0, out->v.u0, in->v.u0 ); INTERP_F( t, dst->v.v0, out->v.v0, in->v.v0 ); @@ -643,14 +650,7 @@ INTERP_F( t, dst->pv.q1, out->pv.q1, in->pv.q1 ); } else { #ifdef MACH64_PREMULT_TEXCOORDS - GLfloat qout = 1 / out->v.w; - GLfloat qin = 1 / in->v.w; - - INTERP_F( t, dst->v.u1, out->v.u1 * qout, in->v.u1 * qin ); - INTERP_F( t, dst->v.v1, out->v.v1 * qout, in->v.v1 * qin ); - - dst->v.u1 *= w; - dst->v.v1 *= w; + /* dropped */ #else INTERP_F( t, dst->v.u1, out->v.u1, in->v.u1 ); INTERP_F( t, dst->v.v1, out->v.v1, in->v.v1 ); @@ -658,8 +658,9 @@ } } else if (DO_PTEX) { - dst->pv.q0 = 0.0; /* must be a valid float on radeon */ + dst->pv.q1 = 0.0; /* must be a valid float on radeon */ } +#if 0 if (DO_TEX2) { if (DO_PTEX) { INTERP_F( t, dst->pv.u2, out->pv.u2, in->pv.u2 ); @@ -680,19 +681,74 @@ INTERP_F( t, dst->v.v3, out->v.v3, in->v.v3 ); } } +#endif } else { /* 4-dword vertex. Color is in v[3] and there is no oow coordinate. 
*/ +#if MACH64_NATIVE_VTXFMT + INTERP_UB( t, dst->ub4[8][0], out->ub4[8][0], in->ub4[8][0] ); + INTERP_UB( t, dst->ub4[8][1], out->ub4[8][1], in->ub4[8][1] ); + INTERP_UB( t, dst->ub4[8][2], out->ub4[8][2], in->ub4[8][2] ); + INTERP_UB( t, dst->ub4[8][3], out->ub4[8][3], in->ub4[8][3] ); +#else INTERP_UB( t, dst->ub4[3][0], out->ub4[3][0], in->ub4[3][0] ); INTERP_UB( t, dst->ub4[3][1], out->ub4[3][1], in->ub4[3][1] ); INTERP_UB( t, dst->ub4[3][2], out->ub4[3][2], in->ub4[3][2] ); INTERP_UB( t, dst->ub4[3][3], out->ub4[3][3], in->ub4[3][3] ); +#endif } } #endif /* rgba && xyzw */ +#if MACH64_NATIVE_VTXFMT +static void TAG(copy_pv)( GLcontext *ctx, GLuint edst, GLuint esrc ) +{ +#if DO_SPEC || DO_FOG || DO_RGBA + LOCALVARS + GLubyte *verts = GET_VERTEX_STORE(); + GLuint size = GET_VERTEX_SIZE(); + GLuint *dst = (GLuint *)(verts + (edst * size)); + GLuint *src = (GLuint *)(verts + (esrc * size)); +#endif + +#if DO_SPEC || DO_FOG + dst[6] = src[6]; /* VERTEX_?_SPEC_ARGB */ +#endif + +#if DO_RGBA + dst[8] = src[8]; /* VERTEX_?_ARGB */ +#endif +} + +static void TAG(init)( void ) +{ + setup_tab[IND].emit = TAG(emit); + +#if DO_XYZW && DO_RGBA + setup_tab[IND].check_tex_sizes = TAG(check_tex_sizes); + setup_tab[IND].interp = TAG(interp); +#endif + + setup_tab[IND].copy_pv = TAG(copy_pv); + +#if DO_TEX1 + setup_tab[IND].vertex_format = TEX1_VERTEX_FORMAT; + setup_tab[IND].vertex_size = 10; +#elif DO_TEX0 + setup_tab[IND].vertex_format = TEX0_VERTEX_FORMAT; + setup_tab[IND].vertex_size = 7; +#elif DO_SPEC || DO_FOG + setup_tab[IND].vertex_format = NOTEX_VERTEX_FORMAT; + setup_tab[IND].vertex_size = 4; +#else + setup_tab[IND].vertex_format = TINY_VERTEX_FORMAT; + setup_tab[IND].vertex_size = 3; +#endif + +} +#else static void TAG(init)( void ) { setup_tab[IND].emit = TAG(emit); @@ -762,8 +818,8 @@ setup_tab[IND].vertex_format = TEX0_VERTEX_FORMAT; setup_tab[IND].vertex_size = 8; } - } +#endif #undef IND Only in mach64-vtx-1/: orig_mach64_native_vb.c Only in mach64-vtx-1/: 
orig_mach64_native_vbtmp.h Only in mach64-vtx-1/: orig_mach64_vbtmp.h Common subdirectories: mach64-vtx/server and mach64-vtx-1/server