Index: radeon_context.c
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/radeon/radeon_context.c,v
retrieving revision 1.33
diff -u -r1.33 radeon_context.c
--- radeon_context.c	26 Feb 2005 05:24:04 -0000	1.33
+++ radeon_context.c	28 Feb 2005 00:42:32 -0000
@@ -62,7 +62,7 @@
 #include "radeon_vtxfmt.h"
 #include "radeon_maos.h"
 
-#define DRIVER_DATE	"20041207"
+#define DRIVER_DATE	"20050227"
 
 #include "vblank.h"
 #include "utils.h"
@@ -337,7 +337,7 @@
 				 4,
 				 11, /* max 2D texture size is 2048x2048 */
 				 0,  /* 3D textures unsupported. */
-				 0,  /* cube textures unsupported. */
+				 9,  /* \todo: max cube texture size seems to be 512x512(x6) */
 				 11, /* max rect texture size is 2048x2048. */
 				 12,
 				 GL_FALSE );
@@ -417,6 +417,8 @@
    _math_matrix_set_identity( &rmesa->tmpmat );
 
    driInitExtensions( ctx, card_extensions, GL_TRUE );
+   if (rmesa->radeonScreen->drmSupportsCubeMaps)
+      _mesa_enable_extension( ctx, "GL_ARB_texture_cube_map" );
    if (rmesa->glCtx->Mesa_DXTn) {
       _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" );
       _mesa_enable_extension( ctx, "GL_S3_s3tc" );
Index: radeon_context.h
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/radeon/radeon_context.h,v
retrieving revision 1.21
diff -u -r1.21 radeon_context.h
--- radeon_context.h	10 Feb 2005 22:36:07 -0000	1.21
+++ radeon_context.h	28 Feb 2005 00:42:34 -0000
@@ -38,6 +38,7 @@
 #ifndef __RADEON_CONTEXT_H__
 #define __RADEON_CONTEXT_H__
 
+#include "tnl/t_vertex.h"
 #include "dri_util.h"
 #include "drm.h"
 #include "radeon_drm.h"
@@ -262,6 +263,16 @@
 #define TXR_PP_TEX_PITCH            2 /* 0x1d08, 0x1d10 for NPOT! */
 #define TXR_STATE_SIZE              3
 
+#define CUBE_CMD_0                  0
+#define CUBE_PP_CUBIC_FACES         1
+#define CUBE_CMD_1                  2
+#define CUBE_PP_CUBIC_OFFSET_0      3
+#define CUBE_PP_CUBIC_OFFSET_1      4
+#define CUBE_PP_CUBIC_OFFSET_2      5
+#define CUBE_PP_CUBIC_OFFSET_3      6
+#define CUBE_PP_CUBIC_OFFSET_4      7
+#define CUBE_STATE_SIZE             8
+
 #define ZBS_CMD_0              0
 #define ZBS_SE_ZBIAS_FACTOR             1
 #define ZBS_SE_ZBIAS_CONSTANT           2
@@ -413,6 +424,7 @@
    struct radeon_state_atom tcl;
    struct radeon_state_atom msc;
    struct radeon_state_atom tex[2];
+   struct radeon_state_atom cube[2];
    struct radeon_state_atom zbs;
    struct radeon_state_atom mtl; 
    struct radeon_state_atom mat[5]; 
@@ -530,12 +542,15 @@
 /* radeon_swtcl.c
  */
 struct radeon_swtcl_info {
-   GLuint SetupIndex;
-   GLuint SetupNewInputs;
    GLuint RenderIndex;
    GLuint vertex_size;
-   GLuint vertex_stride_shift;
    GLuint vertex_format;
+   GLuint projformat;	/* bit n (0..2): Qn is used for projtex */
+
+
+   struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX];
+   GLuint vertex_attr_count;
+
    GLubyte *verts;
 
    /* Fallback rasterization functions
@@ -548,6 +563,18 @@
    GLenum render_primitive;
    GLuint numverts;
 
+   /**
+    * Offset of the 4UB color data within a hardware (swtcl) vertex.
+    */
+   GLuint coloroffset;
+
+   /**
+    * Offset of the 3UB specular color data within a hardware (swtcl) vertex.
+    */
+   GLuint specoffset;
+
+   GLboolean needproj;
+
    struct radeon_dma_region indexed_verts;
 };
 
@@ -591,10 +618,18 @@
    struct dynfn SecondaryColor3fvEXT;
    struct dynfn Normal3f;
    struct dynfn Normal3fv;
+#if 0		/* \todo: vtxfmt doesn't work correctly with cubemaps yet */
+   struct dynfn TexCoord3f;
+   struct dynfn TexCoord3fv;
+#endif
    struct dynfn TexCoord2f;
    struct dynfn TexCoord2fv;
    struct dynfn TexCoord1f;
    struct dynfn TexCoord1fv;
+#if 0
+   struct dynfn MultiTexCoord3fARB;
+   struct dynfn MultiTexCoord3fvARB;
+#endif
    struct dynfn MultiTexCoord2fARB;
    struct dynfn MultiTexCoord2fvARB;
    struct dynfn MultiTexCoord1fARB;
@@ -620,10 +655,18 @@
    struct dynfn *(*SecondaryColor3fvEXT)( GLcontext *, int );
    struct dynfn *(*Normal3f)( GLcontext *, int );
    struct dynfn *(*Normal3fv)( GLcontext *, int );
+#if 0
+   struct dynfn *(*TexCoord3f)( GLcontext *, int );
+   struct dynfn *(*TexCoord3fv)( GLcontext *, int );
+#endif
    struct dynfn *(*TexCoord2f)( GLcontext *, int );
    struct dynfn *(*TexCoord2fv)( GLcontext *, int );
    struct dynfn *(*TexCoord1f)( GLcontext *, int );
    struct dynfn *(*TexCoord1fv)( GLcontext *, int );
+#if 0
+   struct dynfn *(*MultiTexCoord3fARB)( GLcontext *, int );
+   struct dynfn *(*MultiTexCoord3fvARB)( GLcontext *, int );
+#endif
    struct dynfn *(*MultiTexCoord2fARB)( GLcontext *, int );
    struct dynfn *(*MultiTexCoord2fvARB)( GLcontext *, int );
    struct dynfn *(*MultiTexCoord1fARB)( GLcontext *, int );
@@ -638,23 +681,25 @@
    GLuint prim;
 };
 
+#define RADEON_MAX_VERTEX_SIZE 17
+
 struct radeon_vbinfo {
    GLint counter, initial_counter;
    GLint *dmaptr;
    void (*notify)( void );
    GLint vertex_size;
 
-   /* A maximum total of 15 elements per vertex:  3 floats for position, 3
+   /* A maximum total of 17 elements per vertex:  3 floats for position, 3
     * floats for normal, 4 floats for color, 4 bytes for secondary color,
-    * 2 floats for each texture unit (4 floats total).
+    * 3 floats for each texture unit (6 floats total).
     * 
-    * As soon as the 3rd TMU is supported or cube maps (or 3D textures) are
-    * supported, this value will grow.
+    * As soon as the 3rd TMU is supported this value will grow.
+    * If FPCOLOR/FPALPHA isn't used any more this value could shrink.
     * 
     * The position data is never actually stored here, so 3 elements could be
     * trimmed out of the buffer.
     */
-   union { float f; int i; radeon_color_t color; } vertex[15];
+   union { float f; int i; radeon_color_t color; } vertex[RADEON_MAX_VERTEX_SIZE];
 
    GLfloat *normalptr;
    GLfloat *floatcolorptr;
@@ -671,6 +716,7 @@
    GLboolean recheck;
    GLint nrverts;
    GLuint vertex_format;
+   GLuint projformat;	/* bit n (0..2): Qn is used for projtex */
 
    GLuint installed_vertex_format;
    GLuint installed_color_3f_sz;
@@ -707,6 +753,7 @@
    GLuint TclFallback;
    GLuint Fallback;
    GLuint NewGLState;
+   GLuint tnl_index;	/* tnl->render_inputs bits at the last _tnl_install_attrs call */
 
    /* Vertex buffers
     */
@@ -813,7 +860,7 @@
    }
 }
 
-#define RADEON_OLD_PACKETS 1
+#define RADEON_OLD_PACKETS 0
 
 
 extern void radeonDestroyContext( __DRIcontextPrivate *driContextPriv );
Index: radeon_ioctl.c
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/radeon/radeon_ioctl.c,v
retrieving revision 1.23
diff -u -r1.23 radeon_ioctl.c
--- radeon_ioctl.c	16 Feb 2005 15:02:25 -0000	1.23
+++ radeon_ioctl.c	28 Feb 2005 00:42:37 -0000
@@ -151,6 +151,7 @@
    for (i = 0; i < mtu; ++i) {
        insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.tex[i]);
        insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.txr[i]);
+       insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.cube[i]);
    }
    insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.zbs);
    insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.mtl);
@@ -422,6 +423,12 @@
    rmesa->ioctl.vertex_size = component[0]->aos_size;
    rmesa->ioctl.vertex_offset = 
       (component[0]->aos_start + offset * component[0]->aos_stride * 4);
+   if (RADEON_DEBUG & DEBUG_VERTS) {
+      fprintf( stderr, "%s: RADEON_OLD_PACKETS\n", __FUNCTION__);
+      fprintf( stderr, "  rmesa->ioctl.vertex_size= %d, rmesa->ioctl.vertex_offset= %d\n",
+               rmesa->ioctl.vertex_size,
+               rmesa->ioctl.vertex_offset);
+   }
 #else
    drm_radeon_cmd_header_t *cmd;
    int sz = AOS_BUFSZ(nr);
Index: radeon_maos.c
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/radeon/radeon_maos.c,v
retrieving revision 1.2
diff -u -r1.2 radeon_maos.c
--- radeon_maos.c	22 Aug 2003 20:11:45 -0000	1.2
+++ radeon_maos.c	28 Feb 2005 00:42:37 -0000
@@ -4,7 +4,7 @@
  * Otherwise, must use verts.
  */
 #include "radeon_context.h"
-#define RADEON_MAOS_VERTS 1
+#define RADEON_MAOS_VERTS 0
 #if (RADEON_MAOS_VERTS) || (RADEON_OLD_PACKETS)
 #include "radeon_maos_verts.c"
 #else
Index: radeon_maos_vbtmp.h
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/radeon/radeon_maos_vbtmp.h,v
retrieving revision 1.3
diff -u -r1.3 radeon_maos_vbtmp.h
--- radeon_maos_vbtmp.h	24 Nov 2003 15:21:16 -0000	1.3
+++ radeon_maos_vbtmp.h	28 Feb 2005 00:42:38 -0000
@@ -47,6 +47,7 @@
    GLuint tc0_stride, tc1_stride, col_stride, spec_stride, fog_stride;
    GLuint tc2_stride, norm_stride;
    GLuint fill_tex = 0;
+   GLuint cubemap_tex = 0;
    GLuint (*coord)[4];
    GLuint coord_stride; /* object coordinates */
    GLubyte dummy[4];
@@ -65,8 +66,11 @@
 	 const GLuint t2 = GET_TEXSOURCE(2);
 	 tc2 = (GLuint (*)[4])VB->TexCoordPtr[t2]->data;
 	 tc2_stride = VB->TexCoordPtr[t2]->stride;
-	 if (DO_PTEX && VB->TexCoordPtr[t2]->size < 4) {
-	    fill_tex |= (1<<2);
+	 if (DO_PTEX) {
+	    if (VB->TexCoordPtr[t2]->size < 3)
+	       fill_tex |= (1<<2);
+	    else if (VB->TexCoordPtr[t2]->size < 4)
+	       cubemap_tex |= (1<<2);
 	 }
       } else {
 	 tc2 = (GLuint (*)[4])&ctx->Current.Attrib[VERT_ATTRIB_TEX2];
@@ -79,8 +83,11 @@
 	 const GLuint t1 = GET_TEXSOURCE(1);
 	 tc1 = (GLuint (*)[4])VB->TexCoordPtr[t1]->data;
 	 tc1_stride = VB->TexCoordPtr[t1]->stride;
-	 if (DO_PTEX && VB->TexCoordPtr[t1]->size < 4) {
-	    fill_tex |= (1<<1);
+	 if (DO_PTEX) {
+	    if (VB->TexCoordPtr[t1]->size < 3)
+	       fill_tex |= (1<<1);
+	    else if (VB->TexCoordPtr[t1]->size < 4)
+	       cubemap_tex |= (1<<1);
 	 }
       } else {
 	 tc1 = (GLuint (*)[4])&ctx->Current.Attrib[VERT_ATTRIB_TEX1];
@@ -93,8 +100,11 @@
 	 const GLuint t0 = GET_TEXSOURCE(0);
 	 tc0_stride = VB->TexCoordPtr[t0]->stride;
 	 tc0 = (GLuint (*)[4])VB->TexCoordPtr[t0]->data;
-	 if (DO_PTEX && VB->TexCoordPtr[t0]->size < 4) {
-	    fill_tex |= (1<<0);
+	 if (DO_PTEX) {
+	    if (VB->TexCoordPtr[t0]->size < 3)
+	       fill_tex |= (1<<0);
+	    else if (VB->TexCoordPtr[t0]->size < 4)
+	       cubemap_tex |= (1<<0);
 	 }
       } else {
 	 tc0 = (GLuint (*)[4])&ctx->Current.Attrib[VERT_ATTRIB_TEX0];
@@ -213,6 +223,8 @@
 	    if (DO_PTEX) {
 	       if (fill_tex & (1<<0))
 		  v[2].f = 1.0;
+	       else if (cubemap_tex & (1<<0))
+		  v[2].ui = tc0[0][2];	/* prefer cubemapping/3D textures */
 	       else
 		  v[2].ui = tc0[0][3];
 	       if (TCL_DEBUG) fprintf(stderr, "%.2f ", v[2].f);
@@ -229,6 +241,8 @@
 	    if (DO_PTEX) {
 	       if (fill_tex & (1<<1))
 		  v[2].f = 1.0;
+	       else if (cubemap_tex & (1<<1))
+		  v[2].ui = tc1[0][2];
 	       else
 		  v[2].ui = tc1[0][3];
 	       if (TCL_DEBUG) fprintf(stderr, "%.2f ", v[2].f);
@@ -244,6 +258,8 @@
 	    if (DO_PTEX) {
 	       if (fill_tex & (1<<2))
 		  v[2].f = 1.0;
+	       else if (cubemap_tex & (1<<2))
+		  v[2].ui = tc2[0][2];
 	       else
 		  v[2].ui = tc2[0][3];
 	       v += 3;
Index: radeon_maos_verts.c
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/radeon/radeon_maos_verts.c,v
retrieving revision 1.9
diff -u -r1.9 radeon_maos_verts.c
--- radeon_maos_verts.c	8 Apr 2004 08:54:24 -0000	1.9
+++ radeon_maos_verts.c	28 Feb 2005 00:42:38 -0000
@@ -266,16 +266,21 @@
    if (inputs & VERT_BIT_TEX0) {
       req |= RADEON_CP_VC_FRMT_ST0;
 
-      if (VB->TexCoordPtr[0]->size == 4) {
+      if (RADEON_DEBUG & DEBUG_VERTS) 
+	 fprintf( stderr, "%s: st0 used\n", __FUNCTION__);
+
+      if (VB->TexCoordPtr[0]->size >= 3) {
 	 req |= RADEON_CP_VC_FRMT_Q0;
 	 vtx |= RADEON_TCL_VTX_Q0;
+	 if (RADEON_DEBUG & DEBUG_VERTS) 
+	    fprintf( stderr, "%s: q0 used\n", __FUNCTION__);
       }
    }
 
    if (inputs & VERT_BIT_TEX1) {
       req |= RADEON_CP_VC_FRMT_ST1;
 
-      if (VB->TexCoordPtr[1]->size == 4) {
+      if (VB->TexCoordPtr[1]->size >= 3) {
 	 req |= RADEON_CP_VC_FRMT_Q1;
 	 vtx |= RADEON_TCL_VTX_Q1;
       }
Index: radeon_screen.c
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/radeon/radeon_screen.c,v
retrieving revision 1.30
diff -u -r1.30 radeon_screen.c
--- radeon_screen.c	26 Feb 2005 05:24:04 -0000	1.30
+++ radeon_screen.c	28 Feb 2005 00:42:40 -0000
@@ -249,6 +249,7 @@
 	    fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_IRQ_NR): %d\n", ret);
 	    return NULL;
 	 }
+	 screen->drmSupportsCubeMaps = (sPriv->drmMinor >= 15);
       }
    }
 
Index: radeon_screen.h
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/radeon/radeon_screen.h,v
retrieving revision 1.13
diff -u -r1.13 radeon_screen.h
--- radeon_screen.h	31 Jan 2005 23:40:06 -0000	1.13
+++ radeon_screen.h	28 Feb 2005 00:42:40 -0000
@@ -98,6 +98,7 @@
 
    /* Configuration cache with default values for all contexts */
    driOptionCache optionCache;
+   GLboolean drmSupportsCubeMaps;
 } radeonScreenRec, *radeonScreenPtr;
 
 extern radeonScreenPtr radeonCreateScreen( __DRIscreenPrivate *sPriv );
Index: radeon_state_init.c
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/radeon/radeon_state_init.c,v
retrieving revision 1.14
diff -u -r1.14 radeon_state_init.c
--- radeon_state_init.c	16 Feb 2005 15:02:25 -0000	1.14
+++ radeon_state_init.c	28 Feb 2005 00:42:42 -0000
@@ -113,8 +113,11 @@
 
 
 CHECK( always, GL_TRUE )
+CHECK( never, GL_FALSE )
 CHECK( tex0, ctx->Texture.Unit[0]._ReallyEnabled )
 CHECK( tex1, ctx->Texture.Unit[1]._ReallyEnabled )
+CHECK( cube0, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_CUBE_BIT))
+CHECK( cube1, (ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_CUBE_BIT))
 CHECK( fog, ctx->Fog.Enabled )
 TCL_CHECK( tcl, GL_TRUE )
 TCL_CHECK( tcl_tex0, ctx->Texture.Unit[0]._ReallyEnabled )
@@ -230,6 +233,16 @@
    ALLOC_STATE( eye, tcl_lighting, EYE_STATE_SIZE, "EYE/eye-vector", 1 );
    ALLOC_STATE( tex[0], tex0, TEX_STATE_SIZE, "TEX/tex-0", 0 );
    ALLOC_STATE( tex[1], tex1, TEX_STATE_SIZE, "TEX/tex-1", 0 );
+   if (rmesa->radeonScreen->drmSupportsCubeMaps)
+   {
+      ALLOC_STATE( cube[0], cube0, CUBE_STATE_SIZE, "CUBE/cube-0", 0 );
+      ALLOC_STATE( cube[1], cube1, CUBE_STATE_SIZE, "CUBE/cube-1", 0 );
+   }
+   else
+   {
+      ALLOC_STATE( cube[0], never, CUBE_STATE_SIZE, "CUBE/cube-0", 0 );
+      ALLOC_STATE( cube[1], never, CUBE_STATE_SIZE, "CUBE/cube-1", 0 );
+   }
    ALLOC_STATE( mat[0], tcl, MAT_STATE_SIZE, "MAT/modelproject", 1 );
    ALLOC_STATE( mat[1], tcl_eyespace_or_fog, MAT_STATE_SIZE, "MAT/modelview", 1 );
    ALLOC_STATE( mat[2], tcl_eyespace_or_lighting, MAT_STATE_SIZE, "MAT/it-modelview", 1 );
@@ -270,6 +283,10 @@
    rmesa->hw.tex[0].cmd[TEX_CMD_1] = cmdpkt(RADEON_EMIT_PP_BORDER_COLOR_0);
    rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(RADEON_EMIT_PP_TXFILTER_1);
    rmesa->hw.tex[1].cmd[TEX_CMD_1] = cmdpkt(RADEON_EMIT_PP_BORDER_COLOR_1);
+   rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(RADEON_EMIT_PP_CUBIC_FACES_0);
+   rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(RADEON_EMIT_PP_CUBIC_OFFSETS_T0);
+   rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(RADEON_EMIT_PP_CUBIC_FACES_1);
+   rmesa->hw.cube[1].cmd[CUBE_CMD_1] = cmdpkt(RADEON_EMIT_PP_CUBIC_OFFSETS_T1);
    rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(RADEON_EMIT_SE_ZBIAS_FACTOR);
    rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT);
    rmesa->hw.mtl.cmd[MTL_CMD_0] = 
@@ -310,8 +327,8 @@
 				     RADEON_CHROMA_FUNC_FAIL |
 				     RADEON_CHROMA_KEY_NEAREST |
 				     RADEON_SHADOW_FUNC_EQUAL |
-				     RADEON_SHADOW_PASS_1 |
-				     RADEON_RIGHT_HAND_CUBE_OGL);
+				     RADEON_SHADOW_PASS_1);
+     /* isn't it strange that we mustn't set RADEON_RIGHT_HAND_CUBE_OGL ? */
 
    rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] = (RADEON_FOG_VERTEX |
 					  RADEON_FOG_USE_DEPTH);
@@ -478,6 +495,18 @@
 	   RADEON_SCALE_1X |
 	   RADEON_CLAMP_TX);
       rmesa->hw.tex[i].cmd[TEX_PP_TFACTOR] = 0;
+
+      rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_FACES] = 0;
+      rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_0] =
+	  rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+      rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_1] =
+	  rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+      rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_2] =
+	  rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+      rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_3] =
+	  rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+      rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_4] =
+	  rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
    }
 
    /* Can only add ST1 at the time of doing some multitex but can keep
Index: radeon_swtcl.c
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/radeon/radeon_swtcl.c,v
retrieving revision 1.16
diff -u -r1.16 radeon_swtcl.c
--- radeon_swtcl.c	16 Feb 2005 15:02:25 -0000	1.16
+++ radeon_swtcl.c	28 Feb 2005 00:42:45 -0000
@@ -53,224 +53,182 @@
 #include "radeon_swtcl.h"
 #include "radeon_tcl.h"
 
-/***********************************************************************
- *              Build render functions from dd templates               *
- ***********************************************************************/
 
+static void flush_last_swtcl_prim( radeonContextPtr rmesa  );
 
-#define RADEON_XYZW_BIT		0x01
-#define RADEON_RGBA_BIT		0x02
-#define RADEON_SPEC_BIT		0x04
-#define RADEON_TEX0_BIT		0x08
-#define RADEON_TEX1_BIT		0x10
-#define RADEON_PTEX_BIT		0x20
-#define RADEON_MAX_SETUP	0x40
+/* R100: xyzw, c0, c1/fog, stq[0..2]  = 4+1+1+3*3 = 15 */
+/* R200: xyzw, c0, c1/fog, strq[0..5] = 4+1+1+4*6 = 30 */
+#define RADEON_MAX_TNL_VERTEX_SIZE (15 * sizeof(GLfloat))	/* for mesa _tnl stage */
 
-static void flush_last_swtcl_prim( radeonContextPtr rmesa  );
+/***********************************************************************
+ *                         Initialization 
+ ***********************************************************************/
 
-static struct {
-   void                (*emit)( GLcontext *, GLuint, GLuint, void *, GLuint );
-   tnl_interp_func		interp;
-   tnl_copy_pv_func	        copy_pv;
-   GLboolean           (*check_tex_sizes)( GLcontext *ctx );
-   GLuint               vertex_size;
-   GLuint               vertex_format;
-} setup_tab[RADEON_MAX_SETUP];
-
-
-#define TINY_VERTEX_FORMAT	        (RADEON_CP_VC_FRMT_XY |		\
-					 RADEON_CP_VC_FRMT_Z |		\
-					 RADEON_CP_VC_FRMT_PKCOLOR)
-
-#define NOTEX_VERTEX_FORMAT	        (RADEON_CP_VC_FRMT_XY |		\
-					 RADEON_CP_VC_FRMT_Z |		\
-					 RADEON_CP_VC_FRMT_W0 |		\
-					 RADEON_CP_VC_FRMT_PKCOLOR |	\
-					 RADEON_CP_VC_FRMT_PKSPEC)
-
-#define TEX0_VERTEX_FORMAT	        (RADEON_CP_VC_FRMT_XY |		\
-					 RADEON_CP_VC_FRMT_Z |		\
-					 RADEON_CP_VC_FRMT_W0 |		\
-					 RADEON_CP_VC_FRMT_PKCOLOR |	\
-					 RADEON_CP_VC_FRMT_PKSPEC |	\
-					 RADEON_CP_VC_FRMT_ST0)
-
-#define TEX1_VERTEX_FORMAT	        (RADEON_CP_VC_FRMT_XY |		\
-					 RADEON_CP_VC_FRMT_Z |		\
-					 RADEON_CP_VC_FRMT_W0 |		\
-					 RADEON_CP_VC_FRMT_PKCOLOR |	\
-					 RADEON_CP_VC_FRMT_PKSPEC |	\
-					 RADEON_CP_VC_FRMT_ST0 |	\
-					 RADEON_CP_VC_FRMT_ST1)
-
-#define PROJ_TEX1_VERTEX_FORMAT	        (RADEON_CP_VC_FRMT_XY |		\
-					 RADEON_CP_VC_FRMT_Z |		\
-					 RADEON_CP_VC_FRMT_W0 |		\
-					 RADEON_CP_VC_FRMT_PKCOLOR |	\
-					 RADEON_CP_VC_FRMT_PKSPEC |	\
-					 RADEON_CP_VC_FRMT_ST0 |	\
-					 RADEON_CP_VC_FRMT_Q0 |         \
-					 RADEON_CP_VC_FRMT_ST1 |	\
-					 RADEON_CP_VC_FRMT_Q1)
-
-#define TEX2_VERTEX_FORMAT 0
-#define TEX3_VERTEX_FORMAT 0
-#define PROJ_TEX3_VERTEX_FORMAT 0
-
-#define DO_XYZW (IND & RADEON_XYZW_BIT)
-#define DO_RGBA (IND & RADEON_RGBA_BIT)
-#define DO_SPEC (IND & RADEON_SPEC_BIT)
-#define DO_FOG  (IND & RADEON_SPEC_BIT)
-#define DO_TEX0 (IND & RADEON_TEX0_BIT)
-#define DO_TEX1 (IND & RADEON_TEX1_BIT)
-#define DO_TEX2 0
-#define DO_TEX3 0
-#define DO_PTEX (IND & RADEON_PTEX_BIT)
-
-#define VERTEX radeonVertex
-#define VERTEX_COLOR radeon_color_t
-#define GET_VIEWPORT_MAT() 0
-#define GET_TEXSOURCE(n)  n
-#define GET_VERTEX_FORMAT() RADEON_CONTEXT(ctx)->swtcl.vertex_format
-#define GET_VERTEX_STORE() RADEON_CONTEXT(ctx)->swtcl.verts
-#define GET_VERTEX_SIZE() RADEON_CONTEXT(ctx)->swtcl.vertex_size * sizeof(GLuint)
+#define EMIT_ATTR( ATTR, STYLE, F0 )					\
+do {									\
+   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = (ATTR);	\
+   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = (STYLE);	\
+   rmesa->swtcl.vertex_attr_count++;					\
+   fmt_0 |= F0;								\
+} while (0)
 
-#define HAVE_HW_VIEWPORT    1
-/* Tiny vertices don't seem to work atm - haven't looked into why.
- */
-#define HAVE_HW_DIVIDE      (IND & ~(RADEON_XYZW_BIT|RADEON_RGBA_BIT))
-#define HAVE_TINY_VERTICES  1
-#define HAVE_RGBA_COLOR     1
-#define HAVE_NOTEX_VERTICES 1
-#define HAVE_TEX0_VERTICES  1
-#define HAVE_TEX1_VERTICES  1
-#define HAVE_TEX2_VERTICES  0
-#define HAVE_TEX3_VERTICES  0
-#define HAVE_PTEX_VERTICES  1
+#define EMIT_PAD( N )							\
+do {									\
+   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = 0;		\
+   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = EMIT_PAD;	\
+   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].offset = (N);		\
+   rmesa->swtcl.vertex_attr_count++;					\
+} while (0)
 
-#define CHECK_HW_DIVIDE    (!(ctx->_TriangleCaps & (DD_TRI_LIGHT_TWOSIDE| \
-                                                    DD_TRI_UNFILLED)))
+static GLuint radeon_cp_vc_frmts[3][2] =
+{
+   { RADEON_CP_VC_FRMT_ST0, RADEON_CP_VC_FRMT_ST0 | RADEON_CP_VC_FRMT_Q0 },
+   { RADEON_CP_VC_FRMT_ST1, RADEON_CP_VC_FRMT_ST1 | RADEON_CP_VC_FRMT_Q1 },
+   { RADEON_CP_VC_FRMT_ST2, RADEON_CP_VC_FRMT_ST2 | RADEON_CP_VC_FRMT_Q2 },
+};
 
-#define INTERP_VERTEX setup_tab[RADEON_CONTEXT(ctx)->swtcl.SetupIndex].interp
-#define COPY_PV_VERTEX setup_tab[RADEON_CONTEXT(ctx)->swtcl.SetupIndex].copy_pv
+static void radeonSetVertexFormat( GLcontext *ctx )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *VB = &tnl->vb;
+   GLuint index = tnl->render_inputs;
+   int fmt_0 = 0;
+   int projtex = 0;
+   int offset = 0;
 
 
-/***********************************************************************
- *         Generate  pv-copying and translation functions              *
- ***********************************************************************/
+   /* Important:
+    */
+   if ( VB->NdcPtr != NULL ) {
+      VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
+   }
+   else {
+      VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr;
+   }
 
-#define TAG(x) radeon_##x
-#define IND ~0
-#include "tnl_dd/t_dd_vb.c"
-#undef IND
+   assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL );
+   rmesa->swtcl.vertex_attr_count = 0;
 
+   /* EMIT_ATTR's must be in order as they tell t_vertex.c how to
+    * build up a hardware vertex.
+    */
+   if ( !rmesa->swtcl.needproj ||
+        (index & _TNL_BITS_TEX_ANY)) {	/* for projtex */
+      EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F, 
+		 RADEON_CP_VC_FRMT_XY |	RADEON_CP_VC_FRMT_Z | RADEON_CP_VC_FRMT_W0 );
+      offset = 4;
+   }
+   else {
+      EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_3F, 
+		 RADEON_CP_VC_FRMT_XY |	RADEON_CP_VC_FRMT_Z );
+      offset = 3;
+   }
 
-/***********************************************************************
- *             Generate vertex emit and interp functions               *
- ***********************************************************************/
+   rmesa->swtcl.coloroffset = offset;
+#if MESA_LITTLE_ENDIAN 
+   EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_RGBA, 
+	      RADEON_CP_VC_FRMT_PKCOLOR );
+#else
+   EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_ABGR,
+	      RADEON_CP_VC_FRMT_PKCOLOR );
+#endif
+   offset += 1;
 
-#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT)
-#define TAG(x) x##_wg
-#include "tnl_dd/t_dd_vbtmp.h"
-
-#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_TEX0_BIT)
-#define TAG(x) x##_wgt0
-#include "tnl_dd/t_dd_vbtmp.h"
-
-#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_TEX0_BIT|RADEON_PTEX_BIT)
-#define TAG(x) x##_wgpt0
-#include "tnl_dd/t_dd_vbtmp.h"
-
-#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_TEX0_BIT|RADEON_TEX1_BIT)
-#define TAG(x) x##_wgt0t1
-#include "tnl_dd/t_dd_vbtmp.h"
-
-#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_TEX0_BIT|RADEON_TEX1_BIT|\
-             RADEON_PTEX_BIT)
-#define TAG(x) x##_wgpt0t1
-#include "tnl_dd/t_dd_vbtmp.h"
-
-#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_SPEC_BIT)
-#define TAG(x) x##_wgfs
-#include "tnl_dd/t_dd_vbtmp.h"
-
-#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_SPEC_BIT|\
-	     RADEON_TEX0_BIT)
-#define TAG(x) x##_wgfst0
-#include "tnl_dd/t_dd_vbtmp.h"
-
-#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_SPEC_BIT|\
-	     RADEON_TEX0_BIT|RADEON_PTEX_BIT)
-#define TAG(x) x##_wgfspt0
-#include "tnl_dd/t_dd_vbtmp.h"
-
-#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_SPEC_BIT|\
-	     RADEON_TEX0_BIT|RADEON_TEX1_BIT)
-#define TAG(x) x##_wgfst0t1
-#include "tnl_dd/t_dd_vbtmp.h"
-
-#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_SPEC_BIT|\
-	     RADEON_TEX0_BIT|RADEON_TEX1_BIT|RADEON_PTEX_BIT)
-#define TAG(x) x##_wgfspt0t1
-#include "tnl_dd/t_dd_vbtmp.h"
+   rmesa->swtcl.specoffset = 0;
+   if (index & (_TNL_BIT_COLOR1|_TNL_BIT_FOG)) {
 
+#if MESA_LITTLE_ENDIAN 
+      if (index & _TNL_BIT_COLOR1) {
+	 rmesa->swtcl.specoffset = offset;
+	 EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_RGB,
+	 	    RADEON_CP_VC_FRMT_PKSPEC );
+      }
+      else {
+	 EMIT_PAD( 3 );
+      }
 
-/***********************************************************************
- *                         Initialization 
- ***********************************************************************/
+      if (index & _TNL_BIT_FOG) {
+	 EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1UB_1F,
+	 	    RADEON_CP_VC_FRMT_PKSPEC );
+      }
+      else {
+	 EMIT_PAD( 1 );
+      }
+#else
+      if (index & _TNL_BIT_FOG) {
+	 EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1UB_1F,
+	 	    RADEON_CP_VC_FRMT_PKSPEC );
+      }
+      else {
+	 EMIT_PAD( 1 );
+      }
 
-static void init_setup_tab( void )
-{
-   init_wg();
-   init_wgt0();
-   init_wgpt0();
-   init_wgt0t1();
-   init_wgpt0t1();
-   init_wgfs();
-   init_wgfst0();
-   init_wgfspt0();
-   init_wgfst0t1();
-   init_wgfspt0t1();
-}
+      if (index & _TNL_BIT_COLOR1) {
+	 rmesa->swtcl.specoffset = offset;
+	 EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_BGR,
+	 	    RADEON_CP_VC_FRMT_PKSPEC );
+      }
+      else {
+	 EMIT_PAD( 3 );
+      }
+#endif
+   }
 
+   if (index & _TNL_BITS_TEX_ANY) {
+      int i;
 
+      for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
+	 if (index & _TNL_BIT_TEX(i)) {
+	    GLuint sz = VB->TexCoordPtr[i]->size;
+
+	    switch (sz) {
+	    case 1:
+	    case 2:
+	       EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_2F,
+			  radeon_cp_vc_frmts[i][0] );
+	       break;
+	    case 3:
+	       EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_3F,
+			  radeon_cp_vc_frmts[i][1] );
+	       break;
+	    case 4:	/* FIXME: what should we do if it's still cubemaps ? */
+	       EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_3F_XYW,
+			  radeon_cp_vc_frmts[i][1] );
+	       projtex |= 1<<i;
+	       break;
+	    default:
+	       continue;
+	    };
+	 }
+      }
+   }
 
-void radeonPrintSetupFlags(char *msg, GLuint flags )
-{
-   fprintf(stderr, "%s(%x): %s%s%s%s%s%s\n",
-	   msg,
-	   (int)flags,
-	   (flags & RADEON_XYZW_BIT)      ? " xyzw," : "",
-	   (flags & RADEON_RGBA_BIT)     ? " rgba," : "",
-	   (flags & RADEON_SPEC_BIT)     ? " spec/fog," : "",
-	   (flags & RADEON_TEX0_BIT)     ? " tex-0," : "",
-	   (flags & RADEON_TEX1_BIT)     ? " tex-1," : "",
-	   (flags & RADEON_PTEX_BIT)     ? " proj-tex," : "");
+   if ( rmesa->tnl_index != index ||
+	fmt_0 != rmesa->swtcl.vertex_format ||
+	projtex != rmesa->swtcl.projformat) {
+      RADEON_NEWPRIM(rmesa);
+      rmesa->swtcl.vertex_format = fmt_0;
+      rmesa->swtcl.projformat= projtex;
+      rmesa->swtcl.vertex_size =
+	  _tnl_install_attrs( ctx,
+			      rmesa->swtcl.vertex_attrs, 
+			      rmesa->swtcl.vertex_attr_count,
+			      NULL, 0 );
+      rmesa->swtcl.vertex_size /= 4;
+      rmesa->tnl_index = index;
+      if (RADEON_DEBUG & DEBUG_VERTS)
+	 fprintf( stderr, "%s: vertex_size= %d floats\n",
+		  __FUNCTION__, rmesa->swtcl.vertex_size);
+   }
 }
 
 
 static void radeonRenderStart( GLcontext *ctx )
 {
-   TNLcontext *tnl = TNL_CONTEXT(ctx);
    radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
 
-   if (!setup_tab[rmesa->swtcl.SetupIndex].check_tex_sizes(ctx)) {
-      GLuint ind = rmesa->swtcl.SetupIndex |= (RADEON_PTEX_BIT|RADEON_RGBA_BIT);
-
-      /* Projective textures are handled nicely; just have to change
-       * up to the new vertex format.
-       */
-      if (setup_tab[ind].vertex_format != rmesa->swtcl.vertex_format) {
-	 RADEON_NEWPRIM(rmesa);
-	 rmesa->swtcl.vertex_format = setup_tab[ind].vertex_format;
-	 rmesa->swtcl.vertex_size = setup_tab[ind].vertex_size;
-      }
-
-      if (!(ctx->_TriangleCaps & (DD_TRI_LIGHT_TWOSIDE|DD_TRI_UNFILLED))) {
-	 tnl->Driver.Render.Interp = setup_tab[rmesa->swtcl.SetupIndex].interp;
-	 tnl->Driver.Render.CopyPV = setup_tab[rmesa->swtcl.SetupIndex].copy_pv;
-      }
-   }
+   radeonSetVertexFormat( ctx );
    
    if (rmesa->dma.flush != 0 && 
        rmesa->dma.flush != flush_last_swtcl_prim)
@@ -278,82 +236,48 @@
 }
 
 
-void radeonBuildVertices( GLcontext *ctx, GLuint start, GLuint count,
-			   GLuint newinputs )
-{
-   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
-   GLuint stride = rmesa->swtcl.vertex_size * sizeof(int);
-   GLubyte *v = ((GLubyte *)rmesa->swtcl.verts + (start * stride));
-
-   newinputs |= rmesa->swtcl.SetupNewInputs;
-   rmesa->swtcl.SetupNewInputs = 0;
-
-   if (!newinputs)
-      return;
-
-   setup_tab[rmesa->swtcl.SetupIndex].emit( ctx, start, count, v, stride );
-}
-
+/**
+ * Set vertex state for SW TCL.  The primary purpose of this function is to
+ * determine in advance whether or not the hardware can / should do the
+ * projection divide or Mesa should do it.
+ */
 void radeonChooseVertexState( GLcontext *ctx )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
    TNLcontext *tnl = TNL_CONTEXT(ctx);
-   GLuint ind = (RADEON_XYZW_BIT | RADEON_RGBA_BIT);
 
-   if (!rmesa->TclFallback || rmesa->Fallback)
-      return;
+   GLuint se_coord_fmt;
 
-   if (ctx->Fog.Enabled || (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR))
-      ind |= RADEON_SPEC_BIT;
+   /* HW perspective divide is a win, but tiny vertex formats are a
+    * bigger one.
+    */
 
-   if (ctx->Texture._EnabledUnits & 0x2)
-      /* unit 1 enabled */
-      ind |= RADEON_TEX0_BIT|RADEON_TEX1_BIT;
-   else if (ctx->Texture._EnabledUnits & 0x1)
-      /* unit 0 enabled */
-      ind |= RADEON_TEX0_BIT;
-
-   rmesa->swtcl.SetupIndex = ind;
-
-   if (ctx->_TriangleCaps & (DD_TRI_LIGHT_TWOSIDE|DD_TRI_UNFILLED)) {
-      tnl->Driver.Render.Interp = radeon_interp_extras;
-      tnl->Driver.Render.CopyPV = radeon_copy_pv_extras;
+   /* FIXME: the old code only checked for TINY_VERTEX_FORMAT */
+   /* and not for (x==TINY_VERTEX_FORMAT || x==NOTEX_VERTEX_FORMAT) */
+   /*       */
+   /* Question: Is the w component necessary for specular color ? */
+#if 1
+   if ( ((tnl->render_inputs & (_TNL_BITS_TEX_ANY|_TNL_BIT_COLOR1) ) == 0)
+#else
+   if ( ((tnl->render_inputs & _TNL_BITS_TEX_ANY) == 0)
+#endif
+	|| (ctx->_TriangleCaps & (DD_TRI_LIGHT_TWOSIDE|DD_TRI_UNFILLED))) {
+      rmesa->swtcl.needproj = GL_TRUE;
+      se_coord_fmt = (RADEON_VTX_XY_PRE_MULT_1_OVER_W0 |
+		      RADEON_VTX_Z_PRE_MULT_1_OVER_W0 |
+		      RADEON_TEX1_W_ROUTING_USE_Q1);
    }
    else {
-      tnl->Driver.Render.Interp = setup_tab[ind].interp;
-      tnl->Driver.Render.CopyPV = setup_tab[ind].copy_pv;
-   }
-
-   if (setup_tab[ind].vertex_format != rmesa->swtcl.vertex_format) {
-      RADEON_NEWPRIM(rmesa);
-      rmesa->swtcl.vertex_format = setup_tab[ind].vertex_format;
-      rmesa->swtcl.vertex_size = setup_tab[ind].vertex_size;
+      rmesa->swtcl.needproj = GL_FALSE;
+      se_coord_fmt = (RADEON_VTX_W0_IS_NOT_1_OVER_W0 |
+		      RADEON_TEX1_W_ROUTING_USE_Q1);
    }
 
-   {
-      GLuint se_coord_fmt, needproj;
-
-      /* HW perspective divide is a win, but tiny vertex formats are a
-       * bigger one.
-       */
-      if (setup_tab[ind].vertex_format == TINY_VERTEX_FORMAT ||
-	  (ctx->_TriangleCaps & (DD_TRI_LIGHT_TWOSIDE|DD_TRI_UNFILLED))) {
-	 needproj = GL_TRUE;
-	 se_coord_fmt = (RADEON_VTX_XY_PRE_MULT_1_OVER_W0 |
-			 RADEON_VTX_Z_PRE_MULT_1_OVER_W0 |
-			 RADEON_TEX1_W_ROUTING_USE_Q1);
-      }
-      else {
-	 needproj = GL_FALSE;
-	 se_coord_fmt = (RADEON_VTX_W0_IS_NOT_1_OVER_W0 |
-			 RADEON_TEX1_W_ROUTING_USE_Q1);
-      }
+   _tnl_need_projected_coords( ctx, rmesa->swtcl.needproj );
 
-      if ( se_coord_fmt != rmesa->hw.set.cmd[SET_SE_COORDFMT] ) {
-	 RADEON_STATECHANGE( rmesa, set );
-	 rmesa->hw.set.cmd[SET_SE_COORDFMT] = se_coord_fmt;
-      }
-      _tnl_need_projected_coords( ctx, needproj );
+   if ( se_coord_fmt != rmesa->hw.set.cmd[SET_SE_COORDFMT] ) {
+      RADEON_STATECHANGE( rmesa, set );
+      rmesa->hw.set.cmd[SET_SE_COORDFMT] = se_coord_fmt;
    }
 }
 
@@ -430,38 +354,6 @@
 }
 
 
-
-
-static void *radeon_emit_contiguous_verts( GLcontext *ctx, 
-					   GLuint start, 
-					   GLuint count,
-					   void *dest)
-{
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLuint stride = rmesa->swtcl.vertex_size * 4;
-   setup_tab[rmesa->swtcl.SetupIndex].emit( ctx, start, count, dest, stride );
-   return (void *)((char *)dest + stride * (count - start));
-}
-
-
-
-void radeon_emit_indexed_verts( GLcontext *ctx, GLuint start, GLuint count )
-{
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-
-   radeonAllocDmaRegionVerts( rmesa, 
-			      &rmesa->swtcl.indexed_verts, 
-			      count - start,
-			      rmesa->swtcl.vertex_size * 4, 
-			      64);
-
-   setup_tab[rmesa->swtcl.SetupIndex].emit( 
-      ctx, start, count, 
-      rmesa->swtcl.indexed_verts.address + rmesa->swtcl.indexed_verts.start, 
-      rmesa->swtcl.vertex_size * 4 );
-}
-
-
 /*
  * Render unclipped vertex buffers by emitting vertices directly to
  * dma buffers.  Use strip/fan hardware primitives where possible.
@@ -477,7 +369,8 @@
 #define HAVE_QUADS       0
 #define HAVE_QUAD_STRIPS 0
 #define HAVE_POLYGONS    0
-#define HAVE_ELTS        1
+/* \todo: is it possible to make "ELTS" work with t_vertex code ? */
+#define HAVE_ELTS        0
 
 static const GLuint hw_prim[GL_POLYGON+1] = {
    RADEON_CP_VC_CNTL_PRIM_TYPE_POINT,
@@ -499,91 +392,17 @@
    assert(rmesa->dma.current.ptr == rmesa->dma.current.start);
 }
 
-static __inline void radeonEltPrimitive( radeonContextPtr rmesa, GLenum prim )
-{
-   RADEON_NEWPRIM( rmesa );
-   rmesa->swtcl.hw_primitive = hw_prim[prim] | RADEON_CP_VC_CNTL_PRIM_WALK_IND;
-}
-
-
-
-
-#define LOCAL_VARS radeonContextPtr rmesa = RADEON_CONTEXT(ctx)
-#define ELTS_VARS( buf )  GLushort *dest = buf; (void)rmesa;
+#define LOCAL_VARS radeonContextPtr rmesa = RADEON_CONTEXT(ctx); (void)rmesa
 #define INIT( prim ) radeonDmaPrimitive( rmesa, prim )
-#define ELT_INIT(prim) radeonEltPrimitive( rmesa, prim )
 #define FLUSH()  RADEON_NEWPRIM( rmesa )
 #define GET_CURRENT_VB_MAX_VERTS() \
   (((int)rmesa->dma.current.end - (int)rmesa->dma.current.ptr) / (rmesa->swtcl.vertex_size*4))
 #define GET_SUBSEQUENT_VB_MAX_VERTS() \
   ((RADEON_BUFFER_SIZE) / (rmesa->swtcl.vertex_size*4))
-
-#if RADEON_OLD_PACKETS
-# define GET_CURRENT_VB_MAX_ELTS() \
-  ((RADEON_CMD_BUF_SZ - (rmesa->store.cmd_used + 24)) / 2)
-#else
-# define GET_CURRENT_VB_MAX_ELTS() \
-  ((RADEON_CMD_BUF_SZ - (rmesa->store.cmd_used + 16)) / 2)
-#endif
-#define GET_SUBSEQUENT_VB_MAX_ELTS() \
-  ((RADEON_CMD_BUF_SZ - 1024) / 2)
-
-
-static void *radeon_alloc_elts( radeonContextPtr rmesa, int nr )
-{
-   if (rmesa->dma.flush == radeonFlushElts &&
-       rmesa->store.cmd_used + nr*2 < RADEON_CMD_BUF_SZ) {
-
-      rmesa->store.cmd_used += nr*2;
-
-      return (void *)(rmesa->store.cmd_buf + rmesa->store.cmd_used);
-   }
-   else {
-      if (rmesa->dma.flush) {
-	 rmesa->dma.flush( rmesa );
-      }
-
-      radeonEmitVertexAOS( rmesa,
-			   rmesa->swtcl.vertex_size,
-			   (rmesa->radeonScreen->gart_buffer_offset +
-			    rmesa->swtcl.indexed_verts.buf->buf->idx *
-			    RADEON_BUFFER_SIZE +
-			    rmesa->swtcl.indexed_verts.start));
-
-      return (void *) radeonAllocEltsOpenEnded( rmesa,
-						rmesa->swtcl.vertex_format,
-						rmesa->swtcl.hw_primitive,
-						nr );
-   }
-}
-
-#define ALLOC_ELTS(nr) radeon_alloc_elts(rmesa, nr)
-
-#ifdef MESA_BIG_ENDIAN
-/* We could do without (most of) this ugliness if dest was always 32 bit word aligned... */
-#define EMIT_ELT(offset, x) do {				\
-	int off = offset + ( ( (GLuint)dest & 0x2 ) >> 1 );	\
-	GLushort *des = (GLushort *)( (GLuint)dest & ~0x2 );	\
-	(des)[ off + 1 - 2 * ( off & 1 ) ] = (GLushort)(x); 	\
-	(void)rmesa; } while (0)
-#else
-#define EMIT_ELT(offset, x) do {				\
-	(dest)[offset] = (GLushort) (x);			\
-	(void)rmesa; } while (0)
-#endif
-#define EMIT_TWO_ELTS(offset, x, y)  *(GLuint *)(dest+offset) = ((y)<<16)|(x);
-#define INCR_ELTS( nr ) dest += nr
-#define ELTPTR dest
-#define RELEASE_ELT_VERTS() \
-  radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, __FUNCTION__ )
-#define EMIT_INDEXED_VERTS( ctx, start, count ) \
-  radeon_emit_indexed_verts( ctx, start, count )
-
-
 #define ALLOC_VERTS( nr ) \
   radeonAllocDmaLowVerts( rmesa, nr, rmesa->swtcl.vertex_size * 4 )
 #define EMIT_VERTS( ctx, j, nr, buf ) \
-  radeon_emit_contiguous_verts(ctx, j, (j)+(nr), buf)
+  _tnl_emit_vertices_to_buffer(ctx, j, (j)+(nr), buf)
 
 #define TAG(x) radeon_dma_##x
 #include "tnl_dd/t_dd_dmatmp.h"
@@ -612,15 +431,6 @@
 
    tnl->Driver.Render.Start( ctx );
 
-   if (VB->Elts) {
-      tab = TAG(render_tab_elts);
-      if (!rmesa->swtcl.indexed_verts.buf) {
-	 if (VB->Count > GET_SUBSEQUENT_VB_MAX_VERTS())
-	    return GL_TRUE;
-	 EMIT_INDEXED_VERTS(ctx, 0, VB->Count);
-      }
-   }
-
    for (i = 0 ; i < VB->PrimitiveCount ; i++)
    {
       GLuint prim = VB->Primitive[i].mode;
@@ -724,7 +534,7 @@
 	 }
       }
 
-      VB->TexCoordPtr[i] = &store->texcoord[i];
+      VB->AttribPtr[VERT_ATTRIB_TEX0+i] = VB->TexCoordPtr[i] = &store->texcoord[i];
    }
 
    return GL_TRUE;
@@ -833,12 +643,12 @@
 #define CTX_ARG radeonContextPtr rmesa
 #define CTX_ARG2 rmesa
 #define GET_VERTEX_DWORDS() rmesa->swtcl.vertex_size
-#define ALLOC_VERTS( n, size ) radeonAllocDmaLowVerts( rmesa, n, size * 4 )
+#define ALLOC_VERTS( n, size ) radeonAllocDmaLowVerts( rmesa, n, (size) * 4 )
 #undef LOCAL_VARS
 #define LOCAL_VARS						\
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);		\
    const char *radeonverts = (char *)rmesa->swtcl.verts;
-#define VERT(x) (radeonVertex *)(radeonverts + (x * vertsize * sizeof(int)))
+#define VERT(x) (radeonVertex *)(radeonverts + ((x) * vertsize * sizeof(int)))
 #define VERTEX radeonVertex 
 #undef TAG
 #define TAG(x) radeon_##x
@@ -860,7 +670,7 @@
 
 #define RADEON_TWOSIDE_BIT	0x01
 #define RADEON_UNFILLED_BIT	0x02
-#define RADEON_MAX_TRIFUNC	0x08
+#define RADEON_MAX_TRIFUNC	0x04
 
 
 static struct {
@@ -895,7 +705,7 @@
 #define VERT_Y(_v) _v->v.y
 #define VERT_Z(_v) _v->v.z
 #define AREA_IS_CCW( a ) (a < 0)
-#define GET_VERTEX(e) (rmesa->swtcl.verts + (e * rmesa->swtcl.vertex_size * sizeof(int)))
+#define GET_VERTEX(e) (rmesa->swtcl.verts + ((e) * rmesa->swtcl.vertex_size * sizeof(int)))
 
 #define VERT_SET_RGBA( v, c )  					\
 do {								\
@@ -908,20 +718,23 @@
 
 #define VERT_COPY_RGBA( v0, v1 ) v0->ui[coloroffset] = v1->ui[coloroffset]
 
-#define VERT_SET_SPEC( v0, c )					\
+#define VERT_SET_SPEC( v, c )					\
 do {								\
-   if (havespec) {						\
-      UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.red, (c)[0]);	\
-      UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.green, (c)[1]);	\
-      UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.blue, (c)[2]);	\
+   if (specoffset) {						\
+      radeon_color_t *spec = (radeon_color_t *)&((v)->ui[specoffset]);	\
+      UNCLAMPED_FLOAT_TO_UBYTE(spec->red, (c)[0]);	\
+      UNCLAMPED_FLOAT_TO_UBYTE(spec->green, (c)[1]);	\
+      UNCLAMPED_FLOAT_TO_UBYTE(spec->blue, (c)[2]);	\
    }								\
 } while (0)
 #define VERT_COPY_SPEC( v0, v1 )			\
 do {							\
-   if (havespec) {					\
-      v0->v.specular.red   = v1->v.specular.red;	\
-      v0->v.specular.green = v1->v.specular.green;	\
-      v0->v.specular.blue  = v1->v.specular.blue; 	\
+   if (specoffset) {					\
+      radeon_color_t *spec0 = (radeon_color_t *)&((v0)->ui[specoffset]);	\
+      radeon_color_t *spec1 = (radeon_color_t *)&((v1)->ui[specoffset]);	\
+      spec0->red   = spec1->red;	\
+      spec0->green = spec1->green;	\
+      spec0->blue  = spec1->blue; 	\
    }							\
 } while (0)
 
@@ -930,8 +743,8 @@
  */
 #define VERT_SAVE_RGBA( idx )    color[idx] = v[idx]->ui[coloroffset]
 #define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = color[idx]
-#define VERT_SAVE_SPEC( idx )    if (havespec) spec[idx] = v[idx]->ui[5]
-#define VERT_RESTORE_SPEC( idx ) if (havespec) v[idx]->ui[5] = spec[idx]
+#define VERT_SAVE_SPEC( idx )    if (specoffset) spec[idx] = v[idx]->ui[specoffset]
+#define VERT_RESTORE_SPEC( idx ) if (specoffset) v[idx]->ui[specoffset] = spec[idx]
 
 #undef LOCAL_VARS
 #undef TAG
@@ -940,9 +753,9 @@
 #define LOCAL_VARS(n)							\
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);			\
    GLuint color[n], spec[n];						\
-   GLuint coloroffset = (rmesa->swtcl.vertex_size == 4 ? 3 : 4);	\
-   GLboolean havespec = (rmesa->swtcl.vertex_size > 4);			\
-   (void) color; (void) spec; (void) coloroffset; (void) havespec;
+   GLuint coloroffset = rmesa->swtcl.coloroffset;	\
+   GLuint specoffset = rmesa->swtcl.specoffset;			\
+   (void) color; (void) spec; (void) coloroffset; (void) specoffset;
 
 /***********************************************************************
  *                Helpers for rendering unfilled primitives            *
@@ -990,7 +803,6 @@
 /*               Render unclipped begin/end objects                   */
 /**********************************************************************/
 
-#define VERT(x) (radeonVertex *)(radeonverts + (x * vertsize * sizeof(int)))
 #define RENDER_POINTS( start, count )		\
    for ( ; start < count ; start++)		\
       radeon_point( rmesa, VERT(start) )
@@ -1153,7 +965,11 @@
 	 tnl->Driver.Render.Start = radeonRenderStart;
 	 tnl->Driver.Render.PrimitiveNotify = radeonRenderPrimitive;
 	 tnl->Driver.Render.Finish = radeonRenderFinish;
-	 tnl->Driver.Render.BuildVertices = radeonBuildVertices;
+
+	 tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
+	 tnl->Driver.Render.CopyPV = _tnl_copy_pv;
+	 tnl->Driver.Render.Interp = _tnl_interp;
+
 	 tnl->Driver.Render.ResetLineStipple = radeonResetLineStipple;
 	 TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_RASTER, GL_FALSE );
 	 if (rmesa->TclFallback) {
@@ -1189,12 +1005,10 @@
 {
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLuint size = TNL_CONTEXT(ctx)->vb.Size;
    static int firsttime = 1;
 
    if (firsttime) {
       init_rast_tab();
-      init_setup_tab();
       firsttime = 0;
    }
 
@@ -1202,9 +1016,14 @@
    tnl->Driver.Render.Finish = radeonRenderFinish;
    tnl->Driver.Render.PrimitiveNotify = radeonRenderPrimitive;
    tnl->Driver.Render.ResetLineStipple = radeonResetLineStipple;
-   tnl->Driver.Render.BuildVertices = radeonBuildVertices;
+   tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
+   tnl->Driver.Render.CopyPV = _tnl_copy_pv;
+   tnl->Driver.Render.Interp = _tnl_interp;
 
-   rmesa->swtcl.verts = (GLubyte *)ALIGN_MALLOC( size * 16 * 4, 32 );
+   _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, 
+		       RADEON_MAX_TNL_VERTEX_SIZE);
+   
+   rmesa->swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
    rmesa->swtcl.RenderIndex = ~0;
    rmesa->swtcl.render_primitive = GL_TRIANGLES;
    rmesa->swtcl.hw_primitive = 0;
@@ -1218,10 +1037,4 @@
    if (rmesa->swtcl.indexed_verts.buf) 
       radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, 
 			      __FUNCTION__ );
-
-   if (rmesa->swtcl.verts) {
-      ALIGN_FREE(rmesa->swtcl.verts);
-      rmesa->swtcl.verts = NULL;
-   }
-
 }
Index: radeon_tcl.c
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/radeon/radeon_tcl.c,v
retrieving revision 1.11
diff -u -r1.11 radeon_tcl.c
--- radeon_tcl.c	16 Feb 2005 15:02:25 -0000	1.11
+++ radeon_tcl.c	28 Feb 2005 00:42:46 -0000
@@ -240,6 +240,9 @@
 			  GLuint last,
 			  GLuint flags )
 {
+   if( RADEON_DEBUG & DEBUG_PRIMS)
+      fprintf( stderr, "%s: flags= %0x, prim:%s\n",
+       __FUNCTION__, flags, _mesa_lookup_enum_by_nr( flags & PRIM_MODE_MASK ));
    tcl_render_tab_verts[flags&PRIM_MODE_MASK]( ctx, first, last, flags );
 }
 
@@ -248,6 +251,8 @@
 			     GLuint last,
 			     GLuint flags )
 {
+   if( RADEON_DEBUG & DEBUG_PRIMS)
+      fprintf( stderr, "%s: flags= %0x\n", __FUNCTION__, flags);
    tcl_render_tab_elts[flags&PRIM_MODE_MASK]( ctx, first, last, flags );
 }
 
@@ -491,7 +496,10 @@
    "Texgen unit 0",
    "Texgen unit 1",
    "Texgen unit 2",
-   "User disable"
+   "User disable",
+   "texture rectangle unit 0",
+   "texture rectangle unit 1",
+   "texture rectangle unit 2"
 };
 
 
Index: radeon_tcl.h
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/radeon/radeon_tcl.h,v
retrieving revision 1.4
diff -u -r1.4 radeon_tcl.h
--- radeon_tcl.h	31 Jan 2005 23:40:06 -0000	1.4
+++ radeon_tcl.h	28 Feb 2005 00:42:46 -0000
@@ -59,7 +59,7 @@
 #define RADEON_TCL_FALLBACK_TEXRECT_1         0x200 /* texture rectangle */
 #define RADEON_TCL_FALLBACK_TEXRECT_2         0x400 /* texture rectangle */
 
-#define RADEON_MAX_TCL_VERTSIZE (15*4)
+#define RADEON_MAX_TCL_VERTSIZE (RADEON_MAX_VERTEX_SIZE*4)
 
 #define TCL_FALLBACK( ctx, bit, mode )	radeonTclFallback( ctx, bit, mode )
 
Index: radeon_tex.c
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/radeon/radeon_tex.c,v
retrieving revision 1.15
diff -u -r1.15 radeon_tex.c
--- radeon_tex.c	7 Oct 2004 23:30:30 -0000	1.15
+++ radeon_tex.c	28 Feb 2005 00:42:48 -0000
@@ -161,6 +161,31 @@
 }
 
 /**
+ * Returns the non-mipmap texture filter corresponding to any filter mode.
+ * Ugly workaround, since mipmapped cubemaps seem to be unsupported on radeon.
+ * I think the mipmaps are generated but are unused, which is worse when
+ * using dynamically generated textures (reflections, etc.).
+ *
+ * \todo: fix mipmapped cubemaps and get rid of this.
+ *
+ * \param filtermode original filtermode
+ */
+static GLenum radeonFakedCubeTexFilter( GLenum filtermode)
+{
+   GLenum faked = filtermode;	/* non-mipmap modes pass through unchanged */
+
+   if (filtermode == GL_NEAREST_MIPMAP_NEAREST ||
+       filtermode == GL_NEAREST_MIPMAP_LINEAR) {
+      faked = GL_NEAREST;
+   }
+   else if (filtermode == GL_LINEAR_MIPMAP_NEAREST ||
+	    filtermode == GL_LINEAR_MIPMAP_LINEAR) {
+      faked = GL_LINEAR;
+   }
+   return faked;
+}
+
+/**
  * Set the texture magnification and minification modes.
  * 
  * \param t Texture whose filter modes are to be set
@@ -260,7 +285,10 @@
 
       radeonSetTexWrap( t, texObj->WrapS, texObj->WrapT );
       radeonSetTexMaxAnisotropy( t, texObj->MaxAnisotropy );
-      radeonSetTexFilter( t, texObj->MinFilter, texObj->MagFilter );
+      if (texObj->Target != GL_TEXTURE_CUBE_MAP)
+	 radeonSetTexFilter( t, texObj->MinFilter, texObj->MagFilter );
+      else
+	 radeonSetTexFilter( t, radeonFakedCubeTexFilter(texObj->MinFilter), texObj->MagFilter );
       radeonSetTexBorderColor( t, texObj->_BorderChan );
    }
 
@@ -731,7 +759,10 @@
    case GL_TEXTURE_MAG_FILTER:
    case GL_TEXTURE_MAX_ANISOTROPY_EXT:
       radeonSetTexMaxAnisotropy( t, texObj->MaxAnisotropy );
-      radeonSetTexFilter( t, texObj->MinFilter, texObj->MagFilter );
+      if (target != GL_TEXTURE_CUBE_MAP)
+	 radeonSetTexFilter( t, texObj->MinFilter, texObj->MagFilter );
+      else
+	 radeonSetTexFilter( t, radeonFakedCubeTexFilter( texObj->MinFilter), texObj->MagFilter );
       break;
 
    case GL_TEXTURE_WRAP_S:
Index: radeon_texmem.c
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/radeon/radeon_texmem.c,v
retrieving revision 1.11
diff -u -r1.11 radeon_texmem.c
--- radeon_texmem.c	10 Feb 2005 22:36:07 -0000	1.11
+++ radeon_texmem.c	28 Feb 2005 00:42:49 -0000
@@ -68,6 +68,7 @@
 	 if ( t == rmesa->state.texture.unit[i].texobj ) {
 	    rmesa->state.texture.unit[i].texobj = NULL;
 	    rmesa->hw.tex[i].dirty = GL_FALSE;
+	    rmesa->hw.cube[i].dirty = GL_FALSE;
 	 }
       }
    }
@@ -226,7 +227,7 @@
    imageWidth = texImage->Width;
    imageHeight = texImage->Height;
 
-   offset = t->bufAddr;
+   offset = t->bufAddr + t->base.totalSize / 6 * face;
 
    if ( RADEON_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) {
       GLint imageX = 0;
Index: radeon_texstate.c
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/radeon/radeon_texstate.c,v
retrieving revision 1.14
diff -u -r1.14 radeon_texstate.c
--- radeon_texstate.c	10 Feb 2005 22:36:07 -0000	1.14
+++ radeon_texstate.c	28 Feb 2005 00:42:53 -0000
@@ -284,6 +284,22 @@
     */
    t->base.totalSize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
 
+   /* Setup remaining cube face blits, if needed */
+   if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
+      const GLuint faceSize = t->base.totalSize;
+      GLuint face;
+      /* reuse face 0 x/y/width/height - just update the offset when uploading */
+      for (face = 1; face < 6; face++) {
+         for (i = 0; i < numLevels; i++) {
+            t->image[face][i].x =  t->image[0][i].x;
+            t->image[face][i].y =  t->image[0][i].y;
+            t->image[face][i].width  = t->image[0][i].width;
+            t->image[face][i].height = t->image[0][i].height;
+         }
+      }
+      t->base.totalSize = 6 * faceSize; /* total texmem needed */
+   }
+
    /* Hardware state:
     */
    t->pp_txfilter &= ~RADEON_MAX_MIP_LEVEL_MASK;
@@ -291,10 +307,27 @@
 
    t->pp_txformat &= ~(RADEON_TXFORMAT_WIDTH_MASK |
 		       RADEON_TXFORMAT_HEIGHT_MASK |
-                       RADEON_TXFORMAT_CUBIC_MAP_ENABLE);
+                       RADEON_TXFORMAT_CUBIC_MAP_ENABLE |
+                       RADEON_TXFORMAT_F5_WIDTH_MASK |
+                       RADEON_TXFORMAT_F5_HEIGHT_MASK);
    t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_WIDTH_SHIFT) |
 		      (log2Height << RADEON_TXFORMAT_HEIGHT_SHIFT));
 
+   if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
+      assert(log2Width == log2Height);
+      t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_F5_WIDTH_SHIFT) |
+                         (log2Height << RADEON_TXFORMAT_F5_HEIGHT_SHIFT) |
+                         (RADEON_TXFORMAT_CUBIC_MAP_ENABLE));
+      t->pp_cubic_faces = ((log2Width << RADEON_FACE_WIDTH_1_SHIFT) |
+                           (log2Height << RADEON_FACE_HEIGHT_1_SHIFT) |
+                           (log2Width << RADEON_FACE_WIDTH_2_SHIFT) |
+                           (log2Height << RADEON_FACE_HEIGHT_2_SHIFT) |
+                           (log2Width << RADEON_FACE_WIDTH_3_SHIFT) |
+                           (log2Height << RADEON_FACE_HEIGHT_3_SHIFT) |
+                           (log2Width << RADEON_FACE_WIDTH_4_SHIFT) |
+                           (log2Height << RADEON_FACE_HEIGHT_4_SHIFT));
+   }
+
    t->pp_txsize = (((tObj->Image[0][t->base.firstLevel]->Width - 1) << 0) |
                    ((tObj->Image[0][t->base.firstLevel]->Height - 1) << 16));
 
@@ -816,30 +849,63 @@
    cmd[TEX_PP_TXFORMAT] |= texobj->pp_txformat & TEXOBJ_TXFORMAT_MASK;
    cmd[TEX_PP_TXOFFSET] = texobj->pp_txoffset;
    cmd[TEX_PP_BORDER_COLOR] = texobj->pp_border_color;
-   RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.tex[unit] );
 
-   if (texobj->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
+   if (texobj->base.tObj->Target == GL_TEXTURE_CUBE_MAP) {
+      GLuint *cube_cmd = RADEON_DB_STATE( cube[unit] );
+      GLuint bytesPerFace = texobj->base.totalSize / 6;
+      /* totalSize is read through the embedded base object, as just above */
+      ASSERT(texobj->base.totalSize % 6 == 0);
+      cube_cmd[CUBE_PP_CUBIC_FACES] = texobj->pp_cubic_faces;
+      /* Faces are addressed bytesPerFace apart, starting at pp_txoffset.
+       * Don't know if this setup conforms to OpenGL; at least it matches
+       * the behavior of the Mesa software renderer. */
+      cube_cmd[CUBE_PP_CUBIC_OFFSET_0] = texobj->pp_txoffset; /* right */
+      cube_cmd[CUBE_PP_CUBIC_OFFSET_1] = texobj->pp_txoffset + 1 * bytesPerFace; /* left */
+      cube_cmd[CUBE_PP_CUBIC_OFFSET_2] = texobj->pp_txoffset + 2 * bytesPerFace; /* top */
+      cube_cmd[CUBE_PP_CUBIC_OFFSET_3] = texobj->pp_txoffset + 3 * bytesPerFace; /* bottom */
+      cube_cmd[CUBE_PP_CUBIC_OFFSET_4] = texobj->pp_txoffset + 4 * bytesPerFace; /* front */
+      RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.cube[unit] );
+      cmd[TEX_PP_TXOFFSET] = texobj->pp_txoffset + 5 * bytesPerFace; /* back */
+   }
+   else if (texobj->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
       GLuint *txr_cmd = RADEON_DB_STATE( txr[unit] );
       txr_cmd[TXR_PP_TEX_SIZE] = texobj->pp_txsize; /* NPOT only! */
       txr_cmd[TXR_PP_TEX_PITCH] = texobj->pp_txpitch; /* NPOT only! */
       RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.txr[unit] );
    }
 
+   RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.tex[unit] );
+
    texobj->dirty_state &= ~(1<<unit);
 }
 
 
 
+/* FIXME: cubemap-patch: I didn't bother trying to understand the following, so just
+ * copied from R200 and/or earlier (nonworking) cubemap-patches I found.
+ * Maybe it's only needed if cubemapping is done with hw-tcl.
+ *
+ * btw: is cubemapping even possible in hw-tcl on R100 ?
+ * Or is it only possible if in non-texgen-mode?
+ * Or do we need texgen-mode for Q first? (it's a fallback atm)
+ * or some magic between texgen-Q and R ?
+ */
 
 static void set_texgen_matrix( radeonContextPtr rmesa, 
 			       GLuint unit,
 			       const GLfloat *s_plane,
-			       const GLfloat *t_plane )
+			       const GLfloat *t_plane,
+			       const GLfloat *r_plane,
+			       const GLfloat *q_plane)
 {
    static const GLfloat scale_identity[4] = { 1,1,1,1 };
 
+/*
    if (!TEST_EQ_4V( s_plane, scale_identity) ||
-       !TEST_EQ_4V( t_plane, scale_identity)) {
+       !TEST_EQ_4V( t_plane, scale_identity) ||
+       !TEST_EQ_4V( r_plane, scale_identity) ||
+       !TEST_EQ_4V( q_plane, scale_identity))		*/	 {
+
       rmesa->TexGenEnabled |= RADEON_TEXMAT_0_ENABLE<<unit;
       rmesa->TexGenMatrix[unit].m[0]  = s_plane[0];
       rmesa->TexGenMatrix[unit].m[4]  = s_plane[1];
@@ -850,10 +916,60 @@
       rmesa->TexGenMatrix[unit].m[5]  = t_plane[1];
       rmesa->TexGenMatrix[unit].m[9]  = t_plane[2];
       rmesa->TexGenMatrix[unit].m[13] = t_plane[3];
+
+      /* NOTE: r_plane goes in the 4th row, not 3rd! */
+      rmesa->TexGenMatrix[unit].m[3]  = r_plane[0];
+      rmesa->TexGenMatrix[unit].m[7]  = r_plane[1];
+      rmesa->TexGenMatrix[unit].m[11] = r_plane[2];
+      rmesa->TexGenMatrix[unit].m[15] = r_plane[3];
+
+
+/* FIXME: check this: atm we overwrite the r_plane */
+
+      rmesa->TexGenMatrix[unit].m[2]  = 0;
+      rmesa->TexGenMatrix[unit].m[6]  = 0;
+      rmesa->TexGenMatrix[unit].m[10] = 0;
+      rmesa->TexGenMatrix[unit].m[14] = 0;
+
+
+      rmesa->TexGenMatrix[unit].m[3]  = 0;
+      rmesa->TexGenMatrix[unit].m[7]  = 0;
+      rmesa->TexGenMatrix[unit].m[11] = 0;
+      rmesa->TexGenMatrix[unit].m[15] = 0;
+
       rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
    }
 }
 
+/* Install the fixed texgen matrix below for this unit; this special
+ * matrix is needed to get correct reflection map coords. */
+static void set_texgen_reflection_matrix( radeonContextPtr rmesa, GLuint unit )
+{
+   static const GLfloat reflect[16] = {
+      -1,  0,  0,  0,
+       0, -1,  0,  0,
+       0,  0,  0, -1,
+       0,  0, -1,  0 };
+   rmesa->TexGenEnabled |= RADEON_TEXMAT_0_ENABLE<<unit;
+   _math_matrix_loadf( &rmesa->TexGenMatrix[unit], reflect );
+   _math_matrix_analyse( &rmesa->TexGenMatrix[unit] );
+}
+
+/* Install the fixed texgen matrix below for this unit; this special
+ * matrix is needed to get correct normal map coords. */
+static void set_texgen_normal_map_matrix( radeonContextPtr rmesa, GLuint unit )
+{
+   static const GLfloat normal_map[16] = {
+      1, 0, 0, 0,
+      0, 1, 0, 0,
+      0, 0, 0, 1,
+      0, 0, 1, 0 };
+   rmesa->TexGenEnabled |= RADEON_TEXMAT_0_ENABLE<<unit;
+   _math_matrix_loadf( &rmesa->TexGenMatrix[unit], normal_map );
+   _math_matrix_analyse( &rmesa->TexGenMatrix[unit] );
+}
+
+
 /* Ignoring the Q texcoord for now.
  *
  * Returns GL_FALSE if fallback required.  
@@ -870,13 +986,15 @@
    rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_INPUT_MASK<<inputshift);
    rmesa->TexGenNeedNormals[unit] = 0;
 
-   if ((texUnit->TexGenEnabled & (S_BIT|T_BIT)) == 0) {
+   if ((texUnit->TexGenEnabled & (S_BIT|T_BIT|R_BIT)) == 0) {
       /* Disabled, no fallback:
        */
+/* FIXME: shouldn't we disable texgen? */
       rmesa->TexGenEnabled |= 
 	 (RADEON_TEXGEN_INPUT_TEXCOORD_0+unit) << inputshift;
       return GL_TRUE;
    }
+#if 1
    else if (texUnit->TexGenEnabled & Q_BIT) {
       /* Very easy to do this, in fact would remove a fallback case
        * elsewhere, but I haven't done it yet...  Fallback: 
@@ -884,39 +1002,70 @@
       fprintf(stderr, "fallback Q_BIT\n");
       return GL_FALSE;
    }
-   else if ((texUnit->TexGenEnabled & (S_BIT|T_BIT)) != (S_BIT|T_BIT) ||
-	    texUnit->GenModeS != texUnit->GenModeT) {
+#endif
+   else if (texUnit->TexGenEnabled == (S_BIT|T_BIT) &&
+	    texUnit->GenModeS == texUnit->GenModeT) {
+      /* OK */
+      rmesa->TexGenEnabled |= RADEON_TEXGEN_TEXMAT_0_ENABLE << unit;
+      /* continue */
+   }
+#if 0
+/* doesn't work at the moment */
+   else if (texUnit->TexGenEnabled == (S_BIT|T_BIT|R_BIT) &&
+	    texUnit->GenModeS == texUnit->GenModeT &&
+            texUnit->GenModeT == texUnit->GenModeR) {
+      /* OK */
+      rmesa->TexGenEnabled |= RADEON_TEXGEN_TEXMAT_0_ENABLE << unit;
+      /* continue */
+   }
+#endif
+/* FIXME: don't know if Q_BIT works ... */
+/* just ignore the R texgenmode */
+   else if (texUnit->TexGenEnabled == (S_BIT|T_BIT|R_BIT|Q_BIT) &&
+	    texUnit->GenModeS == texUnit->GenModeT &&
+            texUnit->GenModeT == texUnit->GenModeQ) {
+      /* OK */
+      rmesa->TexGenEnabled |= RADEON_TEXGEN_TEXMAT_0_ENABLE << unit;
+      /* continue */
+   }
+   else {
       /* Mixed modes, fallback:
        */
       /* fprintf(stderr, "fallback mixed texgen\n"); */
       return GL_FALSE;
    }
-   else
-      rmesa->TexGenEnabled |= RADEON_TEXGEN_TEXMAT_0_ENABLE << unit;
+
+   rmesa->TexGenEnabled |= RADEON_TEXGEN_TEXMAT_0_ENABLE << unit;
 
    switch (texUnit->GenModeS) {
    case GL_OBJECT_LINEAR:
       rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_OBJ << inputshift;
       set_texgen_matrix( rmesa, unit, 
 			 texUnit->ObjectPlaneS,
-			 texUnit->ObjectPlaneT);
+			 texUnit->ObjectPlaneT,
+                         texUnit->ObjectPlaneR,
+                         texUnit->ObjectPlaneQ);
       break;
 
    case GL_EYE_LINEAR:
       rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_EYE << inputshift;
       set_texgen_matrix( rmesa, unit, 
 			 texUnit->EyePlaneS,
-			 texUnit->EyePlaneT);
+			 texUnit->EyePlaneT,
+			 texUnit->EyePlaneR,
+			 texUnit->EyePlaneQ);
       break;
 
    case GL_REFLECTION_MAP_NV:
       rmesa->TexGenNeedNormals[unit] = GL_TRUE;
       rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_EYE_REFLECT<<inputshift;
+      set_texgen_reflection_matrix(rmesa, unit);
       break;
 
    case GL_NORMAL_MAP_NV:
       rmesa->TexGenNeedNormals[unit] = GL_TRUE;
       rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_EYE_NORMAL<<inputshift;
+      set_texgen_normal_map_matrix(rmesa, unit);
       break;
 
    case GL_SPHERE_MAP:
@@ -1022,6 +1171,48 @@
    return GL_TRUE;
 }
 
+static GLboolean enable_tex_cube( GLcontext *ctx, int unit )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
+   radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData;
+   GLboolean needLayout = GL_FALSE;
+   GLuint face;
+
+   /* If the NON_POWER2 format bit is set, clear it and mark all six
+    * faces dirty so that they are uploaded again below.
+    */
+   if (t->pp_txformat & RADEON_TXFORMAT_NON_POWER2) {
+      t->pp_txformat &= ~RADEON_TXFORMAT_NON_POWER2;
+      for (face = 0; face < 6; face++)
+         t->base.dirty_images[face] = ~0;
+   }
+
+   ASSERT(tObj->Target == GL_TEXTURE_CUBE_MAP);
+
+   /* Is any of the six faces dirty? */
+   for (face = 0; face < 6; face++) {
+      if (t->base.dirty_images[face])
+         needLayout = GL_TRUE;
+   }
+
+   if (needLayout) {
+      /* flush, then layout memory space, once for all faces */
+      RADEON_FIREVERTICES( rmesa );
+      radeonSetTexImages( rmesa, tObj );
+   }
+
+   /* upload (per face), skipping faces that are not dirty */
+   for (face = 0; face < 6; face++) {
+      if (!t->base.dirty_images[face])
+         continue;
+      radeonUploadTexImages( rmesa, (radeonTexObjPtr) tObj->DriverData, face );
+   }
+
+   /* no memBlock: texmem alloc failed, use s/w fallback */
+   return t->base.memBlock ? GL_TRUE : GL_FALSE;
+}
+
 static GLboolean enable_tex_rect( GLcontext *ctx, int unit )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
@@ -1144,6 +1335,10 @@
       return (enable_tex_2d( ctx, unit ) &&
 	      update_tex_common( ctx, unit ));
    }
+   else if ( texUnit->_ReallyEnabled & (TEXTURE_CUBE_BIT) ) {
+      return (enable_tex_cube( ctx, unit ) &&
+	      update_tex_common( ctx, unit ));
+   }
    else if ( texUnit->_ReallyEnabled ) {
       return GL_FALSE;
    }
Index: radeon_vtxfmt.c
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/radeon/radeon_vtxfmt.c,v
retrieving revision 1.9
diff -u -r1.9 radeon_vtxfmt.c
--- radeon_vtxfmt.c	16 Feb 2005 15:02:25 -0000	1.9
+++ radeon_vtxfmt.c	28 Feb 2005 00:42:56 -0000
@@ -87,10 +87,18 @@
    count_func( "SecondaryColor3ubv", &rmesa->vb.dfn_cache.SecondaryColor3ubvEXT );
    count_func( "Normal3f", &rmesa->vb.dfn_cache.Normal3f );
    count_func( "Normal3fv", &rmesa->vb.dfn_cache.Normal3fv );
+#if 0		/* \todo: make this working */
+   count_func( "TexCoord3f", &rmesa->vb.dfn_cache.TexCoord3f );
+   count_func( "TexCoord3fv", &rmesa->vb.dfn_cache.TexCoord3fv );
+#endif
    count_func( "TexCoord2f", &rmesa->vb.dfn_cache.TexCoord2f );
    count_func( "TexCoord2fv", &rmesa->vb.dfn_cache.TexCoord2fv );
    count_func( "TexCoord1f", &rmesa->vb.dfn_cache.TexCoord1f );
    count_func( "TexCoord1fv", &rmesa->vb.dfn_cache.TexCoord1fv );
+#if 0
+   count_func( "MultiTexCoord3fARB", &rmesa->vb.dfn_cache.MultiTexCoord3fARB );
+   count_func( "MultiTexCoord3fvARB", &rmesa->vb.dfn_cache.MultiTexCoord3fvARB );
+#endif
    count_func( "MultiTexCoord2fARB", &rmesa->vb.dfn_cache.MultiTexCoord2fARB );
    count_func( "MultiTexCoord2fvARB", &rmesa->vb.dfn_cache.MultiTexCoord2fvARB );
    count_func( "MultiTexCoord1fARB", &rmesa->vb.dfn_cache.MultiTexCoord1fARB );
@@ -135,14 +143,22 @@
    if (rmesa->vb.vertex_format & RADEON_CP_VC_FRMT_ST0) {
       ctx->Current.Attrib[VERT_ATTRIB_TEX0][0] = rmesa->vb.texcoordptr[0][0];
       ctx->Current.Attrib[VERT_ATTRIB_TEX0][1] = rmesa->vb.texcoordptr[0][1];
-      ctx->Current.Attrib[VERT_ATTRIB_TEX0][2] = 0.0F;
+      if (rmesa->vb.vertex_format & RADEON_CP_VC_FRMT_Q0) {
+	 ctx->Current.Attrib[VERT_ATTRIB_TEX0][2] = rmesa->vb.texcoordptr[0][2];
+      } else {
+      	 ctx->Current.Attrib[VERT_ATTRIB_TEX0][2] = 0.0F;
+      }
       ctx->Current.Attrib[VERT_ATTRIB_TEX0][3] = 1.0F;
    }
 
    if (rmesa->vb.vertex_format & RADEON_CP_VC_FRMT_ST1) {
       ctx->Current.Attrib[VERT_ATTRIB_TEX1][0] = rmesa->vb.texcoordptr[1][0];
       ctx->Current.Attrib[VERT_ATTRIB_TEX1][1] = rmesa->vb.texcoordptr[1][1];
-      ctx->Current.Attrib[VERT_ATTRIB_TEX1][2] = 0.0F;
+      if (rmesa->vb.vertex_format & RADEON_CP_VC_FRMT_Q1) {
+	 ctx->Current.Attrib[VERT_ATTRIB_TEX1][2] = rmesa->vb.texcoordptr[1][2];
+      } else {
+      	 ctx->Current.Attrib[VERT_ATTRIB_TEX1][2] = 0.0F;
+      }
       ctx->Current.Attrib[VERT_ATTRIB_TEX1][3] = 1.0F;
    }
 
@@ -180,6 +196,13 @@
    rmesa->tcl.nr_aos_components = 1;
    rmesa->dma.flush = NULL;
 
+   if( RADEON_DEBUG & DEBUG_PRIMS) {
+      fprintf( stderr, "%s: \n", __FUNCTION__);
+      fprintf( stderr, "  rmesa->tcl.vertex_format=0x%0x\n", rmesa->tcl.vertex_format);
+      fprintf( stderr, "  rmesa->vb.vertex_size=%d\n", rmesa->vb.vertex_size);
+      fprintf( stderr, "  rmesa->vb.nrprims=%d\n", rmesa->vb.nrprims);
+   }
+      
    /* Optimize the primitive list:
     */
    if (rmesa->vb.nrprims > 1) {
@@ -199,6 +222,10 @@
       rmesa->vb.nrprims = j+1;
    }
 
+   if( RADEON_DEBUG & DEBUG_PRIMS) {
+      fprintf( stderr, "  rmesa->vb.nrprims=%d\n", rmesa->vb.nrprims);
+   }
+
    for (i = 0 ; i < rmesa->vb.nrprims; i++) {
       if (RADEON_DEBUG & DEBUG_PRIMS)
 	 fprintf(stderr, "vtxfmt prim %d: %s %d..%d\n", i,
@@ -215,6 +242,9 @@
 
    rmesa->vb.nrprims = 0;
    radeonReleaseDmaRegion( rmesa, &tmp, __FUNCTION__ );
+
+   if( RADEON_DEBUG & DEBUG_PRIMS)
+      fprintf( stderr, "%s: --end--\n", __FUNCTION__);
 }
 
 
@@ -262,7 +292,7 @@
  * memory.  Could also use the counter/notify mechanism to populate
  * tmp on the fly as vertices are generated.  
  */
-static GLuint copy_dma_verts( radeonContextPtr rmesa, GLfloat (*tmp)[15] )
+static GLuint copy_dma_verts( radeonContextPtr rmesa, GLfloat (*tmp)[RADEON_MAX_VERTEX_SIZE] )
 {
    GLuint ovf, i;
    GLuint nr = (rmesa->vb.initial_counter - rmesa->vb.counter) - rmesa->vb.primlist[rmesa->vb.nrprims].start;
@@ -350,11 +380,11 @@
 }
 
 
-static void VFMT_FALLBACK( const char *caller )
+void VFMT_FALLBACK( const char *caller )
 {
    GET_CURRENT_CONTEXT(ctx);
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLfloat tmp[3][15];
+   GLfloat tmp[3][RADEON_MAX_VERTEX_SIZE];
    GLuint i, prim;
    GLuint ind = rmesa->vb.vertex_format;
    GLuint nrverts;
@@ -422,13 +452,26 @@
       }
 
       if (ind & RADEON_CP_VC_FRMT_ST0) {
-	 GL_CALL(TexCoord2fv)( &tmp[i][offset] ); 
-	 offset += 2;
+/* FIXME: what should happen if Q0 is set from projective texture? */
+/* unfortunately the vertex_format used here is from the vb struct, not from the swtcl struct */
+/* so I think we can't use the projformat from swtcl */
+      	 if (ind & RADEON_CP_VC_FRMT_Q0) {
+      	    GL_CALL(TexCoord3fv)( &tmp[i][offset] ); 
+      	    offset += 3;
+      	 } else {
+	    GL_CALL(TexCoord2fv)( &tmp[i][offset] ); 
+	    offset += 2;
+	 }
       }
 
       if (ind & RADEON_CP_VC_FRMT_ST1) {
-	 GL_CALL(MultiTexCoord2fvARB)( GL_TEXTURE1_ARB, &tmp[i][offset] );
-	 offset += 2;
+      	 if (ind & RADEON_CP_VC_FRMT_Q1) {
+	    GL_CALL(MultiTexCoord3fvARB)( GL_TEXTURE1, &tmp[i][offset] );
+	    offset += 3;
+	 } else {
+	    GL_CALL(MultiTexCoord2fvARB)( GL_TEXTURE1, &tmp[i][offset] );
+	    offset += 2;
+	 }
       }
       GL_CALL(Vertex3fv)( &tmp[i][0] );
    }
@@ -455,11 +498,21 @@
    if (ind & RADEON_CP_VC_FRMT_PKSPEC) 
        GL_CALL(SecondaryColor3ubEXT)( rmesa->vb.specptr->red, rmesa->vb.specptr->green, rmesa->vb.specptr->blue ); 
 
-   if (ind & RADEON_CP_VC_FRMT_ST0) 
-      GL_CALL(TexCoord2fv)( rmesa->vb.texcoordptr[0] );
+   if (ind & RADEON_CP_VC_FRMT_ST0) {
+      if (ind & RADEON_CP_VC_FRMT_Q0) {
+      	 GL_CALL(TexCoord3fv)( rmesa->vb.texcoordptr[0] );
+      } else {
+      	 GL_CALL(TexCoord2fv)( rmesa->vb.texcoordptr[0] );
+      }
+   }
 
-   if (ind & RADEON_CP_VC_FRMT_ST1) 
-      GL_CALL(MultiTexCoord2fvARB)( GL_TEXTURE1_ARB, rmesa->vb.texcoordptr[1] );
+   if (ind & RADEON_CP_VC_FRMT_ST1) {
+      if (ind & RADEON_CP_VC_FRMT_Q1) {
+         GL_CALL(MultiTexCoord3fvARB)( GL_TEXTURE1, rmesa->vb.texcoordptr[1] );
+      } else {
+         GL_CALL(MultiTexCoord2fvARB)( GL_TEXTURE1, rmesa->vb.texcoordptr[1] );
+      }
+   }
 }
 
 
@@ -468,7 +521,7 @@
 {
    GET_CURRENT_CONTEXT(ctx);
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLfloat tmp[3][15];
+   GLfloat tmp[3][RADEON_MAX_VERTEX_SIZE];
    GLuint i, nrverts;
 
    if (RADEON_DEBUG & (DEBUG_VFMT|DEBUG_PRIMS))
@@ -585,29 +638,53 @@
 	    ind |= RADEON_CP_VC_FRMT_N0;
 	 }
       } else {
-	 if (ctx->Current.Attrib[VERT_ATTRIB_TEX0][2] != 0.0F ||
-	     ctx->Current.Attrib[VERT_ATTRIB_TEX0][3] != 1.0) {
+	 if (ctx->Current.Attrib[VERT_ATTRIB_TEX0][3] != 1.0) {
 	    if (RADEON_DEBUG & (DEBUG_VFMT|DEBUG_FALLBACKS))
-	       fprintf(stderr, "%s: rq0\n", __FUNCTION__);
+	       fprintf(stderr, "%s: q0\n", __FUNCTION__);
 	    return GL_FALSE;
 	 }
 	 ind |= RADEON_CP_VC_FRMT_ST0;
+
+/* FIXME: This doesn't work as expected */
+	 if (ctx->Current.Attrib[VERT_ATTRIB_TEX0][2] != 0.0F)
+	    ind |= RADEON_CP_VC_FRMT_Q0;
+/* workaround? */
+#if 1
+	 if (ctx->Texture.Unit[0]._ReallyEnabled == TEXTURE_CUBE_BIT)
+	    ind |= RADEON_CP_VC_FRMT_Q0;
+#endif
+	 if( RADEON_DEBUG & DEBUG_VFMT)
+	    fprintf( stderr, "%s: ctx->Current.Attrib[VERT_ATTRIB_TEX0][0..3] = %f %f %f %f\n",
+	       __FUNCTION__,
+	       ctx->Current.Attrib[VERT_ATTRIB_TEX0][0],
+	       ctx->Current.Attrib[VERT_ATTRIB_TEX0][1],
+	       ctx->Current.Attrib[VERT_ATTRIB_TEX0][2],
+	       ctx->Current.Attrib[VERT_ATTRIB_TEX0][3]);
       }
    }
 
+#if 0
    if (ctx->Texture.Unit[1]._ReallyEnabled) {
+#else
+   if (1) {
+#endif
       if (ctx->Texture.Unit[1].TexGenEnabled) {
 	 if (rmesa->TexGenNeedNormals[1]) {
 	    ind |= RADEON_CP_VC_FRMT_N0;
 	 }
       } else {
-	 if (ctx->Current.Attrib[VERT_ATTRIB_TEX1][2] != 0.0F ||
-	     ctx->Current.Attrib[VERT_ATTRIB_TEX1][3] != 1.0) {
+	 if (ctx->Current.Attrib[VERT_ATTRIB_TEX1][3] != 1.0) {
 	    if (RADEON_DEBUG & (DEBUG_VFMT|DEBUG_FALLBACKS))
-	       fprintf(stderr, "%s: rq1\n", __FUNCTION__);
+	       fprintf(stderr, "%s: q1\n", __FUNCTION__);
 	    return GL_FALSE;
 	 }
 	 ind |= RADEON_CP_VC_FRMT_ST1;
+/* this won't work.. */
+	 if (ctx->Current.Attrib[VERT_ATTRIB_TEX1][2] != 0.0F)
+	    ind |= RADEON_CP_VC_FRMT_Q1;
+/* workaround? */
+	 if (ctx->Texture.Unit[1]._ReallyEnabled == TEXTURE_CUBE_BIT)
+	    ind |= RADEON_CP_VC_FRMT_Q1;
       }
    }
 
@@ -674,6 +751,11 @@
       rmesa->vb.vertex_size += 2;
       rmesa->vb.texcoordptr[0][0] = ctx->Current.Attrib[VERT_ATTRIB_TEX0][0];
       rmesa->vb.texcoordptr[0][1] = ctx->Current.Attrib[VERT_ATTRIB_TEX0][1];   
+/* FIXME: first check if Q0 is set from projtex or cubemap/3dtex */
+      if (ind & RADEON_CP_VC_FRMT_Q0) {
+         rmesa->vb.texcoordptr[0][2] = ctx->Current.Attrib[VERT_ATTRIB_TEX0][2];   
+         rmesa->vb.vertex_size += 1;
+      }
    } 
 
    if (ind & RADEON_CP_VC_FRMT_ST1) {
@@ -681,8 +763,13 @@
       rmesa->vb.vertex_size += 2;
       rmesa->vb.texcoordptr[1][0] = ctx->Current.Attrib[VERT_ATTRIB_TEX1][0];
       rmesa->vb.texcoordptr[1][1] = ctx->Current.Attrib[VERT_ATTRIB_TEX1][1];
+      if (ind & RADEON_CP_VC_FRMT_Q1) {
+         rmesa->vb.texcoordptr[1][2] = ctx->Current.Attrib[VERT_ATTRIB_TEX1][2];   
+         rmesa->vb.vertex_size += 1;
+      }
    } 
 
+/* FIXME: also check projformat/installed_projformat ? */
    if (rmesa->vb.installed_vertex_format != rmesa->vb.vertex_format) {
       if (RADEON_DEBUG & DEBUG_VFMT)
 	 fprintf(stderr, "reinstall on vertex_format change\n");
@@ -811,6 +898,7 @@
    /* Need to arrange to save vertices here?  Or always copy from dma (yuk)?
     */
    if (!rmesa->dma.flush) {
+/* FIXME: what are these constants? */
       if (rmesa->dma.current.ptr + 12*rmesa->vb.vertex_size*4 > 
 	  rmesa->dma.current.end) {
 	 RADEON_NEWPRIM( rmesa );
@@ -982,12 +1070,14 @@
    vfmt->EvalMesh2 = radeon_fallback_EvalMesh2;
    vfmt->EvalPoint1 = radeon_fallback_EvalPoint1;
    vfmt->EvalPoint2 = radeon_fallback_EvalPoint2;
-   vfmt->TexCoord3f = radeon_fallback_TexCoord3f;
-   vfmt->TexCoord3fv = radeon_fallback_TexCoord3fv;
    vfmt->TexCoord4f = radeon_fallback_TexCoord4f;
    vfmt->TexCoord4fv = radeon_fallback_TexCoord4fv;
+#if 1	/* \todo: eliminate fallback  */
+   vfmt->TexCoord3f = radeon_fallback_TexCoord3f;
+   vfmt->TexCoord3fv = radeon_fallback_TexCoord3fv;
    vfmt->MultiTexCoord3fARB = radeon_fallback_MultiTexCoord3fARB;
    vfmt->MultiTexCoord3fvARB = radeon_fallback_MultiTexCoord3fvARB;
+#endif
    vfmt->MultiTexCoord4fARB = radeon_fallback_MultiTexCoord4fARB;
    vfmt->MultiTexCoord4fvARB = radeon_fallback_MultiTexCoord4fvARB;
    vfmt->Vertex4f = radeon_fallback_Vertex4f;
@@ -1027,10 +1117,18 @@
    make_empty_list( &rmesa->vb.dfn_cache.SecondaryColor3ubvEXT );
    make_empty_list( &rmesa->vb.dfn_cache.Normal3f );
    make_empty_list( &rmesa->vb.dfn_cache.Normal3fv );
+#if 0
+   make_empty_list( &rmesa->vb.dfn_cache.TexCoord3f );
+   make_empty_list( &rmesa->vb.dfn_cache.TexCoord3fv );
+#endif
    make_empty_list( &rmesa->vb.dfn_cache.TexCoord2f );
    make_empty_list( &rmesa->vb.dfn_cache.TexCoord2fv );
    make_empty_list( &rmesa->vb.dfn_cache.TexCoord1f );
    make_empty_list( &rmesa->vb.dfn_cache.TexCoord1fv );
+#if 0
+   make_empty_list( &rmesa->vb.dfn_cache.MultiTexCoord3fARB );
+   make_empty_list( &rmesa->vb.dfn_cache.MultiTexCoord3fvARB );
+#endif
    make_empty_list( &rmesa->vb.dfn_cache.MultiTexCoord2fARB );
    make_empty_list( &rmesa->vb.dfn_cache.MultiTexCoord2fvARB );
    make_empty_list( &rmesa->vb.dfn_cache.MultiTexCoord1fARB );
@@ -1079,10 +1177,18 @@
    free_funcs( &rmesa->vb.dfn_cache.SecondaryColor3fvEXT );
    free_funcs( &rmesa->vb.dfn_cache.Normal3f );
    free_funcs( &rmesa->vb.dfn_cache.Normal3fv );
+#if 0
+   free_funcs( &rmesa->vb.dfn_cache.TexCoord3f );
+   free_funcs( &rmesa->vb.dfn_cache.TexCoord3fv );
+#endif
    free_funcs( &rmesa->vb.dfn_cache.TexCoord2f );
    free_funcs( &rmesa->vb.dfn_cache.TexCoord2fv );
    free_funcs( &rmesa->vb.dfn_cache.TexCoord1f );
    free_funcs( &rmesa->vb.dfn_cache.TexCoord1fv );
+#if 0
+   free_funcs( &rmesa->vb.dfn_cache.MultiTexCoord3fARB );
+   free_funcs( &rmesa->vb.dfn_cache.MultiTexCoord3fvARB );
+#endif
    free_funcs( &rmesa->vb.dfn_cache.MultiTexCoord2fARB );
    free_funcs( &rmesa->vb.dfn_cache.MultiTexCoord2fvARB );
    free_funcs( &rmesa->vb.dfn_cache.MultiTexCoord1fARB );
Index: radeon_vtxfmt.h
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/radeon/radeon_vtxfmt.h,v
retrieving revision 1.4
diff -u -r1.4 radeon_vtxfmt.h
--- radeon_vtxfmt.h	31 Jan 2005 23:40:06 -0000	1.4
+++ radeon_vtxfmt.h	28 Feb 2005 00:42:56 -0000
@@ -49,6 +49,7 @@
 extern void radeonVtxfmtUnbindContext( GLcontext *ctx );
 
 extern void radeon_copy_to_current( GLcontext *ctx );
+extern void VFMT_FALLBACK( const char *caller );
 
 #define DFN( FUNC, CACHE)				\
 do {							\
Index: radeon_vtxfmt_c.c
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/radeon/radeon_vtxfmt_c.c,v
retrieving revision 1.6
diff -u -r1.6 radeon_vtxfmt_c.c
--- radeon_vtxfmt_c.c	16 Feb 2005 15:02:25 -0000	1.6
+++ radeon_vtxfmt_c.c	28 Feb 2005 00:42:58 -0000
@@ -510,88 +510,142 @@
 
 /* TexCoord
  */
-static void radeon_TexCoord1f( GLfloat s )
+
+/* MultiTexcoord
+ * 
+ * Technically speaking, these functions should subtract GL_TEXTURE0 from
+ * \c target before masking and using it.  The value of GL_TEXTURE0 is 0x84C0,
+ * which has the low-order 5 bits 0.  For all possible valid values of
+ * \c target, subtracting GL_TEXTURE0 has the net effect of masking \c target
+ * with 0x1F.  Masking with 0x1F and then masking with 0x01 is redundant, so
+ * the subtraction has been omitted.
+ */
+
+/* \todo for tmu3: maybe (target & 2 ? 2 : target & 1) is safer than (target & 2) */
+static void radeon_MultiTexCoord1fARB( GLenum target, GLfloat s  )
 {
    GET_CURRENT_CONTEXT(ctx);
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLfloat *dest = rmesa->vb.texcoordptr[0];
-   dest[0] = s;
-   dest[1] = 0;
+   GLint unit = (target & 1);
+   GLfloat * const dest = rmesa->vb.texcoordptr[unit];
+
+   switch( ctx->Texture.Unit[unit]._ReallyEnabled ) {
+   case TEXTURE_CUBE_BIT:
+      dest[2] = 0.0;
+      /* FALLTHROUGH */
+   case TEXTURE_2D_BIT:
+   case TEXTURE_RECT_BIT:
+   case TEXTURE_1D_BIT:
+   default:
+      dest[1] = 0.0;
+      dest[0] = s;
+   }
 }
 
-static void radeon_TexCoord1fv( const GLfloat *v )
+static void radeon_MultiTexCoord2fARB( GLenum target, GLfloat s, GLfloat t )
 {
    GET_CURRENT_CONTEXT(ctx);
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLfloat *dest = rmesa->vb.texcoordptr[0];
-   dest[0] = v[0];
-   dest[1] = 0;
+   GLint unit = (target & 1);
+   GLfloat * const dest = rmesa->vb.texcoordptr[unit];
+
+   switch( ctx->Texture.Unit[unit]._ReallyEnabled ) {
+   case TEXTURE_CUBE_BIT:
+      dest[2] = 0.0;
+      /* FALLTHROUGH */
+   case TEXTURE_2D_BIT:
+   case TEXTURE_RECT_BIT:
+   case TEXTURE_1D_BIT:
+   default:
+      dest[1] = t;
+      dest[0] = s;
+   }
 }
 
-static void radeon_TexCoord2f( GLfloat s, GLfloat t )
+#if 0	/* \todo: make this work */
+static void radeon_MultiTexCoord3fARB(GLenum target, GLfloat s, GLfloat t, GLfloat r)
 {
    GET_CURRENT_CONTEXT(ctx);
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLfloat *dest = rmesa->vb.texcoordptr[0];
-   dest[0] = s;
-   dest[1] = t;
+   GLint unit = (target & 1);
+   GLfloat * const dest = rmesa->vb.texcoordptr[unit];
+
+   if( RADEON_DEBUG & DEBUG_VFMT)
+      fprintf( stderr, "%s: rmesa->vb.texcoordptr[%d]=0x%0x\n", __FUNCTION__, unit, rmesa->vb.texcoordptr[unit]);
+   if( RADEON_DEBUG & DEBUG_VFMT)
+      fprintf( stderr, "%s: rmesa->vb.vertex_size=%d\n", __FUNCTION__, rmesa->vb.vertex_size);
+   if( RADEON_DEBUG & DEBUG_VFMT)
+      fprintf( stderr, "%s: old str: %f %f %f\n", __FUNCTION__, dest[0], dest[1], dest[2]);
+
+   switch( ctx->Texture.Unit[unit]._ReallyEnabled ) {
+   case TEXTURE_CUBE_BIT:
+      dest[2] = r;
+      dest[1] = t;
+      dest[0] = s;
+      if( (RADEON_DEBUG & DEBUG_VFMT) && (RADEON_DEBUG & DEBUG_VERBOSE))
+         fprintf( stderr, "%s: emit s %f  t %f  r %f to dest\n",
+            __FUNCTION__, s, t, r);
+         
+      break;
+   default:
+      VFMT_FALLBACK(__FUNCTION__);
+      GL_CALL(MultiTexCoord3fARB)(target, s, t, r);
+      return;	
+   }
 }
+#endif
 
-static void radeon_TexCoord2fv( const GLfloat *v )
+static void radeon_TexCoord1f(GLfloat s)
 {
-   GET_CURRENT_CONTEXT(ctx);
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLfloat *dest = rmesa->vb.texcoordptr[0];
-   dest[0] = v[0];
-   dest[1] = v[1];
+   radeon_MultiTexCoord1fARB(GL_TEXTURE0, s);
 }
 
+static void radeon_TexCoord2f(GLfloat s, GLfloat t)
+{
+   radeon_MultiTexCoord2fARB(GL_TEXTURE0, s, t);
+}
 
-/* MultiTexcoord
- * 
- * Technically speaking, these functions should subtract GL_TEXTURE0 from
- * \c target before masking and using it.  The value of GL_TEXTURE0 is 0x84C0,
- * which has the low-order 5 bits 0.  For all possible valid values of 
- * \c target.  Subtracting GL_TEXTURE0 has the net effect of masking \c target
- * with 0x1F.  Masking with 0x1F and then masking with 0x01 is redundant, so
- * the subtraction has been omitted.
- */
+#if 0
+static void radeon_TexCoord3f(GLfloat s, GLfloat t, GLfloat r)
+{
+   radeon_MultiTexCoord3fARB(GL_TEXTURE0, s, t, r);
+}
+#endif
 
-static void radeon_MultiTexCoord1fARB( GLenum target, GLfloat s  )
+static void radeon_TexCoord1fv(const GLfloat *v)
 {
-   GET_CURRENT_CONTEXT(ctx);
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLfloat *dest = rmesa->vb.texcoordptr[target & 1];
-   dest[0] = s;
-   dest[1] = 0;
+   radeon_MultiTexCoord1fARB(GL_TEXTURE0, v[0]);
 }
 
-static void radeon_MultiTexCoord1fvARB( GLenum target, const GLfloat *v )
+static void radeon_TexCoord2fv(const GLfloat *v)
 {
-   GET_CURRENT_CONTEXT(ctx);
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLfloat *dest = rmesa->vb.texcoordptr[target & 1];
-   dest[0] = v[0];
-   dest[1] = 0;
+   radeon_MultiTexCoord2fARB(GL_TEXTURE0, v[0], v[1]);
 }
 
-static void radeon_MultiTexCoord2fARB( GLenum target, GLfloat s, GLfloat t )
+#if 0
+static void radeon_TexCoord3fv(const GLfloat *v)
 {
-   GET_CURRENT_CONTEXT(ctx);
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLfloat *dest = rmesa->vb.texcoordptr[target & 1];
-   dest[0] = s;
-   dest[1] = t;
+   radeon_MultiTexCoord3fARB(GL_TEXTURE0, v[0], v[1], v[2]);
 }
+#endif
 
-static void radeon_MultiTexCoord2fvARB( GLenum target, const GLfloat *v )
+static void radeon_MultiTexCoord1fvARB(GLenum target, const GLfloat *v)
 {
-   GET_CURRENT_CONTEXT(ctx);
-   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLfloat *dest = rmesa->vb.texcoordptr[target & 1];
-   dest[0] = v[0];
-   dest[1] = v[1];
+   radeon_MultiTexCoord1fARB(target, v[0]);
+}
+
+static void radeon_MultiTexCoord2fvARB(GLenum target, const GLfloat *v)
+{
+   radeon_MultiTexCoord2fARB(target, v[0], v[1]);
+}
+
+#if 0
+static void radeon_MultiTexCoord3fvARB(GLenum target, const GLfloat *v)
+{
+   radeon_MultiTexCoord3fARB(target, v[0], v[1], v[2]);
 }
+#endif
+
 
 static struct dynfn *lookup( struct dynfn *l, int key )
 {
@@ -615,7 +669,8 @@
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);			\
    int key = rmesa->vb.vertex_format & (MASK|ACTIVE);			\
    struct dynfn *dfn;							\
-									\
+if( RADEON_DEBUG & DEBUG_VFMT) fprintf( stderr, "%s: mask:%0x active:%0x\n", \
+__FUNCTION__ , MASK, ACTIVE);									\
    dfn = lookup( &rmesa->vb.dfn_cache.FN, key );			\
    if (dfn == 0)							\
       dfn = rmesa->vb.codegen.FN( ctx, key );				\
@@ -746,6 +801,10 @@
 #define ACTIVE_ST1 RADEON_CP_VC_FRMT_ST1
 #define ACTIVE_ST_ALL (RADEON_CP_VC_FRMT_ST1|RADEON_CP_VC_FRMT_ST0)
 
+#define ACTIVE_STQ0 (RADEON_CP_VC_FRMT_ST0|RADEON_CP_VC_FRMT_Q0)
+#define ACTIVE_STQ1 (RADEON_CP_VC_FRMT_ST1|RADEON_CP_VC_FRMT_Q1)
+#define ACTIVE_STQ_ALL (RADEON_CP_VC_FRMT_ST0|RADEON_CP_VC_FRMT_Q0|RADEON_CP_VC_FRMT_ST1|RADEON_CP_VC_FRMT_Q1)
+
 /* Each codegen function should be able to be fully specified by a
  * subsetted version of rmesa->vb.vertex_format.
  */
@@ -755,13 +814,19 @@
 #define MASK_ST0     (MASK_SPEC|ACTIVE_SPEC)
 #define MASK_ST1     (MASK_ST0|ACTIVE_ST0)
 #define MASK_ST_ALL  (MASK_ST1|ACTIVE_ST1)
-#define MASK_VERTEX  (MASK_ST_ALL|ACTIVE_FPALPHA) 
+
+#define MASK_STQ0    (MASK_ST0)
+#define MASK_STQ1    (MASK_ST_ALL|ACTIVE_STQ1)
+#define MASK_STQ_ALL (MASK_ST_ALL|ACTIVE_STQ_ALL)
+
+#define MASK_VERTEX  (MASK_STQ_ALL|ACTIVE_FPALPHA) 
 
 
 typedef void (*p4f)( GLfloat, GLfloat, GLfloat, GLfloat );
 typedef void (*p3f)( GLfloat, GLfloat, GLfloat );
 typedef void (*p2f)( GLfloat, GLfloat );
 typedef void (*p1f)( GLfloat );
+typedef void (*pe3f)( GLenum, GLfloat, GLfloat, GLfloat );
 typedef void (*pe2f)( GLenum, GLfloat, GLfloat );
 typedef void (*pe1f)( GLenum, GLfloat );
 typedef void (*p4ub)( GLubyte, GLubyte, GLubyte, GLubyte );
@@ -808,6 +873,12 @@
 CHOOSE_SECONDARY_COLOR(SecondaryColor3fvEXT, pfv, MASK_SPEC, ACTIVE_SPEC,
 	(const GLfloat *v), (v))
 
+#if 0
+CHOOSE(TexCoord3f, p3f, MASK_STQ0, ACTIVE_STQ0, 
+       (GLfloat a,GLfloat b,GLfloat c), (a,b,c))
+CHOOSE(TexCoord3fv, pfv, MASK_STQ0, ACTIVE_STQ0, 
+       (const GLfloat *v), (v))
+#endif
 CHOOSE(TexCoord2f, p2f, MASK_ST0, ACTIVE_ST0, 
        (GLfloat a,GLfloat b), (a,b))
 CHOOSE(TexCoord2fv, pfv, MASK_ST0, ACTIVE_ST0, 
@@ -817,6 +888,12 @@
 CHOOSE(TexCoord1fv, pfv, MASK_ST0, ACTIVE_ST0, 
        (const GLfloat *v), (v))
 
+#if 0
+CHOOSE(MultiTexCoord3fARB, pe3f, MASK_STQ_ALL, ACTIVE_STQ_ALL,
+	 (GLenum u,GLfloat a,GLfloat b,GLfloat c), (u,a,b,c))
+CHOOSE(MultiTexCoord3fvARB, pefv, MASK_STQ_ALL, ACTIVE_STQ_ALL,
+	(GLenum u,const GLfloat *v), (u,v))
+#endif
 CHOOSE(MultiTexCoord2fARB, pe2f, MASK_ST_ALL, ACTIVE_ST_ALL,
 	 (GLenum u,GLfloat a,GLfloat b), (u,a,b))
 CHOOSE(MultiTexCoord2fvARB, pefv, MASK_ST_ALL, ACTIVE_ST_ALL,
@@ -851,12 +928,20 @@
    vfmt->MultiTexCoord1fvARB = choose_MultiTexCoord1fvARB;
    vfmt->MultiTexCoord2fARB = choose_MultiTexCoord2fARB;
    vfmt->MultiTexCoord2fvARB = choose_MultiTexCoord2fvARB;
+#if 0
+   vfmt->MultiTexCoord3fARB = choose_MultiTexCoord3fARB;
+   vfmt->MultiTexCoord3fvARB = choose_MultiTexCoord3fvARB;
+#endif
    vfmt->Normal3f = choose_Normal3f;
    vfmt->Normal3fv = choose_Normal3fv;
    vfmt->TexCoord1f = choose_TexCoord1f;
    vfmt->TexCoord1fv = choose_TexCoord1fv;
    vfmt->TexCoord2f = choose_TexCoord2f;
    vfmt->TexCoord2fv = choose_TexCoord2fv;
+#if 0
+   vfmt->TexCoord3f = choose_TexCoord3f;
+   vfmt->TexCoord3fv = choose_TexCoord3fv;
+#endif
    vfmt->Vertex2f = choose_Vertex2f;
    vfmt->Vertex2fv = choose_Vertex2fv;
    vfmt->Vertex3f = choose_Vertex3f;
@@ -887,10 +972,25 @@
    gen->Color4ubv = codegen_noop;
    gen->Normal3f = codegen_noop;
    gen->Normal3fv = codegen_noop;
+
+#if 0
+   gen->TexCoord3f = codegen_noop;
+   gen->TexCoord3fv = codegen_noop;
+#endif
    gen->TexCoord2f = codegen_noop;
    gen->TexCoord2fv = codegen_noop;
+   gen->TexCoord1f = codegen_noop;
+   gen->TexCoord1fv = codegen_noop;
+
+#if 0
+   gen->MultiTexCoord3fARB = codegen_noop;
+   gen->MultiTexCoord3fvARB = codegen_noop;
+#endif
    gen->MultiTexCoord2fARB = codegen_noop;
    gen->MultiTexCoord2fvARB = codegen_noop;
+   gen->MultiTexCoord1fARB = codegen_noop;
+   gen->MultiTexCoord1fvARB = codegen_noop;
+
    gen->Vertex2f = codegen_noop;
    gen->Vertex2fv = codegen_noop;
    gen->Color3ub = codegen_noop;
@@ -903,10 +1003,6 @@
    gen->SecondaryColor3fvEXT = codegen_noop;
    gen->SecondaryColor3ubEXT = codegen_noop;
    gen->SecondaryColor3ubvEXT = codegen_noop;
-   gen->TexCoord1f = codegen_noop;
-   gen->TexCoord1fv = codegen_noop;
-   gen->MultiTexCoord1fARB = codegen_noop;
-   gen->MultiTexCoord1fvARB = codegen_noop;
 
    if (useCodegen) {
 #if defined(USE_X86_ASM)
Index: radeon_vtxfmt_sse.c
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/radeon/radeon_vtxfmt_sse.c,v
retrieving revision 1.5
diff -u -r1.5 radeon_vtxfmt_sse.c
--- radeon_vtxfmt_sse.c	16 Feb 2005 15:02:25 -0000	1.5
+++ radeon_vtxfmt_sse.c	28 Feb 2005 00:42:58 -0000
@@ -154,6 +154,7 @@
    }
 }
 
+#if 0 /* from r200: Temporarily disabled as it is broken w/the new cubemap code. - idr */
 static struct dynfn *radeon_makeSSETexCoord2fv( GLcontext *ctx, int key )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
@@ -207,6 +208,7 @@
    }
    return dfn;
 }
+#endif
 
 void radeonInitSSECodegen( struct dfn_generators *gen )
 {
@@ -215,10 +217,12 @@
       gen->Normal3f = (void *) radeon_makeSSENormal3f;
       gen->Color3fv = (void *) radeon_makeSSEColor3fv;
       gen->Color3f = (void *) radeon_makeSSEColor3f;
+#if 0 /* from r200: Temporarily disabled as it is broken w/the new cubemap code. - idr */
       gen->TexCoord2fv = (void *) radeon_makeSSETexCoord2fv;
       gen->TexCoord2f = (void *) radeon_makeSSETexCoord2f;
       gen->MultiTexCoord2fvARB = (void *) radeon_makeSSEMultiTexCoord2fv;
       gen->MultiTexCoord2fARB = (void *) radeon_makeSSEMultiTexCoord2f;
+#endif
    }
 }
 
Index: radeon_vtxfmt_x86.c
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/radeon/radeon_vtxfmt_x86.c,v
retrieving revision 1.4
diff -u -r1.4 radeon_vtxfmt_x86.c
--- radeon_vtxfmt_x86.c	16 Feb 2005 15:02:25 -0000	1.4
+++ radeon_vtxfmt_x86.c	28 Feb 2005 00:42:58 -0000
@@ -335,6 +335,7 @@
 
 
 
+#if 0 /* from r200: Temporarily disabled as it is broken w/the new cubemap code. - idr */
 struct dynfn *radeon_makeX86TexCoord2fv( GLcontext *ctx, int key )
 {
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
@@ -395,6 +396,7 @@
    }      
    return dfn;
 }
+#endif
 
 
 void radeonInitX86Codegen( struct dfn_generators *gen )
@@ -405,10 +407,12 @@
    gen->Color4ubv = radeon_makeX86Color4ubv; /* PKCOLOR only */
    gen->Normal3f = radeon_makeX86Normal3f;
    gen->Normal3fv = radeon_makeX86Normal3fv;
+#if 0 /* from r200: Temporarily disabled as it is broken w/the new cubemap code. - idr */
    gen->TexCoord2f = radeon_makeX86TexCoord2f;
    gen->TexCoord2fv = radeon_makeX86TexCoord2fv;
    gen->MultiTexCoord2fARB = radeon_makeX86MultiTexCoord2fARB;
    gen->MultiTexCoord2fvARB = radeon_makeX86MultiTexCoord2fvARB;
+#endif
    gen->Color3f = radeon_makeX86Color3f;
    gen->Color3fv = radeon_makeX86Color3fv;