diff --git a/src/mesa/tnl/t_vertex_generic.c b/src/mesa/tnl/t_vertex_generic.c index 236a5be..9f218c6 100644 --- a/src/mesa/tnl/t_vertex_generic.c +++ b/src/mesa/tnl/t_vertex_generic.c @@ -107,7 +107,7 @@ static INLINE void insert_3f_viewport_2( const struct tnl_clipspace_attr *a, GLu out[0] = vp[0] * in[0] + vp[12]; out[1] = vp[5] * in[1] + vp[13]; - out[2] = vp[10] * in[2] + vp[14]; + out[2] = vp[14]; } static INLINE void insert_3f_viewport_1( const struct tnl_clipspace_attr *a, GLubyte *v, @@ -838,6 +838,7 @@ static void NAME( GLcontext *ctx, \ struct tnl_clipspace_attr *a = vtx->attr; \ GLuint i; \ \ + _mesa_memset(v, 0, vtx->vertex_size * count); \ for (i = 0 ; i < count ; i++, v += vtx->vertex_size) { \ if (NR > 0) { \ F0( &a[0], v + a[0].vertoffset, (GLfloat *)a[0].inputptr ); \ @@ -961,6 +962,7 @@ void _tnl_generic_emit( GLcontext *ctx, const GLuint stride = vtx->vertex_size; GLuint i, j; + _mesa_memset(v, 0, stride * count); for (i = 0 ; i < count ; i++, v += stride) { for (j = 0; j < attr_count; j++) { GLfloat *in = (GLfloat *)a[j].inputptr; diff --git a/src/mesa/tnl/t_vertex_sse.c b/src/mesa/tnl/t_vertex_sse.c index f1c98fe..7520fe3 100644 --- a/src/mesa/tnl/t_vertex_sse.c +++ b/src/mesa/tnl/t_vertex_sse.c @@ -126,11 +126,11 @@ static void emit_load3f_3( struct x86_program *p, } else { /* c 0 0 0 - * c c c c - * a b c c + * c c c 0 + * a b c 0 */ sse_movss(&p->func, dest, x86_make_disp(arg0, 8)); - sse_shufps(&p->func, dest, dest, SHUF(X,X,X,X)); + sse_shufps(&p->func, dest, dest, SHUF(X,X,X,W)); sse_movlps(&p->func, dest, arg0); } } @@ -139,14 +139,17 @@ static void emit_load3f_2( struct x86_program *p, struct x86_reg dest, struct x86_reg arg0 ) { - emit_load4f_2(p, dest, arg0); + sse_xorps(&p->func, dest, dest); + sse_movlps(&p->func, dest, arg0); } static void emit_load3f_1( struct x86_program *p, struct x86_reg dest, struct x86_reg arg0 ) { - emit_load4f_1(p, dest, arg0); + /* Loading from memory erases the upper bits. + */ + sse_movss(&p->func, dest, arg0); } static void emit_load2f_2( struct x86_program *p, @@ -160,7 +163,9 @@ static void emit_load2f_1( struct x86_program *p, struct x86_reg dest, struct x86_reg arg0 ) { - emit_load4f_1(p, dest, arg0); + /* Loading from memory erases the upper bits. + */ + sse_movss(&p->func, dest, arg0); } static void emit_load1f_1( struct x86_program *p, @@ -352,6 +357,7 @@ static GLboolean build_vertex_emit( struct x86_program *p ) struct x86_reg temp = x86_make_reg(file_XMM, 0); struct x86_reg vp0 = x86_make_reg(file_XMM, 1); struct x86_reg vp1 = x86_make_reg(file_XMM, 2); + struct x86_reg temp2 = x86_make_reg(file_XMM, 3); GLubyte *fixup, *label; /* Push a few regs? @@ -488,7 +494,7 @@ static GLboolean build_vertex_emit( struct x86_program *p ) { get_src_ptr(p, srcECX, vtxESI, a); emit_load(p, temp, 1, x86_deref(srcECX), a->inputsize); - sse_shufps(&p->func, temp, temp, SHUF(X,X,X,X)); + sse_shufps(&p->func, temp, temp, SHUF(Y,Y,Y,X)); emit_pack_store_4ub(p, x86_make_disp(dest, -3), temp); /* overkill! */ update_src_ptr(p, srcECX, vtxESI, a); } @@ -523,8 +529,12 @@ static GLboolean build_vertex_emit( struct x86_program *p ) */ sse_shufps(&p->func, temp, temp, SHUF(W,X,Y,Z)); + /* Use another temporary register for loading, since loading + * from memory erases the upper bits of the register: + */ get_src_ptr(p, srcECX, vtxESI, &a[1]); - emit_load(p, temp, 1, x86_deref(srcECX), a[1].inputsize); + sse_movss(&p->func, temp2, x86_deref(srcECX)); + sse_movss(&p->func, temp, temp2); update_src_ptr(p, srcECX, vtxESI, &a[1]); /* Rearrange and possibly do BGR conversion: @@ -539,8 +549,8 @@ static GLboolean build_vertex_emit( struct x86_program *p ) } else { _mesa_printf("Can't emit 3ub\n"); + return GL_FALSE; /* add this later */ } - return GL_FALSE; /* add this later */ break; case EMIT_4UB_4F_RGBA: