From 1b5d0bc5489d8bdc2466ee52fd739db175436234 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Thu, 14 May 2009 21:41:49 +0200 Subject: [PATCH] r300: fix LIT 0^0 case in vertex shaders --- src/mesa/drivers/dri/r300/r300_vertprog.c | 121 ++++++++++++++++++++++++++++- 1 files changed, 117 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index 949c0b4..5315b4c 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -572,16 +572,43 @@ static GLuint *r300TranslateOpcodeLG2(struct r300_vertex_program *vp, static GLuint *r300TranslateOpcodeLIT(struct r300_vertex_program *vp, struct prog_instruction *vpi, GLuint * inst, - struct prog_src_register src[3]) + struct prog_src_register src[3], + int *u_temp_i) { //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} + /** + * Rewrite LIT to deal with 0^0 -> 1 case + * + * Rule: + * dst.z += ((src.x > 0 && src.w == 0) ? 1.0 : 0.0) + * + * + * Hardware code: + * LIT tmp1, src + * MUL tmp2.w, src.???w, src.???w + * SGE tmp2.w, tmp2.???-w, 0 + * SLT tmp2.x, src.-x???, 0 + * MAD dst, tmp2.00x0, tmp2.00w0, tmp1.xyzw + * + * + * Implementation details: + * - src.x > 0 is implemented by negating the condition: SLT tmp2.x, -src.xxxx, 0 + * - src.w == 0 condition equals SGE tmp2.w, -|src.wwww|, 0 + * we use src.w^2 (the MUL result in tmp2.w) to emulate absolute value + * - && operator is implemented with multiplication + */ + GLint tmp1, tmp2; + + tmp1 = (*u_temp_i)--; + tmp2 = (*u_temp_i)--; + inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX, GL_TRUE, GL_FALSE, - t_dst_index(vp, &vpi->DstReg), + tmp1, t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); + PVS_DST_REG_TEMPORARY); /* NOTE: Users swizzling might not work. 
*/ inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W @@ -605,6 +632,92 @@ static GLuint *r300TranslateOpcodeLIT(struct r300_vertex_program *vp, src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); + inst += 4; + inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY, + GL_FALSE, + GL_FALSE, + tmp2, + WRITEMASK_W, + PVS_DST_REG_TEMPORARY); + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), + PVS_SRC_SELECT_FORCE_0, + PVS_SRC_SELECT_FORCE_0, + PVS_SRC_SELECT_FORCE_0, + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W + t_src_class(src[0].File), + VSF_FLAG_NONE) | (src[0].RelAddr << 4); + inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), + PVS_SRC_SELECT_FORCE_0, + PVS_SRC_SELECT_FORCE_0, + PVS_SRC_SELECT_FORCE_0, + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W + t_src_class(src[0].File), + VSF_FLAG_NONE) | (src[0].RelAddr << 4); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + inst += 4; + inst[0] = PVS_OP_DST_OPERAND(VE_SET_GREATER_THAN_EQUAL, + GL_FALSE, + GL_FALSE, + tmp2, + WRITEMASK_W, + PVS_DST_REG_TEMPORARY); + inst[1] = PVS_SRC_OPERAND(tmp2, + PVS_SRC_SELECT_FORCE_0, + PVS_SRC_SELECT_FORCE_0, + PVS_SRC_SELECT_FORCE_0, + PVS_SRC_SELECT_W, + PVS_SRC_REG_TEMPORARY, + VSF_FLAG_W); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + inst += 4; + inst[0] = PVS_OP_DST_OPERAND(VE_SET_LESS_THAN, + GL_FALSE, + GL_FALSE, + tmp2, + WRITEMASK_X, + PVS_DST_REG_TEMPORARY); + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X + PVS_SRC_SELECT_FORCE_0, + PVS_SRC_SELECT_FORCE_0, + PVS_SRC_SELECT_FORCE_0, + t_src_class(src[0].File), + VSF_FLAG_X) | (src[0].RelAddr << 4); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + inst += 4; + inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + 
t_dst_class(vpi->DstReg.File)); + inst[1] = PVS_SRC_OPERAND(tmp2, + PVS_SRC_SELECT_FORCE_0, + PVS_SRC_SELECT_FORCE_0, + PVS_SRC_SELECT_X, + PVS_SRC_SELECT_FORCE_0, + PVS_SRC_REG_TEMPORARY, + VSF_FLAG_NONE); + inst[2] = PVS_SRC_OPERAND(tmp2, + PVS_SRC_SELECT_FORCE_0, + PVS_SRC_SELECT_FORCE_0, + PVS_SRC_SELECT_W, + PVS_SRC_SELECT_FORCE_0, + PVS_SRC_REG_TEMPORARY, + VSF_FLAG_NONE); + inst[3] = PVS_SRC_OPERAND(tmp1, + PVS_SRC_SELECT_X, + PVS_SRC_SELECT_Y, + PVS_SRC_SELECT_Z, + PVS_SRC_SELECT_W, + PVS_SRC_REG_TEMPORARY, + VSF_FLAG_NONE); + return inst; } @@ -1130,7 +1243,7 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp, inst = r300TranslateOpcodeLG2(vp, vpi, inst, src); break; case OPCODE_LIT: - inst = r300TranslateOpcodeLIT(vp, vpi, inst, src); + inst = r300TranslateOpcodeLIT(vp, vpi, inst, src, &u_temp_i); break; case OPCODE_LOG: inst = r300TranslateOpcodeLOG(vp, vpi, inst, src); -- 1.6.0.4