diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 0677c54..c7ec20c 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -2865,15 +2865,34 @@ GLboolean assemble_CMP(r700_AssemblerBase *pAsm) GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode) { - int tmp; + int tmp1,tmp2; checkop1(pAsm); - tmp = gethelpr(pAsm); + tmp1 = gethelpr(pAsm); + tmp2 = gethelpr(pAsm); + +/* algo to normalize radians to a range of -pi to +pi + +inst count unit opcode DEST SRC0, SRC1, SRC2 + 0 x: MUL tmp1.x, ang, 1/(2.0 * pi) + 1 x: FRACT tmp1.x, tmp1.x + 2 x: MULADD tmp2, tmp1.x, (2.0 * pi), -(2.0 * pi) + 3 x: MUL tmp1.y, tmp1.x, (2.0 * pi) + 4 x: ADD tmp1.x, tmp1.x, -0.5F + 5 x: CNDGT tmp1.x, tmp1.x, tmp2.x, tmp1.y + 6 t: SIN or COS dest, tmp1.x + +NOTE: No parallelism + +*/ + + + pAsm->D.dst.opcode = SQ_OP2_INST_MUL; setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); pAsm->D.dst.rtype = DST_REG_TEMPORARY; - pAsm->D.dst.reg = tmp; + pAsm->D.dst.reg = tmp1; pAsm->D.dst.writex = 1; assemble_src(pAsm, 0, -1); @@ -2881,9 +2900,145 @@ GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode) pAsm->S[1].src.rtype = SRC_REC_LITERAL; setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); pAsm->D2.dst2.literal_slots = 1; - pAsm->C[0].f = 1/(3.1415926535 * 2); + pAsm->C[0].f = 1/(3.1415926535897 * 2); pAsm->C[1].f = 0.0F; - next_ins(pAsm); + + if(( GL_FALSE == next_ins(pAsm) )) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_FRACT; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp1; + pAsm->D.dst.writex = 1; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp1; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if(( GL_FALSE == next_ins(pAsm) )) + { + return GL_FALSE; + } + + + 
pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + pAsm->D.dst.op3 = 1; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp2; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp1; + + pAsm->S[1].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + + pAsm->S[2].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X); + neg_PVSSRC(&(pAsm->S[2].src)); + + pAsm->D2.dst2.literal_slots = 1; + pAsm->C[0].f = (3.1415926535897 * 2); + pAsm->C[1].f = 0.0F; + + if(( GL_FALSE == next_ins(pAsm) )) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp1; + pAsm->D.dst.writex = 0; + pAsm->D.dst.writey = 1; + + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp1; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + + pAsm->S[1].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + pAsm->D2.dst2.literal_slots = 1; + pAsm->C[0].f = (3.1415926535897 * 2); + pAsm->C[1].f = 0.0F; + + if(( GL_FALSE == next_ins(pAsm) )) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_ADD; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp1; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp1; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + + pAsm->S[1].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + pAsm->D2.dst2.literal_slots = 1; + pAsm->C[0].f = 0.5F ; + pAsm->C[1].f = 0.0F; + neg_PVSSRC(&(pAsm->S[1].src)); + + +/* the following doesn't 
work for some reason */ +/* pAsm->S[1].src.rtype = SQ_ALU_SRC_0_5; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + neg_PVSSRC(&(pAsm->S[1].src)); */ + + if(( GL_FALSE == next_ins(pAsm) )) + { + return GL_FALSE; + } + + + pAsm->D.dst.opcode = SQ_OP3_INST_CNDGT; + pAsm->D.dst.op3 = 1; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp1; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp1; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[1].src.reg = tmp2; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + + setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE); + pAsm->S[2].src.rtype = DST_REG_TEMPORARY; + pAsm->S[2].src.reg = tmp1; + setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y); + + if(( GL_FALSE == next_ins(pAsm) )) + { + return GL_FALSE; + } + pAsm->D.dst.opcode = opcode; pAsm->D.dst.math = 1; @@ -2892,7 +3047,7 @@ GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode) setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; - pAsm->S[0].src.reg = tmp; + pAsm->S[0].src.reg = tmp1; setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); noneg_PVSSRC(&(pAsm->S[0].src)); @@ -4018,16 +4173,34 @@ GLboolean assemble_RSQ(r700_AssemblerBase *pAsm) GLboolean assemble_SCS(r700_AssemblerBase *pAsm) { - BITS tmp; - + int tmp1,tmp2; checkop1(pAsm); - tmp = gethelpr(pAsm); - /* tmp.x = src /2*PI */ + tmp1 = gethelpr(pAsm); + tmp2 = gethelpr(pAsm); + +/* algo to normalize radians to a range of -pi to +pi + +inst count unit opcode DEST SRC0, SRC1, SRC2 + 0 x: MUL tmp1.x, ang, 1/(2.0 * pi) + 1 x: FRACT tmp1.x, tmp1.x + 2 x: MULADD tmp2, tmp1.x, (2.0 * pi), -(2.0 * pi) + 3 x: MUL tmp1.y, tmp1.x, (2.0 * pi) + 4 x: ADD tmp1.x, tmp1.x, -0.5F + 5 x: CNDGT tmp1.x, tmp1.x, tmp2.x, tmp1.y 
+ 6 t: SIN or COS dest, tmp1.x + +NOTE: No parallelism + +*/ + + + + pAsm->D.dst.opcode = SQ_OP2_INST_MUL; setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); pAsm->D.dst.rtype = DST_REG_TEMPORARY; - pAsm->D.dst.reg = tmp; + pAsm->D.dst.reg = tmp1; pAsm->D.dst.writex = 1; assemble_src(pAsm, 0, -1); @@ -4035,10 +4208,144 @@ GLboolean assemble_SCS(r700_AssemblerBase *pAsm) pAsm->S[1].src.rtype = SRC_REC_LITERAL; setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); pAsm->D2.dst2.literal_slots = 1; - pAsm->C[0].f = 1/(3.1415926535 * 2); + pAsm->C[0].f = 1/(3.1415926535897 * 2); pAsm->C[1].f = 0.0F; - next_ins(pAsm); + if(( GL_FALSE == next_ins(pAsm) )) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_FRACT; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp1; + pAsm->D.dst.writex = 1; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp1; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if(( GL_FALSE == next_ins(pAsm) )) + { + return GL_FALSE; + } + + + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + pAsm->D.dst.op3 = 1; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp2; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp1; + + pAsm->S[1].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + + pAsm->S[2].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X); + neg_PVSSRC(&(pAsm->S[2].src)); + + pAsm->D2.dst2.literal_slots = 1; + pAsm->C[0].f = (3.1415926535897 * 2); + pAsm->C[1].f = 0.0F; + + if(( GL_FALSE == next_ins(pAsm) )) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = 
DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp1; + pAsm->D.dst.writex = 0; + pAsm->D.dst.writey = 1; + + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp1; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + + pAsm->S[1].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + pAsm->D2.dst2.literal_slots = 1; + pAsm->C[0].f = (3.1415926535897 * 2); + pAsm->C[1].f = 0.0F; + + if(( GL_FALSE == next_ins(pAsm) )) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_ADD; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp1; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp1; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + + pAsm->S[1].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + pAsm->D2.dst2.literal_slots = 1; + pAsm->C[0].f = 0.5F ; + pAsm->C[1].f = 0.0F; + neg_PVSSRC(&(pAsm->S[1].src)); + + +/* the following doesn't work for some reason */ +/* pAsm->S[1].src.rtype = SQ_ALU_SRC_0_5; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + neg_PVSSRC(&(pAsm->S[1].src)); */ + + if(( GL_FALSE == next_ins(pAsm) )) + { + return GL_FALSE; + } + + + pAsm->D.dst.opcode = SQ_OP3_INST_CNDGT; + pAsm->D.dst.op3 = 1; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp1; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp1; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[1].src.reg = tmp2; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + + setaddrmode_PVSSRC(&(pAsm->S[2].src), 
ADDR_ABSOLUTE); + pAsm->S[2].src.rtype = DST_REG_TEMPORARY; + pAsm->S[2].src.reg = tmp1; + setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y); + + if(( GL_FALSE == next_ins(pAsm) )) + { + return GL_FALSE; + } // COS dst.x, a.x pAsm->D.dst.opcode = SQ_OP2_INST_COS; @@ -4050,7 +4357,7 @@ GLboolean assemble_SCS(r700_AssemblerBase *pAsm) setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; - pAsm->S[0].src.reg = tmp; + pAsm->S[0].src.reg = tmp1; setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); noneg_PVSSRC(&(pAsm->S[0].src)); @@ -4069,7 +4376,7 @@ GLboolean assemble_SCS(r700_AssemblerBase *pAsm) setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; - pAsm->S[0].src.reg = tmp; + pAsm->S[0].src.reg = tmp1; setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); noneg_PVSSRC(&(pAsm->S[0].src));