From d5caccacab24f67dd072f326a71e680d54019b35 Mon Sep 17 00:00:00 2001 From: Owen W. Taylor Date: Thu, 17 Apr 2008 01:44:59 -0400 Subject: [PATCH] Don't emit mask coordinates when without a mask texture This prevents a very long per-vertex delay/timeout on the R300 and speeds up compositing their enormously. It may be a small performance win on other chipsets --- src/radeon_exa_render.c | 106 +++++++++++++++++++++++++++++++--------------- 1 files changed, 71 insertions(+), 35 deletions(-) diff --git a/src/radeon_exa_render.c b/src/radeon_exa_render.c index 860ca2e..a43652a 100644 --- a/src/radeon_exa_render.c +++ b/src/radeon_exa_render.c @@ -55,6 +55,7 @@ /* Only include the following (generic) bits once. */ #ifdef ONLY_ONCE +static Bool have_mask; static Bool is_transform[2]; static PictTransform *transform[2]; /* Whether we are tiling horizontally and vertically */ @@ -558,10 +559,12 @@ static Bool FUNC_NAME(R100PrepareComposite)(int op, pp_cntl = RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE; if (pMask != NULL) { + have_mask = TRUE; if (!FUNC_NAME(R100TextureSetup)(pMaskPicture, pMask, 1)) return FALSE; pp_cntl |= RADEON_TEX_1_ENABLE; } else { + have_mask = FALSE; is_transform[1] = FALSE; } @@ -610,7 +613,7 @@ static Bool FUNC_NAME(R100PrepareComposite)(int op, OUT_ACCEL_REG(RADEON_PP_TXABLEND_0, ablend); OUT_ACCEL_REG(RADEON_SE_VTX_FMT, RADEON_SE_VTX_FMT_XY | RADEON_SE_VTX_FMT_ST0 | - RADEON_SE_VTX_FMT_ST1); + (pMask ? RADEON_SE_VTX_FMT_ST1 : 0)); /* Op operator. */ blendcntl = RADEONGetBlendCntl(op, pMaskPicture, pDstPicture->format); @@ -846,10 +849,12 @@ static Bool FUNC_NAME(R200PrepareComposite)(int op, PicturePtr pSrcPicture, pp_cntl = RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE; if (pMask != NULL) { + have_mask = TRUE; if (!FUNC_NAME(R200TextureSetup)(pMaskPicture, pMask, 1)) return FALSE; pp_cntl |= RADEON_TEX_1_ENABLE; } else { + have_mask = FALSE; is_transform[1] = FALSE; } @@ -864,7 +869,7 @@ static Bool FUNC_NAME(R200PrepareComposite)(int op, PicturePtr pSrcPicture, OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY); OUT_ACCEL_REG(R200_SE_VTX_FMT_1, (2 << R200_VTX_TEX0_COMP_CNT_SHIFT) | - (2 << R200_VTX_TEX1_COMP_CNT_SHIFT)); + (pMask != NULL ? (2 << R200_VTX_TEX1_COMP_CNT_SHIFT) : 0)); OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, colorpitch); @@ -1195,16 +1200,18 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, txenable = R300_TEX_0_ENABLE; if (pMask != NULL) { + have_mask = TRUE; if (!FUNC_NAME(R300TextureSetup)(pMaskPicture, pMask, 1)) return FALSE; txenable |= R300_TEX_1_ENABLE; } else { + have_mask = FALSE; is_transform[1] = FALSE; } RADEON_SWITCH_TO_3D(); - BEGIN_ACCEL(8); + BEGIN_ACCEL(pMask != NULL ? 8 : 6); OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0, ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) | (0 << R300_SKIP_DWORDS_0_SHIFT) | @@ -1213,13 +1220,15 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) | (0 << R300_SKIP_DWORDS_1_SHIFT) | (6 << R300_DST_VEC_LOC_1_SHIFT) | + (pMask == NULL ? R300_LAST_VEC_1 : 0) | R300_SIGNED_1)); - OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_1, - ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) | - (0 << R300_SKIP_DWORDS_2_SHIFT) | - (7 << R300_DST_VEC_LOC_2_SHIFT) | - R300_LAST_VEC_2 | - R300_SIGNED_2)); + if (pMask != NULL) + OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_1, + ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) | + (0 << R300_SKIP_DWORDS_2_SHIFT) | + (7 << R300_DST_VEC_LOC_2_SHIFT) | + R300_LAST_VEC_2 | + R300_SIGNED_2)); OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_EXT_0, ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_0_SHIFT) | (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_0_SHIFT) | @@ -1233,18 +1242,19 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_1_SHIFT) | ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_1_SHIFT))); - OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_EXT_1, - ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_2_SHIFT) | - (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_2_SHIFT) | - (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_2_SHIFT) | - (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_2_SHIFT) | - ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W) - << R300_WRITE_ENA_2_SHIFT))); + if (pMask != NULL) + OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_EXT_1, + ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_2_SHIFT) | + (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_2_SHIFT) | + (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_2_SHIFT) | + (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_2_SHIFT) | + ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W) + << R300_WRITE_ENA_2_SHIFT))); OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT); OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1, ((2 << R300_TEX_0_COMP_CNT_SHIFT) | - (2 << R300_TEX_1_COMP_CNT_SHIFT))); + (pMask != NULL ? 2 << R300_TEX_1_COMP_CNT_SHIFT : 0))); OUT_ACCEL_REG(R300_TX_INVALTAGS, 0); OUT_ACCEL_REG(R300_TX_ENABLE, txenable); @@ -1847,11 +1857,17 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture, return TRUE; } -#define VTX_COUNT 6 - #ifdef ACCEL_CP -#define VTX_OUT(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY) \ +#define VTX_OUT_4(_dstX, _dstY, _srcX, _srcY) \ +do { \ + OUT_RING_F(_dstX); \ + OUT_RING_F(_dstY); \ + OUT_RING_F(_srcX); \ + OUT_RING_F(_srcY); \ +} while (0) + +#define VTX_OUT_6(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY) \ do { \ OUT_RING_F(_dstX); \ OUT_RING_F(_dstY); \ @@ -1863,7 +1879,15 @@ do { \ #else /* ACCEL_CP */ -#define VTX_OUT(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY) \ +#define VTX_OUT_4(_dstX, _dstY, _srcX, _srcY) \ +do { \ + OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstX); \ + OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstY); \ + OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _srcX); \ + OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _srcY); \ +} while (0) + +#define VTX_OUT_6(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY) \ do { \ OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstX); \ OUT_ACCEL_REG_F(RADEON_SE_PORT_DATA0, _dstY); \ @@ -1936,7 +1960,7 @@ static void FUNC_NAME(RadeonCompositeTile)(PixmapPtr pDst, transformPoint(transform[1], &maskBottomRight); } - vtx_count = VTX_COUNT; + vtx_count = have_mask ? 6 : 4; if (IS_R300_VARIANT || IS_AVIVO_VARIANT) { BEGIN_ACCEL(1); @@ -1988,18 +2012,29 @@ static void FUNC_NAME(RadeonCompositeTile)(PixmapPtr pDst, } #endif - VTX_OUT((float)dstX, (float)dstY, - xFixedToFloat(srcTopLeft.x) / info->texW[0], xFixedToFloat(srcTopLeft.y) / info->texH[0], - xFixedToFloat(maskTopLeft.x) / info->texW[1], xFixedToFloat(maskTopLeft.y) / info->texH[1]); - VTX_OUT((float)dstX, (float)(dstY + h), - xFixedToFloat(srcBottomLeft.x) / info->texW[0], xFixedToFloat(srcBottomLeft.y) / info->texH[0], - xFixedToFloat(maskBottomLeft.x) / info->texW[1], xFixedToFloat(maskBottomLeft.y) / info->texH[1]); - VTX_OUT((float)(dstX + w), (float)(dstY + h), - xFixedToFloat(srcBottomRight.x) / info->texW[0], xFixedToFloat(srcBottomRight.y) / info->texH[0], - xFixedToFloat(maskBottomRight.x) / info->texW[1], xFixedToFloat(maskBottomRight.y) / info->texH[1]); - VTX_OUT((float)(dstX + w), (float)dstY, - xFixedToFloat(srcTopRight.x) / info->texW[0], xFixedToFloat(srcTopRight.y) / info->texH[0], - xFixedToFloat(maskTopRight.x) / info->texW[1], xFixedToFloat(maskTopRight.y) / info->texH[1]); + if (have_mask) { + VTX_OUT_6((float)dstX, (float)dstY, + xFixedToFloat(srcTopLeft.x) / info->texW[0], xFixedToFloat(srcTopLeft.y) / info->texH[0], + xFixedToFloat(maskTopLeft.x) / info->texW[1], xFixedToFloat(maskTopLeft.y) / info->texH[1]); + VTX_OUT_6((float)dstX, (float)(dstY + h), + xFixedToFloat(srcBottomLeft.x) / info->texW[0], xFixedToFloat(srcBottomLeft.y) / info->texH[0], + xFixedToFloat(maskBottomLeft.x) / info->texW[1], xFixedToFloat(maskBottomLeft.y) / info->texH[1]); + VTX_OUT_6((float)(dstX + w), (float)(dstY + h), + xFixedToFloat(srcBottomRight.x) / info->texW[0], xFixedToFloat(srcBottomRight.y) / info->texH[0], + xFixedToFloat(maskBottomRight.x) / info->texW[1], xFixedToFloat(maskBottomRight.y) / info->texH[1]); + VTX_OUT_6((float)(dstX + w), (float)dstY, + xFixedToFloat(srcTopRight.x) / info->texW[0], xFixedToFloat(srcTopRight.y) / info->texH[0], + xFixedToFloat(maskTopRight.x) / info->texW[1], xFixedToFloat(maskTopRight.y) / info->texH[1]); + } else { + VTX_OUT_4((float)dstX, (float)dstY, + xFixedToFloat(srcTopLeft.x) / info->texW[0], xFixedToFloat(srcTopLeft.y) / info->texH[0]); + VTX_OUT_4((float)dstX, (float)(dstY + h), + xFixedToFloat(srcBottomLeft.x) / info->texW[0], xFixedToFloat(srcBottomLeft.y) / info->texH[0]); + VTX_OUT_4((float)(dstX + w), (float)(dstY + h), + xFixedToFloat(srcBottomRight.x) / info->texW[0], xFixedToFloat(srcBottomRight.y) / info->texH[0]); + VTX_OUT_4((float)(dstX + w), (float)dstY, + xFixedToFloat(srcTopRight.x) / info->texW[0], xFixedToFloat(srcTopRight.y) / info->texH[0]); + } if (IS_R300_VARIANT || IS_AVIVO_VARIANT) { OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D | R300_DC_FREE_3D); @@ -2014,7 +2049,8 @@ static void FUNC_NAME(RadeonCompositeTile)(PixmapPtr pDst, LEAVE_DRAW(0); } -#undef VTX_OUT +#undef VTX_OUT_4 +#undef VTX_OUT_6 static void FUNC_NAME(RadeonComposite)(PixmapPtr pDst, int srcX, int srcY, -- 1.5.4.5