From a43a6005d4a68e2441fd370bed53727d364e4d57 Mon Sep 17 00:00:00 2001 From: dafox Date: Mon, 3 Jan 2011 13:04:44 +0100 Subject: [PATCH 1/3] Implement RGB formats for textured-Xv with pass-through shader (i.e. no hue/contrast/brightness/saturation adjustment) --- src/radeon_textured_video.c | 152 ++++- src/radeon_textured_video_pixelshaders.c | 1086 ++++++++++++++++++++++++++++++ src/radeon_textured_videofuncs.c | 1001 ++-------------------------- 3 files changed, 1288 insertions(+), 951 deletions(-) create mode 100644 src/radeon_textured_video_pixelshaders.c diff --git a/src/radeon_textured_video.c b/src/radeon_textured_video.c index 36bcb56..f61fbde 100644 --- a/src/radeon_textured_video.c +++ b/src/radeon_textured_video.c @@ -43,6 +43,113 @@ #include #include "fourcc.h" +/// Begin RGB FOURCCs +/// NOTE: This is copy&pasted from "radeon_video.c", but looks like +/// it should probably be part of "xorg/fourcc.h" +/// Also: "xorg/fourcc.h" lacks a convenient way of defining FOURCCs + +#define FOURCC(a, b, c, d) (((uint32_t)(unsigned char)(a) << 0)|((uint32_t)(unsigned char)(b) << 8)|((uint32_t)(unsigned char)(c) << 16)|((uint32_t)(unsigned char)(d) << 24)) +#define FOURCC_INVALID FOURCC(0, 0, 0, 0) +// fourccs: http://abcavi.kibi.ru/fourcc.php + +/* Note: GUIDs are bogus... 
- but nothing uses them anyway */ + +#define FOURCC_RGBA32 FOURCC('R', 'G', 'B', 'A') + +#define XVIMAGE_RGBA32(byte_order) \ + { \ + FOURCC_RGBA32, \ + XvRGB, \ + byte_order, \ + { 'R', 'G', 'B', 'A', \ + 0x00,0x00,0x00,0x10,0x80,0x00,0x00,0xAA,0x00,0x38,0x9B,0x71}, \ + 32, \ + XvPacked, \ + 1, \ + 32, 0x00FF0000, 0x0000FF00, 0x000000FF, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + {'A', 'R', 'G', 'B', \ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, \ + XvTopToBottom \ + } + +#define FOURCC_RGBT16 FOURCC('R', 'G', 'B', 'T') + +#define XVIMAGE_RGBT16(byte_order) \ + { \ + FOURCC_RGBT16, \ + XvRGB, \ + byte_order, \ + { 'R', 'G', 'B', 'T', \ + 0x00,0x00,0x00,0x10,0x80,0x00,0x00,0xAA,0x00,0x38,0x9B,0x71}, \ + 16, \ + XvPacked, \ + 1, \ + 16, 0x00007C00, 0x000003E0, 0x0000001F, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + {'A', 'R', 'G', 'B', \ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, \ + XvTopToBottom \ + } + +#define FOURCC_RGB16 FOURCC('R', 'G', 'B', '2') + +#define XVIMAGE_RGB16(byte_order) \ + { \ + FOURCC_RGB16, \ + XvRGB, \ + byte_order, \ + { 'R', 'G', 'B', 0x00, \ + 0x00,0x00,0x00,0x10,0x80,0x00,0x00,0xAA,0x00,0x38,0x9B,0x71}, \ + 16, \ + XvPacked, \ + 1, \ + 16, 0x0000F800, 0x000007E0, 0x0000001F, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + {'R', 'G', 'B', \ + 0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, \ + XvTopToBottom \ + } + +#define FOURCC_RGB15 FOURCC('R', 'G', 'B', 'O') + +#define XVIMAGE_RGB15(byte_order) \ + { \ + FOURCC_RGB15, \ + XvRGB, \ + byte_order, \ + { 'R', 'G', 'B', 'O', \ + 0x00,0x00,0x00,0x10,0x80,0x00,0x00,0xAA,0x00,0x38,0x9B,0x71}, \ + 16, \ + XvPacked, \ + 1, \ + 15, 0x00007C00, 0x000003E0, 0x0000001F, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + {'R', 'G', 'B', \ + 0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, \ + XvTopToBottom \ + } + +#define FOURCC_BGR15 FOURCC('B', 'G', 'R', 'O') + +#define XVIMAGE_BGR15(byte_order) \ + { \ + FOURCC_BGR15, \ + XvRGB, \ + byte_order, \ + { 'B', 'G', 'R', 'O', \ + 
0x00,0x00,0x00,0x10,0x80,0x00,0x00,0xAA,0x00,0x38,0x9B,0x71}, \ + 16, \ + XvPacked, \ + 1, \ + 15, 0x0000001F, 0x000003E0, 0x00007C00, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + {'B', 'G', 'R', \ + 0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, \ + XvTopToBottom \ + } +/// End RGB FOURCCs + extern void R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv); @@ -302,8 +409,16 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn, dstPitch2 = RADEON_ALIGN(dstPitch >> 1, hw_align); } break; + case FOURCC_RGBA32: + srcPitch = width << 2; + dstPitch = RADEON_ALIGN(dst_width << 2, hw_align); /* 32bpp: round the pitch up with RADEON_ALIGN, matching the 16bpp and planar cases in this switch */ + break; case FOURCC_UYVY: case FOURCC_YUY2: + case FOURCC_RGB15: + case FOURCC_BGR15: + case FOURCC_RGB16: + case FOURCC_RGBT16: default: dstPitch = RADEON_ALIGN(dst_width << 1, hw_align); srcPitch = (width << 1); @@ -442,15 +557,29 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn, break; case FOURCC_UYVY: case FOURCC_YUY2: + case FOURCC_RGB15: + case FOURCC_BGR15: + case FOURCC_RGB16: + case FOURCC_RGBT16: default: if (info->ChipFamily >= CHIP_FAMILY_R600) R600CopyData(pScrn, buf + (top * srcPitch), - pPriv->src_addr + (top * dstPitch), - srcPitch, dstPitch, nlines, width, 2); + pPriv->src_addr + (top * dstPitch), + srcPitch, dstPitch, nlines, width, 2); else RADEONCopyData(pScrn, buf + (top * srcPitch), - pPriv->src_addr + (top * dstPitch), - srcPitch, dstPitch, nlines, width, 2); + pPriv->src_addr + (top * dstPitch), + srcPitch, dstPitch, nlines, width, 2); + break; + case FOURCC_RGBA32: + if (info->ChipFamily >= CHIP_FAMILY_R600) + R600CopyData(pScrn, buf + (top * srcPitch), + pPriv->src_addr + (top * dstPitch), + srcPitch, dstPitch, nlines, width * 4, 1); + else + RADEONCopyData(pScrn, buf + (top * srcPitch), + pPriv->src_addr + (top * dstPitch), + srcPitch, dstPitch, nlines, width, 4); break; } @@ -632,10 +761,23 @@ static Atom xvBrightness, xvContrast, xvSaturation, xvHue; static Atom xvGamma, xvColorspace; static Atom xvCRTC; -#define NUM_IMAGES 4 +#define NUM_IMAGES 
9 static XF86ImageRec Images[NUM_IMAGES] = { +#if X_BYTE_ORDER == X_BIG_ENDIAN + XVIMAGE_RGBA32(MSBFirst), + XVIMAGE_RGBT16(MSBFirst), + XVIMAGE_RGB16(MSBFirst), + XVIMAGE_RGB15(MSBFirst), + XVIMAGE_BGR15(MSBFirst), +#else + XVIMAGE_RGBA32(LSBFirst), + XVIMAGE_RGBT16(LSBFirst), + XVIMAGE_RGB16(LSBFirst), + XVIMAGE_RGB15(LSBFirst), + XVIMAGE_BGR15(LSBFirst), +#endif XVIMAGE_YUY2, XVIMAGE_YV12, XVIMAGE_I420, diff --git a/src/radeon_textured_video_pixelshaders.c b/src/radeon_textured_video_pixelshaders.c new file mode 100644 index 0000000..be7cef1 --- /dev/null +++ b/src/radeon_textured_video_pixelshaders.c @@ -0,0 +1,1086 @@ +/* + * Copyright 2008 Alex Deucher + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * + * Based on radeon_exa_render.c and kdrive ati_video.c by Eric Anholt, et al. 
+ * + */ + +static void FUNC_NAME(R300SetupPixelShaderBicubicEnabled)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) { + RADEONInfoPtr info = RADEONPTR(pScrn); + ACCEL_PREAMBLE(); + BEGIN_ACCEL(79); + + /* 4 components: 2 for tex0 and 2 for tex1 */ + OUT_ACCEL_REG(R300_RS_COUNT, ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) | R300_RS_COUNT_HIRES_EN)); + + /* R300_INST_COUNT_RS - highest RS instruction used */ + OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1)); + + /* Pixel stack frame size. */ + OUT_ACCEL_REG(R300_US_PIXSIZE, 5); + + /* Indirection levels */ + OUT_ACCEL_REG(R300_US_CONFIG, ((2 << R300_NLEVEL_SHIFT) | R300_FIRST_TEX)); + + /* Set nodes. */ + OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | + R300_ALU_CODE_SIZE(14) | + R300_TEX_CODE_OFFSET(0) | + R300_TEX_CODE_SIZE(6))); + + /* Nodes are allocated highest first, but executed lowest first */ + OUT_ACCEL_REG(R300_US_CODE_ADDR_0, 0); + OUT_ACCEL_REG(R300_US_CODE_ADDR_1, (R300_ALU_START(0) | + R300_ALU_SIZE(0) | + R300_TEX_START(0) | + R300_TEX_SIZE(0))); + OUT_ACCEL_REG(R300_US_CODE_ADDR_2, (R300_ALU_START(1) | + R300_ALU_SIZE(9) | + R300_TEX_START(1) | + R300_TEX_SIZE(0))); + OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(11) | + R300_ALU_SIZE(2) | + R300_TEX_START(2) | + R300_TEX_SIZE(3) | + R300_RGBA_OUT)); + + /* ** BICUBIC FP ** */ + + /* texcoord0 => temp0 + * texcoord1 => temp1 */ + + // first node + /* TEX temp2, temp1.rrr0, tex1, 1D */ + OUT_ACCEL_REG(R300_US_TEX_INST(0), (R300_TEX_INST(R300_TEX_INST_LD) | + R300_TEX_ID(1) | + R300_TEX_SRC_ADDR(1) | + R300_TEX_DST_ADDR(2))); + + /* MOV temp1.r, temp1.ggg0 */ + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0))); + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(1) | + R300_ALU_RGB_ADDRD(1) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R))); + 
OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDRD(1) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + + + // second node + /* TEX temp1, temp1, tex1, 1D */ + OUT_ACCEL_REG(R300_US_TEX_INST(1), (R300_TEX_INST(R300_TEX_INST_LD) | + R300_TEX_ID(1) | + R300_TEX_SRC_ADDR(1) | + R300_TEX_DST_ADDR(1))); + + /* MUL temp3.rg, temp2.ggg0, const0.rgb0 */ + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0))); + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(2) | + R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) | + R300_ALU_RGB_ADDRD(3) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(3) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + + + /* MUL temp2.rg, temp2.rrr0, const0.rgb */ + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0))); + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(2) | + R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) | + R300_ALU_RGB_ADDRD(2) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | 
+ R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(2) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + + /* MAD temp4.rg, temp1.ggg0, const1.rgb, temp3.rgb0 */ + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(1) | + R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | + R300_ALU_RGB_ADDR2(3) | + R300_ALU_RGB_ADDRD(4) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(4) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + + /* MAD temp5.rg, temp1.ggg0, const1.rgb, temp2.rgb0 */ + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(1) | + R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | + R300_ALU_RGB_ADDR2(2) | + R300_ALU_RGB_ADDRD(5) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(5) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + + /* MAD temp3.rg, temp1.rrr0, const1.rgb, temp3.rgb0 */ + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5), 
(R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(1) | + R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | + R300_ALU_RGB_ADDR2(3) | + R300_ALU_RGB_ADDRD(3) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(3) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + + /* MAD temp1.rg, temp1.rrr0, const1.rgb, temp2.rgb0 */ + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(1) | + R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | + R300_ALU_RGB_ADDR2(2) | + R300_ALU_RGB_ADDRD(1) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(1) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + + /* ADD temp1.rg, temp0.rgb0, temp1.rgb0 */ + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_ADDR2(1) | + R300_ALU_RGB_ADDRD(1) | + 
R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(1) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + + /* ADD temp2.rg, temp0.rgb0, temp3.rgb0 */ + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_ADDR2(3) | + R300_ALU_RGB_ADDRD(2) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(2) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + + /* ADD temp3.rg, temp0.rgb0, temp5.rgb0 */ + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_ADDR2(5) | + R300_ALU_RGB_ADDRD(3) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(3) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + + 
/* ADD temp0.rg, temp0.rgb0, temp4.rgb0 */ + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(10), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(10), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_ADDR2(4) | + R300_ALU_RGB_ADDRD(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(10), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(10), (R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + + + // third node + /* TEX temp4, temp1.rg--, tex0, 1D */ + OUT_ACCEL_REG(R300_US_TEX_INST(2), (R300_TEX_INST(R300_TEX_INST_LD) | + R300_TEX_ID(0) | + R300_TEX_SRC_ADDR(1) | + R300_TEX_DST_ADDR(4))); + + /* TEX temp3, temp3.rg--, tex0, 1D */ + OUT_ACCEL_REG(R300_US_TEX_INST(3), (R300_TEX_INST(R300_TEX_INST_LD) | + R300_TEX_ID(0) | + R300_TEX_SRC_ADDR(3) | + R300_TEX_DST_ADDR(3))); + + /* TEX temp5, temp2.rg--, tex0, 1D */ + OUT_ACCEL_REG(R300_US_TEX_INST(4), (R300_TEX_INST(R300_TEX_INST_LD) | + R300_TEX_ID(0) | + R300_TEX_SRC_ADDR(2) | + R300_TEX_DST_ADDR(5))); + + /* TEX temp0, temp0.rg--, tex0, 1D */ + OUT_ACCEL_REG(R300_US_TEX_INST(5), (R300_TEX_INST(R300_TEX_INST_LD) | + R300_TEX_ID(0) | + R300_TEX_SRC_ADDR(0) | + R300_TEX_DST_ADDR(0))); + + /* LRP temp3, temp1.bbbb, temp4, temp3 -> + * - PRESUB temps, temp4 - temp3 + * - MAD temp3, temp1.bbbb, temps, temp3 */ + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(11), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | + R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0))); + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(11), 
(R300_ALU_RGB_ADDR0(3) | + R300_ALU_RGB_ADDR1(4) | + R300_ALU_RGB_ADDR2(1) | + R300_ALU_RGB_ADDRD(3) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(11), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(11), (R300_ALU_ALPHA_ADDR0(3) | + R300_ALU_ALPHA_ADDR1(4) | + R300_ALU_ALPHA_ADDR2(1) | + R300_ALU_ALPHA_ADDRD(3) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A))); + + /* LRP temp0, temp1.bbbb, temp5, temp0 -> + * - PRESUB temps, temp5 - temp0 + * - MAD temp0, temp1.bbbb, temps, temp0 */ + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(12), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | + R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0) | + R300_ALU_RGB_INSERT_NOP)); + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(12), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_ADDR1(5) | + R300_ALU_RGB_ADDR2(1) | + R300_ALU_RGB_ADDRD(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(12), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(12), (R300_ALU_ALPHA_ADDR0(0) | + R300_ALU_ALPHA_ADDR1(5) | + R300_ALU_ALPHA_ADDR2(1) | + R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A))); + + /* LRP output, temp2.bbbb, temp3, temp0 -> + * - PRESUB temps, temp3 - temp0 + * - MAD output, temp2.bbbb, temps, temp0 */ + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(13), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | 
+ R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0))); + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(13), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_ADDR1(3) | + R300_ALU_RGB_ADDR2(2) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(13), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(13), (R300_ALU_ALPHA_ADDR0(0) | + R300_ALU_ALPHA_ADDR1(3) | + R300_ALU_ALPHA_ADDR2(2) | + R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A))); + + /* Shader constants. */ + OUT_ACCEL_REG(R300_US_ALU_CONST_R(0), F_TO_24(1.0/(float)pPriv->w)); + OUT_ACCEL_REG(R300_US_ALU_CONST_G(0), 0); + OUT_ACCEL_REG(R300_US_ALU_CONST_B(0), 0); + OUT_ACCEL_REG(R300_US_ALU_CONST_A(0), 0); + + OUT_ACCEL_REG(R300_US_ALU_CONST_R(1), 0); + OUT_ACCEL_REG(R300_US_ALU_CONST_G(1), F_TO_24(1.0/(float)pPriv->h)); + OUT_ACCEL_REG(R300_US_ALU_CONST_B(1), 0); + OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), 0); + + FINISH_ACCEL(); +} + +static void FUNC_NAME(R300SetupPixelShaderBicubicDisabled)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) { + RADEONInfoPtr info = RADEONPTR(pScrn); + ACCEL_PREAMBLE(); + BEGIN_ACCEL(11); + /* 2 components: 2 for tex0 */ + OUT_ACCEL_REG(R300_RS_COUNT, ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | R300_RS_COUNT_HIRES_EN)); + /* R300_INST_COUNT_RS - highest RS instruction used */ + OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); + + OUT_ACCEL_REG(R300_US_PIXSIZE, 0); /* highest temp used */ + + /* Indirection levels */ + OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) | R300_FIRST_TEX)); + + OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | + R300_ALU_CODE_SIZE(1) | + R300_TEX_CODE_OFFSET(0) | + R300_TEX_CODE_SIZE(1))); + + OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) | + R300_ALU_SIZE(0) | + R300_TEX_START(0) | + R300_TEX_SIZE(0) | + R300_RGBA_OUT)); + + /* 
tex inst */ + OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) | + R300_TEX_DST_ADDR(0) | + R300_TEX_ID(0) | + R300_TEX_INST(R300_TEX_INST_LD))); + + /* ALU inst */ + /* RGB */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR_0, (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_ADDR1(0) | + R300_ALU_RGB_ADDR2(0) | + R300_ALU_RGB_ADDRD(0) | + R300_ALU_RGB_OMASK((R300_ALU_RGB_MASK_R | + R300_ALU_RGB_MASK_G | + R300_ALU_RGB_MASK_B)) | + R300_ALU_RGB_TARGET_A)); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST_0, (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | + R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | + R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | + R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | + R300_ALU_RGB_CLAMP)); + /* Alpha */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR_0, (R300_ALU_ALPHA_ADDR0(0) | + R300_ALU_ALPHA_ADDR1(0) | + R300_ALU_ALPHA_ADDR2(0) | + R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | + R300_ALU_ALPHA_TARGET_A | + R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST_0, (R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_A) | + R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_1_0) | + R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE) | + R300_ALU_ALPHA_CLAMP)); + FINISH_ACCEL(); +} + +static void FUNC_NAME(R300SetupPixelShaderYUVtoRGB)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) { + RADEONInfoPtr info = RADEONPTR(pScrn); + ACCEL_PREAMBLE(); + + /* + * y' = y - .0625 + * u' = u - .5 + * v' = v - .5; + * + * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v' + * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v' + * b = 1.1643 * y' + 2.017 * u' + 0.0 * v' + * + * 
DP3 might look like the straightforward solution + * but we'd need to move the texture yuv values in + * the same reg for this to work. Therefore use MADs. + * Brightness just adds to the off constant. + * Contrast is multiplication of luminance. + * Saturation and hue change the u and v coeffs. + * Default values (before adjustments - depend on colorspace): + * yco = 1.1643 + * uco = 0, -0.39173, 2.017 + * vco = 1.5958, -0.8129, 0 + * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r], + * -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g], + * -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b], + * + * temp = MAD(yco, yuv.yyyy, off) + * temp = MAD(uco, yuv.uuuu, temp) + * result = MAD(vco, yuv.vvvv, temp) + */ + /* TODO: don't recalc consts always */ + const float Loff = -0.0627; + const float Coff = -0.502; + float uvcosf, uvsinf; + float yco; + float uco[3], vco[3], off[3]; + float bright, cont, gamma; + int ref = pPriv->transform_index; + Bool needgamma = FALSE; + + cont = RTFContrast(pPriv->contrast); + bright = RTFBrightness(pPriv->brightness); + gamma = (float)pPriv->gamma / 1000.0; + uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue)); + uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue)); + /* overlay video also does pre-gamma contrast/sat adjust, should we? 
*/ + + yco = trans[ref].RefLuma * cont; + uco[0] = -trans[ref].RefRCr * uvsinf; + uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf; + uco[2] = trans[ref].RefBCb * uvcosf; + vco[0] = trans[ref].RefRCr * uvcosf; + vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf; + vco[2] = trans[ref].RefBCb * uvsinf; + off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright; + off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright; + off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright; + + if (gamma != 1.0) { + needgamma = TRUE; + /* note: gamma correction is out = in ^ gamma; + gpu can only do LG2/EX2 therefore we transform into + in ^ gamma = 2 ^ (log2(in) * gamma). + Lots of scalar ops, unfortunately (better solution?) - + without gamma that's 3 inst, with gamma it's 10... + could use different gamma factors per channel, + if that's of any use. */ + } + + if (pPriv->is_planar) { + BEGIN_ACCEL(needgamma ? 28 + 33 : 33); + /* 2 components: same 2 for tex0/1/2 */ + OUT_ACCEL_REG(R300_RS_COUNT, + ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | + R300_RS_COUNT_HIRES_EN)); + /* R300_INST_COUNT_RS - highest RS instruction used */ + OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); + + OUT_ACCEL_REG(R300_US_PIXSIZE, 2); /* highest temp used */ + + /* Indirection levels */ + OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) | + R300_FIRST_TEX)); + + OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | + R300_ALU_CODE_SIZE(needgamma ? 7 + 3 : 3) | + R300_TEX_CODE_OFFSET(0) | + R300_TEX_CODE_SIZE(3))); + + OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) | + R300_ALU_SIZE(needgamma ? 
7 + 2 : 2) | + R300_TEX_START(0) | + R300_TEX_SIZE(2) | + R300_RGBA_OUT)); + + /* tex inst */ + OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) | + R300_TEX_DST_ADDR(2) | + R300_TEX_ID(0) | + R300_TEX_INST(R300_TEX_INST_LD))); + OUT_ACCEL_REG(R300_US_TEX_INST_1, (R300_TEX_SRC_ADDR(0) | + R300_TEX_DST_ADDR(1) | + R300_TEX_ID(1) | + R300_TEX_INST(R300_TEX_INST_LD))); + OUT_ACCEL_REG(R300_US_TEX_INST_2, (R300_TEX_SRC_ADDR(0) | + R300_TEX_DST_ADDR(0) | + R300_TEX_ID(2) | + R300_TEX_INST(R300_TEX_INST_LD))); + + /* ALU inst */ + /* MAD temp2.rgb, const0.aaa, temp2.rgb, const0.rgb */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) | + R300_ALU_RGB_ADDR1(2) | + R300_ALU_RGB_ADDR2(0) | + R300_ALU_RGB_ADDRD(2) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) | + R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | + R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | + R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); + /* alpha nop, but need to set up alpha source for rgb usage */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) | + R300_ALU_ALPHA_ADDR1(2) | + R300_ALU_ALPHA_ADDR2(0) | + R300_ALU_ALPHA_ADDRD(2) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + + /* MAD temp2.rgb, const1.rgb, temp1.rgb, temp2.rgb */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) | + R300_ALU_RGB_ADDR1(1) | + R300_ALU_RGB_ADDR2(2) | + R300_ALU_RGB_ADDRD(2) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); + 
OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | + R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | + R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | + R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); + /* alpha nop */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(2) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + + /* MAD result.rgb, const2.rgb, temp0.rgb, temp2.rgb */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) | + R300_ALU_RGB_ADDR1(0) | + R300_ALU_RGB_ADDR2(2) | + R300_ALU_RGB_ADDRD(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) | + (needgamma ? 
0 : R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB)))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | + R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | + R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | + R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | + R300_ALU_RGB_CLAMP)); + /* write alpha 1 */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | + R300_ALU_ALPHA_TARGET_A)); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0))); + + if (needgamma) { + /* rgb temp0.r = op_sop, set up src0 reg */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3), + R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + /* alpha lg2 temp0, temp0.r */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + + /* rgb temp0.g = op_sop, set up src0 reg */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4), + R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + /* alpha lg2 temp0, temp0.g */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + 
OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + + /* rgb temp0.b = op_sop, set up src0 reg */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5), + R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + /* alpha lg2 temp0, temp0.b */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + + /* MUL const1, temp1, temp0 */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_ADDR1(0) | + R300_ALU_RGB_ADDR2(0) | + R300_ALU_RGB_ADDRD(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | + R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC0_AAA) | + R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | + R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); + /* alpha nop, but set up const1 */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(1)) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + + /* rgb out0.r = op_sop, set up src0 reg */ + 
OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_R))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7), + R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + /* alpha ex2 temp0, temp0.r */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + + /* rgb out0.g = op_sop, set up src0 reg */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_G))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8), + R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + /* alpha ex2 temp0, temp0.g */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + + /* rgb out0.b = op_sop, set up src0 reg */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_B))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9), + R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + /* alpha ex2 temp0, temp0.b */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | + 
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + } + } else { + BEGIN_ACCEL(needgamma ? 28 + 31 : 31); + /* 2 components */ + OUT_ACCEL_REG(R300_RS_COUNT, + ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | + R300_RS_COUNT_HIRES_EN)); + /* R300_INST_COUNT_RS - highest RS instruction used */ + OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); + + OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */ + + /* Indirection levels */ + OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) | + R300_FIRST_TEX)); + + OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | + R300_ALU_CODE_SIZE(needgamma ? 7 + 3 : 3) | + R300_TEX_CODE_OFFSET(0) | + R300_TEX_CODE_SIZE(1))); + + OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) | + R300_ALU_SIZE(needgamma ? 7 + 2 : 2) | + R300_TEX_START(0) | + R300_TEX_SIZE(0) | + R300_RGBA_OUT)); + + /* tex inst */ + OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) | + R300_TEX_DST_ADDR(0) | + R300_TEX_ID(0) | + R300_TEX_INST(R300_TEX_INST_LD))); + + /* ALU inst */ + /* MAD temp1.rgb, const0.aaa, temp0.ggg, const0.rgb */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) | + R300_ALU_RGB_ADDR1(0) | + R300_ALU_RGB_ADDR2(0) | + R300_ALU_RGB_ADDRD(1) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) | + R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_GGG) | + R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | + R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); + /* alpha nop, but need to set up alpha source for rgb usage */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) | + R300_ALU_ALPHA_ADDR1(0) | + R300_ALU_ALPHA_ADDR2(0) | + R300_ALU_ALPHA_ADDRD(0) | + 
R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + + /* MAD temp1.rgb, const1.rgb, temp0.bbb, temp1.rgb */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) | + R300_ALU_RGB_ADDR1(0) | + R300_ALU_RGB_ADDR2(1) | + R300_ALU_RGB_ADDRD(1) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | + R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_BBB) | + R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | + R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); + /* alpha nop */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + + /* MAD result.rgb, const2.rgb, temp0.rrr, temp1.rgb */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) | + R300_ALU_RGB_ADDR1(0) | + R300_ALU_RGB_ADDR2(1) | + R300_ALU_RGB_ADDRD(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) | + (needgamma ? 
0 : R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB)))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | + R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RRR) | + R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | + R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | + R300_ALU_RGB_CLAMP)); + /* write alpha 1 */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | + R300_ALU_ALPHA_TARGET_A)); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0))); + + if (needgamma) { + /* rgb temp0.r = op_sop, set up src0 reg */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3), + R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + /* alpha lg2 temp0, temp0.r */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + + /* rgb temp0.g = op_sop, set up src0 reg */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4), + R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + /* alpha lg2 temp0, temp0.g */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + 
OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + + /* rgb temp0.b = op_sop, set up src0 reg */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5), + R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + /* alpha lg2 temp0, temp0.b */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + + /* MUL const1, temp1, temp0 */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_ADDR1(0) | + R300_ALU_RGB_ADDR2(0) | + R300_ALU_RGB_ADDRD(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | + R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC0_AAA) | + R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | + R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); + /* alpha nop, but set up const1 */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(1)) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + + /* rgb out0.r = op_sop, set up src0 reg */ + 
OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_R))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7), + R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + /* alpha ex2 temp0, temp0.r */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + + /* rgb out0.g = op_sop, set up src0 reg */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_G))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8), + R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + /* alpha ex2 temp0, temp0.g */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + + /* rgb out0.b = op_sop, set up src0 reg */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_B))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9), + R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + /* alpha ex2 temp0, temp0.b */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | + 
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + } + } + + /* Shader constants. */ + /* constant 0: off, yco */ + OUT_ACCEL_REG(R300_US_ALU_CONST_R(0), F_TO_24(off[0])); + OUT_ACCEL_REG(R300_US_ALU_CONST_G(0), F_TO_24(off[1])); + OUT_ACCEL_REG(R300_US_ALU_CONST_B(0), F_TO_24(off[2])); + OUT_ACCEL_REG(R300_US_ALU_CONST_A(0), F_TO_24(yco)); + /* constant 1: uco */ + OUT_ACCEL_REG(R300_US_ALU_CONST_R(1), F_TO_24(uco[0])); + OUT_ACCEL_REG(R300_US_ALU_CONST_G(1), F_TO_24(uco[1])); + OUT_ACCEL_REG(R300_US_ALU_CONST_B(1), F_TO_24(uco[2])); + OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), F_TO_24(gamma)); + /* constant 2: vco */ + OUT_ACCEL_REG(R300_US_ALU_CONST_R(2), F_TO_24(vco[0])); + OUT_ACCEL_REG(R300_US_ALU_CONST_G(2), F_TO_24(vco[1])); + OUT_ACCEL_REG(R300_US_ALU_CONST_B(2), F_TO_24(vco[2])); + OUT_ACCEL_REG(R300_US_ALU_CONST_A(2), F_TO_24(0.0)); + + FINISH_ACCEL(); +} + +static void FUNC_NAME(R300SetupPixelShaderRGB)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) { + RADEONInfoPtr info = RADEONPTR(pScrn); + ACCEL_PREAMBLE(); + /* Pass-through pixel shader meant for RGB source formats: + * one texture fetch from texture 0 followed by a single + * MAD(texel, 1, 0) on the RGB and alpha channels, sent to the output. + * Still to do, in RGB space: gamma correction, contrast, + * saturation and brightness; for now it is a raw pass-through. + * Emits 11 register writes, matching BEGIN_ACCEL(11) below. + */ + + BEGIN_ACCEL(11); + + /** + * Rasterizer setup: 2 interpolated texture-coordinate components + * are used, i.e. the (u,v) pair at which texture 0 is sampled. + * R300_RS_COUNT_HIRES_EN is set as well; its effect is not visible here. + */ + OUT_ACCEL_REG(R300_RS_COUNT, ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | R300_RS_COUNT_HIRES_EN)); + + // FIXME: what does this mean? 'number of rasterizer instructions' + /* R300_INST_COUNT_RS - highest RS instruction used */ + OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); + + /** + * Index of the highest temporary register used by this shader + * (temp[0..n] are valid); 0 because only temp 0 is referenced below. + */ + OUT_ACCEL_REG(R300_US_PIXSIZE, 0); /* highest temp used */ + + // FIXME: What does this mean? 
'specifies the valid indirection levels' + // current setting (0) means 'Level 3 only (normal DX7-style texturing)' + /* Indirection levels */ + OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) | R300_FIRST_TEX)); + + // FIXME: confirm against the register reference. Going by the field names, + // CODE_OFFSET sets offset and size of the ALU and TEX instruction code, and + // CODE_ADDR_3 sets up the one node used here, marked R300_RGBA_OUT. + OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | + R300_ALU_CODE_SIZE(1) | + R300_TEX_CODE_OFFSET(0) | + R300_TEX_CODE_SIZE(1))); + + OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) | + R300_ALU_SIZE(0) | + R300_TEX_START(0) | + R300_TEX_SIZE(0) | + R300_RGBA_OUT)); + + /** + * Texture instruction 0: sample texture 0 (R300_TEX_ID(0)) using + * temp 0 as the source address and store the texel back into temp 0. + */ + OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) | + R300_TEX_DST_ADDR(0) | + R300_TEX_ID(0) | + R300_TEX_INST(R300_TEX_INST_LD))); + + /** + * RGB ALU instruction 0 (the only ALU instruction in this shader): + * src0 = src1 = src2 = temp 0, the texel fetched above + * dest address is temp 0, but no WMASK is given, only an OMASK + * OMASK = R|G|B with TARGET_A, so the result presumably goes to + * the colour output instead of back into a temp (original note: + * writes pixel to output fifo of render target A) + * + * A = src0.rgb + * B = 1.0 + * C = 0.0 + * + * Result: MAD(A, B, C) = temp0.rgb * 1 + 0, a plain copy of the + * texel colour, with R300_ALU_RGB_CLAMP applied to the result + * (presumably a clamp to [0,1] -- TODO confirm). + * MAD(pixel_stack[0], 1, 0) + */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR_0, (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_ADDR1(0) | + R300_ALU_RGB_ADDR2(0) | + R300_ALU_RGB_ADDRD(0) | + R300_ALU_RGB_OMASK((R300_ALU_RGB_MASK_R | + R300_ALU_RGB_MASK_G | + R300_ALU_RGB_MASK_B)) | + R300_ALU_RGB_TARGET_A)); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST_0, (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | + R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | + R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | + R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | + R300_ALU_RGB_CLAMP)); + /* Alpha: MAD(src0.a, 1, 0) on temp 0, the same pass-through as for RGB */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR_0, (R300_ALU_ALPHA_ADDR0(0) | + R300_ALU_ALPHA_ADDR1(0) | + 
R300_ALU_ALPHA_ADDR2(0) | + R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | + R300_ALU_ALPHA_TARGET_A | + R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST_0, (R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_A) | + R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_1_0) | + R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE) | + R300_ALU_ALPHA_CLAMP)); + FINISH_ACCEL(); +} diff --git a/src/radeon_textured_videofuncs.c b/src/radeon_textured_videofuncs.c index a22c416..8f4e2d7 100644 --- a/src/radeon_textured_videofuncs.c +++ b/src/radeon_textured_videofuncs.c @@ -87,6 +87,8 @@ do { \ #endif /* !ACCEL_CP */ +#include "radeon_textured_video_pixelshaders.c" + static Bool FUNC_NAME(RADEONPrepareTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) { @@ -1179,13 +1181,16 @@ FUNC_NAME(R300PrepareTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) PixmapPtr pPixmap = pPriv->pPixmap; struct radeon_exa_pixmap_priv *driver_priv; struct radeon_bo *src_bo = pPriv->src_bo[pPriv->currentBuffer]; - uint32_t txfilter, txformat0, txformat1, txoffset, txpitch; + uint32_t txfilter, txformat0, txformat1, txoffset, txpitch; /* txpitch is in pixels */ uint32_t dst_pitch, dst_format; uint32_t txenable, colorpitch, bicubic_offset; uint32_t output_fmt; int pixel_shift; - ACCEL_PREAMBLE(); + // is_rgb_format set to TRUE if the pixel-shader shouldn't perform YUV->RGB conversion + // TODO: See if we can make this part of pPriv, similar to pPriv->is_planar + Bool is_rgb_format = FALSE; + ACCEL_PREAMBLE(); #ifdef XF86DRM_MODE if (info->cs) { int ret; @@ -1280,16 +1285,47 @@ FUNC_NAME(R300PrepareTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) txformat1 = R300_TX_FORMAT_X8 | 
R300_TX_FORMAT_CACHE_HALF_REGION_0; txpitch = pPriv->src_pitch; } else { - if (pPriv->id == FOURCC_UYVY) - txformat1 = R300_TX_FORMAT_YVYU422; - else - txformat1 = R300_TX_FORMAT_VYUY422; + txpitch = pPriv->src_pitch / 2; // only RGBA32 differs, everything else is 2 bytes/pixel + switch (pPriv->id) { + case FOURCC_RGBA32: /* A8R8G8B8 */ + txformat1 = R300_EASY_TX_FORMAT(W, Z, Y, X, W8Z8Y8X8); + txpitch = pPriv->src_pitch / 4; + is_rgb_format = TRUE; + break; + case FOURCC_RGB16: /* R5G6B5 */ + txformat1 = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5); + is_rgb_format = TRUE; + break; + case FOURCC_RGBT16: /* A1R5G5B5 */ + txformat1 = R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5); + is_rgb_format = TRUE; + break; + case FOURCC_RGB15: /* R5G5B5 */ + txformat1 = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W1Z5Y5X5); + is_rgb_format = TRUE; + break; + case FOURCC_BGR15: /* B5G5R5 */ + txformat1 = R300_EASY_TX_FORMAT(Z, Y, X, ONE, W1Z5Y5X5); + is_rgb_format = TRUE; + break; + case FOURCC_UYVY: + txformat1 = R300_TX_FORMAT_YVYU422; + break; + case FOURCC_YUY2: + case FOURCC_YV12: + case FOURCC_I420: + case FOURCC_IA44: + txformat1 = R300_TX_FORMAT_VYUY422; + break; + default: + ErrorF("radeon: FOURCC 0x%08x is not yet supported!\n", pPriv->id); + ErrorF("radeon: You get R300_TX_FORMAT_VYUY422 (YUY2, YV12, I420 and IA44) instead.\n"); + txformat1 = R300_TX_FORMAT_VYUY422; + break; + } - if (pPriv->bicubic_state != BICUBIC_OFF) + if ((pPriv->bicubic_state != BICUBIC_OFF) && (!is_rgb_format)) txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP; - - /* pitch is in pixels */ - txpitch = pPriv->src_pitch / 2; } txpitch -= 1; @@ -1475,947 +1511,17 @@ FUNC_NAME(R300PrepareTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) /* setup pixel shader */ if (pPriv->bicubic_state != BICUBIC_OFF) { if (pPriv->bicubic_enabled) { - BEGIN_ACCEL(79); - - /* 4 components: 2 for tex0 and 2 for tex1 */ - OUT_ACCEL_REG(R300_RS_COUNT, ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) | - R300_RS_COUNT_HIRES_EN)); - - /* 
R300_INST_COUNT_RS - highest RS instruction used */ - OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1)); - - /* Pixel stack frame size. */ - OUT_ACCEL_REG(R300_US_PIXSIZE, 5); - - /* Indirection levels */ - OUT_ACCEL_REG(R300_US_CONFIG, ((2 << R300_NLEVEL_SHIFT) | - R300_FIRST_TEX)); - - /* Set nodes. */ - OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | - R300_ALU_CODE_SIZE(14) | - R300_TEX_CODE_OFFSET(0) | - R300_TEX_CODE_SIZE(6))); - - /* Nodes are allocated highest first, but executed lowest first */ - OUT_ACCEL_REG(R300_US_CODE_ADDR_0, 0); - OUT_ACCEL_REG(R300_US_CODE_ADDR_1, (R300_ALU_START(0) | - R300_ALU_SIZE(0) | - R300_TEX_START(0) | - R300_TEX_SIZE(0))); - OUT_ACCEL_REG(R300_US_CODE_ADDR_2, (R300_ALU_START(1) | - R300_ALU_SIZE(9) | - R300_TEX_START(1) | - R300_TEX_SIZE(0))); - OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(11) | - R300_ALU_SIZE(2) | - R300_TEX_START(2) | - R300_TEX_SIZE(3) | - R300_RGBA_OUT)); - - /* ** BICUBIC FP ** */ - - /* texcoord0 => temp0 - * texcoord1 => temp1 */ - - // first node - /* TEX temp2, temp1.rrr0, tex1, 1D */ - OUT_ACCEL_REG(R300_US_TEX_INST(0), (R300_TEX_INST(R300_TEX_INST_LD) | - R300_TEX_ID(1) | - R300_TEX_SRC_ADDR(1) | - R300_TEX_DST_ADDR(2))); - - /* MOV temp1.r, temp1.ggg0 */ - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0))); - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(1) | - R300_ALU_RGB_ADDRD(1) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDRD(1) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - - - // second node - /* TEX 
temp1, temp1, tex1, 1D */ - OUT_ACCEL_REG(R300_US_TEX_INST(1), (R300_TEX_INST(R300_TEX_INST_LD) | - R300_TEX_ID(1) | - R300_TEX_SRC_ADDR(1) | - R300_TEX_DST_ADDR(1))); - - /* MUL temp3.rg, temp2.ggg0, const0.rgb0 */ - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0))); - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(2) | - R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) | - R300_ALU_RGB_ADDRD(3) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(3) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - - - /* MUL temp2.rg, temp2.rrr0, const0.rgb */ - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0))); - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(2) | - R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) | - R300_ALU_RGB_ADDRD(2) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(2) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - - /* MAD temp4.rg, temp1.ggg0, const1.rgb, temp3.rgb0 */ - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | - 
R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(1) | - R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | - R300_ALU_RGB_ADDR2(3) | - R300_ALU_RGB_ADDRD(4) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(4) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - - /* MAD temp5.rg, temp1.ggg0, const1.rgb, temp2.rgb0 */ - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(1) | - R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | - R300_ALU_RGB_ADDR2(2) | - R300_ALU_RGB_ADDRD(5) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(5) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - - /* MAD temp3.rg, temp1.rrr0, const1.rgb, temp3.rgb0 */ - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(1) | - R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | - R300_ALU_RGB_ADDR2(3) | - R300_ALU_RGB_ADDRD(3) | - 
R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(3) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - - /* MAD temp1.rg, temp1.rrr0, const1.rgb, temp2.rgb0 */ - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(1) | - R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | - R300_ALU_RGB_ADDR2(2) | - R300_ALU_RGB_ADDRD(1) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(1) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - - /* ADD temp1.rg, temp0.rgb0, temp1.rgb0 */ - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) | - R300_ALU_RGB_ADDR2(1) | - R300_ALU_RGB_ADDRD(1) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), 
(R300_ALU_ALPHA_ADDRD(1) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - - /* ADD temp2.rg, temp0.rgb0, temp3.rgb0 */ - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) | - R300_ALU_RGB_ADDR2(3) | - R300_ALU_RGB_ADDRD(2) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(2) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - - /* ADD temp3.rg, temp0.rgb0, temp5.rgb0 */ - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) | - R300_ALU_RGB_ADDR2(5) | - R300_ALU_RGB_ADDRD(3) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(3) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - - /* ADD temp0.rg, temp0.rgb0, temp4.rgb0 */ - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(10), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB))); - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(10), (R300_ALU_RGB_ADDR0(0) | - 
R300_ALU_RGB_ADDR2(4) | - R300_ALU_RGB_ADDRD(0) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R | R300_ALU_RGB_MASK_G))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(10), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(10), (R300_ALU_ALPHA_ADDRD(0) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - - - // third node - /* TEX temp4, temp1.rg--, tex0, 1D */ - OUT_ACCEL_REG(R300_US_TEX_INST(2), (R300_TEX_INST(R300_TEX_INST_LD) | - R300_TEX_ID(0) | - R300_TEX_SRC_ADDR(1) | - R300_TEX_DST_ADDR(4))); - - /* TEX temp3, temp3.rg--, tex0, 1D */ - OUT_ACCEL_REG(R300_US_TEX_INST(3), (R300_TEX_INST(R300_TEX_INST_LD) | - R300_TEX_ID(0) | - R300_TEX_SRC_ADDR(3) | - R300_TEX_DST_ADDR(3))); - - /* TEX temp5, temp2.rg--, tex0, 1D */ - OUT_ACCEL_REG(R300_US_TEX_INST(4), (R300_TEX_INST(R300_TEX_INST_LD) | - R300_TEX_ID(0) | - R300_TEX_SRC_ADDR(2) | - R300_TEX_DST_ADDR(5))); - - /* TEX temp0, temp0.rg--, tex0, 1D */ - OUT_ACCEL_REG(R300_US_TEX_INST(5), (R300_TEX_INST(R300_TEX_INST_LD) | - R300_TEX_ID(0) | - R300_TEX_SRC_ADDR(0) | - R300_TEX_DST_ADDR(0))); - - /* LRP temp3, temp1.bbbb, temp4, temp3 -> - * - PRESUB temps, temp4 - temp3 - * - MAD temp3, temp1.bbbb, temps, temp3 */ - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(11), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | - R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0))); - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(11), (R300_ALU_RGB_ADDR0(3) | - R300_ALU_RGB_ADDR1(4) | - R300_ALU_RGB_ADDR2(1) | - R300_ALU_RGB_ADDRD(3) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(11), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) | - 
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(11), (R300_ALU_ALPHA_ADDR0(3) | - R300_ALU_ALPHA_ADDR1(4) | - R300_ALU_ALPHA_ADDR2(1) | - R300_ALU_ALPHA_ADDRD(3) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A))); - - /* LRP temp0, temp1.bbbb, temp5, temp0 -> - * - PRESUB temps, temp5 - temp0 - * - MAD temp0, temp1.bbbb, temps, temp0 */ - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(12), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | - R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0) | - R300_ALU_RGB_INSERT_NOP)); - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(12), (R300_ALU_RGB_ADDR0(0) | - R300_ALU_RGB_ADDR1(5) | - R300_ALU_RGB_ADDR2(1) | - R300_ALU_RGB_ADDRD(0) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(12), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(12), (R300_ALU_ALPHA_ADDR0(0) | - R300_ALU_ALPHA_ADDR1(5) | - R300_ALU_ALPHA_ADDR2(1) | - R300_ALU_ALPHA_ADDRD(0) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A))); - - /* LRP output, temp2.bbbb, temp3, temp0 -> - * - PRESUB temps, temp3 - temp0 - * - MAD output, temp2.bbbb, temps, temp0 */ - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(13), (R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | - R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0))); - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(13), (R300_ALU_RGB_ADDR0(0) | - R300_ALU_RGB_ADDR1(3) | - R300_ALU_RGB_ADDR2(2) | - R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(13), 
(R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(13), (R300_ALU_ALPHA_ADDR0(0) | - R300_ALU_ALPHA_ADDR1(3) | - R300_ALU_ALPHA_ADDR2(2) | - R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A))); - - /* Shader constants. */ - OUT_ACCEL_REG(R300_US_ALU_CONST_R(0), F_TO_24(1.0/(float)pPriv->w)); - OUT_ACCEL_REG(R300_US_ALU_CONST_G(0), 0); - OUT_ACCEL_REG(R300_US_ALU_CONST_B(0), 0); - OUT_ACCEL_REG(R300_US_ALU_CONST_A(0), 0); - - OUT_ACCEL_REG(R300_US_ALU_CONST_R(1), 0); - OUT_ACCEL_REG(R300_US_ALU_CONST_G(1), F_TO_24(1.0/(float)pPriv->h)); - OUT_ACCEL_REG(R300_US_ALU_CONST_B(1), 0); - OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), 0); - - FINISH_ACCEL(); + FUNC_NAME(R300SetupPixelShaderBicubicEnabled)(pScrn, pPriv); } else { - BEGIN_ACCEL(11); - /* 2 components: 2 for tex0 */ - OUT_ACCEL_REG(R300_RS_COUNT, - ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | - R300_RS_COUNT_HIRES_EN)); - /* R300_INST_COUNT_RS - highest RS instruction used */ - OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); - - OUT_ACCEL_REG(R300_US_PIXSIZE, 0); /* highest temp used */ - - /* Indirection levels */ - OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) | - R300_FIRST_TEX)); - - OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | - R300_ALU_CODE_SIZE(1) | - R300_TEX_CODE_OFFSET(0) | - R300_TEX_CODE_SIZE(1))); - - OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) | - R300_ALU_SIZE(0) | - R300_TEX_START(0) | - R300_TEX_SIZE(0) | - R300_RGBA_OUT)); - - /* tex inst */ - OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) | - R300_TEX_DST_ADDR(0) | - R300_TEX_ID(0) | - R300_TEX_INST(R300_TEX_INST_LD))); - - /* ALU inst */ - /* RGB */ - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR_0, (R300_ALU_RGB_ADDR0(0) | - R300_ALU_RGB_ADDR1(0) | - R300_ALU_RGB_ADDR2(0) | - R300_ALU_RGB_ADDRD(0) | - R300_ALU_RGB_OMASK((R300_ALU_RGB_MASK_R 
| - R300_ALU_RGB_MASK_G | - R300_ALU_RGB_MASK_B)) | - R300_ALU_RGB_TARGET_A)); - OUT_ACCEL_REG(R300_US_ALU_RGB_INST_0, (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | - R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | - R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | - R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | - R300_ALU_RGB_CLAMP)); - /* Alpha */ - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR_0, (R300_ALU_ALPHA_ADDR0(0) | - R300_ALU_ALPHA_ADDR1(0) | - R300_ALU_ALPHA_ADDR2(0) | - R300_ALU_ALPHA_ADDRD(0) | - R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | - R300_ALU_ALPHA_TARGET_A | - R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST_0, (R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_A) | - R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_1_0) | - R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) | - R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE) | - R300_ALU_ALPHA_CLAMP)); - FINISH_ACCEL(); + FUNC_NAME(R300SetupPixelShaderBicubicDisabled)(pScrn, pPriv); } } else { - /* - * y' = y - .0625 - * u' = u - .5 - * v' = v - .5; - * - * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v' - * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v' - * b = 1.1643 * y' + 2.017 * u' + 0.0 * v' - * - * DP3 might look like the straightforward solution - * but we'd need to move the texture yuv values in - * the same reg for this to work. Therefore use MADs. - * Brightness just adds to the off constant. - * Contrast is multiplication of luminance. - * Saturation and hue change the u and v coeffs. 
- * Default values (before adjustments - depend on colorspace): - * yco = 1.1643 - * uco = 0, -0.39173, 2.017 - * vco = 1.5958, -0.8129, 0 - * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r], - * -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g], - * -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b], - * - * temp = MAD(yco, yuv.yyyy, off) - * temp = MAD(uco, yuv.uuuu, temp) - * result = MAD(vco, yuv.vvvv, temp) - */ - /* TODO: don't recalc consts always */ - const float Loff = -0.0627; - const float Coff = -0.502; - float uvcosf, uvsinf; - float yco; - float uco[3], vco[3], off[3]; - float bright, cont, gamma; - int ref = pPriv->transform_index; - Bool needgamma = FALSE; - - cont = RTFContrast(pPriv->contrast); - bright = RTFBrightness(pPriv->brightness); - gamma = (float)pPriv->gamma / 1000.0; - uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue)); - uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue)); - /* overlay video also does pre-gamma contrast/sat adjust, should we? */ - - yco = trans[ref].RefLuma * cont; - uco[0] = -trans[ref].RefRCr * uvsinf; - uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf; - uco[2] = trans[ref].RefBCb * uvcosf; - vco[0] = trans[ref].RefRCr * uvcosf; - vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf; - vco[2] = trans[ref].RefBCb * uvsinf; - off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright; - off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright; - off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright; - - if (gamma != 1.0) { - needgamma = TRUE; - /* note: gamma correction is out = in ^ gamma; - gpu can only do LG2/EX2 therefore we transform into - in ^ gamma = 2 ^ (log2(in) * gamma). - Lots of scalar ops, unfortunately (better solution?) - - without gamma that's 3 inst, with gamma it's 10... - could use different gamma factors per channel, - if that's of any use. 
*/ + if(is_rgb_format) { + FUNC_NAME(R300SetupPixelShaderRGB)(pScrn, pPriv); } - - if (pPriv->is_planar) { - BEGIN_ACCEL(needgamma ? 28 + 33 : 33); - /* 2 components: same 2 for tex0/1/2 */ - OUT_ACCEL_REG(R300_RS_COUNT, - ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | - R300_RS_COUNT_HIRES_EN)); - /* R300_INST_COUNT_RS - highest RS instruction used */ - OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); - - OUT_ACCEL_REG(R300_US_PIXSIZE, 2); /* highest temp used */ - - /* Indirection levels */ - OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) | - R300_FIRST_TEX)); - - OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | - R300_ALU_CODE_SIZE(needgamma ? 7 + 3 : 3) | - R300_TEX_CODE_OFFSET(0) | - R300_TEX_CODE_SIZE(3))); - - OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) | - R300_ALU_SIZE(needgamma ? 7 + 2 : 2) | - R300_TEX_START(0) | - R300_TEX_SIZE(2) | - R300_RGBA_OUT)); - - /* tex inst */ - OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) | - R300_TEX_DST_ADDR(2) | - R300_TEX_ID(0) | - R300_TEX_INST(R300_TEX_INST_LD))); - OUT_ACCEL_REG(R300_US_TEX_INST_1, (R300_TEX_SRC_ADDR(0) | - R300_TEX_DST_ADDR(1) | - R300_TEX_ID(1) | - R300_TEX_INST(R300_TEX_INST_LD))); - OUT_ACCEL_REG(R300_US_TEX_INST_2, (R300_TEX_SRC_ADDR(0) | - R300_TEX_DST_ADDR(0) | - R300_TEX_ID(2) | - R300_TEX_INST(R300_TEX_INST_LD))); - - /* ALU inst */ - /* MAD temp2.rgb, const0.aaa, temp2.rgb, const0.rgb */ - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) | - R300_ALU_RGB_ADDR1(2) | - R300_ALU_RGB_ADDR2(0) | - R300_ALU_RGB_ADDRD(2) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) | - R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | - R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | - R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - 
R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); - /* alpha nop, but need to set up alpha source for rgb usage */ - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) | - R300_ALU_ALPHA_ADDR1(2) | - R300_ALU_ALPHA_ADDR2(0) | - R300_ALU_ALPHA_ADDRD(2) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - - /* MAD temp2.rgb, const1.rgb, temp1.rgb, temp2.rgb */ - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) | - R300_ALU_RGB_ADDR1(1) | - R300_ALU_RGB_ADDR2(2) | - R300_ALU_RGB_ADDRD(2) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | - R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | - R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | - R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); - /* alpha nop */ - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(2) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - - /* MAD result.rgb, const2.rgb, temp0.rgb, temp2.rgb */ - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) | - R300_ALU_RGB_ADDR1(0) | - R300_ALU_RGB_ADDR2(2) | - R300_ALU_RGB_ADDRD(0) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) | - (needgamma ? 
0 : R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB)))); - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | - R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | - R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | - R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | - R300_ALU_RGB_CLAMP)); - /* write alpha 1 */ - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(0) | - R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | - R300_ALU_ALPHA_TARGET_A)); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0))); - - if (needgamma) { - /* rgb temp0.r = op_sop, set up src0 reg */ - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(0) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R))); - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3), - R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | - R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); - /* alpha lg2 temp0, temp0.r */ - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(0) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - - /* rgb temp0.g = op_sop, set up src0 reg */ - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(0) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G))); - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4), - R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | - R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); - /* alpha lg2 temp0, temp0.g */ - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - 
OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - - /* rgb temp0.b = op_sop, set up src0 reg */ - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(0) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B))); - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5), - R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | - R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); - /* alpha lg2 temp0, temp0.b */ - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(0) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - - /* MUL const1, temp1, temp0 */ - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(0) | - R300_ALU_RGB_ADDR1(0) | - R300_ALU_RGB_ADDR2(0) | - R300_ALU_RGB_ADDRD(0) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | - R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC0_AAA) | - R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | - R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); - /* alpha nop, but set up const1 */ - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(0) | - R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(1)) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - - /* rgb out0.r = op_sop, set up src0 reg */ - 
OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) | - R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_R))); - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7), - R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | - R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); - /* alpha ex2 temp0, temp0.r */ - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(0) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - - /* rgb out0.g = op_sop, set up src0 reg */ - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G) | - R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_G))); - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8), - R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | - R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); - /* alpha ex2 temp0, temp0.g */ - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(0) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - - /* rgb out0.b = op_sop, set up src0 reg */ - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) | - R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_B))); - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9), - R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | - R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); - /* alpha ex2 temp0, temp0.b */ - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(0) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | - 
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - } - } else { - BEGIN_ACCEL(needgamma ? 28 + 31 : 31); - /* 2 components */ - OUT_ACCEL_REG(R300_RS_COUNT, - ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | - R300_RS_COUNT_HIRES_EN)); - /* R300_INST_COUNT_RS - highest RS instruction used */ - OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); - - OUT_ACCEL_REG(R300_US_PIXSIZE, 1); /* highest temp used */ - - /* Indirection levels */ - OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) | - R300_FIRST_TEX)); - - OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | - R300_ALU_CODE_SIZE(needgamma ? 7 + 3 : 3) | - R300_TEX_CODE_OFFSET(0) | - R300_TEX_CODE_SIZE(1))); - - OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) | - R300_ALU_SIZE(needgamma ? 7 + 2 : 2) | - R300_TEX_START(0) | - R300_TEX_SIZE(0) | - R300_RGBA_OUT)); - - /* tex inst */ - OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) | - R300_TEX_DST_ADDR(0) | - R300_TEX_ID(0) | - R300_TEX_INST(R300_TEX_INST_LD))); - - /* ALU inst */ - /* MAD temp1.rgb, const0.aaa, temp0.ggg, const0.rgb */ - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(0)) | - R300_ALU_RGB_ADDR1(0) | - R300_ALU_RGB_ADDR2(0) | - R300_ALU_RGB_ADDRD(1) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_AAA) | - R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_GGG) | - R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) | - R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); - /* alpha nop, but need to set up alpha source for rgb usage */ - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(0)) | - R300_ALU_ALPHA_ADDR1(0) | - R300_ALU_ALPHA_ADDR2(0) | - R300_ALU_ALPHA_ADDRD(0) | - 
R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - - /* MAD temp1.rgb, const1.rgb, temp0.bbb, temp1.rgb */ - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(1)) | - R300_ALU_RGB_ADDR1(0) | - R300_ALU_RGB_ADDR2(1) | - R300_ALU_RGB_ADDRD(1) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | - R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_BBB) | - R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | - R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); - /* alpha nop */ - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(0) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - - /* MAD result.rgb, const2.rgb, temp0.rrr, temp1.rgb */ - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(R300_ALU_RGB_CONST(2)) | - R300_ALU_RGB_ADDR1(0) | - R300_ALU_RGB_ADDR2(1) | - R300_ALU_RGB_ADDRD(0) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) | - (needgamma ? 
0 : R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB)))); - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | - R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RRR) | - R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | - R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | - R300_ALU_RGB_CLAMP)); - /* write alpha 1 */ - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(0) | - R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | - R300_ALU_ALPHA_TARGET_A)); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0))); - - if (needgamma) { - /* rgb temp0.r = op_sop, set up src0 reg */ - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(0) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R))); - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3), - R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | - R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); - /* alpha lg2 temp0, temp0.r */ - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(0) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - - /* rgb temp0.g = op_sop, set up src0 reg */ - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(0) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G))); - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4), - R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | - R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); - /* alpha lg2 temp0, temp0.g */ - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - 
OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - - /* rgb temp0.b = op_sop, set up src0 reg */ - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(0) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B))); - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5), - R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | - R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); - /* alpha lg2 temp0, temp0.b */ - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(0) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - - /* MUL const1, temp1, temp0 */ - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(0) | - R300_ALU_RGB_ADDR1(0) | - R300_ALU_RGB_ADDR2(0) | - R300_ALU_RGB_ADDRD(0) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | - R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC0_AAA) | - R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | - R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | - R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | - R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); - /* alpha nop, but set up const1 */ - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(0) | - R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(1)) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - - /* rgb out0.r = op_sop, set up src0 reg */ - 
OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) | - R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_R))); - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7), - R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | - R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); - /* alpha ex2 temp0, temp0.r */ - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(0) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - - /* rgb out0.g = op_sop, set up src0 reg */ - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G) | - R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_G))); - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8), - R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | - R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); - /* alpha ex2 temp0, temp0.g */ - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(0) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | - R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - - /* rgb out0.b = op_sop, set up src0 reg */ - OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) | - R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) | - R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_B))); - OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9), - R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | - R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); - /* alpha ex2 temp0, temp0.b */ - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(0) | - R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | - R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | - 
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | - R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); - } + else { + FUNC_NAME(R300SetupPixelShaderYUVtoRGB)(pScrn, pPriv); } - - /* Shader constants. */ - /* constant 0: off, yco */ - OUT_ACCEL_REG(R300_US_ALU_CONST_R(0), F_TO_24(off[0])); - OUT_ACCEL_REG(R300_US_ALU_CONST_G(0), F_TO_24(off[1])); - OUT_ACCEL_REG(R300_US_ALU_CONST_B(0), F_TO_24(off[2])); - OUT_ACCEL_REG(R300_US_ALU_CONST_A(0), F_TO_24(yco)); - /* constant 1: uco */ - OUT_ACCEL_REG(R300_US_ALU_CONST_R(1), F_TO_24(uco[0])); - OUT_ACCEL_REG(R300_US_ALU_CONST_G(1), F_TO_24(uco[1])); - OUT_ACCEL_REG(R300_US_ALU_CONST_B(1), F_TO_24(uco[2])); - OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), F_TO_24(gamma)); - /* constant 2: vco */ - OUT_ACCEL_REG(R300_US_ALU_CONST_R(2), F_TO_24(vco[0])); - OUT_ACCEL_REG(R300_US_ALU_CONST_G(2), F_TO_24(vco[1])); - OUT_ACCEL_REG(R300_US_ALU_CONST_B(2), F_TO_24(vco[2])); - OUT_ACCEL_REG(R300_US_ALU_CONST_A(2), F_TO_24(0.0)); - - FINISH_ACCEL(); } BEGIN_ACCEL_RELOC(6, 2); @@ -2957,6 +2063,8 @@ FUNC_NAME(R500PrepareTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) OUT_ACCEL_REG(R300_US_OUT_FMT_0, output_fmt); FINISH_ACCEL(); + + /* setup pixel shader */ if (pPriv->bicubic_state != BICUBIC_OFF) { if (pPriv->bicubic_enabled) { @@ -3527,6 +2635,7 @@ FUNC_NAME(R500PrepareTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) FINISH_ACCEL(); } } else { + /* * y' = y - .0625 * u' = u - .5 -- 1.7.3.4