diff --git a/src/radeon.h b/src/radeon.h index a8acf9a..6d3b1c7 100644 --- a/src/radeon.h +++ b/src/radeon.h @@ -377,6 +377,11 @@ typedef enum { (info->ChipFamily == CHIP_FAMILY_RS400) || \ (info->ChipFamily == CHIP_FAMILY_RS480)) +#define IS_R200_3D ((info->ChipFamily == CHIP_FAMILY_RV250) || \ + (info->ChipFamily == CHIP_FAMILY_RV280) || \ + (info->ChipFamily == CHIP_FAMILY_RS300) || \ + (info->ChipFamily == CHIP_FAMILY_R200)) + /* * Errata workarounds */ diff --git a/src/radeon_accelfuncs.c b/src/radeon_accelfuncs.c index 45eb6d5..2d6fe01 100644 --- a/src/radeon_accelfuncs.c +++ b/src/radeon_accelfuncs.c @@ -1345,10 +1345,7 @@ FUNC_NAME(RADEONAccelInit)(ScreenPtr pScreen, XAAInfoRecPtr a) xf86DrvMsg(pScrn->scrnIndex, X_INFO, "XAA Render acceleration " "unsupported on Radeon 9500/9700 and newer. " "Please use EXA instead.\n"); - } else if ((info->ChipFamily == CHIP_FAMILY_RV250) || - (info->ChipFamily == CHIP_FAMILY_RV280) || - (info->ChipFamily == CHIP_FAMILY_RS300) || - (info->ChipFamily == CHIP_FAMILY_R200)) { + } else if (IS_R200_3D) { a->SetupForCPUToScreenAlphaTexture2 = FUNC_NAME(R200SetupForCPUToScreenAlphaTexture); a->SubsequentCPUToScreenAlphaTexture = diff --git a/src/radeon_commonfuncs.c b/src/radeon_commonfuncs.c index a9bc7d2..3dbe617 100644 --- a/src/radeon_commonfuncs.c +++ b/src/radeon_commonfuncs.c @@ -565,10 +565,7 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn) OUT_ACCEL_REG(R300_SC_CLIP_RULE, 0xAAAA); OUT_ACCEL_REG(R300_SC_SCREENDOOR, 0xffffff); FINISH_ACCEL(); - } else if ((info->ChipFamily == CHIP_FAMILY_RV250) || - (info->ChipFamily == CHIP_FAMILY_RV280) || - (info->ChipFamily == CHIP_FAMILY_RS300) || - (info->ChipFamily == CHIP_FAMILY_R200)) { + } else if (IS_R200_3D) { BEGIN_ACCEL(6); if (info->ChipFamily == CHIP_FAMILY_RS300) { diff --git a/src/radeon_exa_funcs.c b/src/radeon_exa_funcs.c index 59cb46f..6a2b25c 100644 --- a/src/radeon_exa_funcs.c +++ b/src/radeon_exa_funcs.c @@ -505,10 +505,7 @@ Bool 
FUNC_NAME(RADEONDrawInit)(ScreenPtr pScreen) info->accel_state->exa->DoneComposite = FUNC_NAME(RadeonDoneComposite); } else xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA Composite requires CP on R5xx/IGP\n"); - } else if ((info->ChipFamily == CHIP_FAMILY_RV250) || - (info->ChipFamily == CHIP_FAMILY_RV280) || - (info->ChipFamily == CHIP_FAMILY_RS300) || - (info->ChipFamily == CHIP_FAMILY_R200)) { + } else if (IS_R200_3D) { xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Render acceleration " "enabled for R200 type cards.\n"); info->accel_state->exa->CheckComposite = R200CheckComposite; diff --git a/src/radeon_textured_video.c b/src/radeon_textured_video.c index 79671c0..bf8a276 100644 --- a/src/radeon_textured_video.c +++ b/src/radeon_textured_video.c @@ -128,6 +128,21 @@ static __inline__ uint32_t float4touint(float fr, float fg, float fb, float fa) return (ua << 24) | (ur << 16) | (ug << 8) | ub; } +/* Parameters for ITU-R BT.601 and ITU-R BT.709 colour spaces + note the difference to the parameters used in overlay are due + to 10bit vs. 
float calcs */ +static REF_TRANSFORM trans[2] = +{ + {1.1643, 0.0, 1.5960, -0.3918, -0.8129, 2.0172, 0.0}, /* BT.601 */ + {1.1643, 0.0, 1.7927, -0.2132, -0.5329, 2.1124, 0.0} /* BT.709 */ +}; + + +#define RTFSaturation(a) (1.0 + ((a)*1.0)/1000.0) +#define RTFBrightness(a) (((a)*1.0)/2000.0) +#define RTFContrast(a) (1.0 + ((a)*1.0)/1000.0) +#define RTFHue(a) (((a)*3.1416)/1000.0) + #define ACCEL_MMIO #define ACCEL_PREAMBLE() unsigned char *RADEONMMIO = info->MMIO #define BEGIN_ACCEL(n) RADEONWaitForFifo(pScrn, (n)) @@ -359,12 +374,8 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn, } pPriv->planar_hw = pPriv->planar_state; - if (pPriv->bicubic_enabled || !( IS_R300_3D || - (info->ChipFamily == CHIP_FAMILY_RV250) || - (info->ChipFamily == CHIP_FAMILY_RV280) || - (info->ChipFamily == CHIP_FAMILY_RS300) || - (info->ChipFamily == CHIP_FAMILY_R200) )) - pPriv->planar_hw = 0; + if (pPriv->bicubic_enabled || !( IS_R300_3D || IS_R200_3D )) + pPriv->planar_hw = 0; switch(id) { case FOURCC_YV12: @@ -636,28 +647,58 @@ static XF86VideoFormatRec Formats[NUM_FORMATS] = {15, TrueColor}, {16, TrueColor}, {24, TrueColor} }; -#define NUM_ATTRIBUTES 2 +#define NUM_ATTRIBUTES 1 static XF86AttributeRec Attributes[NUM_ATTRIBUTES+1] = { {XvSettable | XvGettable, 0, 1, "XV_VSYNC"}, + {0, 0, 0, NULL} +}; + +#define NUM_ATTRIBUTES_R200 7 + +static XF86AttributeRec Attributes_r200[NUM_ATTRIBUTES_R200+1] = +{ + {XvSettable | XvGettable, 0, 1, "XV_VSYNC"}, {XvSettable | XvGettable, 0, 1, "XV_HWPLANAR"}, + {XvSettable | XvGettable, -1000, 1000, "XV_BRIGHTNESS"}, + {XvSettable | XvGettable, -1000, 1000, "XV_CONTRAST"}, + {XvSettable | XvGettable, -1000, 1000, "XV_SATURATION"}, + {XvSettable | XvGettable, -1000, 1000, "XV_HUE"}, + {XvSettable | XvGettable, 0, 1, "XV_COLORSPACE"}, /* index into trans[]: 0 = BT.601, 1 = BT.709; must match the 0..1 clamp in the setter */ {0, 0, 0, NULL} }; -#define NUM_ATTRIBUTES_R300 3 +#define NUM_ATTRIBUTES_R300 9 static XF86AttributeRec Attributes_r300[NUM_ATTRIBUTES_R300+1] = { {XvSettable | XvGettable, 0, 2, "XV_BICUBIC"}, {XvSettable | XvGettable, 
0, 1, "XV_VSYNC"}, {XvSettable | XvGettable, 0, 1, "XV_HWPLANAR"}, + {XvSettable | XvGettable, -1000, 1000, "XV_BRIGHTNESS"}, + {XvSettable | XvGettable, -1000, 1000, "XV_CONTRAST"}, + {XvSettable | XvGettable, -1000, 1000, "XV_SATURATION"}, + {XvSettable | XvGettable, -1000, 1000, "XV_HUE"}, + {XvSettable | XvGettable, 100, 10000, "XV_GAMMA"}, + {XvSettable | XvGettable, 0, 1, "XV_COLORSPACE"}, + {0, 0, 0, NULL} +}; + +#define NUM_ATTRIBUTES_R500 2 + +static XF86AttributeRec Attributes_r500[NUM_ATTRIBUTES_R500+1] = +{ + {XvSettable | XvGettable, 0, 2, "XV_BICUBIC"}, + {XvSettable | XvGettable, 0, 1, "XV_VSYNC"}, {0, 0, 0, NULL} }; static Atom xvBicubic; static Atom xvVSync; static Atom xvHWPlanar; +static Atom xvBrightness, xvContrast, xvSaturation, xvHue; +static Atom xvGamma, xvColorspace; #define NUM_IMAGES 4 @@ -686,6 +727,18 @@ RADEONGetTexPortAttribute(ScrnInfoPtr pScrn, *value = pPriv->vsync; else if (attribute == xvHWPlanar) *value = pPriv->planar_state; + else if (attribute == xvBrightness) + *value = pPriv->brightness; + else if (attribute == xvContrast) + *value = pPriv->contrast; + else if (attribute == xvSaturation) + *value = pPriv->saturation; + else if (attribute == xvHue) + *value = pPriv->hue; + else if (attribute == xvGamma) + *value = pPriv->gamma; + else if(attribute == xvColorspace) + *value = pPriv->transform_index; else return BadMatch; @@ -709,6 +762,18 @@ RADEONSetTexPortAttribute(ScrnInfoPtr pScrn, pPriv->vsync = ClipValue (value, 0, 1); else if (attribute == xvHWPlanar) pPriv->planar_state = ClipValue (value, 0, 1); + else if (attribute == xvBrightness) + pPriv->brightness = ClipValue (value, -1000, 1000); + else if (attribute == xvContrast) + pPriv->contrast = ClipValue (value, -1000, 1000); + else if (attribute == xvSaturation) + pPriv->saturation = ClipValue (value, -1000, 1000); + else if (attribute == xvHue) + pPriv->hue = ClipValue (value, -1000, 
1000); + else if (attribute == xvGamma) + pPriv->gamma = ClipValue (value, 100, 10000); + else if(attribute == xvColorspace) + pPriv->transform_index = ClipValue (value, 0, 1); else return BadMatch; @@ -733,6 +798,12 @@ RADEONSetupImageTexturedVideo(ScreenPtr pScreen) xvBicubic = MAKE_ATOM("XV_BICUBIC"); xvVSync = MAKE_ATOM("XV_VSYNC"); xvHWPlanar = MAKE_ATOM("XV_HWPLANAR"); + xvBrightness = MAKE_ATOM("XV_BRIGHTNESS"); + xvContrast = MAKE_ATOM("XV_CONTRAST"); + xvSaturation = MAKE_ATOM("XV_SATURATION"); + xvHue = MAKE_ATOM("XV_HUE"); + xvGamma = MAKE_ATOM("XV_GAMMA"); + xvColorspace = MAKE_ATOM("XV_COLORSPACE"); adapt->type = XvWindowMask | XvInputMask | XvImageMask; adapt->flags = 0; @@ -752,10 +823,19 @@ RADEONSetupImageTexturedVideo(ScreenPtr pScreen) pPortPriv = (RADEONPortPrivPtr)(&adapt->pPortPrivates[num_texture_ports]); - if (IS_R300_3D || IS_R500_3D) { + if (IS_R300_3D) { adapt->pAttributes = Attributes_r300; adapt->nAttributes = NUM_ATTRIBUTES_R300; - } else { + } + else if (IS_R500_3D) { + adapt->pAttributes = Attributes_r500; + adapt->nAttributes = NUM_ATTRIBUTES_R500; + } + else if (IS_R200_3D) { + adapt->pAttributes = Attributes_r200; + adapt->nAttributes = NUM_ATTRIBUTES_R200; + } + else { adapt->pAttributes = Attributes; adapt->nAttributes = NUM_ATTRIBUTES; } @@ -783,6 +863,12 @@ RADEONSetupImageTexturedVideo(ScreenPtr pScreen) pPriv->bicubic_state = BICUBIC_AUTO; pPriv->vsync = TRUE; pPriv->planar_state = 1; + pPriv->brightness = 0; + pPriv->contrast = 0; + pPriv->saturation = 0; + pPriv->hue = 0; + pPriv->gamma = 1000; + pPriv->transform_index = 0; /* gotta uninit this someplace, XXX: shouldn't be necessary for textured */ REGION_NULL(pScreen, &pPriv->clip); diff --git a/src/radeon_textured_videofuncs.c b/src/radeon_textured_videofuncs.c index 05acb93..6492534 100644 --- a/src/radeon_textured_videofuncs.c +++ b/src/radeon_textured_videofuncs.c @@ -743,9 +743,10 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv * 
DP3 might look like the straightforward solution * but we'd need to move the texture yuv values in * the same reg for this to work. Therefore use MADs. - * Without changing the shader at all (only the constants) - * could also provide hue/saturation/brightness/contrast control. - * + * Brightness just adds to the off constant. + * Contrast is multiplication of luminance. + * Saturation and hue change the u and v coeffs. + * Default values (before adjustments - depend on colorspace): * yco = 1.1643 * uco = 0, -0.39173, 2.017 * vco = 1.5958, -0.8129, 0 @@ -757,14 +758,46 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv * temp = MAD(uco, yuv.uuuu, temp) * result = MAD(vco, yuv.vvvv, temp) */ - float yco = 1.1643; - float uco[3] = {0.0, -0.39173, 2.018}; - float vco[3] = {1.5958, -0.8129, 0.0}; - float off[3] = {-0.0625 * yco + -0.5 * uco[0] + -0.5 * vco[0], - -0.0625 * yco + -0.5 * uco[1] + -0.5 * vco[1], - -0.0625 * yco + -0.5 * uco[2] + -0.5 * vco[2]}; - - BEGIN_ACCEL(33); + /* TODO: don't recalc consts always */ + const float Loff = -0.0627; + const float Coff = -0.502; + float uvcosf, uvsinf; + float yco; + float uco[3], vco[3], off[3]; + float bright, cont, gamma; + int ref = pPriv->transform_index; + Bool needgamma = FALSE; + + cont = RTFContrast(pPriv->contrast); + bright = RTFBrightness(pPriv->brightness); + gamma = (float)pPriv->gamma / 1000.0; + uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue)); + uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue)); + /* overlay video also does pre-gamma contrast/sat adjust, should we? 
*/ + + yco = trans[ref].RefLuma * cont; + uco[0] = -trans[ref].RefRCr * uvsinf; + uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf; + uco[2] = trans[ref].RefBCb * uvcosf; + vco[0] = trans[ref].RefRCr * uvcosf; + vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf; + vco[2] = trans[ref].RefBCb * uvsinf; + off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright; + off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright; + off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright; + + if (gamma != 1.0) { + needgamma = TRUE; + /* note: gamma correction is out = in ^ gamma; + gpu can only do LG2/EX2 therefore we transform into + in ^ gamma = 2 ^ (log2(in) * gamma). + Lots of scalar ops, unfortunately (better solution?) - + without gamma that's 3 inst, with gamma it's 10... + could use different gamma factors per channel, + if that's of any use. */ + } + + BEGIN_ACCEL(needgamma ? 28 + 33 : 33); /* 2 components: same 2 for tex0/1/2 */ OUT_ACCEL_REG(R300_RS_COUNT, ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | @@ -779,12 +812,12 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv R300_FIRST_TEX)); OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | - R300_ALU_CODE_SIZE(3) | + R300_ALU_CODE_SIZE(needgamma ? 7 + 3 : 3) | R300_TEX_CODE_OFFSET(0) | R300_TEX_CODE_SIZE(3))); OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) | - R300_ALU_SIZE(2) | + R300_ALU_SIZE(needgamma ? 7 + 2 : 2) | R300_TEX_START(0) | R300_TEX_SIZE(2) | R300_RGBA_OUT)); @@ -857,7 +890,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv R300_ALU_RGB_ADDR2(0) | R300_ALU_RGB_ADDRD(0) | R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) | - R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB))); + (needgamma ? 
0 : R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB)))); OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | @@ -868,14 +901,126 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | R300_ALU_RGB_CLAMP)); /* write alpha 1 */ - OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) | + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(0) | R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | R300_ALU_ALPHA_TARGET_A)); - OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0))); + if (needgamma) { + /* rgb temp0.r = op_sop, set up src0 reg */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3), + R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + /* alpha lg2 temp0, temp0.r */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + + /* rgb temp0.g = op_sop, set up src0 reg */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4), + R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + /* alpha lg2 temp0, temp0.g */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), 
(R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + + /* rgb temp0.b = op_sop, set up src0 reg */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5), + R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + /* alpha lg2 temp0, temp0.b */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + + /* MUL const1, temp1, temp0 */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_ADDR1(0) | + R300_ALU_RGB_ADDR2(0) | + R300_ALU_RGB_ADDRD(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | + R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC0_AAA) | + R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | + R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE))); + /* alpha nop, but set up const1 */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_ADDR0(R300_ALU_ALPHA_CONST(1)) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + 
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + + /* rgb out0.r = op_sop, set up src0 reg */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_R))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7), + R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + /* alpha ex2 temp0, temp0.r */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + + /* rgb out0.g = op_sop, set up src0 reg */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_G))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8), + R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + /* alpha ex2 temp0, temp0.g */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_G) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + + /* rgb out0.b = op_sop, set up src0 reg */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_B))); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9), + R300_ALU_RGB_OP(R300_ALU_RGB_OP_SOP) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + /* alpha ex2 temp0, temp0.b */ + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE))); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), 
(R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | + R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0))); + } + /* Shader constants. */ /* constant 0: off, yco */ OUT_ACCEL_REG(R300_US_ALU_CONST_R(0), F_TO_24(off[0])); @@ -886,7 +1031,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv OUT_ACCEL_REG(R300_US_ALU_CONST_R(1), F_TO_24(uco[0])); OUT_ACCEL_REG(R300_US_ALU_CONST_G(1), F_TO_24(uco[1])); OUT_ACCEL_REG(R300_US_ALU_CONST_B(1), F_TO_24(uco[2])); - OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), F_TO_24(0.0)); + OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), F_TO_24(gamma)); /* constant 2: vco */ OUT_ACCEL_REG(R300_US_ALU_CONST_R(2), F_TO_24(vco[0])); OUT_ACCEL_REG(R300_US_ALU_CONST_G(2), F_TO_24(vco[1])); @@ -1601,20 +1746,52 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv FINISH_ACCEL(); - if ((info->ChipFamily == CHIP_FAMILY_RV250) || - (info->ChipFamily == CHIP_FAMILY_RV280) || - (info->ChipFamily == CHIP_FAMILY_RS300) || - (info->ChipFamily == CHIP_FAMILY_R200)) { + if (IS_R200_3D) { info->accel_state->texW[0] = pPriv->w; info->accel_state->texH[0] = pPriv->h; if (isplanar) { /* note: in contrast to r300, use input biasing on uv components */ - float yco = 1.1643; - float yoff = -0.0625 * yco; - float uco[3] = {0.0, -0.39173, 2.018}; - float vco[3] = {1.5958, -0.8129, 0.0}; + const float Loff = -0.0627; + float uvcosf, uvsinf; + float yco, yoff; + float uco[3], vco[3]; + float bright, cont, sat; + int ref = pPriv->transform_index; + float ucscale = 0.25, vcscale = 0.25; + Bool needux8 = FALSE, needvx8 = FALSE; + + /* contrast can cause constant overflow, clamp */ + cont = RTFContrast(pPriv->contrast); + if (cont * trans[ref].RefLuma > 2.0) + cont = 2.0 / trans[ref].RefLuma; + /* brightness is only from -0.5 to 0.5 should be safe */ + bright = RTFBrightness(pPriv->brightness); + /* saturation can also cause overflow, clamp */ + sat 
= RTFSaturation(pPriv->saturation); + if (sat * trans[ref].RefBCb > 4.0) + sat = 4.0 / trans[ref].RefBCb; + uvcosf = sat * cos(RTFHue(pPriv->hue)); + uvsinf = sat * sin(RTFHue(pPriv->hue)); + + yco = trans[ref].RefLuma * cont; + uco[0] = -trans[ref].RefRCr * uvsinf; + uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf; + uco[2] = trans[ref].RefBCb * uvcosf; + vco[0] = trans[ref].RefRCr * uvcosf; + vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf; + vco[2] = trans[ref].RefBCb * uvsinf; + yoff = Loff * yco + bright; + + if ((fabs(uco[0]) > 2.0) || (fabs(uco[2]) > 2.0)) { /* hue rotation can make these negative: test magnitude so values below -2 also select the wider encoding */ + needux8 = TRUE; + ucscale = 0.125; + } + if ((fabs(vco[0]) > 2.0) || (fabs(vco[2]) > 2.0)) { /* same magnitude test for the v coefficients */ + needvx8 = TRUE; + vcscale = 0.125; + } /* need 2 texcoord sets (even though they are identical) due to denormalization! hw apparently can't premultiply @@ -1678,7 +1855,9 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv * seems the values we need seem to fit better than worst case (get about * 6 fractional bits for this instead of 5, at least when not correcting for * hue/saturation/contrast/brightness, which is the same as for vco - yco and - * yoff get 8 fractional bits). + * yoff get 8 fractional bits). Try to preserve as much accuracy as possible + * even with non-default saturation/hue/contrast/brightness adjustments, + * it gets a little crazy and ultimately precision might still be lacking. * * A higher precision (8 fractional bits) version might just put uco into * a texcoord, and calculate a new vcoconst in the shader, like so: @@ -1709,7 +1888,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv R200_TXC_ARG_A_TFACTOR_COLOR | R200_TXC_ARG_B_R0_COLOR | R200_TXC_ARG_C_TFACTOR_COLOR | - R200_TXC_NEG_ARG_C | + (yoff < 0 ? 
R200_TXC_NEG_ARG_C : 0) | R200_TXC_OP_DOT2_ADD); OUT_ACCEL_REG(R200_PP_TXCBLEND2_0, (0 << R200_TXC_TFACTOR_SEL_SHIFT) | @@ -1730,7 +1909,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv R200_TXC_SCALE_ARG_A | R200_TXC_ARG_B_R1_COLOR | R200_TXC_BIAS_ARG_B | - R200_TXC_SCALE_ARG_B | + (needux8 ? R200_TXC_SCALE_ARG_B : 0) | R200_TXC_ARG_C_R0_COLOR | R200_TXC_OP_MADD); OUT_ACCEL_REG(R200_PP_TXCBLEND2_1, @@ -1751,6 +1930,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv R200_TXC_SCALE_ARG_A | R200_TXC_ARG_B_R2_COLOR | R200_TXC_BIAS_ARG_B | + (needvx8 ? R200_TXC_SCALE_ARG_B : 0) | R200_TXC_ARG_C_R0_COLOR | R200_TXC_OP_MADD); OUT_ACCEL_REG(R200_PP_TXCBLEND2_2, @@ -1767,28 +1947,64 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0); /* shader constants */ - OUT_ACCEL_REG(R200_PP_TFACTOR_0, float4touint(1.0, /* src range [1, 2] */ - yco - 1.0, - -yoff, /* range [-1, 0] */ + OUT_ACCEL_REG(R200_PP_TFACTOR_0, float4touint(yco > 1.0 ? 1.0 : 0.0, /* range special [0, 2] */ + yco > 1.0 ? yco - 1.0: yco, + yoff < 0 ? 
-yoff : yoff, /* range special [-1, 1] */ 0.0)); - OUT_ACCEL_REG(R200_PP_TFACTOR_1, float4touint(uco[0] * 0.125 + 0.5, /* range [-4, 4] */ - uco[1] * 0.125 + 0.5, - uco[2] * 0.125 + 0.5, + OUT_ACCEL_REG(R200_PP_TFACTOR_1, float4touint(uco[0] * ucscale + 0.5, /* range [-4, 4] */ + uco[1] * ucscale + 0.5, /* or [-2, 2] */ + uco[2] * ucscale + 0.5, 0.0)); - OUT_ACCEL_REG(R200_PP_TFACTOR_2, float4touint(vco[0] * 0.25 + 0.5, /* range [-2, 2] */ - vco[1] * 0.25 + 0.5, - vco[2] * 0.25 + 0.5, + OUT_ACCEL_REG(R200_PP_TFACTOR_2, float4touint(vco[0] * vcscale + 0.5, /* range [-2, 2] */ + vco[1] * vcscale + 0.5, /* or [-4, 4] */ + vco[2] * vcscale + 0.5, 0.0)); FINISH_ACCEL(); } else if (info->ChipFamily == CHIP_FAMILY_RV250) { /* fix up broken packed yuv - shader same as above except - yuv compoents are all in same reg */ - float yco = 1.1643; - float yoff = -0.0625 * yco; - float uco[3] = {0.0, -0.39173, 2.018}; - float vco[3] = {1.5958, -0.8129, 0.0}; + yuv components are all in same reg */ + /* note: in contrast to r300, use input biasing on uv components */ + const float Loff = -0.0627; + float uvcosf, uvsinf; + float yco, yoff; + float uco[3], vco[3]; + float bright, cont, sat; + int ref = pPriv->transform_index; + float ucscale = 0.25, vcscale = 0.25; + Bool needux8 = FALSE, needvx8 = FALSE; + + /* contrast can cause constant overflow, clamp */ + cont = RTFContrast(pPriv->contrast); + if (cont * trans[ref].RefLuma > 2.0) + cont = 2.0 / trans[ref].RefLuma; + /* brightness is only from -0.5 to 0.5 should be safe */ + bright = RTFBrightness(pPriv->brightness); + /* saturation can also cause overflow, clamp */ + sat = RTFSaturation(pPriv->saturation); + if (sat * trans[ref].RefBCb > 4.0) + sat = 4.0 / trans[ref].RefBCb; + uvcosf = sat * cos(RTFHue(pPriv->hue)); + uvsinf = sat * sin(RTFHue(pPriv->hue)); + + yco = trans[ref].RefLuma * cont; + uco[0] = -trans[ref].RefRCr * uvsinf; + uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf; + uco[2] = trans[ref].RefBCb 
* uvcosf; + vco[0] = trans[ref].RefRCr * uvcosf; + vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf; + vco[2] = trans[ref].RefBCb * uvsinf; + yoff = Loff * yco + bright; + + if ((fabs(uco[0]) > 2.0) || (fabs(uco[2]) > 2.0)) { /* hue rotation can make these negative: test magnitude so values below -2 also select the wider encoding */ + needux8 = TRUE; + ucscale = 0.125; + } + if ((fabs(vco[0]) > 2.0) || (fabs(vco[2]) > 2.0)) { /* same magnitude test for the v coefficients */ + needvx8 = TRUE; + vcscale = 0.125; + } txformat0 = (((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) | (((((pPriv->h + 1 ) >> 1 ) - 1) & 0x7ff) << RADEON_TEX_VSIZE_SHIFT)); @@ -1824,7 +2040,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv R200_TXC_ARG_A_TFACTOR_COLOR | R200_TXC_ARG_B_R0_COLOR | R200_TXC_ARG_C_TFACTOR_COLOR | - R200_TXC_NEG_ARG_C | + (yoff < 0 ? R200_TXC_NEG_ARG_C : 0) | R200_TXC_OP_DOT2_ADD); OUT_ACCEL_REG(R200_PP_TXCBLEND2_0, (0 << R200_TXC_TFACTOR_SEL_SHIFT) | @@ -1846,7 +2062,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv R200_TXC_SCALE_ARG_A | R200_TXC_ARG_B_R0_COLOR | R200_TXC_BIAS_ARG_B | - R200_TXC_SCALE_ARG_B | + (needux8 ? R200_TXC_SCALE_ARG_B : 0) | R200_TXC_ARG_C_R1_COLOR | R200_TXC_OP_MADD); OUT_ACCEL_REG(R200_PP_TXCBLEND2_1, @@ -1868,6 +2084,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv R200_TXC_SCALE_ARG_A | R200_TXC_ARG_B_R0_COLOR | R200_TXC_BIAS_ARG_B | + (needvx8 ? R200_TXC_SCALE_ARG_B : 0) | R200_TXC_ARG_C_R1_COLOR | R200_TXC_OP_MADD); OUT_ACCEL_REG(R200_PP_TXCBLEND2_2, @@ -1885,17 +2102,17 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0); /* shader constants */ - OUT_ACCEL_REG(R200_PP_TFACTOR_0, float4touint(1.0, /* src range [1, 2] */ - yco - 1.0, - -yoff, /* range [-1, 0] */ + OUT_ACCEL_REG(R200_PP_TFACTOR_0, float4touint(yco > 1.0 ? 1.0 : 0.0, /* range special [0, 2] */ + yco > 1.0 ? yco - 1.0: yco, + yoff < 0 ? 
-yoff : yoff, /* range special [-1, 1] */ 0.0)); - OUT_ACCEL_REG(R200_PP_TFACTOR_1, float4touint(uco[0] * 0.125 + 0.5, /* range [-4, 4] */ - uco[1] * 0.125 + 0.5, - uco[2] * 0.125 + 0.5, + OUT_ACCEL_REG(R200_PP_TFACTOR_1, float4touint(uco[0] * ucscale + 0.5, /* range [-4, 4] */ + uco[1] * ucscale + 0.5, /* or [-2, 2] */ + uco[2] * ucscale + 0.5, 0.0)); - OUT_ACCEL_REG(R200_PP_TFACTOR_2, float4touint(vco[0] * 0.25 + 0.5, /* range [-2, 2] */ - vco[1] * 0.25 + 0.5, - vco[2] * 0.25 + 0.5, + OUT_ACCEL_REG(R200_PP_TFACTOR_2, float4touint(vco[0] * vcscale + 0.5, /* range [-2, 2] */ + vco[1] * vcscale + 0.5, /* or [-4, 4] */ + vco[2] * vcscale + 0.5, 0.0)); FINISH_ACCEL(); diff --git a/src/radeon_video.c b/src/radeon_video.c index a2a4696..8b71a0c 100644 --- a/src/radeon_video.c +++ b/src/radeon_video.c @@ -544,18 +544,6 @@ static XF86ImageRec Images[NUM_IMAGES] = #endif -/* Reference color space transform data */ -typedef struct tagREF_TRANSFORM -{ - float RefLuma; - float RefRCb; - float RefRCr; - float RefGCb; - float RefGCr; - float RefBCb; - float RefBCr; -} REF_TRANSFORM; - /* Parameters for ITU-R BT.601 and ITU-R BT.709 colour spaces */ static REF_TRANSFORM trans[2] = { @@ -563,7 +551,6 @@ static REF_TRANSFORM trans[2] = {1.1678, 0.0, 1.7980, -0.2139, -0.5345, 2.1186, 0.0} /* BT.709 */ }; - /* Gamma curve definition for preset gammas */ typedef struct tagGAMMA_CURVE_R100 { diff --git a/src/radeon_video.h b/src/radeon_video.h index 34fb07f..2f00206 100644 --- a/src/radeon_video.h +++ b/src/radeon_video.h @@ -123,6 +123,18 @@ typedef struct { int vsync; } RADEONPortPrivRec, *RADEONPortPrivPtr; +/* Reference color space transform data */ +typedef struct tagREF_TRANSFORM +{ + float RefLuma; + float RefRCb; + float RefRCr; + float RefGCb; + float RefGCr; + float RefBCb; + float RefBCr; +} REF_TRANSFORM; + int radeon_covering_crtc_num(ScrnInfoPtr pScrn, int x1, int x2, int y1, int y2,