From db26f593d519b866ba84f69fd4674522ffee2566 Mon Sep 17 00:00:00 2001 From: dafox Date: Mon, 3 Jan 2011 13:06:43 +0100 Subject: [PATCH 2/3] Implement hue/contrast/brightness/saturation adjustments for RGB textured-Xv --- src/radeon_textured_video.c | 3 +- src/radeon_textured_video_pixelshaders.c | 1306 +++++++++++++++++++++++++++++- src/radeon_textured_videofuncs.c | 11 +- 3 files changed, 1309 insertions(+), 11 deletions(-) diff --git a/src/radeon_textured_video.c b/src/radeon_textured_video.c index f61fbde..c4c89f7 100644 --- a/src/radeon_textured_video.c +++ b/src/radeon_textured_video.c @@ -420,9 +420,8 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn, case FOURCC_RGB16: case FOURCC_RGBT16: default: + srcPitch = width << 1; dstPitch = RADEON_ALIGN(dst_width << 1, hw_align); - srcPitch = (width << 1); - srcPitch2 = 0; break; } diff --git a/src/radeon_textured_video_pixelshaders.c b/src/radeon_textured_video_pixelshaders.c index be7cef1..5a0d211 100644 --- a/src/radeon_textured_video_pixelshaders.c +++ b/src/radeon_textured_video_pixelshaders.c @@ -974,16 +974,13 @@ static void FUNC_NAME(R300SetupPixelShaderYUVtoRGB)(ScrnInfoPtr pScrn, RADEONPor FINISH_ACCEL(); } -static void FUNC_NAME(R300SetupPixelShaderRGB)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) { +/** + * A pixel shader which does nothing more than pass through the input pixel + * to the output pixel. Used when there is no need to do gamma/hue/brightness/contrast etc. 
+ */ +static void FUNC_NAME(R300SetupPixelShaderRGBNop)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) { RADEONInfoPtr info = RADEONPTR(pScrn); ACCEL_PREAMBLE(); - /* Needs to perform (in RGB space): - * - gamma correction, - * - contrast, - * - saturation, - * - brightness, - * But for now it is just a raw pass-through - */ BEGIN_ACCEL(11); @@ -1084,3 +1081,1296 @@ static void FUNC_NAME(R300SetupPixelShaderRGB)(ScrnInfoPtr pScrn, RADEONPortPriv R300_ALU_ALPHA_CLAMP)); FINISH_ACCEL(); } + +/** + * A pixel shader which performs gamma/hue/brightness/contrast adjustment on + * RGB pixels. Used when the gamma/hue/brightness/contrast does require + * adjustment. + * + * Steps: + * 1) Transform RGB -> HSL + * 2) Apply contrast (multiplication of lightness value L) + * 3) Apply brightness (addition with lightness value L) + * 4) Perform gamma correction (power of lightness value L) + * Apply saturation (addition with saturation value S) + * Apply hue (addition with hue value H) + * 5) Transform HSL -> RGB + * + * The order of contrast, brightness and gamma is probably important and was + * taken from mplayer's vf_eq2 source code. (They probably know the right order + * for this stuff). 
+ * + * + * + * Definitions: + * == Convert RGB to HSL == (from the wikipedia article on HSL) + * M(AX) = max(R, G, B) + * m(in) = min(R, G, B) + * C(hroma) = M - m + * H' = undefined (0), if C=0 + * ((G - B) / C + 0) % 6, if M=R + * ((B - R) / C + 2) % 6, if M=G + * ((R - G) / C + 4) % 6, if M=B + * H(ue) = 60 * H' + * L(ightness) = (M + m) / 2 + * S(aturation) = C / (1 - |2*L - 1|) + * + * == Convert HSL to RGB == (from the wikipedia article on HSL) + * C(hroma) = (1 - |2*L - 1|) * S(aturation) + * H' = H(ue) / 60 + * X = C * (1 - | mod2(H') - 1|) + * (R', G', B') = (0, 0, 0), if H is undefined (0) + * = (C, X, 0), if 0 <= H' < 1 + * = (X, C, 0), if 1 <= H' < 2 + * = (0, C, X), if 2 <= H' < 3 + * = (0, X, C), if 3 <= H' < 4 + * = (X, 0, C), if 4 <= H' < 5 + * = (C, 0, X), if 5 <= H' < 6 + * m(in) = L(ightness) - 0.5 * C(hroma) + * (R, G, B) = (R' + m, G' + m, B' + m) + * + * + * + * Code: + * 1) TEX_LD(pixel_stack[0], pixel_stack[0], input_texture) // pixel_stack[0] contains the input color/pixel/texel + * + * + * First we compute C (all on the RGB ALU): + * 2) R_MAX(pixel_stack[0].rrr, pixel_stack[0].ggg, , pixel_stack[1].r) + * A_NOP + * 3) R MAX(pixel_stack[1].rrr, pixel_stack[0].bbb, , pixel_stack[1].r) // pixel_stack[1].r = M + * A_NOP + * 4) R MIN(pixel_stack[0].rrr, pixel_stack[0].ggg, , pixel_stack[1].g) + * A_NOP + * 5) R MIN(pixel_stack[1].ggg, pixel_stack[0].bbb, , pixel_stack[1].g) // pixel_stack[1].g = m + * A_NOP + * 6) R_MAD(pixel_stack[1].rrr, 1.0, -pixel_stack[1].ggg, pixel_stack[1].b) // pixel_stack[1].b = C + * + * Together with the last computation step of C we compute L (on the Alpha ALU): + * A_MAD(pixel_stack[1].r, 1.0, pixel_stack[1].g, 0.5 * pixel_stack[4].a) // pixel_stack[4].a = L = (M + m) / 2 + * + * + * Now we have to compute H'. We calculate the three cases in parallel, + * and then use a DP3 with a mask-vector to get at the result. 
However we + * may need to do a fix-up on the result, since more than one branch of the + * case-statement can be true. However when this happens both branches give + * the same result. Consider for example cyan, (0,1,1): M=1, m=0, C=1, + * and both M=G and M=B, but then also both ((B-R) / C + 2) = 3 and + * ((R-G) / C + 4) = 3. Therefore we count the number of 'true' branches, and + * divide H' by that. Note that in order for this to work we need to have + * the values H' in the right range, e.g. for case M=R we need 0..1 or 5..6, + * so that the case H'=5 will work properly. Most of these calculations + * happen on the RGB ALU. Meanwhile we use the Alpha ALU to compute 1/C, S, + * and adjust the brightness and contrast with L: + * NOTE: We use (6, 8, 10) instead of (0, 2, 4) because the add-all-and-divide + * tactic fails for example with magenta (0xff00ff). This yields -1 and 5, + * for cases M=R and M=B which are equal modulo six, but (-1 + 5) / 2 = 2 + * instead of 5, so we shift the whole range to 5..11 and perform a + * modulo 6. Then we have hue in [0..6] as required, and we get + * ((-1+6)%6 + (5+6)%6)/2 = 5. + * We keep H' in [0..1] so that we can do the later modulo operations at once + * without doing a division first.
+ * 7) R_MAD(-pixel_stack[1].rrr, 1, pixel_stack[0].rgb, pixel_stack[2].rgb) // pixel_stack[2].rgb = (r - M, g - M, b - M) + * A_RCP(pixel_stack[1].b, , , pixel_stack[1].a) // pixel_stack[1].a = 1/C + * 8) R_MAD(1.0, pixel_stack[0].gbr, -pixel_stack[0].brg, pixel_stack[3].rgb) // pixel_stack[3].rgb = (g-b, b-r, r-g) + * A_MAD(NABS(1 - 2*pixel_stack[4].a), 1, 1, pixel_stack[3].a) // pixel_stack[3].a = 1 + -|1 - 2*L| = 1 - |2*L - 1| + * 9) R_MAD(pixel_stack[3].rgb, pixel_stack[1].aaa, (6, 8, 10), pixel_stack[3].rgb) // pixel_stack[3].rgb = ((g-b)/C + 0, (b-r)/C + 2, (r-g)/C + 4) + 6 = 3 possible values of H' (depending on whether M=R, M=G or M=B) plus 6, to prepare for modulo + * A_RCP(pixel_stack[3].a, , , pixel_stack[3].a) // pixel_stack[3].a = 1 / ( 1 - |2*L - 1|) + * 10) R_MAD(pixel_stack[3].rgb, 1/6, 0, pixel_stack[3].rgb) // pixel_stack[3].rgb = pixel_stack[3].rgb / 6 (prepare for modulo) = (6/6..7/6 | 11/6..12/6, 7/6..9/6, 9/6..11/6) + * A_MAD(pixel_stack[4].a, lightness_mul, lightness_add, pixel_stack[4].a) // pixel_stack[4].a = L, with adjusted contrast & brightness + * 11) R_FRC(pixel_stack[3].rgb, , , pixel_stack[3].rgb) // pixel_stack[3].rgb = pixel_stack[3].rgb mod 6, so now -1..0 --> 5..6 = (0..1/6 | 5/6..1, 1/6..3/6, 3/6..5/6) + * A_MAD(pixel_stack[3].a, pixel_stack[1].b, 0, pixel_stack[3].a) // pixel_stack[3].a = C / (1 - |2*L - 1|) = S + * + * Note: The next instruction should enable clamping for the alpha (saturation) channel + * 12) R_CMP(1, 0, pixel_stack[2].rgb, pixel_stack[2].rgb) // pixel_stack[2].rgb = vector which is 1 where x=M and 0 otherwise + * A_MAD(pixel_stack[3].aaa, saturation_mul, saturation_add, pixel_stack[3].a) // pixel_stack[3].a = Adjusted saturation + * 13) R_DP3(pixel_stack[2].rgb, (1, 1, 1), , ) // The next instruction cannot use an RGB input for the Alpha ALU since the DP3 needs to operate on both pixel_stack[3].rgb and pixel_stack[2].rgb, which conflicts with the RCP on the alpha unit.
+ * A_DP(, , , pixel_stack[1].a) // pixel_stack[1].a = 'nr of true branches' + * 14) R_DP3(pixel_stack[3].rgb, pixel_stack[2].rgb, , pixel_stack[4].r) // pixel_stack[4].r = H' * 'nr of true branches' + * A_RCP(pixel_stack[1].a, , , pixel_stack[1].a) // pixel_stack[1].a = 1 / 'nr of true branches' + * 15) R_MAD(pixel_stack[4].rrr, pixel_stack[1].aaa, (0,0,0), pixel_stack[4].r) // pixel_stack[4].r = H' + * + * + * At this point we have pixel_stack[4].ra = HL, pixel_stack[3].a = S + * Next up: do gamma correction with L (pixel_stack[4].a) and hue adjustment + * with H (pixel_stack[4].r). + * Note: Hue is pre-computed as (hue + 6) / 6, i.e. the hue offset in units + * of H' shifted by one full turn. This is because at this point + * H' is in [0..1], and the FRC doesn't work as we want unless all + * values are `on the same side' of 0, e.g. all values should be kept + * positive. + * Gamma correction is out = in ^ gamma, but the r3xx gpu can only do LN2/EX2. + * Therefore we transform into in ^ gamma = 2 ^ (log2(in) * gamma). + * We're really still on instruction 15 + * A_LN2(pixel_stack[4].a, , , pixel_stack[4].a) // pixel_stack[4].a = log2(i) + * 16) R_MAD(pixel_stack[4].rrr, 1, hue, pixel_stack[4].r) // pixel_stack[4].r = adjust hue (no wrap-around yet) + * A_MAD(pixel_stack[4].a, gamma, 0, pixel_stack[4].a) // pixel_stack[4].a = log2(i) * gamma + * + * Note: The next instruction should enable clamping for the alpha (brightness) channel + * 17) R_FRC(pixel_stack[4].rrr, , , pixel_stack[4].r) // pixel_stack[4].r = H'/6 mod 1/6 (wrap-around hue) + * A_EX2(pixel_stack[4].a, , , pixel_stack[4].a) // pixel_stack[4].a = 2 ^ (log2(i) * gamma) + * + * + * Now that we have performed all adjustments we have to do the whole thing + * in reverse and produce RGB again. Start with calculating the new Chroma. + * NOTE: in instructions 18 and 19 we perform a CMP with -C (negative of the + * input Chroma). Because 0 <= C <= 1, this means that this comparison + * is true if and only if C==0, i.e. at the point that Hue and + * Saturation are undefined.
We use this to set those to 0 in that case. + * 18) R_MAD(NABS(1 - 2*pixel_stack[4].a), 1, 1, pixel_stack[4].b) // pixel_stack[4].b = 1 - |1 - 2*L| = 1 - |2*L - 1| + * A_CMP(0, pixel_stack[3].a, -pixel_stack[1].b, pixel_stack[3].a) // pixel_stack[3].a = 0, if C was 0 initially, S otherwise + * 19) R_MAD(pixel_stack[4].b, pixel_stack[3].a, 0, pixel_stack[4].b) // pixel_stack[4].b = (1 - |2*L - 1|) * S = C + * A_CMP(0, pixel_stack[4].r, -pixel_stack[1].b, pixel_stack[3].a) // pixel_stack[3].a = 0, if C was 0 initially, H' otherwise + * + * + * Now we calculate m(in) and the output values (C+m, X+m, 0+m). + * NOTE: m(in) = L(ightness) - 0.5 * C(hroma), + * X = C * (1 - | mod2(H') - 1|). + * We also do some preparatory work to get the final color swizzles + * right, see the comments later on for more details. + * 20) R_MAD(pixel_stack[4].bbb, -0.5, pixel_stack[4].aaa, pixel_stack[2].b) // pixel_stack[2].b = L - 0.5*C = m(in) + * A_MAD(pixel_stack[3].a, 6, 0, 0.5 * pixel_stack[2].a) // pixel_stack[2].a = H=0..1 -> H=0..3 == H=(0..6)/2 + * 21) R_MAD(pixel_stack[4].bbb, 1, pixel_stack[2].bbb, pixel_stack[2].r) // pixel_stack[2].r = C + m + * A_FRC(pixel_stack[2].a, , , 2 * pixel_stack[2].a) // pixel_stack[2].a = mod2(H), 0..2 + * 22) R_NOP // Next RGB instruction depends on H' which we still need to calculate. + * A_MAD(NABS(1 - pixel_stack[2].a), 1, 1, pixel_stack[2].a) // pixel_stack[2].a = 1 - | mod2(H') - 1 | + * 23) R_MAD(pixel_stack[4].bbb, pixel_stack[2].aaa, pixel_stack[2].bbb, pixel_stack[2].g) // pixel_stack[2].g = C * (1 - |mod2(H') - 1|) + m = X + m + * A_MAD(pixel_stack[4].b, 1, pixel_stack[2].b, pixel_stack[2].a) // pixel_stack[2].a = C + m = pixel_stack[2].r, to create pixel_stack[1].rgb=(C+m, 0+m, X+m) from pixel_stack[2].abg=(C+m, X+m, 0+m) + * 24) R_MAD(1, pixel_stack[2].abg, 0, pixel_stack[1].rgb) // pixel_stack[1].rgb = pixel_stack[2].rbg + * A_MAD(pixel_stack[3].a, 1, -1/6, pixel_stack[2].a) // pixel_stack[2].a = H' ([-1 ..
5]/6) + * + * Now we have that: + * pixel_stack[2].r = C + m pixel_stack[1].r = C + m + * pixel_stack[2].g = X + m pixel_stack[1].g = 0 + m + * pixel_stack[2].b = 0 + m pixel_stack[1].b = X + m + * pixel_stack[2].a = H' % 6, [-1/6..5/6] + * And we can perform the final construction of the output RGB pixel. Note + * that we can not perform swizzles GRB and BGR directly, so we set up pixel + * stack[1].rgb = stack[2].rbg = (C+M, 0+m, X+m), and use that with an + * additional swizzle to make pixel_stack[2].grb and pixel_stack[2].bgr. + * Only the last of these instructions needs to write to the output fifo. + * 25) R_CMP(pixel_stack[2].rgb, pixel_stack[1].brg, pixel_stack[2].aaa, pixel_stack[0].rgb) // -1 <= H < 0 || 0 <= h < 1, (C, X, 0) || (X, C, 0) + * A_MAD(pixel_stack[2].a, 1, -1/6, pixel_stack[2].a) // pixel_stack[2].a = H - 1/6 + * 26) R_CMP(pixel_stack[2].brg, pixel_stack[0].rgb, pixel_stack[2].aaa, pixel_stack[0].rgb) // 1 <= H < 2, (0, C, X) + * A_MAD(pixel_stack[2].a, 1, -1/6, pixel_stack[2].a) // pixel_stack[2].a = H - 1/6 + * 27) R_CMP(pixel_stack[1].gbr, pixel_stack[0].rgb, pixel_stack[2].aaa, pixel_stack[0].rgb) // 2 <= H < 3, (0, X, C) + * A_MAD(pixel_stack[2].a, 1, -1/6, pixel_stack[2].a) // pixel_stack[2].a = H - 1/6 + * 28) R_CMP(pixel_stack[2].gbr, pixel_stack[0].rgb, pixel_stack[2].aaa, pixel_stack[0].rgb) // 3 <= H < 4, (X, 0, C) + * A_MAD(pixel_stack[2].a, 1, -1/6, pixel_stack[2].a) // pixel_stack[2].a = H - 1/6 + * 29) R_CMP(pixel_stack[1].rgb, pixel_stack[0].rgb, pixel_stack[2].aaa, pixel_stack[0].rgb) // 4 <= H < 5, (C, 0, X) + * A_NOP + * + */ +static void FUNC_NAME(R300SetupPixelShaderRGBFull)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) { + #define DEBUG 0 + RADEONInfoPtr info = RADEONPTR(pScrn); + ACCEL_PREAMBLE(); + + /** + * Specifies the number of OUT_ACCEL_REG() operations used to program + * the shader into the hardware registers. + * 6 to setup the HW, 3*4 for the constants, 1 texture lookup, 4 for + * each RGB/Alpha ALU instruction. 
+ */ + #define NR_RGBA_INSTRUCTIONS 28 + BEGIN_ACCEL(6 + 3*4 + 1 + NR_RGBA_INSTRUCTIONS*4 + DEBUG*4); // 1*4 debug + + /** + * Specifies that there will be 2 'texture address components' used. + * E.g. the (u,v) location where texture 0 is sampled uses 2 + * components, 'u' and 'v'. (?) + */ + OUT_ACCEL_REG(R300_RS_COUNT, ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) | R300_RS_COUNT_HIRES_EN)); + + // FIXME: what does this mean? 'number of rasterizer instructions' + /* R300_INST_COUNT_RS - highest RS instruction used */ + OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0)); + + /** + * Specifies the number of temporary variables that can be used in this + * shader, e.g. temp[0..n] are valid to use in this shader. + */ + OUT_ACCEL_REG(R300_US_PIXSIZE, 4); /* highest temp used */ + + // FIXME: What does this mean? 'specifies the valid indirection levels' + // current setting (0) means 'Level 3 only (normal DX7-style texturing)' + /* Indirection levels */ + OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) | R300_FIRST_TEX)); + + + /** + * ALU_CODE_SIZE is the total number of instructions, + * the R300_CODE_US_ADDR_* fields are for when you are using texture + * lookups. - tstellar, IRC + */ + OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) | + R300_ALU_CODE_SIZE(NR_RGBA_INSTRUCTIONS) | + R300_TEX_CODE_OFFSET(0) | + R300_TEX_CODE_SIZE(1))); + + /** + * r300 can only do 4 "sets" of texture lookups, each time you start a + * new "set" of texture lookups you need to load your code into a new + * R300_US_CODE_ADDR slot. - tstellar, IRC + */ + OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) | + R300_ALU_SIZE(NR_RGBA_INSTRUCTIONS-1) | + R300_TEX_START(0) | + R300_TEX_SIZE(0) | + R300_RGBA_OUT)); + + // TODO: find out why RTFSaturation and friends are defined the way + // they are, it prevents us from using them here directly. + // TODO: merge const(0).r and const(1).g would not gain much, due to hue/gamma. 
_maybe_ if only use 6 and calculate 1/6 in shader + + + // XV_HUE = -1000 .. 1000 + // XV_SATURATION = -1000 .. 1000 + // XV_BRIGHTNESS = -1000 .. 1000 + // XV_CONTRAST = -1000 .. 1000 + // XV_GAMMA = 100 .. 10000 + double hue = (2000-pPriv->hue) / 2000.0; // Note: -pPriv->hue because the YUV code rotates hue in the opposite direction + double saturation = pPriv->saturation / 1000.0; + double brightness = pPriv->brightness / 1000.0; + double contrast = pPriv->contrast / 1000.0; + double gamma = pPriv->gamma / 1000.0; + + double saturation_mul = 0.0, saturation_add = 0.0; + if(pPriv->saturation < 0) { + saturation_mul = 1.0 + saturation; + saturation_add = 0.0; + } + else { + saturation_mul = 1.0 - saturation; + saturation_add = saturation; + } + + double brightness_mul = 0.0, brightness_add = 0.0; + if(pPriv->brightness < 0) { + brightness_mul = 1.0 + brightness; + brightness_add = 0.0; + } + else { + brightness_mul = 1.0 - brightness; + brightness_add = brightness; + } + + double contrast_mul = 1.0 + contrast; + double contrast_add = -0.5 * contrast; + + // NOTE: We apply contrast first because applying brightness first + // causes brightness adjust to become ineffective at extreme + // contrast levels. 
+ double lightness_mul = contrast_mul * brightness_mul; + double lightness_add = contrast_add * brightness_mul + brightness_add; + + // constant 0: (6, 8, 10, Saturation) + OUT_ACCEL_REG(R300_US_ALU_CONST_R(0), F_TO_24( 6.0f)); + OUT_ACCEL_REG(R300_US_ALU_CONST_G(0), F_TO_24( 8.0f)); + OUT_ACCEL_REG(R300_US_ALU_CONST_B(0), F_TO_24(10.0f)); + OUT_ACCEL_REG(R300_US_ALU_CONST_A(0), F_TO_24(saturation_add)); + + // constant 1: (Contrast, 6, 1/6, Brightness) + OUT_ACCEL_REG(R300_US_ALU_CONST_R(1), F_TO_24(saturation_mul)); + OUT_ACCEL_REG(R300_US_ALU_CONST_G(1), F_TO_24(lightness_mul)); + OUT_ACCEL_REG(R300_US_ALU_CONST_B(1), F_TO_24(1.0f/6.0f)); + OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), F_TO_24(lightness_add)); + + // constant 2: (Hue, , , Gamma) + OUT_ACCEL_REG(R300_US_ALU_CONST_R(2), F_TO_24(hue)); + OUT_ACCEL_REG(R300_US_ALU_CONST_G(2), F_TO_24(0.0f)); + OUT_ACCEL_REG(R300_US_ALU_CONST_B(2), F_TO_24(0.0f)); + OUT_ACCEL_REG(R300_US_ALU_CONST_A(2), F_TO_24(gamma)); + + + /** + * 1) TEX_LD(pixel_stack[0], pixel_stack[0], input_texture) // pixel_stack[0] contains the input color/pixel/texel + */ + OUT_ACCEL_REG(R300_US_TEX_INST_0, R300_TEX_SRC_ADDR(0) | + R300_TEX_DST_ADDR(0) | + R300_TEX_ID(0) | + R300_TEX_INST(R300_TEX_INST_LD)); + + /** + * 2) R_MAX(pixel_stack[0].rrr, pixel_stack[0].ggg, , pixel_stack[1].r) + * A_NOP + * + * FIXME: Strangely enough we're the only shader to specify the render target? 
+ */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_ADDR1(0) | + R300_ALU_RGB_ADDR2(0) | + R300_ALU_RGB_ADDRD(1) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_NONE) | + R300_ALU_RGB_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_GGG) | R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAX) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), R300_ALU_ALPHA_ADDR0(0) | + R300_ALU_ALPHA_ADDR1(0) | + R300_ALU_ALPHA_ADDR2(0) | + R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE)); + + /** + * 3) R MAX(pixel_stack[1].rrr, pixel_stack[0].bbb, , pixel_stack[1].r) // pixel_stack[1].r = M + * A_NOP + */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), R300_ALU_RGB_ADDR0(1) | + R300_ALU_RGB_ADDR1(0) | + R300_ALU_RGB_ADDR2(0) | + R300_ALU_RGB_ADDRD(1) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_NONE) | + R300_ALU_RGB_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_BBB) | R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | 
+ R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAX) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), R300_ALU_ALPHA_ADDR0(0) | + R300_ALU_ALPHA_ADDR1(0) | + R300_ALU_ALPHA_ADDR2(0) | + R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE)); + + /** + * 4) R MIN(pixel_stack[0].rrr, pixel_stack[0].ggg, , pixel_stack[1].g) + * A_NOP + */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_ADDR1(0) | + R300_ALU_RGB_ADDR2(0) | + R300_ALU_RGB_ADDRD(1) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_NONE) | + R300_ALU_RGB_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_GGG) | R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_MIN) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), R300_ALU_ALPHA_ADDR0(0) | + R300_ALU_ALPHA_ADDR1(0) | + R300_ALU_ALPHA_ADDR2(0) | + R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE) | + 
R300_ALU_ALPHA_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE)); + + /** + * 5) R MIN(pixel_stack[1].ggg, pixel_stack[0].bbb, , pixel_stack[1].g) // pixel_stack[1].g = m + * A_NOP + */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), R300_ALU_RGB_ADDR0(1) | + R300_ALU_RGB_ADDR1(0) | + R300_ALU_RGB_ADDR2(0) | + R300_ALU_RGB_ADDRD(1) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_NONE) | + R300_ALU_RGB_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3), R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) | R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_BBB) | R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_MIN) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), R300_ALU_ALPHA_ADDR0(0) | + R300_ALU_ALPHA_ADDR1(0) | + R300_ALU_ALPHA_ADDR2(0) | + R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE)); + /** + * 6) R_MAD(pixel_stack[1].rrr, 1.0, 
-pixel_stack[1].ggg, pixel_stack[1].b) // pixel_stack[1].b = C + * A_MAD(pixel_stack[1].r, 1.0, -pixel_stack[1].g, 0.5 * pixel_stack[4].a) // pixel_stack[4].a = L + */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), R300_ALU_RGB_ADDR0(1) | + R300_ALU_RGB_ADDR1(0) | + R300_ALU_RGB_ADDR2(1) | + R300_ALU_RGB_ADDRD(1) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_NONE) | + R300_ALU_RGB_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4), R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_GGG) | R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NEG) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), R300_ALU_ALPHA_ADDR0(1) | + R300_ALU_ALPHA_ADDR1(0) | + R300_ALU_ALPHA_ADDR2(1) | + R300_ALU_ALPHA_ADDRD(4) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A) | + R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_R) | R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_1_0) | R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC2_G) | R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_DIV_2)); + + /** + * 7) R_MAD(-pixel_stack[1].rrr, 1, pixel_stack[0].rgb, pixel_stack[2].rgb) // pixel_stack[2].rgb = (r - M, g - M, b - M) + * A_RCP(pixel_stack[1].b, , , pixel_stack[1].a) // pixel_stack[1].a = 1/C + */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(5), R300_ALU_RGB_ADDR0(1) | + R300_ALU_RGB_ADDR1(0) | + R300_ALU_RGB_ADDR2(0) | + R300_ALU_RGB_ADDRD(2) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) | + 
R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_NONE) | + R300_ALU_RGB_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(5), R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NEG) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(5), R300_ALU_ALPHA_ADDR0(0) | + R300_ALU_ALPHA_ADDR1(0) | + R300_ALU_ALPHA_ADDR2(0) | + R300_ALU_ALPHA_ADDRD(1) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A) | + R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(5), R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_RCP) | + R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE)); + + /** + * 8) R_MAD(1.0, pixel_stack[0].gbr, -pixel_stack[0].brg, pixel_stack[3].rgb) // pixel_stack[3].rgb = (g-b, b-r, r-g) + * A_MAD(NABS(1 - 2*pixel_stack[4].a), 1, 1, pixel_stack[3].a) // pixel_stack[3].a = 1 + -|1 - 2*L| = 1 - |2*L - 1| + */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(6), R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_ADDR1(0) | + R300_ALU_RGB_ADDR2(0) | + R300_ALU_RGB_ADDRD(3) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_NONE) | + R300_ALU_RGB_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(6), R300_ALU_RGB_SEL_A(R300_ALU_RGB_1_0) | R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_GBR) | R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_BRG) | 
R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NEG) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(6), R300_ALU_ALPHA_ADDR0(4) | + R300_ALU_ALPHA_ADDR1(0) | + R300_ALU_ALPHA_ADDR2(0) | + R300_ALU_ALPHA_ADDRD(3) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A) | + R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(6), R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRCP_A) | R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NAB) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_1_0) | R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0) | R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SRCP_OP(R300_ALU_ALPHA_SRCP_OP_1_MINUS_2RGB0) | + R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE)); + + /** + * 9) R_MAD(pixel_stack[3].rgb, pixel_stack[1].aaa, (6, 8, 10), pixel_stack[3].rgb) // pixel_stack[3].rgb = ((g-b)/C + 0, (b-r)/C + 2, (r-g)/C + 4) + 6 = 3 possible values of H' (depending on wether M=R, M=G or M=B) plus 6, to prepare for modulo + * A_RCP(pixel_stack[3].a, , , pixel_stack[3].a) // pixel_stack[3].a = 1 / ( 1 - |2*L - 1|) + */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(7), R300_ALU_RGB_ADDR0(3) | + R300_ALU_RGB_ADDR1(0) | + R300_ALU_RGB_ADDR2(R300_ALU_RGB_CONST(0)) | + R300_ALU_RGB_ADDRD(3) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_NONE) | + R300_ALU_RGB_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(7), R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_AAA) | R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + + 
OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(7), R300_ALU_ALPHA_ADDR0(3) | + R300_ALU_ALPHA_ADDR1(1) | + R300_ALU_ALPHA_ADDR2(0) | + R300_ALU_ALPHA_ADDRD(3) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A) | + R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(7), R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_A) | R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_RCP) | + R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE)); + + /** + * 10) R_MAD(pixel_stack[3].rgb, 1/6, 0, pixel_stack[3].rgb) // pixel_stack[3].rgb = pixel_stack[3].rgb / 6 (prepare for modulo) = (6/6..7/6 | 11/6..12/6, 7/6..9/6, 9/6..11/6) + * A_MAD(pixel_stack[4].a, lightness_mul, lightness_add, pixel_stack[4].a) // pixel_stack[4].a = L, with adjusted contrast & brightness + */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(8), R300_ALU_RGB_ADDR0(3) | + R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) | + R300_ALU_RGB_ADDR2(0) | + R300_ALU_RGB_ADDRD(3) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_NONE) | + R300_ALU_RGB_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(8), R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_BBB) | R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(8), R300_ALU_ALPHA_ADDR0(4) | + R300_ALU_ALPHA_ADDR1(0) | + R300_ALU_ALPHA_ADDR2(R300_ALU_ALPHA_CONST(1)) | + R300_ALU_ALPHA_ADDRD(4) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A) | + 
R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(8), R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_A) | R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRC1_G) | R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC2_A) | R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE)); + + /** + * 11) R_FRC(pixel_stack[3].rgb, , , pixel_stack[3].rgb) // pixel_stack[3].rgb = pixel_stack[3].rgb mod 6, so now -1..0 --> 5..6 = (0..1/6 | 5/6..1, 1/6..3/6, 3/6..5/6) + * A_MAD(pixel_stack[3].a, pixel_stack[1].b, 0, pixel_stack[3].a) // pixel_stack[3].a = C / (1 - |2*L - 1|) = S + */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(9), R300_ALU_RGB_ADDR0(3) | + R300_ALU_RGB_ADDR1(1) | + R300_ALU_RGB_ADDR2(0) | + R300_ALU_RGB_ADDRD(3) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_NONE) | + R300_ALU_RGB_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(9), R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_0_0) | R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_FRC) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(9), R300_ALU_ALPHA_ADDR0(3) | + R300_ALU_ALPHA_ADDR1(0) | + R300_ALU_ALPHA_ADDR2(0) | + R300_ALU_ALPHA_ADDRD(3) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A) | + R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(9), R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_A) | R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) | + 
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRC1_B) | R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE)); + + /** + * 12) R_CMP(1, 0, pixel_stack[2].rgb, pixel_stack[2].rgb) // pixel_stack[2].rgb = vector which is 1 where x=M and 0 otherwise + * A_MAD(pixel_stack[3].aaa, saturation_mul, saturation_add, pixel_stack[3].a) // pixel_stack[3].a = Adjusted saturation + */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(10), R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_ADDR1(R300_ALU_ALPHA_CONST(1)) | + R300_ALU_RGB_ADDR2(2) | + R300_ALU_RGB_ADDRD(2) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_NONE) | + R300_ALU_RGB_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(10), R300_ALU_RGB_SEL_A(R300_ALU_RGB_1_0) | R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_0_0) | R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB) | R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_CMP) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(10), R300_ALU_ALPHA_ADDR0(3) | + R300_ALU_ALPHA_ADDR1(0) | + R300_ALU_ALPHA_ADDR2(R300_ALU_ALPHA_CONST(0)) | + R300_ALU_ALPHA_ADDRD(3) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A) | + R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(10), R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_A) | R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRC1_R) | R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC2_A) | R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE) | + 
R300_ALU_ALPHA_CLAMP); + + /** + * 13) R_DP3(pixel_stack[2].rgb, (1, 1, 1), , ) // The next instruction can not use an RGB input for the Alpa ALU since it needs to operate on both pixel_stack[3].rgb and pixel_stack[2].rgb + * A_DP(, , , pixel_stack[1].a) // pixel_stack[1].a = 'nr of true braches' + */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(11), R300_ALU_RGB_ADDR0(2) | + R300_ALU_RGB_ADDR1(0) | + R300_ALU_RGB_ADDR2(0) | + R300_ALU_RGB_ADDRD(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_NONE) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_NONE) | + R300_ALU_RGB_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(11), R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_DP3) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(11), R300_ALU_ALPHA_ADDR0(0) | + R300_ALU_ALPHA_ADDR1(0) | + R300_ALU_ALPHA_ADDR2(0) | + R300_ALU_ALPHA_ADDRD(1) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A) | + R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(11), R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_DP) | + R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE)); + + /** + * 14) R_DP3(pixel_stack[3].rgb, pixel_stack[2].rgb, , pixel_stack[4].r) // pixel_stack[4].r = H' * 'nr of true braches' + * A_RCP(pixel_stack[1].a, , , pixel_stack[1].a) // pixel_stack[1].a = 1 / 'nr of true braches' + */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(12), R300_ALU_RGB_ADDR0(3) | + 
R300_ALU_RGB_ADDR1(2) | + R300_ALU_RGB_ADDR2(0) | + R300_ALU_RGB_ADDRD(4) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_NONE) | + R300_ALU_RGB_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(12), R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_DP3) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(12), R300_ALU_ALPHA_ADDR0(1) | + R300_ALU_ALPHA_ADDR1(0) | + R300_ALU_ALPHA_ADDR2(0) | + R300_ALU_ALPHA_ADDRD(1) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A) | + R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(12), R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_A) | R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_RCP) | + R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE)); + + /** + * 15) R_MAD(pixel_stack[4].rrr, pixel_stack[1].aaa, (0,0,0), pixel_stack[4].r) // pixel_stack[4].r = H' + * A_LN2(pixel_stack[4].a, , , pixel_stack[4].a) // pixel_stack[4].a = log2(i) + */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(13), R300_ALU_RGB_ADDR0(4) | + R300_ALU_RGB_ADDR1(1) | + R300_ALU_RGB_ADDR2(0) | + R300_ALU_RGB_ADDRD(4) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_NONE) | + R300_ALU_RGB_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(13), R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_AAA) | 
R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(13), R300_ALU_ALPHA_ADDR0(4) | + R300_ALU_ALPHA_ADDR1(1) | + R300_ALU_ALPHA_ADDR2(0) | + R300_ALU_ALPHA_ADDRD(4) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A) | + R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(13), R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_A) | R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_LN2) | + R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE)); + + /** + * 16) R_MAD(pixel_stack[4].rrr, 1, hue, pixel_stack[4].r) // pixel_stack[4].r = adjust hue (no wrap-around yet) + * A_MAD(pixel_stack[4].a, gamma, 0, pixel_stack[4].a) // pixel_stack[4].a = log2(i) * gamma + */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(14), R300_ALU_RGB_ADDR0(4) | + R300_ALU_RGB_ADDR1(0) | + R300_ALU_RGB_ADDR2(R300_ALU_RGB_CONST(2)) | + R300_ALU_RGB_ADDRD(4) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_NONE) | + R300_ALU_RGB_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(14), R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RRR) | R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(14), R300_ALU_ALPHA_ADDR0(4) | + R300_ALU_ALPHA_ADDR1(R300_ALU_RGB_CONST(2)) | + 
R300_ALU_ALPHA_ADDR2(0) | + R300_ALU_ALPHA_ADDRD(4) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A) | + R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(14), R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_A) | R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRC1_A) | R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE)); + + /** + * 17) R_FRC(pixel_stack[4].rrr, , , pixel_stack[4].r) // pixel_stack[4].r = H'/6 mod 1/6 (wrap-around hue) + * A_EX2(pixel_stack[4].a, , , pixel_stack[4].a) // pixel_stack[4].a = 2 ^ (log2(i) * gamma) + */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(15), R300_ALU_RGB_ADDR0(4) | + R300_ALU_RGB_ADDR1(0) | + R300_ALU_RGB_ADDR2(0) | + R300_ALU_RGB_ADDRD(4) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_NONE) | + R300_ALU_RGB_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(15), R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) | R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_0_0) | R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_FRC) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | + R300_ALU_RGB_INSERT_NOP); + + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(15), R300_ALU_ALPHA_ADDR0(4) | + R300_ALU_ALPHA_ADDR1(0) | + R300_ALU_ALPHA_ADDR2(0) | + R300_ALU_ALPHA_ADDRD(4) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A) | + R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(15), R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_A) | 
R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_EX2) | + R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE) | + R300_ALU_ALPHA_CLAMP); + + /** + * 18) R_MAD(NABS(1 - 2*pixel_stack[4].a), 1, 1, pixel_stack[4].b) // pixel_stack[4].b = 1 - |1 - 2*L| = 1 - |2*L - 1| + * A_CMP(0, pixel_stack[3].a, -pixel_stack[1].b, pixel_stack[3].a) // pixel_stack[3].a = 0, if C was 0 initially, S otherwise + */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(16), R300_ALU_RGB_ADDR0(4) | + R300_ALU_RGB_ADDR1(0) | + R300_ALU_RGB_ADDR2(1) | + R300_ALU_RGB_ADDRD(4) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_NONE) | + R300_ALU_RGB_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(16), R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRCP_AAA) | R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NAB) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_1_0) | R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(16), R300_ALU_ALPHA_ADDR0(4) | + R300_ALU_ALPHA_ADDR1(3) | + R300_ALU_ALPHA_ADDR2(0) | + R300_ALU_ALPHA_ADDRD(3) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A) | + R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(16), R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRC1_A) | R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC2_B) | R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NEG) | + R300_ALU_ALPHA_SRCP_OP(R300_ALU_ALPHA_SRCP_OP_1_MINUS_2RGB0) | + 
R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_CMP) | + R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE)); + + /** + * 19) R_MAD(pixel_stack[4].b, pixel_stack[3].a, 0, pixel_stack[4].b) // pixel_stack[4].b = (1 - |2*L - 1|) * S = C + * A_CMP(0, pixel_stack[4].r, -pixel_stack[1].b, pixel_stack[3].a) // pixel_stack[3].a = 0, if C was 0 initially, H' otherwise + */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(17), R300_ALU_RGB_ADDR0(4) | + R300_ALU_RGB_ADDR1(4) | + R300_ALU_RGB_ADDR2(1) | + R300_ALU_RGB_ADDRD(4) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_NONE) | + R300_ALU_RGB_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(17), R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_BBB) | R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_AAA) | R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(17), R300_ALU_ALPHA_ADDR0(0) | + R300_ALU_ALPHA_ADDR1(3) | + R300_ALU_ALPHA_ADDR2(0) | + R300_ALU_ALPHA_ADDRD(3) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A) | + R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(17), R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRC1_R) | R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC2_B) | R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NEG) | + R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_CMP) | + R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE)); + + /** + * 20) R_MAD(pixel_stack[4].bbb, -0.5, pixel_stack[4].aaa, pixel_stack[2].b) // pixel_stack[2].b = L - 0.5*C = m(in) + * A_MAD(pixel_stack[3].a, 6, 0, 0.5 * pixel_stack[2].a) // pixel_stack[2].a = H=0..1 -> H=0..3 == H=(0..6)/2 + */ + 
OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(18), R300_ALU_RGB_ADDR0(4) | + R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) | + R300_ALU_RGB_ADDR2(0) | + R300_ALU_RGB_ADDRD(2) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_NONE) | + R300_ALU_RGB_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(18), R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_BBB) | R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_0_5) | R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NEG) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_AAA) | R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(18), R300_ALU_ALPHA_ADDR0(3) | + R300_ALU_ALPHA_ADDR1(0) | + R300_ALU_ALPHA_ADDR2(4) | + R300_ALU_ALPHA_ADDRD(2) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A) | + R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(18), R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_A) | R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRC1_R) | R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_DIV_2)); + + /** + * 21) R_MAD(pixel_stack[4].bbb, 1, pixel_stack[2].bbb, pixel_stack[2].r) // pixel_stack[2].r = C + m + * A_FRC(pixel_stack[2].a, , , 2 * pixel_stack[2].a) // pixel_stack[2].a = mod2(H), 0..2 + */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(19), R300_ALU_RGB_ADDR0(4) | + R300_ALU_RGB_ADDR1(0) | + R300_ALU_RGB_ADDR2(2) | + R300_ALU_RGB_ADDRD(2) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_NONE) | + R300_ALU_RGB_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(19), R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_BBB) | 
R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_BBB) | R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) | + R300_ALU_RGB_INSERT_NOP); + + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(19), R300_ALU_ALPHA_ADDR0(2) | + R300_ALU_ALPHA_ADDR1(0) | + R300_ALU_ALPHA_ADDR2(0) | + R300_ALU_ALPHA_ADDRD(2) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A) | + R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(19), R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_A) | R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_FRC) | + R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_MUL_2)); + + /** + * 22) R_NOP // Next RGB instruction depends on H' which we still need to calculate. 
+ * A_MAD(NABS(1 - pixel_stack[2].a), 1, 1, pixel_stack[2].a) // pixel_stack[2].a = 1 - | mod2(H') - 1 | + */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(20), R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_ADDR1(0) | + R300_ALU_RGB_ADDR2(0) | + R300_ALU_RGB_ADDRD(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_NONE) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_NONE) | + R300_ALU_RGB_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(20), R300_ALU_RGB_SEL_A(R300_ALU_RGB_0_0) | R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_0_0) | R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(20), R300_ALU_ALPHA_ADDR0(2) | + R300_ALU_ALPHA_ADDR1(0) | + R300_ALU_ALPHA_ADDR2(0) | + R300_ALU_ALPHA_ADDRD(2) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A) | + R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(20), R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRCP_A) | R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NAB) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_1_0) | R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0) | R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SRCP_OP(R300_ALU_ALPHA_SRCP_OP_1_MINUS_RGB0) | + R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE)); + + /** + * 23) R_MAD(pixel_stack[4].bbb, pixel_stack[2].aaa, pixel_stack[2].bbb, pixel_stack[2].g) // pixel_stack[2].g = C * (1 - |mod2(H') - 1|) + m = X + m + * A_MAD(pixel_stack[4].b, 1, pixel_stack[2].b, pixel_stack[2].a) // pixel_stack[2].a = C + m = pixel_stack[2].r, to create pixel_stack[1].rgb=(C+m, 0+m, X+m) from pixel_stack[2].abg=(C+m, X+m, 0+m) + */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(21), R300_ALU_RGB_ADDR0(4) | 
+ R300_ALU_RGB_ADDR1(0) | + R300_ALU_RGB_ADDR2(2) | + R300_ALU_RGB_ADDRD(2) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_NONE) | + R300_ALU_RGB_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(21), R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_BBB) | R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_AAA) | R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_BBB) | R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(21), R300_ALU_ALPHA_ADDR0(0) | + R300_ALU_ALPHA_ADDR1(2) | + R300_ALU_ALPHA_ADDR2(0) | + R300_ALU_ALPHA_ADDRD(2) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A) | + R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(21), R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_B) | R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_1_0) | R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC2_B) | R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE)); + + /** + * 24) R_MAD(1, pixel_stack[2].abg, 0, pixel_stack[1].rgb) // pixel_stack[1].rgb = pixel_stack[2].rbg + * A_MAD(pixel_stack[3].a, 1, -1/6, pixel_stack[2].a) // pixel_stack[2].a = H' ([-1 .. 
5]/6) + */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(22), R300_ALU_RGB_ADDR0(0) | + R300_ALU_RGB_ADDR1(2) | + R300_ALU_RGB_ADDR2(R300_ALU_RGB_CONST(1)) | + R300_ALU_RGB_ADDRD(1) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_NONE) | + R300_ALU_RGB_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(22), R300_ALU_RGB_SEL_A(R300_ALU_RGB_1_0) | R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_ABG) | R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) | R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(22), R300_ALU_ALPHA_ADDR0(3) | + R300_ALU_ALPHA_ADDR1(2) | + R300_ALU_ALPHA_ADDR2(0) | + R300_ALU_ALPHA_ADDRD(2) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A) | + R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(22), R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_A) | R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_1_0) | R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC2_B) | R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NEG) | + R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE)); + + /** + * 25) R_CMP(pixel_stack[1].brg, pixel_stack[2].rgb, pixel_stack[2].aaa, pixel_stack[0].rgb) // -1 <= H < 0 || 0 <= h < 1, (C, X, 0) || (X, C, 0) + * A_MAD(pixel_stack[2].a, 1, -1/6, pixel_stack[2].a) // pixel_stack[2].a = H - 1/6 + */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(23), R300_ALU_RGB_ADDR0(1) | + R300_ALU_RGB_ADDR1(2) | + R300_ALU_RGB_ADDR2(R300_ALU_RGB_CONST(1)) | + R300_ALU_RGB_ADDRD(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_NONE) | + R300_ALU_RGB_TARGET_A); + 
OUT_ACCEL_REG(R300_US_ALU_RGB_INST(23), R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_BRG) | R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_AAA) | R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_CMP) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(23), R300_ALU_ALPHA_ADDR0(2) | + R300_ALU_ALPHA_ADDR1(0) | + R300_ALU_ALPHA_ADDR2(2) | + R300_ALU_ALPHA_ADDRD(2) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A) | + R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(23), R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_A) | R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_1_0) | R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC2_B) | R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NEG) | + R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE)); + + /** + * 26) R_CMP(pixel_stack[2].brg, pixel_stack[0].rgb, pixel_stack[2].aaa, pixel_stack[0].rgb) // 1 <= H < 2, (0, C, X) + * A_MAD(pixel_stack[2].a, 1, -1/6, pixel_stack[2].a) // pixel_stack[2].a = H - 1/6 + */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(24), R300_ALU_RGB_ADDR0(2) | + R300_ALU_RGB_ADDR1(0) | + R300_ALU_RGB_ADDR2(R300_ALU_ALPHA_CONST(1)) | + R300_ALU_RGB_ADDRD(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_NONE) | + R300_ALU_RGB_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(24), R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_BRG) | R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_AAA) | R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_CMP) | + 
R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(24), R300_ALU_ALPHA_ADDR0(2) | + R300_ALU_ALPHA_ADDR1(0) | + R300_ALU_ALPHA_ADDR2(2) | + R300_ALU_ALPHA_ADDRD(2) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A) | + R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(24), R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_A) | R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_1_0) | R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC2_B) | R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NEG) | + R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE)); + + /** + * 27) R_CMP(pixel_stack[1].gbr, pixel_stack[0].rgb, pixel_stack[2].aaa, pixel_stack[0].rgb) // 2 <= H < 3, (0, X, C) + * A_MAD(pixel_stack[2].a, 1, -1/6, pixel_stack[2].a) // pixel_stack[2].a = H - 1/6 + */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(25), R300_ALU_RGB_ADDR0(1) | + R300_ALU_RGB_ADDR1(0) | + R300_ALU_RGB_ADDR2(R300_ALU_ALPHA_CONST(1)) | + R300_ALU_RGB_ADDRD(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_NONE) | + R300_ALU_RGB_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(25), R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GBR) | R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_AAA) | R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_CMP) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(25), R300_ALU_ALPHA_ADDR0(2) | + R300_ALU_ALPHA_ADDR1(0) | + R300_ALU_ALPHA_ADDR2(2) | + R300_ALU_ALPHA_ADDRD(2) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A) | + R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_NONE) | + 
R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(25), R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_A) | R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_1_0) | R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC2_B) | R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NEG) | + R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE)); + + /** + * 28) R_CMP(pixel_stack[2].gbr, pixel_stack[0].rgb, pixel_stack[2].aaa, pixel_stack[0].rgb) // 3 <= H < 4, (X, 0, C) + * A_MAD(pixel_stack[2].a, 1, -1/6, pixel_stack[2].a) // pixel_stack[2].a = H - 1/6 + */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(26), R300_ALU_RGB_ADDR0(2) | + R300_ALU_RGB_ADDR1(0) | + R300_ALU_RGB_ADDR2(R300_ALU_ALPHA_CONST(1)) | + R300_ALU_RGB_ADDRD(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_NONE) | + R300_ALU_RGB_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(26), R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GBR) | R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_AAA) | R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_CMP) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(26), R300_ALU_ALPHA_ADDR0(2) | + R300_ALU_ALPHA_ADDR1(0) | + R300_ALU_ALPHA_ADDR2(2) | + R300_ALU_ALPHA_ADDRD(2) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A) | + R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(26), R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_A) | R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_1_0) | R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | + 
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC2_B) | R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NEG) | + R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE)); + + /** + * 29) R_CMP(pixel_stack[1].rgb, pixel_stack[0].rgb, pixel_stack[2].aaa, pixel_stack[0].rgb) // 4 <= H < 5, (C, 0, X) + * A_NOP + */ + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(27), R300_ALU_RGB_ADDR0(1) | + R300_ALU_RGB_ADDR1(0) | + R300_ALU_RGB_ADDR2(0) | + R300_ALU_RGB_ADDRD(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_NONE) | + #if(DEBUG == 1) + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_NONE) | + #else + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB) | + #endif + + R300_ALU_RGB_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(27), R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) | R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_AAA) | R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_CMP) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)); + + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(27), R300_ALU_ALPHA_ADDR0(0) | + R300_ALU_ALPHA_ADDR1(0) | + R300_ALU_ALPHA_ADDR2(2) | + R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE) | + #if(DEBUG == 1) + R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_NONE) | + #else + R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | + #endif + + R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(27), R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_A) | R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_1_0) | R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE)); + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + /** + * DEBUG output + * hue - pixel_stack[4].r + * saturation - pixel_stack[4].g + * lightness - pixel_stack[4].a + */ + #if(DEBUG == 1) + // RGB + OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(23), R300_ALU_RGB_ADDR0(2) | + R300_ALU_RGB_ADDR1(0) | + R300_ALU_RGB_ADDR2(0) | + R300_ALU_RGB_ADDRD(0) | + R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_NONE) | + R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB) | + R300_ALU_RGB_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_RGB_INST(23), R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) | R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) | R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_5) | R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) | + R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) | + R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_DIV_2) | + R300_ALU_RGB_CLAMP); + + // Alpha + OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(23), R300_ALU_ALPHA_ADDR0(2) | + R300_ALU_ALPHA_ADDR1(0) | + R300_ALU_ALPHA_ADDR2(0) | + R300_ALU_ALPHA_ADDRD(0) | + R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) | + R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE) | + R300_ALU_ALPHA_TARGET_A); + OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(23), R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) | R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) | + R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0) | R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NEG) | + R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) | + R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE) + | R300_ALU_ALPHA_CLAMP + ); + #endif + //OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR( 8), R300_ALU_RGB_ADDR0(3) | + //R300_ALU_RGB_ADDR1(0) | + //R300_ALU_RGB_ADDR2(1) | + //R300_ALU_RGB_ADDRD(0) | + //R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_NONE) | + //R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB) | + //R300_ALU_RGB_TARGET_A); + +////A_CMP(0, pixel_stack[2].a, -pixel_stack[1].b, pixel_stack[2].a) // pixel_stack[2].a = 0, if 
C was 0 initially, mod2(H') otherwise + /* End DEBUG */ + #undef NR_RGBA_INSTRUCTIONS + FINISH_ACCEL(); +} diff --git a/src/radeon_textured_videofuncs.c b/src/radeon_textured_videofuncs.c index 8f4e2d7..8fb5af2 100644 --- a/src/radeon_textured_videofuncs.c +++ b/src/radeon_textured_videofuncs.c @@ -1517,7 +1517,16 @@ FUNC_NAME(R300PrepareTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) } } else { if(is_rgb_format) { - FUNC_NAME(R300SetupPixelShaderRGB)(pScrn, pPriv); + // If there is no need to perform any adjustments use the NOP shader + if((pPriv->hue == 0) && (pPriv->saturation == 0) && + (pPriv->brightness == 0) && (pPriv->contrast == 0) && + (pPriv->gamma == 1000)) + { + FUNC_NAME(R300SetupPixelShaderRGBNop)(pScrn, pPriv); + } + else { + FUNC_NAME(R300SetupPixelShaderRGBFull)(pScrn, pPriv); + } } else { FUNC_NAME(R300SetupPixelShaderYUVtoRGB)(pScrn, pPriv); -- 1.7.3.4