commit c717eb9fdb0a95f2d171e09569b707dfb70bee53 Author: Kusanagi Kouichi Date: 2011-03-27 17:07:49 +0900 Xv: Add RGB formats to r600+ X8R8G8B8, A8R8G8B8, R5G6B5, X1R5G5B5 and A1R5G5B5. Non-premultiplied alpha. Brightness, contrast, saturation and hue. Little- and big-endian. diff --git a/src/evergreen_shader.c b/src/evergreen_shader.c index bbdd7a7..515c8dc 100644 --- a/src/evergreen_shader.c +++ b/src/evergreen_shader.c @@ -751,7 +751,7 @@ int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) int i = 0; /* 0 */ - shader[i++] = CF_ALU_DWORD0(ADDR(5), + shader[i++] = CF_ALU_DWORD0(ADDR(6), KCACHE_BANK0(0), KCACHE_BANK1(0), KCACHE_MODE0(SQ_CF_KCACHE_NOP)); @@ -764,7 +764,19 @@ int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) WHOLE_QUAD_MODE(0), BARRIER(1)); /* 1 */ - shader[i++] = CF_DWORD0(ADDR(21), + shader[i++] = CF_DWORD0(ADDR(36), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_BOOL), + I_COUNT(1), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_JUMP), + WHOLE_QUAD_MODE(0), + BARRIER(0)); + /* 2 */ + shader[i++] = CF_DWORD0(ADDR(22), JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), @@ -775,7 +787,7 @@ int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_CALL), WHOLE_QUAD_MODE(0), BARRIER(0)); - /* 2 */ + /* 3 */ shader[i++] = CF_DWORD0(ADDR(30), JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); shader[i++] = CF_DWORD1(POP_COUNT(0), @@ -787,8 +799,8 @@ int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_CALL), WHOLE_QUAD_MODE(0), BARRIER(0)); - /* 3 */ - shader[i++] = CF_ALU_DWORD0(ADDR(9), + /* 4 */ + shader[i++] = CF_ALU_DWORD0(ADDR(10), KCACHE_BANK0(0), KCACHE_BANK1(0), KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); @@ -800,7 +812,7 @@ int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_ALU), WHOLE_QUAD_MODE(0), BARRIER(1)); - 
/* 4 */ + /* 5 */ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), TYPE(SQ_EXPORT_PIXEL), RW_GPR(2), @@ -817,7 +829,7 @@ int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_EXPORT_DONE), MARK(0), BARRIER(1)); - /* 5 interpolate tex coords */ + /* 6 interpolate tex coords */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Y), @@ -841,7 +853,7 @@ int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) DST_REL(ABSOLUTE), DST_ELEM(ELEM_X), CLAMP(0)); - /* 6 */ + /* 7 */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_X), @@ -865,7 +877,7 @@ int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) DST_REL(ABSOLUTE), DST_ELEM(ELEM_Y), CLAMP(0)); - /* 7 */ + /* 8 */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Y), @@ -889,7 +901,7 @@ int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) DST_REL(ABSOLUTE), DST_ELEM(ELEM_Z), CLAMP(0)); - /* 8 */ + /* 9 */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_X), @@ -914,7 +926,7 @@ int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) DST_ELEM(ELEM_W), CLAMP(0)); - /* 9,10,11,12 */ + /* 10,11,12,13 */ /* r2.x = MAD(c0.w, r1.x, c0.x) */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0), SRC0_REL(ABSOLUTE), @@ -1004,7 +1016,7 @@ int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) DST_ELEM(ELEM_W), CLAMP(0)); - /* 13,14,15,16 */ + /* 14,15,16,17 */ /* r2.x = MAD(c1.x, r1.y, pv.x) */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1), SRC0_REL(ABSOLUTE), @@ -1093,7 +1105,7 @@ int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) DST_REL(ABSOLUTE), DST_ELEM(ELEM_W), CLAMP(0)); - /* 17,18,19,20 */ + /* 18,19,20,21 */ /* r2.x = MAD(c2.x, r1.z, pv.x) */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2), SRC0_REL(ABSOLUTE), @@ 
-1160,7 +1172,7 @@ int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) DST_REL(ABSOLUTE), DST_ELEM(ELEM_Z), CLAMP(1)); - /* r2.w = MAD(0, 0, 1) */ + /* r2.w = MAD(0, 0, r1.w) */ shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_X), @@ -1172,9 +1184,9 @@ int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(1)); - shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_GPR_BASE + 1), SRC2_REL(ABSOLUTE), - SRC2_ELEM(ELEM_X), + SRC2_ELEM(ELEM_W), SRC2_NEG(0), ALU_INST(SQ_OP3_INST_MULADD), BANK_SWIZZLE(SQ_ALU_VEC_012), @@ -1183,7 +1195,7 @@ int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) DST_ELEM(ELEM_W), CLAMP(1)); - /* 21 */ + /* 22 */ shader[i++] = CF_DWORD0(ADDR(24), JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); shader[i++] = CF_DWORD1(POP_COUNT(0), @@ -1195,7 +1207,7 @@ int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_TC), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 22 */ + /* 23 */ shader[i++] = CF_DWORD0(ADDR(0), JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); shader[i++] = CF_DWORD1(POP_COUNT(0), @@ -1207,9 +1219,6 @@ int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_RETURN), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 23 */ - shader[i++] = 0x00000000; - shader[i++] = 0x00000000; /* 24/25 */ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), INST_MOD(0), @@ -1385,6 +1394,312 @@ int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) SRC_SEL_W(SQ_SEL_1)); shader[i++] = TEX_DWORD_PAD; + /* 36 - RGB */ + shader[i++] = CF_DWORD0(ADDR(40), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(1), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_TC), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 37 */ + shader[i++] = 
CF_ALU_DWORD0(ADDR(42), + KCACHE_BANK0(0), + KCACHE_BANK1(0), + KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); + shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(10), + ALT_CONST(0), + CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* 38 */ + shader[i++] = CF_ALU_DWORD0(ADDR(10), + KCACHE_BANK0(0), + KCACHE_BANK1(0), + KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); + shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(12), + ALT_CONST(0), + CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* 39 */ + shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), + TYPE(SQ_EXPORT_PIXEL), + RW_GPR(2), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(1)); + shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + BURST_COUNT(1), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(1), + CF_INST(SQ_CF_INST_EXPORT_DONE), + MARK(0), + BARRIER(1)); + + /* 40/41 */ + shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), + INST_MOD(0), + FETCH_WHOLE_QUAD(0), + RESOURCE_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + ALT_CONST(0), + RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), + SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); + shader[i++] = TEX_DWORD1(DST_GPR(2), + DST_REL(ABSOLUTE), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_Z), + DST_SEL_W(SQ_SEL_W), + LOD_BIAS(0), + COORD_TYPE_X(TEX_NORMALIZED), + COORD_TYPE_Y(TEX_NORMALIZED), + COORD_TYPE_Z(TEX_NORMALIZED), + COORD_TYPE_W(TEX_NORMALIZED)); + shader[i++] = TEX_DWORD2(OFFSET_X(0), + OFFSET_Y(0), + OFFSET_Z(0), + SAMPLER_ID(0), + SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1)); + shader[i++] = TEX_DWORD_PAD; + + /* 42,43,44 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 3), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 2), + 
SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 3), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_W), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(0)); + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 3), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 2), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_0_5), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_X), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(0)); + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 3), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Z), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 2), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_0_5), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_X), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(0)); + + /* 45,46,47 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 2), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_X), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(0)); + 
shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 2), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Y), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(0)); + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Z), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 2), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Z), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(0)); + + /* 48,49,50,51 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 2), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Z), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_X), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(1)); + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 2), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Z), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Y), + 
SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(1)); + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Z), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 2), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Z), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Z), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(1)); + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_W), + SRC0_NEG(0), + SRC1_SEL(SQ_ALU_SRC_0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(0)); + return i; } diff --git a/src/evergreen_textured_videofuncs.c b/src/evergreen_textured_videofuncs.c index 147cd4e..f9e2f4d 100644 --- a/src/evergreen_textured_videofuncs.c +++ b/src/evergreen_textured_videofuncs.c @@ -59,6 +59,12 @@ static REF_TRANSFORM trans[2] = {1.1643, 0.0, 1.7927, -0.2132, -0.5329, 2.1124, 0.0} /* BT.709 */ }; +static const float rgb_to_yuv[2][9] = /* per standard: Y, Cb, Cr rows of (R, G, B) weights */ +{ + {0.299, 0.587, 0.114, -0.168736, -0.331264, 0.5, 0.5, -0.418688, -0.081312}, /* BT.601 */ + {0.2126, 0.7152, 0.0722, -0.114572, -0.385428, 0.5, 0.5, -0.454153, -0.045847}, /* BT.709 */ +}; + void EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) { @@ -171,7 +177,17 @@ EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) src_obj.pitch = pPriv->src_pitch; src_obj.width = pPriv->w; 
src_obj.height = pPriv->h; - src_obj.bpp = 16; + switch(pPriv->id) { + case X8R8G8B8_LE: + case X8R8G8B8_BE: + case A8R8G8B8_LE: + case A8R8G8B8_BE: + src_obj.bpp = 32; + break; + default: + src_obj.bpp = 16; + break; + } src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT; src_obj.bo = pPriv->src_bo[pPriv->currentBuffer]; @@ -214,9 +230,11 @@ EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) break; case FOURCC_UYVY: case FOURCC_YUY2: - default: evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (0 << 0)); break; + default: + evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (1 << 1)); + break; } /* Shader */ @@ -331,7 +349,6 @@ EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) break; case FOURCC_UYVY: case FOURCC_YUY2: - default: accel_state->src_size[0] = accel_state->src_obj[0].pitch * pPriv->h; /* Y texture */ @@ -405,6 +422,84 @@ EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) tex_samp.id = 1; evergreen_set_tex_sampler(pScrn, &tex_samp); break; + default: + accel_state->src_size[0] = accel_state->src_obj[0].pitch * pPriv->h; + + /* RGB texture */ + tex_res.id = 0; + tex_res.w = accel_state->src_obj[0].width; + tex_res.h = accel_state->src_obj[0].height; + tex_res.depth = 0; + tex_res.dim = SQ_TEX_DIM_2D; + tex_res.base = accel_state->src_obj[0].offset; + tex_res.mip_base = accel_state->src_obj[0].offset; + tex_res.size = accel_state->src_size[0]; + tex_res.bo = accel_state->src_obj[0].bo; + tex_res.mip_bo = accel_state->src_obj[0].bo; + + tex_res.dst_sel_x = SQ_SEL_Z; + tex_res.dst_sel_y = SQ_SEL_Y; + tex_res.dst_sel_z = SQ_SEL_X; + switch(pPriv->id) { + case X8R8G8B8_BE: + tex_res.endian = ENDIAN_8IN32; + case X8R8G8B8_LE: + tex_res.pitch = accel_state->src_obj[0].pitch >> 2; + tex_res.format = FMT_8_8_8_8; + tex_res.dst_sel_w = SQ_SEL_1; + break; + case A8R8G8B8_BE: + tex_res.endian = ENDIAN_8IN32; + case A8R8G8B8_LE: + tex_res.pitch = accel_state->src_obj[0].pitch >> 2; + 
tex_res.format = FMT_8_8_8_8; + tex_res.dst_sel_w = SQ_SEL_W; + break; + case R5G6B5_BE: + tex_res.endian = ENDIAN_8IN16; + case R5G6B5_LE: + tex_res.pitch = accel_state->src_obj[0].pitch >> 1; + tex_res.format = FMT_5_6_5; + tex_res.dst_sel_w = SQ_SEL_1; + break; + case X1R5G5B5_BE: + tex_res.endian = ENDIAN_8IN16; + case X1R5G5B5_LE: + tex_res.pitch = accel_state->src_obj[0].pitch >> 1; + tex_res.format = FMT_1_5_5_5; + tex_res.dst_sel_w = SQ_SEL_1; + break; + case A1R5G5B5_BE: + tex_res.endian = ENDIAN_8IN16; + case A1R5G5B5_LE: + tex_res.pitch = accel_state->src_obj[0].pitch >> 1; + tex_res.format = FMT_1_5_5_5; + tex_res.dst_sel_w = SQ_SEL_W; + break; + } + + tex_res.base_level = 0; + tex_res.last_level = 0; + tex_res.perf_modulation = 0; + tex_res.interlaced = 0; + if (accel_state->src_obj[0].tiling_flags == 0) + tex_res.array_mode = 1; + evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain); + + /* RGB sampler */ + tex_samp.id = 0; + tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL; + tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL; + tex_samp.clamp_z = SQ_TEX_WRAP; + + /* xxx: switch to bicubic */ + tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_BILINEAR; + tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_BILINEAR; + + tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE; + tex_samp.mip_filter = 0; /* no mipmap */ + evergreen_set_tex_sampler(pScrn, &tex_samp); + break; } cb_conf.id = 0; @@ -445,6 +540,9 @@ EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) cb_conf.array_mode = 1; cb_conf.non_disp_tiling = 1; } + cb_conf.blendcntl = (BLEND_SRC_ALPHA << COLOR_SRCBLEND_shift) | + (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift) | + CB_BLEND0_CONTROL__ENABLE_bit; evergreen_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain); evergreen_set_spi(pScrn, (1 - 1), 1); @@ -471,6 +569,21 @@ EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) ps_alu_consts[10] = vco[2]; ps_alu_consts[11] = 0.0; + ps_alu_consts[12] = 
rgb_to_yuv[ref][0]; + ps_alu_consts[13] = rgb_to_yuv[ref][3]; + ps_alu_consts[14] = rgb_to_yuv[ref][6]; + ps_alu_consts[15] = -Loff; + + ps_alu_consts[16] = rgb_to_yuv[ref][1]; + ps_alu_consts[17] = rgb_to_yuv[ref][4]; + ps_alu_consts[18] = rgb_to_yuv[ref][7]; + ps_alu_consts[19] = -Coff; + + ps_alu_consts[20] = rgb_to_yuv[ref][2]; + ps_alu_consts[21] = rgb_to_yuv[ref][5]; + ps_alu_consts[22] = rgb_to_yuv[ref][8]; + ps_alu_consts[23] = -Coff; + radeon_vbo_commit(pScrn, &accel_state->cbuf); evergreen_set_alu_consts(pScrn, &ps_const_conf, RADEON_GEM_DOMAIN_GTT); diff --git a/src/r600_shader.c b/src/r600_shader.c index ab2f485..1d08bad 100644 --- a/src/r600_shader.c +++ b/src/r600_shader.c @@ -654,7 +654,19 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) int i = 0; /* 0 */ - shader[i++] = CF_DWORD0(ADDR(16)); + shader[i++] = CF_DWORD0(ADDR(32)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_BOOL), + I_COUNT(1), + CALL_COUNT(0), + END_OF_PROGRAM(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_JUMP), + WHOLE_QUAD_MODE(0), + BARRIER(0)); + /* 1 */ + shader[i++] = CF_DWORD0(ADDR(17)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_BOOL), @@ -665,8 +677,8 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_CALL), WHOLE_QUAD_MODE(0), BARRIER(0)); - /* 1 */ - shader[i++] = CF_DWORD0(ADDR(24)); + /* 2 */ + shader[i++] = CF_DWORD0(ADDR(26)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_NOT_BOOL), @@ -677,8 +689,8 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_CALL), WHOLE_QUAD_MODE(0), BARRIER(0)); - /* 2 */ - shader[i++] = CF_ALU_DWORD0(ADDR(4), + /* 3 */ + shader[i++] = CF_ALU_DWORD0(ADDR(5), KCACHE_BANK0(0), KCACHE_BANK1(0), KCACHE_MODE0(SQ_CF_KCACHE_NOP)); @@ -690,7 +702,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_ALU), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 3 */ + /* 4 */ shader[i++] = 
CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), TYPE(SQ_EXPORT_PIXEL), RW_GPR(2), @@ -708,7 +720,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_EXPORT_DONE), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 4,5,6,7 */ + /* 5,6,7,8 */ /* r2.x = MAD(c0.w, r1.x, c0.x) */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 0), SRC0_REL(ABSOLUTE), @@ -798,7 +810,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) DST_ELEM(ELEM_W), CLAMP(0)); - /* 8,9,10,11 */ + /* 9,10,11,12 */ /* r2.x = MAD(c1.x, r1.y, pv.x) */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 1), SRC0_REL(ABSOLUTE), @@ -887,7 +899,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) DST_REL(ABSOLUTE), DST_ELEM(ELEM_W), CLAMP(0)); - /* 12,13,14,15 */ + /* 13,14,15,16 */ /* r2.x = MAD(c2.x, r1.z, pv.x) */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 2), SRC0_REL(ABSOLUTE), @@ -954,7 +966,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) DST_REL(ABSOLUTE), DST_ELEM(ELEM_Z), CLAMP(1)); - /* r2.w = MAD(0, 0, 1) */ + /* r2.w = MAD(0, 0, r1.w) */ shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_X), @@ -966,9 +978,9 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(1)); - shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1), + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_GPR_BASE + 1), SRC2_REL(ABSOLUTE), - SRC2_ELEM(ELEM_X), + SRC2_ELEM(ELEM_W), SRC2_NEG(0), ALU_INST(SQ_OP3_INST_MULADD), BANK_SWIZZLE(SQ_ALU_VEC_012), @@ -977,8 +989,8 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) DST_ELEM(ELEM_W), CLAMP(1)); - /* 16 */ - shader[i++] = CF_DWORD0(ADDR(18)); + /* 17 */ + shader[i++] = CF_DWORD0(ADDR(20)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE), @@ -989,7 +1001,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_TEX), WHOLE_QUAD_MODE(0), 
BARRIER(1)); - /* 17 */ + /* 18 */ shader[i++] = CF_DWORD0(ADDR(0)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), @@ -1001,7 +1013,10 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_RETURN), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 18/19 */ + /* 19 */ + shader[i++] = 0x00000000; + shader[i++] = 0x00000000; + /* 20/21 */ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), BC_FRAC_MODE(0), FETCH_WHOLE_QUAD(0), @@ -1029,7 +1044,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) SRC_SEL_Z(SQ_SEL_0), SRC_SEL_W(SQ_SEL_1)); shader[i++] = TEX_DWORD_PAD; - /* 20/21 */ + /* 22/23 */ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), BC_FRAC_MODE(0), FETCH_WHOLE_QUAD(0), @@ -1057,7 +1072,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) SRC_SEL_Z(SQ_SEL_0), SRC_SEL_W(SQ_SEL_1)); shader[i++] = TEX_DWORD_PAD; - /* 22/23 */ + /* 24/25 */ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), BC_FRAC_MODE(0), FETCH_WHOLE_QUAD(0), @@ -1085,8 +1100,8 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) SRC_SEL_Z(SQ_SEL_0), SRC_SEL_W(SQ_SEL_1)); shader[i++] = TEX_DWORD_PAD; - /* 24 */ - shader[i++] = CF_DWORD0(ADDR(26)); + /* 26 */ + shader[i++] = CF_DWORD0(ADDR(28)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE), @@ -1097,7 +1112,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_TEX), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 25 */ + /* 27 */ shader[i++] = CF_DWORD0(ADDR(0)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), @@ -1109,7 +1124,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_RETURN), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 26/27 */ + /* 28/29 */ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), BC_FRAC_MODE(0), FETCH_WHOLE_QUAD(0), @@ -1137,7 +1152,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) SRC_SEL_Z(SQ_SEL_0), SRC_SEL_W(SQ_SEL_1)); shader[i++] = TEX_DWORD_PAD; - /* 28/29 */ 
+ /* 30/31 */ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), BC_FRAC_MODE(0), FETCH_WHOLE_QUAD(0), @@ -1166,6 +1181,312 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader) SRC_SEL_W(SQ_SEL_1)); shader[i++] = TEX_DWORD_PAD; + /* 32 RGB */ + shader[i++] = CF_DWORD0(ADDR(36)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(1), + CALL_COUNT(0), + END_OF_PROGRAM(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_TEX), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* 33 */ + shader[i++] = CF_ALU_DWORD0(ADDR(38), + KCACHE_BANK0(0), + KCACHE_BANK1(0), + KCACHE_MODE0(SQ_CF_KCACHE_NOP)); + shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(10), + USES_WATERFALL(0), + CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* 34 */ + shader[i++] = CF_ALU_DWORD0(ADDR(5), + KCACHE_BANK0(0), + KCACHE_BANK1(0), + KCACHE_MODE0(SQ_CF_KCACHE_NOP)); + shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(12), + USES_WATERFALL(0), + CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + /* 35 */ + shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), + TYPE(SQ_EXPORT_PIXEL), + RW_GPR(2), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(1)); + shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + R6xx_ELEM_LOOP(0), + BURST_COUNT(1), + END_OF_PROGRAM(1), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 36/37 */ + shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), + BC_FRAC_MODE(0), + FETCH_WHOLE_QUAD(0), + RESOURCE_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + R7xx_ALT_CONST(0)); + shader[i++] = TEX_DWORD1(DST_GPR(2), + DST_REL(ABSOLUTE), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_Z), + DST_SEL_W(SQ_SEL_W), + LOD_BIAS(0), + 
COORD_TYPE_X(TEX_NORMALIZED), + COORD_TYPE_Y(TEX_NORMALIZED), + COORD_TYPE_Z(TEX_NORMALIZED), + COORD_TYPE_W(TEX_NORMALIZED)); + shader[i++] = TEX_DWORD2(OFFSET_X(0), + OFFSET_Y(0), + OFFSET_Z(0), + SAMPLER_ID(0), + SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1)); + shader[i++] = TEX_DWORD_PAD; + + /* 38,39,40 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 3), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 2), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_CFILE_BASE + 3), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_W), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(0)); + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 3), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 2), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_0_5), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_X), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(0)); + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 3), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Z), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 2), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_0_5), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_X), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(0)); + + /* 41,42,43 */ + shader[i++] = 
ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 4), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 2), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_X), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(0)); + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 4), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 2), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Y), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(0)); + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 4), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Z), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 2), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Y), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Z), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(0)); + + /* 44,45,46,47 */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 5), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 2), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Z), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_X), + SRC2_NEG(0), + 
ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(1)); + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 5), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 2), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Z), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Y), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(1)); + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_CFILE_BASE + 5), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Z), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 2), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_Z), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV), + SRC2_REL(ABSOLUTE), + SRC2_ELEM(ELEM_Z), + SRC2_NEG(0), + ALU_INST(SQ_OP3_INST_MULADD), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(1)); + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_W), + SRC0_NEG(0), + SRC1_SEL(SQ_ALU_SRC_0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_LOOP), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(ChipSet, + SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + FOG_MERGE(0), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(1)); + return i; } diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c index 4ff0833..618e074 100644 --- a/src/r600_textured_videofuncs.c +++ b/src/r600_textured_videofuncs.c @@ -57,6 +57,12 @@ static REF_TRANSFORM trans[2] 
= {1.1643, 0.0, 1.7927, -0.2132, -0.5329, 2.1124, 0.0} /* BT.709 */ }; +static const float rgb_to_yuv[2][9] = /* per standard: Y, Cb, Cr rows of (R, G, B) weights */ +{ + {0.299, 0.587, 0.114, -0.168736, -0.331264, 0.5, 0.5, -0.418688, -0.081312}, /* BT.601 */ + {0.2126, 0.7152, 0.0722, -0.114572, -0.385428, 0.5, 0.5, -0.454153, -0.045847}, /* BT.709 */ +}; + void R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) { @@ -107,7 +113,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) float bright, cont, gamma; int ref = pPriv->transform_index; Bool needgamma = FALSE; - float ps_alu_consts[12]; + float ps_alu_consts[24]; float vs_alu_consts[4]; cont = RTFContrast(pPriv->contrast); @@ -158,6 +164,21 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) ps_alu_consts[10] = vco[2]; ps_alu_consts[11] = 0.0; + ps_alu_consts[12] = rgb_to_yuv[ref][0]; + ps_alu_consts[13] = rgb_to_yuv[ref][3]; + ps_alu_consts[14] = rgb_to_yuv[ref][6]; + ps_alu_consts[15] = -Loff; + + ps_alu_consts[16] = rgb_to_yuv[ref][1]; + ps_alu_consts[17] = rgb_to_yuv[ref][4]; + ps_alu_consts[18] = rgb_to_yuv[ref][7]; + ps_alu_consts[19] = -Coff; + + ps_alu_consts[20] = rgb_to_yuv[ref][2]; + ps_alu_consts[21] = rgb_to_yuv[ref][5]; + ps_alu_consts[22] = rgb_to_yuv[ref][8]; + ps_alu_consts[23] = -Coff; + CLEAR (cb_conf); CLEAR (tex_res); CLEAR (tex_samp); @@ -181,7 +202,17 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) src_obj.pitch = pPriv->src_pitch; src_obj.width = pPriv->w; src_obj.height = pPriv->h; - src_obj.bpp = 16; + switch(pPriv->id) { + case X8R8G8B8_LE: + case X8R8G8B8_BE: + case A8R8G8B8_LE: + case A8R8G8B8_BE: + src_obj.bpp = 32; + break; + default: + src_obj.bpp = 16; + break; + } src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT; src_obj.bo = pPriv->src_bo[pPriv->currentBuffer]; @@ -223,9 +254,11 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) break; case FOURCC_UYVY: case FOURCC_YUY2: - default: r600_set_bool_consts(pScrn, accel_state->ib, 
SQ_BOOL_CONST_ps, (0 << 0)); break; + default: + r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (1 << 1)); + break; } /* Shader */ @@ -346,7 +379,6 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) break; case FOURCC_UYVY: case FOURCC_YUY2: - default: accel_state->src_size[0] = accel_state->src_obj[0].pitch * pPriv->h; /* Y texture */ @@ -422,6 +454,85 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) tex_samp.id = 1; r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp); break; + default: + accel_state->src_size[0] = accel_state->src_obj[0].pitch * pPriv->h; + + /* RGB texture */ + tex_res.id = 0; + tex_res.w = accel_state->src_obj[0].width; + tex_res.h = accel_state->src_obj[0].height; + tex_res.depth = 0; + tex_res.dim = SQ_TEX_DIM_2D; + tex_res.base = accel_state->src_obj[0].offset; + tex_res.mip_base = accel_state->src_obj[0].offset; + tex_res.size = accel_state->src_size[0]; + tex_res.bo = accel_state->src_obj[0].bo; + tex_res.mip_bo = accel_state->src_obj[0].bo; + + tex_res.dst_sel_x = SQ_SEL_Z; + tex_res.dst_sel_y = SQ_SEL_Y; + tex_res.dst_sel_z = SQ_SEL_X; + switch(pPriv->id) { + case X8R8G8B8_BE: + tex_res.endian = ENDIAN_8IN32; + case X8R8G8B8_LE: + tex_res.pitch = accel_state->src_obj[0].pitch >> 2; + tex_res.format = FMT_8_8_8_8; + tex_res.dst_sel_w = SQ_SEL_1; + break; + case A8R8G8B8_BE: + tex_res.endian = ENDIAN_8IN32; + case A8R8G8B8_LE: + tex_res.pitch = accel_state->src_obj[0].pitch >> 2; + tex_res.format = FMT_8_8_8_8; + tex_res.dst_sel_w = SQ_SEL_W; + break; + case R5G6B5_BE: + tex_res.endian = ENDIAN_8IN16; + case R5G6B5_LE: + tex_res.pitch = accel_state->src_obj[0].pitch >> 1; + tex_res.format = FMT_5_6_5; + tex_res.dst_sel_w = SQ_SEL_1; + break; + case X1R5G5B5_BE: + tex_res.endian = ENDIAN_8IN16; + case X1R5G5B5_LE: + tex_res.pitch = accel_state->src_obj[0].pitch >> 1; + tex_res.format = FMT_1_5_5_5; + tex_res.dst_sel_w = SQ_SEL_1; + break; + case A1R5G5B5_BE: + tex_res.endian = 
ENDIAN_8IN16; + case A1R5G5B5_LE: + tex_res.pitch = accel_state->src_obj[0].pitch >> 1; + tex_res.format = FMT_1_5_5_5; + tex_res.dst_sel_w = SQ_SEL_W; + break; + } + + tex_res.request_size = 1; + tex_res.base_level = 0; + tex_res.last_level = 0; + tex_res.perf_modulation = 0; + tex_res.interlaced = 0; + if (accel_state->src_obj[0].tiling_flags == 0) + tex_res.tile_mode = 1; + r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain); + + /* RGB sampler */ + tex_samp.id = 0; + tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL; + tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL; + tex_samp.clamp_z = SQ_TEX_WRAP; + + /* xxx: switch to bicubic */ + tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_BILINEAR; + tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_BILINEAR; + + tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE; + tex_samp.mip_filter = 0; /* no mipmap */ + r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp); + break; } cb_conf.id = 0; @@ -460,6 +571,8 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) cb_conf.rop = 3; if (accel_state->dst_obj.tiling_flags == 0) cb_conf.array_mode = 1; + cb_conf.blendcntl = (BLEND_SRC_ALPHA << COLOR_SRCBLEND_shift) | + (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift); r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain); r600_set_spi(pScrn, accel_state->ib, (1 - 1), 1); diff --git a/src/radeon_textured_video.c b/src/radeon_textured_video.c index d247db6..6fb5c43 100644 --- a/src/radeon_textured_video.c +++ b/src/radeon_textured_video.c @@ -325,6 +325,13 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn, srcPitch = (width << 1); srcPitch2 = 0; break; + case X8R8G8B8_LE: + case X8R8G8B8_BE: + case A8R8G8B8_LE: + case A8R8G8B8_BE: + srcPitch = width << 2; + dstPitch = RADEON_ALIGN(dst_width << 2, pPriv->hw_align); + break; } size = dstPitch * aligned_height + 2 * dstPitch2 * RADEON_ALIGN(((aligned_height + 1) >> 1), h_align); @@ -467,6 +474,19 @@ RADEONPutImageTextured(ScrnInfoPtr 
pScrn, pPriv->src_addr + (top * dstPitch), srcPitch, dstPitch, nlines, width, 2); break; + case X8R8G8B8_LE: + case X8R8G8B8_BE: + case A8R8G8B8_LE: + case A8R8G8B8_BE: + if (info->ChipFamily >= CHIP_FAMILY_R600) + R600CopyData(pScrn, buf + (top * srcPitch), + pPriv->src_addr + (top * dstPitch), + srcPitch, dstPitch, nlines, width * 4, 1); + else + RADEONCopyData(pScrn, buf + (top * srcPitch), + pPriv->src_addr + (top * dstPitch), + srcPitch, dstPitch, nlines, width, 4); + break; } /* update cliplist */ @@ -657,6 +677,109 @@ static XF86ImageRec Images[NUM_IMAGES] = XVIMAGE_UYVY }; +#define XVIMAGE_X8R8G8B8(id, byte_order) \ + { \ + id, \ + XvRGB, \ + byte_order, \ + { 'R', 'G', 'B', 'A', \ + 0x00,0x00,0x00,0x10,0x80,0x00,0x00,0xAA,0x00,0x38,0x9B,0x71}, \ + 32, \ + XvPacked, \ + 1, \ + 24, 0x00FF0000, 0x0000FF00, 0x000000FF, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + {'X', 'R', 'G', 'B', \ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, \ + XvTopToBottom \ + } + +#define XVIMAGE_A8R8G8B8(id, byte_order) \ + { \ + id, \ + XvRGB, \ + byte_order, \ + { 'R', 'G', 'B', 'A', \ + 0x00,0x00,0x00,0x10,0x80,0x00,0x00,0xAA,0x00,0x38,0x9B,0x71}, \ + 32, \ + XvPacked, \ + 1, \ + 32, 0x00FF0000, 0x0000FF00, 0x000000FF, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + {'A', 'R', 'G', 'B', \ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, \ + XvTopToBottom \ + } + +#define XVIMAGE_R5G6B5(id, byte_order) \ + { \ + id, \ + XvRGB, \ + byte_order, \ + { 'R', 'G', 'B', 0x00, \ + 0x00,0x00,0x00,0x10,0x80,0x00,0x00,0xAA,0x00,0x38,0x9B,0x71}, \ + 16, \ + XvPacked, \ + 1, \ + 16, 0x0000F800, 0x000007E0, 0x0000001F, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + {'R', 'G', 'B', \ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, \ + XvTopToBottom \ + } + +#define XVIMAGE_X1R5G5B5(id, byte_order) \ + { \ + id, \ + XvRGB, \ + byte_order, \ + { 'R', 'G', 'B', 'T', \ + 0x00,0x00,0x00,0x10,0x80,0x00,0x00,0xAA,0x00,0x38,0x9B,0x71}, \ + 16, \ + XvPacked, \ + 1, \ + 15, 0x00007C00, 0x000003E0, 
0x0000001F, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + {'X', 'R', 'G', 'B', \ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, \ + XvTopToBottom \ + } + +#define XVIMAGE_A1R5G5B5(id, byte_order) \ + { \ + id, \ + XvRGB, \ + byte_order, \ + { 'R', 'G', 'B', 'T', \ + 0x00,0x00,0x00,0x10,0x80,0x00,0x00,0xAA,0x00,0x38,0x9B,0x71}, \ + 16, \ + XvPacked, \ + 1, \ + 16, 0x00007C00, 0x000003E0, 0x0000001F, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + {'A', 'R', 'G', 'B', \ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, \ + XvTopToBottom \ + } + +static XF86ImageRec Images_r600[] = +{ + XVIMAGE_X8R8G8B8(X8R8G8B8_LE, LSBFirst), + XVIMAGE_X8R8G8B8(X8R8G8B8_BE, MSBFirst), + XVIMAGE_A8R8G8B8(A8R8G8B8_LE, LSBFirst), + XVIMAGE_A8R8G8B8(A8R8G8B8_BE, MSBFirst), + XVIMAGE_R5G6B5(R5G6B5_LE, LSBFirst), + XVIMAGE_R5G6B5(R5G6B5_BE, MSBFirst), + XVIMAGE_X1R5G5B5(X1R5G5B5_LE, LSBFirst), + XVIMAGE_X1R5G5B5(X1R5G5B5_BE, MSBFirst), + XVIMAGE_A1R5G5B5(A1R5G5B5_LE, LSBFirst), + XVIMAGE_A1R5G5B5(A1R5G5B5_BE, MSBFirst), + XVIMAGE_YUY2, + XVIMAGE_YV12, + XVIMAGE_I420, + XVIMAGE_UYVY +}; + int RADEONGetTexPortAttribute(ScrnInfoPtr pScrn, Atom attribute, @@ -862,8 +985,13 @@ RADEONSetupImageTexturedVideo(ScreenPtr pScreen) adapt->pAttributes = Attributes; adapt->nAttributes = NUM_ATTRIBUTES; } - adapt->pImages = Images; - adapt->nImages = NUM_IMAGES; + if (IS_R600_3D) { + adapt->pImages = Images_r600; + adapt->nImages = RADEON_ARRAY_SIZE(Images_r600); + } else { + adapt->pImages = Images; + adapt->nImages = RADEON_ARRAY_SIZE(Images); + } adapt->PutVideo = NULL; adapt->PutStill = NULL; adapt->GetVideo = NULL; diff --git a/src/radeon_video.c b/src/radeon_video.c index 58e3920..7b4ea96 100644 --- a/src/radeon_video.c +++ b/src/radeon_video.c @@ -3124,6 +3124,10 @@ RADEONQueryImageAttributes( size += tmp; break; case FOURCC_RGBA32: + case X8R8G8B8_LE: + case X8R8G8B8_BE: + case A8R8G8B8_LE: + case A8R8G8B8_BE: size = *w << 2; if(pitches) pitches[0] = size; size *= *h; diff --git 
a/src/radeon_video.h b/src/radeon_video.h index 3a4a709..f7b23a5 100644 --- a/src/radeon_video.h +++ b/src/radeon_video.h @@ -17,6 +17,19 @@ #define ClipValue(v,min,max) ((v) < (min) ? (min) : (v) > (max) ? (max) : (v)) +enum { + X8R8G8B8_LE = 1, + X8R8G8B8_BE, + A8R8G8B8_LE, + A8R8G8B8_BE, + R5G6B5_LE, + R5G6B5_BE, + X1R5G5B5_LE, + X1R5G5B5_BE, + A1R5G5B5_LE, + A1R5G5B5_BE +}; + /* Xvideo port struct */ typedef struct { uint32_t transform_index;