pixman: MMX fast paths for IN and ADD on a8 destinations

Adds three MMX compositing routines to fbmmx.c, declares them in fbmmx.h,
adds a plain C fallback for the solid-source ADD case to fbpict.c, and wires
all of them into the operator dispatch in fbpict.c.

Changes relative to the first version of this patch:

  * The aligned fast paths test the row pointers that are actually
    dereferenced as CARD32 (dst/mask/src) instead of the PicturePtr
    arguments.
  * fbCompositeIn_nx8x8mmx no longer returns early when the source alpha is
    zero; IN with a transparent source must clear the destination, and the
    existing loops already compute that.
  * The unused local "m" in the MMX loop of fbCompositeIn_nx8x8mmx is gone.
  * The solid ADD dispatch clears srcRepeat for the C fallback as well.

Indentation was reconstructed; apply with whitespace-insensitive matching
(patch -l, or git apply --ignore-whitespace).  The "index" lines still carry
the blob ids of the first version.

diff --git a/pixman/src/fbmmx.c b/pixman/src/fbmmx.c
index a99168c..f6f512f 100644
--- a/pixman/src/fbmmx.c
+++ b/pixman/src/fbmmx.c
@@ -2135,6 +2135,261 @@ fbCompositeSolidMask_nx8888x0565Cmmx (pi
 }
 
 void
+fbCompositeIn_nx8x8mmx (pixman_operator_t op,
+                        PicturePtr pSrc,
+                        PicturePtr pMask,
+                        PicturePtr pDst,
+                        INT16      xSrc,
+                        INT16      ySrc,
+                        INT16      xMask,
+                        INT16      yMask,
+                        INT16      xDst,
+                        INT16      yDst,
+                        CARD16     width,
+                        CARD16     height)
+{
+    /*
+     * IN operator for a solid source, an a8 mask and an a8 destination.
+     * Per byte: dst = FbInU (FbInU (sa, mask), dst), with sa the alpha
+     * byte of the solid source.  Aligned rows go four bytes at a time
+     * through MMX; the scalar loop finishes (or handles) the rest.
+     */
+    CARD8 *dstLine, *dst;
+    CARD8 *maskLine, *mask;
+    FbStride dstStride, maskStride;
+    CARD16 w;
+    CARD32 src;
+    CARD8 sa;
+    __m64 vsrc, vsrca;
+
+    fbComposeGetStart (pDst, xDst, yDst, CARD8, dstStride, dstLine, 1);
+    fbComposeGetStart (pMask, xMask, yMask, CARD8, maskStride, maskLine, 1);
+
+    fbComposeGetSolid(pSrc, pDst, src);
+
+    /*
+     * No early-out for sa == 0: IN against a transparent source has to
+     * clear the destination, which the loops below do on their own.
+     */
+    sa = src >> 24;
+
+    vsrc = load8888(src);
+    vsrca = expand_alpha(vsrc);
+
+    while (height--)
+    {
+        dst = dstLine;
+        dstLine += dstStride;
+        mask = maskLine;
+        maskLine += maskStride;
+        w = width;
+
+        /*
+         * The 32-bit loads and stores below go through dst and mask, so
+         * those row pointers are what must be aligned -- not the
+         * PicturePtr arguments.
+         */
+        if ((((unsigned long)dst & 3) == 0) &&
+            (((unsigned long)mask & 3) == 0))
+        {
+            while (w >= 4)
+            {
+                __m64 vmask;
+                __m64 vdest;
+
+                vmask = load8888 (*(CARD32 *)mask);
+                vdest = load8888 (*(CARD32 *)dst);
+
+                *(CARD32 *)dst = store8888 (in (in (vsrca, vmask), vdest));
+
+                dst += 4;
+                mask += 4;
+                w -= 4;
+            }
+        }
+
+        while (w--)
+        {
+            CARD16 tmp;
+            CARD8 a;
+            CARD32 m, d;
+            CARD32 r;
+
+            a = *mask++;
+            d = *dst;
+
+            m = FbInU (sa, 0, a, tmp);
+            r = FbInU (m, 0, d, tmp);
+
+            *dst++ = r;
+        }
+    }
+
+    _mm_empty();
+}
+
+void
+fbCompositeIn_8x8mmx (pixman_operator_t op,
+                      PicturePtr pSrc,
+                      PicturePtr pMask,
+                      PicturePtr pDst,
+                      INT16      xSrc,
+                      INT16      ySrc,
+                      INT16      xMask,
+                      INT16      yMask,
+                      INT16      xDst,
+                      INT16      yDst,
+                      CARD16     width,
+                      CARD16     height)
+{
+    /*
+     * IN operator for an a8 source and an a8 destination, no mask.
+     * Per byte: dst = FbInU (src, dst).  Aligned rows go four bytes at a
+     * time through MMX; the scalar loop finishes (or handles) the rest.
+     */
+    CARD8 *dstLine, *dst;
+    CARD8 *srcLine, *src;
+    FbStride srcStride, dstStride;
+    CARD16 w;
+
+    fbComposeGetStart (pDst, xDst, yDst, CARD8, dstStride, dstLine, 1);
+    fbComposeGetStart (pSrc, xSrc, ySrc, CARD8, srcStride, srcLine, 1);
+
+    while (height--)
+    {
+        dst = dstLine;
+        dstLine += dstStride;
+        src = srcLine;
+        srcLine += srcStride;
+        w = width;
+
+        /*
+         * Alignment matters for the src and dst row pointers that are
+         * dereferenced as CARD32, not for the PicturePtr arguments.
+         */
+        if ((((unsigned long)dst & 3) == 0) &&
+            (((unsigned long)src & 3) == 0))
+        {
+            while (w >= 4)
+            {
+                CARD32 *s = (CARD32 *)src;
+                CARD32 *d = (CARD32 *)dst;
+
+                *d = store8888 (in (load8888 (*s), load8888 (*d)));
+
+                w -= 4;
+                dst += 4;
+                src += 4;
+            }
+        }
+
+        while (w--)
+        {
+            CARD8 s, d;
+            CARD16 tmp;
+
+            s = *src;
+            d = *dst;
+
+            *dst = FbInU (s, 0, d, tmp);
+
+            src++;
+            dst++;
+        }
+    }
+
+    _mm_empty ();
+}
+
+void
+fbCompositeSrcAdd_8888x8x8mmx (pixman_operator_t op,
+                               PicturePtr pSrc,
+                               PicturePtr pMask,
+                               PicturePtr pDst,
+                               INT16      xSrc,
+                               INT16      ySrc,
+                               INT16      xMask,
+                               INT16      yMask,
+                               INT16      xDst,
+                               INT16      yDst,
+                               CARD16     width,
+                               CARD16     height)
+{
+    /*
+     * ADD operator for a solid source, an a8 mask and an a8 destination.
+     * Per byte the scalar loop combines FbInU (sa, mask) with dst through
+     * FbAdd; on aligned rows the MMX loop does the same for four bytes at
+     * a time using _mm_adds_pu8.
+     */
+    CARD8 *dstLine, *dst;
+    CARD8 *maskLine, *mask;
+    FbStride dstStride, maskStride;
+    CARD16 w;
+    CARD32 src;
+    CARD8 sa;
+    __m64 vsrc, vsrca;
+
+    fbComposeGetStart (pDst, xDst, yDst, CARD8, dstStride, dstLine, 1);
+    fbComposeGetStart (pMask, xMask, yMask, CARD8, maskStride, maskLine, 1);
+
+    fbComposeGetSolid(pSrc, pDst, src);
+
+    sa = src >> 24;
+    if (sa == 0)
+        return;
+
+    vsrc = load8888(src);
+    vsrca = expand_alpha(vsrc);
+
+    while (height--)
+    {
+        dst = dstLine;
+        dstLine += dstStride;
+        mask = maskLine;
+        maskLine += maskStride;
+        w = width;
+
+        /*
+         * Alignment matters for the mask and dst row pointers that are
+         * dereferenced as CARD32, not for the PicturePtr arguments.
+         */
+        if ((((unsigned long)mask & 3) == 0) &&
+            (((unsigned long)dst & 3) == 0))
+        {
+            while (w >= 4)
+            {
+                __m64 vmask = load8888 (*(CARD32 *)mask);
+                __m64 vdest = load8888 (*(CARD32 *)dst);
+
+                *(CARD32 *)dst = store8888 (_mm_adds_pu8 (in (vsrca, vmask), vdest));
+
+                w -= 4;
+                dst += 4;
+                mask += 4;
+            }
+        }
+
+        while (w--)
+        {
+            CARD16 tmp;
+            CARD16 a;
+            CARD32 m, d;
+            CARD32 r;
+
+            a = *mask++;
+            d = *dst;
+
+            m = FbInU (sa, 0, a, tmp);
+            r = FbAdd (m, d, 0, tmp);
+
+            *dst++ = r;
+        }
+    }
+
+    _mm_empty();
+}
+
+void
 fbCompositeSrcAdd_8000x8000mmx (pixman_operator_t op,
                                 PicturePtr pSrc,
                                 PicturePtr pMask,
diff --git a/pixman/src/fbmmx.h b/pixman/src/fbmmx.h
index 531bcba..5c08180 100644
--- a/pixman/src/fbmmx.h
+++ b/pixman/src/fbmmx.h
@@ -45,6 +45,20 @@ pixman_private
 void fbComposeSetupMMX(void);
 
 pixman_private
+void fbCompositeIn_nx8x8mmx (pixman_operator_t op,
+                             PicturePtr pSrc,
+                             PicturePtr pMask,
+                             PicturePtr pDst,
+                             INT16      xSrc,
+                             INT16      ySrc,
+                             INT16      xMask,
+                             INT16      yMask,
+                             INT16      xDst,
+                             INT16      yDst,
+                             CARD16     width,
+                             CARD16     height);
+
+pixman_private
 void fbCompositeSolidMask_nx8888x0565Cmmx (pixman_operator_t op,
                                            PicturePtr pSrc,
                                            PicturePtr pMask,
@@ -109,6 +123,35 @@ void fbCompositeSolidMaskSrc_nx8x8888mmx
                                           INT16      yDst,
                                           CARD16     width,
                                           CARD16     height);
+
+pixman_private
+void fbCompositeSrcAdd_8888x8x8mmx (pixman_operator_t op,
+                                    PicturePtr pSrc,
+                                    PicturePtr pMask,
+                                    PicturePtr pDst,
+                                    INT16      xSrc,
+                                    INT16      ySrc,
+                                    INT16      xMask,
+                                    INT16      yMask,
+                                    INT16      xDst,
+                                    INT16      yDst,
+                                    CARD16     width,
+                                    CARD16     height);
+
+pixman_private
+void fbCompositeIn_8x8mmx (pixman_operator_t op,
+                           PicturePtr pSrc,
+                           PicturePtr pMask,
+                           PicturePtr pDst,
+                           INT16      xSrc,
+                           INT16      ySrc,
+                           INT16      xMask,
+                           INT16      yMask,
+                           INT16      xDst,
+                           INT16      yDst,
+                           CARD16     width,
+                           CARD16     height);
+
 pixman_private
 void fbCompositeSrcAdd_8000x8000mmx (pixman_operator_t op,
                                      PicturePtr pSrc,
diff --git a/pixman/src/fbpict.c b/pixman/src/fbpict.c
index 0cdec3f..2019cbc 100644
--- a/pixman/src/fbpict.c
+++ b/pixman/src/fbpict.c
@@ -844,6 +844,63 @@ fbCompositeSrcAdd_8888x8888 (pixman_oper
 }
 
 static void
+fbCompositeSrcAdd_8888x8x8 (pixman_operator_t op,
+                            PicturePtr pSrc,
+                            PicturePtr pMask,
+                            PicturePtr pDst,
+                            INT16      xSrc,
+                            INT16      ySrc,
+                            INT16      xMask,
+                            INT16      yMask,
+                            INT16      xDst,
+                            INT16      yDst,
+                            CARD16     width,
+                            CARD16     height)
+{
+    /*
+     * Plain C version of the solid-source ADD onto an a8 destination
+     * through an a8 mask: per byte, FbInU (sa, mask) is combined with
+     * dst through FbAdd, where sa is the alpha byte of the solid source.
+     */
+    CARD8 *dstLine, *dst;
+    CARD8 *maskLine, *mask;
+    FbStride dstStride, maskStride;
+    CARD16 w;
+    CARD32 src;
+    CARD8 sa;
+
+    fbComposeGetStart (pDst, xDst, yDst, CARD8, dstStride, dstLine, 1);
+    fbComposeGetStart (pMask, xMask, yMask, CARD8, maskStride, maskLine, 1);
+    fbComposeGetSolid (pSrc, pDst, src);
+    sa = (src >> 24);
+
+    while (height--)
+    {
+        dst = dstLine;
+        dstLine += dstStride;
+        mask = maskLine;
+        maskLine += maskStride;
+        w = width;
+
+        while (w--)
+        {
+            CARD16 tmp;
+            CARD16 a;
+            CARD32 m, d;
+            CARD32 r;
+
+            a = *mask++;
+            d = *dst;
+
+            m = FbInU (sa, 0, a, tmp);
+            r = FbAdd (m, d, 0, tmp);
+
+            *dst++ = r;
+        }
+    }
+}
+
+static void
 fbCompositeSrcAdd_1000x1000 (pixman_operator_t op,
                              PicturePtr pSrc,
                              PicturePtr pMask,
@@ -1759,6 +1816,29 @@ #endif
                     break;
                 }
             }
+            else
+            {
+                if ((pSrc->format_code == PICT_a8r8g8b8 ||
+                     pSrc->format_code == PICT_a8b8g8r8) &&
+                    srcRepeat &&
+                    pMask->format_code == PICT_a8 &&
+                    pDst->format_code == PICT_a8)
+                {
+                    /*
+                     * Both implementations read the source through
+                     * fbComposeGetSolid and ignore xSrc/ySrc, so drop the
+                     * repeat flag for the C fallback too, not only for MMX.
+                     */
+                    srcRepeat = FALSE;
+
+#ifdef USE_MMX
+                    if (fbHaveMMX())
+                        func = fbCompositeSrcAdd_8888x8x8mmx;
+                    else
+#endif
+                        func = fbCompositeSrcAdd_8888x8x8;
+                }
+            }
             break;
         case PIXMAN_OPERATOR_SRC:
             if (pMask)
@@ -1798,10 +1878,34 @@ #endif
             }
         }
         break;
+    case PIXMAN_OPERATOR_IN:
+#ifdef USE_MMX
+        if (pSrc->format_code == PICT_a8 &&
+            pDst->format_code == PICT_a8 &&
+            !pMask)
+        {
+            if (fbHaveMMX())
+                func = fbCompositeIn_8x8mmx;
+        }
+        else if (srcRepeat && pMask && !pMask->componentAlpha &&
+                 (pSrc->format_code == PICT_a8r8g8b8 ||
+                  pSrc->format_code == PICT_a8b8g8r8) &&
+                 (pMask->format_code == PICT_a8) &&
+                 pDst->format_code == PICT_a8)
+        {
+            if (fbHaveMMX())
+            {
+                srcRepeat = FALSE;
+                func = fbCompositeIn_nx8x8mmx;
+            }
+        }
+#else
+        func = NULL;
+#endif
+        break;
     case PIXMAN_OPERATOR_CLEAR:
     case PIXMAN_OPERATOR_DST:
     case PIXMAN_OPERATOR_OVER_REVERSE:
-    case PIXMAN_OPERATOR_IN:
     case PIXMAN_OPERATOR_IN_REVERSE:
     case PIXMAN_OPERATOR_OUT:
     case PIXMAN_OPERATOR_OUT_REVERSE: