configure.ac | 32 ++++++++++++++-------------- pixman/Makefile.am | 16 +++++++------- pixman/{pixman-arm.c => pixman-arm-simd.c} | 2 +- pixman/{pixman-arm.h => pixman-arm-simd.h} | 8 +++--- pixman/pixman-pict.c | 12 +++++----- 5 files changed, 35 insertions(+), 35 deletions(-) diff --git a/configure.ac b/configure.ac index dd095ff..7937f95 100644 --- a/configure.ac +++ b/configure.ac @@ -277,42 +277,42 @@ AC_SUBST(VMX_CFLAGS) AM_CONDITIONAL(USE_VMX, test $have_vmx_intrinsics = yes) -dnl Check for ARM +dnl Check for ARM SIMD instructions -have_armv6_simd=no -AC_MSG_CHECKING(whether to use ARM assembler) +have_arm_simd=no +AC_MSG_CHECKING(whether to use ARM SIMD assembler) xserver_save_CFLAGS=$CFLAGS CFLAGS="$CFLAGS $ARM_CFLAGS" AC_COMPILE_IFELSE([ int main () { asm("uqadd8 r1, r1, r2"); return 0; -}], have_armv6_simd=yes) +}], have_arm_simd=yes) CFLAGS=$xserver_save_CFLAGS -AC_ARG_ENABLE(arm, - [AC_HELP_STRING([--disable-arm], - [disable ARM fast paths])], - [enable_arm=$enableval], [enable_arm=auto]) +AC_ARG_ENABLE(arm-simd, + [AC_HELP_STRING([--disable-arm-simd], + [disable ARM SIMD fast paths])], + [enable_arm_simd=$enableval], [enable_arm_simd=auto]) -if test $enable_arm = no ; then - have_armv6_simd=disabled +if test $enable_arm_simd = no ; then + have_arm_simd=disabled fi -if test $have_armv6_simd = yes ; then - AC_DEFINE(USE_ARM, 1, [use ARM compiler intrinsics]) +if test $have_arm_simd = yes ; then + AC_DEFINE(USE_ARM_SIMD, 1, [use ARM SIMD compiler intrinsics]) else ARM_CFLAGS= fi -AC_MSG_RESULT($have_armv6_simd) -if test $enable_arm = yes && test $have_armv6_simd = no ; then - AC_MSG_ERROR([ARM intrinsics not detected]) +AC_MSG_RESULT($have_arm_simd) +if test $enable_arm_simd = yes && test $have_arm_simd = no ; then + AC_MSG_ERROR([ARM SIMD intrinsics not detected]) fi AC_SUBST(ARM_CFLAGS) -AM_CONDITIONAL(USE_ARM, test $have_armv6_simd = yes) +AM_CONDITIONAL(USE_ARM_SIMD, test $have_arm_simd = yes) AC_ARG_ENABLE(gtk, diff --git a/pixman/Makefile.am b/pixman/Makefile.am index 339e01e..6d5a643 100644 --- a/pixman/Makefile.am +++ b/pixman/Makefile.am @@ -80,14 +80,14 @@ libpixman_1_la_LIBADD += libpixman-sse2.la endif # arm code -if USE_ARM -noinst_LTLIBRARIES += libpixman-arm.la -libpixman_arm_la_SOURCES = \ - pixman-arm.c \ - pixman-arm.h -libpixman_arm_la_CFLAGS = $(DEP_CFLAGS) $(ARM_CFLAGS) -libpixman_arm_la_LIBADD = $(DEP_LIBS) -libpixman_1_la_LIBADD += libpixman-arm.la +if USE_ARM_SIMD +noinst_LTLIBRARIES += libpixman-arm-simd.la +libpixman_arm_simd_la_SOURCES = \ + pixman-arm-simd.c \ + pixman-arm-simd.h +libpixman_arm_simd_la_CFLAGS = $(DEP_CFLAGS) $(ARM_CFLAGS) +libpixman_arm_simd_la_LIBADD = $(DEP_LIBS) +libpixman_1_la_LIBADD += libpixman-arm-simd.la endif diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c new file mode 100644 index 0000000..c7851cb --- /dev/null +++ b/pixman/pixman-arm-simd.c @@ -0,0 +1,407 @@ +/* + * Copyright © 2008 Mozilla Corporation + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Mozilla Corporation not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written prior permission. Mozilla Corporation makes no + * representations about the suitability of this software for any purpose. It + * is provided "as is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + * + * Author: Jeff Muizelaar (jeff@infidigm.net) + * + */ +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "pixman-arm-simd.h" + +void +fbCompositeSrcAdd_8000x8000arm (pixman_op_t op, + pixman_image_t * pSrc, + pixman_image_t * pMask, + pixman_image_t * pDst, + int16_t xSrc, + int16_t ySrc, + int16_t xMask, + int16_t yMask, + int16_t xDst, + int16_t yDst, + uint16_t width, + uint16_t height) +{ + uint8_t *dstLine, *dst; + uint8_t *srcLine, *src; + int dstStride, srcStride; + uint16_t w; + uint8_t s, d; + + fbComposeGetStart (pSrc, xSrc, ySrc, uint8_t, srcStride, srcLine, 1); + fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 1); + + while (height--) + { + dst = dstLine; + dstLine += dstStride; + src = srcLine; + srcLine += srcStride; + w = width; + + while (w && (unsigned long)dst & 3) + { + s = *src; + d = *dst; + asm("uqadd8 %0, %1, %2" : "+r"(d) : "r"(s)); + *dst = d; + + dst++; + src++; + w--; + } + + while (w >= 4) + { + asm("uqadd8 %0, %1, %2" : "=r"(*(uint32_t*)dst) : "r"(*(uint32_t*)src), "r"(*(uint32_t*)dst)); + dst += 4; + src += 4; + w -= 4; + } + + while (w) + { + s = *src; + d = *dst; + asm("uqadd8 %0, %1, %2" : "+r"(d) : "r"(s)); + *dst = d; + + dst++; + src++; + w--; + } + } + +} + +void +fbCompositeSrc_8888x8888arm (pixman_op_t op, + pixman_image_t * pSrc, + pixman_image_t * pMask, + pixman_image_t * pDst, + int16_t xSrc, + int16_t ySrc, + int16_t xMask, + int16_t yMask, + int16_t xDst, + int16_t yDst, + uint16_t width, + uint16_t height) +{ + uint32_t *dstLine, *dst; + uint32_t *srcLine, *src; + int dstStride, srcStride; + uint16_t w; + uint32_t component_half = 0x800080; + uint32_t upper_component_mask = 0xff00ff00; + uint32_t alpha_mask = 0xff; + + fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); + fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1); + + while (height--) + { + dst = dstLine; + dstLine += dstStride; + src = srcLine; + srcLine += srcStride; + w = width; + +//#define inner_branch + asm volatile ( + "cmp %[w], #0\n\t" + "beq 2f\n\t" + "1:\n\t" + /* load src */ + "ldr r5, [%[src]], #4\n\t" +#ifdef inner_branch + /* We can avoid doing the multiplication in two cases: 0x0 or 0xff. + * The 0x0 case also allows us to avoid doing an unecessary data + * write which is more valuable so we only check for that */ + "cmp r5, #0\n\t" + "beq 3f\n\t" + + /* = 255 - alpha */ + "sub r8, %[alpha_mask], r5, lsr #24\n\t" + + "ldr r4, [%[dest]] \n\t" + +#else + "ldr r4, [%[dest]] \n\t" + + /* = 255 - alpha */ + "sub r8, %[alpha_mask], r5, lsr #24\n\t" +#endif + "uxtb16 r6, r4\n\t" + "uxtb16 r7, r4, ror #8\n\t" + + /* multiply by 257 and divide by 65536 */ + "mla r6, r6, r8, %[component_half]\n\t" + "mla r7, r7, r8, %[component_half]\n\t" + + "uxtab16 r6, r6, r6, ror #8\n\t" + "uxtab16 r7, r7, r7, ror #8\n\t" + + /* recombine the 0xff00ff00 bytes of r6 and r7 */ + "and r7, %[upper_component_mask]\n\t" + "uxtab16 r6, r7, r6, ror #8\n\t" + + "uqadd8 r5, r6, r5\n\t" + +#ifdef inner_branch + "3:\n\t" + +#endif + "str r5, [%[dest]], #4\n\t" + /* increment counter and jmp to top */ + "subs %[w], %[w], #1\n\t" + "bne 1b\n\t" + "2:\n\t" + : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src) + : [component_half] "r" (component_half), [upper_component_mask] "r" (upper_component_mask), + [alpha_mask] "r" (alpha_mask) + : "r4", "r5", "r6", "r7", "r8", "cc", "memory" + ); + } +} + +void +fbCompositeSrc_8888x8x8888arm (pixman_op_t op, + pixman_image_t * pSrc, + pixman_image_t * pMask, + pixman_image_t * pDst, + int16_t xSrc, + int16_t ySrc, + int16_t xMask, + int16_t yMask, + int16_t xDst, + int16_t yDst, + uint16_t width, + uint16_t height) +{ + uint32_t *dstLine, *dst; + uint32_t *srcLine, *src; + uint32_t mask; + int dstStride, srcStride; + uint16_t w; + uint32_t component_half = 0x800080; + uint32_t alpha_mask = 0xff; + + fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); + fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1); + + fbComposeGetSolid (pMask, mask, pDst->bits.format); + mask = (mask) >> 24; + + while (height--) + { + dst = dstLine; + dstLine += dstStride; + src = srcLine; + srcLine += srcStride; + w = width; + +//#define inner_branch + asm volatile ( + "cmp %[w], #0\n\t" + "beq 2f\n\t" + "1:\n\t" + /* load src */ + "ldr r5, [%[src]], #4\n\t" +#ifdef inner_branch + /* We can avoid doing the multiplication in two cases: 0x0 or 0xff. + * The 0x0 case also allows us to avoid doing an unecessary data + * write which is more valuable so we only check for that */ + "cmp r5, #0\n\t" + "beq 3f\n\t" + +#endif + "ldr r4, [%[dest]] \n\t" + + "uxtb16 r6, r5\n\t" + "uxtb16 r7, r5, ror #8\n\t" + + /* multiply by alpha (r8) then by 257 and divide by 65536 */ + "mla r6, r6, %[mask_alpha], %[component_half]\n\t" + "mla r7, r7, %[mask_alpha], %[component_half]\n\t" + + "uxtab16 r6, r6, r6, ror #8\n\t" + "uxtab16 r7, r7, r7, ror #8\n\t" + + "uxtb16 r6, r6, ror #8\n\t" + "uxtb16 r7, r7, ror #8\n\t" + + /* recombine */ + "orr r5, r6, r7, lsl #8\n\t" + + "uxtb16 r6, r4\n\t" + "uxtb16 r7, r4, ror #8\n\t" + + /* 255 - alpha */ + "sub r8, %[alpha_mask], r5, lsr #24\n\t" + + /* multiply by alpha (r8) then by 257 and divide by 65536 */ + "mla r6, r6, r8, %[component_half]\n\t" + "mla r7, r7, r8, %[component_half]\n\t" + + "uxtab16 r6, r6, r6, ror #8\n\t" + "uxtab16 r7, r7, r7, ror #8\n\t" + + "uxtb16 r6, r6, ror #8\n\t" + "uxtb16 r7, r7, ror #8\n\t" + + /* recombine */ + "orr r6, r6, r7, lsl #8\n\t" + + "uqadd8 r5, r6, r5\n\t" + +#ifdef inner_branch + "3:\n\t" + +#endif + "str r5, [%[dest]], #4\n\t" + /* increment counter and jmp to top */ + "subs %[w], %[w], #1\n\t" + "bne 1b\n\t" + "2:\n\t" + : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src) + : [component_half] "r" (component_half), [mask_alpha] "r" (mask), + [alpha_mask] "r" (alpha_mask) + : "r4", "r5", "r6", "r7", "r8", "r9", "cc", "memory" + ); + } +} + +void +fbCompositeSolidMask_nx8x8888arm (pixman_op_t op, + pixman_image_t * pSrc, + pixman_image_t * pMask, + pixman_image_t * pDst, + int16_t xSrc, + int16_t ySrc, + int16_t xMask, + int16_t yMask, + int16_t xDst, + int16_t yDst, + uint16_t width, + uint16_t height) +{ + uint32_t src, srca; + uint32_t *dstLine, *dst; + uint8_t *maskLine, *mask; + int dstStride, maskStride; + uint16_t w; + + fbComposeGetSolid(pSrc, src, pDst->bits.format); + + srca = src >> 24; + if (src == 0) + return; + + uint32_t component_mask = 0xff00ff; + uint32_t component_half = 0x800080; + + uint32_t src_hi = (src >> 8) & component_mask; + uint32_t src_lo = src & component_mask; + + fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); + fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1); + + while (height--) + { + dst = dstLine; + dstLine += dstStride; + mask = maskLine; + maskLine += maskStride; + w = width; + +//#define inner_branch + asm volatile ( + "cmp %[w], #0\n\t" + "beq 2f\n\t" + "1:\n\t" + /* load mask */ + "ldrb r5, [%[mask]], #1\n\t" +#ifdef inner_branch + /* We can avoid doing the multiplication in two cases: 0x0 or 0xff. + * The 0x0 case also allows us to avoid doing an unecessary data + * write which is more valuable so we only check for that */ + "cmp r5, #0\n\t" + "beq 3f\n\t" + +#endif + "ldr r4, [%[dest]] \n\t" + + /* multiply by alpha (r8) then by 257 and divide by 65536 */ + "mla r6, %[src_lo], r5, %[component_half]\n\t" + "mla r7, %[src_hi], r5, %[component_half]\n\t" + + "uxtab16 r6, r6, r6, ror #8\n\t" + "uxtab16 r7, r7, r7, ror #8\n\t" + + "uxtb16 r6, r6, ror #8\n\t" + "uxtb16 r7, r7, ror #8\n\t" + + /* recombine */ + "orr r5, r6, r7, lsl #8\n\t" + + "uxtb16 r6, r4\n\t" + "uxtb16 r7, r4, ror #8\n\t" + + /* we could simplify this to use 'sub' if we were + * willing to give up a register for alpha_mask */ + "mvn r8, r5\n\t" + "mov r8, r8, lsr #24\n\t" + + /* multiply by alpha (r8) then by 257 and divide by 65536 */ + "mla r6, r6, r8, %[component_half]\n\t" + "mla r7, r7, r8, %[component_half]\n\t" + + "uxtab16 r6, r6, r6, ror #8\n\t" + "uxtab16 r7, r7, r7, ror #8\n\t" + + "uxtb16 r6, r6, ror #8\n\t" + "uxtb16 r7, r7, ror #8\n\t" + + /* recombine */ + "orr r6, r6, r7, lsl #8\n\t" + + "uqadd8 r5, r6, r5\n\t" + +#ifdef inner_branch + "3:\n\t" + +#endif + "str r5, [%[dest]], #4\n\t" + /* increment counter and jmp to top */ + "subs %[w], %[w], #1\n\t" + "bne 1b\n\t" + "2:\n\t" + : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src), [mask] "+r" (mask) + : [component_half] "r" (component_half), + [src_hi] "r" (src_hi), [src_lo] "r" (src_lo) + : "r4", "r5", "r6", "r7", "r8", "cc", "memory" + ); + } +} diff --git a/pixman/pixman-arm-simd.h b/pixman/pixman-arm-simd.h new file mode 100644 index 0000000..ecaace5 --- /dev/null +++ b/pixman/pixman-arm-simd.h @@ -0,0 +1,94 @@ +/* + * Copyright © 2008 Mozilla Corporation + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Mozilla Corporation not be used in + * advertising or publicity pertaining to distribution of the software without + * specific, written prior permission. Mozilla Corporation makes no + * representations about the suitability of this software for any purpose. It + * is provided "as is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + * + * Author: Jeff Muizelaar (jeff@infidigm.net) + * + */ + +#include "pixman-private.h" + +#ifdef USE_ARM_SIMD + +static inline pixman_bool_t pixman_have_arm_simd(void) { return TRUE; } + +#else +#define pixman_have_arm_simd() FALSE +#endif + +#ifdef USE_ARM_SIMD + +void +fbCompositeSrcAdd_8000x8000arm (pixman_op_t op, + pixman_image_t * pSrc, + pixman_image_t * pMask, + pixman_image_t * pDst, + int16_t xSrc, + int16_t ySrc, + int16_t xMask, + int16_t yMask, + int16_t xDst, + int16_t yDst, + uint16_t width, + uint16_t height); +void +fbCompositeSrc_8888x8888arm (pixman_op_t op, + pixman_image_t * pSrc, + pixman_image_t * pMask, + pixman_image_t * pDst, + int16_t xSrc, + int16_t ySrc, + int16_t xMask, + int16_t yMask, + int16_t xDst, + int16_t yDst, + uint16_t width, + uint16_t height); + +void +fbCompositeSrc_8888x8x8888arm (pixman_op_t op, + pixman_image_t * pSrc, + pixman_image_t * pMask, + pixman_image_t * pDst, + int16_t xSrc, + int16_t ySrc, + int16_t xMask, + int16_t yMask, + int16_t xDst, + int16_t yDst, + uint16_t width, + uint16_t height); +void +fbCompositeSolidMask_nx8x8888arm (pixman_op_t op, + pixman_image_t * pSrc, + pixman_image_t * pMask, + pixman_image_t * pDst, + int16_t xSrc, + int16_t ySrc, + int16_t xMask, + int16_t yMask, + int16_t xDst, + int16_t yDst, + uint16_t width, + uint16_t height); + + +#endif /* USE_ARM */ diff --git a/pixman/pixman-arm.c b/pixman/pixman-arm.c deleted file mode 100644 index b73134c..0000000 --- a/pixman/pixman-arm.c +++ /dev/null @@ -1,407 +0,0 @@ -/* - * Copyright © 2008 Mozilla Corporation - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of Mozilla Corporation not be used in - * advertising or publicity pertaining to distribution of the software without - * specific, written prior permission. Mozilla Corporation makes no - * representations about the suitability of this software for any purpose. It - * is provided "as is" without express or implied warranty. - * - * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS - * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY - * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN - * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING - * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS - * SOFTWARE. - * - * Author: Jeff Muizelaar (jeff@infidigm.net) - * - */ -#ifdef HAVE_CONFIG_H -#include -#endif - -#include "pixman-arm.h" - -void -fbCompositeSrcAdd_8000x8000arm (pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int16_t xSrc, - int16_t ySrc, - int16_t xMask, - int16_t yMask, - int16_t xDst, - int16_t yDst, - uint16_t width, - uint16_t height) -{ - uint8_t *dstLine, *dst; - uint8_t *srcLine, *src; - int dstStride, srcStride; - uint16_t w; - uint8_t s, d; - - fbComposeGetStart (pSrc, xSrc, ySrc, uint8_t, srcStride, srcLine, 1); - fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 1); - - while (height--) - { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; - w = width; - - while (w && (unsigned long)dst & 3) - { - s = *src; - d = *dst; - asm("uqadd8 %0, %1, %2" : "+r"(d) : "r"(s)); - *dst = d; - - dst++; - src++; - w--; - } - - while (w >= 4) - { - asm("uqadd8 %0, %1, %2" : "=r"(*(uint32_t*)dst) : "r"(*(uint32_t*)src), "r"(*(uint32_t*)dst)); - dst += 4; - src += 4; - w -= 4; - } - - while (w) - { - s = *src; - d = *dst; - asm("uqadd8 %0, %1, %2" : "+r"(d) : "r"(s)); - *dst = d; - - dst++; - src++; - w--; - } - } - -} - -void -fbCompositeSrc_8888x8888arm (pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int16_t xSrc, - int16_t ySrc, - int16_t xMask, - int16_t yMask, - int16_t xDst, - int16_t yDst, - uint16_t width, - uint16_t height) -{ - uint32_t *dstLine, *dst; - uint32_t *srcLine, *src; - int dstStride, srcStride; - uint16_t w; - uint32_t component_half = 0x800080; - uint32_t upper_component_mask = 0xff00ff00; - uint32_t alpha_mask = 0xff; - - fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); - fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1); - - while (height--) - { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; - w = width; - -//#define inner_branch - asm volatile ( - "cmp %[w], #0\n\t" - "beq 2f\n\t" - "1:\n\t" - /* load src */ - "ldr r5, [%[src]], #4\n\t" -#ifdef inner_branch - /* We can avoid doing the multiplication in two cases: 0x0 or 0xff. - * The 0x0 case also allows us to avoid doing an unecessary data - * write which is more valuable so we only check for that */ - "cmp r5, #0\n\t" - "beq 3f\n\t" - - /* = 255 - alpha */ - "sub r8, %[alpha_mask], r5, lsr #24\n\t" - - "ldr r4, [%[dest]] \n\t" - -#else - "ldr r4, [%[dest]] \n\t" - - /* = 255 - alpha */ - "sub r8, %[alpha_mask], r5, lsr #24\n\t" -#endif - "uxtb16 r6, r4\n\t" - "uxtb16 r7, r4, ror #8\n\t" - - /* multiply by 257 and divide by 65536 */ - "mla r6, r6, r8, %[component_half]\n\t" - "mla r7, r7, r8, %[component_half]\n\t" - - "uxtab16 r6, r6, r6, ror #8\n\t" - "uxtab16 r7, r7, r7, ror #8\n\t" - - /* recombine the 0xff00ff00 bytes of r6 and r7 */ - "and r7, %[upper_component_mask]\n\t" - "uxtab16 r6, r7, r6, ror #8\n\t" - - "uqadd8 r5, r6, r5\n\t" - -#ifdef inner_branch - "3:\n\t" - -#endif - "str r5, [%[dest]], #4\n\t" - /* increment counter and jmp to top */ - "subs %[w], %[w], #1\n\t" - "bne 1b\n\t" - "2:\n\t" - : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src) - : [component_half] "r" (component_half), [upper_component_mask] "r" (upper_component_mask), - [alpha_mask] "r" (alpha_mask) - : "r4", "r5", "r6", "r7", "r8", "cc", "memory" - ); - } -} - -void -fbCompositeSrc_8888x8x8888arm (pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int16_t xSrc, - int16_t ySrc, - int16_t xMask, - int16_t yMask, - int16_t xDst, - int16_t yDst, - uint16_t width, - uint16_t height) -{ - uint32_t *dstLine, *dst; - uint32_t *srcLine, *src; - uint32_t mask; - int dstStride, srcStride; - uint16_t w; - uint32_t component_half = 0x800080; - uint32_t alpha_mask = 0xff; - - fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); - fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1); - - fbComposeGetSolid (pMask, mask, pDst->bits.format); - mask = (mask) >> 24; - - while (height--) - { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; - w = width; - -//#define inner_branch - asm volatile ( - "cmp %[w], #0\n\t" - "beq 2f\n\t" - "1:\n\t" - /* load src */ - "ldr r5, [%[src]], #4\n\t" -#ifdef inner_branch - /* We can avoid doing the multiplication in two cases: 0x0 or 0xff. - * The 0x0 case also allows us to avoid doing an unecessary data - * write which is more valuable so we only check for that */ - "cmp r5, #0\n\t" - "beq 3f\n\t" - -#endif - "ldr r4, [%[dest]] \n\t" - - "uxtb16 r6, r5\n\t" - "uxtb16 r7, r5, ror #8\n\t" - - /* multiply by alpha (r8) then by 257 and divide by 65536 */ - "mla r6, r6, %[mask_alpha], %[component_half]\n\t" - "mla r7, r7, %[mask_alpha], %[component_half]\n\t" - - "uxtab16 r6, r6, r6, ror #8\n\t" - "uxtab16 r7, r7, r7, ror #8\n\t" - - "uxtb16 r6, r6, ror #8\n\t" - "uxtb16 r7, r7, ror #8\n\t" - - /* recombine */ - "orr r5, r6, r7, lsl #8\n\t" - - "uxtb16 r6, r4\n\t" - "uxtb16 r7, r4, ror #8\n\t" - - /* 255 - alpha */ - "sub r8, %[alpha_mask], r5, lsr #24\n\t" - - /* multiply by alpha (r8) then by 257 and divide by 65536 */ - "mla r6, r6, r8, %[component_half]\n\t" - "mla r7, r7, r8, %[component_half]\n\t" - - "uxtab16 r6, r6, r6, ror #8\n\t" - "uxtab16 r7, r7, r7, ror #8\n\t" - - "uxtb16 r6, r6, ror #8\n\t" - "uxtb16 r7, r7, ror #8\n\t" - - /* recombine */ - "orr r6, r6, r7, lsl #8\n\t" - - "uqadd8 r5, r6, r5\n\t" - -#ifdef inner_branch - "3:\n\t" - -#endif - "str r5, [%[dest]], #4\n\t" - /* increment counter and jmp to top */ - "subs %[w], %[w], #1\n\t" - "bne 1b\n\t" - "2:\n\t" - : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src) - : [component_half] "r" (component_half), [mask_alpha] "r" (mask), - [alpha_mask] "r" (alpha_mask) - : "r4", "r5", "r6", "r7", "r8", "r9", "cc", "memory" - ); - } -} - -void -fbCompositeSolidMask_nx8x8888arm (pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int16_t xSrc, - int16_t ySrc, - int16_t xMask, - int16_t yMask, - int16_t xDst, - int16_t yDst, - uint16_t width, - uint16_t height) -{ - uint32_t src, srca; - uint32_t *dstLine, *dst; - uint8_t *maskLine, *mask; - int dstStride, maskStride; - uint16_t w; - - fbComposeGetSolid(pSrc, src, pDst->bits.format); - - srca = src >> 24; - if (src == 0) - return; - - uint32_t component_mask = 0xff00ff; - uint32_t component_half = 0x800080; - - uint32_t src_hi = (src >> 8) & component_mask; - uint32_t src_lo = src & component_mask; - - fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); - fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1); - - while (height--) - { - dst = dstLine; - dstLine += dstStride; - mask = maskLine; - maskLine += maskStride; - w = width; - -//#define inner_branch - asm volatile ( - "cmp %[w], #0\n\t" - "beq 2f\n\t" - "1:\n\t" - /* load mask */ - "ldrb r5, [%[mask]], #1\n\t" -#ifdef inner_branch - /* We can avoid doing the multiplication in two cases: 0x0 or 0xff. - * The 0x0 case also allows us to avoid doing an unecessary data - * write which is more valuable so we only check for that */ - "cmp r5, #0\n\t" - "beq 3f\n\t" - -#endif - "ldr r4, [%[dest]] \n\t" - - /* multiply by alpha (r8) then by 257 and divide by 65536 */ - "mla r6, %[src_lo], r5, %[component_half]\n\t" - "mla r7, %[src_hi], r5, %[component_half]\n\t" - - "uxtab16 r6, r6, r6, ror #8\n\t" - "uxtab16 r7, r7, r7, ror #8\n\t" - - "uxtb16 r6, r6, ror #8\n\t" - "uxtb16 r7, r7, ror #8\n\t" - - /* recombine */ - "orr r5, r6, r7, lsl #8\n\t" - - "uxtb16 r6, r4\n\t" - "uxtb16 r7, r4, ror #8\n\t" - - /* we could simplify this to use 'sub' if we were - * willing to give up a register for alpha_mask */ - "mvn r8, r5\n\t" - "mov r8, r8, lsr #24\n\t" - - /* multiply by alpha (r8) then by 257 and divide by 65536 */ - "mla r6, r6, r8, %[component_half]\n\t" - "mla r7, r7, r8, %[component_half]\n\t" - - "uxtab16 r6, r6, r6, ror #8\n\t" - "uxtab16 r7, r7, r7, ror #8\n\t" - - "uxtb16 r6, r6, ror #8\n\t" - "uxtb16 r7, r7, ror #8\n\t" - - /* recombine */ - "orr r6, r6, r7, lsl #8\n\t" - - "uqadd8 r5, r6, r5\n\t" - -#ifdef inner_branch - "3:\n\t" - -#endif - "str r5, [%[dest]], #4\n\t" - /* increment counter and jmp to top */ - "subs %[w], %[w], #1\n\t" - "bne 1b\n\t" - "2:\n\t" - : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src), [mask] "+r" (mask) - : [component_half] "r" (component_half), - [src_hi] "r" (src_hi), [src_lo] "r" (src_lo) - : "r4", "r5", "r6", "r7", "r8", "cc", "memory" - ); - } -} diff --git a/pixman/pixman-arm.h b/pixman/pixman-arm.h deleted file mode 100644 index 258054a..0000000 --- a/pixman/pixman-arm.h +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright © 2008 Mozilla Corporation - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of Mozilla Corporation not be used in - * advertising or publicity pertaining to distribution of the software without - * specific, written prior permission. Mozilla Corporation makes no - * representations about the suitability of this software for any purpose. It - * is provided "as is" without express or implied warranty. - * - * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS - * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY - * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN - * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING - * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS - * SOFTWARE. - * - * Author: Jeff Muizelaar (jeff@infidigm.net) - * - */ - -#include "pixman-private.h" - -#ifdef USE_ARM - -static inline pixman_bool_t pixman_have_arm(void) { return TRUE; } - -#else -#define pixman_have_arm() FALSE -#endif - -#ifdef USE_ARM - -void -fbCompositeSrcAdd_8000x8000arm (pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int16_t xSrc, - int16_t ySrc, - int16_t xMask, - int16_t yMask, - int16_t xDst, - int16_t yDst, - uint16_t width, - uint16_t height); -void -fbCompositeSrc_8888x8888arm (pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int16_t xSrc, - int16_t ySrc, - int16_t xMask, - int16_t yMask, - int16_t xDst, - int16_t yDst, - uint16_t width, - uint16_t height); - -void -fbCompositeSrc_8888x8x8888arm (pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int16_t xSrc, - int16_t ySrc, - int16_t xMask, - int16_t yMask, - int16_t xDst, - int16_t yDst, - uint16_t width, - uint16_t height); -void -fbCompositeSolidMask_nx8x8888arm (pixman_op_t op, - pixman_image_t * pSrc, - pixman_image_t * pMask, - pixman_image_t * pDst, - int16_t xSrc, - int16_t ySrc, - int16_t xMask, - int16_t yMask, - int16_t xDst, - int16_t yDst, - uint16_t width, - uint16_t height); - - -#endif /* USE_ARM */ diff --git a/pixman/pixman-pict.c b/pixman/pixman-pict.c index 070b190..6dbc009 100644 --- a/pixman/pixman-pict.c +++ b/pixman/pixman-pict.c @@ -34,7 +34,7 @@ #include "pixman-mmx.h" #include "pixman-vmx.h" #include "pixman-sse2.h" -#include "pixman-arm.h" +#include "pixman-arm-simd.h" #include "pixman-combine32.h" #ifdef __GNUC__ @@ -1522,8 +1522,8 @@ static const FastPathInfo vmx_fast_paths[] = }; #endif -#ifdef USE_ARM -static const FastPathInfo arm_fast_paths[] = +#ifdef USE_ARM_SIMD +static const FastPathInfo arm_simd_fast_paths[] = { { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, fbCompositeSrc_8888x8888arm, 0 }, { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, fbCompositeSrc_8888x8888arm, 0 }, @@ -1897,9 +1897,9 @@ pixman_image_composite (pixman_op_t op, info = get_fast_path (vmx_fast_paths, op, pSrc, pMask, pDst, pixbuf); #endif -#ifdef USE_ARM - if (!info && pixman_have_arm()) - info = get_fast_path (arm_fast_paths, op, pSrc, pMask, pDst, pixbuf); +#ifdef USE_ARM_SIMD + if (!info && pixman_have_arm_simd()) + info = get_fast_path (arm_simd_fast_paths, op, pSrc, pMask, pDst, pixbuf); #endif if (!info)