configure.ac                               |   32 ++++++++++++++--------------
 pixman/Makefile.am                         |   16 +++++++-------
 pixman/{pixman-arm.c => pixman-arm-simd.c} |    2 +-
 pixman/{pixman-arm.h => pixman-arm-simd.h} |    8 +++---
 pixman/pixman-pict.c                       |   12 +++++-----
 5 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/configure.ac b/configure.ac
index dd095ff..7937f95 100644
--- a/configure.ac
+++ b/configure.ac
@@ -277,42 +277,42 @@ AC_SUBST(VMX_CFLAGS)
 
 AM_CONDITIONAL(USE_VMX, test $have_vmx_intrinsics = yes)
 
-dnl Check for ARM
+dnl Check for ARM SIMD instructions
 
-have_armv6_simd=no
-AC_MSG_CHECKING(whether to use ARM assembler)
+have_arm_simd=no
+AC_MSG_CHECKING(whether to use ARM SIMD assembler)
 xserver_save_CFLAGS=$CFLAGS
 CFLAGS="$CFLAGS $ARM_CFLAGS"
 AC_COMPILE_IFELSE([
 int main () {
     asm("uqadd8 r1, r1, r2");
     return 0;
-}], have_armv6_simd=yes)
+}], have_arm_simd=yes)
 CFLAGS=$xserver_save_CFLAGS
 
-AC_ARG_ENABLE(arm,
-   [AC_HELP_STRING([--disable-arm],
-                   [disable ARM fast paths])],
-   [enable_arm=$enableval], [enable_arm=auto])
+AC_ARG_ENABLE(arm-simd,
+   [AC_HELP_STRING([--disable-arm-simd],
+                   [disable ARM SIMD fast paths])],
+   [enable_arm_simd=$enableval], [enable_arm_simd=auto])
 
-if test $enable_arm = no ; then
-   have_armv6_simd=disabled
+if test $enable_arm_simd = no ; then
+   have_arm_simd=disabled
 fi
 
-if test $have_armv6_simd = yes ; then
-   AC_DEFINE(USE_ARM, 1, [use ARM compiler intrinsics])
+if test $have_arm_simd = yes ; then
+   AC_DEFINE(USE_ARM_SIMD, 1, [use ARM SIMD compiler intrinsics])
 else
    ARM_CFLAGS=
 fi
 
-AC_MSG_RESULT($have_armv6_simd)
-if test $enable_arm = yes && test $have_armv6_simd = no ; then
-   AC_MSG_ERROR([ARM intrinsics not detected])
+AC_MSG_RESULT($have_arm_simd)
+if test $enable_arm_simd = yes && test $have_arm_simd = no ; then
+   AC_MSG_ERROR([ARM SIMD intrinsics not detected])
 fi
 
 AC_SUBST(ARM_CFLAGS)
 
-AM_CONDITIONAL(USE_ARM, test $have_armv6_simd = yes)
+AM_CONDITIONAL(USE_ARM_SIMD, test $have_arm_simd = yes)
 
 
 AC_ARG_ENABLE(gtk,
diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index 339e01e..6d5a643 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -80,14 +80,14 @@ libpixman_1_la_LIBADD += libpixman-sse2.la
 endif
 
 # arm code
-if USE_ARM
-noinst_LTLIBRARIES += libpixman-arm.la
-libpixman_arm_la_SOURCES = \
-	pixman-arm.c \
-	pixman-arm.h
-libpixman_arm_la_CFLAGS = $(DEP_CFLAGS) $(ARM_CFLAGS)
-libpixman_arm_la_LIBADD = $(DEP_LIBS)
-libpixman_1_la_LIBADD += libpixman-arm.la
+if USE_ARM_SIMD
+noinst_LTLIBRARIES += libpixman-arm-simd.la
+libpixman_arm_simd_la_SOURCES = \
+	pixman-arm-simd.c \
+	pixman-arm-simd.h
+libpixman_arm_simd_la_CFLAGS = $(DEP_CFLAGS) $(ARM_CFLAGS)
+libpixman_arm_simd_la_LIBADD = $(DEP_LIBS)
+libpixman_1_la_LIBADD += libpixman-arm-simd.la
 endif
 
 
diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
new file mode 100644
index 0000000..c7851cb
--- /dev/null
+++ b/pixman/pixman-arm-simd.c
@@ -0,0 +1,407 @@
+/*
+ * Copyright © 2008 Mozilla Corporation
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Mozilla Corporation not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission.  Mozilla Corporation makes no
+ * representations about the suitability of this software for any purpose.  It
+ * is provided "as is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ * Author:  Jeff Muizelaar (jeff@infidigm.net)
+ *
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "pixman-arm-simd.h"
+
+/*
+ * ADD for 8-bit images: every destination byte becomes the unsigned
+ * saturating sum of the source byte and the destination byte (uqadd8).
+ * op, pMask, xMask and yMask are not referenced by this routine.
+ */
+void
+fbCompositeSrcAdd_8000x8000arm (pixman_op_t op,
+				pixman_image_t * pSrc,
+				pixman_image_t * pMask,
+				pixman_image_t * pDst,
+				int16_t      xSrc,
+				int16_t      ySrc,
+				int16_t      xMask,
+				int16_t      yMask,
+				int16_t      xDst,
+				int16_t      yDst,
+				uint16_t     width,
+				uint16_t     height)
+{
+    uint8_t	*dstLine, *dst;
+    uint8_t	*srcLine, *src;
+    int	dstStride, srcStride;
+    uint16_t	w;
+    uint8_t	s, d;
+
+    fbComposeGetStart (pSrc, xSrc, ySrc, uint8_t, srcStride, srcLine, 1);
+    fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 1);
+
+    while (height--)
+    {
+	dst = dstLine;
+	dstLine += dstStride;
+	src = srcLine;
+	srcLine += srcStride;
+	w = width;
+	/* One byte at a time until dst is word aligned.  Only %0 and %1
+	 * are declared here, so d is named as both result and addend. */
+	while (w && (unsigned long)dst & 3)
+	{
+	    s = *src++;
+	    d = *dst;
+	    asm("uqadd8 %0, %1, %0" : "+r"(d) : "r"(s));
+	    *dst++ = d;
+	    w--;
+	}
+	/* Four bytes per uqadd8.  NOTE(review): only dst was aligned above;
+	 * src is read as a word at whatever alignment it has - confirm. */
+	while (w >= 4)
+	{
+	    asm("uqadd8 %0, %1, %2" : "=r"(*(uint32_t*)dst) : "r"(*(uint32_t*)src), "r"(*(uint32_t*)dst));
+	    dst += 4;
+	    src += 4;
+	    w -= 4;
+	}
+	/* Remaining 0-3 bytes. */
+	while (w)
+	{
+	    s = *src++;
+	    d = *dst;
+	    asm("uqadd8 %0, %1, %0" : "+r"(d) : "r"(s));
+	    *dst++ = d;
+	    w--;
+	}
+    }
+}
+
+/*
+ * 32-bit source OVER 32-bit destination, per pixel:
+ *   dest = src + dest * (255 - (src >> 24)) / 255, added with uqadd8
+ */
+void
+fbCompositeSrc_8888x8888arm (pixman_op_t op,
+			 pixman_image_t * pSrc,
+			 pixman_image_t * pMask,
+			 pixman_image_t * pDst,
+			 int16_t      xSrc,
+			 int16_t      ySrc,
+			 int16_t      xMask,
+			 int16_t      yMask,
+			 int16_t      xDst,
+			 int16_t      yDst,
+			 uint16_t     width,
+			 uint16_t     height)
+{
+    uint32_t	*dstLine, *dst;
+    uint32_t	*srcLine, *src;
+    int	dstStride, srcStride;
+    uint16_t	w;
+    uint32_t component_half = 0x800080;
+    uint32_t upper_component_mask = 0xff00ff00;
+    uint32_t alpha_mask = 0xff;
+
+    fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
+    fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);
+
+    while (height--)
+    {
+	dst = dstLine;
+	dstLine += dstStride;
+	src = srcLine;
+	srcLine += srcStride;
+	w = width;
+
+//#define inner_branch
+	asm volatile (
+			"cmp %[w], #0\n\t"
+			"beq 2f\n\t"
+			"1:\n\t"
+			/* load src */
+			"ldr r5, [%[src]], #4\n\t"
+#ifdef inner_branch
+			/* We can avoid doing the multiplication in two cases: 0x0 or 0xff.
+			 * The 0x0 case also allows us to avoid doing an unnecessary data
+			 * write which is more valuable so we only check for that */
+			"cmp r5, #0\n\t"
+			"beq 3f\n\t"
+			/* = 255 - alpha */
+			"sub r8, %[alpha_mask], r5, lsr #24\n\t"
+			"ldr r4, [%[dest]] \n\t"
+#else
+			"ldr r4, [%[dest]] \n\t"
+
+			/* = 255 - alpha */
+			"sub r8, %[alpha_mask], r5, lsr #24\n\t"
+#endif
+			"uxtb16 r6, r4\n\t"
+			"uxtb16 r7, r4, ror #8\n\t"
+			/* r6, r7 = dest bytes 0,2 and 1,3; scale each by r8 (plus
+			 * component_half), then multiply by 257 and divide by 65536 */
+			"mla r6, r6, r8, %[component_half]\n\t"
+			"mla r7, r7, r8, %[component_half]\n\t"
+			"uxtab16 r6, r6, r6, ror #8\n\t"
+			"uxtab16 r7, r7, r7, ror #8\n\t"
+
+			/* recombine the 0xff00ff00 bytes of r6 and r7 */
+			"and r7, %[upper_component_mask]\n\t"
+			"uxtab16 r6, r7, r6, ror #8\n\t"
+			/* per-byte saturating add of the source pixel in r5 */
+			"uqadd8 r5, r6, r5\n\t"
+
+#ifdef inner_branch
+			"3:\n\t"
+			/* NOTE(review): label 3 precedes the str, so this path still stores r5 (zero) - check before enabling */
+#endif
+			"str r5, [%[dest]], #4\n\t"
+			/* decrement the pixel counter and loop while it is non-zero */
+			"subs	%[w], %[w], #1\n\t"
+			"bne	1b\n\t"
+			"2:\n\t"
+			: [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src)
+			: [component_half] "r" (component_half), [upper_component_mask] "r" (upper_component_mask),
+			  [alpha_mask] "r" (alpha_mask)
+			: "r4", "r5", "r6", "r7", "r8", "cc", "memory"
+			);
+    }
+}
+
+/*
+ * 32-bit source IN solid mask OVER 32-bit destination, per pixel:
+ *   s = src * mask_alpha / 255;  dest = s + dest * (255 - (s >> 24)) / 255
+ */
+void
+fbCompositeSrc_8888x8x8888arm (pixman_op_t op,
+			       pixman_image_t * pSrc,
+			       pixman_image_t * pMask,
+			       pixman_image_t * pDst,
+			       int16_t	xSrc,
+			       int16_t	ySrc,
+			       int16_t      xMask,
+			       int16_t      yMask,
+			       int16_t      xDst,
+			       int16_t      yDst,
+			       uint16_t     width,
+			       uint16_t     height)
+{
+    uint32_t	*dstLine, *dst;
+    uint32_t	*srcLine, *src;
+    uint32_t	mask;
+    int	dstStride, srcStride;
+    uint16_t	w;
+    uint32_t component_half = 0x800080;
+    uint32_t alpha_mask = 0xff;
+
+    fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
+    fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);
+    /* Only the top (alpha) byte of the solid mask is used below. */
+    fbComposeGetSolid (pMask, mask, pDst->bits.format);
+    mask = (mask) >> 24;
+
+    while (height--)
+    {
+	dst = dstLine;
+	dstLine += dstStride;
+	src = srcLine;
+	srcLine += srcStride;
+	w = width;
+
+//#define inner_branch
+	asm volatile (
+			"cmp %[w], #0\n\t"
+			"beq 2f\n\t"
+			"1:\n\t"
+			/* load src */
+			"ldr r5, [%[src]], #4\n\t"
+#ifdef inner_branch
+			/* We can avoid doing the multiplication in two cases: 0x0 or 0xff.
+			 * The 0x0 case also allows us to avoid doing an unnecessary data
+			 * write which is more valuable so we only check for that */
+			"cmp r5, #0\n\t"
+			"beq 3f\n\t"
+
+#endif
+			"ldr r4, [%[dest]] \n\t"
+			/* r6 = src bytes 0 and 2, r7 = src bytes 1 and 3, one per halfword */
+			"uxtb16 r6, r5\n\t"
+			"uxtb16 r7, r5, ror #8\n\t"
+
+			/* multiply by the mask alpha, then by 257 and divide by 65536 */
+			"mla r6, r6, %[mask_alpha], %[component_half]\n\t"
+			"mla r7, r7, %[mask_alpha], %[component_half]\n\t"
+			"uxtab16 r6, r6, r6, ror #8\n\t"
+			"uxtab16 r7, r7, r7, ror #8\n\t"
+			"uxtb16 r6, r6, ror #8\n\t"
+			"uxtb16 r7, r7, ror #8\n\t"
+
+			/* recombine: r5 = source pixel scaled by the mask */
+			"orr r5, r6, r7, lsl #8\n\t"
+			/* r6 = dest bytes 0 and 2, r7 = dest bytes 1 and 3 */
+			"uxtb16 r6, r4\n\t"
+			"uxtb16 r7, r4, ror #8\n\t"
+
+			/* r8 = 255 - alpha of the scaled source */
+			"sub r8, %[alpha_mask], r5, lsr #24\n\t"
+
+			/* multiply dest by r8, then by 257 and divide by 65536 */
+			"mla r6, r6, r8, %[component_half]\n\t"
+			"mla r7, r7, r8, %[component_half]\n\t"
+			"uxtab16 r6, r6, r6, ror #8\n\t"
+			"uxtab16 r7, r7, r7, ror #8\n\t"
+			"uxtb16 r6, r6, ror #8\n\t"
+			"uxtb16 r7, r7, ror #8\n\t"
+
+			/* recombine */
+			"orr r6, r6, r7, lsl #8\n\t"
+
+			"uqadd8 r5, r6, r5\n\t"
+
+#ifdef inner_branch
+			"3:\n\t"
+			/* NOTE(review): label 3 precedes the str, so this path still stores r5 (zero) - check before enabling */
+#endif
+			"str r5, [%[dest]], #4\n\t"
+			/* decrement the pixel counter and loop while it is non-zero */
+			"subs	%[w], %[w], #1\n\t"
+			"bne	1b\n\t"
+			"2:\n\t"
+			: [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src)
+			: [component_half] "r" (component_half), [mask_alpha] "r" (mask),
+			  [alpha_mask] "r" (alpha_mask)
+			: "r4", "r5", "r6", "r7", "r8", "cc", "memory"
+			);
+    }
+}
+
+/*
+ * Solid source IN a8 mask OVER 32-bit destination, per pixel:
+ *   s = src * m / 255;  dest = s + dest * (255 - (s >> 24)) / 255
+ */
+void
+fbCompositeSolidMask_nx8x8888arm (pixman_op_t      op,
+			       pixman_image_t * pSrc,
+			       pixman_image_t * pMask,
+			       pixman_image_t * pDst,
+			       int16_t      xSrc,
+			       int16_t      ySrc,
+			       int16_t      xMask,
+			       int16_t      yMask,
+			       int16_t      xDst,
+			       int16_t      yDst,
+			       uint16_t     width,
+			       uint16_t     height)
+{
+    uint32_t	 src;
+    uint32_t	*dstLine, *dst;
+    uint8_t	*maskLine, *mask;
+    int		 dstStride, maskStride;
+    uint16_t	 w;
+
+    fbComposeGetSolid(pSrc, src, pDst->bits.format);
+
+    /* A zero source stays zero under every mask value, so dest is unchanged. */
+    if (src == 0)
+	return;
+
+    uint32_t component_mask = 0xff00ff;
+    uint32_t component_half = 0x800080;
+
+    uint32_t src_hi = (src >> 8) & component_mask;
+    uint32_t src_lo = src & component_mask;
+
+    fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
+    fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1);
+
+    while (height--)
+    {
+	dst = dstLine;
+	dstLine += dstStride;
+	mask = maskLine;
+	maskLine += maskStride;
+	w = width;
+
+//#define inner_branch
+	asm volatile (
+			"cmp %[w], #0\n\t"
+			"beq 2f\n\t"
+			"1:\n\t"
+			/* load mask */
+			"ldrb r5, [%[mask]], #1\n\t"
+#ifdef inner_branch
+			/* We can avoid doing the multiplication in two cases: 0x0 or 0xff.
+			 * The 0x0 case also allows us to avoid doing an unnecessary data
+			 * write which is more valuable so we only check for that */
+			"cmp r5, #0\n\t"
+			"beq 3f\n\t"
+
+#endif
+			"ldr r4, [%[dest]] \n\t"
+
+			/* multiply by the mask byte (r5), then by 257 and divide by 65536 */
+			"mla r6, %[src_lo], r5, %[component_half]\n\t"
+			"mla r7, %[src_hi], r5, %[component_half]\n\t"
+			"uxtab16 r6, r6, r6, ror #8\n\t"
+			"uxtab16 r7, r7, r7, ror #8\n\t"
+			"uxtb16 r6, r6, ror #8\n\t"
+			"uxtb16 r7, r7, ror #8\n\t"
+
+			/* recombine: r5 = solid source scaled by the mask byte */
+			"orr r5, r6, r7, lsl #8\n\t"
+			/* r6 = dest bytes 0 and 2, r7 = dest bytes 1 and 3 */
+			"uxtb16 r6, r4\n\t"
+			"uxtb16 r7, r4, ror #8\n\t"
+			/* r8 = 255 - alpha of the scaled source, computed as ~r5 >> 24 */
+			/* we could simplify this to use 'sub' if we were
+			 * willing to give up a register for alpha_mask */
+			"mvn r8, r5\n\t"
+			"mov r8, r8, lsr #24\n\t"
+
+			/* multiply dest by r8, then by 257 and divide by 65536 */
+			"mla r6, r6, r8, %[component_half]\n\t"
+			"mla r7, r7, r8, %[component_half]\n\t"
+			"uxtab16 r6, r6, r6, ror #8\n\t"
+			"uxtab16 r7, r7, r7, ror #8\n\t"
+			"uxtb16 r6, r6, ror #8\n\t"
+			"uxtb16 r7, r7, ror #8\n\t"
+
+			/* recombine */
+			"orr r6, r6, r7, lsl #8\n\t"
+
+			"uqadd8 r5, r6, r5\n\t"
+
+#ifdef inner_branch
+			"3:\n\t"
+			/* NOTE(review): label 3 precedes the str, so this path still stores r5 (zero) - check before enabling */
+#endif
+			"str r5, [%[dest]], #4\n\t"
+			/* decrement the pixel counter and loop while it is non-zero */
+			"subs	%[w], %[w], #1\n\t"
+			"bne	1b\n\t"
+			"2:\n\t"
+			: [w] "+r" (w), [dest] "+r" (dst), [mask] "+r" (mask)
+			: [component_half] "r" (component_half),
+			  [src_hi] "r" (src_hi), [src_lo] "r" (src_lo)
+			: "r4", "r5", "r6", "r7", "r8", "cc", "memory"
+			);
+    }
+}
diff --git a/pixman/pixman-arm-simd.h b/pixman/pixman-arm-simd.h
new file mode 100644
index 0000000..ecaace5
--- /dev/null
+++ b/pixman/pixman-arm-simd.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright © 2008 Mozilla Corporation
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Mozilla Corporation not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission.  Mozilla Corporation makes no
+ * representations about the suitability of this software for any purpose.  It
+ * is provided "as is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ * Author:  Jeff Muizelaar (jeff@infidigm.net)
+ *
+ */
+
+#include "pixman-private.h"
+
+#ifdef USE_ARM_SIMD
+/* No runtime CPU probing here: the answer is fixed by the build configuration. */
+static inline pixman_bool_t pixman_have_arm_simd(void) { return TRUE; }
+
+#else
+#define pixman_have_arm_simd() FALSE
+#endif
+
+#ifdef USE_ARM_SIMD
+/* Composite fast paths implemented in pixman-arm-simd.c. */
+void
+fbCompositeSrcAdd_8000x8000arm (pixman_op_t op,
+				pixman_image_t * pSrc,
+				pixman_image_t * pMask,
+				pixman_image_t * pDst,
+				int16_t      xSrc,
+				int16_t      ySrc,
+				int16_t      xMask,
+				int16_t      yMask,
+				int16_t      xDst,
+				int16_t      yDst,
+				uint16_t     width,
+				uint16_t     height);
+void
+fbCompositeSrc_8888x8888arm (pixman_op_t op,
+			 pixman_image_t * pSrc,
+			 pixman_image_t * pMask,
+			 pixman_image_t * pDst,
+			 int16_t      xSrc,
+			 int16_t      ySrc,
+			 int16_t      xMask,
+			 int16_t      yMask,
+			 int16_t      xDst,
+			 int16_t      yDst,
+			 uint16_t     width,
+			 uint16_t     height);
+
+void
+fbCompositeSrc_8888x8x8888arm (pixman_op_t op,
+			 pixman_image_t * pSrc,
+			 pixman_image_t * pMask,
+			 pixman_image_t * pDst,
+			 int16_t      xSrc,
+			 int16_t      ySrc,
+			 int16_t      xMask,
+			 int16_t      yMask,
+			 int16_t      xDst,
+			 int16_t      yDst,
+			 uint16_t     width,
+			 uint16_t     height);
+void
+fbCompositeSolidMask_nx8x8888arm (pixman_op_t op,
+			 pixman_image_t * pSrc,
+			 pixman_image_t * pMask,
+			 pixman_image_t * pDst,
+			 int16_t      xSrc,
+			 int16_t      ySrc,
+			 int16_t      xMask,
+			 int16_t      yMask,
+			 int16_t      xDst,
+			 int16_t      yDst,
+			 uint16_t     width,
+			 uint16_t     height);
+
+
+#endif /* USE_ARM_SIMD */
diff --git a/pixman/pixman-arm.c b/pixman/pixman-arm.c
deleted file mode 100644
index b73134c..0000000
--- a/pixman/pixman-arm.c
+++ /dev/null
@@ -1,407 +0,0 @@
-/*
- * Copyright © 2008 Mozilla Corporation
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of Mozilla Corporation not be used in
- * advertising or publicity pertaining to distribution of the software without
- * specific, written prior permission.  Mozilla Corporation makes no
- * representations about the suitability of this software for any purpose.  It
- * is provided "as is" without express or implied warranty.
- *
- * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
- * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
- * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
- * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
- * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
- * SOFTWARE.
- *
- * Author:  Jeff Muizelaar (jeff@infidigm.net)
- *
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include "pixman-arm.h"
-
-void
-fbCompositeSrcAdd_8000x8000arm (pixman_op_t op,
-				pixman_image_t * pSrc,
-				pixman_image_t * pMask,
-				pixman_image_t * pDst,
-				int16_t      xSrc,
-				int16_t      ySrc,
-				int16_t      xMask,
-				int16_t      yMask,
-				int16_t      xDst,
-				int16_t      yDst,
-				uint16_t     width,
-				uint16_t     height)
-{
-    uint8_t	*dstLine, *dst;
-    uint8_t	*srcLine, *src;
-    int	dstStride, srcStride;
-    uint16_t	w;
-    uint8_t	s, d;
-
-    fbComposeGetStart (pSrc, xSrc, ySrc, uint8_t, srcStride, srcLine, 1);
-    fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 1);
-
-    while (height--)
-    {
-	dst = dstLine;
-	dstLine += dstStride;
-	src = srcLine;
-	srcLine += srcStride;
-	w = width;
-
-	while (w && (unsigned long)dst & 3)
-	{
-	    s = *src;
-	    d = *dst;
-	    asm("uqadd8 %0, %1, %2" : "+r"(d) : "r"(s));
-	    *dst = d;
-
-	    dst++;
-	    src++;
-	    w--;
-	}
-
-	while (w >= 4)
-	{
-	    asm("uqadd8 %0, %1, %2" : "=r"(*(uint32_t*)dst) : "r"(*(uint32_t*)src), "r"(*(uint32_t*)dst));
-	    dst += 4;
-	    src += 4;
-	    w -= 4;
-	}
-
-	while (w)
-	{
-	    s = *src;
-	    d = *dst;
-	    asm("uqadd8 %0, %1, %2" : "+r"(d) : "r"(s));
-	    *dst = d;
-
-	    dst++;
-	    src++;
-	    w--;
-	}
-    }
-
-}
-
-void
-fbCompositeSrc_8888x8888arm (pixman_op_t op,
-			 pixman_image_t * pSrc,
-			 pixman_image_t * pMask,
-			 pixman_image_t * pDst,
-			 int16_t      xSrc,
-			 int16_t      ySrc,
-			 int16_t      xMask,
-			 int16_t      yMask,
-			 int16_t      xDst,
-			 int16_t      yDst,
-			 uint16_t     width,
-			 uint16_t     height)
-{
-    uint32_t	*dstLine, *dst;
-    uint32_t	*srcLine, *src;
-    int	dstStride, srcStride;
-    uint16_t	w;
-    uint32_t component_half = 0x800080;
-    uint32_t upper_component_mask = 0xff00ff00;
-    uint32_t alpha_mask = 0xff;
-
-    fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
-    fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);
-
-    while (height--)
-    {
-	dst = dstLine;
-	dstLine += dstStride;
-	src = srcLine;
-	srcLine += srcStride;
-	w = width;
-
-//#define inner_branch
-	asm volatile (
-			"cmp %[w], #0\n\t"
-			"beq 2f\n\t"
-			"1:\n\t"
-			/* load src */
-			"ldr r5, [%[src]], #4\n\t"
-#ifdef inner_branch
-			/* We can avoid doing the multiplication in two cases: 0x0 or 0xff.
-			 * The 0x0 case also allows us to avoid doing an unecessary data
-			 * write which is more valuable so we only check for that */
-			"cmp r5, #0\n\t"
-			"beq 3f\n\t"
-
-			/* = 255 - alpha */
-			"sub r8, %[alpha_mask], r5, lsr #24\n\t"
-
-			"ldr r4, [%[dest]] \n\t"
-
-#else
-			"ldr r4, [%[dest]] \n\t"
-
-			/* = 255 - alpha */
-			"sub r8, %[alpha_mask], r5, lsr #24\n\t"
-#endif
-			"uxtb16 r6, r4\n\t"
-			"uxtb16 r7, r4, ror #8\n\t"
-
-			/* multiply by 257 and divide by 65536 */
-			"mla r6, r6, r8, %[component_half]\n\t"
-			"mla r7, r7, r8, %[component_half]\n\t"
-
-			"uxtab16 r6, r6, r6, ror #8\n\t"
-			"uxtab16 r7, r7, r7, ror #8\n\t"
-
-			/* recombine the 0xff00ff00 bytes of r6 and r7 */
-			"and r7, %[upper_component_mask]\n\t"
-			"uxtab16 r6, r7, r6, ror #8\n\t"
-
-			"uqadd8 r5, r6, r5\n\t"
-
-#ifdef inner_branch
-			"3:\n\t"
-
-#endif
-			"str r5, [%[dest]], #4\n\t"
-			/* increment counter and jmp to top */
-			"subs	%[w], %[w], #1\n\t"
-			"bne	1b\n\t"
-			"2:\n\t"
-			: [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src)
-			: [component_half] "r" (component_half), [upper_component_mask] "r" (upper_component_mask),
-			  [alpha_mask] "r" (alpha_mask)
-			: "r4", "r5", "r6", "r7", "r8", "cc", "memory"
-			);
-    }
-}
-
-void
-fbCompositeSrc_8888x8x8888arm (pixman_op_t op,
-			       pixman_image_t * pSrc,
-			       pixman_image_t * pMask,
-			       pixman_image_t * pDst,
-			       int16_t	xSrc,
-			       int16_t	ySrc,
-			       int16_t      xMask,
-			       int16_t      yMask,
-			       int16_t      xDst,
-			       int16_t      yDst,
-			       uint16_t     width,
-			       uint16_t     height)
-{
-    uint32_t	*dstLine, *dst;
-    uint32_t	*srcLine, *src;
-    uint32_t	mask;
-    int	dstStride, srcStride;
-    uint16_t	w;
-    uint32_t component_half = 0x800080;
-    uint32_t alpha_mask = 0xff;
-
-    fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
-    fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);
-
-    fbComposeGetSolid (pMask, mask, pDst->bits.format);
-    mask = (mask) >> 24;
-
-    while (height--)
-    {
-	dst = dstLine;
-	dstLine += dstStride;
-	src = srcLine;
-	srcLine += srcStride;
-	w = width;
-
-//#define inner_branch
-	asm volatile (
-			"cmp %[w], #0\n\t"
-			"beq 2f\n\t"
-			"1:\n\t"
-			/* load src */
-			"ldr r5, [%[src]], #4\n\t"
-#ifdef inner_branch
-			/* We can avoid doing the multiplication in two cases: 0x0 or 0xff.
-			 * The 0x0 case also allows us to avoid doing an unecessary data
-			 * write which is more valuable so we only check for that */
-			"cmp r5, #0\n\t"
-			"beq 3f\n\t"
-
-#endif
-			"ldr r4, [%[dest]] \n\t"
-
-			"uxtb16 r6, r5\n\t"
-			"uxtb16 r7, r5, ror #8\n\t"
-
-			/* multiply by alpha (r8) then by 257 and divide by 65536 */
-			"mla r6, r6, %[mask_alpha], %[component_half]\n\t"
-			"mla r7, r7, %[mask_alpha], %[component_half]\n\t"
-
-			"uxtab16 r6, r6, r6, ror #8\n\t"
-			"uxtab16 r7, r7, r7, ror #8\n\t"
-
-			"uxtb16 r6, r6, ror #8\n\t"
-			"uxtb16 r7, r7, ror #8\n\t"
-
-			/* recombine */
-			"orr r5, r6, r7, lsl #8\n\t"
-
-			"uxtb16 r6, r4\n\t"
-			"uxtb16 r7, r4, ror #8\n\t"
-
-			/* 255 - alpha */
-			"sub r8, %[alpha_mask], r5, lsr #24\n\t"
-
-			/* multiply by alpha (r8) then by 257 and divide by 65536 */
-			"mla r6, r6, r8, %[component_half]\n\t"
-			"mla r7, r7, r8, %[component_half]\n\t"
-
-			"uxtab16 r6, r6, r6, ror #8\n\t"
-			"uxtab16 r7, r7, r7, ror #8\n\t"
-
-			"uxtb16 r6, r6, ror #8\n\t"
-			"uxtb16 r7, r7, ror #8\n\t"
-
-			/* recombine */
-			"orr r6, r6, r7, lsl #8\n\t"
-
-			"uqadd8 r5, r6, r5\n\t"
-
-#ifdef inner_branch
-			"3:\n\t"
-
-#endif
-			"str r5, [%[dest]], #4\n\t"
-			/* increment counter and jmp to top */
-			"subs	%[w], %[w], #1\n\t"
-			"bne	1b\n\t"
-			"2:\n\t"
-			: [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src)
-			: [component_half] "r" (component_half), [mask_alpha] "r" (mask),
-			  [alpha_mask] "r" (alpha_mask)
-			: "r4", "r5", "r6", "r7", "r8", "r9", "cc", "memory"
-			);
-    }
-}
-
-void
-fbCompositeSolidMask_nx8x8888arm (pixman_op_t      op,
-			       pixman_image_t * pSrc,
-			       pixman_image_t * pMask,
-			       pixman_image_t * pDst,
-			       int16_t      xSrc,
-			       int16_t      ySrc,
-			       int16_t      xMask,
-			       int16_t      yMask,
-			       int16_t      xDst,
-			       int16_t      yDst,
-			       uint16_t     width,
-			       uint16_t     height)
-{
-    uint32_t	 src, srca;
-    uint32_t	*dstLine, *dst;
-    uint8_t	*maskLine, *mask;
-    int		 dstStride, maskStride;
-    uint16_t	 w;
-
-    fbComposeGetSolid(pSrc, src, pDst->bits.format);
-
-    srca = src >> 24;
-    if (src == 0)
-	return;
-
-    uint32_t component_mask = 0xff00ff;
-    uint32_t component_half = 0x800080;
-
-    uint32_t src_hi = (src >> 8) & component_mask;
-    uint32_t src_lo = src & component_mask;
-
-    fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
-    fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1);
-
-    while (height--)
-    {
-	dst = dstLine;
-	dstLine += dstStride;
-	mask = maskLine;
-	maskLine += maskStride;
-	w = width;
-
-//#define inner_branch
-	asm volatile (
-			"cmp %[w], #0\n\t"
-			"beq 2f\n\t"
-			"1:\n\t"
-			/* load mask */
-			"ldrb r5, [%[mask]], #1\n\t"
-#ifdef inner_branch
-			/* We can avoid doing the multiplication in two cases: 0x0 or 0xff.
-			 * The 0x0 case also allows us to avoid doing an unecessary data
-			 * write which is more valuable so we only check for that */
-			"cmp r5, #0\n\t"
-			"beq 3f\n\t"
-
-#endif
-			"ldr r4, [%[dest]] \n\t"
-
-			/* multiply by alpha (r8) then by 257 and divide by 65536 */
-			"mla r6, %[src_lo], r5, %[component_half]\n\t"
-			"mla r7, %[src_hi], r5, %[component_half]\n\t"
-
-			"uxtab16 r6, r6, r6, ror #8\n\t"
-			"uxtab16 r7, r7, r7, ror #8\n\t"
-
-			"uxtb16 r6, r6, ror #8\n\t"
-			"uxtb16 r7, r7, ror #8\n\t"
-
-			/* recombine */
-			"orr r5, r6, r7, lsl #8\n\t"
-
-			"uxtb16 r6, r4\n\t"
-			"uxtb16 r7, r4, ror #8\n\t"
-
-			/* we could simplify this to use 'sub' if we were
-			 * willing to give up a register for alpha_mask */
-			"mvn r8, r5\n\t"
-			"mov r8, r8, lsr #24\n\t"
-
-			/* multiply by alpha (r8) then by 257 and divide by 65536 */
-			"mla r6, r6, r8, %[component_half]\n\t"
-			"mla r7, r7, r8, %[component_half]\n\t"
-
-			"uxtab16 r6, r6, r6, ror #8\n\t"
-			"uxtab16 r7, r7, r7, ror #8\n\t"
-
-			"uxtb16 r6, r6, ror #8\n\t"
-			"uxtb16 r7, r7, ror #8\n\t"
-
-			/* recombine */
-			"orr r6, r6, r7, lsl #8\n\t"
-
-			"uqadd8 r5, r6, r5\n\t"
-
-#ifdef inner_branch
-			"3:\n\t"
-
-#endif
-			"str r5, [%[dest]], #4\n\t"
-			/* increment counter and jmp to top */
-			"subs	%[w], %[w], #1\n\t"
-			"bne	1b\n\t"
-			"2:\n\t"
-			: [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src), [mask] "+r" (mask)
-			: [component_half] "r" (component_half),
-			  [src_hi] "r" (src_hi), [src_lo] "r" (src_lo)
-			: "r4", "r5", "r6", "r7", "r8", "cc", "memory"
-			);
-    }
-}
diff --git a/pixman/pixman-arm.h b/pixman/pixman-arm.h
deleted file mode 100644
index 258054a..0000000
--- a/pixman/pixman-arm.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright © 2008 Mozilla Corporation
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of Mozilla Corporation not be used in
- * advertising or publicity pertaining to distribution of the software without
- * specific, written prior permission.  Mozilla Corporation makes no
- * representations about the suitability of this software for any purpose.  It
- * is provided "as is" without express or implied warranty.
- *
- * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
- * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
- * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
- * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
- * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
- * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
- * SOFTWARE.
- *
- * Author:  Jeff Muizelaar (jeff@infidigm.net)
- *
- */
-
-#include "pixman-private.h"
-
-#ifdef USE_ARM
-
-static inline pixman_bool_t pixman_have_arm(void) { return TRUE; }
-
-#else
-#define pixman_have_arm() FALSE
-#endif
-
-#ifdef USE_ARM
-
-void
-fbCompositeSrcAdd_8000x8000arm (pixman_op_t op,
-				pixman_image_t * pSrc,
-				pixman_image_t * pMask,
-				pixman_image_t * pDst,
-				int16_t      xSrc,
-				int16_t      ySrc,
-				int16_t      xMask,
-				int16_t      yMask,
-				int16_t      xDst,
-				int16_t      yDst,
-				uint16_t     width,
-				uint16_t     height);
-void
-fbCompositeSrc_8888x8888arm (pixman_op_t op,
-			 pixman_image_t * pSrc,
-			 pixman_image_t * pMask,
-			 pixman_image_t * pDst,
-			 int16_t      xSrc,
-			 int16_t      ySrc,
-			 int16_t      xMask,
-			 int16_t      yMask,
-			 int16_t      xDst,
-			 int16_t      yDst,
-			 uint16_t     width,
-			 uint16_t     height);
-
-void
-fbCompositeSrc_8888x8x8888arm (pixman_op_t op,
-			 pixman_image_t * pSrc,
-			 pixman_image_t * pMask,
-			 pixman_image_t * pDst,
-			 int16_t      xSrc,
-			 int16_t      ySrc,
-			 int16_t      xMask,
-			 int16_t      yMask,
-			 int16_t      xDst,
-			 int16_t      yDst,
-			 uint16_t     width,
-			 uint16_t     height);
-void
-fbCompositeSolidMask_nx8x8888arm (pixman_op_t op,
-			 pixman_image_t * pSrc,
-			 pixman_image_t * pMask,
-			 pixman_image_t * pDst,
-			 int16_t      xSrc,
-			 int16_t      ySrc,
-			 int16_t      xMask,
-			 int16_t      yMask,
-			 int16_t      xDst,
-			 int16_t      yDst,
-			 uint16_t     width,
-			 uint16_t     height);
-
-
-#endif /* USE_ARM */
diff --git a/pixman/pixman-pict.c b/pixman/pixman-pict.c
index 070b190..6dbc009 100644
--- a/pixman/pixman-pict.c
+++ b/pixman/pixman-pict.c
@@ -34,7 +34,7 @@
 #include "pixman-mmx.h"
 #include "pixman-vmx.h"
 #include "pixman-sse2.h"
-#include "pixman-arm.h"
+#include "pixman-arm-simd.h"
 #include "pixman-combine32.h"
 
 #ifdef __GNUC__
@@ -1522,8 +1522,8 @@ static const FastPathInfo vmx_fast_paths[] =
 };
 #endif
 
-#ifdef USE_ARM
-static const FastPathInfo arm_fast_paths[] =
+#ifdef USE_ARM_SIMD
+static const FastPathInfo arm_simd_fast_paths[] =
 {
     { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_a8r8g8b8, fbCompositeSrc_8888x8888arm,      0 },
     { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,	PIXMAN_x8r8g8b8, fbCompositeSrc_8888x8888arm,	   0 },
@@ -1897,9 +1897,9 @@ pixman_image_composite (pixman_op_t      op,
 	    info = get_fast_path (vmx_fast_paths, op, pSrc, pMask, pDst, pixbuf);
 #endif
 
-#ifdef USE_ARM
-	if (!info && pixman_have_arm())
-	    info = get_fast_path (arm_fast_paths, op, pSrc, pMask, pDst, pixbuf);
+#ifdef USE_ARM_SIMD
+	if (!info && pixman_have_arm_simd())
+	    info = get_fast_path (arm_simd_fast_paths, op, pSrc, pMask, pDst, pixbuf);
 #endif
 
         if (!info)