--- mozilla/gfx/cairo/libpixman/src/pixman-access.c.original	2009-04-14 07:26:21.000000000 -0400
+++ mozilla/gfx/cairo/libpixman/src/pixman-access.c	2009-04-14 10:32:23.000000000 -0400
@@ -403,9 +403,67 @@
 fbFetch_a8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 {
     const uint32_t *bits = pict->bits + y*pict->rowstride;
     const uint8_t *pixel = (const uint8_t *)bits + x;
     const uint8_t *end = pixel + width;
+
+#if defined(_USE_ALPHA_COPY_SSE2_) && !defined(PIXMAN_FB_ACCESSORS)
+    /*
+     * SSE2 fast path: widen 16 a8 pixels per iteration into 16 32-bit
+     * pixels, each holding its source byte in bits 31..24 and zero in the
+     * rest -- the same value the scalar loop at the bottom produces.
+     *
+     * NOTE(review): compiled out under PIXMAN_FB_ACCESSORS because the
+     * vector load reads memory directly instead of going through READ();
+     * presumably READ() dispatches to the image's read callback in that
+     * build -- confirm against the macro's definition.
+     */
+
+    /*
+     * Scalar prologue: advance until the destination is 16-byte aligned.
+     * Testing "width > 0" rather than "width" keeps a negative width from
+     * writing anything, as with the scalar loop; uintptr_t keeps the
+     * pointer-to-integer cast lossless on 64-bit targets.
+     */
+    while (width > 0 && ((uintptr_t)buffer & 15))
+    {
+        *buffer++ = READ(pict, pixel++) << 24;
+        width--;
+    }
+
+    /*
+     * The prologue exits only when width is exhausted or the destination
+     * is aligned, so 16 or more pixels left implies aligned stores are safe.
+     */
+    if (width >= 16)
+    {
+        const __m128i zero = _mm_setzero_si128();
+
+        do
+        {
+            /* The source row has no alignment guarantee: unaligned load. */
+            const __m128i a8 = _mm_loadu_si128((const __m128i *)pixel);
+
+            /*
+             * Interleaving zeros below every byte, then below every 16-bit
+             * lane, leaves each source byte in the top byte of a 32-bit
+             * lane, i.e. (byte << 24).
+             */
+            const __m128i lo16 = _mm_unpacklo_epi8(zero, a8);
+            const __m128i hi16 = _mm_unpackhi_epi8(zero, a8);
+
+            _mm_store_si128((__m128i *)&buffer[ 0], _mm_unpacklo_epi16(zero, lo16));
+            _mm_store_si128((__m128i *)&buffer[ 4], _mm_unpackhi_epi16(zero, lo16));
+            _mm_store_si128((__m128i *)&buffer[ 8], _mm_unpacklo_epi16(zero, hi16));
+            _mm_store_si128((__m128i *)&buffer[12], _mm_unpackhi_epi16(zero, hi16));
+
+            pixel += 16;
+            buffer += 16;
+            width -= 16;
+        } while (width >= 16);
+    }
+#endif /* _USE_ALPHA_COPY_SSE2_ && !PIXMAN_FB_ACCESSORS */
+
     while (pixel < end) {
 	*buffer++ = READ(pict, pixel++) << 24;
     }
 }