Add SIMD fast paths to fbFetch_x8r8g8b8 and 16-byte-align the scanline
buffer used by pixman_composite_rect_general.

pixman-access.c
  * Selects _USE_ALPHA_COPY_SSE2_ or _USE_ALPHA_COPY_SSE_ from the
    compiler's predefined macros (MSVC _M_IX86_FP, GCC __SSE2__/__SSE__).
  * fbFetch_x8r8g8b8 ORs 0xFF000000 into each pixel.  When source and
    destination share the same misalignment, pixels are first copied one
    at a time until the pointers are aligned, then eight at a time using
    128-bit (SSE2) or 64-bit (MMX) loads and stores.  The original scalar
    loop still handles whatever remains.
  * Alignment tests cast pointers through uintptr_t (not uint32_t) so the
    cast does not truncate where pointers are wider than 32 bits.

pixman-compose.c
  * _scanline_buffer is declared ALIGN16.

pixman-private.h
  * Defines ALIGN16 for MSVC and GCC, and as empty for every other
    compiler so the unconditional use in pixman-compose.c still compiles.

NOTE(review): the fast paths dereference `pixel` directly instead of going
through READ(); confirm this is acceptable for any build in which READ is
not a plain dereference.
NOTE(review): whitespace in context lines, and the header name on the
`#include <time.h>` context line, were reconstructed; verify against the
target tree or apply with `patch -l`.

diff -Nr -U5 libpixman.original/src/pixman-access.c libpixman/src/pixman-access.c
--- libpixman.original/src/pixman-access.c	2009-02-23 16:41:44.000000000 -0500
+++ libpixman/src/pixman-access.c	2009-03-17 09:40:53.000000000 -0400
@@ -66,10 +66,20 @@
 
 #define YV12_V(line)    \
     ((uint8_t *) ((bits) + offset0 + \
         ((stride) >> 1) * ((line) >> 1)))
 
+#if (defined(_MSC_VER) && defined(_M_IX86) && (_M_IX86_FP == 2)) || \
+    (defined(__GNUC__) && defined(__SSE2__))
+# include "emmintrin.h"
+# define _USE_ALPHA_COPY_SSE2_
+#elif (defined(_MSC_VER) && defined(_M_IX86) && (_M_IX86_FP == 1)) || \
+    (defined(__GNUC__) && defined(__SSE__))
+# include "xmmintrin.h"
+# define _USE_ALPHA_COPY_SSE_
+#endif
+
 /*********************************** Fetch ************************************/
 
 static FASTCALL void
 fbFetch_a8r8g8b8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 {
@@ -83,10 +93,70 @@
 fbFetch_x8r8g8b8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
 {
     const uint32_t *bits = pict->bits + y*pict->rowstride;
     const uint32_t *pixel = (const uint32_t *)bits + x;
     const uint32_t *end = pixel + width;
+
+#if defined(_USE_ALPHA_COPY_SSE2_) || defined(_USE_ALPHA_COPY_SSE_)
+    ALIGN16 static const unsigned int alphas[4] =
+        {0xFF000000, 0xFF000000, 0xFF000000, 0xFF000000};
+    __m64 alpha2, tmp8a, tmp8b, tmp8c, tmp8d;
+
+#ifdef _USE_ALPHA_COPY_SSE2_
+    __m128i alpha4, tmp16a, tmp16b;
+
+    /* 128-bit path: usable only if src and dest share the same 16-byte misalignment */
+    if ( ((uintptr_t)pixel & 0x0F) == ((uintptr_t)buffer & 0x0F) )
+    {
+        /* copy single pixels until both pointers are 16-byte aligned */
+        while ( ((uintptr_t)pixel & 0x0F) && width ) {
+            *buffer++ = *pixel++ | 0xff000000;
+            width--;
+        }
+
+        /* two temporaries (tmp16a, tmp16b) are used to avoid
+         * back-to-back operations on the same register.
+         * TODO: MSVC8 generates poor code for MMX intrinsics; consider asm? */
+
+        alpha4 = *((__m128i *)&alphas);
+        while (width >= 8)
+        {
+            tmp16a = *((__m128i *)&pixel[0]);
+            tmp16b = *((__m128i *)&pixel[4]);
+            *((__m128i *)&buffer[0]) = _mm_or_si128(tmp16a, alpha4);
+            *((__m128i *)&buffer[4]) = _mm_or_si128(tmp16b, alpha4);
+            buffer+=8; pixel+=8; width-=8;
+        }
+    }
+#endif /* _USE_ALPHA_COPY_SSE2_ */
+
+    /* 64-bit path: usable only if src and dest share the same 8-byte misalignment */
+    if ( (width >= 8) && (((uintptr_t)pixel & 0x07) == ((uintptr_t)buffer & 0x07)) )
+    {
+        /* copy single pixels until both pointers are 8-byte aligned */
+        while ( ((uintptr_t)pixel & 0x07) && width ) {
+            *buffer++ = *pixel++ | 0xff000000;
+            width--;
+        }
+
+        alpha2 = *((__m64 *)&alphas);
+        while (width >= 8)
+        {
+            tmp8a = *((__m64 *)&pixel[0]);
+            tmp8b = *((__m64 *)&pixel[2]);
+            tmp8c = *((__m64 *)&pixel[4]);
+            tmp8d = *((__m64 *)&pixel[6]);
+            *((__m64 *)&buffer[0]) = _m_por(tmp8a, alpha2);
+            *((__m64 *)&buffer[2]) = _m_por(tmp8b, alpha2);
+            *((__m64 *)&buffer[4]) = _m_por(tmp8c, alpha2);
+            *((__m64 *)&buffer[6]) = _m_por(tmp8d, alpha2);
+            buffer+=8; pixel+=8; width-=8;
+        }
+        _m_empty();
+    }
+#endif /* defined(_USE_ALPHA_COPY_SSE2_) || defined(_USE_ALPHA_COPY_SSE_) */
+
     while (pixel < end) {
         *buffer++ = READ(pict, pixel++) | 0xff000000;
     }
 }
 
diff -Nr -U5 libpixman.original/src/pixman-compose.c libpixman/src/pixman-compose.c
--- libpixman.original/src/pixman-compose.c	2009-02-23 16:41:44.000000000 -0500
+++ libpixman/src/pixman-compose.c	2009-03-17 09:40:53.000000000 -0400
@@ -546,11 +546,11 @@
 #define SCANLINE_BUFFER_LENGTH 2048
 
 void
 pixman_composite_rect_general (const FbComposeData *data)
 {
-    uint32_t _scanline_buffer[SCANLINE_BUFFER_LENGTH * 3];
+    ALIGN16 uint32_t _scanline_buffer[SCANLINE_BUFFER_LENGTH * 3];
     const pixman_format_code_t srcFormat = data->src->type == BITS ? data->src->bits.format : 0;
     const pixman_format_code_t maskFormat = data->mask && data->mask->type == BITS ? data->mask->bits.format : 0;
     const pixman_format_code_t destFormat = data->dest->type == BITS ? data->dest->bits.format : 0;
     const int srcWide = PIXMAN_FORMAT_16BPC(srcFormat);
     const int maskWide = data->mask && PIXMAN_FORMAT_16BPC(maskFormat);
diff -Nr -U5 libpixman.original/src/pixman-private.h libpixman/src/pixman-private.h
--- libpixman.original/src/pixman-private.h	2009-02-23 16:41:44.000000000 -0500
+++ libpixman/src/pixman-private.h	2009-03-17 09:40:53.000000000 -0400
@@ -8,10 +8,18 @@
 #include "cairo-platform.h"
 
 #include "pixman.h"
 #include <time.h>
 
+#if defined(_MSC_VER)
+# define ALIGN16 __declspec(align(16))
+#elif defined(__GNUC__)
+# define ALIGN16 __attribute__ ((aligned (16)))
+#else
+# define ALIGN16 /* no known alignment attribute; SIMD paths are not enabled here */
+#endif /* ALIGN16 */
+
 #ifndef FALSE
 #define FALSE 0
 #endif
 
 #ifndef TRUE