From 7657d503b0b1ec8b235910f58602ed631e3fa18f Mon Sep 17 00:00:00 2001 From: Siarhei Siamashka Date: Wed, 16 Feb 2011 01:21:49 +0200 Subject: [PATCH] Workaround for _mm_empty() related miscompilation problem In some cases floating point registers may get corrupted and as a result pixman tests fail. The most likely cause is some kind of miscompilation of an inlined function containing MMX/SSE2 code: https://bugs.freedesktop.org/show_bug.cgi?id=33069 This patch just puts MMX/SSE2 code into a normal function which now gets called from an additional small inline wrapper function. The size of compiled code also becomes smaller. --- pixman/pixman-sse2.c | 60 +++++++++++++++++++++++++++++++++++--------------- 1 files changed, 42 insertions(+), 18 deletions(-) diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c index 2e135e2..c323d1f 100644 --- a/pixman/pixman-sse2.c +++ b/pixman/pixman-sse2.c @@ -5794,14 +5794,12 @@ sse2_composite_over_8888_8888_8888 (pixman_implementation_t *imp, } /* A variant of 'core_combine_over_u_sse2' with minor tweaks */ -static force_inline void +static void scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t* pd, const uint32_t* ps, int32_t w, pixman_fixed_t vx, - pixman_fixed_t unit_x, - pixman_fixed_t max_vx, - pixman_bool_t fully_transparent_src) + pixman_fixed_t unit_x) { uint32_t s, d; const uint32_t* pm = NULL; @@ -5810,9 +5808,6 @@ scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t* pd, __m128i xmm_src_lo, xmm_src_hi; __m128i xmm_alpha_lo, xmm_alpha_hi; - if (fully_transparent_src) - return; - /* Align dst on a 16-byte boundary */ while (w && ((unsigned long)pd & 15)) { @@ -5888,32 +5883,45 @@ scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t* pd, _mm_empty (); } +static force_inline void +scaled_nearest_scanline_sse2_8888_8888_OVER_wrapper (uint32_t* pd, + const uint32_t* ps, + int32_t w, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + pixman_fixed_t max_vx, + pixman_bool_t zero_src) +{ + if (zero_src) + return; + 
scaled_nearest_scanline_sse2_8888_8888_OVER (pd, ps, w, vx, unit_x); +} + + FAST_NEAREST_MAINLOOP (sse2_8888_8888_cover_OVER, - scaled_nearest_scanline_sse2_8888_8888_OVER, + scaled_nearest_scanline_sse2_8888_8888_OVER_wrapper, uint32_t, uint32_t, COVER) FAST_NEAREST_MAINLOOP (sse2_8888_8888_none_OVER, - scaled_nearest_scanline_sse2_8888_8888_OVER, + scaled_nearest_scanline_sse2_8888_8888_OVER_wrapper, uint32_t, uint32_t, NONE) FAST_NEAREST_MAINLOOP (sse2_8888_8888_pad_OVER, - scaled_nearest_scanline_sse2_8888_8888_OVER, + scaled_nearest_scanline_sse2_8888_8888_OVER_wrapper, uint32_t, uint32_t, PAD) -static force_inline void +static void scaled_nearest_scanline_sse2_8888_n_8888_OVER (const uint32_t * mask, uint32_t * dst, const uint32_t * src, int32_t w, pixman_fixed_t vx, - pixman_fixed_t unit_x, - pixman_fixed_t max_vx, - pixman_bool_t zero_src) + pixman_fixed_t unit_x) { __m128i xmm_mask; __m128i xmm_src, xmm_src_lo, xmm_src_hi; __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; __m128i xmm_alpha_lo, xmm_alpha_hi; - if (zero_src || (*mask >> 24) == 0) + if ((*mask >> 24) == 0) return; xmm_mask = create_mask_16_128 (*mask >> 24); @@ -6001,14 +6009,30 @@ scaled_nearest_scanline_sse2_8888_n_8888_OVER (const uint32_t * mask, _mm_empty (); } +static force_inline void +scaled_nearest_scanline_sse2_8888_n_8888_OVER_wrapper (const uint32_t * mask, + uint32_t * dst, + const uint32_t * src, + int32_t w, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + pixman_fixed_t max_vx, + pixman_bool_t zero_src) +{ + if (zero_src) + return; + scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, src, w, + vx, unit_x); +} + FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER, - scaled_nearest_scanline_sse2_8888_n_8888_OVER, + scaled_nearest_scanline_sse2_8888_n_8888_OVER_wrapper, uint32_t, uint32_t, uint32_t, COVER, TRUE, TRUE) FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_pad_OVER, - scaled_nearest_scanline_sse2_8888_n_8888_OVER, + 
scaled_nearest_scanline_sse2_8888_n_8888_OVER_wrapper, uint32_t, uint32_t, uint32_t, PAD, TRUE, TRUE) FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER, - scaled_nearest_scanline_sse2_8888_n_8888_OVER, + scaled_nearest_scanline_sse2_8888_n_8888_OVER_wrapper, uint32_t, uint32_t, uint32_t, NONE, TRUE, TRUE) static const pixman_fast_path_t sse2_fast_paths[] = -- 1.7.3.4