diff --git a/configure.ac b/configure.ac index 7ccdda8..1d91b9b 100644 --- a/configure.ac +++ b/configure.ac @@ -190,11 +190,11 @@ then fi AM_CONDITIONAL(HAVE_ASM_BLOCKS, test "x$HAVE_ASM_BLOCKS" = "xyes") -#if test "x$HAVE_I386" = "xyes" -#then -# # I'd write a test for this, but as of 4.1.2, gcc is still broken -# AC_DEFINE(USE_SSE_WRAPPER, 1, [Defined if SSE functions need stack alignment wrappers]) -#fi +if test "x$HAVE_I386" = "xyes" +then + # I'd write a test for this, but as of 4.1.2, gcc is still broken + AC_DEFINE(USE_SSE_WRAPPER, 1, [Defined if SSE functions need stack alignment wrappers]) +fi AS_MMX_INTRINSICS(MMX_CFLAGS, HAVE_MMX_INTRINSICS=yes, HAVE_MMX_INTRINSICS=no) AS_SSE_INTRINSICS(SSE_CFLAGS, HAVE_SSE_INTRINSICS=yes, HAVE_SSE_INTRINSICS=no) diff --git a/liboil/sse/clamp_sse.c b/liboil/sse/clamp_sse.c index 5d34c6a..e2977fc 100644 --- a/liboil/sse/clamp_sse.c +++ b/liboil/sse/clamp_sse.c @@ -32,6 +32,8 @@ #include #include +#include "sse_wrapper.h" + static void clamp_u8_sse (uint8_t *dest, uint8_t *src1, int n, uint8_t *src2_1, uint8_t *src3_1) @@ -69,7 +71,7 @@ clamp_u8_sse (uint8_t *dest, uint8_t *src1, int n, uint8_t *src2_1, *dest++ = x; } } -OIL_DEFINE_IMPL_FULL (clamp_u8_sse, clamp_u8, OIL_IMPL_FLAG_SSE2); +OIL_DEFINE_IMPL_FULL_WRAPPER (clamp_u8_sse, clamp_u8, OIL_IMPL_FLAG_SSE2); static void clamp_s16_sse (int16_t *dest, int16_t *src1, int n, int16_t *src2_1, @@ -108,7 +110,7 @@ clamp_s16_sse (int16_t *dest, int16_t *src1, int n, int16_t *src2_1, *dest++ = x; } } -OIL_DEFINE_IMPL_FULL (clamp_s16_sse, clamp_s16, OIL_IMPL_FLAG_SSE2); +OIL_DEFINE_IMPL_FULL_WRAPPER (clamp_s16_sse, clamp_s16, OIL_IMPL_FLAG_SSE2); static void clamp_f32_sse (float *dest, const float *src1, int n, const float *src2_1, @@ -147,7 +149,7 @@ clamp_f32_sse (float *dest, const float *src1, int n, const float *src2_1, *dest++ = x; } } -OIL_DEFINE_IMPL_FULL (clamp_f32_sse, clamp_f32, OIL_IMPL_FLAG_SSE); +OIL_DEFINE_IMPL_FULL_WRAPPER (clamp_f32_sse, clamp_f32, OIL_IMPL_FLAG_SSE); static void clamp_f64_sse (double *dest, const double *src1, int n, const double *src2_1, @@ -186,7 +188,7 @@ clamp_f64_sse (double *dest, const double *src1, int n, const double *src2_1, *dest++ = x; } } -OIL_DEFINE_IMPL_FULL (clamp_f64_sse, clamp_f64, +OIL_DEFINE_IMPL_FULL_WRAPPER (clamp_f64_sse, clamp_f64, OIL_IMPL_FLAG_SSE | OIL_IMPL_FLAG_SSE2); static void @@ -219,7 +221,7 @@ clamplow_u8_sse (uint8_t *dest, const uint8_t *src1, int n, *dest++ = x; } } -OIL_DEFINE_IMPL_FULL (clamplow_u8_sse, clamplow_u8, OIL_IMPL_FLAG_SSE2); +OIL_DEFINE_IMPL_FULL_WRAPPER (clamplow_u8_sse, clamplow_u8, OIL_IMPL_FLAG_SSE2); static void clamplow_s16_sse (int16_t *dest, const int16_t *src1, int n, @@ -251,7 +253,7 @@ clamplow_s16_sse (int16_t *dest, const int16_t *src1, int n, *dest++ = x; } } -OIL_DEFINE_IMPL_FULL (clamplow_s16_sse, clamplow_s16, OIL_IMPL_FLAG_SSE2); +OIL_DEFINE_IMPL_FULL_WRAPPER (clamplow_s16_sse, clamplow_s16, OIL_IMPL_FLAG_SSE2); static void clamplow_f32_sse (float *dest, const float *src1, int n, const float *src2_1) @@ -282,7 +284,7 @@ clamplow_f32_sse (float *dest, const float *src1, int n, const float *src2_1) *dest++ = x; } } -OIL_DEFINE_IMPL_FULL (clamplow_f32_sse, clamplow_f32, OIL_IMPL_FLAG_SSE); +OIL_DEFINE_IMPL_FULL_WRAPPER (clamplow_f32_sse, clamplow_f32, OIL_IMPL_FLAG_SSE); static void clamplow_f64_sse (double *dest, const double *src1, int n, const double *src2_1) @@ -313,7 +315,7 @@ clamplow_f64_sse (double *dest, const double *src1, int n, const double *src2_1) *dest++ = x; } } -OIL_DEFINE_IMPL_FULL (clamplow_f64_sse, clamplow_f64, +OIL_DEFINE_IMPL_FULL_WRAPPER (clamplow_f64_sse, clamplow_f64, OIL_IMPL_FLAG_SSE | OIL_IMPL_FLAG_SSE2); static void @@ -346,7 +348,7 @@ clamphigh_u8_sse (uint8_t *dest, const uint8_t *src1, int n, *dest++ = x; } } -OIL_DEFINE_IMPL_FULL (clamphigh_u8_sse, clamphigh_u8, OIL_IMPL_FLAG_SSE2); +OIL_DEFINE_IMPL_FULL_WRAPPER (clamphigh_u8_sse, clamphigh_u8, OIL_IMPL_FLAG_SSE2); static void clamphigh_s16_sse (int16_t *dest, const int16_t *src1, int n, @@ -378,7 +380,7 @@ clamphigh_s16_sse (int16_t *dest, const int16_t *src1, int n, *dest++ = x; } } -OIL_DEFINE_IMPL_FULL (clamphigh_s16_sse, clamphigh_s16, OIL_IMPL_FLAG_SSE2); +OIL_DEFINE_IMPL_FULL_WRAPPER (clamphigh_s16_sse, clamphigh_s16, OIL_IMPL_FLAG_SSE2); static void clamphigh_f32_sse (float *dest, const float *src1, int n, const float *src2_1) @@ -409,7 +411,7 @@ clamphigh_f32_sse (float *dest, const float *src1, int n, const float *src2_1) *dest++ = x; } } -OIL_DEFINE_IMPL_FULL (clamphigh_f32_sse, clamphigh_f32, OIL_IMPL_FLAG_SSE); +OIL_DEFINE_IMPL_FULL_WRAPPER (clamphigh_f32_sse, clamphigh_f32, OIL_IMPL_FLAG_SSE); static void clamphigh_f64_sse (double *dest, const double *src1, int n, const double *src2_1) @@ -440,5 +442,5 @@ clamphigh_f64_sse (double *dest, const double *src1, int n, const double *src2_1 *dest++ = x; } } -OIL_DEFINE_IMPL_FULL (clamphigh_f64_sse, clamphigh_f64, +OIL_DEFINE_IMPL_FULL_WRAPPER (clamphigh_f64_sse, clamphigh_f64, OIL_IMPL_FLAG_SSE | OIL_IMPL_FLAG_SSE2); diff --git a/liboil/sse/composite_sse.c b/liboil/sse/composite_sse.c index 307fd17..5798d48 100644 --- a/liboil/sse/composite_sse.c +++ b/liboil/sse/composite_sse.c @@ -32,6 +32,8 @@ #include #include +#include "sse_wrapper.h" + #define COMPOSITE_ADD(d,s) oil_clamp_255((d) + (s)) static void @@ -64,7 +66,7 @@ composite_add_argb_sse (uint32_t *dest, const uint32_t *src, int n) COMPOSITE_ADD(oil_argb_B(d), oil_argb_B(s))); } } -OIL_DEFINE_IMPL_FULL (composite_add_argb_sse, composite_add_argb, +OIL_DEFINE_IMPL_FULL_WRAPPER (composite_add_argb_sse, composite_add_argb, OIL_IMPL_FLAG_SSE2); static void @@ -100,7 +102,7 @@ composite_add_argb_const_src_sse (uint32_t *dest, const uint32_t *src_1, int n) COMPOSITE_ADD(oil_argb_B(d), oil_argb_B(val))); } } -OIL_DEFINE_IMPL_FULL (composite_add_argb_const_src_sse, +OIL_DEFINE_IMPL_FULL_WRAPPER (composite_add_argb_const_src_sse, composite_add_argb_const_src, OIL_IMPL_FLAG_SSE2); static void @@ -128,7 +130,7 @@ composite_add_u8_sse (uint8_t *dest, const uint8_t *src, int n) *dest++ = x; } } -OIL_DEFINE_IMPL_FULL (composite_add_u8_sse, composite_add_u8, +OIL_DEFINE_IMPL_FULL_WRAPPER (composite_add_u8_sse, composite_add_u8, OIL_IMPL_FLAG_SSE2); static void @@ -158,5 +160,5 @@ composite_add_u8_const_src_sse (uint8_t *dest, const uint8_t *src_1, int n) *dest++ = x; } } -OIL_DEFINE_IMPL_FULL (composite_add_u8_const_src_sse, +OIL_DEFINE_IMPL_FULL_WRAPPER (composite_add_u8_const_src_sse, composite_add_u8_const_src, OIL_IMPL_FLAG_SSE2); diff --git a/liboil/sse/copy_sse.c b/liboil/sse/copy_sse.c index b695bc4..6cca512 100644 --- a/liboil/sse/copy_sse.c +++ b/liboil/sse/copy_sse.c @@ -31,6 +31,8 @@ #include #include +#include "sse_wrapper.h" + static void copy_u8_sse (uint8_t *dest, const uint8_t *src, int n) { @@ -46,7 +48,7 @@ copy_u8_sse (uint8_t *dest, const uint8_t *src, int n) *dest++ = *src++; } } -OIL_DEFINE_IMPL_FULL (copy_u8_sse, copy_u8, OIL_IMPL_FLAG_SSE2); +OIL_DEFINE_IMPL_FULL_WRAPPER (copy_u8_sse, copy_u8, OIL_IMPL_FLAG_SSE2); static void copy_u8_sse_unroll2 (uint8_t *dest, const uint8_t *src, int n) @@ -70,4 +72,4 @@ copy_u8_sse_unroll2 (uint8_t *dest, const uint8_t *src, int n) *dest++ = *src++; } } -OIL_DEFINE_IMPL_FULL (copy_u8_sse_unroll2, copy_u8, OIL_IMPL_FLAG_SSE2); +OIL_DEFINE_IMPL_FULL_WRAPPER (copy_u8_sse_unroll2, copy_u8, OIL_IMPL_FLAG_SSE2); diff --git a/liboil/sse/math_sse.c b/liboil/sse/math_sse.c index e5d238d..d3f483d 100644 --- a/liboil/sse/math_sse.c +++ b/liboil/sse/math_sse.c @@ -32,6 +32,8 @@ #include #include +#include "sse_wrapper.h" + static void add_f32_sse (float *dest, float *src1, float *src2, int n) { @@ -53,7 +55,7 @@ add_f32_sse (float *dest, float *src1, float *src2, int n) *dest++ = *src1++ + *src2++; } } -OIL_DEFINE_IMPL_FULL (add_f32_sse, add_f32, OIL_IMPL_FLAG_SSE); +OIL_DEFINE_IMPL_FULL_WRAPPER (add_f32_sse, add_f32, OIL_IMPL_FLAG_SSE); static void add_f64_sse2 (double *dest, double *src1, double *src2, int n) @@ -78,7 +80,7 @@ add_f64_sse2 (double *dest, double *src1, double *src2, int n) n--; } } -OIL_DEFINE_IMPL_FULL (add_f64_sse2, add_f64, OIL_IMPL_FLAG_SSE2); +OIL_DEFINE_IMPL_FULL_WRAPPER (add_f64_sse2, add_f64, OIL_IMPL_FLAG_SSE2); static void add_f64_sse2_unroll (double *dest, double *src1, double *src2, int n) @@ -118,7 +120,7 @@ add_f64_sse2_unroll (double *dest, double *src1, double *src2, int n) n--; } } -OIL_DEFINE_IMPL_FULL (add_f64_sse2_unroll, add_f64, OIL_IMPL_FLAG_SSE2); +OIL_DEFINE_IMPL_FULL_WRAPPER (add_f64_sse2_unroll, add_f64, OIL_IMPL_FLAG_SSE2); static void subtract_f32_sse (float *dest, float *src1, float *src2, int n) @@ -141,7 +143,7 @@ subtract_f32_sse (float *dest, float *src1, float *src2, int n) *dest++ = *src1++ - *src2++; } } -OIL_DEFINE_IMPL_FULL (subtract_f32_sse, subtract_f32, OIL_IMPL_FLAG_SSE); +OIL_DEFINE_IMPL_FULL_WRAPPER (subtract_f32_sse, subtract_f32, OIL_IMPL_FLAG_SSE); static void multiply_f32_sse (float *dest, float *src1, float *src2, int n) @@ -164,7 +166,7 @@ multiply_f32_sse (float *dest, float *src1, float *src2, int n) *dest++ = *src1++ * *src2++; } } -OIL_DEFINE_IMPL_FULL (multiply_f32_sse, multiply_f32, OIL_IMPL_FLAG_SSE); +OIL_DEFINE_IMPL_FULL_WRAPPER (multiply_f32_sse, multiply_f32, OIL_IMPL_FLAG_SSE); static void divide_f32_sse (float *dest, float *src1, float *src2, int n) @@ -187,7 +189,7 @@ divide_f32_sse (float *dest, float *src1, float *src2, int n) *dest++ = *src1++ / *src2++; } } -OIL_DEFINE_IMPL_FULL (divide_f32_sse, divide_f32, OIL_IMPL_FLAG_SSE); +OIL_DEFINE_IMPL_FULL_WRAPPER (divide_f32_sse, divide_f32, OIL_IMPL_FLAG_SSE); static void minimum_f32_sse (float *dest, float *src1, float *src2, int n) @@ -214,7 +216,7 @@ minimum_f32_sse (float *dest, float *src1, float *src2, int n) src2++; } } -OIL_DEFINE_IMPL_FULL (minimum_f32_sse, minimum_f32, OIL_IMPL_FLAG_SSE); +OIL_DEFINE_IMPL_FULL_WRAPPER (minimum_f32_sse, minimum_f32, OIL_IMPL_FLAG_SSE); static void maximum_f32_sse (float *dest, float *src1, float *src2, int n) @@ -241,7 +243,7 @@ maximum_f32_sse (float *dest, float *src1, float *src2, int n) src2++; } } -OIL_DEFINE_IMPL_FULL (maximum_f32_sse, maximum_f32, OIL_IMPL_FLAG_SSE); +OIL_DEFINE_IMPL_FULL_WRAPPER (maximum_f32_sse, maximum_f32, OIL_IMPL_FLAG_SSE); static void inverse_f32_sse (float *dest, float *src1, int n) @@ -266,7 +268,7 @@ inverse_f32_sse (float *dest, float *src1, int n) *dest++ = 1.0 / *src1++; } } -OIL_DEFINE_IMPL_FULL (inverse_f32_sse, inverse_f32, OIL_IMPL_FLAG_SSE); +OIL_DEFINE_IMPL_FULL_WRAPPER (inverse_f32_sse, inverse_f32, OIL_IMPL_FLAG_SSE); static void negative_f32_sse (float *dest, float *src1, int n) @@ -288,7 +290,7 @@ negative_f32_sse (float *dest, float *src1, int n) *dest++ = -(*src1++); } } -OIL_DEFINE_IMPL_FULL (negative_f32_sse, negative_f32, OIL_IMPL_FLAG_SSE); +OIL_DEFINE_IMPL_FULL_WRAPPER (negative_f32_sse, negative_f32, OIL_IMPL_FLAG_SSE); static void scalaradd_f32_ns_sse (float *dest, float *src1, float *val, int n) @@ -312,7 +314,7 @@ scalaradd_f32_ns_sse (float *dest, float *src1, float *val, int n) *dest++ = *src1++ + *val; } } -OIL_DEFINE_IMPL_FULL (scalaradd_f32_ns_sse, scalaradd_f32_ns, OIL_IMPL_FLAG_SSE); +OIL_DEFINE_IMPL_FULL_WRAPPER (scalaradd_f32_ns_sse, scalaradd_f32_ns, OIL_IMPL_FLAG_SSE); static void scalarmultiply_f32_ns_sse (float *dest, float *src1, float *val, int n) @@ -336,7 +338,7 @@ scalarmultiply_f32_ns_sse (float *dest, float *src1, float *val, int n) *dest++ = *src1++ * *val; } } -OIL_DEFINE_IMPL_FULL (scalarmultiply_f32_ns_sse, scalarmultiply_f32_ns, OIL_IMPL_FLAG_SSE); +OIL_DEFINE_IMPL_FULL_WRAPPER (scalarmultiply_f32_ns_sse, scalarmultiply_f32_ns, OIL_IMPL_FLAG_SSE); static void scalarmultiply_f64_ns_sse2 (double *dest, double *src1, double *val, int n) @@ -360,5 +362,5 @@ scalarmultiply_f64_ns_sse2 (double *dest, double *src1, double *val, int n) *dest++ = *src1++ * *val; } } -OIL_DEFINE_IMPL_FULL (scalarmultiply_f64_ns_sse2, scalarmultiply_f64_ns, OIL_IMPL_FLAG_SSE2); +OIL_DEFINE_IMPL_FULL_WRAPPER (scalarmultiply_f64_ns_sse2, scalarmultiply_f64_ns, OIL_IMPL_FLAG_SSE2); diff --git a/liboil/sse/math_sse_unroll2.c b/liboil/sse/math_sse_unroll2.c index 51dca09..61b414b 100644 --- a/liboil/sse/math_sse_unroll2.c +++ b/liboil/sse/math_sse_unroll2.c @@ -32,6 +32,8 @@ #include #include +#include "sse_wrapper.h" + static void add_f32_sse_unroll2 (float *dest, float *src1, float *src2, int n) { @@ -57,7 +59,7 @@ add_f32_sse_unroll2 (float *dest, float *src1, float *src2, int n) *dest++ = *src1++ + *src2++; } } -OIL_DEFINE_IMPL_FULL (add_f32_sse_unroll2, add_f32, OIL_IMPL_FLAG_SSE); +OIL_DEFINE_IMPL_FULL_WRAPPER (add_f32_sse_unroll2, add_f32, OIL_IMPL_FLAG_SSE); static void subtract_f32_sse_unroll2 (float *dest, float *src1, float *src2, int n) @@ -84,7 +86,7 @@ subtract_f32_sse_unroll2 (float *dest, float *src1, float *src2, int n) *dest++ = *src1++ - *src2++; } } -OIL_DEFINE_IMPL_FULL (subtract_f32_sse_unroll2, subtract_f32, OIL_IMPL_FLAG_SSE); +OIL_DEFINE_IMPL_FULL_WRAPPER (subtract_f32_sse_unroll2, subtract_f32, OIL_IMPL_FLAG_SSE); static void multiply_f32_sse_unroll2 (float *dest, float *src1, float *src2, int n) @@ -111,7 +113,7 @@ multiply_f32_sse_unroll2 (float *dest, float *src1, float *src2, int n) *dest++ = *src1++ * *src2++; } } -OIL_DEFINE_IMPL_FULL (multiply_f32_sse_unroll2, multiply_f32, OIL_IMPL_FLAG_SSE); +OIL_DEFINE_IMPL_FULL_WRAPPER (multiply_f32_sse_unroll2, multiply_f32, OIL_IMPL_FLAG_SSE); static void divide_f32_sse_unroll2 (float *dest, float *src1, float *src2, int n) @@ -138,7 +140,7 @@ divide_f32_sse_unroll2 (float *dest, float *src1, float *src2, int n) *dest++ = *src1++ / *src2++; } } -OIL_DEFINE_IMPL_FULL (divide_f32_sse_unroll2, divide_f32, OIL_IMPL_FLAG_SSE); +OIL_DEFINE_IMPL_FULL_WRAPPER (divide_f32_sse_unroll2, divide_f32, OIL_IMPL_FLAG_SSE); static void minimum_f32_sse_unroll2 (float *dest, float *src1, float *src2, int n) @@ -169,7 +171,7 @@ minimum_f32_sse_unroll2 (float *dest, float *src1, float *src2, int n) src2++; } } -OIL_DEFINE_IMPL_FULL (minimum_f32_sse_unroll2, minimum_f32, OIL_IMPL_FLAG_SSE); +OIL_DEFINE_IMPL_FULL_WRAPPER (minimum_f32_sse_unroll2, minimum_f32, OIL_IMPL_FLAG_SSE); static void maximum_f32_sse_unroll2 (float *dest, float *src1, float *src2, int n) @@ -200,7 +202,7 @@ maximum_f32_sse_unroll2 (float *dest, float *src1, float *src2, int n) src2++; } } -OIL_DEFINE_IMPL_FULL (maximum_f32_sse_unroll2, maximum_f32, OIL_IMPL_FLAG_SSE); +OIL_DEFINE_IMPL_FULL_WRAPPER (maximum_f32_sse_unroll2, maximum_f32, OIL_IMPL_FLAG_SSE); static void inverse_f32_sse_unroll2 (float *dest, float *src1, int n) @@ -229,7 +231,7 @@ inverse_f32_sse_unroll2 (float *dest, float *src1, int n) *dest++ = 1.0 / *src1++; } } -OIL_DEFINE_IMPL_FULL (inverse_f32_sse_unroll2, inverse_f32, OIL_IMPL_FLAG_SSE); +OIL_DEFINE_IMPL_FULL_WRAPPER (inverse_f32_sse_unroll2, inverse_f32, OIL_IMPL_FLAG_SSE); static void negative_f32_sse_unroll2 (float *dest, float *src1, int n) @@ -255,7 +257,7 @@ negative_f32_sse_unroll2 (float *dest, float *src1, int n) *dest++ = -(*src1++); } } -OIL_DEFINE_IMPL_FULL (negative_f32_sse_unroll2, negative_f32, OIL_IMPL_FLAG_SSE); +OIL_DEFINE_IMPL_FULL_WRAPPER (negative_f32_sse_unroll2, negative_f32, OIL_IMPL_FLAG_SSE); static void scalaradd_f32_ns_sse_unroll2 (float *dest, float *src1, float *val, int n) @@ -282,7 +284,7 @@ scalaradd_f32_ns_sse_unroll2 (float *dest, float *src1, float *val, int n) *dest++ = *src1++ + *val; } } -OIL_DEFINE_IMPL_FULL (scalaradd_f32_ns_sse_unroll2, scalaradd_f32_ns, OIL_IMPL_FLAG_SSE); +OIL_DEFINE_IMPL_FULL_WRAPPER (scalaradd_f32_ns_sse_unroll2, scalaradd_f32_ns, OIL_IMPL_FLAG_SSE); static void scalarmultiply_f32_ns_sse_unroll2 (float *dest, float *src1, float *val, int n) @@ -309,7 +311,7 @@ scalarmultiply_f32_ns_sse_unroll2 (float *dest, float *src1, float *val, int n) *dest++ = *src1++ * *val; } } -OIL_DEFINE_IMPL_FULL (scalarmultiply_f32_ns_sse_unroll2, scalarmultiply_f32_ns, OIL_IMPL_FLAG_SSE); +OIL_DEFINE_IMPL_FULL_WRAPPER (scalarmultiply_f32_ns_sse_unroll2, scalarmultiply_f32_ns, OIL_IMPL_FLAG_SSE); static void scalarmultiply_f64_ns_sse2_unroll2 (double *dest, double *src1, double *val, int n) diff --git a/liboil/sse/multsum_sse.c b/liboil/sse/multsum_sse.c index 37238a5..c41bffc 100644 --- a/liboil/sse/multsum_sse.c +++ b/liboil/sse/multsum_sse.c @@ -5,6 +5,8 @@ #include #include +#include "sse_wrapper.h" + #define MULTSUM_SSE2_NSTRIDED(i) { \ t1 = _mm_load_pd(&OIL_GET(src1, i, double)); \ t2 = _mm_load_pd(&OIL_GET(src2, i, double)); \ @@ -63,6 +65,6 @@ multsum_f64_sse2_unroll4(double *dest, *dest += (OIL_GET(src1,0,double)*OIL_GET(src2,0,double)); } } -OIL_DEFINE_IMPL_FULL (multsum_f64_sse2_unroll4, multsum_f64, OIL_IMPL_FLAG_SSE2); +OIL_DEFINE_IMPL_FULL_WRAPPER (multsum_f64_sse2_unroll4, multsum_f64, OIL_IMPL_FLAG_SSE2); #endif diff --git a/liboil/sse/splat_sse.c b/liboil/sse/splat_sse.c index 14593a6..6231293 100644 --- a/liboil/sse/splat_sse.c +++ b/liboil/sse/splat_sse.c @@ -31,6 +31,8 @@ #include #include +#include "sse_wrapper.h" + static void splat_u32_ns_sse (uint32_t *dest, const uint32_t *param, int n) { @@ -49,7 +51,7 @@ splat_u32_ns_sse (uint32_t *dest, const uint32_t *param, int n) *dest++ = *param; } } -OIL_DEFINE_IMPL_FULL (splat_u32_ns_sse, splat_u32_ns, OIL_IMPL_FLAG_SSE2); +OIL_DEFINE_IMPL_FULL_WRAPPER (splat_u32_ns_sse, splat_u32_ns, OIL_IMPL_FLAG_SSE2); static void splat_u32_ns_sse_unroll2 (uint32_t *dest, const uint32_t *param, int n) @@ -74,7 +76,7 @@ splat_u32_ns_sse_unroll2 (uint32_t *dest, const uint32_t *param, int n) *dest++ = *param; } } -OIL_DEFINE_IMPL_FULL (splat_u32_ns_sse_unroll2, splat_u32_ns, OIL_IMPL_FLAG_SSE2); +OIL_DEFINE_IMPL_FULL_WRAPPER (splat_u32_ns_sse_unroll2, splat_u32_ns, OIL_IMPL_FLAG_SSE2); static void splat_u8_ns_sse (uint8_t *dest, const uint8_t *param, int n) @@ -94,7 +96,7 @@ splat_u8_ns_sse (uint8_t *dest, const uint8_t *param, int n) *dest++ = *param; } } -OIL_DEFINE_IMPL_FULL (splat_u8_ns_sse, splat_u8_ns, OIL_IMPL_FLAG_SSE2); +OIL_DEFINE_IMPL_FULL_WRAPPER (splat_u8_ns_sse, splat_u8_ns, OIL_IMPL_FLAG_SSE2); static void splat_u8_ns_sse_unroll2 (uint8_t *dest, const uint8_t *param, int n) @@ -119,4 +121,4 @@ splat_u8_ns_sse_unroll2 (uint8_t *dest, const uint8_t *param, int n) *dest++ = *param; } } -OIL_DEFINE_IMPL_FULL (splat_u8_ns_sse_unroll2, splat_u8_ns, OIL_IMPL_FLAG_SSE2); +OIL_DEFINE_IMPL_FULL_WRAPPER (splat_u8_ns_sse_unroll2, splat_u8_ns, OIL_IMPL_FLAG_SSE2);