--- xc/programs/Xserver/fb/fbmmx.c.orig	2005-03-01 19:01:35.401903000 -0500
+++ xc/programs/Xserver/fb/fbmmx.c	2005-03-01 19:01:39.330305792 -0500
@@ -29,6 +29,8 @@
 
 #ifdef RENDER
 
+#include <mmintrin.h>
+
 #include "picturestr.h"
 #include "mipict.h"
 #include "fbpict.h"
@@ -48,6 +50,10 @@
 #define CHECKPOINT()
 #endif
 
+#define mmx_and(a,b) ((Vector1x64)_mm_and_si64((__m64)(a),(__m64)(b)))
+#define mmx_or(a,b) ((Vector1x64)_mm_or_si64((__m64)(a),(__m64)(b)))
+#define mmx_xor(a,b) ((Vector1x64)_mm_xor_si64((__m64)(a),(__m64)(b)))
+
 typedef struct
 {
     ullong mmx_zero;
@@ -102,7 +108,7 @@
 static __inline__ Vector4x16
 negate (Vector4x16 mask)
 {
-    return (Vector4x16)__builtin_ia32_pxor (
+    return (Vector4x16)mmx_xor (
 	(Vector1x64)mask,
 	(Vector1x64)c.mmx_4x00ff);
 }
@@ -163,9 +169,9 @@
 
     t1 = shift ((Vector1x64)pixel, -48);
     t2 = shift (t1, 16);
-    t1 = __builtin_ia32_por (t1, t2);
+    t1 = mmx_or (t1, t2);
     t2 = shift (t1, 32);
-    t1 = __builtin_ia32_por (t1, t2);
+    t1 = mmx_or (t1, t2);
 
     return (Vector4x16)t1;
 }
@@ -178,9 +184,9 @@
     t1 = shift ((Vector1x64)pixel, 48);
     t1 = shift (t1, -48);
     t2 = shift (t1, 16);
-    t1 = __builtin_ia32_por (t1, t2);
+    t1 = mmx_or (t1, t2);
     t2 = shift (t1, 32);
-    t1 = __builtin_ia32_por (t1, t2);
+    t1 = mmx_or (t1, t2);
 
     return (Vector4x16)t1;
 }
@@ -192,15 +198,15 @@
 
     x = y = z = (Vector1x64)pixel;
 
-    x = __builtin_ia32_pand (x, (Vector1x64)c.mmx_ffff0000ffff0000);
-    y = __builtin_ia32_pand (y, (Vector1x64)c.mmx_000000000000ffff);
-    z = __builtin_ia32_pand (z, (Vector1x64)c.mmx_0000ffff00000000);
+    x = mmx_and (x, (Vector1x64)c.mmx_ffff0000ffff0000);
+    y = mmx_and (y, (Vector1x64)c.mmx_000000000000ffff);
+    z = mmx_and (z, (Vector1x64)c.mmx_0000ffff00000000);
 
     y = shift (y, 32);
     z = shift (z, -32);
 
-    x = __builtin_ia32_por (x, y);
-    x = __builtin_ia32_por (x, z);
+    x = mmx_or (x, y);
+    x = mmx_or (x, z);
 
     return (Vector4x16)x;
 }
@@ -234,7 +240,7 @@
 over_rev_non_pre (Vector4x16 src, Vector4x16 dest)
 {
     Vector4x16 srca = expand_alpha (src);
-    Vector4x16 srcfaaa = (Vector4x16)__builtin_ia32_por((Vector1x64)srca, (Vector1x64)c.mmx_full_alpha);
+    Vector4x16 srcfaaa = (Vector4x16)mmx_or((Vector1x64)srca, (Vector1x64)c.mmx_full_alpha);
 
     return over(pix_multiply(invert_colors(src), srcfaaa), srca, dest);
 }
@@ -300,9 +306,9 @@
     Vector1x64 t1 = shift (p, 36 - 11);
     Vector1x64 t2 = shift (p, 16 - 5);
 
-    p = __builtin_ia32_por (t1, p);
-    p = __builtin_ia32_por (t2, p);
-    p = __builtin_ia32_pand (p, (Vector1x64)c.mmx_565_rgb);
+    p = mmx_or (t1, p);
+    p = mmx_or (t2, p);
+    p = mmx_and (p, (Vector1x64)c.mmx_565_rgb);
 
     pixel = __builtin_ia32_pmullw ((Vector4x16)p, (Vector4x16)c.mmx_565_unpack_multiplier);
     return __builtin_ia32_psrlw (pixel, 8);
@@ -324,27 +330,27 @@
     Vector1x64 t = (Vector1x64)target;
     Vector1x64 r, g, b;
 
-    r = __builtin_ia32_pand (p, (Vector1x64)c.mmx_565_r);
-    g = __builtin_ia32_pand (p, (Vector1x64)c.mmx_565_g);
-    b = __builtin_ia32_pand (p, (Vector1x64)c.mmx_565_b);
+    r = mmx_and (p, (Vector1x64)c.mmx_565_r);
+    g = mmx_and (p, (Vector1x64)c.mmx_565_g);
+    b = mmx_and (p, (Vector1x64)c.mmx_565_b);
 
     r = shift (r, - (32 - 8) + pos * 16);
     g = shift (g, - (16 - 3) + pos * 16);
     b = shift (b, - (0 + 3) + pos * 16);
 
     if (pos == 0)
-	t = __builtin_ia32_pand (t, (Vector1x64)c.mmx_mask_0);
+	t = mmx_and (t, (Vector1x64)c.mmx_mask_0);
     else if (pos == 1)
-	t = __builtin_ia32_pand (t, (Vector1x64)c.mmx_mask_1);
+	t = mmx_and (t, (Vector1x64)c.mmx_mask_1);
     else if (pos == 2)
-	t = __builtin_ia32_pand (t, (Vector1x64)c.mmx_mask_2);
+	t = mmx_and (t, (Vector1x64)c.mmx_mask_2);
     else if (pos == 3)
-	t = __builtin_ia32_pand (t, (Vector1x64)c.mmx_mask_3);
+	t = mmx_and (t, (Vector1x64)c.mmx_mask_3);
 
-    p = __builtin_ia32_por (r, t);
-    p = __builtin_ia32_por (g, p);
+    p = mmx_or (r, t);
+    p = mmx_or (g, p);
 
-    return (Vector4x16)__builtin_ia32_por (b, p);
+    return (Vector4x16)mmx_or (b, p);
 }
 
 static __inline__ void
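
Note on the change (not part of the patch itself): the new wrapper macros swap the GCC-internal __builtin_ia32_pand/por/pxor names, whose availability has varied across GCC releases, for the documented _mm_and_si64/_mm_or_si64/_mm_xor_si64 intrinsics from <mmintrin.h>, keeping the rest of fbmmx.c unchanged. A minimal standalone sketch of the pattern follows; the Vector1x64 stand-in typedef and the clear_bits helper are illustrative only, not code from fbmmx.c (compile with gcc -mmmx):

#include <mmintrin.h>

/* Stand-in for the 64-bit Vector1x64 typedef that fbmmx.c already defines. */
typedef __m64 Vector1x64;

/* Same wrappers the patch adds: cast to __m64, apply the intrinsic,
 * cast back to the file's own vector type. */
#define mmx_and(a,b) ((Vector1x64)_mm_and_si64((__m64)(a),(__m64)(b)))
#define mmx_or(a,b) ((Vector1x64)_mm_or_si64((__m64)(a),(__m64)(b)))
#define mmx_xor(a,b) ((Vector1x64)_mm_xor_si64((__m64)(a),(__m64)(b)))

static Vector1x64
clear_bits (Vector1x64 pixel, Vector1x64 mask)
{
    /* The operation the patch rewrites throughout the file:
     * formerly __builtin_ia32_pand (pixel, mask). */
    return mmx_and (pixel, mask);
}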