diff --git a/src/gallium/drivers/llvmpipe/lp_test_arit.c b/src/gallium/drivers/llvmpipe/lp_test_arit.c
index bf405a5..f21156d 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_arit.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_arit.c
@@ -33,6 +33,7 @@
 #include "util/u_pointer.h"
 #include "util/u_memory.h"
 #include "util/u_math.h"
+#include "util/u_cpu_detect.h"
 
 #include "gallivm/lp_bld.h"
 #include "gallivm/lp_bld_debug.h"
@@ -374,10 +375,32 @@ test_unary(unsigned verbose, FILE *fp, const struct unary_test_t *test)
 
       test_func_jit(out, in);
       for (i = 0; i < num_vals; ++i) {
-         float ref = test->ref(in[i]);
+         float ref;
          double error, precision;
+         union fi val;
          bool pass;
 
+         val.f = in[i];
+
+         /*
+          * If the input is a denormal, manually flush it to (+-)0 before
+          * calling the reference. We want the result that treats all
+          * denormals as zero (FTZ/DAZ), and the reference may or may not
+          * do that on its own. Not using fpclassify because
+          * a) some compilers are stuck at c89 (msvc)
+          * b) not sure it reliably works with non-standard ftz/daz mode
+          * And, right now we only disable denorms on x86/sse, so to get
+          * results which match only do it in this case here too.
+          */
+#if defined(PIPE_ARCH_SSE)
+         if (util_cpu_caps.has_sse) {
+            if ((val.ui & 0x7f800000) == 0) {
+               val.ui &= 0xff800000;
+            }
+         }
+#endif
+
+         ref = test->ref(val.f);
          if (util_inf_sign(ref) && util_inf_sign(out[i]) == util_inf_sign(ref)) {
             error = 0;
          } else {