gallivm: rework the fallback paths of lp_build_floor() and lp_build_ifloor()

Both functions keep using lp_build_round_arch() in the branch shown as
context; this patch only replaces the else-branch fallbacks:

 * lp_build_floor(): truncate via FPToSI/SIToFP, subtract one where the
   truncated value is greater than the input, and hand the input back
   unchanged where the bit pattern of |a| compares greater than that of
   the threshold constant.  The threshold is written as 1 << 24: the C
   expression 2^24 is a bitwise XOR and evaluates to 26.
 * lp_build_ifloor(): truncate via FPToSI and add the comparison mask
   (minus one / zero) to the integer result.

The "#if 0" guard around the "For simulating less capable machines" block
in lp_build_init() is left untouched; flip it locally when exercising the
fallback paths.

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index d930f09..d565996 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -1648,10 +1648,62 @@ lp_build_floor(struct lp_build_context *bld,
       return lp_build_round_arch(bld, a, LP_BUILD_ROUND_FLOOR);
    }
    else {
-      LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type);
+      const struct lp_type type = bld->type;
       LLVMValueRef res;
-      res = lp_build_ifloor(bld, a);
-      res = LLVMBuildSIToFP(builder, res, vec_type, "");
+      LLVMTypeRef int_vec_type = bld->int_vec_type;
+      LLVMTypeRef vec_type = bld->vec_type;
+
+      if (type.sign) {
+         struct lp_type inttype;
+         struct lp_build_context intbld;
+         LLVMValueRef cmpval = lp_build_const_vec(bld->gallivm, type, 1 << 24);
+         LLVMValueRef trunc, truncminusone, mask, anosign;
+
+         assert(type.floating);
+         assert(type.width == 32); /* might want to handle doubles at some point */
+         assert(lp_check_value(type, a));
+
+         inttype = type;
+         inttype.floating = 0;
+         lp_build_context_init(&intbld, bld->gallivm, inttype);
+
+         /* round by truncation */
+         trunc = LLVMBuildFPToSI(builder, a, int_vec_type, "");
+         trunc = LLVMBuildSIToFP(builder, trunc, vec_type, "floor.trunc");
+
+         /*
+          * fix values if rounding is wrong (for non-special cases)
+          *  - this is the case if trunc > a
+          */
+         mask = lp_build_cmp(bld, PIPE_FUNC_GREATER, trunc, a);
+         /*
+          * instead of sub/select could do
+          * resint = add(itrunc, mask) (mask is minus one / zero)
+          * res = SiToFP(resint)
+          */
+         truncminusone = lp_build_sub(bld, trunc, bld->one);
+         res = lp_build_select(bld, mask, truncminusone, trunc);
+
+         /* mask out sign bit */
+         anosign = lp_build_abs(bld, a);
+         /*
+          * mask out all values if anosign > 2^24
+          * This should work both for large ints (floor is no-op for them because
+          * such floats are always exact) as well as special cases like NaNs, Infs
+          * (taking advantage of the fact they use max exponent).
+          * (2^24 is arbitrary, anything between 2^24 and 2^31 should work.)
+          */
+         anosign = LLVMBuildBitCast(builder, anosign, int_vec_type, "");
+         cmpval = LLVMBuildBitCast(builder, cmpval, int_vec_type, "");
+         mask = lp_build_cmp(&intbld, PIPE_FUNC_GREATER, anosign, cmpval);
+         res = lp_build_select(bld, mask, a, res);
+      }
+      else {
+         /* round by truncation */
+         res = LLVMBuildFPToSI(builder, a, int_vec_type, "");
+         res = LLVMBuildSIToFP(builder, res, vec_type, "floor.trunc");
+
+      }
       return res;
    }
 }
@@ -1826,32 +1878,30 @@ lp_build_ifloor(struct lp_build_context *bld,
          res = lp_build_round_arch(bld, a, LP_BUILD_ROUND_FLOOR);
       }
       else {
-         /* Take the sign bit and add it to 1 constant */
-         LLVMTypeRef vec_type = bld->vec_type;
-         unsigned mantissa = lp_mantissa(type);
-         LLVMValueRef mask = lp_build_const_int_vec(bld->gallivm, type,
-                                  (unsigned long long)1 << (type.width - 1));
-         LLVMValueRef sign;
-         LLVMValueRef offset;
+         struct lp_type inttype;
+         struct lp_build_context intbld;
+         LLVMValueRef trunc, itrunc, mask;
 
-         /* sign = a < 0 ? ~0 : 0 */
-         sign = LLVMBuildBitCast(builder, a, int_vec_type, "");
-         sign = LLVMBuildAnd(builder, sign, mask, "");
-         sign = LLVMBuildAShr(builder, sign,
-                              lp_build_const_int_vec(bld->gallivm, type,
-                                                     type.width - 1),
-                              "ifloor.sign");
+         assert(type.floating);
+         assert(lp_check_value(type, a));
 
-         /* offset = -0.99999(9)f */
-         offset = lp_build_const_vec(bld->gallivm, type,
-                                     -(double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa));
-         offset = LLVMConstBitCast(offset, int_vec_type);
+         inttype = type;
+         inttype.floating = 0;
+         lp_build_context_init(&intbld, bld->gallivm, inttype);
 
-         /* offset = a < 0 ? offset : 0.0f */
-         offset = LLVMBuildAnd(builder, offset, sign, "");
-         offset = LLVMBuildBitCast(builder, offset, vec_type, "ifloor.offset");
+         /* round by truncation */
+         itrunc = LLVMBuildFPToSI(builder, a, int_vec_type, "");
+         trunc = LLVMBuildSIToFP(builder, itrunc, bld->vec_type, "ifloor.trunc");
 
-         res = LLVMBuildFAdd(builder, res, offset, "ifloor.res");
+         /*
+          * fix values if rounding is wrong (for non-special cases)
+          *  - this is the case if trunc > a
+          * The results of doing this with NaNs, very large values etc.
+          * are undefined but this seems to be the case anyway.
+          */
+         mask = lp_build_cmp(bld, PIPE_FUNC_GREATER, trunc, a);
+         /* cheapie minus one with mask since the mask is minus one / zero */
+         return lp_build_add(&intbld, itrunc, mask);
       }
    }
 