From 18ecf4d377954bb99ed9c5ca2f7b450cfdf2cd5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Tue, 3 Nov 2015 18:16:29 +0100 Subject: [PATCH] Add llvm.ldexp.* intrinsic, associated SDNode and library calls This allows the LibCallSimplifier to consistently produce only intrinsics when optimizing an existing exp2 intrinsic. For targets that do not support FLDEXP natively (i.e. most of them), legalization can Expand it to the open coded SINT_TO_FP + FEXP2 + FMUL or convert it to the corresponding LibCall. This has been turned on conservatively. This fixes a bug where instcombine incorrectly generated a library call for the AMDGPU target. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92709 --- docs/LangRef.rst | 36 +++++++++++++++++++++ include/llvm/Analysis/TargetLibraryInfo.h | 1 + include/llvm/CodeGen/ISDOpcodes.h | 3 ++ include/llvm/CodeGen/RuntimeLibcalls.h | 5 +++ include/llvm/IR/Intrinsics.td | 1 + include/llvm/Target/TargetSelectionDAG.td | 4 +++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 16 ++++++++++ lib/CodeGen/SelectionDAG/LegalizeTypes.h | 6 ++-- lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 1 + lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 20 ++++++------ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 32 +++++++++++++++++++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 1 + lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 1 + lib/CodeGen/TargetLoweringBase.cpp | 12 +++++++ lib/Target/AArch64/AArch64ISelLowering.cpp | 6 ++++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 7 ++-- lib/Target/AMDGPU/AMDGPUISelLowering.h | 1 - lib/Target/AMDGPU/AMDGPUInstrInfo.td | 2 -- lib/Target/AMDGPU/SIInstructions.td | 4 +-- lib/Target/ARM/ARMISelLowering.cpp | 6 ++++ lib/Target/Hexagon/HexagonISelLowering.cpp | 4 +-- lib/Target/Mips/MipsISelLowering.cpp | 2 ++ lib/Target/PowerPC/PPCISelLowering.cpp | 5 +++ lib/Target/WebAssembly/WebAssemblyISelLowering.cpp | 3 +- lib/Target/X86/X86ISelLowering.cpp | 11 +++++++ 
lib/Transforms/Utils/SimplifyLibCalls.cpp | 37 ++++++++++++++-------- test/CodeGen/AMDGPU/llvm.ldexp.ll | 26 +++++++++++++++ test/CodeGen/X86/ldexp.ll | 30 ++++++++++++++++++ test/Transforms/InstCombine/exp2-1.ll | 8 ++--- 29 files changed, 248 insertions(+), 43 deletions(-) create mode 100644 test/CodeGen/AMDGPU/llvm.ldexp.ll create mode 100644 test/CodeGen/X86/ldexp.ll diff --git a/docs/LangRef.rst b/docs/LangRef.rst index 388bb6b..18c9c89 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -9852,6 +9852,42 @@ Semantics: This function returns the same values as the libm ``exp2`` functions would, and handles error conditions in the same way. +'``llvm.ldexp.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.ldexp`` on any +floating point or vector of floating point type. Not all targets support +all types however. + +:: + + declare float @llvm.ldexp.f32(float %Val, i32 %Exp) + declare double @llvm.ldexp.f64(double %Val, i32 %Exp) + declare x86_fp80 @llvm.ldexp.f80(x86_fp80 %Val, i32 %Exp) + declare fp128 @llvm.ldexp.f128(fp128 %Val, i32 %Exp) + declare ppc_fp128 @llvm.ldexp.ppcf128(ppc_fp128 %Val, i32 %Exp) + +Overview: +""""""""" + +The '``llvm.ldexp.*``' intrinsics perform the ldexp function. + +Arguments: +"""""""""" + +The first argument and the return value are floating point numbers of the same +type. The second argument is an integer. + +Semantics: +"""""""""" + +This function multiplies the first argument by 2 raised to the second argument's +power, thus returning the same values as the libm ``ldexp`` functions +would, and handles error conditions in the same way. 
+ '``llvm.log.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/include/llvm/Analysis/TargetLibraryInfo.h b/include/llvm/Analysis/TargetLibraryInfo.h index 7becdf0..9ff10ea 100644 --- a/include/llvm/Analysis/TargetLibraryInfo.h +++ b/include/llvm/Analysis/TargetLibraryInfo.h @@ -233,6 +233,7 @@ public: case LibFunc::trunc: case LibFunc::truncf: case LibFunc::truncl: case LibFunc::log2: case LibFunc::log2f: case LibFunc::log2l: case LibFunc::exp2: case LibFunc::exp2f: case LibFunc::exp2l: + case LibFunc::ldexp: case LibFunc::ldexpf: case LibFunc::ldexpl: case LibFunc::memcmp: case LibFunc::strcmp: case LibFunc::strcpy: case LibFunc::stpcpy: case LibFunc::strlen: case LibFunc::strnlen: case LibFunc::memchr: diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h index c28802c..6261520 100644 --- a/include/llvm/CodeGen/ISDOpcodes.h +++ b/include/llvm/CodeGen/ISDOpcodes.h @@ -524,6 +524,9 @@ namespace ISD { /// when a single input is NaN, NaN is returned. FMINNAN, FMAXNAN, + /// FLDEXP - ldexp from libm (op0 * 2**op1). + FLDEXP, + /// FSINCOS - Compute both fsin and fcos as a single operation. 
FSINCOS, diff --git a/include/llvm/CodeGen/RuntimeLibcalls.h b/include/llvm/CodeGen/RuntimeLibcalls.h index 2be5de6..791f508 100644 --- a/include/llvm/CodeGen/RuntimeLibcalls.h +++ b/include/llvm/CodeGen/RuntimeLibcalls.h @@ -148,6 +148,11 @@ namespace RTLIB { EXP2_F80, EXP2_F128, EXP2_PPCF128, + LDEXP_F32, + LDEXP_F64, + LDEXP_F80, + LDEXP_F128, + LDEXP_PPCF128, SIN_F32, SIN_F64, SIN_F80, diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td index fa43eb6..fa8b25d 100644 --- a/include/llvm/IR/Intrinsics.td +++ b/include/llvm/IR/Intrinsics.td @@ -360,6 +360,7 @@ let Properties = [IntrNoMem] in { def int_log2 : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_exp : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_exp2 : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_ldexp : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty]>; def int_fabs : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_minnum : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>]>; def int_maxnum : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>]>; diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td index 7a73a0f..0b33339 100644 --- a/include/llvm/Target/TargetSelectionDAG.td +++ b/include/llvm/Target/TargetSelectionDAG.td @@ -148,6 +148,9 @@ def SDTIntToFPOp : SDTypeProfile<1, 1, [ // [su]int_to_fp def SDTFPToIntOp : SDTypeProfile<1, 1, [ // fp_to_[su]int SDTCisInt<0>, SDTCisFP<1> ]>; +def SDTFPExpOp : SDTypeProfile<1, 2, [ // ldexp + SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2> +]>; def SDTExtInreg : SDTypeProfile<1, 2, [ // sext_inreg SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisVT<2, OtherVT>, SDTCisVTSmallerThanOp<2, 1> @@ -434,6 +437,7 @@ def fceil : SDNode<"ISD::FCEIL" , SDTFPUnaryOp>; def ffloor : SDNode<"ISD::FFLOOR" , SDTFPUnaryOp>; def fnearbyint : SDNode<"ISD::FNEARBYINT" , SDTFPUnaryOp>; def frnd : SDNode<"ISD::FROUND" , SDTFPUnaryOp>; 
+def fldexp : SDNode<"ISD::FLDEXP" , SDTFPExpOp>; def fround : SDNode<"ISD::FP_ROUND" , SDTFPRoundOp>; def fextend : SDNode<"ISD::FP_EXTEND" , SDTFPExtendOp>; diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 80cd40e..f735bbc 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3244,6 +3244,16 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { } break; } + + case ISD::FLDEXP: { + EVT VT = Node->getValueType(0); + Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, VT, Node->getOperand(1)); + Tmp2 = DAG.getNode(ISD::FEXP2, dl, VT, Tmp1); + Tmp3 = DAG.getNode(ISD::FMUL, dl, VT, Node->getOperand(0), Tmp2); + Results.push_back(Tmp3); + break; + } + case ISD::FMAD: llvm_unreachable("Illegal fmad should never be formed"); @@ -3964,6 +3974,11 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::ROUND_F128, RTLIB::ROUND_PPCF128)); break; + case ISD::FLDEXP: + Results.push_back(ExpandFPLibCall(Node, RTLIB::LDEXP_F32, RTLIB::LDEXP_F64, + RTLIB::LDEXP_F80, RTLIB::LDEXP_F128, + RTLIB::LDEXP_PPCF128)); + break; case ISD::FPOWI: Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64, RTLIB::POWI_F80, RTLIB::POWI_F128, @@ -4266,6 +4281,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { break; } case ISD::FCOPYSIGN: + case ISD::FLDEXP: case ISD::FPOWI: { Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = Node->getOperand(1); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 5d8e46b..aa6976d 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -559,13 +559,13 @@ private: SDValue ScalarizeVecRes_TernaryOp(SDNode *N); SDValue ScalarizeVecRes_UnaryOp(SDNode *N); SDValue ScalarizeVecRes_InregOp(SDNode *N); + SDValue ScalarizeVecRes_ExpOp(SDNode *N); SDValue ScalarizeVecRes_BITCAST(SDNode *N); SDValue 
ScalarizeVecRes_BUILD_VECTOR(SDNode *N); SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N); SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N); SDValue ScalarizeVecRes_FP_ROUND(SDNode *N); - SDValue ScalarizeVecRes_FPOWI(SDNode *N); SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); @@ -608,13 +608,13 @@ private: void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_ExpOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); - void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi); @@ -685,7 +685,7 @@ private: SDValue WidenVecRes_BinaryCanTrap(SDNode *N); SDValue WidenVecRes_Convert(SDNode *N); SDValue WidenVecRes_FCOPYSIGN(SDNode *N); - SDValue WidenVecRes_POWI(SDNode *N); + SDValue WidenVecRes_ExpOp(SDNode *N); SDValue WidenVecRes_Shift(SDNode *N); SDValue WidenVecRes_Unary(SDNode *N); SDValue WidenVecRes_InregOp(SDNode *N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index f98b801..9be09f4 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -306,6 +306,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FSQRT: case ISD::FSIN: case ISD::FCOS: + 
case ISD::FLDEXP: case ISD::FPOWI: case ISD::FPOW: case ISD::FLOG: diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index dffcaaa..32218ca 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -55,7 +55,8 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break; case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break; - case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break; + case ISD::FLDEXP: + case ISD::FPOWI: R = ScalarizeVecRes_ExpOp(N); break; case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast(N));break; case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; @@ -198,10 +199,10 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) { NewVT, Op, N->getOperand(1)); } -SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) { +SDValue DAGTypeLegalizer::ScalarizeVecRes_ExpOp(SDNode *N) { SDValue Op = GetScalarizedVector(N->getOperand(0)); - return DAG.getNode(ISD::FPOWI, SDLoc(N), - Op.getValueType(), Op, N->getOperand(1)); + return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op, + N->getOperand(1)); } SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { @@ -595,7 +596,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break; case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break; case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; - case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break; + case ISD::FLDEXP: + case ISD::FPOWI: SplitVecRes_ExpOp(N, Lo, Hi); break; case ISD::FCOPYSIGN: SplitVecRes_FCOPYSIGN(N, Lo, 
Hi); break; case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break; case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break; @@ -870,8 +872,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, false, false, false, MinAlign(Alignment, IncrementSize)); } -void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, - SDValue &Hi) { +void DAGTypeLegalizer::SplitVecRes_ExpOp(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); GetSplitVector(N->getOperand(0), Lo, Hi); Lo = DAG.getNode(ISD::FPOWI, dl, Lo.getValueType(), Lo, N->getOperand(1)); @@ -2004,8 +2005,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_FCOPYSIGN(N); break; + case ISD::FLDEXP: case ISD::FPOWI: - Res = WidenVecRes_POWI(N); + Res = WidenVecRes_ExpOp(N); break; case ISD::SHL: @@ -2310,7 +2312,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) { return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); } -SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) { +SDValue DAGTypeLegalizer::WidenVecRes_ExpOp(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp = GetWidenedVector(N->getOperand(0)); SDValue ShOp = N->getOperand(1); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 8d04f47..08c64b4 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4208,6 +4208,12 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS, return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS); } +/// ExpandLdExp - Expand a llvm.ldexp intrinsic. 
+static SDValue ExpandLdExp(SDLoc DL, SDValue Op1, SDValue Op2, + SelectionDAG &DAG) { + return DAG.getNode(ISD::FLDEXP, DL, Op1.getValueType(), Op1, Op2); +} + // getUnderlyingArgReg - Find underlying register used for a truncated or // bitcasted argument. static unsigned getUnderlyingArgReg(const SDValue &N) { @@ -4740,6 +4746,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), DAG)); return nullptr; + case Intrinsic::ldexp: + setValue(&I, ExpandLdExp(sdl, getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), DAG)); + return nullptr; case Intrinsic::log: setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); return nullptr; @@ -5738,6 +5748,22 @@ bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I, return true; } +/// visitLdExpCall - If a call instruction fits a ldexp call (as expected), +/// translate it to an SDNode with opcode FLDEXP and return true. +bool SelectionDAGBuilder::visitLdExpCall(const CallInst &I) { + if (I.getNumArgOperands() != 2 || + !I.getArgOperand(0)->getType()->isFloatingPointTy() || + !I.getArgOperand(1)->getType()->isIntegerTy() || + I.getType() != I.getArgOperand(0)->getType() || !I.onlyReadsMemory()) + return false; + + SDValue Tmp0 = getValue(I.getArgOperand(0)); + SDValue Tmp1 = getValue(I.getArgOperand(1)); + EVT VT = Tmp0.getValueType(); + setValue(&I, DAG.getNode(ISD::FLDEXP, getCurSDLoc(), VT, Tmp0, Tmp1)); + return true; +} + void SelectionDAGBuilder::visitCall(const CallInst &I) { // Handle inline assembly differently. 
if (isa(I.getCalledValue())) { @@ -5875,6 +5901,12 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { if (visitUnaryFloatCall(I, ISD::FEXP2)) return; break; + case LibFunc::ldexp: + case LibFunc::ldexpf: + case LibFunc::ldexpl: + if (visitLdExpCall(I)) + return; + break; case LibFunc::memcmp: if (visitMemCmpCall(I)) return; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 321e18d..38a2a22 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -838,6 +838,7 @@ private: bool visitStrNLenCall(const CallInst &I); bool visitUnaryFloatCall(const CallInst &I, unsigned Opcode); bool visitBinaryFloatCall(const CallInst &I, unsigned Opcode); + bool visitLdExpCall(const CallInst &I); void visitAtomicLoad(const LoadInst &I); void visitAtomicStore(const StoreInst &I); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 8034634..25e9b6b 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -207,6 +207,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::UMIN: return "umin"; case ISD::UMAX: return "umax"; + case ISD::FLDEXP: return "fldexp"; case ISD::FPOWI: return "fpowi"; case ISD::SETCC: return "setcc"; case ISD::SELECT: return "select"; diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index ece2e39..f683828 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -166,6 +166,11 @@ static void InitLibcallNames(const char **Names, const Triple &TT) { Names[RTLIB::EXP2_F80] = "exp2l"; Names[RTLIB::EXP2_F128] = "exp2l"; Names[RTLIB::EXP2_PPCF128] = "exp2l"; + Names[RTLIB::LDEXP_F32] = "ldexpf"; + Names[RTLIB::LDEXP_F64] = "ldexp"; + Names[RTLIB::LDEXP_F80] = "ldexpl"; + Names[RTLIB::LDEXP_F128] = "ldexpl"; + 
Names[RTLIB::LDEXP_PPCF128] = "ldexpl"; Names[RTLIB::SIN_F32] = "sinf"; Names[RTLIB::SIN_F64] = "sin"; Names[RTLIB::SIN_F80] = "sinl"; @@ -433,6 +438,13 @@ static void InitLibcallNames(const char **Names, const Triple &TT) { Names[RTLIB::FPEXT_F16_F32] = "__extendhfsf2"; Names[RTLIB::FPROUND_F32_F16] = "__truncsfhf2"; } + + if (TT.isOSWindows() && !TT.isOSCygMing()) { + Names[RTLIB::LDEXP_F32] = nullptr; + Names[RTLIB::LDEXP_F80] = nullptr; + Names[RTLIB::LDEXP_F128] = nullptr; + Names[RTLIB::LDEXP_PPCF128] = nullptr; + } } /// InitLibcallCallingConvs - Set default libcall CallingConvs. diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 428624c..a6b5d77 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -269,6 +269,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::FCOS, MVT::f64, Expand); setOperationAction(ISD::FPOW, MVT::f32, Expand); setOperationAction(ISD::FPOW, MVT::f64, Expand); + setOperationAction(ISD::FLDEXP, MVT::f32, Expand); + setOperationAction(ISD::FLDEXP, MVT::f64, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); @@ -292,6 +294,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote); setOperationAction(ISD::FPOW, MVT::f16, Promote); setOperationAction(ISD::FPOWI, MVT::f16, Promote); + setOperationAction(ISD::FLDEXP, MVT::f16, Promote); setOperationAction(ISD::FRINT, MVT::f16, Promote); setOperationAction(ISD::FSIN, MVT::f16, Promote); setOperationAction(ISD::FSINCOS, MVT::f16, Promote); @@ -336,6 +339,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::FNEG, MVT::v4f16, Expand); setOperationAction(ISD::FPOW, MVT::v4f16, Expand); setOperationAction(ISD::FPOWI, MVT::v4f16, Expand); + 
setOperationAction(ISD::FLDEXP, MVT::v4f16, Expand); setOperationAction(ISD::FREM, MVT::v4f16, Expand); setOperationAction(ISD::FROUND, MVT::v4f16, Expand); setOperationAction(ISD::FRINT, MVT::v4f16, Expand); @@ -368,6 +372,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::FNEG, MVT::v8f16, Expand); setOperationAction(ISD::FPOW, MVT::v8f16, Expand); setOperationAction(ISD::FPOWI, MVT::v8f16, Expand); + setOperationAction(ISD::FLDEXP, MVT::v8f16, Expand); setOperationAction(ISD::FREM, MVT::v8f16, Expand); setOperationAction(ISD::FROUND, MVT::v8f16, Expand); setOperationAction(ISD::FRINT, MVT::v8f16, Expand); @@ -648,6 +653,7 @@ void AArch64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) { setOperationAction(ISD::FCOS, VT.getSimpleVT(), Expand); setOperationAction(ISD::FPOWI, VT.getSimpleVT(), Expand); setOperationAction(ISD::FPOW, VT.getSimpleVT(), Expand); + setOperationAction(ISD::FLDEXP, VT.getSimpleVT(), Expand); setOperationAction(ISD::FLOG, VT.getSimpleVT(), Expand); setOperationAction(ISD::FLOG2, VT.getSimpleVT(), Expand); setOperationAction(ISD::FLOG10, VT.getSimpleVT(), Expand); diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index af9fcbd..c66ec36 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -959,8 +959,8 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, } case Intrinsic::AMDGPU_ldexp: - return DAG.getNode(AMDGPUISD::LDEXP, DL, VT, Op.getOperand(1), - Op.getOperand(2)); + return DAG.getNode(ISD::FLDEXP, DL, VT, Op.getOperand(1), + Op.getOperand(2)); case AMDGPUIntrinsic::AMDGPU_imax: return DAG.getNode(ISD::SMAX, DL, VT, Op.getOperand(1), @@ -2158,7 +2158,7 @@ SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG, SDValue CvtLo = DAG.getNode(ISD::UINT_TO_FP, SL, MVT::f64, Lo); - SDValue LdExp = DAG.getNode(AMDGPUISD::LDEXP, SL, MVT::f64, CvtHi, + 
SDValue LdExp = DAG.getNode(ISD::FLDEXP, SL, MVT::f64, CvtHi, DAG.getConstant(32, SL, MVT::i32)); // TODO: Should this propagate fast-math-flags? return DAG.getNode(ISD::FADD, SL, MVT::f64, LdExp, CvtLo); @@ -2678,7 +2678,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(RSQ) NODE_NAME_CASE(RSQ_LEGACY) NODE_NAME_CASE(RSQ_CLAMPED) - NODE_NAME_CASE(LDEXP) NODE_NAME_CASE(FP_CLASS) NODE_NAME_CASE(DOT4) NODE_NAME_CASE(CARRY) diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h index 1e060c4..a69cef1 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -254,7 +254,6 @@ enum NodeType : unsigned { RSQ, RSQ_LEGACY, RSQ_CLAMPED, - LDEXP, FP_CLASS, DOT4, CARRY, diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/lib/Target/AMDGPU/AMDGPUInstrInfo.td index b413897..4b59ec3 100644 --- a/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -65,8 +65,6 @@ def AMDGPUrsq_legacy : SDNode<"AMDGPUISD::RSQ_LEGACY", SDTFPUnaryOp>; // out = 1.0 / sqrt(a) result clamped to +/- max_float. def AMDGPUrsq_clamped : SDNode<"AMDGPUISD::RSQ_CLAMPED", SDTFPUnaryOp>; -def AMDGPUldexp : SDNode<"AMDGPUISD::LDEXP", AMDGPULdExpOp>; - def AMDGPUfp_class : SDNode<"AMDGPUISD::FP_CLASS", AMDGPUFPClassOp>; // out = max(a, b) a and b are floats, where a nan comparison fails. 
diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td index ed75b4d..5f4f146 100644 --- a/lib/Target/AMDGPU/SIInstructions.td +++ b/lib/Target/AMDGPU/SIInstructions.td @@ -1598,7 +1598,7 @@ defm V_MBCNT_HI_U32_B32 : VOP2_VI3_Inst , "v_mbcnt_hi_u32_b32 VOP_I32_I32_I32 >; defm V_LDEXP_F32 : VOP2_VI3_Inst , "v_ldexp_f32", - VOP_F32_F32_I32, AMDGPUldexp + VOP_F32_F32_I32, fldexp >; defm V_CVT_PKACCUM_U8_F32 : VOP2_VI3_Inst , "v_cvt_pkaccum_u8_f32", @@ -1749,7 +1749,7 @@ defm V_MAX_F64 : VOP3Inst , "v_max_f64", } // isCommutable = 1 defm V_LDEXP_F64 : VOP3Inst , "v_ldexp_f64", - VOP_F64_F64_I32, AMDGPUldexp + VOP_F64_F64_I32, fldexp >; } // let SchedRW = [WriteDoubleAdd] diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 13f5285..01645e5 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -482,6 +482,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FSQRT, MVT::v2f64, Expand); setOperationAction(ISD::FSIN, MVT::v2f64, Expand); setOperationAction(ISD::FCOS, MVT::v2f64, Expand); + setOperationAction(ISD::FLDEXP, MVT::v2f64, Expand); setOperationAction(ISD::FPOWI, MVT::v2f64, Expand); setOperationAction(ISD::FPOW, MVT::v2f64, Expand); setOperationAction(ISD::FLOG, MVT::v2f64, Expand); @@ -500,6 +501,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); setOperationAction(ISD::FSIN, MVT::v4f32, Expand); setOperationAction(ISD::FCOS, MVT::v4f32, Expand); + setOperationAction(ISD::FLDEXP, MVT::v4f32, Expand); setOperationAction(ISD::FPOWI, MVT::v4f32, Expand); setOperationAction(ISD::FPOW, MVT::v4f32, Expand); setOperationAction(ISD::FLOG, MVT::v4f32, Expand); @@ -517,6 +519,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FSQRT, MVT::v2f32, Expand); setOperationAction(ISD::FSIN, MVT::v2f32, Expand); 
setOperationAction(ISD::FCOS, MVT::v2f32, Expand); + setOperationAction(ISD::FLDEXP, MVT::v2f32, Expand); setOperationAction(ISD::FPOWI, MVT::v2f32, Expand); setOperationAction(ISD::FPOW, MVT::v2f32, Expand); setOperationAction(ISD::FLOG, MVT::v2f32, Expand); @@ -641,6 +644,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FSQRT, MVT::f64, Expand); setOperationAction(ISD::FSIN, MVT::f64, Expand); setOperationAction(ISD::FCOS, MVT::f64, Expand); + setOperationAction(ISD::FLDEXP, MVT::f64, Expand); setOperationAction(ISD::FPOWI, MVT::f64, Expand); setOperationAction(ISD::FPOW, MVT::f64, Expand); setOperationAction(ISD::FLOG, MVT::f64, Expand); @@ -924,6 +928,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, } setOperationAction(ISD::FPOW, MVT::f64, Expand); setOperationAction(ISD::FPOW, MVT::f32, Expand); + setOperationAction(ISD::FLDEXP, MVT::f32, Expand); + setOperationAction(ISD::FLDEXP, MVT::f64, Expand); if (!Subtarget->hasVFP4()) { setOperationAction(ISD::FMA, MVT::f64, Expand); diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index 4ebc596..1aa4cc8 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1433,7 +1433,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, for (unsigned FPExpOp : {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS, ISD::FSINCOS, - ISD::FPOW, ISD::FCOPYSIGN}) { + ISD::FPOW, ISD::FLDEXP, ISD::FCOPYSIGN}) { setOperationAction(FPExpOp, MVT::f32, Expand); setOperationAction(FPExpOp, MVT::f64, Expand); } @@ -1495,7 +1495,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, ISD::FCOS, ISD::FPOWI, ISD::FPOW, ISD::FLOG, ISD::FLOG2, ISD::FLOG10, ISD::FEXP, ISD::FEXP2, ISD::FCEIL, ISD::FTRUNC, ISD::FRINT, ISD::FNEARBYINT, ISD::FROUND, ISD::FFLOOR, - ISD::FMINNUM, ISD::FMAXNUM, ISD::FSINCOS, + ISD::FMINNUM, ISD::FMAXNUM, ISD::FSINCOS, 
ISD::FLDEXP, // Misc: ISD::SELECT, ISD::ConstantPool, // Vector: diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index b88af5a..eff9ef3 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -362,6 +362,8 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM, setOperationAction(ISD::FCOS, MVT::f64, Expand); setOperationAction(ISD::FSINCOS, MVT::f32, Expand); setOperationAction(ISD::FSINCOS, MVT::f64, Expand); + setOperationAction(ISD::FLDEXP, MVT::f32, Expand); + setOperationAction(ISD::FLDEXP, MVT::f64, Expand); setOperationAction(ISD::FPOWI, MVT::f32, Expand); setOperationAction(ISD::FPOW, MVT::f32, Expand); setOperationAction(ISD::FPOW, MVT::f64, Expand); diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index f4919dd..fd07eb9 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -165,12 +165,14 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FSINCOS, MVT::f64, Expand); setOperationAction(ISD::FREM , MVT::f64, Expand); setOperationAction(ISD::FPOW , MVT::f64, Expand); + setOperationAction(ISD::FLDEXP, MVT::f64, Expand); setOperationAction(ISD::FMA , MVT::f64, Legal); setOperationAction(ISD::FSIN , MVT::f32, Expand); setOperationAction(ISD::FCOS , MVT::f32, Expand); setOperationAction(ISD::FSINCOS, MVT::f32, Expand); setOperationAction(ISD::FREM , MVT::f32, Expand); setOperationAction(ISD::FPOW , MVT::f32, Expand); + setOperationAction(ISD::FLDEXP, MVT::f32, Expand); setOperationAction(ISD::FMA , MVT::f32, Legal); setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); @@ -455,6 +457,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FCOS, VT, Expand); setOperationAction(ISD::FABS, VT, Expand); setOperationAction(ISD::FPOWI, VT, Expand); + setOperationAction(ISD::FLDEXP, VT, Expand); 
setOperationAction(ISD::FFLOOR, VT, Expand); setOperationAction(ISD::FCEIL, VT, Expand); setOperationAction(ISD::FTRUNC, VT, Expand); @@ -687,6 +690,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FCOS , MVT::v4f64, Expand); setOperationAction(ISD::FPOWI , MVT::v4f64, Expand); setOperationAction(ISD::FPOW , MVT::v4f64, Expand); + setOperationAction(ISD::FLDEXP, MVT::v4f64, Expand); setOperationAction(ISD::FLOG , MVT::v4f64, Expand); setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand); setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand); @@ -733,6 +737,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FCOS , MVT::v4f32, Expand); setOperationAction(ISD::FPOWI , MVT::v4f32, Expand); setOperationAction(ISD::FPOW , MVT::v4f32, Expand); + setOperationAction(ISD::FLDEXP , MVT::v4f32, Expand); setOperationAction(ISD::FLOG , MVT::v4f32, Expand); setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand); setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand); diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 1c7d86b..478bdbe 100644 --- a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -125,7 +125,8 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE}) setCondCodeAction(CC, T, Expand); // Expand floating-point library function operators. - for (auto Op : {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOWI, ISD::FPOW}) + for (auto Op : {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOWI, ISD::FPOW, + ISD::FLDEXP}) setOperationAction(Op, T, Expand); // Note supported floating-point library function operators that otherwise // default to expand. 
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 4240aaa..117d4d5 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -654,6 +654,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FPOW , MVT::f64 , Expand); setOperationAction(ISD::FPOW , MVT::f80 , Expand); + setOperationAction(ISD::FLDEXP , MVT::f32 , LibCall); + setOperationAction(ISD::FLDEXP , MVT::f64 , LibCall); + setOperationAction(ISD::FLDEXP , MVT::f80 , LibCall); + + // These are not available on some Windows configurations + if (!getLibcallName(RTLIB::LDEXP_F32)) { + setOperationAction(ISD::FLDEXP, MVT::f32, Expand); + setOperationAction(ISD::FLDEXP, MVT::f80, Expand); + } + setOperationAction(ISD::FLOG, MVT::f80, Expand); setOperationAction(ISD::FLOG2, MVT::f80, Expand); setOperationAction(ISD::FLOG10, MVT::f80, Expand); @@ -692,6 +702,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FREM, VT, Expand); setOperationAction(ISD::FMA, VT, Expand); setOperationAction(ISD::FPOWI, VT, Expand); + setOperationAction(ISD::FLDEXP, VT, Expand); setOperationAction(ISD::FSQRT, VT, Expand); setOperationAction(ISD::FCOPYSIGN, VT, Expand); setOperationAction(ISD::FFLOOR, VT, Expand); diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 14a0a57..aff54eb 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1185,13 +1185,19 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) { Value *Op = CI->getArgOperand(0); // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32 // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32 + bool TryLdExp; LibFunc::Func LdExp = LibFunc::ldexpl; - if (Op->getType()->isFloatTy()) - LdExp = LibFunc::ldexpf; - else if (Op->getType()->isDoubleTy()) - LdExp = LibFunc::ldexp; + if 
(Callee->isIntrinsic()) { + TryLdExp = true; + } else { + if (Op->getType()->isFloatTy()) + LdExp = LibFunc::ldexpf; + else if (Op->getType()->isDoubleTy()) + LdExp = LibFunc::ldexp; + TryLdExp = TLI->has(LdExp); + } - if (TLI->has(LdExp)) { + if (TryLdExp) { Value *LdExpArg = nullptr; if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) { if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32) @@ -1207,14 +1213,19 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) { One = ConstantExpr::getFPExtend(One, Op->getType()); Module *M = Caller->getParent(); - Value *Callee = - M->getOrInsertFunction(TLI->getName(LdExp), Op->getType(), - Op->getType(), B.getInt32Ty(), nullptr); - CallInst *CI = B.CreateCall(Callee, {One, LdExpArg}); - if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - - return CI; + if (Callee->isIntrinsic()) { + Function *F = + Intrinsic::getDeclaration(M, Intrinsic::ldexp, Op->getType()); + return B.CreateCall(F, {One, LdExpArg}); + } else { + Value *Callee = + M->getOrInsertFunction(TLI->getName(LdExp), Op->getType(), + Op->getType(), B.getInt32Ty(), nullptr); + CallInst *CI = B.CreateCall(Callee, {One, LdExpArg}); + if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + return CI; + } } } return Ret; diff --git a/test/CodeGen/AMDGPU/llvm.ldexp.ll b/test/CodeGen/AMDGPU/llvm.ldexp.ll new file mode 100644 index 0000000..e5f1c9a --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.ldexp.ll @@ -0,0 +1,26 @@ +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +; SI-LABEL: {{^}}test_ldexp_f32: +; SI: v_ldexp_f32 +; SI: s_endpgm +define void @test_ldexp_f32(float addrspace(1)* %out, float %a, i32 %b) nounwind { + %result = call float @llvm.ldexp.f32(float %a, i32 %b) nounwind readnone + store float
%result, float addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: {{^}}test_ldexp_f64: +; SI: v_ldexp_f64 +; SI: s_endpgm +define void @test_ldexp_f64(double addrspace(1)* %out, double %a, i32 %b) nounwind { + %result = call double @llvm.ldexp.f64(double %a, i32 %b) nounwind readnone + store double %result, double addrspace(1)* %out, align 8 + ret void +} + +declare float @llvm.ldexp.f32(float, i32) #1 +declare double @llvm.ldexp.f64(double, i32) #1 + +attributes #1 = { nounwind readnone } + diff --git a/test/CodeGen/X86/ldexp.ll b/test/CodeGen/X86/ldexp.ll new file mode 100644 index 0000000..710ea4a --- /dev/null +++ b/test/CodeGen/X86/ldexp.ll @@ -0,0 +1,30 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s +; RUN: llc < %s -mtriple=i386-pc-win32 | FileCheck %s -check-prefix=CHECK-WIN + +; CHECK-LABEL: ldexp_f32: +; CHECK-WIN-LABEL: ldexp_f32: +; CHECK: jmp ldexpf +; CHECK-WIN-NOT: ldexpf +define float @ldexp_f32(i8 zeroext %x) { + %1 = zext i8 %x to i32 + %2 = call float @llvm.ldexp.f32(float 1.000000e+00, i32 %1) + ret float %2 +} + +; CHECK-LABEL: ldexp_f64: +; CHECK-WIN-LABEL: ldexp_f64: +; CHECK: jmp ldexp +; CHECK-WIN: calll _ldexp +define double @ldexp_f64(i8 zeroext %x) { + %1 = zext i8 %x to i32 + %2 = call double @llvm.ldexp.f64(double 1.000000e+00, i32 %1) + ret double %2 +} + +; Function Attrs: nounwind readnone +declare double @llvm.ldexp.f64(double, i32) #0 + +; Function Attrs: nounwind readnone +declare float @llvm.ldexp.f32(float, i32) #0 + +attributes #0 = { nounwind readnone } diff --git a/test/Transforms/InstCombine/exp2-1.ll b/test/Transforms/InstCombine/exp2-1.ll index 8e6a0e0..30a2a02 100644 --- a/test/Transforms/InstCombine/exp2-1.ll +++ b/test/Transforms/InstCombine/exp2-1.ll @@ -1,7 +1,6 @@ ; Test that the exp2 library call simplifier works correctly. 
; ; RUN: opt < %s -instcombine -S | FileCheck %s -; RUN: opt < %s -instcombine -S -mtriple=i386-pc-win32 | FileCheck %s -check-prefix=CHECK-WIN target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" @@ -81,11 +80,9 @@ declare float @llvm.exp2.f32(float) define double @test_simplify9(i8 zeroext %x) { ; CHECK-LABEL: @test_simplify9( -; CHECK-WIN-LABEL: @test_simplify9( %conv = uitofp i8 %x to double %ret = call double @llvm.exp2.f64(double %conv) -; CHECK: call double @ldexp -; CHECK-WIN: call double @ldexp +; CHECK: call double @llvm.ldexp.f64 ret double %ret } @@ -94,7 +91,6 @@ define float @test_simplify10(i8 zeroext %x) { ; CHECK-WIN-LABEL: @test_simplify10( %conv = uitofp i8 %x to float %ret = call float @llvm.exp2.f32(float %conv) -; CHECK: call float @ldexpf -; CHECK-WIN-NOT: call float @ldexpf +; CHECK: call float @llvm.ldexp.f32 ret float %ret } -- 2.5.0