From f66cc2be7b632b822a40c959eb1c6147be49a49c Mon Sep 17 00:00:00 2001
From: Tom Stellard
Date: Thu, 29 Aug 2013 11:43:01 -0700
Subject: [PATCH] XXX: WIP Register spilling v2

Add SGPR spilling for SI by implementing storeRegToStackSlot() and
loadRegFromStackSlot() in SIInstrInfo.  The corresponding declarations
in AMDGPUInstrInfo are marked virtual.

Spilling a 32-bit SGPR (SReg_32) takes one of two paths:
  - If SIRegisterInfo::allUsesCopiedToVGPR() returns true (the register
    has at least one use, and every use is a COPY whose destination is
    a virtual register of a non-SGPR class), the value is copied into a
    fresh VReg_32 virtual register.
  - Otherwise the value is written with V_WRITELANE_B32 into one lane of
    a VGPR handed out by RegSpillTracker::getNextLane().  The tracker
    creates a new VReg_32 once MAX_LANES (64) lanes have been handed
    out.  The lane operand is emitted as (Lane + 0x80).

Any other register class is split into RC->getSize() / 4 SReg_32
sub-registers, and dword i is spilled/reloaded through frame index
(FrameIndex + i).

SIMachineFunctionInfo::SpillTracker records, per frame index, the VGPR
(and lane, or -1 when the whole VGPR holds the value) that a register
was spilled to, so loadRegFromStackSlot() can emit the matching COPY or
V_READLANE_B32.

NOTE(review): nothing in this patch reserves frame indices
FrameIndex + 1 .. FrameIndex + n - 1 for multi-dword spills; confirm
they cannot alias frame indices used by other spill slots.
NOTE(review): getSpilledReg() uses std::map::operator[], so looking up
an unknown frame index inserts a default SpilledReg (VGPR == 0), which
the assert in loadRegFromStackSlot() then catches in +Asserts builds
only.

v2:
  - Fix encoding of Lane Mask
  - Use correct register flags, so we don't overwrite the low dword when
    restoring multi-dword registers.
---
 lib/Target/R600/AMDGPUInstrInfo.h         | 20 ++++-----
 lib/Target/R600/SIInstrInfo.cpp           | 74 +++++++++++++++++++++++++++++++
 lib/Target/R600/SIInstrInfo.h             | 12 +++++
 lib/Target/R600/SIMachineFunctionInfo.cpp | 35 ++++++++++++++-
 lib/Target/R600/SIMachineFunctionInfo.h   | 27 +++++++++++
 lib/Target/R600/SIRegisterInfo.cpp        | 26 +++++++++++
 lib/Target/R600/SIRegisterInfo.h          |  2 +
 7 files changed, 185 insertions(+), 11 deletions(-)

diff --git a/lib/Target/R600/AMDGPUInstrInfo.h b/lib/Target/R600/AMDGPUInstrInfo.h
index 306f467..0088fb6 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.h
+++ b/lib/Target/R600/AMDGPUInstrInfo.h
@@ -77,16 +77,16 @@ public:
                            unsigned DestReg, unsigned SrcReg,
                            bool KillSrc) const = 0;
 
-  void storeRegToStackSlot(MachineBasicBlock &MBB,
-                           MachineBasicBlock::iterator MI,
-                           unsigned SrcReg, bool isKill, int FrameIndex,
-                           const TargetRegisterClass *RC,
-                           const TargetRegisterInfo *TRI) const;
-  void loadRegFromStackSlot(MachineBasicBlock &MBB,
-                            MachineBasicBlock::iterator MI,
-                            unsigned DestReg, int FrameIndex,
-                            const TargetRegisterClass *RC,
-                            const TargetRegisterInfo *TRI) const;
+  virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator MI,
+                                   unsigned SrcReg, bool isKill, int FrameIndex,
+                                   const TargetRegisterClass *RC,
+                                   const TargetRegisterInfo *TRI) const;
+  virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator MI,
+                                    unsigned DestReg, int FrameIndex,
+                                    const TargetRegisterClass *RC,
+                                    const TargetRegisterInfo *TRI) const;
 
 protected:
   MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
index 356cf24..aa31d80 100644
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -16,6 +16,7 @@
 #include "SIInstrInfo.h"
 #include "AMDGPUTargetMachine.h"
 #include "SIDefines.h"
+#include "SIMachineFunctionInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/MC/MCInstrDesc.h"
@@ -182,6 +183,79 @@ unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
   return Opcode;
 }
 
+void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+                                      MachineBasicBlock::iterator MI,
+                                      unsigned SrcReg, bool isKill,
+                                      int FrameIndex,
+                                      const TargetRegisterClass *RC,
+                                      const TargetRegisterInfo *TRI) const {
+  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+  SIMachineFunctionInfo *MFI = MBB.getParent()->getInfo<SIMachineFunctionInfo>();
+  DebugLoc DL = MBB.findDebugLoc(MI);
+  unsigned KillFlag = isKill ? RegState::Kill : 0;
+
+  if (RC == &AMDGPU::SReg_32RegClass) {
+    if (RI.allUsesCopiedToVGPR(MRI, SrcReg)) {
+      unsigned VReg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+      BuildMI(MBB, MI, DL, get(AMDGPU::COPY), VReg)
+              .addReg(SrcReg, KillFlag);
+      MFI->SpillTracker.addSpilledReg(FrameIndex, VReg);
+    } else {
+      unsigned Lane = MFI->SpillTracker.getNextLane(MRI);
+      BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32_e32),
+              MFI->SpillTracker.LaneVGPR)
+              .addReg(SrcReg, KillFlag)
+              .addImm(Lane + 0x80);
+      MFI->SpillTracker.addSpilledReg(FrameIndex, MFI->SpillTracker.LaneVGPR,
+                                      Lane);
+    }
+  } else {
+    for (unsigned i = 0, e = RC->getSize() / 4; i != e; ++i) {
+      unsigned SubReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+      BuildMI(MBB, MI, MBB.findDebugLoc(MI), get(AMDGPU::COPY), SubReg)
+              .addReg(SrcReg, 0, RI.getSubRegFromChannel(i));
+      storeRegToStackSlot(MBB, MI, SubReg, isKill, FrameIndex + i,
+                          &AMDGPU::SReg_32RegClass, TRI);
+    }
+  }
+}
+
+void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                       MachineBasicBlock::iterator MI,
+                                       unsigned DestReg, int FrameIndex,
+                                       const TargetRegisterClass *RC,
+                                       const TargetRegisterInfo *TRI) const {
+  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+  SIMachineFunctionInfo *MFI = MBB.getParent()->getInfo<SIMachineFunctionInfo>();
+  DebugLoc DL = MBB.findDebugLoc(MI);
+  if (RC == &AMDGPU::SReg_32RegClass) {
+    SIMachineFunctionInfo::SpilledReg Spill =
+        MFI->SpillTracker.getSpilledReg(FrameIndex);
+    assert(Spill.VGPR);
+    if (!Spill.hasLane()) {
+      BuildMI(MBB, MI, DL, get(AMDGPU::COPY), DestReg)
+              .addReg(Spill.VGPR);
+    } else {
+      BuildMI(MBB, MI, DL, get(AMDGPU::V_READLANE_B32_e32), DestReg)
+              .addReg(Spill.VGPR)
+              .addImm(Spill.Lane + 0x80);
+    }
+  } else {
+    for (unsigned i = 0, e = RC->getSize() / 4; i != e; ++i) {
+      unsigned Flags = RegState::Define;
+      if (i == 0) {
+        Flags |= RegState::Undef;
+      }
+      unsigned SubReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+      loadRegFromStackSlot(MBB, MI, SubReg, FrameIndex + i,
+                           &AMDGPU::SReg_32RegClass, TRI);
+      BuildMI(MBB, MI, DL, get(AMDGPU::COPY))
+              .addReg(DestReg, Flags, RI.getSubRegFromChannel(i))
+              .addReg(SubReg);
+    }
+  }
+}
+
 MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
                                               bool NewMI) const {
 
diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
index 4ccd4ce..6a9fd07 100644
--- a/lib/Target/R600/SIInstrInfo.h
+++ b/lib/Target/R600/SIInstrInfo.h
@@ -35,6 +35,18 @@ public:
                            unsigned DestReg, unsigned SrcReg,
                            bool KillSrc) const;
 
+  void storeRegToStackSlot(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator MI,
+                           unsigned SrcReg, bool isKill, int FrameIndex,
+                           const TargetRegisterClass *RC,
+                           const TargetRegisterInfo *TRI) const;
+
+  void loadRegFromStackSlot(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator MI,
+                            unsigned DestReg, int FrameIndex,
+                            const TargetRegisterClass *RC,
+                            const TargetRegisterInfo *TRI) const;
+
   unsigned commuteOpcode(unsigned Opcode) const;
 
   virtual MachineInstr *commuteInstruction(MachineInstr *MI,
diff --git a/lib/Target/R600/SIMachineFunctionInfo.cpp b/lib/Target/R600/SIMachineFunctionInfo.cpp
index ee0e307..86c5c97 100644
--- a/lib/Target/R600/SIMachineFunctionInfo.cpp
+++ b/lib/Target/R600/SIMachineFunctionInfo.cpp
@@ -10,9 +10,42 @@
 
 
 #include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+#define MAX_LANES 64
 
 using namespace llvm;
 
 SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
   : AMDGPUMachineFunction(MF),
-    PSInputAddr(0) { }
+    PSInputAddr(0),
+    SpillTracker() { }
+
+static unsigned createLaneVGPR(MachineRegisterInfo &MRI) {
+  return MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+}
+
+unsigned SIMachineFunctionInfo::RegSpillTracker::getNextLane(MachineRegisterInfo &MRI) {
+  if (!LaneVGPR) {
+    LaneVGPR = createLaneVGPR(MRI);
+  } else {
+    CurrentLane++;
+    if (CurrentLane == MAX_LANES) {
+      CurrentLane = 0;
+      LaneVGPR = createLaneVGPR(MRI);
+    }
+  }
+  return CurrentLane;
+}
+
+void SIMachineFunctionInfo::RegSpillTracker::addSpilledReg(unsigned FrameIndex,
+                                                           unsigned Reg,
+                                                           int Lane) {
+  SpilledRegisters[FrameIndex] = SpilledReg(Reg, Lane);
+}
+
+const SIMachineFunctionInfo::SpilledReg&
+SIMachineFunctionInfo::RegSpillTracker::getSpilledReg(unsigned FrameIndex) {
+  return SpilledRegisters[FrameIndex];
+}
diff --git a/lib/Target/R600/SIMachineFunctionInfo.h b/lib/Target/R600/SIMachineFunctionInfo.h
index 6da9f7f..1f91450 100644
--- a/lib/Target/R600/SIMachineFunctionInfo.h
+++ b/lib/Target/R600/SIMachineFunctionInfo.h
@@ -16,15 +16,42 @@
 #define SIMACHINEFUNCTIONINFO_H_
 
 #include "AMDGPUMachineFunction.h"
+#include <map>
 
 namespace llvm {
 
+class MachineRegisterInfo;
+
 /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
 /// tells the hardware which interpolation parameters to load.
 class SIMachineFunctionInfo : public AMDGPUMachineFunction {
 public:
+
+  struct SpilledReg {
+    unsigned VGPR;
+    int Lane;
+    SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) { }
+    SpilledReg() : VGPR(0), Lane(-1) { }
+    bool hasLane() const { return Lane != -1;}
+  };
+
+  struct RegSpillTracker {
+  private:
+    unsigned CurrentLane;
+    std::map<unsigned, SpilledReg> SpilledRegisters;
+  public:
+    unsigned LaneVGPR;
+    RegSpillTracker() : CurrentLane(0), SpilledRegisters(), LaneVGPR(0) { }
+    unsigned getNextLane(MachineRegisterInfo &MRI);
+    void addSpilledReg(unsigned FrameIndex, unsigned Reg, int Lane = -1);
+    const SpilledReg& getSpilledReg(unsigned FrameIndex);
+  };
+
+  // SIMachineFunctionInfo definition
+
   SIMachineFunctionInfo(const MachineFunction &MF);
   unsigned PSInputAddr;
+  struct RegSpillTracker SpillTracker;
 };
 
 } // End namespace llvm
diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp
index 5d12564..2422521 100644
--- a/lib/Target/R600/SIRegisterInfo.cpp
+++ b/lib/Target/R600/SIRegisterInfo.cpp
@@ -15,6 +15,7 @@
 
 #include "SIRegisterInfo.h"
 #include "AMDGPUTargetMachine.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 
 using namespace llvm;
 
@@ -81,3 +82,28 @@ bool SIRegisterInfo::isSGPRClass(const TargetRegisterClass *RC) const {
          RC == &AMDGPU::SReg_256RegClass ||
          RC == &AMDGPU::SReg_512RegClass;
 }
+
+bool SIRegisterInfo::allUsesCopiedToVGPR(const MachineRegisterInfo &MRI,
+                                         unsigned Reg) const {
+  if (MRI.use_empty(Reg)) {
+    return false;
+  }
+  for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg),
+                                         E = MachineRegisterInfo::use_end();
+                                         I != E; ++I) {
+    MachineInstr &MI = *I;
+    if (MI.getOpcode() != AMDGPU::COPY) {
+      return false;
+    }
+    unsigned DstReg = MI.getOperand(0).getReg();
+    // XXX: Copies to physical VGPR registers should be allowed.
+    if (!TargetRegisterInfo::isVirtualRegister(DstReg)) {
+      return false;
+    }
+    if (isSGPRClass(MRI.getRegClass(DstReg))) {
+      return false;
+    }
+  }
+  return true;
+}
+
diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h
index ffc5797..9e0c602 100644
--- a/lib/Target/R600/SIRegisterInfo.h
+++ b/lib/Target/R600/SIRegisterInfo.h
@@ -48,6 +48,8 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
 
   /// \returns true if this class contains only SGPR registers
   bool isSGPRClass(const TargetRegisterClass *RC) const;
+
+  bool allUsesCopiedToVGPR(const MachineRegisterInfo &MRI, unsigned Reg) const;
 };
 
 } // End namespace llvm
-- 
1.8.1.5