From 33da5ade7127b0efbcff193f52a1e5bc53f82f14 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Mon, 24 Feb 2014 12:41:48 -0500 Subject: [PATCH] R600/SI: Only create one instruction when spilling/restoring register The register spiller assumes that only one new instruction is created when spilling and restoring registers, so we need to emit pseudo instructions for vector register spills and lower them after register allocation. --- lib/Target/R600/SIInstrInfo.cpp | 134 +++++++++++++++++++++++++----- lib/Target/R600/SIInstrInfo.h | 2 + lib/Target/R600/SIInstructions.td | 21 +++++ lib/Target/R600/SIMachineFunctionInfo.cpp | 14 ++-- lib/Target/R600/SIMachineFunctionInfo.h | 6 +- lib/Target/R600/SIRegisterInfo.cpp | 7 ++ lib/Target/R600/SIRegisterInfo.h | 6 ++ 7 files changed, 161 insertions(+), 29 deletions(-) diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index a239fb9..57ef4d3 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -198,21 +198,36 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, unsigned KillFlag = isKill ? 
RegState::Kill : 0; if (TRI->getCommonSubClass(RC, &AMDGPU::SGPR_32RegClass)) { - unsigned Lane = MFI->SpillTracker.getNextLane(MRI); - BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32), - MFI->SpillTracker.LaneVGPR) + unsigned Lane = MFI->SpillTracker.reserveLanes(MRI); + + BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32), MFI->SpillTracker.LaneVGPR) .addReg(SrcReg, KillFlag) .addImm(Lane); - MFI->SpillTracker.addSpilledReg(FrameIndex, MFI->SpillTracker.LaneVGPR, - Lane); + MFI->SpillTracker.addSpilledReg(FrameIndex, MFI->SpillTracker.LaneVGPR, Lane); } else { - for (unsigned i = 0, e = RC->getSize() / 4; i != e; ++i) { - unsigned SubReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); - BuildMI(MBB, MI, MBB.findDebugLoc(MI), get(AMDGPU::COPY), SubReg) - .addReg(SrcReg, 0, RI.getSubRegFromChannel(i)); - storeRegToStackSlot(MBB, MI, SubReg, isKill, FrameIndex + i, - &AMDGPU::SReg_32RegClass, TRI); + // We are only allowed to create one new instruction when spilling + // registers, so we need to use pseudo instructions for vector + // registers. + // + // Reserve a spot in the spill tracker for each sub-register of + // the vector register. 
+ unsigned NumSubRegs = RC->getSize() / 4; + unsigned FirstLane = MFI->SpillTracker.reserveLanes(MRI, NumSubRegs); + MFI->SpillTracker.addSpilledReg(FrameIndex, MFI->SpillTracker.LaneVGPR, + FirstLane); + + unsigned Opcode; + switch (RC->getSize() * 8) { + case 64: Opcode = AMDGPU::SI_SPILL_S64_SAVE; break; + case 128: Opcode = AMDGPU::SI_SPILL_S128_SAVE; break; + case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break; + case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break; + default: llvm_unreachable("Cannot spill register class"); } + + BuildMI(MBB, MI, DL, get(Opcode), MFI->SpillTracker.LaneVGPR) + .addReg(SrcReg) + .addImm(FrameIndex); } } @@ -225,28 +240,101 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, SIMachineFunctionInfo *MFI = MBB.getParent()->getInfo(); DebugLoc DL = MBB.findDebugLoc(MI); if (TRI->getCommonSubClass(RC, &AMDGPU::SReg_32RegClass)) { - SIMachineFunctionInfo::SpilledReg Spill = + SIMachineFunctionInfo::SpilledReg Spill = MFI->SpillTracker.getSpilledReg(FrameIndex); assert(Spill.VGPR); BuildMI(MBB, MI, DL, get(AMDGPU::V_READLANE_B32), DestReg) .addReg(Spill.VGPR) .addImm(Spill.Lane); } else { - for (unsigned i = 0, e = RC->getSize() / 4; i != e; ++i) { - unsigned Flags = RegState::Define; - if (i == 0) { - Flags |= RegState::Undef; - } - unsigned SubReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); - loadRegFromStackSlot(MBB, MI, SubReg, FrameIndex + i, - &AMDGPU::SReg_32RegClass, TRI); - BuildMI(MBB, MI, DL, get(AMDGPU::COPY)) - .addReg(DestReg, Flags, RI.getSubRegFromChannel(i)) - .addReg(SubReg); + unsigned Opcode; + switch(RC->getSize() * 8) { + case 64: Opcode = AMDGPU::SI_SPILL_S64_RESTORE; break; + case 128: Opcode = AMDGPU::SI_SPILL_S128_RESTORE; break; + case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break; + case 512: Opcode = AMDGPU::SI_SPILL_S512_RESTORE; break; + default: llvm_unreachable("Cannot spill register class"); } + + SIMachineFunctionInfo::SpilledReg Spill = + 
MFI->SpillTracker.getSpilledReg(FrameIndex); + + BuildMI(MBB, MI, DL, get(Opcode), DestReg) + .addReg(Spill.VGPR) + .addImm(FrameIndex); } } +bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { + unsigned NumSubRegs = 1; + SIMachineFunctionInfo *MFI = + MI->getParent()->getParent()->getInfo(); + MachineBasicBlock &MBB = *MI->getParent(); + DebugLoc DL = MBB.findDebugLoc(MI); + switch (MI->getOpcode()) { + default: return AMDGPUInstrInfo::expandPostRAPseudo(MI); + + // SGPR register spill + case AMDGPU::SI_SPILL_S512_SAVE: + NumSubRegs <<= 1; // With fall through NumSubRegs will be left-shifted 4 + // times. (1 << 4) = 16 + case AMDGPU::SI_SPILL_S256_SAVE: + NumSubRegs <<= 1; // With fall through NumSubRegs will be left-shifted 3 + // times. (1 << 3) = 8 + case AMDGPU::SI_SPILL_S128_SAVE: + NumSubRegs <<= 1; // With fall through NumSubRegs will be left-shifted 2 + // times. (1 << 2) = 4 + case AMDGPU::SI_SPILL_S64_SAVE: + NumSubRegs <<= 1; // With fall through NumSubRegs will be left-shifted 1 + // time. (1 << 1) = 2 + + for (unsigned i = 0, e = NumSubRegs; i < e; ++i) { + SIMachineFunctionInfo::SpilledReg Spill; + unsigned FrameIndex = MI->getOperand(2).getImm(); + unsigned SubReg = RI.getPhysRegSubReg(MI->getOperand(1).getReg(), + &AMDGPU::SGPR_32RegClass, i); + Spill = MFI->SpillTracker.getSpilledReg(FrameIndex); + + BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32), + MI->getOperand(0).getReg()) + .addReg(SubReg) + .addImm(Spill.Lane + i); + } + MI->eraseFromParent(); + break; + + // SGPR register restore + case AMDGPU::SI_SPILL_S512_RESTORE: + NumSubRegs <<= 1; // With fall through NumSubRegs will be left-shifted 4 + // times. (1 << 4) = 16 + case AMDGPU::SI_SPILL_S256_RESTORE: + NumSubRegs <<= 1; // With fall through NumSubRegs will be left-shifted 3 + // times. (1 << 3) = 8 + case AMDGPU::SI_SPILL_S128_RESTORE: + NumSubRegs <<= 1; // With fall through NumSubRegs will be left-shifted 2 + // times. 
(1 << 2) = 4 + case AMDGPU::SI_SPILL_S64_RESTORE: + NumSubRegs <<= 1; // With fall through NumSubRegs will be left-shifted 1 + // time. (1 << 1) = 2 + + for (unsigned i = 0, e = NumSubRegs; i < e; ++i) { + SIMachineFunctionInfo::SpilledReg Spill; + unsigned FrameIndex = MI->getOperand(2).getImm(); + unsigned SubReg = RI.getPhysRegSubReg(MI->getOperand(0).getReg(), + &AMDGPU::SGPR_32RegClass, i); + Spill = MFI->SpillTracker.getSpilledReg(FrameIndex); + + BuildMI(MBB, MI, DL, get(AMDGPU::V_READLANE_B32), SubReg) + .addReg(MI->getOperand(1).getReg()) + .addImm(Spill.Lane + i); + } + MI->eraseFromParent(); + break; + } + + return true; +} + MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h index de3ff07..c83d090 100644 --- a/lib/Target/R600/SIInstrInfo.h +++ b/lib/Target/R600/SIInstrInfo.h @@ -55,6 +55,8 @@ public: const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const; + virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; + unsigned commuteOpcode(unsigned Opcode) const; virtual MachineInstr *commuteInstruction(MachineInstr *MI, diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 9da05c3..7f69b76 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1413,6 +1413,27 @@ def V_SUB_F64 : InstSI < } // end usesCustomInserter +multiclass SI_SPILL_SGPR { + + def _SAVE : InstSI < + (outs VReg_32:$dst), + (ins sgpr_class:$src, i32imm:$frame_idx), + "", [] + >; + + def _RESTORE : InstSI < + (outs sgpr_class:$dst), + (ins VReg_32:$src, i32imm:$frame_idx), + "", [] + >; + +} + +defm SI_SPILL_S64 : SI_SPILL_SGPR ; +defm SI_SPILL_S128 : SI_SPILL_SGPR ; +defm SI_SPILL_S256 : SI_SPILL_SGPR ; +defm SI_SPILL_S512 : SI_SPILL_SGPR ; + } // end IsCodeGenOnly, isPseudo def : Pat< diff --git a/lib/Target/R600/SIMachineFunctionInfo.cpp b/lib/Target/R600/SIMachineFunctionInfo.cpp 
index ea04346..4f9fed1 100644 --- a/lib/Target/R600/SIMachineFunctionInfo.cpp +++ b/lib/Target/R600/SIMachineFunctionInfo.cpp @@ -30,17 +30,21 @@ static unsigned createLaneVGPR(MachineRegisterInfo &MRI) { return MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass); } -unsigned SIMachineFunctionInfo::RegSpillTracker::getNextLane(MachineRegisterInfo &MRI) { +unsigned SIMachineFunctionInfo::RegSpillTracker::reserveLanes( + MachineRegisterInfo &MRI, unsigned NumRegs) { + unsigned StartLane = CurrentLane; if (!LaneVGPR) { LaneVGPR = createLaneVGPR(MRI); } else { - CurrentLane++; - if (CurrentLane == MAX_LANES) { - CurrentLane = 0; + CurrentLane += NumRegs; + if (CurrentLane >= MAX_LANES) { + StartLane = CurrentLane = 0; LaneVGPR = createLaneVGPR(MRI); + } else { + StartLane++; } } - return CurrentLane; + return StartLane; } void SIMachineFunctionInfo::RegSpillTracker::addSpilledReg(unsigned FrameIndex, diff --git a/lib/Target/R600/SIMachineFunctionInfo.h b/lib/Target/R600/SIMachineFunctionInfo.h index 8dc82a0..f78b1f3 100644 --- a/lib/Target/R600/SIMachineFunctionInfo.h +++ b/lib/Target/R600/SIMachineFunctionInfo.h @@ -43,7 +43,11 @@ public: public: unsigned LaneVGPR; RegSpillTracker() : CurrentLane(0), SpilledRegisters(), LaneVGPR(0) { } - unsigned getNextLane(MachineRegisterInfo &MRI); + /// \p NumRegs The number of consecutive registers that need to be spilled. + /// This function will ensure that all registers are stored in + /// the same VGPR. + /// \returns The lane to be used for storing the first register. 
+ unsigned reserveLanes(MachineRegisterInfo &MRI, unsigned NumRegs = 1); void addSpilledReg(unsigned FrameIndex, unsigned Reg, int Lane = -1); const SpilledReg& getSpilledReg(unsigned FrameIndex); bool programSpillsRegisters() { return !SpilledRegisters.empty(); } diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp index a784fa4..4fee135 100644 --- a/lib/Target/R600/SIRegisterInfo.cpp +++ b/lib/Target/R600/SIRegisterInfo.cpp @@ -129,3 +129,10 @@ const TargetRegisterClass *SIRegisterInfo::getSubRegClass( return &AMDGPU::VGPR_32RegClass; } } + +unsigned SIRegisterInfo::getPhysRegSubReg(unsigned Reg, + const TargetRegisterClass *SubRC, + unsigned Channel) const { + unsigned Index = getHWRegIndex(Reg); + return SubRC->getRegister(Index + Channel); +} diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h index 8148f7f..aa98275 100644 --- a/lib/Target/R600/SIRegisterInfo.h +++ b/lib/Target/R600/SIRegisterInfo.h @@ -63,6 +63,12 @@ struct SIRegisterInfo : public AMDGPURegisterInfo { /// be returned. const TargetRegisterClass *getSubRegClass(const TargetRegisterClass *RC, unsigned SubIdx) const; + + /// \p Channel This is the register channel (e.g. a value from 0-15), not the + /// SubReg index. + /// \returns The sub-register of Reg that is in Channel. + unsigned getPhysRegSubReg(unsigned Reg, const TargetRegisterClass *SubRC, + unsigned Channel) const; }; } // End namespace llvm -- 1.8.1.5