From 15a09a85b634e2f6bff00453155a8dc210be8529 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 7 May 2015 13:30:19 -0700 Subject: [PATCH] R600/SI: Fix bug in VGPR spilling AMDGPU::SI_SPILL_V96_RESTORE was missing from a switch statement, which caused the srsrc and soffset register to not be set correctly. This commit replaces the switch statement with a SITargetInfo query to make sure all spill instructions are covered. --- lib/Target/R600/SIDefines.h | 3 +- lib/Target/R600/SIInstrFormats.td | 2 + lib/Target/R600/SIInstrInfo.h | 4 + lib/Target/R600/SIInstructions.td | 4 +- lib/Target/R600/SIPrepareScratchRegs.cpp | 132 ++++++++++++++----------------- 5 files changed, 69 insertions(+), 76 deletions(-) diff --git a/lib/Target/R600/SIDefines.h b/lib/Target/R600/SIDefines.h index b540140..4727d97 100644 --- a/lib/Target/R600/SIDefines.h +++ b/lib/Target/R600/SIDefines.h @@ -36,7 +36,8 @@ enum { DS = 1 << 17, MIMG = 1 << 18, FLAT = 1 << 19, - WQM = 1 << 20 + WQM = 1 << 20, + VGPRSpill = 1 << 21 }; } diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td index bc693c3..273f567 100644 --- a/lib/Target/R600/SIInstrFormats.td +++ b/lib/Target/R600/SIInstrFormats.td @@ -39,6 +39,7 @@ class InstSI pattern> : field bits<1> MIMG = 0; field bits<1> FLAT = 0; field bits<1> WQM = 0; + field bits<1> VGPRSpill = 0; // These need to be kept in sync with the enum in SIInstrFlags. let TSFlags{0} = VM_CNT; @@ -66,6 +67,7 @@ class InstSI pattern> : let TSFlags{18} = MIMG; let TSFlags{19} = FLAT; let TSFlags{20} = WQM; + let TSFlags{21} = VGPRSpill; // Most instructions require adjustments after selection to satisfy // operand requirements. diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h index 7e049dc..7732062 100644 --- a/lib/Target/R600/SIInstrInfo.h +++ b/lib/Target/R600/SIInstrInfo.h @@ -214,6 +214,10 @@ public: return get(Opcode).TSFlags & SIInstrFlags::WQM; } + bool isVGPRSpill(uint16_t Opcode) const { + return get(Opcode).TSFlags & SIInstrFlags::VGPRSpill; + } + bool isInlineConstant(const APInt &Imm) const; bool isInlineConstant(const MachineOperand &MO, unsigned OpSize) const; bool isLiteralConstant(const MachineOperand &MO, unsigned OpSize) const; diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 838b645..8bbeb9a 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -2048,7 +2048,7 @@ defm SI_SPILL_S256 : SI_SPILL_SGPR ; defm SI_SPILL_S512 : SI_SPILL_SGPR ; multiclass SI_SPILL_VGPR { - let UseNamedOperandTable = 1 in { + let UseNamedOperandTable = 1, VGPRSpill = 1 in { def _SAVE : InstSI < (outs), (ins vgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc, @@ -2061,7 +2061,7 @@ multiclass SI_SPILL_VGPR { (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset), "", [] >; - } // End UseNamedOperandTable = 1 + } // End UseNamedOperandTable = 1, VGPRSpill = 1 } defm SI_SPILL_V32 : SI_SPILL_VGPR ; diff --git a/lib/Target/R600/SIPrepareScratchRegs.cpp b/lib/Target/R600/SIPrepareScratchRegs.cpp index 0a57a5b..0a7f684 100644 --- a/lib/Target/R600/SIPrepareScratchRegs.cpp +++ b/lib/Target/R600/SIPrepareScratchRegs.cpp @@ -128,80 +128,66 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) { MachineInstr &MI = *I; RS.forward(I); DebugLoc DL = MI.getDebugLoc(); - switch(MI.getOpcode()) { - default: break; - case AMDGPU::SI_SPILL_V512_SAVE: - case AMDGPU::SI_SPILL_V256_SAVE: - case AMDGPU::SI_SPILL_V128_SAVE: - case AMDGPU::SI_SPILL_V96_SAVE: - case AMDGPU::SI_SPILL_V64_SAVE: - case AMDGPU::SI_SPILL_V32_SAVE: - case AMDGPU::SI_SPILL_V32_RESTORE: - case AMDGPU::SI_SPILL_V64_RESTORE: - case AMDGPU::SI_SPILL_V128_RESTORE: - case AMDGPU::SI_SPILL_V256_RESTORE: - case AMDGPU::SI_SPILL_V512_RESTORE: - - // Scratch resource - unsigned ScratchRsrcReg = - RS.scavengeRegister(&AMDGPU::SReg_128RegClass, 0); - - uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE | - 0xffffffff; // Size - - unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); - unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); - unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2); - unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3); - - BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc0) - .addExternalSymbol("SCRATCH_RSRC_DWORD0") - .addReg(ScratchRsrcReg, RegState::ImplicitDefine); - - BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc1) - .addExternalSymbol("SCRATCH_RSRC_DWORD1") - .addReg(ScratchRsrcReg, RegState::ImplicitDefine); - - BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2) - .addImm(Rsrc & 0xffffffff) - .addReg(ScratchRsrcReg, RegState::ImplicitDefine); - - BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3) - .addImm(Rsrc >> 32) - .addReg(ScratchRsrcReg, RegState::ImplicitDefine); - - // Scratch Offset - if (ScratchOffsetReg == AMDGPU::NoRegister) { - ScratchOffsetReg = RS.scavengeRegister(&AMDGPU::SGPR_32RegClass, 0); - BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S32_RESTORE), - ScratchOffsetReg) - .addFrameIndex(ScratchOffsetFI) - .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef) - .addReg(AMDGPU::SGPR0, RegState::Undef); - } else if (!MBB.isLiveIn(ScratchOffsetReg)) { - MBB.addLiveIn(ScratchOffsetReg); - } - - if (ScratchRsrcReg == AMDGPU::NoRegister || - ScratchOffsetReg == AMDGPU::NoRegister) { - LLVMContext &Ctx = MF.getFunction()->getContext(); - Ctx.emitError("ran out of SGPRs for spilling VGPRs"); - ScratchRsrcReg = AMDGPU::SGPR0; - ScratchOffsetReg = AMDGPU::SGPR0; - } - MI.getOperand(2).setReg(ScratchRsrcReg); - MI.getOperand(2).setIsKill(true); - MI.getOperand(2).setIsUndef(false); - MI.getOperand(3).setReg(ScratchOffsetReg); - MI.getOperand(3).setIsUndef(false); - MI.getOperand(3).setIsKill(false); - MI.addOperand(MachineOperand::CreateReg(Rsrc0, false, true, true)); - MI.addOperand(MachineOperand::CreateReg(Rsrc1, false, true, true)); - MI.addOperand(MachineOperand::CreateReg(Rsrc2, false, true, true)); - MI.addOperand(MachineOperand::CreateReg(Rsrc3, false, true, true)); - - break; + if (!TII->isVGPRSpill(MI.getOpcode())) + continue; + + // Scratch resource + unsigned ScratchRsrcReg = + RS.scavengeRegister(&AMDGPU::SReg_128RegClass, 0); + + uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE | + 0xffffffff; // Size + + unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); + unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); + unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2); + unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3); + + BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc0) + .addExternalSymbol("SCRATCH_RSRC_DWORD0") + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + + BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc1) + .addExternalSymbol("SCRATCH_RSRC_DWORD1") + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + + BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2) + .addImm(Rsrc & 0xffffffff) + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + + BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3) + .addImm(Rsrc >> 32) + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + + // Scratch Offset + if (ScratchOffsetReg == AMDGPU::NoRegister) { + ScratchOffsetReg = RS.scavengeRegister(&AMDGPU::SGPR_32RegClass, 0); + BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S32_RESTORE), + ScratchOffsetReg) + .addFrameIndex(ScratchOffsetFI) + .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef) + .addReg(AMDGPU::SGPR0, RegState::Undef); + } else if (!MBB.isLiveIn(ScratchOffsetReg)) { + MBB.addLiveIn(ScratchOffsetReg); } + + if (ScratchRsrcReg == AMDGPU::NoRegister || + ScratchOffsetReg == AMDGPU::NoRegister) { + LLVMContext &Ctx = MF.getFunction()->getContext(); + Ctx.emitError("ran out of SGPRs for spilling VGPRs"); + ScratchRsrcReg = AMDGPU::SGPR0; + ScratchOffsetReg = AMDGPU::SGPR0; + } + MI.getOperand(2).setReg(ScratchRsrcReg); + MI.getOperand(2).setIsKill(true); + MI.getOperand(2).setIsUndef(false); + MI.getOperand(3).setReg(ScratchOffsetReg); + MI.getOperand(3).setIsUndef(false); + MI.getOperand(3).setIsKill(false); + MI.addOperand(MachineOperand::CreateReg(Rsrc0, false, true, true)); + MI.addOperand(MachineOperand::CreateReg(Rsrc1, false, true, true)); + MI.addOperand(MachineOperand::CreateReg(Rsrc2, false, true, true)); + MI.addOperand(MachineOperand::CreateReg(Rsrc3, false, true, true)); } } return true; -- 2.1.0