| File: | src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp |
| Warning: | line 945, column 8 Although the value stored to 'HasBP' is used in the enclosing expression, the value is never actually read from 'HasBP' |
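
What the checker is saying: the value produced by the assignment `HasBP = TRI.hasBasePointer(MF)` is consumed by the enclosing `if` condition, but the variable `HasBP` itself is never loaded again in the analyzed build, so the store is dead. This is the classic pattern flagged by clang-analyzer's dead-store check. A minimal sketch of the pattern, using hypothetical stand-in names rather than anything from this file:

    // Sketch only: reproduces the diagnostic in isolation.
    bool hasBasePointer();                // stand-in for TRI.hasBasePointer(MF)

    void demo() {
      bool HasBP = false;
      if ((HasBP = hasBasePointer())) {   // the assignment's value feeds the if...
        // ... set up the base pointer ...
      }
      // ... but HasBP is never read afterwards, so the store is reported dead.
    }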
| 1 | //===----------------------- SIFrameLowering.cpp --------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //==-----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "SIFrameLowering.h" |
| 10 | #include "AMDGPU.h" |
| 11 | #include "GCNSubtarget.h" |
| 12 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
| 13 | #include "SIMachineFunctionInfo.h" |
| 14 | #include "llvm/CodeGen/LivePhysRegs.h" |
| 15 | #include "llvm/CodeGen/MachineFrameInfo.h" |
| 16 | #include "llvm/CodeGen/RegisterScavenging.h" |
| 17 | #include "llvm/Target/TargetMachine.h" |
| 18 | |
| 19 | using namespace llvm; |
| 20 | |
| 21 | #define DEBUG_TYPE"frame-info" "frame-info" |
| 22 | |
| 23 | static cl::opt<bool> EnableSpillVGPRToAGPR( |
| 24 | "amdgpu-spill-vgpr-to-agpr", |
| 25 | cl::desc("Enable spilling VGPRs to AGPRs"), |
| 26 | cl::ReallyHidden, |
| 27 | cl::init(true)); |
| 28 | |
| 29 | // Find a scratch register that we can use in the prologue. We avoid using |
| 30 | // callee-save registers since they may appear to be free when this is called |
| 31 | // from canUseAsPrologue (during shrink wrapping), but then no longer be free |
| 32 | // when this is called from emitPrologue. |
| 33 | static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI, |
| 34 | LivePhysRegs &LiveRegs, |
| 35 | const TargetRegisterClass &RC, |
| 36 | bool Unused = false) { |
| 37 | // Mark callee saved registers as used so we will not choose them. |
| 38 | const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs(); |
| 39 | for (unsigned i = 0; CSRegs[i]; ++i) |
| 40 | LiveRegs.addReg(CSRegs[i]); |
| 41 | |
| 42 | if (Unused) { |
| 43 | // We are looking for a register that can be used throughout the entire |
| 44 | // function, so any use is unacceptable. |
| 45 | for (MCRegister Reg : RC) { |
| 46 | if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg)) |
| 47 | return Reg; |
| 48 | } |
| 49 | } else { |
| 50 | for (MCRegister Reg : RC) { |
| 51 | if (LiveRegs.available(MRI, Reg)) |
| 52 | return Reg; |
| 53 | } |
| 54 | } |
| 55 | |
| 56 | return MCRegister(); |
| 57 | } |
| 58 | |
| 59 | static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF, |
| 60 | LivePhysRegs &LiveRegs, |
| 61 | Register &TempSGPR, |
| 62 | Optional<int> &FrameIndex, |
| 63 | bool IsFP) { |
| 64 | SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); |
| 65 | MachineFrameInfo &FrameInfo = MF.getFrameInfo(); |
| 66 | |
| 67 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
| 68 | const SIRegisterInfo *TRI = ST.getRegisterInfo(); |
| 69 | |
| 70 | // We need to save and restore the current FP/BP. |
| 71 | |
| 72 | // 1: If there is already a VGPR with free lanes, use it. We |
| 73 | // may already have to pay the penalty for spilling a CSR VGPR. |
| 74 | if (MFI->haveFreeLanesForSGPRSpill(MF, 1)) { |
| 75 | int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr, |
| 76 | TargetStackID::SGPRSpill); |
| 77 | |
| 78 | if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI)) |
| 79 | llvm_unreachable("allocate SGPR spill should have worked")__builtin_unreachable(); |
| 80 | |
| 81 | FrameIndex = NewFI; |
| 82 | |
| 83 | LLVM_DEBUG(auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front(); |
| 84 | dbgs() << "Spilling " << (IsFP ? "FP" : "BP") << " to " |
| 85 | << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane |
| 86 | << '\n'); |
| 87 | return; |
| 88 | } |
| 89 | |
| 90 | // 2: Next, try to save the FP/BP in an unused SGPR. |
| 91 | TempSGPR = findScratchNonCalleeSaveRegister( |
| 92 | MF.getRegInfo(), LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass, true); |
| 93 | |
| 94 | if (!TempSGPR) { |
| 95 | int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr, |
| 96 | TargetStackID::SGPRSpill); |
| 97 | |
| 98 | if (TRI->spillSGPRToVGPR() && MFI->allocateSGPRSpillToVGPR(MF, NewFI)) { |
| 99 | // 3: There's no free lane to spill, and no free register to save FP/BP, |
| 100 | // so we're forced to spill another VGPR to use for the spill. |
| 101 | FrameIndex = NewFI; |
| 102 | |
| 103 | LLVM_DEBUG( |
| 104 | auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front(); |
| 105 | dbgs() << (IsFP ? "FP" : "BP") << " requires fallback spill to " |
| 106 | << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane << '\n';); |
| 107 | } else { |
| 108 | // Remove dead <NewFI> index |
| 109 | MF.getFrameInfo().RemoveStackObject(NewFI); |
| 110 | // 4: If all else fails, spill the FP/BP to memory. |
| 111 | FrameIndex = FrameInfo.CreateSpillStackObject(4, Align(4)); |
| 112 | LLVM_DEBUG(dbgs() << "Reserved FI " << FrameIndex << " for spilling "do { } while (false) |
| 113 | << (IsFP ? "FP" : "BP") << '\n')do { } while (false); |
| 114 | } |
| 115 | } else { |
| 116 | LLVM_DEBUG(dbgs() << "Saving " << (IsFP ? "FP" : "BP") << " with copy to "do { } while (false) |
| 117 | << printReg(TempSGPR, TRI) << '\n')do { } while (false); |
| 118 | } |
| 119 | } |
| 120 | |
| 121 | // We need to emit the stack operations specially here because a different |
| 122 | // frame register is used than in the rest of the function, where |
| 123 | // getFrameRegister would supply it. |
| 124 | static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI, |
| 125 | const SIMachineFunctionInfo &FuncInfo, |
| 126 | LivePhysRegs &LiveRegs, MachineFunction &MF, |
| 127 | MachineBasicBlock &MBB, |
| 128 | MachineBasicBlock::iterator I, Register SpillReg, |
| 129 | int FI) { |
| 130 | unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR |
| 131 | : AMDGPU::BUFFER_STORE_DWORD_OFFSET; |
| 132 | |
| 133 | MachineFrameInfo &FrameInfo = MF.getFrameInfo(); |
| 134 | MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); |
| 135 | MachineMemOperand *MMO = MF.getMachineMemOperand( |
| 136 | PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI), |
| 137 | FrameInfo.getObjectAlign(FI)); |
| 138 | LiveRegs.addReg(SpillReg); |
| 139 | TRI.buildSpillLoadStore(MBB, I, Opc, FI, SpillReg, true, |
| 140 | FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr, |
| 141 | &LiveRegs); |
| 142 | LiveRegs.removeReg(SpillReg); |
| 143 | } |
| 144 | |
| 145 | static void buildEpilogRestore(const GCNSubtarget &ST, |
| 146 | const SIRegisterInfo &TRI, |
| 147 | const SIMachineFunctionInfo &FuncInfo, |
| 148 | LivePhysRegs &LiveRegs, MachineFunction &MF, |
| 149 | MachineBasicBlock &MBB, |
| 150 | MachineBasicBlock::iterator I, Register SpillReg, |
| 151 | int FI) { |
| 152 | unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR |
| 153 | : AMDGPU::BUFFER_LOAD_DWORD_OFFSET; |
| 154 | |
| 155 | MachineFrameInfo &FrameInfo = MF.getFrameInfo(); |
| 156 | MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); |
| 157 | MachineMemOperand *MMO = MF.getMachineMemOperand( |
| 158 | PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI), |
| 159 | FrameInfo.getObjectAlign(FI)); |
| 160 | TRI.buildSpillLoadStore(MBB, I, Opc, FI, SpillReg, false, |
| 161 | FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr, |
| 162 | &LiveRegs); |
| 163 | } |
| 164 | |
| 165 | static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, |
| 166 | const DebugLoc &DL, const SIInstrInfo *TII, |
| 167 | Register TargetReg) { |
| 168 | MachineFunction *MF = MBB.getParent(); |
| 169 | const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); |
| 170 | const SIRegisterInfo *TRI = &TII->getRegisterInfo(); |
| 171 | const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32); |
| 172 | Register TargetLo = TRI->getSubReg(TargetReg, AMDGPU::sub0); |
| 173 | Register TargetHi = TRI->getSubReg(TargetReg, AMDGPU::sub1); |
| 174 | |
| 175 | if (MFI->getGITPtrHigh() != 0xffffffff) { |
| 176 | BuildMI(MBB, I, DL, SMovB32, TargetHi) |
| 177 | .addImm(MFI->getGITPtrHigh()) |
| 178 | .addReg(TargetReg, RegState::ImplicitDefine); |
| 179 | } else { |
| 180 | const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64); |
| 181 | BuildMI(MBB, I, DL, GetPC64, TargetReg); |
| 182 | } |
| 183 | Register GitPtrLo = MFI->getGITPtrLoReg(*MF); |
| 184 | MF->getRegInfo().addLiveIn(GitPtrLo); |
| 185 | MBB.addLiveIn(GitPtrLo); |
| 186 | BuildMI(MBB, I, DL, SMovB32, TargetLo) |
| 187 | .addReg(GitPtrLo); |
| 188 | } |
| 189 | |
| 190 | // Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()` |
| 191 | void SIFrameLowering::emitEntryFunctionFlatScratchInit( |
| 192 | MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, |
| 193 | const DebugLoc &DL, Register ScratchWaveOffsetReg) const { |
| 194 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
| 195 | const SIInstrInfo *TII = ST.getInstrInfo(); |
| 196 | const SIRegisterInfo *TRI = &TII->getRegisterInfo(); |
| 197 | const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); |
| 198 | |
| 199 | // We don't need this if we only have spills since there is no user facing |
| 200 | // scratch. |
| 201 | |
| 202 | // TODO: If we know we don't have flat instructions earlier, we can omit |
| 203 | // this from the input registers. |
| 204 | // |
| 205 | // TODO: We only need to know if we access scratch space through a flat |
| 206 | // pointer. Because we only detect if flat instructions are used at all, |
| 207 | // this will be used more often than necessary on VI. |
| 208 | |
| 209 | Register FlatScrInitLo; |
| 210 | Register FlatScrInitHi; |
| 211 | |
| 212 | if (ST.isAmdPalOS()) { |
| 213 | // Extract the scratch offset from the descriptor in the GIT |
| 214 | LivePhysRegs LiveRegs; |
| 215 | LiveRegs.init(*TRI); |
| 216 | LiveRegs.addLiveIns(MBB); |
| 217 | |
| 218 | // Find unused reg to load flat scratch init into |
| 219 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
| 220 | Register FlatScrInit = AMDGPU::NoRegister; |
| 221 | ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64(MF); |
| 222 | unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 1) / 2; |
| 223 | AllSGPR64s = AllSGPR64s.slice( |
| 224 | std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded)); |
| 225 | Register GITPtrLoReg = MFI->getGITPtrLoReg(MF); |
| 226 | for (MCPhysReg Reg : AllSGPR64s) { |
| 227 | if (LiveRegs.available(MRI, Reg) && MRI.isAllocatable(Reg) && |
| 228 | !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) { |
| 229 | FlatScrInit = Reg; |
| 230 | break; |
| 231 | } |
| 232 | } |
| 233 | assert(FlatScrInit && "Failed to find free register for scratch init")((void)0); |
| 234 | |
| 235 | FlatScrInitLo = TRI->getSubReg(FlatScrInit, AMDGPU::sub0); |
| 236 | FlatScrInitHi = TRI->getSubReg(FlatScrInit, AMDGPU::sub1); |
| 237 | |
| 238 | buildGitPtr(MBB, I, DL, TII, FlatScrInit); |
| 239 | |
| 240 | // We now have the GIT ptr - now get the scratch descriptor from the entry |
| 241 | // at offset 0 (or offset 16 for a compute shader). |
| 242 | MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS); |
| 243 | const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM); |
| 244 | auto *MMO = MF.getMachineMemOperand( |
| 245 | PtrInfo, |
| 246 | MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant | |
| 247 | MachineMemOperand::MODereferenceable, |
| 248 | 8, Align(4)); |
| 249 | unsigned Offset = |
| 250 | MF.getFunction().getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0; |
| 251 | const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>(); |
| 252 | unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset); |
| 253 | BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit) |
| 254 | .addReg(FlatScrInit) |
| 255 | .addImm(EncodedOffset) // offset |
| 256 | .addImm(0) // cpol |
| 257 | .addMemOperand(MMO); |
| 258 | |
| 259 | // Mask the offset in [47:0] of the descriptor |
| 260 | const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32); |
| 261 | BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi) |
| 262 | .addReg(FlatScrInitHi) |
| 263 | .addImm(0xffff); |
| 264 | } else { |
| 265 | Register FlatScratchInitReg = |
| 266 | MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT); |
| 267 | assert(FlatScratchInitReg); |
| 268 | |
| 269 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
| 270 | MRI.addLiveIn(FlatScratchInitReg); |
| 271 | MBB.addLiveIn(FlatScratchInitReg); |
| 272 | |
| 273 | FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0); |
| 274 | FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1); |
| 275 | } |
| 276 | |
| 277 | // Do a 64-bit pointer add. |
| 278 | if (ST.flatScratchIsPointer()) { |
| 279 | if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) { |
| 280 | BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo) |
| 281 | .addReg(FlatScrInitLo) |
| 282 | .addReg(ScratchWaveOffsetReg); |
| 283 | BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), FlatScrInitHi) |
| 284 | .addReg(FlatScrInitHi) |
| 285 | .addImm(0); |
| 286 | BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)). |
| 287 | addReg(FlatScrInitLo). |
| 288 | addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO | |
| 289 | (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_))); |
| 290 | BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)). |
| 291 | addReg(FlatScrInitHi). |
| 292 | addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI | |
| 293 | (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_))); |
| 294 | return; |
| 295 | } |
| 296 | |
| 297 | // For GFX9. |
| 298 | BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO) |
| 299 | .addReg(FlatScrInitLo) |
| 300 | .addReg(ScratchWaveOffsetReg); |
| 301 | BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), AMDGPU::FLAT_SCR_HI) |
| 302 | .addReg(FlatScrInitHi) |
| 303 | .addImm(0); |
| 304 | |
| 305 | return; |
| 306 | } |
| 307 | |
| 308 | assert(ST.getGeneration() < AMDGPUSubtarget::GFX9); |
| 309 | |
| 310 | // Copy the size in bytes. |
| 311 | BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO) |
| 312 | .addReg(FlatScrInitHi, RegState::Kill); |
| 313 | |
| 314 | // Add wave offset in bytes to private base offset. |
| 315 | // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init. |
| 316 | BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), FlatScrInitLo) |
| 317 | .addReg(FlatScrInitLo) |
| 318 | .addReg(ScratchWaveOffsetReg); |
| 319 | |
| 320 | // Convert offset to 256-byte units. |
| 321 | BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI) |
| 322 | .addReg(FlatScrInitLo, RegState::Kill) |
| 323 | .addImm(8); |
| 324 | } |
| 325 | |
| 326 | // Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not |
| 327 | // memory. They should have been removed by now. |
| 328 | static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) { |
| 329 | for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); |
| 330 | I != E; ++I) { |
| 331 | if (!MFI.isDeadObjectIndex(I)) |
| 332 | return false; |
| 333 | } |
| 334 | |
| 335 | return true; |
| 336 | } |
| 337 | |
| 338 | // Shift down registers reserved for the scratch RSRC. |
| 339 | Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg( |
| 340 | MachineFunction &MF) const { |
| 341 | |
| 342 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
| 343 | const SIInstrInfo *TII = ST.getInstrInfo(); |
| 344 | const SIRegisterInfo *TRI = &TII->getRegisterInfo(); |
| 345 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
| 346 | SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); |
| 347 | |
| 348 | assert(MFI->isEntryFunction()); |
| 349 | |
| 350 | Register ScratchRsrcReg = MFI->getScratchRSrcReg(); |
| 351 | |
| 352 | if (!ScratchRsrcReg || (!MRI.isPhysRegUsed(ScratchRsrcReg) && |
| 353 | allStackObjectsAreDead(MF.getFrameInfo()))) |
| 354 | return Register(); |
| 355 | |
| 356 | if (ST.hasSGPRInitBug() || |
| 357 | ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF)) |
| 358 | return ScratchRsrcReg; |
| 359 | |
| 360 | // We reserved the last registers for this. Shift it down to the end of those |
| 361 | // which were actually used. |
| 362 | // |
| 363 | // FIXME: It might be safer to use a pseudoregister before replacement. |
| 364 | |
| 365 | // FIXME: We should be able to eliminate unused input registers. We only |
| 366 | // cannot do this for the resources required for scratch access. For now we |
| 367 | // skip over user SGPRs and may leave unused holes. |
| 368 | |
| 369 | unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4; |
| 370 | ArrayRef<MCPhysReg> AllSGPR128s = TRI->getAllSGPR128(MF); |
| 371 | AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded)); |
| 372 | |
| 373 | // Skip the last N reserved elements because they should have already been |
| 374 | // reserved for VCC etc. |
| 375 | Register GITPtrLoReg = MFI->getGITPtrLoReg(MF); |
| 376 | for (MCPhysReg Reg : AllSGPR128s) { |
| 377 | // Pick the first unallocated one. Make sure we don't clobber the other |
| 378 | // reserved input we needed. Also for PAL, make sure we don't clobber |
| 379 | // the GIT pointer passed in SGPR0 or SGPR8. |
| 380 | if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) && |
| 381 | !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) { |
| 382 | MRI.replaceRegWith(ScratchRsrcReg, Reg); |
| 383 | MFI->setScratchRSrcReg(Reg); |
| 384 | return Reg; |
| 385 | } |
| 386 | } |
| 387 | |
| 388 | return ScratchRsrcReg; |
| 389 | } |
| 390 | |
| 391 | static unsigned getScratchScaleFactor(const GCNSubtarget &ST) { |
| 392 | return ST.enableFlatScratch() ? 1 : ST.getWavefrontSize(); |
| 393 | } |
| 394 | |
| 395 | void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, |
| 396 | MachineBasicBlock &MBB) const { |
| 397 | assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); |
| 398 | |
| 399 | // FIXME: If we only have SGPR spills, we won't actually be using scratch |
| 400 | // memory since these spill to VGPRs. We should be cleaning up these unused |
| 401 | // SGPR spill frame indices somewhere. |
| 402 | |
| 403 | // FIXME: We still have implicit uses on SGPR spill instructions in case they |
| 404 | // need to spill to vector memory. It's likely that will not happen, but at |
| 405 | // this point it appears we need the setup. This part of the prolog should be |
| 406 | // emitted after frame indices are eliminated. |
| 407 | |
| 408 | // FIXME: Remove all of the isPhysRegUsed checks |
| 409 | |
| 410 | SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); |
| 411 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
| 412 | const SIInstrInfo *TII = ST.getInstrInfo(); |
| 413 | const SIRegisterInfo *TRI = &TII->getRegisterInfo(); |
| 414 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
| 415 | const Function &F = MF.getFunction(); |
| 416 | MachineFrameInfo &FrameInfo = MF.getFrameInfo(); |
| 417 | |
| 418 | assert(MFI->isEntryFunction()); |
| 419 | |
| 420 | Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg( |
| 421 | AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET); |
| 422 | // FIXME: Hack to not crash in situations which emitted an error. |
| 423 | if (!PreloadedScratchWaveOffsetReg) |
| 424 | return; |
| 425 | |
| 426 | // We need to do the replacement of the private segment buffer register even |
| 427 | // if there are no stack objects. There could be stores to undef or a |
| 428 | // constant without an associated object. |
| 429 | // |
| 430 | // This will return `Register()` in cases where there are no actual |
| 431 | // uses of the SRSRC. |
| 432 | Register ScratchRsrcReg; |
| 433 | if (!ST.enableFlatScratch()) |
| 434 | ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF); |
| 435 | |
| 436 | // Make the selected register live throughout the function. |
| 437 | if (ScratchRsrcReg) { |
| 438 | for (MachineBasicBlock &OtherBB : MF) { |
| 439 | if (&OtherBB != &MBB) { |
| 440 | OtherBB.addLiveIn(ScratchRsrcReg); |
| 441 | } |
| 442 | } |
| 443 | } |
| 444 | |
| 445 | // Now that we have fixed the reserved SRSRC we need to locate the |
| 446 | // (potentially) preloaded SRSRC. |
| 447 | Register PreloadedScratchRsrcReg; |
| 448 | if (ST.isAmdHsaOrMesa(F)) { |
| 449 | PreloadedScratchRsrcReg = |
| 450 | MFI->getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER); |
| 451 | if (ScratchRsrcReg && PreloadedScratchRsrcReg) { |
| 452 | // We added live-ins during argument lowering, but since they were not |
| 453 | // used they were deleted. We're adding the uses now, so add them back. |
| 454 | MRI.addLiveIn(PreloadedScratchRsrcReg); |
| 455 | MBB.addLiveIn(PreloadedScratchRsrcReg); |
| 456 | } |
| 457 | } |
| 458 | |
| 459 | // Debug location must be unknown since the first debug location is used to |
| 460 | // determine the end of the prologue. |
| 461 | DebugLoc DL; |
| 462 | MachineBasicBlock::iterator I = MBB.begin(); |
| 463 | |
| 464 | // We found the SRSRC first because it needs four registers and has an |
| 465 | // alignment requirement. If the SRSRC that we found clobbers the |
| 466 | // scratch wave offset, which may be in a fixed SGPR or a free SGPR |
| 467 | // chosen by SITargetLowering::allocateSystemSGPRs, COPY the scratch |
| 468 | // wave offset to a free SGPR. |
| 469 | Register ScratchWaveOffsetReg; |
| 470 | if (TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) { |
| 471 | ArrayRef<MCPhysReg> AllSGPRs = TRI->getAllSGPR32(MF); |
| 472 | unsigned NumPreloaded = MFI->getNumPreloadedSGPRs(); |
| 473 | AllSGPRs = AllSGPRs.slice( |
| 474 | std::min(static_cast<unsigned>(AllSGPRs.size()), NumPreloaded)); |
| 475 | Register GITPtrLoReg = MFI->getGITPtrLoReg(MF); |
| 476 | for (MCPhysReg Reg : AllSGPRs) { |
| 477 | if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) && |
| 478 | !TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) { |
| 479 | ScratchWaveOffsetReg = Reg; |
| 480 | BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg) |
| 481 | .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill); |
| 482 | break; |
| 483 | } |
| 484 | } |
| 485 | } else { |
| 486 | ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg; |
| 487 | } |
| 488 | assert(ScratchWaveOffsetReg); |
| 489 | |
| 490 | if (requiresStackPointerReference(MF)) { |
| 491 | Register SPReg = MFI->getStackPtrOffsetReg(); |
| 492 | assert(SPReg != AMDGPU::SP_REG); |
| 493 | BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg) |
| 494 | .addImm(FrameInfo.getStackSize() * getScratchScaleFactor(ST)); |
| 495 | } |
| 496 | |
| 497 | if (hasFP(MF)) { |
| 498 | Register FPReg = MFI->getFrameOffsetReg(); |
| 499 | assert(FPReg != AMDGPU::FP_REG); |
| 500 | BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0); |
| 501 | } |
| 502 | |
| 503 | bool NeedsFlatScratchInit = |
| 504 | MFI->hasFlatScratchInit() && |
| 505 | (MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() || |
| 506 | (!allStackObjectsAreDead(FrameInfo) && ST.enableFlatScratch())); |
| 507 | |
| 508 | if ((NeedsFlatScratchInit || ScratchRsrcReg) && |
| 509 | !ST.flatScratchIsArchitected()) { |
| 510 | MRI.addLiveIn(PreloadedScratchWaveOffsetReg); |
| 511 | MBB.addLiveIn(PreloadedScratchWaveOffsetReg); |
| 512 | } |
| 513 | |
| 514 | if (NeedsFlatScratchInit) { |
| 515 | emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg); |
| 516 | } |
| 517 | |
| 518 | if (ScratchRsrcReg) { |
| 519 | emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL, |
| 520 | PreloadedScratchRsrcReg, |
| 521 | ScratchRsrcReg, ScratchWaveOffsetReg); |
| 522 | } |
| 523 | } |
| 524 | |
| 525 | // Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg` |
| 526 | void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup( |
| 527 | MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, |
| 528 | const DebugLoc &DL, Register PreloadedScratchRsrcReg, |
| 529 | Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const { |
| 530 | |
| 531 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
| 532 | const SIInstrInfo *TII = ST.getInstrInfo(); |
| 533 | const SIRegisterInfo *TRI = &TII->getRegisterInfo(); |
| 534 | const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); |
| 535 | const Function &Fn = MF.getFunction(); |
| 536 | |
| 537 | if (ST.isAmdPalOS()) { |
| 538 | // The pointer to the GIT is formed from the offset passed in and either |
| 539 | // the amdgpu-git-ptr-high function attribute or the top part of the PC |
| 540 | Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1); |
| 541 | Register Rsrc03 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3); |
| 542 | |
| 543 | buildGitPtr(MBB, I, DL, TII, Rsrc01); |
| 544 | |
| 545 | // We now have the GIT ptr - now get the scratch descriptor from the entry |
| 546 | // at offset 0 (or offset 16 for a compute shader). |
| 547 | MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS); |
| 548 | const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM); |
| 549 | auto MMO = MF.getMachineMemOperand(PtrInfo, |
| 550 | MachineMemOperand::MOLoad | |
| 551 | MachineMemOperand::MOInvariant | |
| 552 | MachineMemOperand::MODereferenceable, |
| 553 | 16, Align(4)); |
| 554 | unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0; |
| 555 | const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>(); |
| 556 | unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset); |
| 557 | BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg) |
| 558 | .addReg(Rsrc01) |
| 559 | .addImm(EncodedOffset) // offset |
| 560 | .addImm(0) // cpol |
| 561 | .addReg(ScratchRsrcReg, RegState::ImplicitDefine) |
| 562 | .addMemOperand(MMO); |
| 563 | |
| 564 | // The driver will always set the SRD for wave 64 (bits 118:117 of |
| 565 | // descriptor / bits 22:21 of third sub-reg will be 0b11) |
| 566 | // If the shader is actually wave32 we have to modify the const_index_stride |
| 567 | // field of the descriptor 3rd sub-reg (bits 22:21) to 0b10 (stride=32). The |
| 568 | // reason the driver does this is that there can be cases where it presents |
| 569 | // 2 shaders with different wave size (e.g. VsFs). |
| 570 | // TODO: convert to using SCRATCH instructions or multiple SRD buffers |
| 571 | if (ST.isWave32()) { |
| 572 | const MCInstrDesc &SBitsetB32 = TII->get(AMDGPU::S_BITSET0_B32); |
| 573 | BuildMI(MBB, I, DL, SBitsetB32, Rsrc03) |
| 574 | .addImm(21) |
| 575 | .addReg(Rsrc03); |
| 576 | } |
| 577 | } else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) { |
| 578 | assert(!ST.isAmdHsaOrMesa(Fn)); |
| 579 | const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32); |
| 580 | |
| 581 | Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2); |
| 582 | Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3); |
| 583 | |
| 584 | // Use relocations to get the pointer, and setup the other bits manually. |
| 585 | uint64_t Rsrc23 = TII->getScratchRsrcWords23(); |
| 586 | |
| 587 | if (MFI->hasImplicitBufferPtr()) { |
| 588 | Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1); |
| 589 | |
| 590 | if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) { |
| 591 | const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64); |
| 592 | |
| 593 | BuildMI(MBB, I, DL, Mov64, Rsrc01) |
| 594 | .addReg(MFI->getImplicitBufferPtrUserSGPR()) |
| 595 | .addReg(ScratchRsrcReg, RegState::ImplicitDefine); |
| 596 | } else { |
| 597 | const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM); |
| 598 | |
| 599 | MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS); |
| 600 | auto MMO = MF.getMachineMemOperand( |
| 601 | PtrInfo, |
| 602 | MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant | |
| 603 | MachineMemOperand::MODereferenceable, |
| 604 | 8, Align(4)); |
| 605 | BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01) |
| 606 | .addReg(MFI->getImplicitBufferPtrUserSGPR()) |
| 607 | .addImm(0) // offset |
| 608 | .addImm(0) // cpol |
| 609 | .addMemOperand(MMO) |
| 610 | .addReg(ScratchRsrcReg, RegState::ImplicitDefine); |
| 611 | |
| 612 | MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR()); |
| 613 | MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR()); |
| 614 | } |
| 615 | } else { |
| 616 | Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); |
| 617 | Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); |
| 618 | |
| 619 | BuildMI(MBB, I, DL, SMovB32, Rsrc0) |
| 620 | .addExternalSymbol("SCRATCH_RSRC_DWORD0") |
| 621 | .addReg(ScratchRsrcReg, RegState::ImplicitDefine); |
| 622 | |
| 623 | BuildMI(MBB, I, DL, SMovB32, Rsrc1) |
| 624 | .addExternalSymbol("SCRATCH_RSRC_DWORD1") |
| 625 | .addReg(ScratchRsrcReg, RegState::ImplicitDefine); |
| 626 | |
| 627 | } |
| 628 | |
| 629 | BuildMI(MBB, I, DL, SMovB32, Rsrc2) |
| 630 | .addImm(Rsrc23 & 0xffffffff) |
| 631 | .addReg(ScratchRsrcReg, RegState::ImplicitDefine); |
| 632 | |
| 633 | BuildMI(MBB, I, DL, SMovB32, Rsrc3) |
| 634 | .addImm(Rsrc23 >> 32) |
| 635 | .addReg(ScratchRsrcReg, RegState::ImplicitDefine); |
| 636 | } else if (ST.isAmdHsaOrMesa(Fn)) { |
| 637 | assert(PreloadedScratchRsrcReg); |
| 638 | |
| 639 | if (ScratchRsrcReg != PreloadedScratchRsrcReg) { |
| 640 | BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg) |
| 641 | .addReg(PreloadedScratchRsrcReg, RegState::Kill); |
| 642 | } |
| 643 | } |
| 644 | |
| 645 | // Add the scratch wave offset into the scratch RSRC. |
| 646 | // |
| 647 | // We only want to update the first 48 bits, which is the base address |
| 648 | // pointer, without touching the adjacent 16 bits of flags. We know this add |
| 649 | // cannot carry-out from bit 47, otherwise the scratch allocation would be |
| 650 | // impossible to fit in the 48-bit global address space. |
| 651 | // |
| 652 | // TODO: Evaluate if it is better to just construct an SRD using the flat |
| 653 | // scratch init and some constants rather than update the one we are passed. |
| 654 | Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); |
| 655 | Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); |
| 656 | |
| 657 | // We cannot Kill ScratchWaveOffsetReg here because we allow it to be used in |
| 658 | // the kernel body via inreg arguments. |
| 659 | BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0) |
| 660 | .addReg(ScratchRsrcSub0) |
| 661 | .addReg(ScratchWaveOffsetReg) |
| 662 | .addReg(ScratchRsrcReg, RegState::ImplicitDefine); |
| 663 | BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1) |
| 664 | .addReg(ScratchRsrcSub1) |
| 665 | .addImm(0) |
| 666 | .addReg(ScratchRsrcReg, RegState::ImplicitDefine); |
| 667 | } |
| 668 | |
| 669 | bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const { |
| 670 | switch (ID) { |
| 671 | case TargetStackID::Default: |
| 672 | case TargetStackID::NoAlloc: |
| 673 | case TargetStackID::SGPRSpill: |
| 674 | return true; |
| 675 | case TargetStackID::ScalableVector: |
| 676 | case TargetStackID::WasmLocal: |
| 677 | return false; |
| 678 | } |
| 679 | llvm_unreachable("Invalid TargetStackID::Value")__builtin_unreachable(); |
| 680 | } |
| 681 | |
| 682 | static void initLiveRegs(LivePhysRegs &LiveRegs, const SIRegisterInfo &TRI, |
| 683 | const SIMachineFunctionInfo *FuncInfo, |
| 684 | MachineFunction &MF, MachineBasicBlock &MBB, |
| 685 | MachineBasicBlock::iterator MBBI, bool IsProlog) { |
| 686 | if (LiveRegs.empty()) { |
| 687 | LiveRegs.init(TRI); |
| 688 | if (IsProlog) { |
| 689 | LiveRegs.addLiveIns(MBB); |
| 690 | } else { |
| 691 | // In epilog. |
| 692 | LiveRegs.addLiveOuts(MBB); |
| 693 | LiveRegs.stepBackward(*MBBI); |
| 694 | } |
| 695 | } |
| 696 | } |
| 697 | |
| 698 | // Activate all lanes, returning the saved exec mask. |
| 699 | static Register buildScratchExecCopy(LivePhysRegs &LiveRegs, |
| 700 | MachineFunction &MF, |
| 701 | MachineBasicBlock &MBB, |
| 702 | MachineBasicBlock::iterator MBBI, |
| 703 | bool IsProlog) { |
| 704 | Register ScratchExecCopy; |
| 705 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
| 706 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
| 707 | const SIInstrInfo *TII = ST.getInstrInfo(); |
| 708 | const SIRegisterInfo &TRI = TII->getRegisterInfo(); |
| 709 | SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); |
| 710 | DebugLoc DL; |
| 711 | |
| 712 | initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, IsProlog); |
| 713 | |
| 714 | ScratchExecCopy = findScratchNonCalleeSaveRegister( |
| 715 | MRI, LiveRegs, *TRI.getWaveMaskRegClass()); |
| 716 | if (!ScratchExecCopy) |
| 717 | report_fatal_error("failed to find free scratch register"); |
| 718 | |
| 719 | LiveRegs.addReg(ScratchExecCopy); |
| 720 | |
| 721 | const unsigned OrSaveExec = |
| 722 | ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64; |
| 723 | BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy).addImm(-1); |
| 724 | |
| 725 | return ScratchExecCopy; |
| 726 | } |
| 727 | |
| 728 | // A StackID of SGPRSpill implies that this is a spill from SGPR to VGPR. |
| 729 | // Otherwise we are spilling to memory. |
| 730 | static bool spilledToMemory(const MachineFunction &MF, int SaveIndex) { |
| 731 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
| 732 | return MFI.getStackID(SaveIndex) != TargetStackID::SGPRSpill; |
| 733 | } |
| 734 | |
| 735 | void SIFrameLowering::emitPrologue(MachineFunction &MF, |
| 736 | MachineBasicBlock &MBB) const { |
| 737 | SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); |
| 738 | if (FuncInfo->isEntryFunction()) { |
| 739 | emitEntryFunctionPrologue(MF, MBB); |
| 740 | return; |
| 741 | } |
| 742 | |
| 743 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
| 744 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
| 745 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
| 746 | const SIInstrInfo *TII = ST.getInstrInfo(); |
| 747 | const SIRegisterInfo &TRI = TII->getRegisterInfo(); |
| 748 | |
| 749 | Register StackPtrReg = FuncInfo->getStackPtrOffsetReg(); |
| 750 | Register FramePtrReg = FuncInfo->getFrameOffsetReg(); |
| 751 | Register BasePtrReg = |
| 752 | TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register(); |
| 753 | LivePhysRegs LiveRegs; |
| 754 | |
| 755 | MachineBasicBlock::iterator MBBI = MBB.begin(); |
| 756 | DebugLoc DL; |
| 757 | |
| 758 | bool HasFP = false; |
| 759 | bool HasBP = false; |
| 760 | uint32_t NumBytes = MFI.getStackSize(); |
| 761 | uint32_t RoundedSize = NumBytes; |
| 762 | // To avoid clobbering VGPRs in lanes that weren't active on function entry, |
| 763 | // turn on all lanes before doing the spill to memory. |
| 764 | Register ScratchExecCopy; |
| 765 | |
| 766 | Optional<int> FPSaveIndex = FuncInfo->FramePointerSaveIndex; |
| 767 | Optional<int> BPSaveIndex = FuncInfo->BasePointerSaveIndex; |
| 768 | |
| 769 | // VGPRs used for SGPR->VGPR spills |
| 770 | for (const SIMachineFunctionInfo::SGPRSpillVGPR &Reg : |
| 771 | FuncInfo->getSGPRSpillVGPRs()) { |
| 772 | if (!Reg.FI) |
| 773 | continue; |
| 774 | |
| 775 | if (!ScratchExecCopy) |
| 776 | ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, |
| 777 | /*IsProlog*/ true); |
| 778 | |
| 779 | buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, Reg.VGPR, |
| 780 | *Reg.FI); |
| 781 | } |
| 782 | |
| 783 | // VGPRs used for Whole Wave Mode |
| 784 | for (const auto &Reg : FuncInfo->WWMReservedRegs) { |
| 785 | auto VGPR = Reg.first; |
| 786 | auto FI = Reg.second; |
| 787 | if (!FI) |
| 788 | continue; |
| 789 | |
| 790 | if (!ScratchExecCopy) |
| 791 | ScratchExecCopy = |
| 792 | buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ true); |
| 793 | |
| 794 | buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, VGPR, *FI); |
| 795 | } |
| 796 | |
| 797 | if (ScratchExecCopy) { |
| 798 | // FIXME: Split block and make terminator. |
| 799 | unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; |
| 800 | MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; |
| 801 | BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec) |
| 802 | .addReg(ScratchExecCopy, RegState::Kill); |
| 803 | LiveRegs.addReg(ScratchExecCopy); |
| 804 | } |
| 805 | |
| 806 | if (FPSaveIndex && spilledToMemory(MF, *FPSaveIndex)) { |
| 807 | const int FramePtrFI = *FPSaveIndex; |
| 808 | assert(!MFI.isDeadObjectIndex(FramePtrFI)); |
| 809 | |
| 810 | initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true); |
| 811 | |
| 812 | MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister( |
| 813 | MRI, LiveRegs, AMDGPU::VGPR_32RegClass); |
| 814 | if (!TmpVGPR) |
| 815 | report_fatal_error("failed to find free scratch register"); |
| 816 | |
| 817 | BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR) |
| 818 | .addReg(FramePtrReg); |
| 819 | |
| 820 | buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR, |
| 821 | FramePtrFI); |
| 822 | } |
| 823 | |
| 824 | if (BPSaveIndex && spilledToMemory(MF, *BPSaveIndex)) { |
| 825 | const int BasePtrFI = *BPSaveIndex; |
| 826 | assert(!MFI.isDeadObjectIndex(BasePtrFI)); |
| 827 | |
| 828 | initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true); |
| 829 | |
| 830 | MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister( |
| 831 | MRI, LiveRegs, AMDGPU::VGPR_32RegClass); |
| 832 | if (!TmpVGPR) |
| 833 | report_fatal_error("failed to find free scratch register"); |
| 834 | |
| 835 | BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR) |
| 836 | .addReg(BasePtrReg); |
| 837 | |
| 838 | buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR, |
| 839 | BasePtrFI); |
| 840 | } |
| 841 | |
| 842 | // In this case, spill the FP to a reserved VGPR. |
| 843 | if (FPSaveIndex && !spilledToMemory(MF, *FPSaveIndex)) { |
| 844 | const int FramePtrFI = *FPSaveIndex; |
| 845 | assert(!MFI.isDeadObjectIndex(FramePtrFI)); |
| 846 | |
| 847 | assert(MFI.getStackID(FramePtrFI) == TargetStackID::SGPRSpill); |
| 848 | ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill = |
| 849 | FuncInfo->getSGPRToVGPRSpills(FramePtrFI); |
| 850 | assert(Spill.size() == 1); |
| 851 | |
| 852 | // Save FP before setting it up. |
| 853 | BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR) |
| 854 | .addReg(FramePtrReg) |
| 855 | .addImm(Spill[0].Lane) |
| 856 | .addReg(Spill[0].VGPR, RegState::Undef); |
| 857 | } |
| 858 | |
| 859 | // In this case, spill the BP to a reserved VGPR. |
| 860 | if (BPSaveIndex && !spilledToMemory(MF, *BPSaveIndex)) { |
| 861 | const int BasePtrFI = *BPSaveIndex; |
| 862 | assert(!MFI.isDeadObjectIndex(BasePtrFI)); |
| 863 | |
| 864 | assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill); |
| 865 | ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill = |
| 866 | FuncInfo->getSGPRToVGPRSpills(BasePtrFI); |
| 867 | assert(Spill.size() == 1); |
| 868 | |
| 869 | // Save BP before setting it up. |
| 870 | BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR) |
| 871 | .addReg(BasePtrReg) |
| 872 | .addImm(Spill[0].Lane) |
| 873 | .addReg(Spill[0].VGPR, RegState::Undef); |
| 874 | } |
| 875 | |
| 876 | // Emit the copy if we need an FP, and are using a free SGPR to save it. |
| 877 | if (FuncInfo->SGPRForFPSaveRestoreCopy) { |
| 878 | BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), |
| 879 | FuncInfo->SGPRForFPSaveRestoreCopy) |
| 880 | .addReg(FramePtrReg) |
| 881 | .setMIFlag(MachineInstr::FrameSetup); |
| 882 | } |
| 883 | |
| 884 | // Emit the copy if we need a BP, and are using a free SGPR to save it. |
| 885 | if (FuncInfo->SGPRForBPSaveRestoreCopy) { |
| 886 | BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), |
| 887 | FuncInfo->SGPRForBPSaveRestoreCopy) |
| 888 | .addReg(BasePtrReg) |
| 889 | .setMIFlag(MachineInstr::FrameSetup); |
| 890 | } |
| 891 | |
| 892 | // If a copy has been emitted for FP and/or BP, make the SGPRs |
| 893 | // used in the copy instructions live throughout the function. |
| 894 | SmallVector<MCPhysReg, 2> TempSGPRs; |
| 895 | if (FuncInfo->SGPRForFPSaveRestoreCopy) |
| 896 | TempSGPRs.push_back(FuncInfo->SGPRForFPSaveRestoreCopy); |
| 897 | |
| 898 | if (FuncInfo->SGPRForBPSaveRestoreCopy) |
| 899 | TempSGPRs.push_back(FuncInfo->SGPRForBPSaveRestoreCopy); |
| 900 | |
| 901 | if (!TempSGPRs.empty()) { |
| 902 | for (MachineBasicBlock &MBB : MF) { |
| 903 | for (MCPhysReg Reg : TempSGPRs) |
| 904 | MBB.addLiveIn(Reg); |
| 905 | |
| 906 | MBB.sortUniqueLiveIns(); |
| 907 | } |
| 908 | if (!LiveRegs.empty()) { |
| 909 | LiveRegs.addReg(FuncInfo->SGPRForFPSaveRestoreCopy); |
| 910 | LiveRegs.addReg(FuncInfo->SGPRForBPSaveRestoreCopy); |
| 911 | } |
| 912 | } |
| 913 | |
| 914 | if (TRI.hasStackRealignment(MF)) { |
| 915 | HasFP = true; |
| 916 | const unsigned Alignment = MFI.getMaxAlign().value(); |
| 917 | |
| 918 | RoundedSize += Alignment; |
| 919 | if (LiveRegs.empty()) { |
| 920 | LiveRegs.init(TRI); |
| 921 | LiveRegs.addLiveIns(MBB); |
| 922 | } |
| 923 | |
| 924 | // s_add_i32 s33, s32, NumBytes |
| 925 | // s_and_b32 s33, s33, 0b111...0000 |
| 926 | BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), FramePtrReg) |
| 927 | .addReg(StackPtrReg) |
| 928 | .addImm((Alignment - 1) * getScratchScaleFactor(ST)) |
| 929 | .setMIFlag(MachineInstr::FrameSetup); |
| 930 | BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg) |
| 931 | .addReg(FramePtrReg, RegState::Kill) |
| 932 | .addImm(-Alignment * getScratchScaleFactor(ST)) |
| 933 | .setMIFlag(MachineInstr::FrameSetup); |
| 934 | FuncInfo->setIsStackRealigned(true); |
| 935 | } else if ((HasFP = hasFP(MF))) { |
| 936 | BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg) |
| 937 | .addReg(StackPtrReg) |
| 938 | .setMIFlag(MachineInstr::FrameSetup); |
| 939 | } |
| 940 | |
| 941 | // If we need a base pointer, set it up here. It's whatever the value of |
| 942 | // the stack pointer is at this point. Any variable size objects will be |
| 943 | // allocated after this, so we can still use the base pointer to reference |
| 944 | // the incoming arguments. |
| 945 | if ((HasBP = TRI.hasBasePointer(MF))) { |
Although the value stored to 'HasBP' is used in the enclosing expression, the value is never actually read from 'HasBP' | |
| 946 | BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg) |
| 947 | .addReg(StackPtrReg) |
| 948 | .setMIFlag(MachineInstr::FrameSetup); |
| 949 | } |
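
A possible cleanup (a sketch, not necessarily the upstream fix): separate the store from the test so the flag is genuinely read, which keeps `HasBP` available for the asserts at the end of the function while satisfying the analyzer:

    // Sketch: splitting the assignment out of the condition makes the
    // subsequent test an explicit read of HasBP.
    HasBP = TRI.hasBasePointer(MF);
    if (HasBP) {
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
          .addReg(StackPtrReg)
          .setMIFlag(MachineInstr::FrameSetup);
    }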
| 950 | |
| 951 | if (HasFP && RoundedSize != 0) { |
| 952 | BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg) |
| 953 | .addReg(StackPtrReg) |
| 954 | .addImm(RoundedSize * getScratchScaleFactor(ST)) |
| 955 | .setMIFlag(MachineInstr::FrameSetup); |
| 956 | } |
| 957 | |
| 958 | assert((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy || |
| 959 | FuncInfo->FramePointerSaveIndex)) && |
| 960 | "Needed to save FP but didn't save it anywhere"); |
| 961 | |
| 962 | assert((HasFP || (!FuncInfo->SGPRForFPSaveRestoreCopy && |
| 963 | !FuncInfo->FramePointerSaveIndex)) && |
| 964 | "Saved FP but didn't need it"); |
| 965 | |
| 966 | assert((!HasBP || (FuncInfo->SGPRForBPSaveRestoreCopy || |
| 967 | FuncInfo->BasePointerSaveIndex)) && |
| 968 | "Needed to save BP but didn't save it anywhere"); |
| 969 | |
| 970 | assert((HasBP || (!FuncInfo->SGPRForBPSaveRestoreCopy && |
| 971 | !FuncInfo->BasePointerSaveIndex)) && |
| 972 | "Saved BP but didn't need it"); |
| 973 | } |
| 974 | |
| 975 | void SIFrameLowering::emitEpilogue(MachineFunction &MF, |
| 976 | MachineBasicBlock &MBB) const { |
| 977 | const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); |
| 978 | if (FuncInfo->isEntryFunction()) |
| 979 | return; |
| 980 | |
| 981 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
| 982 | const SIInstrInfo *TII = ST.getInstrInfo(); |
| 983 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
| 984 | const SIRegisterInfo &TRI = TII->getRegisterInfo(); |
| 985 | MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); |
| 986 | LivePhysRegs LiveRegs; |
| 987 | DebugLoc DL; |
| 988 | |
| 989 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
| 990 | uint32_t NumBytes = MFI.getStackSize(); |
| 991 | uint32_t RoundedSize = FuncInfo->isStackRealigned() |
| 992 | ? NumBytes + MFI.getMaxAlign().value() |
| 993 | : NumBytes; |
| 994 | const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg(); |
| 995 | const Register FramePtrReg = FuncInfo->getFrameOffsetReg(); |
| 996 | const Register BasePtrReg = |
| 997 | TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register(); |
| 998 | |
| 999 | Optional<int> FPSaveIndex = FuncInfo->FramePointerSaveIndex; |
| 1000 | Optional<int> BPSaveIndex = FuncInfo->BasePointerSaveIndex; |
| 1001 | |
| 1002 | if (RoundedSize != 0 && hasFP(MF)) { |
| 1003 | BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg) |
| 1004 | .addReg(StackPtrReg) |
| 1005 | .addImm(-static_cast<int64_t>(RoundedSize * getScratchScaleFactor(ST))) |
| 1006 | .setMIFlag(MachineInstr::FrameDestroy); |
| 1007 | } |
| 1008 | |
| 1009 | if (FuncInfo->SGPRForFPSaveRestoreCopy) { |
| 1010 | BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg) |
| 1011 | .addReg(FuncInfo->SGPRForFPSaveRestoreCopy) |
| 1012 | .setMIFlag(MachineInstr::FrameDestroy); |
| 1013 | } |
| 1014 | |
| 1015 | if (FuncInfo->SGPRForBPSaveRestoreCopy) { |
| 1016 | BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg) |
| 1017 | .addReg(FuncInfo->SGPRForBPSaveRestoreCopy) |
| 1018 | .setMIFlag(MachineInstr::FrameDestroy); |
| 1019 | } |
| 1020 | |
| 1021 | if (FPSaveIndex) { |
| 1022 | const int FramePtrFI = *FPSaveIndex; |
| 1023 | assert(!MFI.isDeadObjectIndex(FramePtrFI)); |
| 1024 | if (spilledToMemory(MF, FramePtrFI)) { |
| 1025 | initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false); |
| 1026 | |
| 1027 | MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister( |
| 1028 | MRI, LiveRegs, AMDGPU::VGPR_32RegClass); |
| 1029 | if (!TmpVGPR) |
| 1030 | report_fatal_error("failed to find free scratch register"); |
| 1031 | buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR, |
| 1032 | FramePtrFI); |
| 1033 | BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), FramePtrReg) |
| 1034 | .addReg(TmpVGPR, RegState::Kill); |
| 1035 | } else { |
| 1036 | // Reload from VGPR spill. |
| 1037 | assert(MFI.getStackID(FramePtrFI) == TargetStackID::SGPRSpill); |
| 1038 | ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill = |
| 1039 | FuncInfo->getSGPRToVGPRSpills(FramePtrFI); |
| 1040 | assert(Spill.size() == 1); |
| 1041 | BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), FramePtrReg) |
| 1042 | .addReg(Spill[0].VGPR) |
| 1043 | .addImm(Spill[0].Lane); |
| 1044 | } |
| 1045 | } |
| 1046 | |
| 1047 | if (BPSaveIndex) { |
| 1048 | const int BasePtrFI = *BPSaveIndex; |
| 1049 | assert(!MFI.isDeadObjectIndex(BasePtrFI)); |
| 1050 | if (spilledToMemory(MF, BasePtrFI)) { |
| 1051 | initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false); |
| 1052 | |
| 1053 | MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister( |
| 1054 | MRI, LiveRegs, AMDGPU::VGPR_32RegClass); |
| 1055 | if (!TmpVGPR) |
| 1056 | report_fatal_error("failed to find free scratch register"); |
| 1057 | buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR, |
| 1058 | BasePtrFI); |
| 1059 | BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), BasePtrReg) |
| 1060 | .addReg(TmpVGPR, RegState::Kill); |
| 1061 | } else { |
| 1062 | // Reload from VGPR spill. |
| 1063 | assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill); |
| 1064 | ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill = |
| 1065 | FuncInfo->getSGPRToVGPRSpills(BasePtrFI); |
| 1066 | assert(Spill.size() == 1); |
| 1067 | BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), BasePtrReg) |
| 1068 | .addReg(Spill[0].VGPR) |
| 1069 | .addImm(Spill[0].Lane); |
| 1070 | } |
| 1071 | } |
| 1072 | |
| 1073 | Register ScratchExecCopy; |
| 1074 | for (const SIMachineFunctionInfo::SGPRSpillVGPR &Reg : |
| 1075 | FuncInfo->getSGPRSpillVGPRs()) { |
| 1076 | if (!Reg.FI) |
| 1077 | continue; |
| 1078 | |
| 1079 | if (!ScratchExecCopy) |
| 1080 | ScratchExecCopy = |
| 1081 | buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false); |
| 1082 | |
| 1083 | buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, Reg.VGPR, |
| 1084 | *Reg.FI); |
| 1085 | } |
| 1086 | |
| 1087 | for (const auto &Reg : FuncInfo->WWMReservedRegs) { |
| 1088 | auto VGPR = Reg.first; |
| 1089 | auto FI = Reg.second; |
| 1090 | if (!FI) |
| 1091 | continue; |
| 1092 | |
| 1093 | if (!ScratchExecCopy) |
| 1094 | ScratchExecCopy = |
| 1095 | buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false); |
| 1096 | |
| 1097 | buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, VGPR, *FI); |
| 1098 | } |
| 1099 | |
| 1100 | if (ScratchExecCopy) { |
| 1101 | // FIXME: Split block and make terminator. |
| 1102 | unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; |
| 1103 | MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; |
| 1104 | BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec) |
| 1105 | .addReg(ScratchExecCopy, RegState::Kill); |
| 1106 | } |
| 1107 | } |
| 1108 | |
| 1109 | #ifndef NDEBUG |
| 1110 | static bool allSGPRSpillsAreDead(const MachineFunction &MF) { |
| 1111 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
| 1112 | const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); |
| 1113 | for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); |
| 1114 | I != E; ++I) { |
| 1115 | if (!MFI.isDeadObjectIndex(I) && |
| 1116 | MFI.getStackID(I) == TargetStackID::SGPRSpill && |
| 1117 | (I != FuncInfo->FramePointerSaveIndex && |
| 1118 | I != FuncInfo->BasePointerSaveIndex)) { |
| 1119 | return false; |
| 1120 | } |
| 1121 | } |
| 1122 | |
| 1123 | return true; |
| 1124 | } |
| 1125 | #endif |
| 1126 | |
| 1127 | StackOffset SIFrameLowering::getFrameIndexReference(const MachineFunction &MF, |
| 1128 | int FI, |
| 1129 | Register &FrameReg) const { |
| 1130 | const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo(); |
| 1131 | |
| 1132 | FrameReg = RI->getFrameRegister(MF); |
| 1133 | return StackOffset::getFixed(MF.getFrameInfo().getObjectOffset(FI)); |
| 1134 | } |
| 1135 | |
| 1136 | void SIFrameLowering::processFunctionBeforeFrameFinalized( |
| 1137 | MachineFunction &MF, |
| 1138 | RegScavenger *RS) const { |
| 1139 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
| 1140 | |
| 1141 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
| 1142 | const SIInstrInfo *TII = ST.getInstrInfo(); |
| 1143 | const SIRegisterInfo *TRI = ST.getRegisterInfo(); |
| 1144 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
| 1145 | SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); |
| 1146 | |
| 1147 | const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs() |
| 1148 | && EnableSpillVGPRToAGPR; |
| 1149 | |
| 1150 | if (SpillVGPRToAGPR) { |
| 1151 | // To track the spill frame indices handled in this pass. |
| 1152 | BitVector SpillFIs(MFI.getObjectIndexEnd(), false); |
| 1153 | |
| 1154 | bool SeenDbgInstr = false; |
| 1155 | |
| 1156 | for (MachineBasicBlock &MBB : MF) { |
| 1157 | MachineBasicBlock::iterator Next; |
| 1158 | for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) { |
| 1159 | MachineInstr &MI = *I; |
| 1160 | Next = std::next(I); |
| 1161 | |
| 1162 | if (MI.isDebugInstr()) |
| 1163 | SeenDbgInstr = true; |
| 1164 | |
| 1165 | if (TII->isVGPRSpill(MI)) { |
| 1166 | // Try to eliminate stack used by VGPR spills before frame |
| 1167 | // finalization. |
| 1168 | unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(), |
| 1169 | AMDGPU::OpName::vaddr); |
| 1170 | int FI = MI.getOperand(FIOp).getIndex(); |
| 1171 | Register VReg = |
| 1172 | TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg(); |
| 1173 | if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI, |
| 1174 | TRI->isAGPR(MRI, VReg))) { |
| 1175 | // FIXME: change to enterBasicBlockEnd() |
| 1176 | RS->enterBasicBlock(MBB); |
| 1177 | TRI->eliminateFrameIndex(MI, 0, FIOp, RS); |
| 1178 | SpillFIs.set(FI); |
| 1179 | continue; |
| 1180 | } |
| 1181 | } |
| 1182 | } |
| 1183 | } |
| 1184 | |
| 1185 | for (MachineBasicBlock &MBB : MF) { |
| 1186 | for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs()) |
| 1187 | MBB.addLiveIn(Reg); |
| 1188 | |
| 1189 | for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs()) |
| 1190 | MBB.addLiveIn(Reg); |
| 1191 | |
| 1192 | MBB.sortUniqueLiveIns(); |
| 1193 | |
| 1194 | if (!SpillFIs.empty() && SeenDbgInstr) { |
| 1195 | // FIXME: The dead frame indices are replaced with a null register from |
| 1196 | // the debug value instructions. We should instead, update it with the |
| 1197 | // correct register value. But it is not clear the register value alone is enough to lower the DIExpression. |
| 1198 | for (MachineInstr &MI : MBB) { |
| 1199 | if (MI.isDebugValue() && MI.getOperand(0).isFI() && |
| 1200 | SpillFIs[MI.getOperand(0).getIndex()]) { |
| 1201 | MI.getOperand(0).ChangeToRegister(Register(), false /*isDef*/); |
| 1202 | MI.getOperand(0).setIsDebug(); |
| 1203 | } |
| 1204 | } |
| 1205 | } |
| 1206 | } |
| 1207 | } |
| 1208 | |
| 1209 | FuncInfo->removeDeadFrameIndices(MFI); |
| 1210 | assert(allSGPRSpillsAreDead(MF) && |
| 1211 | "SGPR spill should have been removed in SILowerSGPRSpills"); |
| 1212 | |
| 1213 | // FIXME: The other checks should be redundant with allStackObjectsAreDead, |
| 1214 | // but currently hasNonSpillStackObjects is set only from source |
| 1215 | // allocas. Stack temps produced from legalization are not counted currently. |
| 1216 | if (!allStackObjectsAreDead(MFI)) { |
| 1217 | assert(RS && "RegScavenger required if spilling")((void)0); |
| 1218 | |
| 1219 | // Add an emergency spill slot |
| 1220 | RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI)); |
| 1221 | } |
| 1222 | } |
| 1223 | |
| 1224 | // Only report VGPRs to generic code. |
| 1225 | void SIFrameLowering::determineCalleeSaves(MachineFunction &MF, |
| 1226 | BitVector &SavedVGPRs, |
| 1227 | RegScavenger *RS) const { |
| 1228 | TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS); |
| 1229 | SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); |
| 1230 | if (MFI->isEntryFunction()) |
| 1231 | return; |
| 1232 | |
| 1233 | MachineFrameInfo &FrameInfo = MF.getFrameInfo(); |
| 1234 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
| 1235 | const SIRegisterInfo *TRI = ST.getRegisterInfo(); |
| 1236 | |
| 1237 | // Ignore the SGPRs the default implementation found. |
| 1238 | SavedVGPRs.clearBitsNotInMask(TRI->getAllVectorRegMask()); |
| 1239 | |
| 1240 | // Do not save AGPRs prior to GFX90A because there was no easy way to do so. |
| 1241 | // In gfx908 there were no AGPR loads and stores, so spilling an AGPR |
| 1242 | // also requires a temporary VGPR. |
| 1243 | if (!ST.hasGFX90AInsts()) |
| 1244 | SavedVGPRs.clearBitsInMask(TRI->getAllAGPRRegMask()); |
| 1245 | |
| 1246 | // hasFP only knows about stack objects that already exist. We're now |
| 1247 | // determining the stack slots that will be created, so we have to predict |
| 1248 | // them. Stack objects force FP usage with calls. |
| 1249 | // |
| 1250 | // Note a new VGPR CSR may be introduced if one is used for the spill, but we |
| 1251 | // don't want to report it here. |
| 1252 | // |
| 1253 | // FIXME: Is this really hasReservedCallFrame? |
| 1254 | const bool WillHaveFP = |
| 1255 | FrameInfo.hasCalls() && |
| 1256 | (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo)); |
| 1257 | |
| 1258 | // VGPRs used for SGPR spilling need to be specially inserted in the prolog, |
| 1259 | // so don't allow the default insertion to handle them. |
| 1260 | for (auto SSpill : MFI->getSGPRSpillVGPRs()) |
| 1261 | SavedVGPRs.reset(SSpill.VGPR); |
| 1262 | |
| 1263 | LivePhysRegs LiveRegs; |
| 1264 | LiveRegs.init(*TRI); |
| 1265 | |
| 1266 | if (WillHaveFP || hasFP(MF)) { |
| 1267 | assert(!MFI->SGPRForFPSaveRestoreCopy && !MFI->FramePointerSaveIndex &&
| 1268 |        "Re-reserving spill slot for FP");
| 1269 | getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForFPSaveRestoreCopy, |
| 1270 | MFI->FramePointerSaveIndex, true); |
| 1271 | } |
| 1272 | |
| 1273 | if (TRI->hasBasePointer(MF)) { |
| 1274 | if (MFI->SGPRForFPSaveRestoreCopy) |
| 1275 | LiveRegs.addReg(MFI->SGPRForFPSaveRestoreCopy); |
| 1276 | |
| 1277 | assert(!MFI->SGPRForBPSaveRestoreCopy &&
| 1278 |        !MFI->BasePointerSaveIndex && "Re-reserving spill slot for BP");
| 1279 | getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForBPSaveRestoreCopy, |
| 1280 | MFI->BasePointerSaveIndex, false); |
| 1281 | } |
| 1282 | } |
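// A hedged, standalone illustration of the two BitVector mask operations
// used above, modeled with std::bitset. The register count and function
// names are hypothetical.
#include <bitset>
#include <cstddef>

constexpr std::size_t NumRegsSketch = 64; // hypothetical register count
using RegSetSketch = std::bitset<NumRegsSketch>;

// clearBitsNotInMask: keep only registers inside Mask (here, vector regs),
// discarding whatever SGPRs the default implementation marked.
inline void clearBitsNotInMaskSketch(RegSetSketch &Saved,
                                     const RegSetSketch &Mask) {
  Saved &= Mask;
}

// clearBitsInMask: drop every register inside Mask (e.g. all AGPRs when the
// subtarget cannot spill them directly).
inline void clearBitsInMaskSketch(RegSetSketch &Saved,
                                  const RegSetSketch &Mask) {
  Saved &= ~Mask;
}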
| 1283 | |
| 1284 | void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF, |
| 1285 | BitVector &SavedRegs, |
| 1286 | RegScavenger *RS) const { |
| 1287 | TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); |
| 1288 | const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); |
| 1289 | if (MFI->isEntryFunction()) |
| 1290 | return; |
| 1291 | |
| 1292 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
| 1293 | const SIRegisterInfo *TRI = ST.getRegisterInfo(); |
| 1294 | |
| 1295 | // The SP is specifically managed and we don't want extra spills of it. |
| 1296 | SavedRegs.reset(MFI->getStackPtrOffsetReg()); |
| 1297 | |
| 1298 | const BitVector AllSavedRegs = SavedRegs; |
| 1299 | SavedRegs.clearBitsInMask(TRI->getAllVectorRegMask()); |
| 1300 | |
| 1301 | // If clearing VGPRs changed the mask, we will have some CSR VGPR spills. |
| 1302 | const bool HaveAnyCSRVGPR = SavedRegs != AllSavedRegs; |
| 1303 | |
| 1304 | // We have to anticipate introducing CSR VGPR spills even if there are no
| 1305 | // stack objects yet, since an FP is required once a call and a stack coexist.
| 1306 | MachineFrameInfo &FrameInfo = MF.getFrameInfo(); |
| 1307 | const bool WillHaveFP = FrameInfo.hasCalls() && HaveAnyCSRVGPR; |
| 1308 | |
| 1309 | // FP will be specially managed like SP. |
| 1310 | if (WillHaveFP || hasFP(MF)) |
| 1311 | SavedRegs.reset(MFI->getFrameOffsetReg()); |
| 1312 | } |
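// A standalone sketch of the snapshot-and-compare trick used above to detect
// CSR VGPRs, with std::bitset standing in for llvm::BitVector; all names are
// hypothetical.
#include <bitset>

inline bool haveAnyCSRVGPRSketch(std::bitset<64> Saved,
                                 const std::bitset<64> &VectorRegMask) {
  const std::bitset<64> All = Saved; // snapshot before filtering
  Saved &= ~VectorRegMask;           // clearBitsInMask(all vector registers)
  return Saved != All;               // any cleared bit was a CSR VGPR
}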
| 1313 | |
| 1314 | bool SIFrameLowering::assignCalleeSavedSpillSlots( |
| 1315 | MachineFunction &MF, const TargetRegisterInfo *TRI, |
| 1316 | std::vector<CalleeSavedInfo> &CSI) const { |
| 1317 | if (CSI.empty()) |
| 1318 | return true; // Early exit if no callee saved registers are modified! |
| 1319 | |
| 1320 | const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); |
| 1321 | if (!FuncInfo->SGPRForFPSaveRestoreCopy && |
| 1322 | !FuncInfo->SGPRForBPSaveRestoreCopy) |
| 1323 | return false; |
| 1324 | |
| 1325 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
| 1326 | const SIRegisterInfo *RI = ST.getRegisterInfo(); |
| 1327 | Register FramePtrReg = FuncInfo->getFrameOffsetReg(); |
| 1328 | Register BasePtrReg = RI->getBaseRegister(); |
| 1329 | unsigned NumModifiedRegs = 0; |
| 1330 | |
| 1331 | if (FuncInfo->SGPRForFPSaveRestoreCopy) |
| 1332 | NumModifiedRegs++; |
| 1333 | if (FuncInfo->SGPRForBPSaveRestoreCopy) |
| 1334 | NumModifiedRegs++; |
| 1335 | |
| 1336 | for (auto &CS : CSI) { |
| 1337 | if (CS.getReg() == FramePtrReg && FuncInfo->SGPRForFPSaveRestoreCopy) { |
| 1338 | CS.setDstReg(FuncInfo->SGPRForFPSaveRestoreCopy); |
| 1339 | if (--NumModifiedRegs == 0) // stop once all pending copies are assigned
| 1340 |   break;
| 1341 | } else if (CS.getReg() == BasePtrReg && |
| 1342 | FuncInfo->SGPRForBPSaveRestoreCopy) { |
| 1343 | CS.setDstReg(FuncInfo->SGPRForBPSaveRestoreCopy); |
| 1344 | if (--NumModifiedRegs == 0) // stop once all pending copies are assigned
| 1345 |   break;
| 1346 | } |
| 1347 | } |
| 1348 | |
| 1349 | return false; |
| 1350 | } |
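// A sketch of the counter-based early exit from the loop above, with
// hypothetical types; register number 0 means "no copy register reserved".
#include <vector>

struct CalleeSavedSketch {
  unsigned Reg = 0;    // callee-saved register this entry describes
  unsigned DstReg = 0; // where it should actually be saved
};

inline void redirectFPBPSaves(std::vector<CalleeSavedSketch> &CSI,
                              unsigned FPReg, unsigned FPCopy,
                              unsigned BPReg, unsigned BPCopy) {
  unsigned Pending = (FPCopy ? 1u : 0u) + (BPCopy ? 1u : 0u);
  for (CalleeSavedSketch &CS : CSI) {
    if (FPCopy && CS.Reg == FPReg)
      CS.DstReg = FPCopy;
    else if (BPCopy && CS.Reg == BPReg)
      CS.DstReg = BPCopy;
    else
      continue;
    if (--Pending == 0) // every pending register has been redirected
      break;
  }
}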
| 1351 | |
| 1352 | MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr( |
| 1353 | MachineFunction &MF, |
| 1354 | MachineBasicBlock &MBB, |
| 1355 | MachineBasicBlock::iterator I) const { |
| 1356 | int64_t Amount = I->getOperand(0).getImm(); |
| 1357 | if (Amount == 0) |
| 1358 | return MBB.erase(I); |
| 1359 | |
| 1360 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
| 1361 | const SIInstrInfo *TII = ST.getInstrInfo(); |
| 1362 | const DebugLoc &DL = I->getDebugLoc(); |
| 1363 | unsigned Opc = I->getOpcode(); |
| 1364 | bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode(); |
| 1365 | uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0; |
| 1366 | |
| 1367 | if (!hasReservedCallFrame(MF)) { |
| 1368 | Amount = alignTo(Amount, getStackAlign()); |
| 1369 | assert(isUInt<32>(Amount) && "exceeded stack address space size");
| 1370 | const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); |
| 1371 | Register SPReg = MFI->getStackPtrOffsetReg(); |
| 1372 | |
| 1373 | Amount *= getScratchScaleFactor(ST); |
| 1374 | if (IsDestroy) |
| 1375 | Amount = -Amount; |
| 1376 | BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SPReg) |
| 1377 | .addReg(SPReg) |
| 1378 | .addImm(Amount); |
| 1379 | } else if (CalleePopAmount != 0) { |
| 1380 | llvm_unreachable("is this used?");
| 1381 | } |
| 1382 | |
| 1383 | return MBB.erase(I); |
| 1384 | } |
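// A hedged sketch of the stack-pointer arithmetic above as a pure function.
// It assumes the scratch scale factor is the wavefront size (scratch offsets
// are swizzled per lane); the alignment and sign flip mirror the pass. All
// names are hypothetical.
#include <cassert>
#include <cstdint>

inline int64_t spAdjustmentSketch(uint64_t AmountBytes, uint64_t StackAlign,
                                  uint64_t WaveSize, bool IsDestroy) {
  // Round the per-lane byte count up to the stack alignment.
  uint64_t Aligned = (AmountBytes + StackAlign - 1) / StackAlign * StackAlign;
  assert(Aligned <= UINT32_MAX && "exceeded stack address space size");
  int64_t Scaled = static_cast<int64_t>(Aligned * WaveSize);
  return IsDestroy ? -Scaled : Scaled; // destroy undoes what setup added
}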
| 1385 | |
| 1386 | /// Returns true if the frame will require a reference to the stack pointer. |
| 1387 | /// |
| 1388 | /// This is the set of conditions common to setting up the stack pointer in a |
| 1389 | /// kernel, and for using a frame pointer in a callable function. |
| 1390 | /// |
| 1391 | /// FIXME: Should also check hasOpaqueSPAdjustment and if any inline asm |
| 1392 | /// references SP. |
| 1393 | static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI) { |
| 1394 | return MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint(); |
| 1395 | } |
| 1396 | |
| 1397 | // The FP for kernels is always known to be 0, so we never really need to set
| 1398 | // up an explicit register for it. However, DisableFramePointerElim will force
| 1399 | // us to use a register for it.
| 1400 | bool SIFrameLowering::hasFP(const MachineFunction &MF) const { |
| 1401 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
| 1402 | |
| 1403 | // For entry functions we can use an immediate offset in most cases, so the |
| 1404 | // presence of calls doesn't imply we need a distinct frame pointer. |
| 1405 | if (MFI.hasCalls() && |
| 1406 | !MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) { |
| 1407 | // All offsets are unsigned, so they need to be addressed in the same
| 1408 | // direction as stack growth.
| 1409 | |
| 1410 | // FIXME: This function is pretty broken, since it can be called before the |
| 1411 | // frame layout is determined or CSR spills are inserted. |
| 1412 | return MFI.getStackSize() != 0; |
| 1413 | } |
| 1414 | |
| 1415 | return frameTriviallyRequiresSP(MFI) || MFI.isFrameAddressTaken() || |
| 1416 | MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->hasStackRealignment( |
| 1417 | MF) || |
| 1418 | MF.getTarget().Options.DisableFramePointerElim(MF); |
| 1419 | } |
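// A condensed sketch of the FP decision above as a pure predicate over
// hypothetical booleans standing in for the MachineFunction queries.
inline bool needsFPSketch(bool IsEntry, bool HasCalls, bool NonZeroStack,
                          bool TriviallyNeedsSP, bool FrameAddrTaken,
                          bool Realigned, bool FPElimDisabled) {
  if (HasCalls && !IsEntry)
    return NonZeroStack; // unsigned offsets must follow stack growth
  return TriviallyNeedsSP || FrameAddrTaken || Realigned || FPElimDisabled;
}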
| 1420 | |
| 1421 | // This is essentially a reduced version of hasFP for entry functions. Since
| 1422 | // the stack pointer is known to be 0 on entry to kernels, we never really
| 1423 | // need an FP register. We may need to initialize the stack pointer depending
| 1424 | // on the frame properties, which logically overlaps many of the cases where
| 1425 | // an ordinary function would require an FP.
| 1426 | bool SIFrameLowering::requiresStackPointerReference( |
| 1427 | const MachineFunction &MF) const { |
| 1428 | // Callable functions always require a stack pointer reference. |
| 1429 | assert(MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() &&
| 1430 |        "only expected to call this for entry points");
| 1431 | |
| 1432 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
| 1433 | |
| 1434 | // Entry points ordinarily don't need to initialize SP. We have to set it up |
| 1435 | // for callees if there are any. Also note tail calls are impossible/don't |
| 1436 | // make any sense for kernels. |
| 1437 | if (MFI.hasCalls()) |
| 1438 | return true; |
| 1439 | |
| 1440 | // We still need to initialize the SP if we're doing anything weird that |
| 1441 | // references the SP, like variable sized stack objects. |
| 1442 | return frameTriviallyRequiresSP(MFI); |
| 1443 | } |
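// A sketch of the kernel-side SP decision above as a pure predicate;
// parameter names are hypothetical.
inline bool kernelNeedsSPSketch(bool HasCalls, bool TriviallyNeedsSP) {
  // Kernels set up SP only for callees, or for frames that reference it
  // (variable-sized objects, stackmaps, patchpoints).
  return HasCalls || TriviallyNeedsSP;
}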