File: | src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp |
Warning: | line 945, column 8 Although the value stored to 'HasBP' is used in the enclosing expression, the value is never actually read from 'HasBP' |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===----------------------- SIFrameLowering.cpp --------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //==-----------------------------------------------------------------------===// |
8 | |
9 | #include "SIFrameLowering.h" |
10 | #include "AMDGPU.h" |
11 | #include "GCNSubtarget.h" |
12 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
13 | #include "SIMachineFunctionInfo.h" |
14 | #include "llvm/CodeGen/LivePhysRegs.h" |
15 | #include "llvm/CodeGen/MachineFrameInfo.h" |
16 | #include "llvm/CodeGen/RegisterScavenging.h" |
17 | #include "llvm/Target/TargetMachine.h" |
18 | |
19 | using namespace llvm; |
20 | |
#define DEBUG_TYPE "frame-info"
22 | |
23 | static cl::opt<bool> EnableSpillVGPRToAGPR( |
24 | "amdgpu-spill-vgpr-to-agpr", |
25 | cl::desc("Enable spilling VGPRs to AGPRs"), |
26 | cl::ReallyHidden, |
27 | cl::init(true)); |
28 | |
29 | // Find a scratch register that we can use in the prologue. We avoid using |
30 | // callee-save registers since they may appear to be free when this is called |
31 | // from canUseAsPrologue (during shrink wrapping), but then no longer be free |
32 | // when this is called from emitPrologue. |
33 | static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI, |
34 | LivePhysRegs &LiveRegs, |
35 | const TargetRegisterClass &RC, |
36 | bool Unused = false) { |
37 | // Mark callee saved registers as used so we will not choose them. |
38 | const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs(); |
39 | for (unsigned i = 0; CSRegs[i]; ++i) |
40 | LiveRegs.addReg(CSRegs[i]); |
41 | |
42 | if (Unused) { |
43 | // We are looking for a register that can be used throughout the entire |
44 | // function, so any use is unacceptable. |
45 | for (MCRegister Reg : RC) { |
46 | if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg)) |
47 | return Reg; |
48 | } |
49 | } else { |
50 | for (MCRegister Reg : RC) { |
51 | if (LiveRegs.available(MRI, Reg)) |
52 | return Reg; |
53 | } |
54 | } |
55 | |
56 | return MCRegister(); |
57 | } |
58 | |
59 | static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF, |
60 | LivePhysRegs &LiveRegs, |
61 | Register &TempSGPR, |
62 | Optional<int> &FrameIndex, |
63 | bool IsFP) { |
64 | SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); |
65 | MachineFrameInfo &FrameInfo = MF.getFrameInfo(); |
66 | |
67 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
68 | const SIRegisterInfo *TRI = ST.getRegisterInfo(); |
69 | |
70 | // We need to save and restore the current FP/BP. |
71 | |
72 | // 1: If there is already a VGPR with free lanes, use it. We |
73 | // may already have to pay the penalty for spilling a CSR VGPR. |
74 | if (MFI->haveFreeLanesForSGPRSpill(MF, 1)) { |
75 | int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr, |
76 | TargetStackID::SGPRSpill); |
77 | |
78 | if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI)) |
79 | llvm_unreachable("allocate SGPR spill should have worked")__builtin_unreachable(); |
80 | |
81 | FrameIndex = NewFI; |
82 | |
83 | LLVM_DEBUG(auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();do { } while (false) |
84 | dbgs() << "Spilling " << (IsFP ? "FP" : "BP") << " to "do { } while (false) |
85 | << printReg(Spill.VGPR, TRI) << ':' << Spill.Lanedo { } while (false) |
86 | << '\n')do { } while (false); |
87 | return; |
88 | } |
89 | |
90 | // 2: Next, try to save the FP/BP in an unused SGPR. |
91 | TempSGPR = findScratchNonCalleeSaveRegister( |
92 | MF.getRegInfo(), LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass, true); |
93 | |
94 | if (!TempSGPR) { |
95 | int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr, |
96 | TargetStackID::SGPRSpill); |
97 | |
98 | if (TRI->spillSGPRToVGPR() && MFI->allocateSGPRSpillToVGPR(MF, NewFI)) { |
99 | // 3: There's no free lane to spill, and no free register to save FP/BP, |
100 | // so we're forced to spill another VGPR to use for the spill. |
101 | FrameIndex = NewFI; |
102 | |
103 | LLVM_DEBUG(do { } while (false) |
104 | auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();do { } while (false) |
105 | dbgs() << (IsFP ? "FP" : "BP") << " requires fallback spill to "do { } while (false) |
106 | << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane << '\n';)do { } while (false); |
107 | } else { |
108 | // Remove dead <NewFI> index |
109 | MF.getFrameInfo().RemoveStackObject(NewFI); |
110 | // 4: If all else fails, spill the FP/BP to memory. |
111 | FrameIndex = FrameInfo.CreateSpillStackObject(4, Align(4)); |
112 | LLVM_DEBUG(dbgs() << "Reserved FI " << FrameIndex << " for spilling "do { } while (false) |
113 | << (IsFP ? "FP" : "BP") << '\n')do { } while (false); |
114 | } |
115 | } else { |
116 | LLVM_DEBUG(dbgs() << "Saving " << (IsFP ? "FP" : "BP") << " with copy to "do { } while (false) |
117 | << printReg(TempSGPR, TRI) << '\n')do { } while (false); |
118 | } |
119 | } |
120 | |
121 | // We need to specially emit stack operations here because a different frame |
122 | // register is used than in the rest of the function, as getFrameRegister would |
123 | // use. |
124 | static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI, |
125 | const SIMachineFunctionInfo &FuncInfo, |
126 | LivePhysRegs &LiveRegs, MachineFunction &MF, |
127 | MachineBasicBlock &MBB, |
128 | MachineBasicBlock::iterator I, Register SpillReg, |
129 | int FI) { |
130 | unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR |
131 | : AMDGPU::BUFFER_STORE_DWORD_OFFSET; |
132 | |
133 | MachineFrameInfo &FrameInfo = MF.getFrameInfo(); |
134 | MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); |
135 | MachineMemOperand *MMO = MF.getMachineMemOperand( |
136 | PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI), |
137 | FrameInfo.getObjectAlign(FI)); |
138 | LiveRegs.addReg(SpillReg); |
139 | TRI.buildSpillLoadStore(MBB, I, Opc, FI, SpillReg, true, |
140 | FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr, |
141 | &LiveRegs); |
142 | LiveRegs.removeReg(SpillReg); |
143 | } |
144 | |
145 | static void buildEpilogRestore(const GCNSubtarget &ST, |
146 | const SIRegisterInfo &TRI, |
147 | const SIMachineFunctionInfo &FuncInfo, |
148 | LivePhysRegs &LiveRegs, MachineFunction &MF, |
149 | MachineBasicBlock &MBB, |
150 | MachineBasicBlock::iterator I, Register SpillReg, |
151 | int FI) { |
152 | unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR |
153 | : AMDGPU::BUFFER_LOAD_DWORD_OFFSET; |
154 | |
155 | MachineFrameInfo &FrameInfo = MF.getFrameInfo(); |
156 | MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); |
157 | MachineMemOperand *MMO = MF.getMachineMemOperand( |
158 | PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI), |
159 | FrameInfo.getObjectAlign(FI)); |
160 | TRI.buildSpillLoadStore(MBB, I, Opc, FI, SpillReg, false, |
161 | FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr, |
162 | &LiveRegs); |
163 | } |
164 | |
165 | static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, |
166 | const DebugLoc &DL, const SIInstrInfo *TII, |
167 | Register TargetReg) { |
168 | MachineFunction *MF = MBB.getParent(); |
169 | const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); |
170 | const SIRegisterInfo *TRI = &TII->getRegisterInfo(); |
171 | const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32); |
172 | Register TargetLo = TRI->getSubReg(TargetReg, AMDGPU::sub0); |
173 | Register TargetHi = TRI->getSubReg(TargetReg, AMDGPU::sub1); |
174 | |
175 | if (MFI->getGITPtrHigh() != 0xffffffff) { |
176 | BuildMI(MBB, I, DL, SMovB32, TargetHi) |
177 | .addImm(MFI->getGITPtrHigh()) |
178 | .addReg(TargetReg, RegState::ImplicitDefine); |
179 | } else { |
180 | const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64); |
181 | BuildMI(MBB, I, DL, GetPC64, TargetReg); |
182 | } |
183 | Register GitPtrLo = MFI->getGITPtrLoReg(*MF); |
184 | MF->getRegInfo().addLiveIn(GitPtrLo); |
185 | MBB.addLiveIn(GitPtrLo); |
186 | BuildMI(MBB, I, DL, SMovB32, TargetLo) |
187 | .addReg(GitPtrLo); |
188 | } |
189 | |
190 | // Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()` |
191 | void SIFrameLowering::emitEntryFunctionFlatScratchInit( |
192 | MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, |
193 | const DebugLoc &DL, Register ScratchWaveOffsetReg) const { |
194 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
195 | const SIInstrInfo *TII = ST.getInstrInfo(); |
196 | const SIRegisterInfo *TRI = &TII->getRegisterInfo(); |
197 | const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); |
198 | |
199 | // We don't need this if we only have spills since there is no user facing |
200 | // scratch. |
201 | |
202 | // TODO: If we know we don't have flat instructions earlier, we can omit |
203 | // this from the input registers. |
204 | // |
205 | // TODO: We only need to know if we access scratch space through a flat |
206 | // pointer. Because we only detect if flat instructions are used at all, |
207 | // this will be used more often than necessary on VI. |
208 | |
209 | Register FlatScrInitLo; |
210 | Register FlatScrInitHi; |
211 | |
212 | if (ST.isAmdPalOS()) { |
213 | // Extract the scratch offset from the descriptor in the GIT |
214 | LivePhysRegs LiveRegs; |
215 | LiveRegs.init(*TRI); |
216 | LiveRegs.addLiveIns(MBB); |
217 | |
218 | // Find unused reg to load flat scratch init into |
219 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
220 | Register FlatScrInit = AMDGPU::NoRegister; |
221 | ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64(MF); |
222 | unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 1) / 2; |
223 | AllSGPR64s = AllSGPR64s.slice( |
224 | std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded)); |
225 | Register GITPtrLoReg = MFI->getGITPtrLoReg(MF); |
226 | for (MCPhysReg Reg : AllSGPR64s) { |
227 | if (LiveRegs.available(MRI, Reg) && MRI.isAllocatable(Reg) && |
228 | !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) { |
229 | FlatScrInit = Reg; |
230 | break; |
231 | } |
232 | } |
233 | assert(FlatScrInit && "Failed to find free register for scratch init")((void)0); |
234 | |
235 | FlatScrInitLo = TRI->getSubReg(FlatScrInit, AMDGPU::sub0); |
236 | FlatScrInitHi = TRI->getSubReg(FlatScrInit, AMDGPU::sub1); |
237 | |
238 | buildGitPtr(MBB, I, DL, TII, FlatScrInit); |
239 | |
240 | // We now have the GIT ptr - now get the scratch descriptor from the entry |
241 | // at offset 0 (or offset 16 for a compute shader). |
242 | MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS); |
243 | const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM); |
244 | auto *MMO = MF.getMachineMemOperand( |
245 | PtrInfo, |
246 | MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant | |
247 | MachineMemOperand::MODereferenceable, |
248 | 8, Align(4)); |
249 | unsigned Offset = |
250 | MF.getFunction().getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0; |
251 | const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>(); |
252 | unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset); |
253 | BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit) |
254 | .addReg(FlatScrInit) |
255 | .addImm(EncodedOffset) // offset |
256 | .addImm(0) // cpol |
257 | .addMemOperand(MMO); |
258 | |
259 | // Mask the offset in [47:0] of the descriptor |
260 | const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32); |
261 | BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi) |
262 | .addReg(FlatScrInitHi) |
263 | .addImm(0xffff); |
264 | } else { |
265 | Register FlatScratchInitReg = |
266 | MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT); |
267 | assert(FlatScratchInitReg)((void)0); |
268 | |
269 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
270 | MRI.addLiveIn(FlatScratchInitReg); |
271 | MBB.addLiveIn(FlatScratchInitReg); |
272 | |
273 | FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0); |
274 | FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1); |
275 | } |
276 | |
277 | // Do a 64-bit pointer add. |
278 | if (ST.flatScratchIsPointer()) { |
279 | if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) { |
280 | BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo) |
281 | .addReg(FlatScrInitLo) |
282 | .addReg(ScratchWaveOffsetReg); |
283 | BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), FlatScrInitHi) |
284 | .addReg(FlatScrInitHi) |
285 | .addImm(0); |
286 | BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)). |
287 | addReg(FlatScrInitLo). |
288 | addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO | |
289 | (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_))); |
290 | BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)). |
291 | addReg(FlatScrInitHi). |
292 | addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI | |
293 | (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_))); |
294 | return; |
295 | } |
296 | |
297 | // For GFX9. |
298 | BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO) |
299 | .addReg(FlatScrInitLo) |
300 | .addReg(ScratchWaveOffsetReg); |
301 | BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), AMDGPU::FLAT_SCR_HI) |
302 | .addReg(FlatScrInitHi) |
303 | .addImm(0); |
304 | |
305 | return; |
306 | } |
307 | |
308 | assert(ST.getGeneration() < AMDGPUSubtarget::GFX9)((void)0); |
309 | |
310 | // Copy the size in bytes. |
311 | BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO) |
312 | .addReg(FlatScrInitHi, RegState::Kill); |
313 | |
314 | // Add wave offset in bytes to private base offset. |
315 | // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init. |
316 | BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), FlatScrInitLo) |
317 | .addReg(FlatScrInitLo) |
318 | .addReg(ScratchWaveOffsetReg); |
319 | |
320 | // Convert offset to 256-byte units. |
321 | BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI) |
322 | .addReg(FlatScrInitLo, RegState::Kill) |
323 | .addImm(8); |
324 | } |
325 | |
326 | // Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not |
327 | // memory. They should have been removed by now. |
328 | static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) { |
329 | for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); |
330 | I != E; ++I) { |
331 | if (!MFI.isDeadObjectIndex(I)) |
332 | return false; |
333 | } |
334 | |
335 | return true; |
336 | } |
337 | |
338 | // Shift down registers reserved for the scratch RSRC. |
339 | Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg( |
340 | MachineFunction &MF) const { |
341 | |
342 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
343 | const SIInstrInfo *TII = ST.getInstrInfo(); |
344 | const SIRegisterInfo *TRI = &TII->getRegisterInfo(); |
345 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
346 | SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); |
347 | |
348 | assert(MFI->isEntryFunction())((void)0); |
349 | |
350 | Register ScratchRsrcReg = MFI->getScratchRSrcReg(); |
351 | |
352 | if (!ScratchRsrcReg || (!MRI.isPhysRegUsed(ScratchRsrcReg) && |
353 | allStackObjectsAreDead(MF.getFrameInfo()))) |
354 | return Register(); |
355 | |
356 | if (ST.hasSGPRInitBug() || |
357 | ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF)) |
358 | return ScratchRsrcReg; |
359 | |
360 | // We reserved the last registers for this. Shift it down to the end of those |
361 | // which were actually used. |
362 | // |
363 | // FIXME: It might be safer to use a pseudoregister before replacement. |
364 | |
365 | // FIXME: We should be able to eliminate unused input registers. We only |
366 | // cannot do this for the resources required for scratch access. For now we |
367 | // skip over user SGPRs and may leave unused holes. |
368 | |
369 | unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4; |
370 | ArrayRef<MCPhysReg> AllSGPR128s = TRI->getAllSGPR128(MF); |
371 | AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded)); |
372 | |
373 | // Skip the last N reserved elements because they should have already been |
374 | // reserved for VCC etc. |
375 | Register GITPtrLoReg = MFI->getGITPtrLoReg(MF); |
376 | for (MCPhysReg Reg : AllSGPR128s) { |
377 | // Pick the first unallocated one. Make sure we don't clobber the other |
378 | // reserved input we needed. Also for PAL, make sure we don't clobber |
379 | // the GIT pointer passed in SGPR0 or SGPR8. |
380 | if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) && |
381 | !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) { |
382 | MRI.replaceRegWith(ScratchRsrcReg, Reg); |
383 | MFI->setScratchRSrcReg(Reg); |
384 | return Reg; |
385 | } |
386 | } |
387 | |
388 | return ScratchRsrcReg; |
389 | } |
390 | |
391 | static unsigned getScratchScaleFactor(const GCNSubtarget &ST) { |
392 | return ST.enableFlatScratch() ? 1 : ST.getWavefrontSize(); |
393 | } |
394 | |
395 | void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, |
396 | MachineBasicBlock &MBB) const { |
397 | assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported")((void)0); |
398 | |
399 | // FIXME: If we only have SGPR spills, we won't actually be using scratch |
400 | // memory since these spill to VGPRs. We should be cleaning up these unused |
401 | // SGPR spill frame indices somewhere. |
402 | |
403 | // FIXME: We still have implicit uses on SGPR spill instructions in case they |
404 | // need to spill to vector memory. It's likely that will not happen, but at |
405 | // this point it appears we need the setup. This part of the prolog should be |
406 | // emitted after frame indices are eliminated. |
407 | |
408 | // FIXME: Remove all of the isPhysRegUsed checks |
409 | |
410 | SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); |
411 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
412 | const SIInstrInfo *TII = ST.getInstrInfo(); |
413 | const SIRegisterInfo *TRI = &TII->getRegisterInfo(); |
414 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
415 | const Function &F = MF.getFunction(); |
416 | MachineFrameInfo &FrameInfo = MF.getFrameInfo(); |
417 | |
418 | assert(MFI->isEntryFunction())((void)0); |
419 | |
420 | Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg( |
421 | AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET); |
422 | // FIXME: Hack to not crash in situations which emitted an error. |
423 | if (!PreloadedScratchWaveOffsetReg) |
424 | return; |
425 | |
426 | // We need to do the replacement of the private segment buffer register even |
427 | // if there are no stack objects. There could be stores to undef or a |
428 | // constant without an associated object. |
429 | // |
430 | // This will return `Register()` in cases where there are no actual |
431 | // uses of the SRSRC. |
432 | Register ScratchRsrcReg; |
433 | if (!ST.enableFlatScratch()) |
434 | ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF); |
435 | |
436 | // Make the selected register live throughout the function. |
437 | if (ScratchRsrcReg) { |
438 | for (MachineBasicBlock &OtherBB : MF) { |
439 | if (&OtherBB != &MBB) { |
440 | OtherBB.addLiveIn(ScratchRsrcReg); |
441 | } |
442 | } |
443 | } |
444 | |
445 | // Now that we have fixed the reserved SRSRC we need to locate the |
446 | // (potentially) preloaded SRSRC. |
447 | Register PreloadedScratchRsrcReg; |
448 | if (ST.isAmdHsaOrMesa(F)) { |
449 | PreloadedScratchRsrcReg = |
450 | MFI->getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER); |
451 | if (ScratchRsrcReg && PreloadedScratchRsrcReg) { |
452 | // We added live-ins during argument lowering, but since they were not |
453 | // used they were deleted. We're adding the uses now, so add them back. |
454 | MRI.addLiveIn(PreloadedScratchRsrcReg); |
455 | MBB.addLiveIn(PreloadedScratchRsrcReg); |
456 | } |
457 | } |
458 | |
459 | // Debug location must be unknown since the first debug location is used to |
460 | // determine the end of the prologue. |
461 | DebugLoc DL; |
462 | MachineBasicBlock::iterator I = MBB.begin(); |
463 | |
464 | // We found the SRSRC first because it needs four registers and has an |
465 | // alignment requirement. If the SRSRC that we found is clobbering with |
466 | // the scratch wave offset, which may be in a fixed SGPR or a free SGPR |
467 | // chosen by SITargetLowering::allocateSystemSGPRs, COPY the scratch |
468 | // wave offset to a free SGPR. |
469 | Register ScratchWaveOffsetReg; |
470 | if (TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) { |
471 | ArrayRef<MCPhysReg> AllSGPRs = TRI->getAllSGPR32(MF); |
472 | unsigned NumPreloaded = MFI->getNumPreloadedSGPRs(); |
473 | AllSGPRs = AllSGPRs.slice( |
474 | std::min(static_cast<unsigned>(AllSGPRs.size()), NumPreloaded)); |
475 | Register GITPtrLoReg = MFI->getGITPtrLoReg(MF); |
476 | for (MCPhysReg Reg : AllSGPRs) { |
477 | if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) && |
478 | !TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) { |
479 | ScratchWaveOffsetReg = Reg; |
480 | BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg) |
481 | .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill); |
482 | break; |
483 | } |
484 | } |
485 | } else { |
486 | ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg; |
487 | } |
488 | assert(ScratchWaveOffsetReg)((void)0); |
489 | |
490 | if (requiresStackPointerReference(MF)) { |
491 | Register SPReg = MFI->getStackPtrOffsetReg(); |
492 | assert(SPReg != AMDGPU::SP_REG)((void)0); |
493 | BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg) |
494 | .addImm(FrameInfo.getStackSize() * getScratchScaleFactor(ST)); |
495 | } |
496 | |
497 | if (hasFP(MF)) { |
498 | Register FPReg = MFI->getFrameOffsetReg(); |
499 | assert(FPReg != AMDGPU::FP_REG)((void)0); |
500 | BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0); |
501 | } |
502 | |
503 | bool NeedsFlatScratchInit = |
504 | MFI->hasFlatScratchInit() && |
505 | (MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() || |
506 | (!allStackObjectsAreDead(FrameInfo) && ST.enableFlatScratch())); |
507 | |
508 | if ((NeedsFlatScratchInit || ScratchRsrcReg) && |
509 | !ST.flatScratchIsArchitected()) { |
510 | MRI.addLiveIn(PreloadedScratchWaveOffsetReg); |
511 | MBB.addLiveIn(PreloadedScratchWaveOffsetReg); |
512 | } |
513 | |
514 | if (NeedsFlatScratchInit) { |
515 | emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg); |
516 | } |
517 | |
518 | if (ScratchRsrcReg) { |
519 | emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL, |
520 | PreloadedScratchRsrcReg, |
521 | ScratchRsrcReg, ScratchWaveOffsetReg); |
522 | } |
523 | } |
524 | |
525 | // Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg` |
526 | void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup( |
527 | MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, |
528 | const DebugLoc &DL, Register PreloadedScratchRsrcReg, |
529 | Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const { |
530 | |
531 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
532 | const SIInstrInfo *TII = ST.getInstrInfo(); |
533 | const SIRegisterInfo *TRI = &TII->getRegisterInfo(); |
534 | const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); |
535 | const Function &Fn = MF.getFunction(); |
536 | |
537 | if (ST.isAmdPalOS()) { |
538 | // The pointer to the GIT is formed from the offset passed in and either |
539 | // the amdgpu-git-ptr-high function attribute or the top part of the PC |
540 | Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1); |
541 | Register Rsrc03 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3); |
542 | |
543 | buildGitPtr(MBB, I, DL, TII, Rsrc01); |
544 | |
545 | // We now have the GIT ptr - now get the scratch descriptor from the entry |
546 | // at offset 0 (or offset 16 for a compute shader). |
547 | MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS); |
548 | const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM); |
549 | auto MMO = MF.getMachineMemOperand(PtrInfo, |
550 | MachineMemOperand::MOLoad | |
551 | MachineMemOperand::MOInvariant | |
552 | MachineMemOperand::MODereferenceable, |
553 | 16, Align(4)); |
554 | unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0; |
555 | const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>(); |
556 | unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset); |
557 | BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg) |
558 | .addReg(Rsrc01) |
559 | .addImm(EncodedOffset) // offset |
560 | .addImm(0) // cpol |
561 | .addReg(ScratchRsrcReg, RegState::ImplicitDefine) |
562 | .addMemOperand(MMO); |
563 | |
564 | // The driver will always set the SRD for wave 64 (bits 118:117 of |
565 | // descriptor / bits 22:21 of third sub-reg will be 0b11) |
566 | // If the shader is actually wave32 we have to modify the const_index_stride |
567 | // field of the descriptor 3rd sub-reg (bits 22:21) to 0b10 (stride=32). The |
568 | // reason the driver does this is that there can be cases where it presents |
569 | // 2 shaders with different wave size (e.g. VsFs). |
570 | // TODO: convert to using SCRATCH instructions or multiple SRD buffers |
571 | if (ST.isWave32()) { |
572 | const MCInstrDesc &SBitsetB32 = TII->get(AMDGPU::S_BITSET0_B32); |
573 | BuildMI(MBB, I, DL, SBitsetB32, Rsrc03) |
574 | .addImm(21) |
575 | .addReg(Rsrc03); |
576 | } |
577 | } else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) { |
578 | assert(!ST.isAmdHsaOrMesa(Fn))((void)0); |
579 | const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32); |
580 | |
581 | Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2); |
582 | Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3); |
583 | |
584 | // Use relocations to get the pointer, and setup the other bits manually. |
585 | uint64_t Rsrc23 = TII->getScratchRsrcWords23(); |
586 | |
587 | if (MFI->hasImplicitBufferPtr()) { |
588 | Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1); |
589 | |
590 | if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) { |
591 | const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64); |
592 | |
593 | BuildMI(MBB, I, DL, Mov64, Rsrc01) |
594 | .addReg(MFI->getImplicitBufferPtrUserSGPR()) |
595 | .addReg(ScratchRsrcReg, RegState::ImplicitDefine); |
596 | } else { |
597 | const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM); |
598 | |
599 | MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS); |
600 | auto MMO = MF.getMachineMemOperand( |
601 | PtrInfo, |
602 | MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant | |
603 | MachineMemOperand::MODereferenceable, |
604 | 8, Align(4)); |
605 | BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01) |
606 | .addReg(MFI->getImplicitBufferPtrUserSGPR()) |
607 | .addImm(0) // offset |
608 | .addImm(0) // cpol |
609 | .addMemOperand(MMO) |
610 | .addReg(ScratchRsrcReg, RegState::ImplicitDefine); |
611 | |
612 | MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR()); |
613 | MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR()); |
614 | } |
615 | } else { |
616 | Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); |
617 | Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); |
618 | |
619 | BuildMI(MBB, I, DL, SMovB32, Rsrc0) |
620 | .addExternalSymbol("SCRATCH_RSRC_DWORD0") |
621 | .addReg(ScratchRsrcReg, RegState::ImplicitDefine); |
622 | |
623 | BuildMI(MBB, I, DL, SMovB32, Rsrc1) |
624 | .addExternalSymbol("SCRATCH_RSRC_DWORD1") |
625 | .addReg(ScratchRsrcReg, RegState::ImplicitDefine); |
626 | |
627 | } |
628 | |
629 | BuildMI(MBB, I, DL, SMovB32, Rsrc2) |
630 | .addImm(Rsrc23 & 0xffffffff) |
631 | .addReg(ScratchRsrcReg, RegState::ImplicitDefine); |
632 | |
633 | BuildMI(MBB, I, DL, SMovB32, Rsrc3) |
634 | .addImm(Rsrc23 >> 32) |
635 | .addReg(ScratchRsrcReg, RegState::ImplicitDefine); |
636 | } else if (ST.isAmdHsaOrMesa(Fn)) { |
637 | assert(PreloadedScratchRsrcReg)((void)0); |
638 | |
639 | if (ScratchRsrcReg != PreloadedScratchRsrcReg) { |
640 | BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg) |
641 | .addReg(PreloadedScratchRsrcReg, RegState::Kill); |
642 | } |
643 | } |
644 | |
645 | // Add the scratch wave offset into the scratch RSRC. |
646 | // |
647 | // We only want to update the first 48 bits, which is the base address |
648 | // pointer, without touching the adjacent 16 bits of flags. We know this add |
649 | // cannot carry-out from bit 47, otherwise the scratch allocation would be |
650 | // impossible to fit in the 48-bit global address space. |
651 | // |
652 | // TODO: Evaluate if it is better to just construct an SRD using the flat |
653 | // scratch init and some constants rather than update the one we are passed. |
654 | Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); |
655 | Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); |
656 | |
657 | // We cannot Kill ScratchWaveOffsetReg here because we allow it to be used in |
658 | // the kernel body via inreg arguments. |
659 | BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0) |
660 | .addReg(ScratchRsrcSub0) |
661 | .addReg(ScratchWaveOffsetReg) |
662 | .addReg(ScratchRsrcReg, RegState::ImplicitDefine); |
663 | BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1) |
664 | .addReg(ScratchRsrcSub1) |
665 | .addImm(0) |
666 | .addReg(ScratchRsrcReg, RegState::ImplicitDefine); |
667 | } |
668 | |
669 | bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const { |
670 | switch (ID) { |
671 | case TargetStackID::Default: |
672 | case TargetStackID::NoAlloc: |
673 | case TargetStackID::SGPRSpill: |
674 | return true; |
675 | case TargetStackID::ScalableVector: |
676 | case TargetStackID::WasmLocal: |
677 | return false; |
678 | } |
679 | llvm_unreachable("Invalid TargetStackID::Value")__builtin_unreachable(); |
680 | } |
681 | |
682 | static void initLiveRegs(LivePhysRegs &LiveRegs, const SIRegisterInfo &TRI, |
683 | const SIMachineFunctionInfo *FuncInfo, |
684 | MachineFunction &MF, MachineBasicBlock &MBB, |
685 | MachineBasicBlock::iterator MBBI, bool IsProlog) { |
686 | if (LiveRegs.empty()) { |
687 | LiveRegs.init(TRI); |
688 | if (IsProlog) { |
689 | LiveRegs.addLiveIns(MBB); |
690 | } else { |
691 | // In epilog. |
692 | LiveRegs.addLiveOuts(MBB); |
693 | LiveRegs.stepBackward(*MBBI); |
694 | } |
695 | } |
696 | } |
697 | |
698 | // Activate all lanes, returns saved exec. |
699 | static Register buildScratchExecCopy(LivePhysRegs &LiveRegs, |
700 | MachineFunction &MF, |
701 | MachineBasicBlock &MBB, |
702 | MachineBasicBlock::iterator MBBI, |
703 | bool IsProlog) { |
704 | Register ScratchExecCopy; |
705 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
706 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
707 | const SIInstrInfo *TII = ST.getInstrInfo(); |
708 | const SIRegisterInfo &TRI = TII->getRegisterInfo(); |
709 | SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); |
710 | DebugLoc DL; |
711 | |
712 | initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, IsProlog); |
713 | |
714 | ScratchExecCopy = findScratchNonCalleeSaveRegister( |
715 | MRI, LiveRegs, *TRI.getWaveMaskRegClass()); |
716 | if (!ScratchExecCopy) |
717 | report_fatal_error("failed to find free scratch register"); |
718 | |
719 | LiveRegs.addReg(ScratchExecCopy); |
720 | |
721 | const unsigned OrSaveExec = |
722 | ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64; |
723 | BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy).addImm(-1); |
724 | |
725 | return ScratchExecCopy; |
726 | } |
727 | |
728 | // A StackID of SGPRSpill implies that this is a spill from SGPR to VGPR. |
729 | // Otherwise we are spilling to memory. |
730 | static bool spilledToMemory(const MachineFunction &MF, int SaveIndex) { |
731 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
732 | return MFI.getStackID(SaveIndex) != TargetStackID::SGPRSpill; |
733 | } |
734 | |
735 | void SIFrameLowering::emitPrologue(MachineFunction &MF, |
736 | MachineBasicBlock &MBB) const { |
737 | SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); |
738 | if (FuncInfo->isEntryFunction()) { |
739 | emitEntryFunctionPrologue(MF, MBB); |
740 | return; |
741 | } |
742 | |
743 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
744 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
745 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
746 | const SIInstrInfo *TII = ST.getInstrInfo(); |
747 | const SIRegisterInfo &TRI = TII->getRegisterInfo(); |
748 | |
749 | Register StackPtrReg = FuncInfo->getStackPtrOffsetReg(); |
750 | Register FramePtrReg = FuncInfo->getFrameOffsetReg(); |
751 | Register BasePtrReg = |
752 | TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register(); |
753 | LivePhysRegs LiveRegs; |
754 | |
755 | MachineBasicBlock::iterator MBBI = MBB.begin(); |
756 | DebugLoc DL; |
757 | |
758 | bool HasFP = false; |
759 | bool HasBP = false; |
760 | uint32_t NumBytes = MFI.getStackSize(); |
761 | uint32_t RoundedSize = NumBytes; |
762 | // To avoid clobbering VGPRs in lanes that weren't active on function entry, |
763 | // turn on all lanes before doing the spill to memory. |
764 | Register ScratchExecCopy; |
765 | |
766 | Optional<int> FPSaveIndex = FuncInfo->FramePointerSaveIndex; |
767 | Optional<int> BPSaveIndex = FuncInfo->BasePointerSaveIndex; |
768 | |
769 | // VGPRs used for SGPR->VGPR spills |
770 | for (const SIMachineFunctionInfo::SGPRSpillVGPR &Reg : |
771 | FuncInfo->getSGPRSpillVGPRs()) { |
772 | if (!Reg.FI) |
773 | continue; |
774 | |
775 | if (!ScratchExecCopy) |
776 | ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, |
777 | /*IsProlog*/ true); |
778 | |
779 | buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, Reg.VGPR, |
780 | *Reg.FI); |
781 | } |
782 | |
783 | // VGPRs used for Whole Wave Mode |
784 | for (const auto &Reg : FuncInfo->WWMReservedRegs) { |
785 | auto VGPR = Reg.first; |
786 | auto FI = Reg.second; |
787 | if (!FI) |
788 | continue; |
789 | |
790 | if (!ScratchExecCopy) |
791 | ScratchExecCopy = |
792 | buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ true); |
793 | |
794 | buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, VGPR, *FI); |
795 | } |
796 | |
797 | if (ScratchExecCopy) { |
798 | // FIXME: Split block and make terminator. |
799 | unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; |
800 | MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; |
801 | BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec) |
802 | .addReg(ScratchExecCopy, RegState::Kill); |
803 | LiveRegs.addReg(ScratchExecCopy); |
804 | } |
805 | |
806 | if (FPSaveIndex && spilledToMemory(MF, *FPSaveIndex)) { |
807 | const int FramePtrFI = *FPSaveIndex; |
808 | assert(!MFI.isDeadObjectIndex(FramePtrFI))((void)0); |
809 | |
810 | initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true); |
811 | |
812 | MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister( |
813 | MRI, LiveRegs, AMDGPU::VGPR_32RegClass); |
814 | if (!TmpVGPR) |
815 | report_fatal_error("failed to find free scratch register"); |
816 | |
817 | BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR) |
818 | .addReg(FramePtrReg); |
819 | |
820 | buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR, |
821 | FramePtrFI); |
822 | } |
823 | |
824 | if (BPSaveIndex && spilledToMemory(MF, *BPSaveIndex)) { |
825 | const int BasePtrFI = *BPSaveIndex; |
826 | assert(!MFI.isDeadObjectIndex(BasePtrFI))((void)0); |
827 | |
828 | initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true); |
829 | |
830 | MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister( |
831 | MRI, LiveRegs, AMDGPU::VGPR_32RegClass); |
832 | if (!TmpVGPR) |
833 | report_fatal_error("failed to find free scratch register"); |
834 | |
835 | BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR) |
836 | .addReg(BasePtrReg); |
837 | |
838 | buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR, |
839 | BasePtrFI); |
840 | } |
841 | |
842 | // In this case, spill the FP to a reserved VGPR. |
843 | if (FPSaveIndex && !spilledToMemory(MF, *FPSaveIndex)) { |
844 | const int FramePtrFI = *FPSaveIndex; |
845 | assert(!MFI.isDeadObjectIndex(FramePtrFI))((void)0); |
846 | |
847 | assert(MFI.getStackID(FramePtrFI) == TargetStackID::SGPRSpill)((void)0); |
848 | ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill = |
849 | FuncInfo->getSGPRToVGPRSpills(FramePtrFI); |
850 | assert(Spill.size() == 1)((void)0); |
851 | |
852 | // Save FP before setting it up. |
853 | BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR) |
854 | .addReg(FramePtrReg) |
855 | .addImm(Spill[0].Lane) |
856 | .addReg(Spill[0].VGPR, RegState::Undef); |
857 | } |
858 | |
859 | // In this case, spill the BP to a reserved VGPR. |
860 | if (BPSaveIndex && !spilledToMemory(MF, *BPSaveIndex)) { |
861 | const int BasePtrFI = *BPSaveIndex; |
862 | assert(!MFI.isDeadObjectIndex(BasePtrFI))((void)0); |
863 | |
864 | assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill)((void)0); |
865 | ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill = |
866 | FuncInfo->getSGPRToVGPRSpills(BasePtrFI); |
867 | assert(Spill.size() == 1)((void)0); |
868 | |
869 | // Save BP before setting it up. |
870 | BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR) |
871 | .addReg(BasePtrReg) |
872 | .addImm(Spill[0].Lane) |
873 | .addReg(Spill[0].VGPR, RegState::Undef); |
874 | } |
875 | |
876 | // Emit the copy if we need an FP, and are using a free SGPR to save it. |
877 | if (FuncInfo->SGPRForFPSaveRestoreCopy) { |
878 | BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), |
879 | FuncInfo->SGPRForFPSaveRestoreCopy) |
880 | .addReg(FramePtrReg) |
881 | .setMIFlag(MachineInstr::FrameSetup); |
882 | } |
883 | |
884 | // Emit the copy if we need a BP, and are using a free SGPR to save it. |
885 | if (FuncInfo->SGPRForBPSaveRestoreCopy) { |
886 | BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), |
887 | FuncInfo->SGPRForBPSaveRestoreCopy) |
888 | .addReg(BasePtrReg) |
889 | .setMIFlag(MachineInstr::FrameSetup); |
890 | } |
891 | |
892 | // If a copy has been emitted for FP and/or BP, Make the SGPRs |
893 | // used in the copy instructions live throughout the function. |
894 | SmallVector<MCPhysReg, 2> TempSGPRs; |
895 | if (FuncInfo->SGPRForFPSaveRestoreCopy) |
896 | TempSGPRs.push_back(FuncInfo->SGPRForFPSaveRestoreCopy); |
897 | |
898 | if (FuncInfo->SGPRForBPSaveRestoreCopy) |
899 | TempSGPRs.push_back(FuncInfo->SGPRForBPSaveRestoreCopy); |
900 | |
901 | if (!TempSGPRs.empty()) { |
902 | for (MachineBasicBlock &MBB : MF) { |
903 | for (MCPhysReg Reg : TempSGPRs) |
904 | MBB.addLiveIn(Reg); |
905 | |
906 | MBB.sortUniqueLiveIns(); |
907 | } |
908 | if (!LiveRegs.empty()) { |
909 | LiveRegs.addReg(FuncInfo->SGPRForFPSaveRestoreCopy); |
910 | LiveRegs.addReg(FuncInfo->SGPRForBPSaveRestoreCopy); |
911 | } |
912 | } |
913 | |
914 | if (TRI.hasStackRealignment(MF)) { |
915 | HasFP = true; |
916 | const unsigned Alignment = MFI.getMaxAlign().value(); |
917 | |
918 | RoundedSize += Alignment; |
919 | if (LiveRegs.empty()) { |
920 | LiveRegs.init(TRI); |
921 | LiveRegs.addLiveIns(MBB); |
922 | } |
923 | |
924 | // s_add_i32 s33, s32, NumBytes |
925 | // s_and_b32 s33, s33, 0b111...0000 |
926 | BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), FramePtrReg) |
927 | .addReg(StackPtrReg) |
928 | .addImm((Alignment - 1) * getScratchScaleFactor(ST)) |
929 | .setMIFlag(MachineInstr::FrameSetup); |
930 | BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg) |
931 | .addReg(FramePtrReg, RegState::Kill) |
932 | .addImm(-Alignment * getScratchScaleFactor(ST)) |
933 | .setMIFlag(MachineInstr::FrameSetup); |
934 | FuncInfo->setIsStackRealigned(true); |
935 | } else if ((HasFP = hasFP(MF))) { |
936 | BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg) |
937 | .addReg(StackPtrReg) |
938 | .setMIFlag(MachineInstr::FrameSetup); |
939 | } |
940 | |
941 | // If we need a base pointer, set it up here. It's whatever the value of |
942 | // the stack pointer is at this point. Any variable size objects will be |
943 | // allocated after this, so we can still use the base pointer to reference |
944 | // the incoming arguments. |
945 | if ((HasBP = TRI.hasBasePointer(MF))) { |
Although the value stored to 'HasBP' is used in the enclosing expression, the value is never actually read from 'HasBP' | |
946 | BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg) |
947 | .addReg(StackPtrReg) |
948 | .setMIFlag(MachineInstr::FrameSetup); |
949 | } |
950 | |
951 | if (HasFP && RoundedSize != 0) { |
952 | BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg) |
953 | .addReg(StackPtrReg) |
954 | .addImm(RoundedSize * getScratchScaleFactor(ST)) |
955 | .setMIFlag(MachineInstr::FrameSetup); |
956 | } |
957 | |
958 | assert((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy ||((void)0) |
959 | FuncInfo->FramePointerSaveIndex)) &&((void)0) |
960 | "Needed to save FP but didn't save it anywhere")((void)0); |
961 | |
962 | assert((HasFP || (!FuncInfo->SGPRForFPSaveRestoreCopy &&((void)0) |
963 | !FuncInfo->FramePointerSaveIndex)) &&((void)0) |
964 | "Saved FP but didn't need it")((void)0); |
965 | |
966 | assert((!HasBP || (FuncInfo->SGPRForBPSaveRestoreCopy ||((void)0) |
967 | FuncInfo->BasePointerSaveIndex)) &&((void)0) |
968 | "Needed to save BP but didn't save it anywhere")((void)0); |
969 | |
970 | assert((HasBP || (!FuncInfo->SGPRForBPSaveRestoreCopy &&((void)0) |
971 | !FuncInfo->BasePointerSaveIndex)) &&((void)0) |
972 | "Saved BP but didn't need it")((void)0); |
973 | } |
974 | |
// Epilogue for non-entry functions: pop the local frame, restore FP/BP from
// wherever the prologue saved them (free SGPR, scratch memory, or a VGPR
// lane), then reload the SGPR-spill and WWM VGPRs. Order mirrors the
// prologue in reverse.
void SIFrameLowering::emitEpilogue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  // Kernels have no frame-pointer bookkeeping to undo.
  if (FuncInfo->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  // Insert before the terminator (s_setpc / return).
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  LivePhysRegs LiveRegs;
  DebugLoc DL;

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  uint32_t NumBytes = MFI.getStackSize();
  // If the prologue realigned the stack it over-allocated MaxAlign extra
  // bytes; pop the same rounded amount here.
  uint32_t RoundedSize = FuncInfo->isStackRealigned()
                             ? NumBytes + MFI.getMaxAlign().value()
                             : NumBytes;
  const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
  const Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  const Register BasePtrReg =
      TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();

  Optional<int> FPSaveIndex = FuncInfo->FramePointerSaveIndex;
  Optional<int> BPSaveIndex = FuncInfo->BasePointerSaveIndex;

  // Pop the frame (SP is scaled by the scratch scale factor).
  if (RoundedSize != 0 && hasFP(MF)) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
        .addReg(StackPtrReg)
        .addImm(-static_cast<int64_t>(RoundedSize * getScratchScaleFactor(ST)))
        .setMIFlag(MachineInstr::FrameDestroy);
  }

  // FP was kept in a free SGPR: plain copy back.
  if (FuncInfo->SGPRForFPSaveRestoreCopy) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
        .addReg(FuncInfo->SGPRForFPSaveRestoreCopy)
        .setMIFlag(MachineInstr::FrameDestroy);
  }

  // BP was kept in a free SGPR: plain copy back.
  if (FuncInfo->SGPRForBPSaveRestoreCopy) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
        .addReg(FuncInfo->SGPRForBPSaveRestoreCopy)
        .setMIFlag(MachineInstr::FrameDestroy);
  }

  if (FPSaveIndex) {
    const int FramePtrFI = *FPSaveIndex;
    assert(!MFI.isDeadObjectIndex(FramePtrFI));
    if (spilledToMemory(MF, FramePtrFI)) {
      initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);

      // Reload through a scratch VGPR, then move the (wave-uniform) value
      // back into the SGPR via readfirstlane.
      MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
          MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
      if (!TmpVGPR)
        report_fatal_error("failed to find free scratch register");
      buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
                         FramePtrFI);
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), FramePtrReg)
          .addReg(TmpVGPR, RegState::Kill);
    } else {
      // Reload from VGPR spill.
      assert(MFI.getStackID(FramePtrFI) == TargetStackID::SGPRSpill);
      ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
          FuncInfo->getSGPRToVGPRSpills(FramePtrFI);
      assert(Spill.size() == 1);
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), FramePtrReg)
          .addReg(Spill[0].VGPR)
          .addImm(Spill[0].Lane);
    }
  }

  if (BPSaveIndex) {
    const int BasePtrFI = *BPSaveIndex;
    assert(!MFI.isDeadObjectIndex(BasePtrFI));
    if (spilledToMemory(MF, BasePtrFI)) {
      initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);

      MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
          MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
      if (!TmpVGPR)
        report_fatal_error("failed to find free scratch register");
      buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
                         BasePtrFI);
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), BasePtrReg)
          .addReg(TmpVGPR, RegState::Kill);
    } else {
      // Reload from VGPR spill.
      assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill);
      ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
          FuncInfo->getSGPRToVGPRSpills(BasePtrFI);
      assert(Spill.size() == 1);
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), BasePtrReg)
          .addReg(Spill[0].VGPR)
          .addImm(Spill[0].Lane);
    }
  }

  // Restore the SGPR-spill and WWM VGPRs with all lanes enabled, mirroring
  // the prologue spills.
  Register ScratchExecCopy;
  for (const SIMachineFunctionInfo::SGPRSpillVGPR &Reg :
       FuncInfo->getSGPRSpillVGPRs()) {
    if (!Reg.FI)
      continue;

    if (!ScratchExecCopy)
      ScratchExecCopy =
          buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false);

    buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, Reg.VGPR,
                       *Reg.FI);
  }

  for (const auto &Reg : FuncInfo->WWMReservedRegs) {
    auto VGPR = Reg.first;
    auto FI = Reg.second;
    if (!FI)
      continue;

    if (!ScratchExecCopy)
      ScratchExecCopy =
          buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false);

    buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, VGPR, *FI);
  }

  // Put the original exec mask back.
  if (ScratchExecCopy) {
    // FIXME: Split block and make terminator.
    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
        .addReg(ScratchExecCopy, RegState::Kill);
  }
}
1108 | |
#ifndef NDEBUG
// Debug-only check: after SILowerSGPRSpills, every SGPR-spill stack object
// should be dead except the dedicated FP/BP save slots, which are lowered
// here instead.
static bool allSGPRSpillsAreDead(const MachineFunction &MF) {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       I != E; ++I) {
    if (!MFI.isDeadObjectIndex(I) &&
        MFI.getStackID(I) == TargetStackID::SGPRSpill &&
        (I != FuncInfo->FramePointerSaveIndex &&
         I != FuncInfo->BasePointerSaveIndex)) {
      // Found a live SGPR spill that should have been eliminated.
      return false;
    }
  }

  return true;
}
#endif
1126 | |
1127 | StackOffset SIFrameLowering::getFrameIndexReference(const MachineFunction &MF, |
1128 | int FI, |
1129 | Register &FrameReg) const { |
1130 | const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo(); |
1131 | |
1132 | FrameReg = RI->getFrameRegister(MF); |
1133 | return StackOffset::getFixed(MF.getFrameInfo().getObjectOffset(FI)); |
1134 | } |
1135 | |
// Runs after register allocation but before final frame layout: tries to
// redirect VGPR spills into free AGPRs (gfx90x MAI targets) so their stack
// slots can be dropped, then removes dead SGPR-spill frame indices and
// reserves an emergency scavenging slot if any real stack objects remain.
void SIFrameLowering::processFunctionBeforeFrameFinalized(
    MachineFunction &MF,
    RegScavenger *RS) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  // Only worthwhile on subtargets with MAI (AGPRs) and when not disabled by
  // the -amdgpu-spill-vgpr-to-agpr flag.
  const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
    && EnableSpillVGPRToAGPR;

  if (SpillVGPRToAGPR) {
    // To track the spill frame indices handled in this pass.
    BitVector SpillFIs(MFI.getObjectIndexEnd(), false);

    bool SeenDbgInstr = false;

    for (MachineBasicBlock &MBB : MF) {
      MachineBasicBlock::iterator Next;
      // eliminateFrameIndex may erase MI, so compute Next before touching it.
      for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) {
        MachineInstr &MI = *I;
        Next = std::next(I);

        if (MI.isDebugInstr())
          SeenDbgInstr = true;

        if (TII->isVGPRSpill(MI)) {
          // Try to eliminate stack used by VGPR spills before frame
          // finalization.
          unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                                     AMDGPU::OpName::vaddr);
          int FI = MI.getOperand(FIOp).getIndex();
          Register VReg =
            TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
          if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
                                                TRI->isAGPR(MRI, VReg))) {
            // FIXME: change to enterBasicBlockEnd()
            RS->enterBasicBlock(MBB);
            TRI->eliminateFrameIndex(MI, 0, FIOp, RS);
            SpillFIs.set(FI);
            continue;
          }
        }
      }
    }

    // The AGPR<->VGPR shuffle registers are now used across the whole
    // function; record them as live-ins everywhere.
    for (MachineBasicBlock &MBB : MF) {
      for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs())
        MBB.addLiveIn(Reg);

      for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs())
        MBB.addLiveIn(Reg);

      MBB.sortUniqueLiveIns();

      if (!SpillFIs.empty() && SeenDbgInstr) {
        // FIXME: The dead frame indices are replaced with a null register from
        // the debug value instructions. We should instead, update it with the
        // correct register value. But not sure the register value alone is
        // enough to lower the DIExpression; revisit later.
        for (MachineInstr &MI : MBB) {
          if (MI.isDebugValue() && MI.getOperand(0).isFI() &&
              SpillFIs[MI.getOperand(0).getIndex()]) {
            MI.getOperand(0).ChangeToRegister(Register(), false /*isDef*/);
            MI.getOperand(0).setIsDebug();
          }
        }
      }
    }
  }

  FuncInfo->removeDeadFrameIndices(MFI);
  assert(allSGPRSpillsAreDead(MF) &&
         "SGPR spill should have been removed in SILowerSGPRSpills");

  // FIXME: The other checks should be redundant with allStackObjectsAreDead,
  // but currently hasNonSpillStackObjects is set only from source
  // allocas. Stack temps produced from legalization are not counted currently.
  if (!allStackObjectsAreDead(MFI)) {
    assert(RS && "RegScavenger required if spilling");

    // Add an emergency spill slot
    RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI));
  }
}
1223 | |
// Only report VGPRs to generic code.
void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                           BitVector &SavedVGPRs,
                                           RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  // Kernels have no callee-saved registers.
  if (MFI->isEntryFunction())
    return;

  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // Ignore the SGPRs the default implementation found.
  SavedVGPRs.clearBitsNotInMask(TRI->getAllVectorRegMask());

  // Do not save AGPRs prior to GFX90A because there was no easy way to do so.
  // gfx908 has no direct AGPR loads/stores, so spilling an AGPR would itself
  // require a temporary VGPR.
  if (!ST.hasGFX90AInsts())
    SavedVGPRs.clearBitsInMask(TRI->getAllAGPRRegMask());

  // hasFP only knows about stack objects that already exist. We're now
  // determining the stack slots that will be created, so we have to predict
  // them. Stack objects force FP usage with calls.
  //
  // Note a new VGPR CSR may be introduced if one is used for the spill, but we
  // don't want to report it here.
  //
  // FIXME: Is this really hasReservedCallFrame?
  const bool WillHaveFP =
      FrameInfo.hasCalls() &&
      (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));

  // VGPRs used for SGPR spilling need to be specially inserted in the prolog,
  // so don't allow the default insertion to handle them.
  for (auto SSpill : MFI->getSGPRSpillVGPRs())
    SavedVGPRs.reset(SSpill.VGPR);

  LivePhysRegs LiveRegs;
  LiveRegs.init(*TRI);

  // Reserve a save location (VGPR lane or free SGPR) for the FP now, before
  // prolog insertion needs it.
  if (WillHaveFP || hasFP(MF)) {
    assert(!MFI->SGPRForFPSaveRestoreCopy && !MFI->FramePointerSaveIndex &&
           "Re-reserving spill slot for FP");
    getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForFPSaveRestoreCopy,
                                   MFI->FramePointerSaveIndex, true);
  }

  if (TRI->hasBasePointer(MF)) {
    // Don't hand the BP the same SGPR the FP just claimed.
    if (MFI->SGPRForFPSaveRestoreCopy)
      LiveRegs.addReg(MFI->SGPRForFPSaveRestoreCopy);

    assert(!MFI->SGPRForBPSaveRestoreCopy &&
           !MFI->BasePointerSaveIndex && "Re-reserving spill slot for BP");
    getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForBPSaveRestoreCopy,
                                   MFI->BasePointerSaveIndex, false);
  }
}
1283 | |
1284 | void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF, |
1285 | BitVector &SavedRegs, |
1286 | RegScavenger *RS) const { |
1287 | TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); |
1288 | const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); |
1289 | if (MFI->isEntryFunction()) |
1290 | return; |
1291 | |
1292 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
1293 | const SIRegisterInfo *TRI = ST.getRegisterInfo(); |
1294 | |
1295 | // The SP is specifically managed and we don't want extra spills of it. |
1296 | SavedRegs.reset(MFI->getStackPtrOffsetReg()); |
1297 | |
1298 | const BitVector AllSavedRegs = SavedRegs; |
1299 | SavedRegs.clearBitsInMask(TRI->getAllVectorRegMask()); |
1300 | |
1301 | // If clearing VGPRs changed the mask, we will have some CSR VGPR spills. |
1302 | const bool HaveAnyCSRVGPR = SavedRegs != AllSavedRegs; |
1303 | |
1304 | // We have to anticipate introducing CSR VGPR spills if we don't have any |
1305 | // stack objects already, since we require an FP if there is a call and stack. |
1306 | MachineFrameInfo &FrameInfo = MF.getFrameInfo(); |
1307 | const bool WillHaveFP = FrameInfo.hasCalls() && HaveAnyCSRVGPR; |
1308 | |
1309 | // FP will be specially managed like SP. |
1310 | if (WillHaveFP || hasFP(MF)) |
1311 | SavedRegs.reset(MFI->getFrameOffsetReg()); |
1312 | } |
1313 | |
1314 | bool SIFrameLowering::assignCalleeSavedSpillSlots( |
1315 | MachineFunction &MF, const TargetRegisterInfo *TRI, |
1316 | std::vector<CalleeSavedInfo> &CSI) const { |
1317 | if (CSI.empty()) |
1318 | return true; // Early exit if no callee saved registers are modified! |
1319 | |
1320 | const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); |
1321 | if (!FuncInfo->SGPRForFPSaveRestoreCopy && |
1322 | !FuncInfo->SGPRForBPSaveRestoreCopy) |
1323 | return false; |
1324 | |
1325 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
1326 | const SIRegisterInfo *RI = ST.getRegisterInfo(); |
1327 | Register FramePtrReg = FuncInfo->getFrameOffsetReg(); |
1328 | Register BasePtrReg = RI->getBaseRegister(); |
1329 | unsigned NumModifiedRegs = 0; |
1330 | |
1331 | if (FuncInfo->SGPRForFPSaveRestoreCopy) |
1332 | NumModifiedRegs++; |
1333 | if (FuncInfo->SGPRForBPSaveRestoreCopy) |
1334 | NumModifiedRegs++; |
1335 | |
1336 | for (auto &CS : CSI) { |
1337 | if (CS.getReg() == FramePtrReg && FuncInfo->SGPRForFPSaveRestoreCopy) { |
1338 | CS.setDstReg(FuncInfo->SGPRForFPSaveRestoreCopy); |
1339 | if (--NumModifiedRegs) |
1340 | break; |
1341 | } else if (CS.getReg() == BasePtrReg && |
1342 | FuncInfo->SGPRForBPSaveRestoreCopy) { |
1343 | CS.setDstReg(FuncInfo->SGPRForBPSaveRestoreCopy); |
1344 | if (--NumModifiedRegs) |
1345 | break; |
1346 | } |
1347 | } |
1348 | |
1349 | return false; |
1350 | } |
1351 | |
1352 | MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr( |
1353 | MachineFunction &MF, |
1354 | MachineBasicBlock &MBB, |
1355 | MachineBasicBlock::iterator I) const { |
1356 | int64_t Amount = I->getOperand(0).getImm(); |
1357 | if (Amount == 0) |
1358 | return MBB.erase(I); |
1359 | |
1360 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
1361 | const SIInstrInfo *TII = ST.getInstrInfo(); |
1362 | const DebugLoc &DL = I->getDebugLoc(); |
1363 | unsigned Opc = I->getOpcode(); |
1364 | bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode(); |
1365 | uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0; |
1366 | |
1367 | if (!hasReservedCallFrame(MF)) { |
1368 | Amount = alignTo(Amount, getStackAlign()); |
1369 | assert(isUInt<32>(Amount) && "exceeded stack address space size")((void)0); |
1370 | const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); |
1371 | Register SPReg = MFI->getStackPtrOffsetReg(); |
1372 | |
1373 | Amount *= getScratchScaleFactor(ST); |
1374 | if (IsDestroy) |
1375 | Amount = -Amount; |
1376 | BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SPReg) |
1377 | .addReg(SPReg) |
1378 | .addImm(Amount); |
1379 | } else if (CalleePopAmount != 0) { |
1380 | llvm_unreachable("is this used?")__builtin_unreachable(); |
1381 | } |
1382 | |
1383 | return MBB.erase(I); |
1384 | } |
1385 | |
1386 | /// Returns true if the frame will require a reference to the stack pointer. |
1387 | /// |
1388 | /// This is the set of conditions common to setting up the stack pointer in a |
1389 | /// kernel, and for using a frame pointer in a callable function. |
1390 | /// |
1391 | /// FIXME: Should also check hasOpaqueSPAdjustment and if any inline asm |
1392 | /// references SP. |
1393 | static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI) { |
1394 | return MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint(); |
1395 | } |
1396 | |
1397 | // The FP for kernels is always known 0, so we never really need to setup an |
1398 | // explicit register for it. However, DisableFramePointerElim will force us to |
1399 | // use a register for it. |
1400 | bool SIFrameLowering::hasFP(const MachineFunction &MF) const { |
1401 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
1402 | |
1403 | // For entry functions we can use an immediate offset in most cases, so the |
1404 | // presence of calls doesn't imply we need a distinct frame pointer. |
1405 | if (MFI.hasCalls() && |
1406 | !MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) { |
1407 | // All offsets are unsigned, so need to be addressed in the same direction |
1408 | // as stack growth. |
1409 | |
1410 | // FIXME: This function is pretty broken, since it can be called before the |
1411 | // frame layout is determined or CSR spills are inserted. |
1412 | return MFI.getStackSize() != 0; |
1413 | } |
1414 | |
1415 | return frameTriviallyRequiresSP(MFI) || MFI.isFrameAddressTaken() || |
1416 | MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->hasStackRealignment( |
1417 | MF) || |
1418 | MF.getTarget().Options.DisableFramePointerElim(MF); |
1419 | } |
1420 | |
1421 | // This is essentially a reduced version of hasFP for entry functions. Since the |
1422 | // stack pointer is known 0 on entry to kernels, we never really need an FP |
1423 | // register. We may need to initialize the stack pointer depending on the frame |
1424 | // properties, which logically overlaps many of the cases where an ordinary |
1425 | // function would require an FP. |
1426 | bool SIFrameLowering::requiresStackPointerReference( |
1427 | const MachineFunction &MF) const { |
1428 | // Callable functions always require a stack pointer reference. |
1429 | assert(MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() &&((void)0) |
1430 | "only expected to call this for entry points")((void)0); |
1431 | |
1432 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
1433 | |
1434 | // Entry points ordinarily don't need to initialize SP. We have to set it up |
1435 | // for callees if there are any. Also note tail calls are impossible/don't |
1436 | // make any sense for kernels. |
1437 | if (MFI.hasCalls()) |
1438 | return true; |
1439 | |
1440 | // We still need to initialize the SP if we're doing anything weird that |
1441 | // references the SP, like variable sized stack objects. |
1442 | return frameTriviallyRequiresSP(MFI); |
1443 | } |