Bug Summary

File: src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
Warning: line 945, column 8
Although the value stored to 'HasBP' is used in the enclosing expression, the value is never actually read from 'HasBP'
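
The diagnostic is a dead-store finding: the value assigned to 'HasBP' inside the if-condition is consumed by the branch test itself, but nothing in the compiled code ever reads 'HasBP' afterwards. A minimal, self-contained sketch of the pattern and one possible fix follows; the helper names are hypothetical stand-ins for illustration, not code from SIFrameLowering.cpp:

#include <cassert>

static bool hasBasePointer() { return true; } // hypothetical stand-in
static void setUpBasePointer() {}             // hypothetical stand-in

void prologueSketch() {
  bool HasBP = false;
  if ((HasBP = hasBasePointer())) { // store used only by the branch test
    setUpBasePointer();
  }
  // The only later read of HasBP is in an assert; with -D NDEBUG (as in
  // the invocation below) the assert expands to nothing, so the stored
  // value is never read and the analyzer reports the store as dead.
  assert(!HasBP || hasBasePointer());
}

// One conventional fix: test the condition directly and drop the dead
// variable (or keep the assignment only in !NDEBUG builds).
void prologueFixed() {
  if (hasBasePointer())
    setUpBasePointer();
}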

Annotated Source Code


clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name SIFrameLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 1 -fhalf-no-semantic-interposition -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Analysis -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ASMParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/BinaryFormat -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitstream -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /include/llvm/CodeGen -I /include/llvm/CodeGen/PBQP -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Coroutines -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData/Coverage -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/CodeView -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/DWARF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/MSF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/PDB -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Demangle -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/JITLink -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/Orc -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenACC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenMP -I /include/llvm/CodeGen/GlobalISel -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IRReader -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/LTO -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Linker -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC/MCParser -I /include/llvm/CodeGen/MIRParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Object -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Option -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Passes -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Scalar -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ADT -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Support -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/Symbolize -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Target -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Utils -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Vectorize -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/IPO -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include -I /usr/src/gnu/usr.bin/clang/libLLVM/../include -I /usr/src/gnu/usr.bin/clang/libLLVM/obj -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include -D NDEBUG -D __STDC_LIMIT_MACROS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D LLVM_PREFIX="/usr" -D PIC -internal-isystem /usr/include/c++/v1 -internal-isystem /usr/local/lib/clang/13.0.0/include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -ferror-limit 19 -fvisibility-inlines-hidden -fwrapv -D_RET_PROTECTOR -ret-protector -fno-rtti -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/ben/Projects/vmm/scan-build/2022-01-12-194120-40624-1 -x c++ /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
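
Among the checkers enabled in this invocation, -analyzer-checker=deadcode provides the dead-store analysis (deadcode.DeadStores) responsible for the warning above. Note also -D NDEBUG: it compiles away the asserts that would otherwise read 'HasBP', which is why the store at line 945 counts as dead (see the note at that line below).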
1//===----------------------- SIFrameLowering.cpp --------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//==-----------------------------------------------------------------------===//
8
9#include "SIFrameLowering.h"
10#include "AMDGPU.h"
11#include "GCNSubtarget.h"
12#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
13#include "SIMachineFunctionInfo.h"
14#include "llvm/CodeGen/LivePhysRegs.h"
15#include "llvm/CodeGen/MachineFrameInfo.h"
16#include "llvm/CodeGen/RegisterScavenging.h"
17#include "llvm/Target/TargetMachine.h"
18
19using namespace llvm;
20
21#define DEBUG_TYPE"frame-info" "frame-info"
22
23static cl::opt<bool> EnableSpillVGPRToAGPR(
24 "amdgpu-spill-vgpr-to-agpr",
25 cl::desc("Enable spilling VGPRs to AGPRs"),
26 cl::ReallyHidden,
27 cl::init(true));
28
29// Find a scratch register that we can use in the prologue. We avoid using
30// callee-save registers since they may appear to be free when this is called
31// from canUseAsPrologue (during shrink wrapping), but then no longer be free
32// when this is called from emitPrologue.
33static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
34 LivePhysRegs &LiveRegs,
35 const TargetRegisterClass &RC,
36 bool Unused = false) {
37 // Mark callee saved registers as used so we will not choose them.
38 const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
39 for (unsigned i = 0; CSRegs[i]; ++i)
40 LiveRegs.addReg(CSRegs[i]);
41
42 if (Unused) {
43 // We are looking for a register that can be used throughout the entire
44 // function, so any use is unacceptable.
45 for (MCRegister Reg : RC) {
46 if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg))
47 return Reg;
48 }
49 } else {
50 for (MCRegister Reg : RC) {
51 if (LiveRegs.available(MRI, Reg))
52 return Reg;
53 }
54 }
55
56 return MCRegister();
57}
58
59static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF,
60 LivePhysRegs &LiveRegs,
61 Register &TempSGPR,
62 Optional<int> &FrameIndex,
63 bool IsFP) {
64 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
65 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
66
67 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
68 const SIRegisterInfo *TRI = ST.getRegisterInfo();
69
70 // We need to save and restore the current FP/BP.
71
72 // 1: If there is already a VGPR with free lanes, use it. We
73 // may already have to pay the penalty for spilling a CSR VGPR.
74 if (MFI->haveFreeLanesForSGPRSpill(MF, 1)) {
75 int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr,
76 TargetStackID::SGPRSpill);
77
78 if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
79 llvm_unreachable("allocate SGPR spill should have worked")__builtin_unreachable();
80
81 FrameIndex = NewFI;
82
83 LLVM_DEBUG(auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
84 dbgs() << "Spilling " << (IsFP ? "FP" : "BP") << " to "
85 << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane
86 << '\n');
87 return;
88 }
89
90 // 2: Next, try to save the FP/BP in an unused SGPR.
91 TempSGPR = findScratchNonCalleeSaveRegister(
92 MF.getRegInfo(), LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass, true);
93
94 if (!TempSGPR) {
95 int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr,
96 TargetStackID::SGPRSpill);
97
98 if (TRI->spillSGPRToVGPR() && MFI->allocateSGPRSpillToVGPR(MF, NewFI)) {
99 // 3: There's no free lane to spill, and no free register to save FP/BP,
100 // so we're forced to spill another VGPR to use for the spill.
101 FrameIndex = NewFI;
102
103 LLVM_DEBUG(
104 auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
105 dbgs() << (IsFP ? "FP" : "BP") << " requires fallback spill to "
106 << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane << '\n';);
107 } else {
108 // Remove dead <NewFI> index
109 MF.getFrameInfo().RemoveStackObject(NewFI);
110 // 4: If all else fails, spill the FP/BP to memory.
111 FrameIndex = FrameInfo.CreateSpillStackObject(4, Align(4));
112 LLVM_DEBUG(dbgs() << "Reserved FI " << FrameIndex << " for spilling "do { } while (false)
113 << (IsFP ? "FP" : "BP") << '\n')do { } while (false);
114 }
115 } else {
116 LLVM_DEBUG(dbgs() << "Saving " << (IsFP ? "FP" : "BP") << " with copy to "do { } while (false)
117 << printReg(TempSGPR, TRI) << '\n')do { } while (false);
118 }
119}
120
121// We need to specially emit stack operations here because the frame
122// register used here differs from the one getFrameRegister would return
123// for the rest of the function.
124static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
125 const SIMachineFunctionInfo &FuncInfo,
126 LivePhysRegs &LiveRegs, MachineFunction &MF,
127 MachineBasicBlock &MBB,
128 MachineBasicBlock::iterator I, Register SpillReg,
129 int FI) {
130 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
131 : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
132
133 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
134 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
135 MachineMemOperand *MMO = MF.getMachineMemOperand(
136 PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI),
137 FrameInfo.getObjectAlign(FI));
138 LiveRegs.addReg(SpillReg);
139 TRI.buildSpillLoadStore(MBB, I, Opc, FI, SpillReg, true,
140 FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr,
141 &LiveRegs);
142 LiveRegs.removeReg(SpillReg);
143}
144
145static void buildEpilogRestore(const GCNSubtarget &ST,
146 const SIRegisterInfo &TRI,
147 const SIMachineFunctionInfo &FuncInfo,
148 LivePhysRegs &LiveRegs, MachineFunction &MF,
149 MachineBasicBlock &MBB,
150 MachineBasicBlock::iterator I, Register SpillReg,
151 int FI) {
152 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
153 : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
154
155 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
156 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
157 MachineMemOperand *MMO = MF.getMachineMemOperand(
158 PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI),
159 FrameInfo.getObjectAlign(FI));
160 TRI.buildSpillLoadStore(MBB, I, Opc, FI, SpillReg, false,
161 FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr,
162 &LiveRegs);
163}
164
165static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
166 const DebugLoc &DL, const SIInstrInfo *TII,
167 Register TargetReg) {
168 MachineFunction *MF = MBB.getParent();
169 const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
170 const SIRegisterInfo *TRI = &TII->getRegisterInfo();
171 const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
172 Register TargetLo = TRI->getSubReg(TargetReg, AMDGPU::sub0);
173 Register TargetHi = TRI->getSubReg(TargetReg, AMDGPU::sub1);
174
175 if (MFI->getGITPtrHigh() != 0xffffffff) {
176 BuildMI(MBB, I, DL, SMovB32, TargetHi)
177 .addImm(MFI->getGITPtrHigh())
178 .addReg(TargetReg, RegState::ImplicitDefine);
179 } else {
180 const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64);
181 BuildMI(MBB, I, DL, GetPC64, TargetReg);
182 }
183 Register GitPtrLo = MFI->getGITPtrLoReg(*MF);
184 MF->getRegInfo().addLiveIn(GitPtrLo);
185 MBB.addLiveIn(GitPtrLo);
186 BuildMI(MBB, I, DL, SMovB32, TargetLo)
187 .addReg(GitPtrLo);
188}
189
190// Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`
191void SIFrameLowering::emitEntryFunctionFlatScratchInit(
192 MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
193 const DebugLoc &DL, Register ScratchWaveOffsetReg) const {
194 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
195 const SIInstrInfo *TII = ST.getInstrInfo();
196 const SIRegisterInfo *TRI = &TII->getRegisterInfo();
197 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
198
199 // We don't need this if we only have spills since there is no user facing
200 // scratch.
201
202 // TODO: If we know we don't have flat instructions earlier, we can omit
203 // this from the input registers.
204 //
205 // TODO: We only need to know if we access scratch space through a flat
206 // pointer. Because we only detect if flat instructions are used at all,
207 // this will be used more often than necessary on VI.
208
209 Register FlatScrInitLo;
210 Register FlatScrInitHi;
211
212 if (ST.isAmdPalOS()) {
213 // Extract the scratch offset from the descriptor in the GIT
214 LivePhysRegs LiveRegs;
215 LiveRegs.init(*TRI);
216 LiveRegs.addLiveIns(MBB);
217
218 // Find unused reg to load flat scratch init into
219 MachineRegisterInfo &MRI = MF.getRegInfo();
220 Register FlatScrInit = AMDGPU::NoRegister;
221 ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64(MF);
222 unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 1) / 2;
223 AllSGPR64s = AllSGPR64s.slice(
224 std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded));
225 Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
226 for (MCPhysReg Reg : AllSGPR64s) {
227 if (LiveRegs.available(MRI, Reg) && MRI.isAllocatable(Reg) &&
228 !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
229 FlatScrInit = Reg;
230 break;
231 }
232 }
233 assert(FlatScrInit && "Failed to find free register for scratch init");
234
235 FlatScrInitLo = TRI->getSubReg(FlatScrInit, AMDGPU::sub0);
236 FlatScrInitHi = TRI->getSubReg(FlatScrInit, AMDGPU::sub1);
237
238 buildGitPtr(MBB, I, DL, TII, FlatScrInit);
239
240 // We now have the GIT ptr - now get the scratch descriptor from the entry
241 // at offset 0 (or offset 16 for a compute shader).
242 MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
243 const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
244 auto *MMO = MF.getMachineMemOperand(
245 PtrInfo,
246 MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
247 MachineMemOperand::MODereferenceable,
248 8, Align(4));
249 unsigned Offset =
250 MF.getFunction().getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
251 const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
252 unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
253 BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit)
254 .addReg(FlatScrInit)
255 .addImm(EncodedOffset) // offset
256 .addImm(0) // cpol
257 .addMemOperand(MMO);
258
259 // Mask the offset in [47:0] of the descriptor
260 const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32);
261 BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi)
262 .addReg(FlatScrInitHi)
263 .addImm(0xffff);
264 } else {
265 Register FlatScratchInitReg =
266 MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);
267 assert(FlatScratchInitReg);
268
269 MachineRegisterInfo &MRI = MF.getRegInfo();
270 MRI.addLiveIn(FlatScratchInitReg);
271 MBB.addLiveIn(FlatScratchInitReg);
272
273 FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
274 FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
275 }
276
277 // Do a 64-bit pointer add.
278 if (ST.flatScratchIsPointer()) {
279 if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
280 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
281 .addReg(FlatScrInitLo)
282 .addReg(ScratchWaveOffsetReg);
283 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), FlatScrInitHi)
284 .addReg(FlatScrInitHi)
285 .addImm(0);
286 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
287 addReg(FlatScrInitLo).
288 addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO |
289 (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
290 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
291 addReg(FlatScrInitHi).
292 addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI |
293 (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
294 return;
295 }
296
297 // For GFX9.
298 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
299 .addReg(FlatScrInitLo)
300 .addReg(ScratchWaveOffsetReg);
301 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), AMDGPU::FLAT_SCR_HI)
302 .addReg(FlatScrInitHi)
303 .addImm(0);
304
305 return;
306 }
307
308 assert(ST.getGeneration() < AMDGPUSubtarget::GFX9);
309
310 // Copy the size in bytes.
311 BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
312 .addReg(FlatScrInitHi, RegState::Kill);
313
314 // Add wave offset in bytes to private base offset.
315 // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
316 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), FlatScrInitLo)
317 .addReg(FlatScrInitLo)
318 .addReg(ScratchWaveOffsetReg);
319
320 // Convert offset to 256-byte units.
321 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI)
322 .addReg(FlatScrInitLo, RegState::Kill)
323 .addImm(8);
324}
325
326// Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
327// memory. They should have been removed by now.
328static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
329 for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
330 I != E; ++I) {
331 if (!MFI.isDeadObjectIndex(I))
332 return false;
333 }
334
335 return true;
336}
337
338// Shift down registers reserved for the scratch RSRC.
339Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
340 MachineFunction &MF) const {
341
342 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
343 const SIInstrInfo *TII = ST.getInstrInfo();
344 const SIRegisterInfo *TRI = &TII->getRegisterInfo();
345 MachineRegisterInfo &MRI = MF.getRegInfo();
346 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
347
348 assert(MFI->isEntryFunction())((void)0);
349
350 Register ScratchRsrcReg = MFI->getScratchRSrcReg();
351
352 if (!ScratchRsrcReg || (!MRI.isPhysRegUsed(ScratchRsrcReg) &&
353 allStackObjectsAreDead(MF.getFrameInfo())))
354 return Register();
355
356 if (ST.hasSGPRInitBug() ||
357 ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
358 return ScratchRsrcReg;
359
360 // We reserved the last registers for this. Shift it down to the end of those
361 // which were actually used.
362 //
363 // FIXME: It might be safer to use a pseudoregister before replacement.
364
365 // FIXME: We should be able to eliminate unused input registers. We only
366 // cannot do this for the resources required for scratch access. For now we
367 // skip over user SGPRs and may leave unused holes.
368
369 unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
370 ArrayRef<MCPhysReg> AllSGPR128s = TRI->getAllSGPR128(MF);
371 AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));
372
373 // Skip the last N reserved elements because they should have already been
374 // reserved for VCC etc.
375 Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
376 for (MCPhysReg Reg : AllSGPR128s) {
377 // Pick the first unallocated one. Make sure we don't clobber the other
378 // reserved input we needed. Also for PAL, make sure we don't clobber
379 // the GIT pointer passed in SGPR0 or SGPR8.
380 if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
381 !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
382 MRI.replaceRegWith(ScratchRsrcReg, Reg);
383 MFI->setScratchRSrcReg(Reg);
384 return Reg;
385 }
386 }
387
388 return ScratchRsrcReg;
389}
390
391static unsigned getScratchScaleFactor(const GCNSubtarget &ST) {
392 return ST.enableFlatScratch() ? 1 : ST.getWavefrontSize();
393}
394
395void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
396 MachineBasicBlock &MBB) const {
397 assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported")((void)0);
398
399 // FIXME: If we only have SGPR spills, we won't actually be using scratch
400 // memory since these spill to VGPRs. We should be cleaning up these unused
401 // SGPR spill frame indices somewhere.
402
403 // FIXME: We still have implicit uses on SGPR spill instructions in case they
404 // need to spill to vector memory. It's likely that will not happen, but at
405 // this point it appears we need the setup. This part of the prolog should be
406 // emitted after frame indices are eliminated.
407
408 // FIXME: Remove all of the isPhysRegUsed checks
409
410 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
411 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
412 const SIInstrInfo *TII = ST.getInstrInfo();
413 const SIRegisterInfo *TRI = &TII->getRegisterInfo();
414 MachineRegisterInfo &MRI = MF.getRegInfo();
415 const Function &F = MF.getFunction();
416 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
417
418 assert(MFI->isEntryFunction())((void)0);
419
420 Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
421 AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
422 // FIXME: Hack to not crash in situations which emitted an error.
423 if (!PreloadedScratchWaveOffsetReg)
424 return;
425
426 // We need to do the replacement of the private segment buffer register even
427 // if there are no stack objects. There could be stores to undef or a
428 // constant without an associated object.
429 //
430 // This will return `Register()` in cases where there are no actual
431 // uses of the SRSRC.
432 Register ScratchRsrcReg;
433 if (!ST.enableFlatScratch())
434 ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);
435
436 // Make the selected register live throughout the function.
437 if (ScratchRsrcReg) {
438 for (MachineBasicBlock &OtherBB : MF) {
439 if (&OtherBB != &MBB) {
440 OtherBB.addLiveIn(ScratchRsrcReg);
441 }
442 }
443 }
444
445 // Now that we have fixed the reserved SRSRC we need to locate the
446 // (potentially) preloaded SRSRC.
447 Register PreloadedScratchRsrcReg;
448 if (ST.isAmdHsaOrMesa(F)) {
449 PreloadedScratchRsrcReg =
450 MFI->getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
451 if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
452 // We added live-ins during argument lowering, but since they were not
453 // used they were deleted. We're adding the uses now, so add them back.
454 MRI.addLiveIn(PreloadedScratchRsrcReg);
455 MBB.addLiveIn(PreloadedScratchRsrcReg);
456 }
457 }
458
459 // Debug location must be unknown since the first debug location is used to
460 // determine the end of the prologue.
461 DebugLoc DL;
462 MachineBasicBlock::iterator I = MBB.begin();
463
464 // We found the SRSRC first because it needs four registers and has an
465 // alignment requirement. If the SRSRC that we found overlaps
466 // the scratch wave offset, which may be in a fixed SGPR or a free SGPR
467 // chosen by SITargetLowering::allocateSystemSGPRs, COPY the scratch
468 // wave offset to a free SGPR.
469 Register ScratchWaveOffsetReg;
470 if (TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
471 ArrayRef<MCPhysReg> AllSGPRs = TRI->getAllSGPR32(MF);
472 unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
473 AllSGPRs = AllSGPRs.slice(
474 std::min(static_cast<unsigned>(AllSGPRs.size()), NumPreloaded));
475 Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
476 for (MCPhysReg Reg : AllSGPRs) {
477 if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
478 !TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) {
479 ScratchWaveOffsetReg = Reg;
480 BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
481 .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
482 break;
483 }
484 }
485 } else {
486 ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
487 }
488 assert(ScratchWaveOffsetReg);
489
490 if (requiresStackPointerReference(MF)) {
491 Register SPReg = MFI->getStackPtrOffsetReg();
492 assert(SPReg != AMDGPU::SP_REG);
493 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg)
494 .addImm(FrameInfo.getStackSize() * getScratchScaleFactor(ST));
495 }
496
497 if (hasFP(MF)) {
498 Register FPReg = MFI->getFrameOffsetReg();
499 assert(FPReg != AMDGPU::FP_REG);
500 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
501 }
502
503 bool NeedsFlatScratchInit =
504 MFI->hasFlatScratchInit() &&
505 (MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||
506 (!allStackObjectsAreDead(FrameInfo) && ST.enableFlatScratch()));
507
508 if ((NeedsFlatScratchInit || ScratchRsrcReg) &&
509 !ST.flatScratchIsArchitected()) {
510 MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
511 MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
512 }
513
514 if (NeedsFlatScratchInit) {
515 emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
516 }
517
518 if (ScratchRsrcReg) {
519 emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL,
520 PreloadedScratchRsrcReg,
521 ScratchRsrcReg, ScratchWaveOffsetReg);
522 }
523}
524
525// Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg`
526void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
527 MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
528 const DebugLoc &DL, Register PreloadedScratchRsrcReg,
529 Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const {
530
531 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
532 const SIInstrInfo *TII = ST.getInstrInfo();
533 const SIRegisterInfo *TRI = &TII->getRegisterInfo();
534 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
535 const Function &Fn = MF.getFunction();
536
537 if (ST.isAmdPalOS()) {
538 // The pointer to the GIT is formed from the offset passed in and either
539 // the amdgpu-git-ptr-high function attribute or the top part of the PC
540 Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
541 Register Rsrc03 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
542
543 buildGitPtr(MBB, I, DL, TII, Rsrc01);
544
545 // We now have the GIT ptr - now get the scratch descriptor from the entry
546 // at offset 0 (or offset 16 for a compute shader).
547 MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
548 const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
549 auto MMO = MF.getMachineMemOperand(PtrInfo,
550 MachineMemOperand::MOLoad |
551 MachineMemOperand::MOInvariant |
552 MachineMemOperand::MODereferenceable,
553 16, Align(4));
554 unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
555 const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
556 unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
557 BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
558 .addReg(Rsrc01)
559 .addImm(EncodedOffset) // offset
560 .addImm(0) // cpol
561 .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
562 .addMemOperand(MMO);
563
564 // The driver will always set the SRD for wave 64 (bits 118:117 of
565 // descriptor / bits 22:21 of third sub-reg will be 0b11)
566 // If the shader is actually wave32 we have to modify the const_index_stride
567 // field of the descriptor 3rd sub-reg (bits 22:21) to 0b10 (stride=32). The
568 // reason the driver does this is that there can be cases where it presents
569 // 2 shaders with different wave size (e.g. VsFs).
570 // TODO: convert to using SCRATCH instructions or multiple SRD buffers
571 if (ST.isWave32()) {
572 const MCInstrDesc &SBitsetB32 = TII->get(AMDGPU::S_BITSET0_B32);
573 BuildMI(MBB, I, DL, SBitsetB32, Rsrc03)
574 .addImm(21)
575 .addReg(Rsrc03);
576 }
577 } else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
578 assert(!ST.isAmdHsaOrMesa(Fn));
579 const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
580
581 Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
582 Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
583
584 // Use relocations to get the pointer, and setup the other bits manually.
585 uint64_t Rsrc23 = TII->getScratchRsrcWords23();
586
587 if (MFI->hasImplicitBufferPtr()) {
588 Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
589
590 if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
591 const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);
592
593 BuildMI(MBB, I, DL, Mov64, Rsrc01)
594 .addReg(MFI->getImplicitBufferPtrUserSGPR())
595 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
596 } else {
597 const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
598
599 MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
600 auto MMO = MF.getMachineMemOperand(
601 PtrInfo,
602 MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
603 MachineMemOperand::MODereferenceable,
604 8, Align(4));
605 BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
606 .addReg(MFI->getImplicitBufferPtrUserSGPR())
607 .addImm(0) // offset
608 .addImm(0) // cpol
609 .addMemOperand(MMO)
610 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
611
612 MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
613 MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
614 }
615 } else {
616 Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
617 Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
618
619 BuildMI(MBB, I, DL, SMovB32, Rsrc0)
620 .addExternalSymbol("SCRATCH_RSRC_DWORD0")
621 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
622
623 BuildMI(MBB, I, DL, SMovB32, Rsrc1)
624 .addExternalSymbol("SCRATCH_RSRC_DWORD1")
625 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
626
627 }
628
629 BuildMI(MBB, I, DL, SMovB32, Rsrc2)
630 .addImm(Rsrc23 & 0xffffffff)
631 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
632
633 BuildMI(MBB, I, DL, SMovB32, Rsrc3)
634 .addImm(Rsrc23 >> 32)
635 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
636 } else if (ST.isAmdHsaOrMesa(Fn)) {
637 assert(PreloadedScratchRsrcReg);
638
639 if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
640 BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
641 .addReg(PreloadedScratchRsrcReg, RegState::Kill);
642 }
643 }
644
645 // Add the scratch wave offset into the scratch RSRC.
646 //
647 // We only want to update the first 48 bits, which is the base address
648 // pointer, without touching the adjacent 16 bits of flags. We know this add
649 // cannot carry-out from bit 47, otherwise the scratch allocation would be
650 // impossible to fit in the 48-bit global address space.
651 //
652 // TODO: Evaluate if it is better to just construct an SRD using the flat
653 // scratch init and some constants rather than update the one we are passed.
654 Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
655 Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
656
657 // We cannot Kill ScratchWaveOffsetReg here because we allow it to be used in
658 // the kernel body via inreg arguments.
659 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0)
660 .addReg(ScratchRsrcSub0)
661 .addReg(ScratchWaveOffsetReg)
662 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
663 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
664 .addReg(ScratchRsrcSub1)
665 .addImm(0)
666 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
667}
668
669bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
670 switch (ID) {
671 case TargetStackID::Default:
672 case TargetStackID::NoAlloc:
673 case TargetStackID::SGPRSpill:
674 return true;
675 case TargetStackID::ScalableVector:
676 case TargetStackID::WasmLocal:
677 return false;
678 }
679 llvm_unreachable("Invalid TargetStackID::Value")__builtin_unreachable();
680}
681
682static void initLiveRegs(LivePhysRegs &LiveRegs, const SIRegisterInfo &TRI,
683 const SIMachineFunctionInfo *FuncInfo,
684 MachineFunction &MF, MachineBasicBlock &MBB,
685 MachineBasicBlock::iterator MBBI, bool IsProlog) {
686 if (LiveRegs.empty()) {
687 LiveRegs.init(TRI);
688 if (IsProlog) {
689 LiveRegs.addLiveIns(MBB);
690 } else {
691 // In epilog.
692 LiveRegs.addLiveOuts(MBB);
693 LiveRegs.stepBackward(*MBBI);
694 }
695 }
696}
697
698// Activate all lanes; returns the saved exec.
699static Register buildScratchExecCopy(LivePhysRegs &LiveRegs,
700 MachineFunction &MF,
701 MachineBasicBlock &MBB,
702 MachineBasicBlock::iterator MBBI,
703 bool IsProlog) {
704 Register ScratchExecCopy;
705 MachineRegisterInfo &MRI = MF.getRegInfo();
706 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
707 const SIInstrInfo *TII = ST.getInstrInfo();
708 const SIRegisterInfo &TRI = TII->getRegisterInfo();
709 SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
710 DebugLoc DL;
711
712 initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, IsProlog);
713
714 ScratchExecCopy = findScratchNonCalleeSaveRegister(
715 MRI, LiveRegs, *TRI.getWaveMaskRegClass());
716 if (!ScratchExecCopy)
717 report_fatal_error("failed to find free scratch register");
718
719 LiveRegs.addReg(ScratchExecCopy);
720
721 const unsigned OrSaveExec =
722 ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
723 BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy).addImm(-1);
724
725 return ScratchExecCopy;
726}
727
728// A StackID of SGPRSpill implies that this is a spill from SGPR to VGPR.
729// Otherwise we are spilling to memory.
730static bool spilledToMemory(const MachineFunction &MF, int SaveIndex) {
731 const MachineFrameInfo &MFI = MF.getFrameInfo();
732 return MFI.getStackID(SaveIndex) != TargetStackID::SGPRSpill;
733}
734
735void SIFrameLowering::emitPrologue(MachineFunction &MF,
736 MachineBasicBlock &MBB) const {
737 SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
738 if (FuncInfo->isEntryFunction()) {
739 emitEntryFunctionPrologue(MF, MBB);
740 return;
741 }
742
743 const MachineFrameInfo &MFI = MF.getFrameInfo();
744 MachineRegisterInfo &MRI = MF.getRegInfo();
745 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
746 const SIInstrInfo *TII = ST.getInstrInfo();
747 const SIRegisterInfo &TRI = TII->getRegisterInfo();
748
749 Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
750 Register FramePtrReg = FuncInfo->getFrameOffsetReg();
751 Register BasePtrReg =
752 TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
753 LivePhysRegs LiveRegs;
754
755 MachineBasicBlock::iterator MBBI = MBB.begin();
756 DebugLoc DL;
757
758 bool HasFP = false;
759 bool HasBP = false;
760 uint32_t NumBytes = MFI.getStackSize();
761 uint32_t RoundedSize = NumBytes;
762 // To avoid clobbering VGPRs in lanes that weren't active on function entry,
763 // turn on all lanes before doing the spill to memory.
764 Register ScratchExecCopy;
765
766 Optional<int> FPSaveIndex = FuncInfo->FramePointerSaveIndex;
767 Optional<int> BPSaveIndex = FuncInfo->BasePointerSaveIndex;
768
769 // VGPRs used for SGPR->VGPR spills
770 for (const SIMachineFunctionInfo::SGPRSpillVGPR &Reg :
771 FuncInfo->getSGPRSpillVGPRs()) {
772 if (!Reg.FI)
773 continue;
774
775 if (!ScratchExecCopy)
776 ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI,
777 /*IsProlog*/ true);
778
779 buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, Reg.VGPR,
780 *Reg.FI);
781 }
782
783 // VGPRs used for Whole Wave Mode
784 for (const auto &Reg : FuncInfo->WWMReservedRegs) {
785 auto VGPR = Reg.first;
786 auto FI = Reg.second;
787 if (!FI)
788 continue;
789
790 if (!ScratchExecCopy)
791 ScratchExecCopy =
792 buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ true);
793
794 buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, VGPR, *FI);
795 }
796
797 if (ScratchExecCopy) {
798 // FIXME: Split block and make terminator.
799 unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
800 MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
801 BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
802 .addReg(ScratchExecCopy, RegState::Kill);
803 LiveRegs.addReg(ScratchExecCopy);
804 }
805
806 if (FPSaveIndex && spilledToMemory(MF, *FPSaveIndex)) {
807 const int FramePtrFI = *FPSaveIndex;
808 assert(!MFI.isDeadObjectIndex(FramePtrFI));
809
810 initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);
811
812 MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
813 MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
814 if (!TmpVGPR)
815 report_fatal_error("failed to find free scratch register");
816
817 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
818 .addReg(FramePtrReg);
819
820 buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
821 FramePtrFI);
822 }
823
824 if (BPSaveIndex && spilledToMemory(MF, *BPSaveIndex)) {
825 const int BasePtrFI = *BPSaveIndex;
826 assert(!MFI.isDeadObjectIndex(BasePtrFI));
827
828 initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);
829
830 MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
831 MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
832 if (!TmpVGPR)
833 report_fatal_error("failed to find free scratch register");
834
835 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
836 .addReg(BasePtrReg);
837
838 buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
839 BasePtrFI);
840 }
841
842 // In this case, spill the FP to a reserved VGPR.
843 if (FPSaveIndex && !spilledToMemory(MF, *FPSaveIndex)) {
844 const int FramePtrFI = *FPSaveIndex;
845 assert(!MFI.isDeadObjectIndex(FramePtrFI));
846
847 assert(MFI.getStackID(FramePtrFI) == TargetStackID::SGPRSpill);
848 ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
849 FuncInfo->getSGPRToVGPRSpills(FramePtrFI);
850 assert(Spill.size() == 1);
851
852 // Save FP before setting it up.
853 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR)
854 .addReg(FramePtrReg)
855 .addImm(Spill[0].Lane)
856 .addReg(Spill[0].VGPR, RegState::Undef);
857 }
858
859 // In this case, spill the BP to a reserved VGPR.
860 if (BPSaveIndex && !spilledToMemory(MF, *BPSaveIndex)) {
861 const int BasePtrFI = *BPSaveIndex;
862 assert(!MFI.isDeadObjectIndex(BasePtrFI));
863
864 assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill);
865 ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
866 FuncInfo->getSGPRToVGPRSpills(BasePtrFI);
867 assert(Spill.size() == 1);
868
869 // Save BP before setting it up.
870 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR)
871 .addReg(BasePtrReg)
872 .addImm(Spill[0].Lane)
873 .addReg(Spill[0].VGPR, RegState::Undef);
874 }
875
876 // Emit the copy if we need an FP, and are using a free SGPR to save it.
877 if (FuncInfo->SGPRForFPSaveRestoreCopy) {
878 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
879 FuncInfo->SGPRForFPSaveRestoreCopy)
880 .addReg(FramePtrReg)
881 .setMIFlag(MachineInstr::FrameSetup);
882 }
883
884 // Emit the copy if we need a BP, and are using a free SGPR to save it.
885 if (FuncInfo->SGPRForBPSaveRestoreCopy) {
886 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
887 FuncInfo->SGPRForBPSaveRestoreCopy)
888 .addReg(BasePtrReg)
889 .setMIFlag(MachineInstr::FrameSetup);
890 }
891
892 // If a copy has been emitted for FP and/or BP, make the SGPRs
893 // used in the copy instructions live throughout the function.
894 SmallVector<MCPhysReg, 2> TempSGPRs;
895 if (FuncInfo->SGPRForFPSaveRestoreCopy)
896 TempSGPRs.push_back(FuncInfo->SGPRForFPSaveRestoreCopy);
897
898 if (FuncInfo->SGPRForBPSaveRestoreCopy)
899 TempSGPRs.push_back(FuncInfo->SGPRForBPSaveRestoreCopy);
900
901 if (!TempSGPRs.empty()) {
902 for (MachineBasicBlock &MBB : MF) {
903 for (MCPhysReg Reg : TempSGPRs)
904 MBB.addLiveIn(Reg);
905
906 MBB.sortUniqueLiveIns();
907 }
908 if (!LiveRegs.empty()) {
909 LiveRegs.addReg(FuncInfo->SGPRForFPSaveRestoreCopy);
910 LiveRegs.addReg(FuncInfo->SGPRForBPSaveRestoreCopy);
911 }
912 }
913
914 if (TRI.hasStackRealignment(MF)) {
915 HasFP = true;
916 const unsigned Alignment = MFI.getMaxAlign().value();
917
918 RoundedSize += Alignment;
919 if (LiveRegs.empty()) {
920 LiveRegs.init(TRI);
921 LiveRegs.addLiveIns(MBB);
922 }
923
924 // s_add_i32 s33, s32, NumBytes
925 // s_and_b32 s33, s33, 0b111...0000
926 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), FramePtrReg)
927 .addReg(StackPtrReg)
928 .addImm((Alignment - 1) * getScratchScaleFactor(ST))
929 .setMIFlag(MachineInstr::FrameSetup);
930 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
931 .addReg(FramePtrReg, RegState::Kill)
932 .addImm(-Alignment * getScratchScaleFactor(ST))
933 .setMIFlag(MachineInstr::FrameSetup);
934 FuncInfo->setIsStackRealigned(true);
935 } else if ((HasFP = hasFP(MF))) {
936 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
937 .addReg(StackPtrReg)
938 .setMIFlag(MachineInstr::FrameSetup);
939 }
940
941 // If we need a base pointer, set it up here. It's whatever the value of
942 // the stack pointer is at this point. Any variable size objects will be
943 // allocated after this, so we can still use the base pointer to reference
944 // the incoming arguments.
945 if ((HasBP = TRI.hasBasePointer(MF))) {
Although the value stored to 'HasBP' is used in the enclosing expression, the value is never actually read from 'HasBP'
946 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
947 .addReg(StackPtrReg)
948 .setMIFlag(MachineInstr::FrameSetup);
949 }
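Note: the only later reads of 'HasBP' are the asserts at lines 966-972 below; because this translation unit is built with -D NDEBUG (see the invocation above), those asserts compile to nothing, leaving the store at line 945 dead.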
950
951 if (HasFP && RoundedSize != 0) {
952 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
953 .addReg(StackPtrReg)
954 .addImm(RoundedSize * getScratchScaleFactor(ST))
955 .setMIFlag(MachineInstr::FrameSetup);
956 }
957
958 assert((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy ||
959 FuncInfo->FramePointerSaveIndex)) &&
960 "Needed to save FP but didn't save it anywhere");
961
962 assert((HasFP || (!FuncInfo->SGPRForFPSaveRestoreCopy &&
963 !FuncInfo->FramePointerSaveIndex)) &&
964 "Saved FP but didn't need it");
965
966 assert((!HasBP || (FuncInfo->SGPRForBPSaveRestoreCopy ||
967 FuncInfo->BasePointerSaveIndex)) &&
968 "Needed to save BP but didn't save it anywhere");
969
970 assert((HasBP || (!FuncInfo->SGPRForBPSaveRestoreCopy &&
971 !FuncInfo->BasePointerSaveIndex)) &&
972 "Saved BP but didn't need it");
973}
974
975void SIFrameLowering::emitEpilogue(MachineFunction &MF,
976 MachineBasicBlock &MBB) const {
977 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
978 if (FuncInfo->isEntryFunction())
979 return;
980
981 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
982 const SIInstrInfo *TII = ST.getInstrInfo();
983 MachineRegisterInfo &MRI = MF.getRegInfo();
984 const SIRegisterInfo &TRI = TII->getRegisterInfo();
985 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
986 LivePhysRegs LiveRegs;
987 DebugLoc DL;
988
989 const MachineFrameInfo &MFI = MF.getFrameInfo();
990 uint32_t NumBytes = MFI.getStackSize();
991 uint32_t RoundedSize = FuncInfo->isStackRealigned()
992 ? NumBytes + MFI.getMaxAlign().value()
993 : NumBytes;
994 const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
995 const Register FramePtrReg = FuncInfo->getFrameOffsetReg();
996 const Register BasePtrReg =
997 TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
998
999 Optional<int> FPSaveIndex = FuncInfo->FramePointerSaveIndex;
1000 Optional<int> BPSaveIndex = FuncInfo->BasePointerSaveIndex;
1001
1002 if (RoundedSize != 0 && hasFP(MF)) {
1003 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
1004 .addReg(StackPtrReg)
1005 .addImm(-static_cast<int64_t>(RoundedSize * getScratchScaleFactor(ST)))
1006 .setMIFlag(MachineInstr::FrameDestroy);
1007 }
1008
1009 if (FuncInfo->SGPRForFPSaveRestoreCopy) {
1010 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
1011 .addReg(FuncInfo->SGPRForFPSaveRestoreCopy)
1012 .setMIFlag(MachineInstr::FrameDestroy);
1013 }
1014
1015 if (FuncInfo->SGPRForBPSaveRestoreCopy) {
1016 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
1017 .addReg(FuncInfo->SGPRForBPSaveRestoreCopy)
1018 .setMIFlag(MachineInstr::FrameDestroy);
1019 }
1020
1021 if (FPSaveIndex) {
1022 const int FramePtrFI = *FPSaveIndex;
1023 assert(!MFI.isDeadObjectIndex(FramePtrFI));
1024 if (spilledToMemory(MF, FramePtrFI)) {
1025 initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);
1026
1027 MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
1028 MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
1029 if (!TmpVGPR)
1030 report_fatal_error("failed to find free scratch register");
1031 buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
1032 FramePtrFI);
1033 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), FramePtrReg)
1034 .addReg(TmpVGPR, RegState::Kill);
1035 } else {
1036 // Reload from VGPR spill.
1037 assert(MFI.getStackID(FramePtrFI) == TargetStackID::SGPRSpill);
1038 ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
1039 FuncInfo->getSGPRToVGPRSpills(FramePtrFI);
1040 assert(Spill.size() == 1);
1041 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), FramePtrReg)
1042 .addReg(Spill[0].VGPR)
1043 .addImm(Spill[0].Lane);
1044 }
1045 }
1046
1047 if (BPSaveIndex) {
1048 const int BasePtrFI = *BPSaveIndex;
1049 assert(!MFI.isDeadObjectIndex(BasePtrFI));
1050 if (spilledToMemory(MF, BasePtrFI)) {
1051 initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);
1052
1053 MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
1054 MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
1055 if (!TmpVGPR)
1056 report_fatal_error("failed to find free scratch register");
1057 buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
1058 BasePtrFI);
1059 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), BasePtrReg)
1060 .addReg(TmpVGPR, RegState::Kill);
1061 } else {
1062 // Reload from VGPR spill.
1063 assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill);
1064 ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
1065 FuncInfo->getSGPRToVGPRSpills(BasePtrFI);
1066 assert(Spill.size() == 1);
1067 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), BasePtrReg)
1068 .addReg(Spill[0].VGPR)
1069 .addImm(Spill[0].Lane);
1070 }
1071 }
1072
1073 Register ScratchExecCopy;
1074 for (const SIMachineFunctionInfo::SGPRSpillVGPR &Reg :
1075 FuncInfo->getSGPRSpillVGPRs()) {
1076 if (!Reg.FI)
1077 continue;
1078
1079 if (!ScratchExecCopy)
1080 ScratchExecCopy =
1081 buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false);
1082
1083 buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, Reg.VGPR,
1084 *Reg.FI);
1085 }
1086
1087 for (const auto &Reg : FuncInfo->WWMReservedRegs) {
1088 auto VGPR = Reg.first;
1089 auto FI = Reg.second;
1090 if (!FI)
1091 continue;
1092
1093 if (!ScratchExecCopy)
1094 ScratchExecCopy =
1095 buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false);
1096
1097 buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, VGPR, *FI);
1098 }
1099
1100 if (ScratchExecCopy) {
1101 // FIXME: Split block and make terminator.
1102 unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
1103 MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
1104 BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
1105 .addReg(ScratchExecCopy, RegState::Kill);
1106 }
1107}
1108
1109#ifndef NDEBUG
1110static bool allSGPRSpillsAreDead(const MachineFunction &MF) {
1111 const MachineFrameInfo &MFI = MF.getFrameInfo();
1112 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1113 for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
1114 I != E; ++I) {
1115 if (!MFI.isDeadObjectIndex(I) &&
1116 MFI.getStackID(I) == TargetStackID::SGPRSpill &&
1117 (I != FuncInfo->FramePointerSaveIndex &&
1118 I != FuncInfo->BasePointerSaveIndex)) {
1119 return false;
1120 }
1121 }
1122
1123 return true;
1124}
1125#endif
1126
1127StackOffset SIFrameLowering::getFrameIndexReference(const MachineFunction &MF,
1128 int FI,
1129 Register &FrameReg) const {
1130 const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
1131
1132 FrameReg = RI->getFrameRegister(MF);
1133 return StackOffset::getFixed(MF.getFrameInfo().getObjectOffset(FI));
1134}
1135
1136void SIFrameLowering::processFunctionBeforeFrameFinalized(
1137 MachineFunction &MF,
1138 RegScavenger *RS) const {
1139 MachineFrameInfo &MFI = MF.getFrameInfo();
1140
1141 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1142 const SIInstrInfo *TII = ST.getInstrInfo();
1143 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1144 MachineRegisterInfo &MRI = MF.getRegInfo();
1145 SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1146
1147 const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
1148 && EnableSpillVGPRToAGPR;
1149
1150 if (SpillVGPRToAGPR) {
1151 // To track the spill frame indices handled in this pass.
1152 BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
1153
1154 bool SeenDbgInstr = false;
1155
1156 for (MachineBasicBlock &MBB : MF) {
1157 MachineBasicBlock::iterator Next;
1158 for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) {
1159 MachineInstr &MI = *I;
1160 Next = std::next(I);
1161
1162 if (MI.isDebugInstr())
1163 SeenDbgInstr = true;
1164
1165 if (TII->isVGPRSpill(MI)) {
1166 // Try to eliminate stack used by VGPR spills before frame
1167 // finalization.
1168 unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1169 AMDGPU::OpName::vaddr);
1170 int FI = MI.getOperand(FIOp).getIndex();
1171 Register VReg =
1172 TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
1173 if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
1174 TRI->isAGPR(MRI, VReg))) {
1175 // FIXME: change to enterBasicBlockEnd()
1176 RS->enterBasicBlock(MBB);
1177 TRI->eliminateFrameIndex(MI, 0, FIOp, RS);
1178 SpillFIs.set(FI);
1179 continue;
1180 }
1181 }
1182 }
1183 }
1184
1185 for (MachineBasicBlock &MBB : MF) {
1186 for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs())
1187 MBB.addLiveIn(Reg);
1188
1189 for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs())
1190 MBB.addLiveIn(Reg);
1191
1192 MBB.sortUniqueLiveIns();
1193
1194 if (!SpillFIs.empty() && SeenDbgInstr) {
1195 // FIXME: The dead frame indices are replaced with a null register from
1196 // the debug value instructions. We should instead update it with the
1197 // correct register value. But it is not clear the register value alone is enough.
1198 for (MachineInstr &MI : MBB) {
1199 if (MI.isDebugValue() && MI.getOperand(0).isFI() &&
1200 SpillFIs[MI.getOperand(0).getIndex()]) {
1201 MI.getOperand(0).ChangeToRegister(Register(), false /*isDef*/);
1202 MI.getOperand(0).setIsDebug();
1203 }
1204 }
1205 }
1206 }
1207 }
1208
1209 FuncInfo->removeDeadFrameIndices(MFI);
1210 assert(allSGPRSpillsAreDead(MF) &&
1211 "SGPR spill should have been removed in SILowerSGPRSpills");
1212
1213 // FIXME: The other checks should be redundant with allStackObjectsAreDead,
1214 // but currently hasNonSpillStackObjects is set only from source
1215 // allocas. Stack temps produced from legalization are not counted currently.
1216 if (!allStackObjectsAreDead(MFI)) {
1217 assert(RS && "RegScavenger required if spilling")((void)0);
1218
1219 // Add an emergency spill slot
1220 RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI));
1221 }
1222}
1223
1224// Only report VGPRs to generic code.
1225void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
1226 BitVector &SavedVGPRs,
1227 RegScavenger *RS) const {
1228 TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
1229 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1230 if (MFI->isEntryFunction())
1231 return;
1232
1233 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
1234 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1235 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1236
1237 // Ignore the SGPRs the default implementation found.
1238 SavedVGPRs.clearBitsNotInMask(TRI->getAllVectorRegMask());
1239
1240 // Do not save AGPRs prior to GFX90A because there was no easy way to do so.
1241 // In gfx908 there are no direct AGPR loads and stores, so spilling AGPRs
1242 // also requires a temporary VGPR.
1243 if (!ST.hasGFX90AInsts())
1244 SavedVGPRs.clearBitsInMask(TRI->getAllAGPRRegMask());
1245
1246 // hasFP only knows about stack objects that already exist. We're now
1247 // determining the stack slots that will be created, so we have to predict
1248 // them. Stack objects force FP usage with calls.
1249 //
1250 // Note a new VGPR CSR may be introduced if one is used for the spill, but we
1251 // don't want to report it here.
1252 //
1253 // FIXME: Is this really hasReservedCallFrame?
1254 const bool WillHaveFP =
1255 FrameInfo.hasCalls() &&
1256 (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));
1257
1258 // VGPRs used for SGPR spilling need to be specially inserted in the prolog,
1259 // so don't allow the default insertion to handle them.
1260 for (auto SSpill : MFI->getSGPRSpillVGPRs())
1261 SavedVGPRs.reset(SSpill.VGPR);
1262
1263 LivePhysRegs LiveRegs;
1264 LiveRegs.init(*TRI);
1265
1266 if (WillHaveFP || hasFP(MF)) {
1267 assert(!MFI->SGPRForFPSaveRestoreCopy && !MFI->FramePointerSaveIndex &&
1268 "Re-reserving spill slot for FP");
1269 getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForFPSaveRestoreCopy,
1270 MFI->FramePointerSaveIndex, true);
1271 }
1272
1273 if (TRI->hasBasePointer(MF)) {
1274 if (MFI->SGPRForFPSaveRestoreCopy)
1275 LiveRegs.addReg(MFI->SGPRForFPSaveRestoreCopy);
1276
1277 assert(!MFI->SGPRForBPSaveRestoreCopy &&
1278 !MFI->BasePointerSaveIndex && "Re-reserving spill slot for BP");
1279 getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForBPSaveRestoreCopy,
1280 MFI->BasePointerSaveIndex, false);
1281 }
1282}
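// Rough sketch of the save strategy chosen above (summarized, order
// approximate): getVGPRSpillLaneOrTempRegister() prefers a free lane in an
// already-reserved VGPR, then a copy into an unused SGPR, then spilling a
// fresh VGPR to obtain a lane, and finally a plain memory stack slot. The
// result lands in SGPRForFPSaveRestoreCopy / FramePointerSaveIndex (or the
// BP equivalents).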
1283
1284void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
1285 BitVector &SavedRegs,
1286 RegScavenger *RS) const {
1287 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1288 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1289 if (MFI->isEntryFunction())
1290 return;
1291
1292 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1293 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1294
1295 // The SP is specifically managed and we don't want extra spills of it.
1296 SavedRegs.reset(MFI->getStackPtrOffsetReg());
1297
1298 const BitVector AllSavedRegs = SavedRegs;
1299 SavedRegs.clearBitsInMask(TRI->getAllVectorRegMask());
1300
1301 // If clearing VGPRs changed the mask, we will have some CSR VGPR spills.
1302 const bool HaveAnyCSRVGPR = SavedRegs != AllSavedRegs;
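// Illustrative example (register names hypothetical): if the default
// implementation marked {s30_s31, v40} as saved, clearing the vector-register
// bits leaves {s30_s31}; the mask changed, so HaveAnyCSRVGPR is true.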
1303
1304 // We have to anticipate introducing CSR VGPR spills if we don't have any
1305 // stack objects already, since an FP is required if there is both a call and a stack.
1306 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
1307 const bool WillHaveFP = FrameInfo.hasCalls() && HaveAnyCSRVGPR;
1308
1309 // FP will be specially managed like SP.
1310 if (WillHaveFP || hasFP(MF))
1311 SavedRegs.reset(MFI->getFrameOffsetReg());
1312}
1313
1314bool SIFrameLowering::assignCalleeSavedSpillSlots(
1315 MachineFunction &MF, const TargetRegisterInfo *TRI,
1316 std::vector<CalleeSavedInfo> &CSI) const {
1317 if (CSI.empty())
1318 return true; // Early exit if no callee saved registers are modified!
1319
1320 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1321 if (!FuncInfo->SGPRForFPSaveRestoreCopy &&
1322 !FuncInfo->SGPRForBPSaveRestoreCopy)
1323 return false;
1324
1325 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1326 const SIRegisterInfo *RI = ST.getRegisterInfo();
1327 Register FramePtrReg = FuncInfo->getFrameOffsetReg();
1328 Register BasePtrReg = RI->getBaseRegister();
1329 unsigned NumModifiedRegs = 0;
1330
1331 if (FuncInfo->SGPRForFPSaveRestoreCopy)
1332 NumModifiedRegs++;
1333 if (FuncInfo->SGPRForBPSaveRestoreCopy)
1334 NumModifiedRegs++;
1335
1336 for (auto &CS : CSI) {
1337 if (CS.getReg() == FramePtrReg && FuncInfo->SGPRForFPSaveRestoreCopy) {
1338 CS.setDstReg(FuncInfo->SGPRForFPSaveRestoreCopy);
1339 if (--NumModifiedRegs == 0) // Stop once every redirected register is handled.
1340 break;
1341 } else if (CS.getReg() == BasePtrReg &&
1342 FuncInfo->SGPRForBPSaveRestoreCopy) {
1343 CS.setDstReg(FuncInfo->SGPRForBPSaveRestoreCopy);
1344 if (--NumModifiedRegs == 0)
1345 break;
1346 }
1347 }
1348
1349 return false;
1350}
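// Returning false defers to the generic spill-slot assignment for the
// remaining CSI entries; only the FP/BP entries redirected with setDstReg()
// above are saved via an SGPR copy instead of a stack slot.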
1351
1352MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
1353 MachineFunction &MF,
1354 MachineBasicBlock &MBB,
1355 MachineBasicBlock::iterator I) const {
1356 int64_t Amount = I->getOperand(0).getImm();
1357 if (Amount == 0)
1358 return MBB.erase(I);
1359
1360 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1361 const SIInstrInfo *TII = ST.getInstrInfo();
1362 const DebugLoc &DL = I->getDebugLoc();
1363 unsigned Opc = I->getOpcode();
1364 bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
1365 uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
1366
1367 if (!hasReservedCallFrame(MF)) {
1368 Amount = alignTo(Amount, getStackAlign());
1369 assert(isUInt<32>(Amount) && "exceeded stack address space size");
1370 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1371 Register SPReg = MFI->getStackPtrOffsetReg();
1372
1373 Amount *= getScratchScaleFactor(ST);
1374 if (IsDestroy)
1375 Amount = -Amount;
1376 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SPReg)
1377 .addReg(SPReg)
1378 .addImm(Amount);
1379 } else if (CalleePopAmount != 0) {
1380 llvm_unreachable("is this used?");
1381 }
1382
1383 return MBB.erase(I);
1384}
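// Worked example (illustrative; assumes the conventional s32 stack pointer):
// with flat scratch disabled, getScratchScaleFactor() is the wavefront size,
// so reserving 16 bytes of per-lane call-frame space on a wave64 target emits
//   s_add_i32 s32, s32, 16 * 64
// because the swizzled stack pointer is tracked in per-wave bytes.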
1385
1386/// Returns true if the frame will require a reference to the stack pointer.
1387///
1388/// This is the set of conditions common to setting up the stack pointer in a
1389/// kernel, and for using a frame pointer in a callable function.
1390///
1391/// FIXME: Should also check hasOpaqueSPAdjustment and if any inline asm
1392/// references SP.
1393static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI) {
1394 return MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint();
1395}
1396
1397 // The FP for kernels is always known 0, so we never really need to set up an
1398// explicit register for it. However, DisableFramePointerElim will force us to
1399// use a register for it.
1400bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
1401 const MachineFrameInfo &MFI = MF.getFrameInfo();
1402
1403 // For entry functions we can use an immediate offset in most cases, so the
1404 // presence of calls doesn't imply we need a distinct frame pointer.
1405 if (MFI.hasCalls() &&
1406 !MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {
1407 // All offsets are unsigned, so they need to be addressed in the same
1408 // direction as stack growth.
1409
1410 // FIXME: This function is pretty broken, since it can be called before the
1411 // frame layout is determined or CSR spills are inserted.
1412 return MFI.getStackSize() != 0;
1413 }
1414
1415 return frameTriviallyRequiresSP(MFI) || MFI.isFrameAddressTaken() ||
1416 MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->hasStackRealignment(
1417 MF) ||
1418 MF.getTarget().Options.DisableFramePointerElim(MF);
1419}
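// Per the FIXME above, this result is conservative when queried before frame
// finalization: getStackSize() may still be zero even though later CSR spill
// insertion will allocate stack.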
1420
1421// This is essentially a reduced version of hasFP for entry functions. Since the
1422// stack pointer is known 0 on entry to kernels, we never really need an FP
1423// register. We may need to initialize the stack pointer depending on the frame
1424// properties, which logically overlaps many of the cases where an ordinary
1425// function would require an FP.
1426bool SIFrameLowering::requiresStackPointerReference(
1427 const MachineFunction &MF) const {
1428 // Callable functions always require a stack pointer reference.
1429 assert(MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() &&
1430 "only expected to call this for entry points");
1431
1432 const MachineFrameInfo &MFI = MF.getFrameInfo();
1433
1434 // Entry points ordinarily don't need to initialize SP. We have to set it up
1435 // for callees if there are any. Also note tail calls are impossible/don't
1436 // make any sense for kernels.
1437 if (MFI.hasCalls())
1438 return true;
1439
1440 // We still need to initialize the SP if we're doing anything weird that
1441 // references the SP, like variable sized stack objects.
1442 return frameTriviallyRequiresSP(MFI);
1443}