Bug Summary

File: src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
Warning: line 141, column 27
The result of the left shift is undefined due to shifting by '64', which is greater or equal to the width of type 'long long'
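
About this warning: on the path reported below, the field 'IsWave32' is false, so 'Data.PerVGPR' becomes 64; if 'NumSubRegs' is also 64 or more, 'std::min(Data.PerVGPR, NumSubRegs)' evaluates to 64 and line 141 shifts a 'long long' by its full width, which C++ leaves undefined. The following standalone sketch (not a patch to SIRegisterInfo.cpp; the names only mirror the report) reproduces the flagged shape and shows one way to build the same all-ones lane mask while keeping the shift count strictly below 64.

#include <algorithm>
#include <cstdint>
#include <iostream>

// Same shape as line 141: when the shift count reaches 64, the behavior is
// undefined for a 64-bit 'long long', which is what the checker flags.
int64_t vgprLanesAsWritten(unsigned PerVGPR, unsigned NumSubRegs) {
  return (1LL << std::min(PerVGPR, NumSubRegs)) - 1LL;
}

// One defined alternative: special-case the full-width mask so the shift
// count never equals the width of the type.
uint64_t vgprLanesDefined(unsigned PerVGPR, unsigned NumSubRegs) {
  unsigned N = std::min(PerVGPR, NumSubRegs);
  return N >= 64 ? ~uint64_t(0) : (uint64_t(1) << N) - 1;
}

int main() {
  std::cout << std::hex << vgprLanesDefined(64, 64) << '\n'; // ffffffffffffffff
  std::cout << std::hex << vgprLanesDefined(64, 32) << '\n'; // ffffffff
}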

Annotated Source Code


clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name SIRegisterInfo.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 1 -fhalf-no-semantic-interposition -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Analysis -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ASMParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/BinaryFormat -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitstream -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /include/llvm/CodeGen -I /include/llvm/CodeGen/PBQP -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Coroutines -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData/Coverage -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/CodeView -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/DWARF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/MSF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/PDB -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Demangle -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/JITLink -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/Orc -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenACC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenMP -I /include/llvm/CodeGen/GlobalISel -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IRReader -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/LTO -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Linker -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC/MCParser -I /include/llvm/CodeGen/MIRParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Object -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Option -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Passes -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Scalar -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ADT -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Support -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/Symbolize -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Target -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Utils -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Vectorize -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/IPO -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include -I /usr/src/gnu/usr.bin/clang/libLLVM/../include -I /usr/src/gnu/usr.bin/clang/libLLVM/obj -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include -D NDEBUG -D __STDC_LIMIT_MACROS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D LLVM_PREFIX="/usr" -D PIC -internal-isystem /usr/include/c++/v1 -internal-isystem /usr/local/lib/clang/13.0.0/include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -ferror-limit 19 -fvisibility-inlines-hidden -fwrapv -D_RET_PROTECTOR -ret-protector -fno-rtti -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/ben/Projects/vmm/scan-build/2022-01-12-194120-40624-1 -x c++ /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

1//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// SI implementation of the TargetRegisterInfo class.
11//
12//===----------------------------------------------------------------------===//
13
14#include "SIRegisterInfo.h"
15#include "AMDGPU.h"
16#include "AMDGPURegisterBankInfo.h"
17#include "GCNSubtarget.h"
18#include "MCTargetDesc/AMDGPUInstPrinter.h"
19#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
20#include "SIMachineFunctionInfo.h"
21#include "llvm/CodeGen/LiveIntervals.h"
22#include "llvm/CodeGen/MachineDominators.h"
23#include "llvm/CodeGen/RegisterScavenging.h"
24
25using namespace llvm;
26
27#define GET_REGINFO_TARGET_DESC
28#include "AMDGPUGenRegisterInfo.inc"
29
30static cl::opt<bool> EnableSpillSGPRToVGPR(
31 "amdgpu-spill-sgpr-to-vgpr",
32 cl::desc("Enable spilling VGPRs to SGPRs"),
33 cl::ReallyHidden,
34 cl::init(true));
35
36std::array<std::vector<int16_t>, 16> SIRegisterInfo::RegSplitParts;
37std::array<std::array<uint16_t, 32>, 9> SIRegisterInfo::SubRegFromChannelTable;
38
39// Map numbers of DWORDs to indexes in SubRegFromChannelTable.
40// Valid indexes are shifted 1, such that a 0 mapping means unsupported.
41// e.g. for 8 DWORDs (256-bit), SubRegFromChannelTableWidthMap[8] = 8,
42// meaning index 7 in SubRegFromChannelTable.
43static const std::array<unsigned, 17> SubRegFromChannelTableWidthMap = {
44 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 9};
45
46namespace llvm {
47
48// A temporary struct to spill SGPRs.
49// This is mostly to spill SGPRs to memory. Spilling SGPRs into VGPR lanes emits
50// just v_writelane and v_readlane.
51//
52// When spilling to memory, the SGPRs are written into VGPR lanes and the VGPR
53// is saved to scratch (or the other way around for loads).
54// For this, a VGPR is required where the needed lanes can be clobbered. The
55// RegScavenger can provide a VGPR where currently active lanes can be
56// clobbered, but we still need to save inactive lanes.
57// The high-level steps are:
58// - Try to scavenge SGPR(s) to save exec
59// - Try to scavenge VGPR
60// - Save needed, all or inactive lanes of a TmpVGPR
61// - Spill/Restore SGPRs using TmpVGPR
62// - Restore TmpVGPR
63//
64// To save all lanes of TmpVGPR, exec needs to be saved and modified. If we
65// cannot scavenge temporary SGPRs to save exec, we use the following code:
66// buffer_store_dword TmpVGPR ; only if active lanes need to be saved
67// s_not exec, exec
68// buffer_store_dword TmpVGPR ; save inactive lanes
69// s_not exec, exec
70struct SGPRSpillBuilder {
71 struct PerVGPRData {
72 unsigned PerVGPR;
73 unsigned NumVGPRs;
74 int64_t VGPRLanes;
75 };
76
77 // The SGPR to save
78 Register SuperReg;
79 MachineBasicBlock::iterator MI;
80 ArrayRef<int16_t> SplitParts;
81 unsigned NumSubRegs;
82 bool IsKill;
83 const DebugLoc &DL;
84
85 /* When spilling to stack */
86 // The SGPRs are written into this VGPR, which is then written to scratch
87 // (or vice versa for loads).
88 Register TmpVGPR = AMDGPU::NoRegister;
89 // Temporary spill slot to save TmpVGPR to.
90 int TmpVGPRIndex = 0;
91 // If TmpVGPR is live before the spill or if it is scavenged.
92 bool TmpVGPRLive = false;
93 // Scavenged SGPR to save EXEC.
94 Register SavedExecReg = AMDGPU::NoRegister;
95 // Stack index to write the SGPRs to.
96 int Index;
97 unsigned EltSize = 4;
98
99 RegScavenger *RS;
100 MachineBasicBlock &MBB;
101 MachineFunction &MF;
102 SIMachineFunctionInfo &MFI;
103 const SIInstrInfo &TII;
104 const SIRegisterInfo &TRI;
105 bool IsWave32;
106 Register ExecReg;
107 unsigned MovOpc;
108 unsigned NotOpc;
109
110 SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII,
111 bool IsWave32, MachineBasicBlock::iterator MI, int Index,
112 RegScavenger *RS)
113 : SuperReg(MI->getOperand(0).getReg()), MI(MI),
114 IsKill(MI->getOperand(0).isKill()), DL(MI->getDebugLoc()), Index(Index),
115 RS(RS), MBB(*MI->getParent()), MF(*MBB.getParent()),
116 MFI(*MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
117 IsWave32(IsWave32) {
118 const TargetRegisterClass *RC = TRI.getPhysRegClass(SuperReg);
119 SplitParts = TRI.getRegSplitParts(RC, EltSize);
120 NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
121
122 if (IsWave32) {
123 ExecReg = AMDGPU::EXEC_LO;
124 MovOpc = AMDGPU::S_MOV_B32;
125 NotOpc = AMDGPU::S_NOT_B32;
126 } else {
127 ExecReg = AMDGPU::EXEC;
128 MovOpc = AMDGPU::S_MOV_B64;
129 NotOpc = AMDGPU::S_NOT_B64;
130 }
131
132 assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
133 assert(SuperReg != AMDGPU::EXEC_LO && SuperReg != AMDGPU::EXEC_HI &&
134 SuperReg != AMDGPU::EXEC && "exec should never spill");
135 }
136
137 PerVGPRData getPerVGPRData() {
138 PerVGPRData Data;
139 Data.PerVGPR = IsWave32 ? 32 : 64;
9.1
Field 'IsWave32' is false
10
'?' condition is false
11
The value 64 is assigned to 'Data.PerVGPR'
140 Data.NumVGPRs = (NumSubRegs + (Data.PerVGPR - 1)) / Data.PerVGPR;
141 Data.VGPRLanes = (1LL << std::min(Data.PerVGPR, NumSubRegs)) - 1LL;
12
Passing value via 1st parameter '__a'
13
Calling 'min<unsigned int>'
20
Returning from 'min<unsigned int>'
21
The result of the left shift is undefined due to shifting by '64', which is greater or equal to the width of type 'long long'
142 return Data;
143 }
144
145 // Tries to scavenge SGPRs to save EXEC and a VGPR. Uses v0 if no VGPR is
146 // free.
147 // Writes these instructions if an SGPR can be scavenged:
148 // s_mov_b64 s[6:7], exec ; Save exec
149 // s_mov_b64 exec, 3 ; Wanted lanemask
150 // buffer_store_dword v1 ; Write scavenged VGPR to emergency slot
151 //
152 // Writes these instructions if no SGPR can be scavenged:
153 // buffer_store_dword v0 ; Only if no free VGPR was found
154 // s_not_b64 exec, exec
155 // buffer_store_dword v0 ; Save inactive lanes
156 // ; exec stays inverted, it is flipped back in
157 // ; restore.
158 void prepare() {
159 // Scavenged temporary VGPR to use. It must be scavenged once for any number
160 // of spilled subregs.
161 // FIXME: The liveness analysis is limited and does not tell if a register
162 // is in use in lanes that are currently inactive. We can never be sure if
163 // a register as actually in use in another lane, so we need to save all
164 // used lanes of the chosen VGPR.
165 assert(RS && "Cannot spill SGPR to memory without RegScavenger");
166 TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0, false);
167
168 // Reserve temporary stack slot
169 TmpVGPRIndex = MFI.getScavengeFI(MF.getFrameInfo(), TRI);
170 if (TmpVGPR) {
6
Assuming the condition is true
7
Taking true branch
171 // Found a register that is dead in the currently active lanes, we only
172 // need to spill inactive lanes.
173 TmpVGPRLive = false;
174 } else {
175 // Pick v0 because it doesn't make a difference.
176 TmpVGPR = AMDGPU::VGPR0;
177 TmpVGPRLive = true;
178 }
179
180 // Try to scavenge SGPRs to save exec
181 assert(!SavedExecReg && "Exec is already saved, refuse to save again");
182 const TargetRegisterClass &RC =
183 IsWave32 ? AMDGPU::SGPR_32RegClass : AMDGPU::SGPR_64RegClass;
7.1
Field 'IsWave32' is false
8
'?' condition is false
184 RS->setRegUsed(SuperReg);
185 SavedExecReg = RS->scavengeRegister(&RC, MI, 0, false);
186
187 int64_t VGPRLanes = getPerVGPRData().VGPRLanes;
9
Calling 'SGPRSpillBuilder::getPerVGPRData'
188
189 if (SavedExecReg) {
190 RS->setRegUsed(SavedExecReg);
191 // Set exec to needed lanes
192 BuildMI(MBB, MI, DL, TII.get(MovOpc), SavedExecReg).addReg(ExecReg);
193 auto I = BuildMI(MBB, MI, DL, TII.get(MovOpc), ExecReg).addImm(VGPRLanes);
194 if (!TmpVGPRLive)
195 I.addReg(TmpVGPR, RegState::ImplicitDefine);
196 // Spill needed lanes
197 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false);
198 } else {
199 // Spill active lanes
200 if (TmpVGPRLive)
201 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false,
202 /*IsKill*/ false);
203 // Spill inactive lanes
204 auto I = BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
205 if (!TmpVGPRLive)
206 I.addReg(TmpVGPR, RegState::ImplicitDefine);
207 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false);
208 }
209 }
210
211 // Writes these instructions if an SGPR can be scavenged:
212 // buffer_load_dword v1 ; Write scavenged VGPR to emergency slot
213 // s_waitcnt vmcnt(0) ; If a free VGPR was found
214 // s_mov_b64 exec, s[6:7] ; Save exec
215 //
216 // Writes these instructions if no SGPR can be scavenged:
217 // buffer_load_dword v0 ; Restore inactive lanes
218 // s_waitcnt vmcnt(0) ; If a free VGPR was found
219 // s_not_b64 exec, exec
220 // buffer_load_dword v0 ; Only if no free VGPR was found
221 void restore() {
222 if (SavedExecReg) {
223 // Restore used lanes
224 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true,
225 /*IsKill*/ false);
226 // Restore exec
227 auto I = BuildMI(MBB, MI, DL, TII.get(MovOpc), ExecReg)
228 .addReg(SavedExecReg, RegState::Kill);
229 // Add an implicit use of the load so it is not dead.
230 // FIXME This inserts an unnecessary waitcnt
231 if (!TmpVGPRLive) {
232 I.addReg(TmpVGPR, RegState::ImplicitKill);
233 }
234 } else {
235 // Restore inactive lanes
236 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true,
237 /*IsKill*/ false);
238 auto I = BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
239 if (!TmpVGPRLive) {
240 I.addReg(TmpVGPR, RegState::ImplicitKill);
241 }
242 // Restore active lanes
243 if (TmpVGPRLive)
244 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true);
245 }
246 }
247
248 // Write TmpVGPR to memory or read TmpVGPR from memory.
249 // Either using a single buffer_load/store if exec is set to the needed mask
250 // or using
251 // buffer_load
252 // s_not exec, exec
253 // buffer_load
254 // s_not exec, exec
255 void readWriteTmpVGPR(unsigned Offset, bool IsLoad) {
256 if (SavedExecReg) {
257 // Spill needed lanes
258 TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad);
259 } else {
260 // Spill active lanes
261 TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad,
262 /*IsKill*/ false);
263 // Spill inactive lanes
264 BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
265 TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad);
266 BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
267 }
268 }
269};
270
271} // namespace llvm
272
273SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST)
274 : AMDGPUGenRegisterInfo(AMDGPU::PC_REG, ST.getAMDGPUDwarfFlavour()), ST(ST),
275 SpillSGPRToVGPR(EnableSpillSGPRToVGPR), isWave32(ST.isWave32()) {
276
277 assert(getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() == 3 &&
278 getSubRegIndexLaneMask(AMDGPU::sub31).getAsInteger() == (3ULL << 62) &&
279 (getSubRegIndexLaneMask(AMDGPU::lo16) |
280 getSubRegIndexLaneMask(AMDGPU::hi16)).getAsInteger() ==
281 getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() &&
282 "getNumCoveredRegs() will not work with generated subreg masks!");
283
284 RegPressureIgnoredUnits.resize(getNumRegUnits());
285 RegPressureIgnoredUnits.set(
286 *MCRegUnitIterator(MCRegister::from(AMDGPU::M0), this));
287 for (auto Reg : AMDGPU::VGPR_HI16RegClass)
288 RegPressureIgnoredUnits.set(*MCRegUnitIterator(Reg, this));
289
290 // HACK: Until this is fully tablegen'd.
291 static llvm::once_flag InitializeRegSplitPartsFlag;
292
293 static auto InitializeRegSplitPartsOnce = [this]() {
294 for (unsigned Idx = 1, E = getNumSubRegIndices() - 1; Idx < E; ++Idx) {
295 unsigned Size = getSubRegIdxSize(Idx);
296 if (Size & 31)
297 continue;
298 std::vector<int16_t> &Vec = RegSplitParts[Size / 32 - 1];
299 unsigned Pos = getSubRegIdxOffset(Idx);
300 if (Pos % Size)
301 continue;
302 Pos /= Size;
303 if (Vec.empty()) {
304 unsigned MaxNumParts = 1024 / Size; // Maximum register is 1024 bits.
305 Vec.resize(MaxNumParts);
306 }
307 Vec[Pos] = Idx;
308 }
309 };
310
311 static llvm::once_flag InitializeSubRegFromChannelTableFlag;
312
313 static auto InitializeSubRegFromChannelTableOnce = [this]() {
314 for (auto &Row : SubRegFromChannelTable)
315 Row.fill(AMDGPU::NoSubRegister);
316 for (uint16_t Idx = 1; Idx < getNumSubRegIndices(); ++Idx) {
317 unsigned Width = AMDGPUSubRegIdxRanges[Idx].Size / 32;
318 unsigned Offset = AMDGPUSubRegIdxRanges[Idx].Offset / 32;
319 assert(Width < SubRegFromChannelTableWidthMap.size());
320 Width = SubRegFromChannelTableWidthMap[Width];
321 if (Width == 0)
322 continue;
323 unsigned TableIdx = Width - 1;
324 assert(TableIdx < SubRegFromChannelTable.size());
325 assert(Offset < SubRegFromChannelTable[TableIdx].size());
326 SubRegFromChannelTable[TableIdx][Offset] = Idx;
327 }
328 };
329
330 llvm::call_once(InitializeRegSplitPartsFlag, InitializeRegSplitPartsOnce);
331 llvm::call_once(InitializeSubRegFromChannelTableFlag,
332 InitializeSubRegFromChannelTableOnce);
333}
334
335void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved,
336 MCRegister Reg) const {
337 MCRegAliasIterator R(Reg, this, true);
338
339 for (; R.isValid(); ++R)
340 Reserved.set(*R);
341}
342
343// Forced to be here by one .inc
344const MCPhysReg *SIRegisterInfo::getCalleeSavedRegs(
345 const MachineFunction *MF) const {
346 CallingConv::ID CC = MF->getFunction().getCallingConv();
347 switch (CC) {
348 case CallingConv::C:
349 case CallingConv::Fast:
350 case CallingConv::Cold:
351 case CallingConv::AMDGPU_Gfx:
352 return MF->getSubtarget<GCNSubtarget>().hasGFX90AInsts()
353 ? CSR_AMDGPU_HighRegs_With_AGPRs_SaveList
354 : CSR_AMDGPU_HighRegs_SaveList;
355 default: {
356 // Dummy to not crash RegisterClassInfo.
357 static const MCPhysReg NoCalleeSavedReg = AMDGPU::NoRegister;
358 return &NoCalleeSavedReg;
359 }
360 }
361}
362
363const MCPhysReg *
364SIRegisterInfo::getCalleeSavedRegsViaCopy(const MachineFunction *MF) const {
365 return nullptr;
366}
367
368const uint32_t *SIRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
369 CallingConv::ID CC) const {
370 switch (CC) {
371 case CallingConv::C:
372 case CallingConv::Fast:
373 case CallingConv::Cold:
374 case CallingConv::AMDGPU_Gfx:
375 return MF.getSubtarget<GCNSubtarget>().hasGFX90AInsts()
376 ? CSR_AMDGPU_HighRegs_With_AGPRs_RegMask
377 : CSR_AMDGPU_HighRegs_RegMask;
378 default:
379 return nullptr;
380 }
381}
382
383const uint32_t *SIRegisterInfo::getNoPreservedMask() const {
384 return CSR_AMDGPU_NoRegs_RegMask;
385}
386
387Register SIRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
388 const SIFrameLowering *TFI =
389 MF.getSubtarget<GCNSubtarget>().getFrameLowering();
390 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
391 // During ISel lowering we always reserve the stack pointer in entry
392 // functions, but never actually want to reference it when accessing our own
393 // frame. If we need a frame pointer we use it, but otherwise we can just use
394 // an immediate "0" which we represent by returning NoRegister.
395 if (FuncInfo->isEntryFunction()) {
396 return TFI->hasFP(MF) ? FuncInfo->getFrameOffsetReg() : Register();
397 }
398 return TFI->hasFP(MF) ? FuncInfo->getFrameOffsetReg()
399 : FuncInfo->getStackPtrOffsetReg();
400}
401
402bool SIRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
403 // When we need stack realignment, we can't reference off of the
404 // stack pointer, so we reserve a base pointer.
405 const MachineFrameInfo &MFI = MF.getFrameInfo();
406 return MFI.getNumFixedObjects() && shouldRealignStack(MF);
407}
408
409Register SIRegisterInfo::getBaseRegister() const { return AMDGPU::SGPR34; }
410
411const uint32_t *SIRegisterInfo::getAllVGPRRegMask() const {
412 return CSR_AMDGPU_AllVGPRs_RegMask;
413}
414
415const uint32_t *SIRegisterInfo::getAllAGPRRegMask() const {
416 return CSR_AMDGPU_AllAGPRs_RegMask;
417}
418
419const uint32_t *SIRegisterInfo::getAllVectorRegMask() const {
420 return CSR_AMDGPU_AllVectorRegs_RegMask;
421}
422
423const uint32_t *SIRegisterInfo::getAllAllocatableSRegMask() const {
424 return CSR_AMDGPU_AllAllocatableSRegs_RegMask;
425}
426
427unsigned SIRegisterInfo::getSubRegFromChannel(unsigned Channel,
428 unsigned NumRegs) {
429 assert(NumRegs < SubRegFromChannelTableWidthMap.size());
430 unsigned NumRegIndex = SubRegFromChannelTableWidthMap[NumRegs];
431 assert(NumRegIndex && "Not implemented");
432 assert(Channel < SubRegFromChannelTable[NumRegIndex - 1].size());
433 return SubRegFromChannelTable[NumRegIndex - 1][Channel];
434}
435
436MCRegister SIRegisterInfo::reservedPrivateSegmentBufferReg(
437 const MachineFunction &MF) const {
438 unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
439 MCRegister BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
440 return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SGPR_128RegClass);
441}
442
443BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
444 BitVector Reserved(getNumRegs());
445 Reserved.set(AMDGPU::MODE);
446
447 // EXEC_LO and EXEC_HI could be allocated and used as regular register, but
448 // this seems likely to result in bugs, so I'm marking them as reserved.
449 reserveRegisterTuples(Reserved, AMDGPU::EXEC);
450 reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
451
452 // M0 has to be reserved so that llvm accepts it as a live-in into a block.
453 reserveRegisterTuples(Reserved, AMDGPU::M0);
454
455 // Reserve src_vccz, src_execz, src_scc.
456 reserveRegisterTuples(Reserved, AMDGPU::SRC_VCCZ);
457 reserveRegisterTuples(Reserved, AMDGPU::SRC_EXECZ);
458 reserveRegisterTuples(Reserved, AMDGPU::SRC_SCC);
459
460 // Reserve the memory aperture registers.
461 reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
462 reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
463 reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
464 reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
465
466 // Reserve src_pops_exiting_wave_id - support is not implemented in Codegen.
467 reserveRegisterTuples(Reserved, AMDGPU::SRC_POPS_EXITING_WAVE_ID);
468
469 // Reserve xnack_mask registers - support is not implemented in Codegen.
470 reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
471
472 // Reserve lds_direct register - support is not implemented in Codegen.
473 reserveRegisterTuples(Reserved, AMDGPU::LDS_DIRECT);
474
475 // Reserve Trap Handler registers - support is not implemented in Codegen.
476 reserveRegisterTuples(Reserved, AMDGPU::TBA);
477 reserveRegisterTuples(Reserved, AMDGPU::TMA);
478 reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
479 reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
480 reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
481 reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
482 reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
483 reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
484 reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
485 reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
486
487 // Reserve null register - it shall never be allocated
488 reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL);
489
490 // Disallow vcc_hi allocation in wave32. It may be allocated but most likely
491 // will result in bugs.
492 if (isWave32) {
493 Reserved.set(AMDGPU::VCC);
494 Reserved.set(AMDGPU::VCC_HI);
495 }
496
497 unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
498 unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
499 for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
500 unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
501 reserveRegisterTuples(Reserved, Reg);
502 }
503
504 unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
505 // TODO: In an entry function without calls and AGPRs used it is possible
506 // to use the whole register budget for VGPRs. Even more it shall
507 // be possible to estimate maximum AGPR/VGPR pressure and split
508 // register file accordingly.
509 if (ST.hasGFX90AInsts())
510 MaxNumVGPRs /= 2;
511 unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
512 for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
513 unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
514 reserveRegisterTuples(Reserved, Reg);
515 Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
516 reserveRegisterTuples(Reserved, Reg);
517 }
518
519 for (auto Reg : AMDGPU::SReg_32RegClass) {
520 Reserved.set(getSubReg(Reg, AMDGPU::hi16));
521 Register Low = getSubReg(Reg, AMDGPU::lo16);
522 // This is to prevent BB vcc liveness errors.
523 if (!AMDGPU::SGPR_LO16RegClass.contains(Low))
524 Reserved.set(Low);
525 }
526
527 for (auto Reg : AMDGPU::AGPR_32RegClass) {
528 Reserved.set(getSubReg(Reg, AMDGPU::hi16));
529 }
530
531 // Reserve all the rest AGPRs if there are no instructions to use it.
532 if (!ST.hasMAIInsts()) {
533 for (unsigned i = 0; i < MaxNumVGPRs; ++i) {
534 unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
535 reserveRegisterTuples(Reserved, Reg);
536 }
537 }
538
539 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
540
541 Register ScratchRSrcReg = MFI->getScratchRSrcReg();
542 if (ScratchRSrcReg != AMDGPU::NoRegister) {
543 // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
544 // to spill.
545 // TODO: May need to reserve a VGPR if doing LDS spilling.
546 reserveRegisterTuples(Reserved, ScratchRSrcReg);
547 }
548
549 // We have to assume the SP is needed in case there are calls in the function,
550 // which is detected after the function is lowered. If we aren't really going
551 // to need SP, don't bother reserving it.
552 MCRegister StackPtrReg = MFI->getStackPtrOffsetReg();
553
554 if (StackPtrReg) {
555 reserveRegisterTuples(Reserved, StackPtrReg);
556 assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
557 }
558
559 MCRegister FrameReg = MFI->getFrameOffsetReg();
560 if (FrameReg) {
561 reserveRegisterTuples(Reserved, FrameReg);
562 assert(!isSubRegister(ScratchRSrcReg, FrameReg));
563 }
564
565 if (hasBasePointer(MF)) {
566 MCRegister BasePtrReg = getBaseRegister();
567 reserveRegisterTuples(Reserved, BasePtrReg);
568 assert(!isSubRegister(ScratchRSrcReg, BasePtrReg));
569 }
570
571 for (auto Reg : MFI->WWMReservedRegs) {
572 reserveRegisterTuples(Reserved, Reg.first);
573 }
574
575 // Reserve VGPRs used for SGPR spilling.
576 // Note we treat freezeReservedRegs unusually because we run register
577 // allocation in two phases. It's OK to re-freeze with new registers for the
578 // second run.
579#if 0
580 for (auto &SpilledFI : MFI->sgpr_spill_vgprs()) {
581 for (auto &SpilledVGPR : SpilledFI.second)
582 reserveRegisterTuples(Reserved, SpilledVGPR.VGPR);
583 }
584#endif
585
586 // FIXME: Stop using reserved registers for this.
587 for (MCPhysReg Reg : MFI->getAGPRSpillVGPRs())
588 reserveRegisterTuples(Reserved, Reg);
589
590 for (MCPhysReg Reg : MFI->getVGPRSpillAGPRs())
591 reserveRegisterTuples(Reserved, Reg);
592
593 for (auto SSpill : MFI->getSGPRSpillVGPRs())
594 reserveRegisterTuples(Reserved, SSpill.VGPR);
595
596 return Reserved;
597}
598
599bool SIRegisterInfo::shouldRealignStack(const MachineFunction &MF) const {
600 const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
601 // On entry, the base address is 0, so it can't possibly need any more
602 // alignment.
603
604 // FIXME: Should be able to specify the entry frame alignment per calling
605 // convention instead.
606 if (Info->isEntryFunction())
607 return false;
608
609 return TargetRegisterInfo::shouldRealignStack(MF);
610}
611
612bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
613 const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
614 if (Info->isEntryFunction()) {
615 const MachineFrameInfo &MFI = Fn.getFrameInfo();
616 return MFI.hasStackObjects() || MFI.hasCalls();
617 }
618
619 // May need scavenger for dealing with callee saved registers.
620 return true;
621}
622
623bool SIRegisterInfo::requiresFrameIndexScavenging(
624 const MachineFunction &MF) const {
625 // Do not use frame virtual registers. They used to be used for SGPRs, but
626 // once we reach PrologEpilogInserter, we can no longer spill SGPRs. If the
627 // scavenger fails, we can increment/decrement the necessary SGPRs to avoid a
628 // spill.
629 return false;
630}
631
632bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
633 const MachineFunction &MF) const {
634 const MachineFrameInfo &MFI = MF.getFrameInfo();
635 return MFI.hasStackObjects();
636}
637
638bool SIRegisterInfo::requiresVirtualBaseRegisters(
639 const MachineFunction &) const {
640 // There are no special dedicated stack or frame pointers.
641 return true;
642}
643
644int64_t SIRegisterInfo::getScratchInstrOffset(const MachineInstr *MI) const {
645 assert(SIInstrInfo::isMUBUF(*MI) || SIInstrInfo::isFLATScratch(*MI));
646
647 int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
648 AMDGPU::OpName::offset);
649 return MI->getOperand(OffIdx).getImm();
650}
651
652int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
653 int Idx) const {
654 if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
655 return 0;
656
657 assert((Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
658 AMDGPU::OpName::vaddr) ||
659 (Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
660 AMDGPU::OpName::saddr))) &&
661 "Should never see frame index on non-address operand");
662
663 return getScratchInstrOffset(MI);
664}
665
666bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
667 if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
668 return false;
669
670 int64_t FullOffset = Offset + getScratchInstrOffset(MI);
671
672 if (SIInstrInfo::isMUBUF(*MI))
673 return !SIInstrInfo::isLegalMUBUFImmOffset(FullOffset);
674
675 const SIInstrInfo *TII = ST.getInstrInfo();
676 return !TII->isLegalFLATOffset(FullOffset, AMDGPUAS::PRIVATE_ADDRESS,
677 SIInstrFlags::FlatScratch);
678}
679
680Register SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
681 int FrameIdx,
682 int64_t Offset) const {
683 MachineBasicBlock::iterator Ins = MBB->begin();
684 DebugLoc DL; // Defaults to "unknown"
685
686 if (Ins != MBB->end())
687 DL = Ins->getDebugLoc();
688
689 MachineFunction *MF = MBB->getParent();
690 const SIInstrInfo *TII = ST.getInstrInfo();
691 MachineRegisterInfo &MRI = MF->getRegInfo();
692 unsigned MovOpc = ST.enableFlatScratch() ? AMDGPU::S_MOV_B32
693 : AMDGPU::V_MOV_B32_e32;
694
695 Register BaseReg = MRI.createVirtualRegister(
696 ST.enableFlatScratch() ? &AMDGPU::SReg_32_XEXEC_HIRegClass
697 : &AMDGPU::VGPR_32RegClass);
698
699 if (Offset == 0) {
700 BuildMI(*MBB, Ins, DL, TII->get(MovOpc), BaseReg)
701 .addFrameIndex(FrameIdx);
702 return BaseReg;
703 }
704
705 Register OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
706
707 Register FIReg = MRI.createVirtualRegister(
708 ST.enableFlatScratch() ? &AMDGPU::SReg_32_XM0RegClass
709 : &AMDGPU::VGPR_32RegClass);
710
711 BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
712 .addImm(Offset);
713 BuildMI(*MBB, Ins, DL, TII->get(MovOpc), FIReg)
714 .addFrameIndex(FrameIdx);
715
716 if (ST.enableFlatScratch() ) {
717 BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_ADD_I32), BaseReg)
718 .addReg(OffsetReg, RegState::Kill)
719 .addReg(FIReg);
720 return BaseReg;
721 }
722
723 TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
724 .addReg(OffsetReg, RegState::Kill)
725 .addReg(FIReg)
726 .addImm(0); // clamp bit
727
728 return BaseReg;
729}
730
731void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
732 int64_t Offset) const {
733 const SIInstrInfo *TII = ST.getInstrInfo();
734 bool IsFlat = TII->isFLATScratch(MI);
735
736#ifndef NDEBUG
737 // FIXME: Is it possible to be storing a frame index to itself?
738 bool SeenFI = false;
739 for (const MachineOperand &MO: MI.operands()) {
740 if (MO.isFI()) {
741 if (SeenFI)
742 llvm_unreachable("should not see multiple frame indices");
743
744 SeenFI = true;
745 }
746 }
747#endif
748
749 MachineOperand *FIOp =
750 TII->getNamedOperand(MI, IsFlat ? AMDGPU::OpName::saddr
751 : AMDGPU::OpName::vaddr);
752
753 MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
754 int64_t NewOffset = OffsetOp->getImm() + Offset;
755
756 assert(FIOp && FIOp->isFI() && "frame index must be address operand");
757 assert(TII->isMUBUF(MI) || TII->isFLATScratch(MI));
758
759 if (IsFlat) {
760 assert(TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
761 SIInstrFlags::FlatScratch) &&
762 "offset should be legal");
763 FIOp->ChangeToRegister(BaseReg, false);
764 OffsetOp->setImm(NewOffset);
765 return;
766 }
767
768#ifndef NDEBUG
769 MachineOperand *SOffset = TII->getNamedOperand(MI, AMDGPU::OpName::soffset);
770 assert(SOffset->isImm() && SOffset->getImm() == 0);
771#endif
772
773 assert(SIInstrInfo::isLegalMUBUFImmOffset(NewOffset) &&
774 "offset should be legal");
775
776 FIOp->ChangeToRegister(BaseReg, false);
777 OffsetOp->setImm(NewOffset);
778}
779
780bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
781 Register BaseReg,
782 int64_t Offset) const {
783 if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
784 return false;
785
786 int64_t NewOffset = Offset + getScratchInstrOffset(MI);
787
788 if (SIInstrInfo::isMUBUF(*MI))
789 return SIInstrInfo::isLegalMUBUFImmOffset(NewOffset);
790
791 const SIInstrInfo *TII = ST.getInstrInfo();
792 return TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
793 SIInstrFlags::FlatScratch);
794}
795
796const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
797 const MachineFunction &MF, unsigned Kind) const {
798 // This is inaccurate. It depends on the instruction and address space. The
799 // only place where we should hit this is for dealing with frame indexes /
800 // private accesses, so this is correct in that case.
801 return &AMDGPU::VGPR_32RegClass;
802}
803
804static unsigned getNumSubRegsForSpillOp(unsigned Op) {
805
806 switch (Op) {
807 case AMDGPU::SI_SPILL_S1024_SAVE:
808 case AMDGPU::SI_SPILL_S1024_RESTORE:
809 case AMDGPU::SI_SPILL_V1024_SAVE:
810 case AMDGPU::SI_SPILL_V1024_RESTORE:
811 case AMDGPU::SI_SPILL_A1024_SAVE:
812 case AMDGPU::SI_SPILL_A1024_RESTORE:
813 return 32;
814 case AMDGPU::SI_SPILL_S512_SAVE:
815 case AMDGPU::SI_SPILL_S512_RESTORE:
816 case AMDGPU::SI_SPILL_V512_SAVE:
817 case AMDGPU::SI_SPILL_V512_RESTORE:
818 case AMDGPU::SI_SPILL_A512_SAVE:
819 case AMDGPU::SI_SPILL_A512_RESTORE:
820 return 16;
821 case AMDGPU::SI_SPILL_S256_SAVE:
822 case AMDGPU::SI_SPILL_S256_RESTORE:
823 case AMDGPU::SI_SPILL_V256_SAVE:
824 case AMDGPU::SI_SPILL_V256_RESTORE:
825 case AMDGPU::SI_SPILL_A256_SAVE:
826 case AMDGPU::SI_SPILL_A256_RESTORE:
827 return 8;
828 case AMDGPU::SI_SPILL_S224_SAVE:
829 case AMDGPU::SI_SPILL_S224_RESTORE:
830 case AMDGPU::SI_SPILL_V224_SAVE:
831 case AMDGPU::SI_SPILL_V224_RESTORE:
832 case AMDGPU::SI_SPILL_A224_SAVE:
833 case AMDGPU::SI_SPILL_A224_RESTORE:
834 return 7;
835 case AMDGPU::SI_SPILL_S192_SAVE:
836 case AMDGPU::SI_SPILL_S192_RESTORE:
837 case AMDGPU::SI_SPILL_V192_SAVE:
838 case AMDGPU::SI_SPILL_V192_RESTORE:
839 case AMDGPU::SI_SPILL_A192_SAVE:
840 case AMDGPU::SI_SPILL_A192_RESTORE:
841 return 6;
842 case AMDGPU::SI_SPILL_S160_SAVE:
843 case AMDGPU::SI_SPILL_S160_RESTORE:
844 case AMDGPU::SI_SPILL_V160_SAVE:
845 case AMDGPU::SI_SPILL_V160_RESTORE:
846 case AMDGPU::SI_SPILL_A160_SAVE:
847 case AMDGPU::SI_SPILL_A160_RESTORE:
848 return 5;
849 case AMDGPU::SI_SPILL_S128_SAVE:
850 case AMDGPU::SI_SPILL_S128_RESTORE:
851 case AMDGPU::SI_SPILL_V128_SAVE:
852 case AMDGPU::SI_SPILL_V128_RESTORE:
853 case AMDGPU::SI_SPILL_A128_SAVE:
854 case AMDGPU::SI_SPILL_A128_RESTORE:
855 return 4;
856 case AMDGPU::SI_SPILL_S96_SAVE:
857 case AMDGPU::SI_SPILL_S96_RESTORE:
858 case AMDGPU::SI_SPILL_V96_SAVE:
859 case AMDGPU::SI_SPILL_V96_RESTORE:
860 case AMDGPU::SI_SPILL_A96_SAVE:
861 case AMDGPU::SI_SPILL_A96_RESTORE:
862 return 3;
863 case AMDGPU::SI_SPILL_S64_SAVE:
864 case AMDGPU::SI_SPILL_S64_RESTORE:
865 case AMDGPU::SI_SPILL_V64_SAVE:
866 case AMDGPU::SI_SPILL_V64_RESTORE:
867 case AMDGPU::SI_SPILL_A64_SAVE:
868 case AMDGPU::SI_SPILL_A64_RESTORE:
869 return 2;
870 case AMDGPU::SI_SPILL_S32_SAVE:
871 case AMDGPU::SI_SPILL_S32_RESTORE:
872 case AMDGPU::SI_SPILL_V32_SAVE:
873 case AMDGPU::SI_SPILL_V32_RESTORE:
874 case AMDGPU::SI_SPILL_A32_SAVE:
875 case AMDGPU::SI_SPILL_A32_RESTORE:
876 return 1;
877 default: llvm_unreachable("Invalid spill opcode");
878 }
879}
880
881static int getOffsetMUBUFStore(unsigned Opc) {
882 switch (Opc) {
883 case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
884 return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
885 case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
886 return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
887 case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
888 return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
889 case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
890 return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
891 case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
892 return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
893 case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
894 return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
895 case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
896 return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
897 default:
898 return -1;
899 }
900}
901
902static int getOffsetMUBUFLoad(unsigned Opc) {
903 switch (Opc) {
904 case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
905 return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
906 case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
907 return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
908 case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
909 return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
910 case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
911 return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
912 case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
913 return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
914 case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
915 return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
916 case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
917 return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
918 case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
919 return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
920 case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
921 return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
922 case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
923 return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
924 case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
925 return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
926 case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
927 return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
928 case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
929 return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
930 default:
931 return -1;
932 }
933}
934
935static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST,
936 MachineBasicBlock &MBB,
937 MachineBasicBlock::iterator MI,
938 int Index, unsigned Lane,
939 unsigned ValueReg, bool IsKill) {
940 MachineFunction *MF = MBB.getParent();
941 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
942 const SIInstrInfo *TII = ST.getInstrInfo();
943
944 MCPhysReg Reg = MFI->getVGPRToAGPRSpill(Index, Lane);
945
946 if (Reg == AMDGPU::NoRegister)
947 return MachineInstrBuilder();
948
949 bool IsStore = MI->mayStore();
950 MachineRegisterInfo &MRI = MF->getRegInfo();
951 auto *TRI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
952
953 unsigned Dst = IsStore ? Reg : ValueReg;
954 unsigned Src = IsStore ? ValueReg : Reg;
955 unsigned Opc = (IsStore ^ TRI->isVGPR(MRI, Reg)) ? AMDGPU::V_ACCVGPR_WRITE_B32_e64
956 : AMDGPU::V_ACCVGPR_READ_B32_e64;
957
958 auto MIB = BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(Opc), Dst)
959 .addReg(Src, getKillRegState(IsKill));
960 MIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
961 return MIB;
962}
963
964// This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
965// need to handle the case where an SGPR may need to be spilled while spilling.
966static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST,
967 MachineFrameInfo &MFI,
968 MachineBasicBlock::iterator MI,
969 int Index,
970 int64_t Offset) {
971 const SIInstrInfo *TII = ST.getInstrInfo();
972 MachineBasicBlock *MBB = MI->getParent();
973 const DebugLoc &DL = MI->getDebugLoc();
974 bool IsStore = MI->mayStore();
975
976 unsigned Opc = MI->getOpcode();
977 int LoadStoreOp = IsStore ?
978 getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
979 if (LoadStoreOp == -1)
980 return false;
981
982 const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
983 if (spillVGPRtoAGPR(ST, *MBB, MI, Index, 0, Reg->getReg(), false).getInstr())
984 return true;
985
986 MachineInstrBuilder NewMI =
987 BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
988 .add(*Reg)
989 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
990 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
991 .addImm(Offset)
992 .addImm(0) // cpol
993 .addImm(0) // tfe
994 .addImm(0) // swz
995 .cloneMemRefs(*MI);
996
997 const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
998 AMDGPU::OpName::vdata_in);
999 if (VDataIn)
1000 NewMI.add(*VDataIn);
1001 return true;
1002}
1003
1004static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII,
1005 unsigned LoadStoreOp,
1006 unsigned EltSize) {
1007 bool IsStore = TII->get(LoadStoreOp).mayStore();
1008 bool UseST =
1009 AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::vaddr) < 0 &&
1010 AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::saddr) < 0;
1011
1012 switch (EltSize) {
1013 case 4:
1014 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
1015 : AMDGPU::SCRATCH_LOAD_DWORD_SADDR;
1016 break;
1017 case 8:
1018 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX2_SADDR
1019 : AMDGPU::SCRATCH_LOAD_DWORDX2_SADDR;
1020 break;
1021 case 12:
1022 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX3_SADDR
1023 : AMDGPU::SCRATCH_LOAD_DWORDX3_SADDR;
1024 break;
1025 case 16:
1026 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX4_SADDR
1027 : AMDGPU::SCRATCH_LOAD_DWORDX4_SADDR;
1028 break;
1029 default:
1030 llvm_unreachable("Unexpected spill load/store size!");
1031 }
1032
1033 if (UseST)
1034 LoadStoreOp = AMDGPU::getFlatScratchInstSTfromSS(LoadStoreOp);
1035
1036 return LoadStoreOp;
1037}
1038
1039void SIRegisterInfo::buildSpillLoadStore(
1040 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
1041 unsigned LoadStoreOp, int Index, Register ValueReg, bool IsKill,
1042 MCRegister ScratchOffsetReg, int64_t InstOffset, MachineMemOperand *MMO,
1043 RegScavenger *RS, LivePhysRegs *LiveRegs) const {
1044 assert((!RS || !LiveRegs) && "Only RS or LiveRegs can be set but not both");
1045
1046 MachineFunction *MF = MBB.getParent();
1047 const SIInstrInfo *TII = ST.getInstrInfo();
1048 const MachineFrameInfo &MFI = MF->getFrameInfo();
1049 const SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
1050
1051 const MCInstrDesc *Desc = &TII->get(LoadStoreOp);
1052 const DebugLoc &DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1053 bool IsStore = Desc->mayStore();
1054 bool IsFlat = TII->isFLATScratch(LoadStoreOp);
1055
1056 bool Scavenged = false;
1057 MCRegister SOffset = ScratchOffsetReg;
1058
1059 const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
1060 // On gfx90a+ AGPR is a regular VGPR acceptable for loads and stores.
1061 const bool IsAGPR = !ST.hasGFX90AInsts() && hasAGPRs(RC);
1062 const unsigned RegWidth = AMDGPU::getRegBitWidth(RC->getID()) / 8;
1063
1064 // Always use 4 byte operations for AGPRs because we need to scavenge
1065 // a temporary VGPR.
1066 unsigned EltSize = (IsFlat && !IsAGPR) ? std::min(RegWidth, 16u) : 4u;
1067 unsigned NumSubRegs = RegWidth / EltSize;
1068 unsigned Size = NumSubRegs * EltSize;
1069 unsigned RemSize = RegWidth - Size;
1070 unsigned NumRemSubRegs = RemSize ? 1 : 0;
1071 int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
1072 int64_t MaxOffset = Offset + Size + RemSize - EltSize;
1073 int64_t ScratchOffsetRegDelta = 0;
1074
1075 if (IsFlat && EltSize > 4) {
1076 LoadStoreOp = getFlatScratchSpillOpcode(TII, LoadStoreOp, EltSize);
1077 Desc = &TII->get(LoadStoreOp);
1078 }
1079
1080 Align Alignment = MFI.getObjectAlign(Index);
1081 const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
1082
1083 assert((IsFlat || ((Offset % EltSize) == 0)) &&
1084 "unexpected VGPR spill offset");
1085
1086 bool IsOffsetLegal =
1087 IsFlat ? TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS,
1088 SIInstrFlags::FlatScratch)
1089 : SIInstrInfo::isLegalMUBUFImmOffset(MaxOffset);
1090 if (!IsOffsetLegal || (IsFlat && !SOffset && !ST.hasFlatScratchSTMode())) {
1091 SOffset = MCRegister();
1092
1093 // We currently only support spilling VGPRs to EltSize boundaries, meaning
1094 // we can simplify the adjustment of Offset here to just scale with
1095 // WavefrontSize.
1096 if (!IsFlat)
1097 Offset *= ST.getWavefrontSize();
1098
1099 // We don't have access to the register scavenger if this function is called
1100 // during PEI::scavengeFrameVirtualRegs() so use LiveRegs in this case.
1101 if (RS) {
1102 SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0, false);
1103 } else if (LiveRegs) {
1104 for (MCRegister Reg : AMDGPU::SGPR_32RegClass) {
1105 if (LiveRegs->available(MF->getRegInfo(), Reg)) {
1106 SOffset = Reg;
1107 break;
1108 }
1109 }
1110 }
1111
1112 if (!SOffset) {
1113 // There are no free SGPRs, and since we are in the process of spilling
1114 // VGPRs too. Since we need a VGPR in order to spill SGPRs (this is true
1115 // on SI/CI and on VI it is true until we implement spilling using scalar
1116 // stores), we have no way to free up an SGPR. Our solution here is to
1117 // add the offset directly to the ScratchOffset or StackPtrOffset
1118 // register, and then subtract the offset after the spill to return the
1119 // register to it's original value.
1120 if (!ScratchOffsetReg)
1121 ScratchOffsetReg = FuncInfo->getStackPtrOffsetReg();
1122 SOffset = ScratchOffsetReg;
1123 ScratchOffsetRegDelta = Offset;
1124 } else {
1125 Scavenged = true;
1126 }
1127
1128 if (!SOffset)
1129 report_fatal_error("could not scavenge SGPR to spill in entry function");
1130
1131 if (ScratchOffsetReg == AMDGPU::NoRegister) {
1132 BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), SOffset).addImm(Offset);
1133 } else {
1134 BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), SOffset)
1135 .addReg(ScratchOffsetReg)
1136 .addImm(Offset);
1137 }
1138
1139 Offset = 0;
1140 }
1141
1142 if (IsFlat && SOffset == AMDGPU::NoRegister) {
1143 assert(AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::vaddr) < 0
1144 && "Unexpected vaddr for flat scratch with a FI operand");
1145
1146 assert(ST.hasFlatScratchSTMode());
1147 LoadStoreOp = AMDGPU::getFlatScratchInstSTfromSS(LoadStoreOp);
1148 Desc = &TII->get(LoadStoreOp);
1149 }
1150
1151 Register TmpReg;
1152
1153 for (unsigned i = 0, e = NumSubRegs + NumRemSubRegs, RegOffset = 0; i != e;
1154 ++i, RegOffset += EltSize) {
1155 if (i == NumSubRegs) {
1156 EltSize = RemSize;
1157 LoadStoreOp = getFlatScratchSpillOpcode(TII, LoadStoreOp, EltSize);
1158 }
1159 Desc = &TII->get(LoadStoreOp);
1160
1161 unsigned NumRegs = EltSize / 4;
1162 Register SubReg = e == 1
1163 ? ValueReg
1164 : Register(getSubReg(ValueReg,
1165 getSubRegFromChannel(RegOffset / 4, NumRegs)));
1166
1167 unsigned SOffsetRegState = 0;
1168 unsigned SrcDstRegState = getDefRegState(!IsStore);
1169 if (i + 1 == e) {
1170 SOffsetRegState |= getKillRegState(Scavenged);
1171 // The last implicit use carries the "Kill" flag.
1172 SrcDstRegState |= getKillRegState(IsKill);
1173 }
1174
1175 // Make sure the whole register is defined if there are undef components by
1176 // adding an implicit def of the super-reg on the first instruction.
1177 bool NeedSuperRegDef = e > 1 && IsStore && i == 0;
1178 bool NeedSuperRegImpOperand = e > 1;
1179
1180 unsigned Lane = RegOffset / 4;
1181 unsigned LaneE = (RegOffset + EltSize) / 4;
1182 for ( ; Lane != LaneE; ++Lane) {
1183 bool IsSubReg = e > 1 || EltSize > 4;
1184 Register Sub = IsSubReg
1185 ? Register(getSubReg(ValueReg, getSubRegFromChannel(Lane)))
1186 : ValueReg;
1187 auto MIB = spillVGPRtoAGPR(ST, MBB, MI, Index, Lane, Sub, IsKill);
1188 if (!MIB.getInstr())
1189 break;
1190 if (NeedSuperRegDef || (IsSubReg && IsStore && Lane == 0)) {
1191 MIB.addReg(ValueReg, RegState::ImplicitDefine);
1192 NeedSuperRegDef = false;
1193 }
1194 if (IsSubReg || NeedSuperRegImpOperand) {
1195 NeedSuperRegImpOperand = true;
1196 unsigned State = SrcDstRegState;
1197 if (Lane + 1 != LaneE)
1198 State &= ~RegState::Kill;
1199 MIB.addReg(ValueReg, RegState::Implicit | State);
1200 }
1201 }
1202
1203 if (Lane == LaneE) // Fully spilled into AGPRs.
1204 continue;
1205
1206 // Offset in bytes from the beginning of the ValueReg to its portion we
1207 // still need to spill. It may differ from RegOffset if a portion of
1208 // current SubReg has been already spilled into AGPRs by the loop above.
1209 unsigned RemRegOffset = Lane * 4;
1210 unsigned RemEltSize = EltSize - (RemRegOffset - RegOffset);
1211 if (RemEltSize != EltSize) { // Partially spilled to AGPRs
1212 assert(IsFlat && EltSize > 4);
1213
1214 unsigned NumRegs = RemEltSize / 4;
1215 SubReg = Register(getSubReg(ValueReg,
1216 getSubRegFromChannel(RemRegOffset / 4, NumRegs)));
1217 unsigned Opc = getFlatScratchSpillOpcode(TII, LoadStoreOp, RemEltSize);
1218 Desc = &TII->get(Opc);
1219 }
1220
1221 unsigned FinalReg = SubReg;
1222
1223 if (IsAGPR) {
1224 assert(EltSize == 4);
1225
1226 if (!TmpReg) {
1227 assert(RS && "Needs to have RegScavenger to spill an AGPR!");
1228 // FIXME: change to scavengeRegisterBackwards()
1229 TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
1230 RS->setRegUsed(TmpReg);
1231 }
1232 if (IsStore) {
1233 auto AccRead = BuildMI(MBB, MI, DL,
1234 TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64), TmpReg)
1235 .addReg(SubReg, getKillRegState(IsKill));
1236 if (NeedSuperRegDef)
1237 AccRead.addReg(ValueReg, RegState::ImplicitDefine);
1238 AccRead->setAsmPrinterFlag(MachineInstr::ReloadReuse);
1239 }
1240 SubReg = TmpReg;
1241 }
1242
1243 MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(RemRegOffset);
1244 MachineMemOperand *NewMMO =
1245 MF->getMachineMemOperand(PInfo, MMO->getFlags(), RemEltSize,
1246 commonAlignment(Alignment, RemRegOffset));
1247
1248 auto MIB =
1249 BuildMI(MBB, MI, DL, *Desc)
1250 .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill));
1251 if (!IsFlat)
1252 MIB.addReg(FuncInfo->getScratchRSrcReg());
1253
1254 if (SOffset == AMDGPU::NoRegister) {
1255 if (!IsFlat)
1256 MIB.addImm(0);
1257 } else {
1258 MIB.addReg(SOffset, SOffsetRegState);
1259 }
1260 MIB.addImm(Offset + RemRegOffset)
1261 .addImm(0); // cpol
1262 if (!IsFlat)
1263 MIB.addImm(0) // tfe
1264 .addImm(0); // swz
1265 MIB.addMemOperand(NewMMO);
1266
1267 if (!IsAGPR && NeedSuperRegDef)
1268 MIB.addReg(ValueReg, RegState::ImplicitDefine);
1269
1270 if (!IsStore && TmpReg != AMDGPU::NoRegister) {
1271 MIB = BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64),
1272 FinalReg)
1273 .addReg(TmpReg, RegState::Kill);
1274 MIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
1275 }
1276
1277 if (NeedSuperRegImpOperand)
1278 MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
1279 }
1280
1281 if (ScratchOffsetRegDelta != 0) {
1282 // Subtract the offset we added to the ScratchOffset register.
1283 BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), SOffset)
1284 .addReg(SOffset)
1285 .addImm(-ScratchOffsetRegDelta);
1286 }
1287}
1288
1289void SIRegisterInfo::buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index,
1290 int Offset, bool IsLoad,
1291 bool IsKill) const {
1292 // Load/store VGPR
1293 MachineFrameInfo &FrameInfo = SB.MF.getFrameInfo();
1294 assert(FrameInfo.getStackID(Index) != TargetStackID::SGPRSpill)((void)0);
1295
1296 Register FrameReg =
1297 FrameInfo.isFixedObjectIndex(Index) && hasBasePointer(SB.MF)
1298 ? getBaseRegister()
1299 : getFrameRegister(SB.MF);
1300
1301 Align Alignment = FrameInfo.getObjectAlign(Index);
1302 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SB.MF, Index);
1303 MachineMemOperand *MMO = SB.MF.getMachineMemOperand(
1304 PtrInfo, IsLoad ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore,
1305 SB.EltSize, Alignment);
1306
1307 if (IsLoad) {
1308 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
1309 : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
1310 buildSpillLoadStore(SB.MBB, SB.MI, Opc, Index, SB.TmpVGPR, false, FrameReg,
1311 Offset * SB.EltSize, MMO, SB.RS);
1312 } else {
1313 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
1314 : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
1315 buildSpillLoadStore(SB.MBB, SB.MI, Opc, Index, SB.TmpVGPR, IsKill, FrameReg,
1316 Offset * SB.EltSize, MMO, SB.RS);
1317 // This only ever adds one VGPR spill
1318 SB.MFI.addToSpilledVGPRs(1);
1319 }
1320}
1321
1322bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
1323 int Index,
1324 RegScavenger *RS,
1325 LiveIntervals *LIS,
1326 bool OnlyToVGPR) const {
1327 SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);
1328
1329 ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills =
1330 SB.MFI.getSGPRToVGPRSpills(Index);
1331 bool SpillToVGPR = !VGPRSpills.empty();
1332 if (OnlyToVGPR && !SpillToVGPR)
1333 return false;
1334
1335 assert(SpillToVGPR || (SB.SuperReg != SB.MFI.getStackPtrOffsetReg() &&((void)0)
1336 SB.SuperReg != SB.MFI.getFrameOffsetReg()))((void)0);
1337
1338 if (SpillToVGPR) {
1339 for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
1340 Register SubReg =
1341 SB.NumSubRegs == 1
1342 ? SB.SuperReg
1343 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1344 SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
1345
1346 bool UseKill = SB.IsKill && i == SB.NumSubRegs - 1;
1347
1348 // Mark the "old value of vgpr" input undef only if this is the first sgpr
1349 // spill to this specific vgpr in the first basic block.
1350 auto MIB = BuildMI(SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
1351 Spill.VGPR)
1352 .addReg(SubReg, getKillRegState(UseKill))
1353 .addImm(Spill.Lane)
1354 .addReg(Spill.VGPR);
1355 if (LIS) {
1356 if (i == 0)
1357 LIS->ReplaceMachineInstrInMaps(*MI, *MIB);
1358 else
1359 LIS->InsertMachineInstrInMaps(*MIB);
1360 }
1361
1362 if (i == 0 && SB.NumSubRegs > 1) {
1363 // We may be spilling a super-register which is only partially defined,
1364 // and need to ensure later spills think the value is defined.
1365 MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
1366 }
1367
1368 if (SB.NumSubRegs > 1)
1369 MIB.addReg(SB.SuperReg, getKillRegState(UseKill) | RegState::Implicit);
1370
1371 // FIXME: Since this spills to another register instead of an actual
1372 // frame index, we should delete the frame index when all references to
1373 // it are fixed.
1374 }
1375 } else {
1376 SB.prepare();
1377
1378 // SubReg carries the "Kill" flag when SubReg == SB.SuperReg.
1379 unsigned SubKillState = getKillRegState((SB.NumSubRegs == 1) && SB.IsKill);
1380
1381 // Per VGPR helper data
1382 auto PVD = SB.getPerVGPRData();
1383
1384 for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
1385 unsigned TmpVGPRFlags = RegState::Undef;
1386
1387 // Write sub registers into the VGPR
1388 for (unsigned i = Offset * PVD.PerVGPR,
1389 e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
1390 i < e; ++i) {
1391 Register SubReg =
1392 SB.NumSubRegs == 1
1393 ? SB.SuperReg
1394 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1395
1396 MachineInstrBuilder WriteLane =
1397 BuildMI(SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
1398 SB.TmpVGPR)
1399 .addReg(SubReg, SubKillState)
1400 .addImm(i % PVD.PerVGPR)
1401 .addReg(SB.TmpVGPR, TmpVGPRFlags);
1402 TmpVGPRFlags = 0;
1403
1404 if (LIS) {
1405 if (i == 0)
1406 LIS->ReplaceMachineInstrInMaps(*MI, *WriteLane);
1407 else
1408 LIS->InsertMachineInstrInMaps(*WriteLane);
1409 }
1410
1411 // There could be undef components of a spilled super register.
1412 // TODO: Can we detect this and skip the spill?
1413 if (SB.NumSubRegs > 1) {
1414 // The last implicit use of the SB.SuperReg carries the "Kill" flag.
1415 unsigned SuperKillState = 0;
1416 if (i + 1 == SB.NumSubRegs)
1417 SuperKillState |= getKillRegState(SB.IsKill);
1418 WriteLane.addReg(SB.SuperReg, RegState::Implicit | SuperKillState);
1419 }
1420 }
1421
1422 // Write out VGPR
1423 SB.readWriteTmpVGPR(Offset, /*IsLoad*/ false);
1424 }
1425
1426 SB.restore();
1427 }
1428
1429 MI->eraseFromParent();
1430 SB.MFI.addToSpilledSGPRs(SB.NumSubRegs);
1431
1432 if (LIS)
1433 LIS->removeAllRegUnitsForPhysReg(SB.SuperReg);
1434
1435 return true;
1436}
1437
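
Note on the lane-packing loop in spillSGPR above: when no VGPR lane has been pre-assigned, the sub-registers of the spilled SGPR tuple are written into a temporary VGPR in batches of PVD.PerVGPR lanes, and each batch is flushed by readWriteTmpVGPR(). A minimal sketch of that index arithmetic, using illustrative names rather than the builder's own types:

// Sketch only: sub-register i of the spilled tuple goes to temporary-VGPR
// batch i / PerVGPR and to lane i % PerVGPR (the immediate passed to
// V_WRITELANE_B32 in the loop above). Names here are hypothetical.
#include <utility>

static std::pair<unsigned, unsigned> lanePlacement(unsigned i,
                                                   unsigned PerVGPR) {
  unsigned Batch = i / PerVGPR; // which readWriteTmpVGPR() round
  unsigned Lane = i % PerVGPR;  // lane index within the temporary VGPR
  return {Batch, Lane};
}
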
1438bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
1439 int Index,
1440 RegScavenger *RS,
1441 LiveIntervals *LIS,
1442 bool OnlyToVGPR) const {
1443 SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);
1444
1445 ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills =
1446 SB.MFI.getSGPRToVGPRSpills(Index);
1447 bool SpillToVGPR = !VGPRSpills.empty();
3
Assuming the condition is false
1448 if (OnlyToVGPR && !SpillToVGPR)
3.1
'OnlyToVGPR' is false
1449 return false;
1450
1451 if (SpillToVGPR) {
3.2
'SpillToVGPR' is false
4
Taking false branch
1452 for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
1453 Register SubReg =
1454 SB.NumSubRegs == 1
1455 ? SB.SuperReg
1456 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1457
1458 SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
1459 auto MIB =
1460 BuildMI(SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32), SubReg)
1461 .addReg(Spill.VGPR)
1462 .addImm(Spill.Lane);
1463 if (SB.NumSubRegs > 1 && i == 0)
1464 MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
1465 if (LIS) {
1466 if (i == e - 1)
1467 LIS->ReplaceMachineInstrInMaps(*MI, *MIB);
1468 else
1469 LIS->InsertMachineInstrInMaps(*MIB);
1470 }
1471
1472 }
1473 } else {
1474 SB.prepare();
5
Calling 'SGPRSpillBuilder::prepare'
1475
1476 // Per VGPR helper data
1477 auto PVD = SB.getPerVGPRData();
1478
1479 for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
1480 // Load in VGPR data
1481 SB.readWriteTmpVGPR(Offset, /*IsLoad*/ true);
1482
1483 // Unpack lanes
1484 for (unsigned i = Offset * PVD.PerVGPR,
1485 e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
1486 i < e; ++i) {
1487 Register SubReg =
1488 SB.NumSubRegs == 1
1489 ? SB.SuperReg
1490 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1491
1492 bool LastSubReg = (i + 1 == e);
1493 auto MIB = BuildMI(SB.MBB, MI, SB.DL,
1494 SB.TII.get(AMDGPU::V_READLANE_B32), SubReg)
1495 .addReg(SB.TmpVGPR, getKillRegState(LastSubReg))
1496 .addImm(i);
1497 if (SB.NumSubRegs > 1 && i == 0)
1498 MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
1499 if (LIS) {
1500 if (i == e - 1)
1501 LIS->ReplaceMachineInstrInMaps(*MI, *MIB);
1502 else
1503 LIS->InsertMachineInstrInMaps(*MIB);
1504 }
1505 }
1506 }
1507
1508 SB.restore();
1509 }
1510
1511 MI->eraseFromParent();
1512
1513 if (LIS)
1514 LIS->removeAllRegUnitsForPhysReg(SB.SuperReg);
1515
1516 return true;
1517}
1518
1519/// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
1520/// a VGPR and the stack slot can be safely eliminated when all other users are
1521/// handled.
1522bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
1523 MachineBasicBlock::iterator MI,
1524 int FI,
1525 RegScavenger *RS,
1526 LiveIntervals *LIS) const {
1527 switch (MI->getOpcode()) {
1528 case AMDGPU::SI_SPILL_S1024_SAVE:
1529 case AMDGPU::SI_SPILL_S512_SAVE:
1530 case AMDGPU::SI_SPILL_S256_SAVE:
1531 case AMDGPU::SI_SPILL_S224_SAVE:
1532 case AMDGPU::SI_SPILL_S192_SAVE:
1533 case AMDGPU::SI_SPILL_S160_SAVE:
1534 case AMDGPU::SI_SPILL_S128_SAVE:
1535 case AMDGPU::SI_SPILL_S96_SAVE:
1536 case AMDGPU::SI_SPILL_S64_SAVE:
1537 case AMDGPU::SI_SPILL_S32_SAVE:
1538 return spillSGPR(MI, FI, RS, LIS, true);
1539 case AMDGPU::SI_SPILL_S1024_RESTORE:
1540 case AMDGPU::SI_SPILL_S512_RESTORE:
1541 case AMDGPU::SI_SPILL_S256_RESTORE:
1542 case AMDGPU::SI_SPILL_S224_RESTORE:
1543 case AMDGPU::SI_SPILL_S192_RESTORE:
1544 case AMDGPU::SI_SPILL_S160_RESTORE:
1545 case AMDGPU::SI_SPILL_S128_RESTORE:
1546 case AMDGPU::SI_SPILL_S96_RESTORE:
1547 case AMDGPU::SI_SPILL_S64_RESTORE:
1548 case AMDGPU::SI_SPILL_S32_RESTORE:
1549 return restoreSGPR(MI, FI, RS, LIS, true);
1550 default:
1551 llvm_unreachable("not an SGPR spill instruction")__builtin_unreachable();
1552 }
1553}
1554
1555void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
1556 int SPAdj, unsigned FIOperandNum,
1557 RegScavenger *RS) const {
1558 MachineFunction *MF = MI->getParent()->getParent();
1559 MachineBasicBlock *MBB = MI->getParent();
1560 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
1561 MachineFrameInfo &FrameInfo = MF->getFrameInfo();
1562 const SIInstrInfo *TII = ST.getInstrInfo();
1563 DebugLoc DL = MI->getDebugLoc();
1564
1565 assert(SPAdj == 0 && "unhandled SP adjustment in call sequence?")((void)0);
1566
1567 MachineOperand &FIOp = MI->getOperand(FIOperandNum);
1568 int Index = MI->getOperand(FIOperandNum).getIndex();
1569
1570 Register FrameReg = FrameInfo.isFixedObjectIndex(Index) && hasBasePointer(*MF)
1571 ? getBaseRegister()
1572 : getFrameRegister(*MF);
1573
1574 switch (MI->getOpcode()) {
1
Control jumps to 'case SI_SPILL_S32_RESTORE:' at line 1600
1575 // SGPR register spill
1576 case AMDGPU::SI_SPILL_S1024_SAVE:
1577 case AMDGPU::SI_SPILL_S512_SAVE:
1578 case AMDGPU::SI_SPILL_S256_SAVE:
1579 case AMDGPU::SI_SPILL_S224_SAVE:
1580 case AMDGPU::SI_SPILL_S192_SAVE:
1581 case AMDGPU::SI_SPILL_S160_SAVE:
1582 case AMDGPU::SI_SPILL_S128_SAVE:
1583 case AMDGPU::SI_SPILL_S96_SAVE:
1584 case AMDGPU::SI_SPILL_S64_SAVE:
1585 case AMDGPU::SI_SPILL_S32_SAVE: {
1586 spillSGPR(MI, Index, RS);
1587 break;
1588 }
1589
1590 // SGPR register restore
1591 case AMDGPU::SI_SPILL_S1024_RESTORE:
1592 case AMDGPU::SI_SPILL_S512_RESTORE:
1593 case AMDGPU::SI_SPILL_S256_RESTORE:
1594 case AMDGPU::SI_SPILL_S224_RESTORE:
1595 case AMDGPU::SI_SPILL_S192_RESTORE:
1596 case AMDGPU::SI_SPILL_S160_RESTORE:
1597 case AMDGPU::SI_SPILL_S128_RESTORE:
1598 case AMDGPU::SI_SPILL_S96_RESTORE:
1599 case AMDGPU::SI_SPILL_S64_RESTORE:
1600 case AMDGPU::SI_SPILL_S32_RESTORE: {
1601 restoreSGPR(MI, Index, RS);
2
Calling 'SIRegisterInfo::restoreSGPR'
1602 break;
1603 }
1604
1605 // VGPR register spill
1606 case AMDGPU::SI_SPILL_V1024_SAVE:
1607 case AMDGPU::SI_SPILL_V512_SAVE:
1608 case AMDGPU::SI_SPILL_V256_SAVE:
1609 case AMDGPU::SI_SPILL_V224_SAVE:
1610 case AMDGPU::SI_SPILL_V192_SAVE:
1611 case AMDGPU::SI_SPILL_V160_SAVE:
1612 case AMDGPU::SI_SPILL_V128_SAVE:
1613 case AMDGPU::SI_SPILL_V96_SAVE:
1614 case AMDGPU::SI_SPILL_V64_SAVE:
1615 case AMDGPU::SI_SPILL_V32_SAVE:
1616 case AMDGPU::SI_SPILL_A1024_SAVE:
1617 case AMDGPU::SI_SPILL_A512_SAVE:
1618 case AMDGPU::SI_SPILL_A256_SAVE:
1619 case AMDGPU::SI_SPILL_A224_SAVE:
1620 case AMDGPU::SI_SPILL_A192_SAVE:
1621 case AMDGPU::SI_SPILL_A160_SAVE:
1622 case AMDGPU::SI_SPILL_A128_SAVE:
1623 case AMDGPU::SI_SPILL_A96_SAVE:
1624 case AMDGPU::SI_SPILL_A64_SAVE:
1625 case AMDGPU::SI_SPILL_A32_SAVE: {
1626 const MachineOperand *VData = TII->getNamedOperand(*MI,
1627 AMDGPU::OpName::vdata);
1628 assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==((void)0)
1629 MFI->getStackPtrOffsetReg())((void)0);
1630
1631 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
1632 : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
1633 auto *MBB = MI->getParent();
1634 buildSpillLoadStore(
1635 *MBB, MI, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
1636 TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1637 *MI->memoperands_begin(), RS);
1638 MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
1639 MI->eraseFromParent();
1640 break;
1641 }
1642 case AMDGPU::SI_SPILL_V32_RESTORE:
1643 case AMDGPU::SI_SPILL_V64_RESTORE:
1644 case AMDGPU::SI_SPILL_V96_RESTORE:
1645 case AMDGPU::SI_SPILL_V128_RESTORE:
1646 case AMDGPU::SI_SPILL_V160_RESTORE:
1647 case AMDGPU::SI_SPILL_V192_RESTORE:
1648 case AMDGPU::SI_SPILL_V224_RESTORE:
1649 case AMDGPU::SI_SPILL_V256_RESTORE:
1650 case AMDGPU::SI_SPILL_V512_RESTORE:
1651 case AMDGPU::SI_SPILL_V1024_RESTORE:
1652 case AMDGPU::SI_SPILL_A32_RESTORE:
1653 case AMDGPU::SI_SPILL_A64_RESTORE:
1654 case AMDGPU::SI_SPILL_A96_RESTORE:
1655 case AMDGPU::SI_SPILL_A128_RESTORE:
1656 case AMDGPU::SI_SPILL_A160_RESTORE:
1657 case AMDGPU::SI_SPILL_A192_RESTORE:
1658 case AMDGPU::SI_SPILL_A224_RESTORE:
1659 case AMDGPU::SI_SPILL_A256_RESTORE:
1660 case AMDGPU::SI_SPILL_A512_RESTORE:
1661 case AMDGPU::SI_SPILL_A1024_RESTORE: {
1662 const MachineOperand *VData = TII->getNamedOperand(*MI,
1663 AMDGPU::OpName::vdata);
1664 assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==((void)0)
1665 MFI->getStackPtrOffsetReg())((void)0);
1666
1667 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
1668 : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
1669 auto *MBB = MI->getParent();
1670 buildSpillLoadStore(
1671 *MBB, MI, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
1672 TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1673 *MI->memoperands_begin(), RS);
1674 MI->eraseFromParent();
1675 break;
1676 }
1677
1678 default: {
1679 // Other access to frame index
1680 const DebugLoc &DL = MI->getDebugLoc();
1681
1682 int64_t Offset = FrameInfo.getObjectOffset(Index);
1683 if (ST.enableFlatScratch()) {
1684 if (TII->isFLATScratch(*MI)) {
1685 assert((int16_t)FIOperandNum ==((void)0)
1686 AMDGPU::getNamedOperandIdx(MI->getOpcode(),((void)0)
1687 AMDGPU::OpName::saddr))((void)0);
1688
1689 // The offset is always swizzled, just replace it
1690 if (FrameReg)
1691 FIOp.ChangeToRegister(FrameReg, false);
1692
1693 if (!Offset)
1694 return;
1695
1696 MachineOperand *OffsetOp =
1697 TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
1698 int64_t NewOffset = Offset + OffsetOp->getImm();
1699 if (TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
1700 SIInstrFlags::FlatScratch)) {
1701 OffsetOp->setImm(NewOffset);
1702 if (FrameReg)
1703 return;
1704 Offset = 0;
1705 }
1706
1707 assert(!TII->getNamedOperand(*MI, AMDGPU::OpName::vaddr) &&((void)0)
1708 "Unexpected vaddr for flat scratch with a FI operand")((void)0);
1709
1710 // On GFX10 we have ST mode to use no registers for an address.
1711 // Otherwise we need to materialize 0 into an SGPR.
1712 if (!Offset && ST.hasFlatScratchSTMode()) {
1713 unsigned Opc = MI->getOpcode();
1714 unsigned NewOpc = AMDGPU::getFlatScratchInstSTfromSS(Opc);
1715 MI->RemoveOperand(
1716 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr));
1717 MI->setDesc(TII->get(NewOpc));
1718 return;
1719 }
1720 }
1721
1722 if (!FrameReg) {
1723 FIOp.ChangeToImmediate(Offset);
1724 if (TII->isImmOperandLegal(*MI, FIOperandNum, FIOp))
1725 return;
1726 }
1727
1728 // We need to use register here. Check if we can use an SGPR or need
1729 // a VGPR.
1730 FIOp.ChangeToRegister(AMDGPU::M0, false);
1731 bool UseSGPR = TII->isOperandLegal(*MI, FIOperandNum, &FIOp);
1732
1733 if (!Offset && FrameReg && UseSGPR) {
1734 FIOp.setReg(FrameReg);
1735 return;
1736 }
1737
1738 const TargetRegisterClass *RC = UseSGPR ? &AMDGPU::SReg_32_XM0RegClass
1739 : &AMDGPU::VGPR_32RegClass;
1740
1741 Register TmpReg = RS->scavengeRegister(RC, MI, 0, !UseSGPR);
1742 FIOp.setReg(TmpReg);
1743 FIOp.setIsKill(true);
1744
1745 if ((!FrameReg || !Offset) && TmpReg) {
1746 unsigned Opc = UseSGPR ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
1747 auto MIB = BuildMI(*MBB, MI, DL, TII->get(Opc), TmpReg);
1748 if (FrameReg)
1749 MIB.addReg(FrameReg);
1750 else
1751 MIB.addImm(Offset);
1752
1753 return;
1754 }
1755
1756 Register TmpSReg =
1757 UseSGPR ? TmpReg
1758 : RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0,
1759 !UseSGPR);
1760
1761 // TODO: for flat scratch another attempt can be made with a VGPR index
1762 // if no SGPRs can be scavenged.
1763 if ((!TmpSReg && !FrameReg) || (!TmpReg && !UseSGPR))
1764 report_fatal_error("Cannot scavenge register in FI elimination!");
1765
1766 if (!TmpSReg) {
1767 // Use frame register and restore it after.
1768 TmpSReg = FrameReg;
1769 FIOp.setReg(FrameReg);
1770 FIOp.setIsKill(false);
1771 }
1772
1773 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpSReg)
1774 .addReg(FrameReg)
1775 .addImm(Offset);
1776
1777 if (!UseSGPR)
1778 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
1779 .addReg(TmpSReg, RegState::Kill);
1780
1781 if (TmpSReg == FrameReg) {
1782 // Undo frame register modification.
1783 BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADD_I32),
1784 FrameReg)
1785 .addReg(FrameReg)
1786 .addImm(-Offset);
1787 }
1788
1789 return;
1790 }
1791
1792 bool IsMUBUF = TII->isMUBUF(*MI);
1793
1794 if (!IsMUBUF && !MFI->isEntryFunction()) {
1795 // Convert to a swizzled stack address by scaling by the wave size.
1796 //
1797 // In an entry function/kernel the offset is already swizzled.
1798
1799 bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
1800 Register ResultReg =
1801 IsCopy ? MI->getOperand(0).getReg()
1802 : RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
1803
1804 int64_t Offset = FrameInfo.getObjectOffset(Index);
1805 if (Offset == 0) {
1806 // XXX - This never happens because of emergency scavenging slot at 0?
1807 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
1808 .addImm(ST.getWavefrontSizeLog2())
1809 .addReg(FrameReg);
1810 } else {
1811 if (auto MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) {
1812 // Reuse ResultReg in intermediate step.
1813 Register ScaledReg = ResultReg;
1814
1815 BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
1816 ScaledReg)
1817 .addImm(ST.getWavefrontSizeLog2())
1818 .addReg(FrameReg);
1819
1820 const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32;
1821
1822 // TODO: Fold if use instruction is another add of a constant.
1823 if (IsVOP2 || AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
1824 // FIXME: This can fail
1825 MIB.addImm(Offset);
1826 MIB.addReg(ScaledReg, RegState::Kill);
1827 if (!IsVOP2)
1828 MIB.addImm(0); // clamp bit
1829 } else {
1830 assert(MIB->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 &&((void)0)
1831 "Need to reuse carry out register")((void)0);
1832
1833 // Use scavenged unused carry out as offset register.
1834 Register ConstOffsetReg;
1835 if (!isWave32)
1836 ConstOffsetReg = getSubReg(MIB.getReg(1), AMDGPU::sub0);
1837 else
1838 ConstOffsetReg = MIB.getReg(1);
1839
1840 BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
1841 .addImm(Offset);
1842 MIB.addReg(ConstOffsetReg, RegState::Kill);
1843 MIB.addReg(ScaledReg, RegState::Kill);
1844 MIB.addImm(0); // clamp bit
1845 }
1846 } else {
1847 // We have to produce a carry out, and there isn't a free SGPR pair
1848 // for it. We can keep the whole computation on the SALU to avoid
1849 // clobbering an additional register at the cost of an extra mov.
1850
1851 // We may have 1 free scratch SGPR even though a carry out is
1852 // unavailable. Only one additional mov is needed.
1853 Register TmpScaledReg =
1854 RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
1855 Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;
1856
1857 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), ScaledReg)
1858 .addReg(FrameReg)
1859 .addImm(ST.getWavefrontSizeLog2());
1860 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
1861 .addReg(ScaledReg, RegState::Kill)
1862 .addImm(Offset);
1863 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg)
1864 .addReg(ScaledReg, RegState::Kill);
1865
1866 // If there were truly no free SGPRs, we need to undo everything.
1867 if (!TmpScaledReg.isValid()) {
1868 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
1869 .addReg(ScaledReg, RegState::Kill)
1870 .addImm(-Offset);
1871 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg)
1872 .addReg(FrameReg)
1873 .addImm(ST.getWavefrontSizeLog2());
1874 }
1875 }
1876 }
1877
1878 // Don't introduce an extra copy if we're just materializing in a mov.
1879 if (IsCopy)
1880 MI->eraseFromParent();
1881 else
1882 FIOp.ChangeToRegister(ResultReg, false, false, true);
1883 return;
1884 }
1885
1886 if (IsMUBUF) {
1887 // Disable offen so we don't need a 0 vgpr base.
1888 assert(static_cast<int>(FIOperandNum) ==((void)0)
1889 AMDGPU::getNamedOperandIdx(MI->getOpcode(),((void)0)
1890 AMDGPU::OpName::vaddr))((void)0);
1891
1892 auto &SOffset = *TII->getNamedOperand(*MI, AMDGPU::OpName::soffset);
1893 assert((SOffset.isImm() && SOffset.getImm() == 0))((void)0);
1894
1895 if (FrameReg != AMDGPU::NoRegister)
1896 SOffset.ChangeToRegister(FrameReg, false);
1897
1898 int64_t Offset = FrameInfo.getObjectOffset(Index);
1899 int64_t OldImm
1900 = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
1901 int64_t NewOffset = OldImm + Offset;
1902
1903 if (SIInstrInfo::isLegalMUBUFImmOffset(NewOffset) &&
1904 buildMUBUFOffsetLoadStore(ST, FrameInfo, MI, Index, NewOffset)) {
1905 MI->eraseFromParent();
1906 return;
1907 }
1908 }
1909
1910 // If the offset is simply too big, don't convert to a scratch wave offset
1911 // relative index.
1912
1913 FIOp.ChangeToImmediate(Offset);
1914 if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
1915 Register TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
1916 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
1917 .addImm(Offset);
1918 FIOp.ChangeToRegister(TmpReg, false, false, true);
1919 }
1920 }
1921 }
1922}
1923
1924StringRef SIRegisterInfo::getRegAsmName(MCRegister Reg) const {
1925 return AMDGPUInstPrinter::getRegisterName(Reg);
1926}
1927
1928static const TargetRegisterClass *
1929getAnyVGPRClassForBitWidth(unsigned BitWidth) {
1930 if (BitWidth <= 64)
1931 return &AMDGPU::VReg_64RegClass;
1932 if (BitWidth <= 96)
1933 return &AMDGPU::VReg_96RegClass;
1934 if (BitWidth <= 128)
1935 return &AMDGPU::VReg_128RegClass;
1936 if (BitWidth <= 160)
1937 return &AMDGPU::VReg_160RegClass;
1938 if (BitWidth <= 192)
1939 return &AMDGPU::VReg_192RegClass;
1940 if (BitWidth <= 224)
1941 return &AMDGPU::VReg_224RegClass;
1942 if (BitWidth <= 256)
1943 return &AMDGPU::VReg_256RegClass;
1944 if (BitWidth <= 512)
1945 return &AMDGPU::VReg_512RegClass;
1946 if (BitWidth <= 1024)
1947 return &AMDGPU::VReg_1024RegClass;
1948
1949 return nullptr;
1950}
1951
1952static const TargetRegisterClass *
1953getAlignedVGPRClassForBitWidth(unsigned BitWidth) {
1954 if (BitWidth <= 64)
1955 return &AMDGPU::VReg_64_Align2RegClass;
1956 if (BitWidth <= 96)
1957 return &AMDGPU::VReg_96_Align2RegClass;
1958 if (BitWidth <= 128)
1959 return &AMDGPU::VReg_128_Align2RegClass;
1960 if (BitWidth <= 160)
1961 return &AMDGPU::VReg_160_Align2RegClass;
1962 if (BitWidth <= 192)
1963 return &AMDGPU::VReg_192_Align2RegClass;
1964 if (BitWidth <= 224)
1965 return &AMDGPU::VReg_224_Align2RegClass;
1966 if (BitWidth <= 256)
1967 return &AMDGPU::VReg_256_Align2RegClass;
1968 if (BitWidth <= 512)
1969 return &AMDGPU::VReg_512_Align2RegClass;
1970 if (BitWidth <= 1024)
1971 return &AMDGPU::VReg_1024_Align2RegClass;
1972
1973 return nullptr;
1974}
1975
1976const TargetRegisterClass *
1977SIRegisterInfo::getVGPRClassForBitWidth(unsigned BitWidth) const {
1978 if (BitWidth == 1)
1979 return &AMDGPU::VReg_1RegClass;
1980 if (BitWidth <= 16)
1981 return &AMDGPU::VGPR_LO16RegClass;
1982 if (BitWidth <= 32)
1983 return &AMDGPU::VGPR_32RegClass;
1984 return ST.needsAlignedVGPRs() ? getAlignedVGPRClassForBitWidth(BitWidth)
1985 : getAnyVGPRClassForBitWidth(BitWidth);
1986}
1987
1988static const TargetRegisterClass *
1989getAnyAGPRClassForBitWidth(unsigned BitWidth) {
1990 if (BitWidth <= 64)
1991 return &AMDGPU::AReg_64RegClass;
1992 if (BitWidth <= 96)
1993 return &AMDGPU::AReg_96RegClass;
1994 if (BitWidth <= 128)
1995 return &AMDGPU::AReg_128RegClass;
1996 if (BitWidth <= 160)
1997 return &AMDGPU::AReg_160RegClass;
1998 if (BitWidth <= 192)
1999 return &AMDGPU::AReg_192RegClass;
2000 if (BitWidth <= 224)
2001 return &AMDGPU::AReg_224RegClass;
2002 if (BitWidth <= 256)
2003 return &AMDGPU::AReg_256RegClass;
2004 if (BitWidth <= 512)
2005 return &AMDGPU::AReg_512RegClass;
2006 if (BitWidth <= 1024)
2007 return &AMDGPU::AReg_1024RegClass;
2008
2009 return nullptr;
2010}
2011
2012static const TargetRegisterClass *
2013getAlignedAGPRClassForBitWidth(unsigned BitWidth) {
2014 if (BitWidth <= 64)
2015 return &AMDGPU::AReg_64_Align2RegClass;
2016 if (BitWidth <= 96)
2017 return &AMDGPU::AReg_96_Align2RegClass;
2018 if (BitWidth <= 128)
2019 return &AMDGPU::AReg_128_Align2RegClass;
2020 if (BitWidth <= 160)
2021 return &AMDGPU::AReg_160_Align2RegClass;
2022 if (BitWidth <= 192)
2023 return &AMDGPU::AReg_192_Align2RegClass;
2024 if (BitWidth <= 224)
2025 return &AMDGPU::AReg_224_Align2RegClass;
2026 if (BitWidth <= 256)
2027 return &AMDGPU::AReg_256_Align2RegClass;
2028 if (BitWidth <= 512)
2029 return &AMDGPU::AReg_512_Align2RegClass;
2030 if (BitWidth <= 1024)
2031 return &AMDGPU::AReg_1024_Align2RegClass;
2032
2033 return nullptr;
2034}
2035
2036const TargetRegisterClass *
2037SIRegisterInfo::getAGPRClassForBitWidth(unsigned BitWidth) const {
2038 if (BitWidth <= 16)
2039 return &AMDGPU::AGPR_LO16RegClass;
2040 if (BitWidth <= 32)
2041 return &AMDGPU::AGPR_32RegClass;
2042 return ST.needsAlignedVGPRs() ? getAlignedAGPRClassForBitWidth(BitWidth)
2043 : getAnyAGPRClassForBitWidth(BitWidth);
2044}
2045
2046const TargetRegisterClass *
2047SIRegisterInfo::getSGPRClassForBitWidth(unsigned BitWidth) {
2048 if (BitWidth <= 16)
2049 return &AMDGPU::SGPR_LO16RegClass;
2050 if (BitWidth <= 32)
2051 return &AMDGPU::SReg_32RegClass;
2052 if (BitWidth <= 64)
2053 return &AMDGPU::SReg_64RegClass;
2054 if (BitWidth <= 96)
2055 return &AMDGPU::SGPR_96RegClass;
2056 if (BitWidth <= 128)
2057 return &AMDGPU::SGPR_128RegClass;
2058 if (BitWidth <= 160)
2059 return &AMDGPU::SGPR_160RegClass;
2060 if (BitWidth <= 192)
2061 return &AMDGPU::SGPR_192RegClass;
2062 if (BitWidth <= 224)
2063 return &AMDGPU::SGPR_224RegClass;
2064 if (BitWidth <= 256)
2065 return &AMDGPU::SGPR_256RegClass;
2066 if (BitWidth <= 512)
2067 return &AMDGPU::SGPR_512RegClass;
2068 if (BitWidth <= 1024)
2069 return &AMDGPU::SGPR_1024RegClass;
2070
2071 return nullptr;
2072}
2073
2074// FIXME: This is very slow. It might be worth creating a map from physreg to
2075// register class.
2076const TargetRegisterClass *
2077SIRegisterInfo::getPhysRegClass(MCRegister Reg) const {
2078 static const TargetRegisterClass *const BaseClasses[] = {
2079 &AMDGPU::VGPR_LO16RegClass,
2080 &AMDGPU::VGPR_HI16RegClass,
2081 &AMDGPU::SReg_LO16RegClass,
2082 &AMDGPU::AGPR_LO16RegClass,
2083 &AMDGPU::VGPR_32RegClass,
2084 &AMDGPU::SReg_32RegClass,
2085 &AMDGPU::AGPR_32RegClass,
2086 &AMDGPU::AGPR_32RegClass,
2087 &AMDGPU::VReg_64_Align2RegClass,
2088 &AMDGPU::VReg_64RegClass,
2089 &AMDGPU::SReg_64RegClass,
2090 &AMDGPU::AReg_64_Align2RegClass,
2091 &AMDGPU::AReg_64RegClass,
2092 &AMDGPU::VReg_96_Align2RegClass,
2093 &AMDGPU::VReg_96RegClass,
2094 &AMDGPU::SReg_96RegClass,
2095 &AMDGPU::AReg_96_Align2RegClass,
2096 &AMDGPU::AReg_96RegClass,
2097 &AMDGPU::VReg_128_Align2RegClass,
2098 &AMDGPU::VReg_128RegClass,
2099 &AMDGPU::SReg_128RegClass,
2100 &AMDGPU::AReg_128_Align2RegClass,
2101 &AMDGPU::AReg_128RegClass,
2102 &AMDGPU::VReg_160_Align2RegClass,
2103 &AMDGPU::VReg_160RegClass,
2104 &AMDGPU::SReg_160RegClass,
2105 &AMDGPU::AReg_160_Align2RegClass,
2106 &AMDGPU::AReg_160RegClass,
2107 &AMDGPU::VReg_192_Align2RegClass,
2108 &AMDGPU::VReg_192RegClass,
2109 &AMDGPU::SReg_192RegClass,
2110 &AMDGPU::AReg_192_Align2RegClass,
2111 &AMDGPU::AReg_192RegClass,
2112 &AMDGPU::VReg_224_Align2RegClass,
2113 &AMDGPU::VReg_224RegClass,
2114 &AMDGPU::SReg_224RegClass,
2115 &AMDGPU::AReg_224_Align2RegClass,
2116 &AMDGPU::AReg_224RegClass,
2117 &AMDGPU::VReg_256_Align2RegClass,
2118 &AMDGPU::VReg_256RegClass,
2119 &AMDGPU::SReg_256RegClass,
2120 &AMDGPU::AReg_256_Align2RegClass,
2121 &AMDGPU::AReg_256RegClass,
2122 &AMDGPU::VReg_512_Align2RegClass,
2123 &AMDGPU::VReg_512RegClass,
2124 &AMDGPU::SReg_512RegClass,
2125 &AMDGPU::AReg_512_Align2RegClass,
2126 &AMDGPU::AReg_512RegClass,
2127 &AMDGPU::SReg_1024RegClass,
2128 &AMDGPU::VReg_1024_Align2RegClass,
2129 &AMDGPU::VReg_1024RegClass,
2130 &AMDGPU::AReg_1024_Align2RegClass,
2131 &AMDGPU::AReg_1024RegClass,
2132 &AMDGPU::SCC_CLASSRegClass,
2133 &AMDGPU::Pseudo_SReg_32RegClass,
2134 &AMDGPU::Pseudo_SReg_128RegClass,
2135 };
2136
2137 for (const TargetRegisterClass *BaseClass : BaseClasses) {
2138 if (BaseClass->contains(Reg)) {
2139 return BaseClass;
2140 }
2141 }
2142 return nullptr;
2143}
2144
2145bool SIRegisterInfo::isSGPRReg(const MachineRegisterInfo &MRI,
2146 Register Reg) const {
2147 const TargetRegisterClass *RC;
2148 if (Reg.isVirtual())
2149 RC = MRI.getRegClass(Reg);
2150 else
2151 RC = getPhysRegClass(Reg);
2152 return isSGPRClass(RC);
2153}
2154
2155// TODO: It might be helpful to have some target specific flags in
2156// TargetRegisterClass to mark which classes are VGPRs to make this trivial.
2157bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
2158 unsigned Size = getRegSizeInBits(*RC);
2159 if (Size == 16) {
2160 return getCommonSubClass(&AMDGPU::VGPR_LO16RegClass, RC) != nullptr ||
2161 getCommonSubClass(&AMDGPU::VGPR_HI16RegClass, RC) != nullptr;
2162 }
2163 const TargetRegisterClass *VRC = getVGPRClassForBitWidth(Size);
2164 if (!VRC) {
2165 assert(Size < 32 && "Invalid register class size")((void)0);
2166 return false;
2167 }
2168 return getCommonSubClass(VRC, RC) != nullptr;
2169}
2170
2171bool SIRegisterInfo::hasAGPRs(const TargetRegisterClass *RC) const {
2172 unsigned Size = getRegSizeInBits(*RC);
2173 if (Size < 16)
2174 return false;
2175 const TargetRegisterClass *ARC = getAGPRClassForBitWidth(Size);
2176 if (!ARC) {
2177 assert(getVGPRClassForBitWidth(Size) && "Invalid register class size")((void)0);
2178 return false;
2179 }
2180 return getCommonSubClass(ARC, RC) != nullptr;
2181}
2182
2183const TargetRegisterClass *
2184SIRegisterInfo::getEquivalentVGPRClass(const TargetRegisterClass *SRC) const {
2185 unsigned Size = getRegSizeInBits(*SRC);
2186 const TargetRegisterClass *VRC = getVGPRClassForBitWidth(Size);
2187 assert(VRC && "Invalid register class size")((void)0);
2188 return VRC;
2189}
2190
2191const TargetRegisterClass *
2192SIRegisterInfo::getEquivalentAGPRClass(const TargetRegisterClass *SRC) const {
2193 unsigned Size = getRegSizeInBits(*SRC);
2194 const TargetRegisterClass *ARC = getAGPRClassForBitWidth(Size);
2195 assert(ARC && "Invalid register class size")((void)0);
2196 return ARC;
2197}
2198
2199const TargetRegisterClass *
2200SIRegisterInfo::getEquivalentSGPRClass(const TargetRegisterClass *VRC) const {
2201 unsigned Size = getRegSizeInBits(*VRC);
2202 if (Size == 32)
2203 return &AMDGPU::SGPR_32RegClass;
2204 const TargetRegisterClass *SRC = getSGPRClassForBitWidth(Size);
2205 assert(SRC && "Invalid register class size")((void)0);
2206 return SRC;
2207}
2208
2209const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
2210 const TargetRegisterClass *RC, unsigned SubIdx) const {
2211 if (SubIdx == AMDGPU::NoSubRegister)
2212 return RC;
2213
2214 // We can assume that each lane corresponds to one 32-bit register.
2215 unsigned Size = getNumChannelsFromSubReg(SubIdx) * 32;
2216 if (isSGPRClass(RC)) {
2217 if (Size == 32)
2218 RC = &AMDGPU::SGPR_32RegClass;
2219 else
2220 RC = getSGPRClassForBitWidth(Size);
2221 } else if (hasAGPRs(RC)) {
2222 RC = getAGPRClassForBitWidth(Size);
2223 } else {
2224 RC = getVGPRClassForBitWidth(Size);
2225 }
2226 assert(RC && "Invalid sub-register class size")((void)0);
2227 return RC;
2228}
2229
2230const TargetRegisterClass *
2231SIRegisterInfo::getCompatibleSubRegClass(const TargetRegisterClass *SuperRC,
2232 const TargetRegisterClass *SubRC,
2233 unsigned SubIdx) const {
2234 // Ensure this subregister index is aligned in the super register.
2235 const TargetRegisterClass *MatchRC =
2236 getMatchingSuperRegClass(SuperRC, SubRC, SubIdx);
2237 return MatchRC && MatchRC->hasSubClassEq(SuperRC) ? MatchRC : nullptr;
2238}
2239
2240bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
2241 if (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST &&
2242 OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST)
2243 return !ST.hasMFMAInlineLiteralBug();
2244
2245 return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
2246 OpType <= AMDGPU::OPERAND_SRC_LAST;
2247}
2248
2249bool SIRegisterInfo::shouldRewriteCopySrc(
2250 const TargetRegisterClass *DefRC,
2251 unsigned DefSubReg,
2252 const TargetRegisterClass *SrcRC,
2253 unsigned SrcSubReg) const {
2254 // We want to prefer the smallest register class possible, so we don't want to
2255 // stop and rewrite on anything that looks like a subregister
2256 // extract. Operations mostly don't care about the super register class, so we
2257 // only want to stop on the most basic of copies between the same register
2258 // class.
2259 //
2260 // e.g. if we have something like
2261 // %0 = ...
2262 // %1 = ...
2263 // %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2
2264 // %3 = COPY %2, sub0
2265 //
2266 // We want to look through the COPY to find:
2267 // => %3 = COPY %0
2268
2269 // Plain copy.
2270 return getCommonSubClass(DefRC, SrcRC) != nullptr;
2271}
2272
2273bool SIRegisterInfo::opCanUseLiteralConstant(unsigned OpType) const {
2274 // TODO: 64-bit operands have extending behavior from 32-bit literal.
2275 return OpType >= AMDGPU::OPERAND_REG_IMM_FIRST &&
2276 OpType <= AMDGPU::OPERAND_REG_IMM_LAST;
2277}
2278
2279/// Returns a lowest register that is not used at any point in the function.
2280/// If all registers are used, then this function will return
2281/// AMDGPU::NoRegister. If \p ReserveHighestVGPR = true, then return
2282/// highest unused register.
2283MCRegister SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
2284 const TargetRegisterClass *RC,
2285 const MachineFunction &MF,
2286 bool ReserveHighestVGPR) const {
2287 if (ReserveHighestVGPR) {
2288 for (MCRegister Reg : reverse(*RC))
2289 if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
2290 return Reg;
2291 } else {
2292 for (MCRegister Reg : *RC)
2293 if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
2294 return Reg;
2295 }
2296 return MCRegister();
2297}
2298
2299ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
2300 unsigned EltSize) const {
2301 const unsigned RegBitWidth = AMDGPU::getRegBitWidth(*RC->MC);
2302 assert(RegBitWidth >= 32 && RegBitWidth <= 1024)((void)0);
2303
2304 const unsigned RegDWORDs = RegBitWidth / 32;
2305 const unsigned EltDWORDs = EltSize / 4;
2306 assert(RegSplitParts.size() + 1 >= EltDWORDs)((void)0);
2307
2308 const std::vector<int16_t> &Parts = RegSplitParts[EltDWORDs - 1];
2309 const unsigned NumParts = RegDWORDs / EltDWORDs;
2310
2311 return makeArrayRef(Parts.data(), NumParts);
2312}
2313
2314const TargetRegisterClass*
2315SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
2316 Register Reg) const {
2317 return Reg.isVirtual() ? MRI.getRegClass(Reg) : getPhysRegClass(Reg);
2318}
2319
2320bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
2321 Register Reg) const {
2322 const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
2323 // Registers without classes are unaddressable, SGPR-like registers.
2324 return RC && hasVGPRs(RC);
2325}
2326
2327bool SIRegisterInfo::isAGPR(const MachineRegisterInfo &MRI,
2328 Register Reg) const {
2329 const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
2330
2331 // Registers without classes are unaddressable, SGPR-like registers.
2332 return RC && hasAGPRs(RC);
2333}
2334
2335bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
2336 const TargetRegisterClass *SrcRC,
2337 unsigned SubReg,
2338 const TargetRegisterClass *DstRC,
2339 unsigned DstSubReg,
2340 const TargetRegisterClass *NewRC,
2341 LiveIntervals &LIS) const {
2342 unsigned SrcSize = getRegSizeInBits(*SrcRC);
2343 unsigned DstSize = getRegSizeInBits(*DstRC);
2344 unsigned NewSize = getRegSizeInBits(*NewRC);
2345
2346 // Do not increase size of registers beyond dword, we would need to allocate
2347 // adjacent registers and constraint regalloc more than needed.
2348
2349 // Always allow dword coalescing.
2350 if (SrcSize <= 32 || DstSize <= 32)
2351 return true;
2352
2353 return NewSize <= DstSize || NewSize <= SrcSize;
2354}
2355
2356unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
2357 MachineFunction &MF) const {
2358 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
2359
2360 unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
2361 MF.getFunction());
2362 switch (RC->getID()) {
2363 default:
2364 return AMDGPUGenRegisterInfo::getRegPressureLimit(RC, MF);
2365 case AMDGPU::VGPR_32RegClassID:
2366 case AMDGPU::VGPR_LO16RegClassID:
2367 case AMDGPU::VGPR_HI16RegClassID:
2368 return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
2369 case AMDGPU::SGPR_32RegClassID:
2370 case AMDGPU::SGPR_LO16RegClassID:
2371 return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
2372 }
2373}
2374
2375unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
2376 unsigned Idx) const {
2377 if (Idx == AMDGPU::RegisterPressureSets::VGPR_32 ||
2378 Idx == AMDGPU::RegisterPressureSets::AGPR_32)
2379 return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
2380 const_cast<MachineFunction &>(MF));
2381
2382 if (Idx == AMDGPU::RegisterPressureSets::SReg_32)
2383 return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
2384 const_cast<MachineFunction &>(MF));
2385
2386 llvm_unreachable("Unexpected register pressure set!")__builtin_unreachable();
2387}
2388
2389const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
2390 static const int Empty[] = { -1 };
2391
2392 if (RegPressureIgnoredUnits[RegUnit])
2393 return Empty;
2394
2395 return AMDGPUGenRegisterInfo::getRegUnitPressureSets(RegUnit);
2396}
2397
2398MCRegister SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
2399 // Not a callee saved register.
2400 return AMDGPU::SGPR30_SGPR31;
2401}
2402
2403const TargetRegisterClass *
2404SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size,
2405 const RegisterBank &RB,
2406 const MachineRegisterInfo &MRI) const {
2407 switch (RB.getID()) {
2408 case AMDGPU::VGPRRegBankID:
2409 return getVGPRClassForBitWidth(std::max(32u, Size));
2410 case AMDGPU::VCCRegBankID:
2411 assert(Size == 1)((void)0);
2412 return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
2413 : &AMDGPU::SReg_64_XEXECRegClass;
2414 case AMDGPU::SGPRRegBankID:
2415 return getSGPRClassForBitWidth(std::max(32u, Size));
2416 case AMDGPU::AGPRRegBankID:
2417 return getAGPRClassForBitWidth(std::max(32u, Size));
2418 default:
2419 llvm_unreachable("unknown register bank")__builtin_unreachable();
2420 }
2421}
2422
2423const TargetRegisterClass *
2424SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
2425 const MachineRegisterInfo &MRI) const {
2426 const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(MO.getReg());
2427 if (const RegisterBank *RB = RCOrRB.dyn_cast<const RegisterBank*>())
2428 return getRegClassForTypeOnBank(MRI.getType(MO.getReg()), *RB, MRI);
2429
2430 const TargetRegisterClass *RC = RCOrRB.get<const TargetRegisterClass*>();
2431 return getAllocatableClass(RC);
2432}
2433
2434MCRegister SIRegisterInfo::getVCC() const {
2435 return isWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC;
2436}
2437
2438const TargetRegisterClass *SIRegisterInfo::getVGPR64Class() const {
2439 // VGPR tuples have an alignment requirement on gfx90a variants.
2440 return ST.needsAlignedVGPRs() ? &AMDGPU::VReg_64_Align2RegClass
2441 : &AMDGPU::VReg_64RegClass;
2442}
2443
2444const TargetRegisterClass *
2445SIRegisterInfo::getRegClass(unsigned RCID) const {
2446 switch ((int)RCID) {
2447 case AMDGPU::SReg_1RegClassID:
2448 return getBoolRC();
2449 case AMDGPU::SReg_1_XEXECRegClassID:
2450 return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
2451 : &AMDGPU::SReg_64_XEXECRegClass;
2452 case -1:
2453 return nullptr;
2454 default:
2455 return AMDGPUGenRegisterInfo::getRegClass(RCID);
2456 }
2457}
2458
2459// Find reaching register definition
2460MachineInstr *SIRegisterInfo::findReachingDef(Register Reg, unsigned SubReg,
2461 MachineInstr &Use,
2462 MachineRegisterInfo &MRI,
2463 LiveIntervals *LIS) const {
2464 auto &MDT = LIS->getAnalysis<MachineDominatorTree>();
2465 SlotIndex UseIdx = LIS->getInstructionIndex(Use);
2466 SlotIndex DefIdx;
2467
2468 if (Reg.isVirtual()) {
2469 if (!LIS->hasInterval(Reg))
2470 return nullptr;
2471 LiveInterval &LI = LIS->getInterval(Reg);
2472 LaneBitmask SubLanes = SubReg ? getSubRegIndexLaneMask(SubReg)
2473 : MRI.getMaxLaneMaskForVReg(Reg);
2474 VNInfo *V = nullptr;
2475 if (LI.hasSubRanges()) {
2476 for (auto &S : LI.subranges()) {
2477 if ((S.LaneMask & SubLanes) == SubLanes) {
2478 V = S.getVNInfoAt(UseIdx);
2479 break;
2480 }
2481 }
2482 } else {
2483 V = LI.getVNInfoAt(UseIdx);
2484 }
2485 if (!V)
2486 return nullptr;
2487 DefIdx = V->def;
2488 } else {
2489 // Find last def.
2490 for (MCRegUnitIterator Units(Reg.asMCReg(), this); Units.isValid();
2491 ++Units) {
2492 LiveRange &LR = LIS->getRegUnit(*Units);
2493 if (VNInfo *V = LR.getVNInfoAt(UseIdx)) {
2494 if (!DefIdx.isValid() ||
2495 MDT.dominates(LIS->getInstructionFromIndex(DefIdx),
2496 LIS->getInstructionFromIndex(V->def)))
2497 DefIdx = V->def;
2498 } else {
2499 return nullptr;
2500 }
2501 }
2502 }
2503
2504 MachineInstr *Def = LIS->getInstructionFromIndex(DefIdx);
2505
2506 if (!Def || !MDT.dominates(Def, &Use))
2507 return nullptr;
2508
2509 assert(Def->modifiesRegister(Reg, this))((void)0);
2510
2511 return Def;
2512}
2513
2514MCPhysReg SIRegisterInfo::get32BitRegister(MCPhysReg Reg) const {
2515 assert(getRegSizeInBits(*getPhysRegClass(Reg)) <= 32)((void)0);
2516
2517 for (const TargetRegisterClass &RC : { AMDGPU::VGPR_32RegClass,
2518 AMDGPU::SReg_32RegClass,
2519 AMDGPU::AGPR_32RegClass } ) {
2520 if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::lo16, &RC))
2521 return Super;
2522 }
2523 if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::hi16,
2524 &AMDGPU::VGPR_32RegClass)) {
2525 return Super;
2526 }
2527
2528 return AMDGPU::NoRegister;
2529}
2530
2531bool SIRegisterInfo::isProperlyAlignedRC(const TargetRegisterClass &RC) const {
2532 if (!ST.needsAlignedVGPRs())
2533 return true;
2534
2535 if (hasVGPRs(&RC))
2536 return RC.hasSuperClassEq(getVGPRClassForBitWidth(getRegSizeInBits(RC)));
2537 if (hasAGPRs(&RC))
2538 return RC.hasSuperClassEq(getAGPRClassForBitWidth(getRegSizeInBits(RC)));
2539
2540 return true;
2541}
2542
2543bool SIRegisterInfo::isConstantPhysReg(MCRegister PhysReg) const {
2544 switch (PhysReg) {
2545 case AMDGPU::SGPR_NULL:
2546 case AMDGPU::SRC_SHARED_BASE:
2547 case AMDGPU::SRC_PRIVATE_BASE:
2548 case AMDGPU::SRC_SHARED_LIMIT:
2549 case AMDGPU::SRC_PRIVATE_LIMIT:
2550 return true;
2551 default:
2552 return false;
2553 }
2554}
2555
2556ArrayRef<MCPhysReg>
2557SIRegisterInfo::getAllSGPR128(const MachineFunction &MF) const {
2558 return makeArrayRef(AMDGPU::SGPR_128RegClass.begin(),
2559 ST.getMaxNumSGPRs(MF) / 4);
2560}
2561
2562ArrayRef<MCPhysReg>
2563SIRegisterInfo::getAllSGPR64(const MachineFunction &MF) const {
2564 return makeArrayRef(AMDGPU::SGPR_64RegClass.begin(),
2565 ST.getMaxNumSGPRs(MF) / 2);
2566}
2567
2568ArrayRef<MCPhysReg>
2569SIRegisterInfo::getAllSGPR32(const MachineFunction &MF) const {
2570 return makeArrayRef(AMDGPU::SGPR_32RegClass.begin(), ST.getMaxNumSGPRs(MF));
2571}

/usr/include/c++/v1/__algorithm/min.h

1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef _LIBCPP___ALGORITHM_MIN_H
10#define _LIBCPP___ALGORITHM_MIN_H
11
12#include <__config>
13#include <__algorithm/comp.h>
14#include <__algorithm/min_element.h>
15#include <initializer_list>
16
17#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
18#pragma GCC system_header
19#endif
20
21_LIBCPP_PUSH_MACROSpush_macro("min") push_macro("max")
22#include <__undef_macros>
23
24_LIBCPP_BEGIN_NAMESPACE_STDnamespace std { inline namespace __1 {
25
26template <class _Tp, class _Compare>
27_LIBCPP_NODISCARD_EXT inline
28_LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__)) _LIBCPP_CONSTEXPR_AFTER_CXX11constexpr
29const _Tp&
30min(const _Tp& __a, const _Tp& __b, _Compare __comp)
31{
32 return __comp(__b, __a) ? __b : __a;
16
'?' condition is false
17
Returning the value 64 (reference to 'Data.PerVGPR')
33}
34
35template <class _Tp>
36_LIBCPP_NODISCARD_EXT inline
37_LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__)) _LIBCPP_CONSTEXPR_AFTER_CXX11constexpr
38const _Tp&
39min(const _Tp& __a, const _Tp& __b)
40{
41 return _VSTDstd::__1::min(__a, __b, __less<_Tp>());
14
Passing '__a' via 1st parameter '__a'
15
Calling 'min<unsigned int, std::__less<unsigned int>>'
18
Returning from 'min<unsigned int, std::__less<unsigned int>>'
19
Returning the value 64 (reference to 'Data.PerVGPR')
42}
43
44#ifndef _LIBCPP_CXX03_LANG
45
46template<class _Tp, class _Compare>
47_LIBCPP_NODISCARD_EXT inline
48_LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__)) _LIBCPP_CONSTEXPR_AFTER_CXX11constexpr
49_Tp
50min(initializer_list<_Tp> __t, _Compare __comp)
51{
52 return *_VSTDstd::__1::min_element(__t.begin(), __t.end(), __comp);
53}
54
55template<class _Tp>
56_LIBCPP_NODISCARD_EXT inline
57_LIBCPP_INLINE_VISIBILITY__attribute__ ((__visibility__("hidden"))) __attribute__ ((__exclude_from_explicit_instantiation__)) _LIBCPP_CONSTEXPR_AFTER_CXX11constexpr
58_Tp
59min(initializer_list<_Tp> __t)
60{
61 return *_VSTDstd::__1::min_element(__t.begin(), __t.end(), __less<_Tp>());
62}
63
64#endif // _LIBCPP_CXX03_LANG
65
66_LIBCPP_END_NAMESPACE_STD} }
67
68_LIBCPP_POP_MACROSpop_macro("min") pop_macro("max")
69
70#endif // _LIBCPP___ALGORITHM_MIN_H
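
The path annotations above end with std::min returning the value 64 through the reference to 'Data.PerVGPR'. In C++, shifting a value by a count greater than or equal to the width of its (promoted) type is undefined behavior, so a 64-lane mask cannot be formed with a plain 64-bit shift once the lane count reaches 64. A minimal, illustrative sketch of the usual guard, with a hypothetical laneMask() helper rather than the code flagged in this report:

#include <cstdint>

// Sketch only: build a mask with NumLanes low bits set without ever
// shifting a 64-bit value by 64 (which would be undefined behavior).
static uint64_t laneMask(unsigned NumLanes) {
  if (NumLanes >= 64)
    return ~uint64_t(0);          // full mask handled explicitly
  return (uint64_t(1) << NumLanes) - 1;
}
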