File: | src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/CodeGen/MachineScheduler.cpp |
Warning: | line 3629, column 12 Access to field 'isScheduled' results in a dereference of a null pointer (loaded from variable 'SU') |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===// | ||||||||
2 | // | ||||||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||||||
6 | // | ||||||||
7 | //===----------------------------------------------------------------------===// | ||||||||
8 | // | ||||||||
9 | // MachineScheduler schedules machine instructions after phi elimination. It | ||||||||
10 | // preserves LiveIntervals so it can be invoked before register allocation. | ||||||||
11 | // | ||||||||
12 | //===----------------------------------------------------------------------===// | ||||||||
13 | |||||||||
14 | #include "llvm/CodeGen/MachineScheduler.h" | ||||||||
15 | #include "llvm/ADT/ArrayRef.h" | ||||||||
16 | #include "llvm/ADT/BitVector.h" | ||||||||
17 | #include "llvm/ADT/DenseMap.h" | ||||||||
18 | #include "llvm/ADT/PriorityQueue.h" | ||||||||
19 | #include "llvm/ADT/STLExtras.h" | ||||||||
20 | #include "llvm/ADT/SmallVector.h" | ||||||||
21 | #include "llvm/ADT/Statistic.h" | ||||||||
22 | #include "llvm/ADT/iterator_range.h" | ||||||||
23 | #include "llvm/Analysis/AliasAnalysis.h" | ||||||||
24 | #include "llvm/CodeGen/LiveInterval.h" | ||||||||
25 | #include "llvm/CodeGen/LiveIntervals.h" | ||||||||
26 | #include "llvm/CodeGen/MachineBasicBlock.h" | ||||||||
27 | #include "llvm/CodeGen/MachineDominators.h" | ||||||||
28 | #include "llvm/CodeGen/MachineFunction.h" | ||||||||
29 | #include "llvm/CodeGen/MachineFunctionPass.h" | ||||||||
30 | #include "llvm/CodeGen/MachineInstr.h" | ||||||||
31 | #include "llvm/CodeGen/MachineLoopInfo.h" | ||||||||
32 | #include "llvm/CodeGen/MachineOperand.h" | ||||||||
33 | #include "llvm/CodeGen/MachinePassRegistry.h" | ||||||||
34 | #include "llvm/CodeGen/MachineRegisterInfo.h" | ||||||||
35 | #include "llvm/CodeGen/Passes.h" | ||||||||
36 | #include "llvm/CodeGen/RegisterClassInfo.h" | ||||||||
37 | #include "llvm/CodeGen/RegisterPressure.h" | ||||||||
38 | #include "llvm/CodeGen/ScheduleDAG.h" | ||||||||
39 | #include "llvm/CodeGen/ScheduleDAGInstrs.h" | ||||||||
40 | #include "llvm/CodeGen/ScheduleDAGMutation.h" | ||||||||
41 | #include "llvm/CodeGen/ScheduleDFS.h" | ||||||||
42 | #include "llvm/CodeGen/ScheduleHazardRecognizer.h" | ||||||||
43 | #include "llvm/CodeGen/SlotIndexes.h" | ||||||||
44 | #include "llvm/CodeGen/TargetFrameLowering.h" | ||||||||
45 | #include "llvm/CodeGen/TargetInstrInfo.h" | ||||||||
46 | #include "llvm/CodeGen/TargetLowering.h" | ||||||||
47 | #include "llvm/CodeGen/TargetPassConfig.h" | ||||||||
48 | #include "llvm/CodeGen/TargetRegisterInfo.h" | ||||||||
49 | #include "llvm/CodeGen/TargetSchedule.h" | ||||||||
50 | #include "llvm/CodeGen/TargetSubtargetInfo.h" | ||||||||
51 | #include "llvm/Config/llvm-config.h" | ||||||||
52 | #include "llvm/InitializePasses.h" | ||||||||
53 | #include "llvm/MC/LaneBitmask.h" | ||||||||
54 | #include "llvm/Pass.h" | ||||||||
55 | #include "llvm/Support/CommandLine.h" | ||||||||
56 | #include "llvm/Support/Compiler.h" | ||||||||
57 | #include "llvm/Support/Debug.h" | ||||||||
58 | #include "llvm/Support/ErrorHandling.h" | ||||||||
59 | #include "llvm/Support/GraphWriter.h" | ||||||||
60 | #include "llvm/Support/MachineValueType.h" | ||||||||
61 | #include "llvm/Support/raw_ostream.h" | ||||||||
62 | #include <algorithm> | ||||||||
63 | #include <cassert> | ||||||||
64 | #include <cstdint> | ||||||||
65 | #include <iterator> | ||||||||
66 | #include <limits> | ||||||||
67 | #include <memory> | ||||||||
68 | #include <string> | ||||||||
69 | #include <tuple> | ||||||||
70 | #include <utility> | ||||||||
71 | #include <vector> | ||||||||
72 | |||||||||
73 | using namespace llvm; | ||||||||
74 | |||||||||
75 | #define DEBUG_TYPE"machine-scheduler" "machine-scheduler" | ||||||||
76 | |||||||||
77 | STATISTIC(NumClustered, "Number of load/store pairs clustered")static llvm::Statistic NumClustered = {"machine-scheduler", "NumClustered" , "Number of load/store pairs clustered"}; | ||||||||
78 | |||||||||
79 | namespace llvm { | ||||||||
80 | |||||||||
81 | cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden, | ||||||||
82 | cl::desc("Force top-down list scheduling")); | ||||||||
83 | cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden, | ||||||||
84 | cl::desc("Force bottom-up list scheduling")); | ||||||||
85 | cl::opt<bool> | ||||||||
86 | DumpCriticalPathLength("misched-dcpl", cl::Hidden, | ||||||||
87 | cl::desc("Print critical path length to stdout")); | ||||||||
88 | |||||||||
89 | cl::opt<bool> VerifyScheduling( | ||||||||
90 | "verify-misched", cl::Hidden, | ||||||||
91 | cl::desc("Verify machine instrs before and after machine scheduling")); | ||||||||
92 | |||||||||
93 | } // end namespace llvm | ||||||||
94 | |||||||||
95 | #ifndef NDEBUG1 | ||||||||
96 | static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden, | ||||||||
97 | cl::desc("Pop up a window to show MISched dags after they are processed")); | ||||||||
98 | |||||||||
99 | /// In some situations a few uninteresting nodes depend on nearly all other | ||||||||
100 | /// nodes in the graph, provide a cutoff to hide them. | ||||||||
101 | static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden, | ||||||||
102 | cl::desc("Hide nodes with more predecessor/successor than cutoff")); | ||||||||
103 | |||||||||
104 | static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden, | ||||||||
105 | cl::desc("Stop scheduling after N instructions"), cl::init(~0U)); | ||||||||
106 | |||||||||
107 | static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden, | ||||||||
108 | cl::desc("Only schedule this function")); | ||||||||
109 | static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden, | ||||||||
110 | cl::desc("Only schedule this MBB#")); | ||||||||
111 | static cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden, | ||||||||
112 | cl::desc("Print schedule DAGs")); | ||||||||
113 | #else | ||||||||
114 | static const bool ViewMISchedDAGs = false; | ||||||||
115 | static const bool PrintDAGs = false; | ||||||||
116 | #endif // NDEBUG | ||||||||
117 | |||||||||
118 | /// Avoid quadratic complexity in unusually large basic blocks by limiting the | ||||||||
119 | /// size of the ready lists. | ||||||||
120 | static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden, | ||||||||
121 | cl::desc("Limit ready list to N instructions"), cl::init(256)); | ||||||||
122 | |||||||||
123 | static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden, | ||||||||
124 | cl::desc("Enable register pressure scheduling."), cl::init(true)); | ||||||||
125 | |||||||||
126 | static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden, | ||||||||
127 | cl::desc("Enable cyclic critical path analysis."), cl::init(true)); | ||||||||
128 | |||||||||
129 | static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden, | ||||||||
130 | cl::desc("Enable memop clustering."), | ||||||||
131 | cl::init(true)); | ||||||||
132 | static cl::opt<bool> | ||||||||
133 | ForceFastCluster("force-fast-cluster", cl::Hidden, | ||||||||
134 | cl::desc("Switch to fast cluster algorithm with the lost " | ||||||||
135 | "of some fusion opportunities"), | ||||||||
136 | cl::init(false)); | ||||||||
137 | static cl::opt<unsigned> | ||||||||
138 | FastClusterThreshold("fast-cluster-threshold", cl::Hidden, | ||||||||
139 | cl::desc("The threshold for fast cluster"), | ||||||||
140 | cl::init(1000)); | ||||||||
141 | |||||||||
142 | // DAG subtrees must have at least this many nodes. | ||||||||
143 | static const unsigned MinSubtreeSize = 8; | ||||||||
144 | |||||||||
145 | // Pin the vtables to this file. | ||||||||
146 | void MachineSchedStrategy::anchor() {} | ||||||||
147 | |||||||||
148 | void ScheduleDAGMutation::anchor() {} | ||||||||
149 | |||||||||
150 | //===----------------------------------------------------------------------===// | ||||||||
151 | // Machine Instruction Scheduling Pass and Registry | ||||||||
152 | //===----------------------------------------------------------------------===// | ||||||||
153 | |||||||||
154 | MachineSchedContext::MachineSchedContext() { | ||||||||
155 | RegClassInfo = new RegisterClassInfo(); | ||||||||
156 | } | ||||||||
157 | |||||||||
158 | MachineSchedContext::~MachineSchedContext() { | ||||||||
159 | delete RegClassInfo; | ||||||||
160 | } | ||||||||
161 | |||||||||
162 | namespace { | ||||||||
163 | |||||||||
164 | /// Base class for a machine scheduler class that can run at any point. | ||||||||
165 | class MachineSchedulerBase : public MachineSchedContext, | ||||||||
166 | public MachineFunctionPass { | ||||||||
167 | public: | ||||||||
168 | MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {} | ||||||||
169 | |||||||||
170 | void print(raw_ostream &O, const Module* = nullptr) const override; | ||||||||
171 | |||||||||
172 | protected: | ||||||||
173 | void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags); | ||||||||
174 | }; | ||||||||
175 | |||||||||
176 | /// MachineScheduler runs after coalescing and before register allocation. | ||||||||
177 | class MachineScheduler : public MachineSchedulerBase { | ||||||||
178 | public: | ||||||||
179 | MachineScheduler(); | ||||||||
180 | |||||||||
181 | void getAnalysisUsage(AnalysisUsage &AU) const override; | ||||||||
182 | |||||||||
183 | bool runOnMachineFunction(MachineFunction&) override; | ||||||||
184 | |||||||||
185 | static char ID; // Class identification, replacement for typeinfo | ||||||||
186 | |||||||||
187 | protected: | ||||||||
188 | ScheduleDAGInstrs *createMachineScheduler(); | ||||||||
189 | }; | ||||||||
190 | |||||||||
191 | /// PostMachineScheduler runs after shortly before code emission. | ||||||||
192 | class PostMachineScheduler : public MachineSchedulerBase { | ||||||||
193 | public: | ||||||||
194 | PostMachineScheduler(); | ||||||||
195 | |||||||||
196 | void getAnalysisUsage(AnalysisUsage &AU) const override; | ||||||||
197 | |||||||||
198 | bool runOnMachineFunction(MachineFunction&) override; | ||||||||
199 | |||||||||
200 | static char ID; // Class identification, replacement for typeinfo | ||||||||
201 | |||||||||
202 | protected: | ||||||||
203 | ScheduleDAGInstrs *createPostMachineScheduler(); | ||||||||
204 | }; | ||||||||
205 | |||||||||
206 | } // end anonymous namespace | ||||||||
207 | |||||||||
208 | char MachineScheduler::ID = 0; | ||||||||
209 | |||||||||
210 | char &llvm::MachineSchedulerID = MachineScheduler::ID; | ||||||||
211 | |||||||||
212 | INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,static void *initializeMachineSchedulerPassOnce(PassRegistry & Registry) { | ||||||||
213 | "Machine Instruction Scheduler", false, false)static void *initializeMachineSchedulerPassOnce(PassRegistry & Registry) { | ||||||||
214 | INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)initializeAAResultsWrapperPassPass(Registry); | ||||||||
215 | INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)initializeMachineDominatorTreePass(Registry); | ||||||||
216 | INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)initializeMachineLoopInfoPass(Registry); | ||||||||
217 | INITIALIZE_PASS_DEPENDENCY(SlotIndexes)initializeSlotIndexesPass(Registry); | ||||||||
218 | INITIALIZE_PASS_DEPENDENCY(LiveIntervals)initializeLiveIntervalsPass(Registry); | ||||||||
219 | INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,PassInfo *PI = new PassInfo( "Machine Instruction Scheduler", "machine-scheduler", &MachineScheduler::ID, PassInfo::NormalCtor_t (callDefaultCtor<MachineScheduler>), false, false); Registry .registerPass(*PI, true); return PI; } static llvm::once_flag InitializeMachineSchedulerPassFlag; void llvm::initializeMachineSchedulerPass (PassRegistry &Registry) { llvm::call_once(InitializeMachineSchedulerPassFlag , initializeMachineSchedulerPassOnce, std::ref(Registry)); } | ||||||||
220 | "Machine Instruction Scheduler", false, false)PassInfo *PI = new PassInfo( "Machine Instruction Scheduler", "machine-scheduler", &MachineScheduler::ID, PassInfo::NormalCtor_t (callDefaultCtor<MachineScheduler>), false, false); Registry .registerPass(*PI, true); return PI; } static llvm::once_flag InitializeMachineSchedulerPassFlag; void llvm::initializeMachineSchedulerPass (PassRegistry &Registry) { llvm::call_once(InitializeMachineSchedulerPassFlag , initializeMachineSchedulerPassOnce, std::ref(Registry)); } | ||||||||
221 | |||||||||
222 | MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) { | ||||||||
223 | initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); | ||||||||
224 | } | ||||||||
225 | |||||||||
226 | void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { | ||||||||
227 | AU.setPreservesCFG(); | ||||||||
228 | AU.addRequired<MachineDominatorTree>(); | ||||||||
229 | AU.addRequired<MachineLoopInfo>(); | ||||||||
230 | AU.addRequired<AAResultsWrapperPass>(); | ||||||||
231 | AU.addRequired<TargetPassConfig>(); | ||||||||
232 | AU.addRequired<SlotIndexes>(); | ||||||||
233 | AU.addPreserved<SlotIndexes>(); | ||||||||
234 | AU.addRequired<LiveIntervals>(); | ||||||||
235 | AU.addPreserved<LiveIntervals>(); | ||||||||
236 | MachineFunctionPass::getAnalysisUsage(AU); | ||||||||
237 | } | ||||||||
238 | |||||||||
239 | char PostMachineScheduler::ID = 0; | ||||||||
240 | |||||||||
241 | char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID; | ||||||||
242 | |||||||||
243 | INITIALIZE_PASS_BEGIN(PostMachineScheduler, "postmisched",static void *initializePostMachineSchedulerPassOnce(PassRegistry &Registry) { | ||||||||
244 | "PostRA Machine Instruction Scheduler", false, false)static void *initializePostMachineSchedulerPassOnce(PassRegistry &Registry) { | ||||||||
245 | INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)initializeMachineDominatorTreePass(Registry); | ||||||||
246 | INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)initializeMachineLoopInfoPass(Registry); | ||||||||
247 | INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)initializeAAResultsWrapperPassPass(Registry); | ||||||||
248 | INITIALIZE_PASS_END(PostMachineScheduler, "postmisched",PassInfo *PI = new PassInfo( "PostRA Machine Instruction Scheduler" , "postmisched", &PostMachineScheduler::ID, PassInfo::NormalCtor_t (callDefaultCtor<PostMachineScheduler>), false, false); Registry.registerPass(*PI, true); return PI; } static llvm:: once_flag InitializePostMachineSchedulerPassFlag; void llvm:: initializePostMachineSchedulerPass(PassRegistry &Registry ) { llvm::call_once(InitializePostMachineSchedulerPassFlag, initializePostMachineSchedulerPassOnce , std::ref(Registry)); } | ||||||||
249 | "PostRA Machine Instruction Scheduler", false, false)PassInfo *PI = new PassInfo( "PostRA Machine Instruction Scheduler" , "postmisched", &PostMachineScheduler::ID, PassInfo::NormalCtor_t (callDefaultCtor<PostMachineScheduler>), false, false); Registry.registerPass(*PI, true); return PI; } static llvm:: once_flag InitializePostMachineSchedulerPassFlag; void llvm:: initializePostMachineSchedulerPass(PassRegistry &Registry ) { llvm::call_once(InitializePostMachineSchedulerPassFlag, initializePostMachineSchedulerPassOnce , std::ref(Registry)); } | ||||||||
250 | |||||||||
251 | PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) { | ||||||||
252 | initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry()); | ||||||||
253 | } | ||||||||
254 | |||||||||
255 | void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { | ||||||||
256 | AU.setPreservesCFG(); | ||||||||
257 | AU.addRequired<MachineDominatorTree>(); | ||||||||
258 | AU.addRequired<MachineLoopInfo>(); | ||||||||
259 | AU.addRequired<AAResultsWrapperPass>(); | ||||||||
260 | AU.addRequired<TargetPassConfig>(); | ||||||||
261 | MachineFunctionPass::getAnalysisUsage(AU); | ||||||||
262 | } | ||||||||
263 | |||||||||
264 | MachinePassRegistry<MachineSchedRegistry::ScheduleDAGCtor> | ||||||||
265 | MachineSchedRegistry::Registry; | ||||||||
266 | |||||||||
267 | /// A dummy default scheduler factory indicates whether the scheduler | ||||||||
268 | /// is overridden on the command line. | ||||||||
269 | static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) { | ||||||||
270 | return nullptr; | ||||||||
271 | } | ||||||||
272 | |||||||||
273 | /// MachineSchedOpt allows command line selection of the scheduler. | ||||||||
274 | static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false, | ||||||||
275 | RegisterPassParser<MachineSchedRegistry>> | ||||||||
276 | MachineSchedOpt("misched", | ||||||||
277 | cl::init(&useDefaultMachineSched), cl::Hidden, | ||||||||
278 | cl::desc("Machine instruction scheduler to use")); | ||||||||
279 | |||||||||
280 | static MachineSchedRegistry | ||||||||
281 | DefaultSchedRegistry("default", "Use the target's default scheduler choice.", | ||||||||
282 | useDefaultMachineSched); | ||||||||
283 | |||||||||
284 | static cl::opt<bool> EnableMachineSched( | ||||||||
285 | "enable-misched", | ||||||||
286 | cl::desc("Enable the machine instruction scheduling pass."), cl::init(true), | ||||||||
287 | cl::Hidden); | ||||||||
288 | |||||||||
289 | static cl::opt<bool> EnablePostRAMachineSched( | ||||||||
290 | "enable-post-misched", | ||||||||
291 | cl::desc("Enable the post-ra machine instruction scheduling pass."), | ||||||||
292 | cl::init(true), cl::Hidden); | ||||||||
293 | |||||||||
294 | /// Decrement this iterator until reaching the top or a non-debug instr. | ||||||||
295 | static MachineBasicBlock::const_iterator | ||||||||
296 | priorNonDebug(MachineBasicBlock::const_iterator I, | ||||||||
297 | MachineBasicBlock::const_iterator Beg) { | ||||||||
298 | assert(I != Beg && "reached the top of the region, cannot decrement")((void)0); | ||||||||
299 | while (--I != Beg) { | ||||||||
300 | if (!I->isDebugOrPseudoInstr()) | ||||||||
301 | break; | ||||||||
302 | } | ||||||||
303 | return I; | ||||||||
304 | } | ||||||||
305 | |||||||||
306 | /// Non-const version. | ||||||||
307 | static MachineBasicBlock::iterator | ||||||||
308 | priorNonDebug(MachineBasicBlock::iterator I, | ||||||||
309 | MachineBasicBlock::const_iterator Beg) { | ||||||||
310 | return priorNonDebug(MachineBasicBlock::const_iterator(I), Beg) | ||||||||
311 | .getNonConstIterator(); | ||||||||
312 | } | ||||||||
313 | |||||||||
314 | /// If this iterator is a debug value, increment until reaching the End or a | ||||||||
315 | /// non-debug instruction. | ||||||||
316 | static MachineBasicBlock::const_iterator | ||||||||
317 | nextIfDebug(MachineBasicBlock::const_iterator I, | ||||||||
318 | MachineBasicBlock::const_iterator End) { | ||||||||
319 | for(; I != End; ++I) { | ||||||||
320 | if (!I->isDebugOrPseudoInstr()) | ||||||||
321 | break; | ||||||||
322 | } | ||||||||
323 | return I; | ||||||||
324 | } | ||||||||
325 | |||||||||
326 | /// Non-const version. | ||||||||
327 | static MachineBasicBlock::iterator | ||||||||
328 | nextIfDebug(MachineBasicBlock::iterator I, | ||||||||
329 | MachineBasicBlock::const_iterator End) { | ||||||||
330 | return nextIfDebug(MachineBasicBlock::const_iterator(I), End) | ||||||||
331 | .getNonConstIterator(); | ||||||||
332 | } | ||||||||
333 | |||||||||
334 | /// Instantiate a ScheduleDAGInstrs that will be owned by the caller. | ||||||||
335 | ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() { | ||||||||
336 | // Select the scheduler, or set the default. | ||||||||
337 | MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt; | ||||||||
338 | if (Ctor != useDefaultMachineSched) | ||||||||
339 | return Ctor(this); | ||||||||
340 | |||||||||
341 | // Get the default scheduler set by the target for this function. | ||||||||
342 | ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this); | ||||||||
343 | if (Scheduler) | ||||||||
344 | return Scheduler; | ||||||||
345 | |||||||||
346 | // Default to GenericScheduler. | ||||||||
347 | return createGenericSchedLive(this); | ||||||||
348 | } | ||||||||
349 | |||||||||
350 | /// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by | ||||||||
351 | /// the caller. We don't have a command line option to override the postRA | ||||||||
352 | /// scheduler. The Target must configure it. | ||||||||
353 | ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() { | ||||||||
354 | // Get the postRA scheduler set by the target for this function. | ||||||||
355 | ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this); | ||||||||
356 | if (Scheduler) | ||||||||
357 | return Scheduler; | ||||||||
358 | |||||||||
359 | // Default to GenericScheduler. | ||||||||
360 | return createGenericSchedPostRA(this); | ||||||||
361 | } | ||||||||
362 | |||||||||
363 | /// Top-level MachineScheduler pass driver. | ||||||||
364 | /// | ||||||||
365 | /// Visit blocks in function order. Divide each block into scheduling regions | ||||||||
366 | /// and visit them bottom-up. Visiting regions bottom-up is not required, but is | ||||||||
367 | /// consistent with the DAG builder, which traverses the interior of the | ||||||||
368 | /// scheduling regions bottom-up. | ||||||||
369 | /// | ||||||||
370 | /// This design avoids exposing scheduling boundaries to the DAG builder, | ||||||||
371 | /// simplifying the DAG builder's support for "special" target instructions. | ||||||||
372 | /// At the same time the design allows target schedulers to operate across | ||||||||
373 | /// scheduling boundaries, for example to bundle the boundary instructions | ||||||||
374 | /// without reordering them. This creates complexity, because the target | ||||||||
375 | /// scheduler must update the RegionBegin and RegionEnd positions cached by | ||||||||
376 | /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler | ||||||||
377 | /// design would be to split blocks at scheduling boundaries, but LLVM has a | ||||||||
378 | /// general bias against block splitting purely for implementation simplicity. | ||||||||
379 | bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { | ||||||||
380 | if (skipFunction(mf.getFunction())) | ||||||||
381 | return false; | ||||||||
382 | |||||||||
383 | if (EnableMachineSched.getNumOccurrences()) { | ||||||||
384 | if (!EnableMachineSched) | ||||||||
385 | return false; | ||||||||
386 | } else if (!mf.getSubtarget().enableMachineScheduler()) | ||||||||
387 | return false; | ||||||||
388 | |||||||||
389 | LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()))do { } while (false); | ||||||||
390 | |||||||||
391 | // Initialize the context of the pass. | ||||||||
392 | MF = &mf; | ||||||||
393 | MLI = &getAnalysis<MachineLoopInfo>(); | ||||||||
394 | MDT = &getAnalysis<MachineDominatorTree>(); | ||||||||
395 | PassConfig = &getAnalysis<TargetPassConfig>(); | ||||||||
396 | AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); | ||||||||
397 | |||||||||
398 | LIS = &getAnalysis<LiveIntervals>(); | ||||||||
399 | |||||||||
400 | if (VerifyScheduling) { | ||||||||
401 | LLVM_DEBUG(LIS->dump())do { } while (false); | ||||||||
402 | MF->verify(this, "Before machine scheduling."); | ||||||||
403 | } | ||||||||
404 | RegClassInfo->runOnMachineFunction(*MF); | ||||||||
405 | |||||||||
406 | // Instantiate the selected scheduler for this target, function, and | ||||||||
407 | // optimization level. | ||||||||
408 | std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler()); | ||||||||
409 | scheduleRegions(*Scheduler, false); | ||||||||
410 | |||||||||
411 | LLVM_DEBUG(LIS->dump())do { } while (false); | ||||||||
412 | if (VerifyScheduling) | ||||||||
413 | MF->verify(this, "After machine scheduling."); | ||||||||
414 | return true; | ||||||||
415 | } | ||||||||
416 | |||||||||
417 | bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) { | ||||||||
418 | if (skipFunction(mf.getFunction())) | ||||||||
419 | return false; | ||||||||
420 | |||||||||
421 | if (EnablePostRAMachineSched.getNumOccurrences()) { | ||||||||
422 | if (!EnablePostRAMachineSched) | ||||||||
423 | return false; | ||||||||
424 | } else if (!mf.getSubtarget().enablePostRAMachineScheduler()) { | ||||||||
425 | LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n")do { } while (false); | ||||||||
426 | return false; | ||||||||
427 | } | ||||||||
428 | LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()))do { } while (false); | ||||||||
429 | |||||||||
430 | // Initialize the context of the pass. | ||||||||
431 | MF = &mf; | ||||||||
432 | MLI = &getAnalysis<MachineLoopInfo>(); | ||||||||
433 | PassConfig = &getAnalysis<TargetPassConfig>(); | ||||||||
434 | AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); | ||||||||
435 | |||||||||
436 | if (VerifyScheduling) | ||||||||
437 | MF->verify(this, "Before post machine scheduling."); | ||||||||
438 | |||||||||
439 | // Instantiate the selected scheduler for this target, function, and | ||||||||
440 | // optimization level. | ||||||||
441 | std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler()); | ||||||||
442 | scheduleRegions(*Scheduler, true); | ||||||||
443 | |||||||||
444 | if (VerifyScheduling) | ||||||||
445 | MF->verify(this, "After post machine scheduling."); | ||||||||
446 | return true; | ||||||||
447 | } | ||||||||
448 | |||||||||
449 | /// Return true of the given instruction should not be included in a scheduling | ||||||||
450 | /// region. | ||||||||
451 | /// | ||||||||
452 | /// MachineScheduler does not currently support scheduling across calls. To | ||||||||
453 | /// handle calls, the DAG builder needs to be modified to create register | ||||||||
454 | /// anti/output dependencies on the registers clobbered by the call's regmask | ||||||||
455 | /// operand. In PreRA scheduling, the stack pointer adjustment already prevents | ||||||||
456 | /// scheduling across calls. In PostRA scheduling, we need the isCall to enforce | ||||||||
457 | /// the boundary, but there would be no benefit to postRA scheduling across | ||||||||
458 | /// calls this late anyway. | ||||||||
459 | static bool isSchedBoundary(MachineBasicBlock::iterator MI, | ||||||||
460 | MachineBasicBlock *MBB, | ||||||||
461 | MachineFunction *MF, | ||||||||
462 | const TargetInstrInfo *TII) { | ||||||||
463 | return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF); | ||||||||
464 | } | ||||||||
465 | |||||||||
466 | /// A region of an MBB for scheduling. | ||||||||
467 | namespace { | ||||||||
468 | struct SchedRegion { | ||||||||
469 | /// RegionBegin is the first instruction in the scheduling region, and | ||||||||
470 | /// RegionEnd is either MBB->end() or the scheduling boundary after the | ||||||||
471 | /// last instruction in the scheduling region. These iterators cannot refer | ||||||||
472 | /// to instructions outside of the identified scheduling region because | ||||||||
473 | /// those may be reordered before scheduling this region. | ||||||||
474 | MachineBasicBlock::iterator RegionBegin; | ||||||||
475 | MachineBasicBlock::iterator RegionEnd; | ||||||||
476 | unsigned NumRegionInstrs; | ||||||||
477 | |||||||||
478 | SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E, | ||||||||
479 | unsigned N) : | ||||||||
480 | RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {} | ||||||||
481 | }; | ||||||||
482 | } // end anonymous namespace | ||||||||
483 | |||||||||
484 | using MBBRegionsVector = SmallVector<SchedRegion, 16>; | ||||||||
485 | |||||||||
486 | static void | ||||||||
487 | getSchedRegions(MachineBasicBlock *MBB, | ||||||||
488 | MBBRegionsVector &Regions, | ||||||||
489 | bool RegionsTopDown) { | ||||||||
490 | MachineFunction *MF = MBB->getParent(); | ||||||||
491 | const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); | ||||||||
492 | |||||||||
493 | MachineBasicBlock::iterator I = nullptr; | ||||||||
494 | for(MachineBasicBlock::iterator RegionEnd = MBB->end(); | ||||||||
495 | RegionEnd != MBB->begin(); RegionEnd = I) { | ||||||||
496 | |||||||||
497 | // Avoid decrementing RegionEnd for blocks with no terminator. | ||||||||
498 | if (RegionEnd != MBB->end() || | ||||||||
499 | isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) { | ||||||||
500 | --RegionEnd; | ||||||||
501 | } | ||||||||
502 | |||||||||
503 | // The next region starts above the previous region. Look backward in the | ||||||||
504 | // instruction stream until we find the nearest boundary. | ||||||||
505 | unsigned NumRegionInstrs = 0; | ||||||||
506 | I = RegionEnd; | ||||||||
507 | for (;I != MBB->begin(); --I) { | ||||||||
508 | MachineInstr &MI = *std::prev(I); | ||||||||
509 | if (isSchedBoundary(&MI, &*MBB, MF, TII)) | ||||||||
510 | break; | ||||||||
511 | if (!MI.isDebugOrPseudoInstr()) { | ||||||||
512 | // MBB::size() uses instr_iterator to count. Here we need a bundle to | ||||||||
513 | // count as a single instruction. | ||||||||
514 | ++NumRegionInstrs; | ||||||||
515 | } | ||||||||
516 | } | ||||||||
517 | |||||||||
518 | // It's possible we found a scheduling region that only has debug | ||||||||
519 | // instructions. Don't bother scheduling these. | ||||||||
520 | if (NumRegionInstrs != 0) | ||||||||
521 | Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs)); | ||||||||
522 | } | ||||||||
523 | |||||||||
524 | if (RegionsTopDown) | ||||||||
525 | std::reverse(Regions.begin(), Regions.end()); | ||||||||
526 | } | ||||||||
527 | |||||||||
528 | /// Main driver for both MachineScheduler and PostMachineScheduler. | ||||||||
529 | void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler, | ||||||||
530 | bool FixKillFlags) { | ||||||||
531 | // Visit all machine basic blocks. | ||||||||
532 | // | ||||||||
533 | // TODO: Visit blocks in global postorder or postorder within the bottom-up | ||||||||
534 | // loop tree. Then we can optionally compute global RegPressure. | ||||||||
535 | for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end(); | ||||||||
536 | MBB != MBBEnd; ++MBB) { | ||||||||
537 | |||||||||
538 | Scheduler.startBlock(&*MBB); | ||||||||
539 | |||||||||
540 | #ifndef NDEBUG1 | ||||||||
541 | if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName()) | ||||||||
542 | continue; | ||||||||
543 | if (SchedOnlyBlock.getNumOccurrences() | ||||||||
544 | && (int)SchedOnlyBlock != MBB->getNumber()) | ||||||||
545 | continue; | ||||||||
546 | #endif | ||||||||
547 | |||||||||
548 | // Break the block into scheduling regions [I, RegionEnd). RegionEnd | ||||||||
549 | // points to the scheduling boundary at the bottom of the region. The DAG | ||||||||
550 | // does not include RegionEnd, but the region does (i.e. the next | ||||||||
551 | // RegionEnd is above the previous RegionBegin). If the current block has | ||||||||
552 | // no terminator then RegionEnd == MBB->end() for the bottom region. | ||||||||
553 | // | ||||||||
554 | // All the regions of MBB are first found and stored in MBBRegions, which | ||||||||
555 | // will be processed (MBB) top-down if initialized with true. | ||||||||
556 | // | ||||||||
557 | // The Scheduler may insert instructions during either schedule() or | ||||||||
558 | // exitRegion(), even for empty regions. So the local iterators 'I' and | ||||||||
559 | // 'RegionEnd' are invalid across these calls. Instructions must not be | ||||||||
560 | // added to other regions than the current one without updating MBBRegions. | ||||||||
561 | |||||||||
562 | MBBRegionsVector MBBRegions; | ||||||||
563 | getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown()); | ||||||||
564 | for (MBBRegionsVector::iterator R = MBBRegions.begin(); | ||||||||
565 | R != MBBRegions.end(); ++R) { | ||||||||
566 | MachineBasicBlock::iterator I = R->RegionBegin; | ||||||||
567 | MachineBasicBlock::iterator RegionEnd = R->RegionEnd; | ||||||||
568 | unsigned NumRegionInstrs = R->NumRegionInstrs; | ||||||||
569 | |||||||||
570 | // Notify the scheduler of the region, even if we may skip scheduling | ||||||||
571 | // it. Perhaps it still needs to be bundled. | ||||||||
572 | Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs); | ||||||||
573 | |||||||||
574 | // Skip empty scheduling regions (0 or 1 schedulable instructions). | ||||||||
575 | if (I == RegionEnd || I == std::prev(RegionEnd)) { | ||||||||
576 | // Close the current region. Bundle the terminator if needed. | ||||||||
577 | // This invalidates 'RegionEnd' and 'I'. | ||||||||
578 | Scheduler.exitRegion(); | ||||||||
579 | continue; | ||||||||
580 | } | ||||||||
581 | LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n")do { } while (false); | ||||||||
582 | LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)do { } while (false) | ||||||||
583 | << " " << MBB->getName() << "\n From: " << *Ido { } while (false) | ||||||||
584 | << " To: ";do { } while (false) | ||||||||
585 | if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;do { } while (false) | ||||||||
586 | else dbgs() << "End";do { } while (false) | ||||||||
587 | dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n')do { } while (false); | ||||||||
588 | if (DumpCriticalPathLength) { | ||||||||
589 | errs() << MF->getName(); | ||||||||
590 | errs() << ":%bb. " << MBB->getNumber(); | ||||||||
591 | errs() << " " << MBB->getName() << " \n"; | ||||||||
592 | } | ||||||||
593 | |||||||||
594 | // Schedule a region: possibly reorder instructions. | ||||||||
595 | // This invalidates the original region iterators. | ||||||||
596 | Scheduler.schedule(); | ||||||||
597 | |||||||||
598 | // Close the current region. | ||||||||
599 | Scheduler.exitRegion(); | ||||||||
600 | } | ||||||||
601 | Scheduler.finishBlock(); | ||||||||
602 | // FIXME: Ideally, no further passes should rely on kill flags. However, | ||||||||
603 | // thumb2 size reduction is currently an exception, so the PostMIScheduler | ||||||||
604 | // needs to do this. | ||||||||
605 | if (FixKillFlags) | ||||||||
606 | Scheduler.fixupKills(*MBB); | ||||||||
607 | } | ||||||||
608 | Scheduler.finalizeSchedule(); | ||||||||
609 | } | ||||||||
610 | |||||||||
611 | void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const { | ||||||||
612 | // unimplemented | ||||||||
613 | } | ||||||||
614 | |||||||||
615 | #if !defined(NDEBUG1) || defined(LLVM_ENABLE_DUMP) | ||||||||
616 | LLVM_DUMP_METHOD__attribute__((noinline)) void ReadyQueue::dump() const { | ||||||||
617 | dbgs() << "Queue " << Name << ": "; | ||||||||
618 | for (const SUnit *SU : Queue) | ||||||||
619 | dbgs() << SU->NodeNum << " "; | ||||||||
620 | dbgs() << "\n"; | ||||||||
621 | } | ||||||||
622 | #endif | ||||||||
623 | |||||||||
624 | //===----------------------------------------------------------------------===// | ||||||||
625 | // ScheduleDAGMI - Basic machine instruction scheduling. This is | ||||||||
626 | // independent of PreRA/PostRA scheduling and involves no extra book-keeping for | ||||||||
627 | // virtual registers. | ||||||||
628 | // ===----------------------------------------------------------------------===/ | ||||||||
629 | |||||||||
630 | // Provide a vtable anchor. | ||||||||
631 | ScheduleDAGMI::~ScheduleDAGMI() = default; | ||||||||
632 | |||||||||
633 | /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When | ||||||||
634 | /// NumPredsLeft reaches zero, release the successor node. | ||||||||
635 | /// | ||||||||
636 | /// FIXME: Adjust SuccSU height based on MinLatency. | ||||||||
637 | void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) { | ||||||||
638 | SUnit *SuccSU = SuccEdge->getSUnit(); | ||||||||
639 | |||||||||
640 | if (SuccEdge->isWeak()) { | ||||||||
641 | --SuccSU->WeakPredsLeft; | ||||||||
642 | if (SuccEdge->isCluster()) | ||||||||
643 | NextClusterSucc = SuccSU; | ||||||||
644 | return; | ||||||||
645 | } | ||||||||
646 | #ifndef NDEBUG1 | ||||||||
647 | if (SuccSU->NumPredsLeft == 0) { | ||||||||
648 | dbgs() << "*** Scheduling failed! ***\n"; | ||||||||
649 | dumpNode(*SuccSU); | ||||||||
650 | dbgs() << " has been released too many times!\n"; | ||||||||
651 | llvm_unreachable(nullptr)__builtin_unreachable(); | ||||||||
652 | } | ||||||||
653 | #endif | ||||||||
654 | // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However, | ||||||||
655 | // CurrCycle may have advanced since then. | ||||||||
656 | if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency()) | ||||||||
657 | SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency(); | ||||||||
658 | |||||||||
659 | --SuccSU->NumPredsLeft; | ||||||||
660 | if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) | ||||||||
661 | SchedImpl->releaseTopNode(SuccSU); | ||||||||
662 | } | ||||||||
663 | |||||||||
664 | /// releaseSuccessors - Call releaseSucc on each of SU's successors. | ||||||||
665 | void ScheduleDAGMI::releaseSuccessors(SUnit *SU) { | ||||||||
666 | for (SDep &Succ : SU->Succs) | ||||||||
667 | releaseSucc(SU, &Succ); | ||||||||
668 | } | ||||||||
669 | |||||||||
670 | /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When | ||||||||
671 | /// NumSuccsLeft reaches zero, release the predecessor node. | ||||||||
672 | /// | ||||||||
673 | /// FIXME: Adjust PredSU height based on MinLatency. | ||||||||
674 | void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) { | ||||||||
675 | SUnit *PredSU = PredEdge->getSUnit(); | ||||||||
676 | |||||||||
677 | if (PredEdge->isWeak()) { | ||||||||
678 | --PredSU->WeakSuccsLeft; | ||||||||
679 | if (PredEdge->isCluster()) | ||||||||
680 | NextClusterPred = PredSU; | ||||||||
681 | return; | ||||||||
682 | } | ||||||||
683 | #ifndef NDEBUG1 | ||||||||
684 | if (PredSU->NumSuccsLeft == 0) { | ||||||||
685 | dbgs() << "*** Scheduling failed! ***\n"; | ||||||||
686 | dumpNode(*PredSU); | ||||||||
687 | dbgs() << " has been released too many times!\n"; | ||||||||
688 | llvm_unreachable(nullptr)__builtin_unreachable(); | ||||||||
689 | } | ||||||||
690 | #endif | ||||||||
691 | // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However, | ||||||||
692 | // CurrCycle may have advanced since then. | ||||||||
693 | if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency()) | ||||||||
694 | PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency(); | ||||||||
695 | |||||||||
696 | --PredSU->NumSuccsLeft; | ||||||||
697 | if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) | ||||||||
698 | SchedImpl->releaseBottomNode(PredSU); | ||||||||
699 | } | ||||||||
700 | |||||||||
701 | /// releasePredecessors - Call releasePred on each of SU's predecessors. | ||||||||
702 | void ScheduleDAGMI::releasePredecessors(SUnit *SU) { | ||||||||
703 | for (SDep &Pred : SU->Preds) | ||||||||
704 | releasePred(SU, &Pred); | ||||||||
705 | } | ||||||||
706 | |||||||||
707 | void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) { | ||||||||
708 | ScheduleDAGInstrs::startBlock(bb); | ||||||||
709 | SchedImpl->enterMBB(bb); | ||||||||
710 | } | ||||||||
711 | |||||||||
712 | void ScheduleDAGMI::finishBlock() { | ||||||||
713 | SchedImpl->leaveMBB(); | ||||||||
714 | ScheduleDAGInstrs::finishBlock(); | ||||||||
715 | } | ||||||||
716 | |||||||||
717 | /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after | ||||||||
718 | /// crossing a scheduling boundary. [begin, end) includes all instructions in | ||||||||
719 | /// the region, including the boundary itself and single-instruction regions | ||||||||
720 | /// that don't get scheduled. | ||||||||
721 | void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb, | ||||||||
722 | MachineBasicBlock::iterator begin, | ||||||||
723 | MachineBasicBlock::iterator end, | ||||||||
724 | unsigned regioninstrs) | ||||||||
725 | { | ||||||||
726 | ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs); | ||||||||
727 | |||||||||
728 | SchedImpl->initPolicy(begin, end, regioninstrs); | ||||||||
729 | } | ||||||||
730 | |||||||||
731 | /// This is normally called from the main scheduler loop but may also be invoked | ||||||||
732 | /// by the scheduling strategy to perform additional code motion. | ||||||||
733 | void ScheduleDAGMI::moveInstruction( | ||||||||
734 | MachineInstr *MI, MachineBasicBlock::iterator InsertPos) { | ||||||||
735 | // Advance RegionBegin if the first instruction moves down. | ||||||||
736 | if (&*RegionBegin == MI) | ||||||||
737 | ++RegionBegin; | ||||||||
738 | |||||||||
739 | // Update the instruction stream. | ||||||||
740 | BB->splice(InsertPos, BB, MI); | ||||||||
741 | |||||||||
742 | // Update LiveIntervals | ||||||||
743 | if (LIS) | ||||||||
744 | LIS->handleMove(*MI, /*UpdateFlags=*/true); | ||||||||
745 | |||||||||
746 | // Recede RegionBegin if an instruction moves above the first. | ||||||||
747 | if (RegionBegin == InsertPos) | ||||||||
748 | RegionBegin = MI; | ||||||||
749 | } | ||||||||
750 | |||||||||
751 | bool ScheduleDAGMI::checkSchedLimit() { | ||||||||
752 | #ifndef NDEBUG1 | ||||||||
753 | if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) { | ||||||||
754 | CurrentTop = CurrentBottom; | ||||||||
755 | return false; | ||||||||
756 | } | ||||||||
757 | ++NumInstrsScheduled; | ||||||||
758 | #endif | ||||||||
759 | return true; | ||||||||
760 | } | ||||||||
761 | |||||||||
762 | /// Per-region scheduling driver, called back from | ||||||||
763 | /// MachineScheduler::runOnMachineFunction. This is a simplified driver that | ||||||||
764 | /// does not consider liveness or register pressure. It is useful for PostRA | ||||||||
765 | /// scheduling and potentially other custom schedulers. | ||||||||
766 | void ScheduleDAGMI::schedule() { | ||||||||
767 | LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n")do { } while (false); | ||||||||
768 | LLVM_DEBUG(SchedImpl->dumpPolicy())do { } while (false); | ||||||||
769 | |||||||||
770 | // Build the DAG. | ||||||||
771 | buildSchedGraph(AA); | ||||||||
772 | |||||||||
773 | postprocessDAG(); | ||||||||
774 | |||||||||
775 | SmallVector<SUnit*, 8> TopRoots, BotRoots; | ||||||||
776 | findRootsAndBiasEdges(TopRoots, BotRoots); | ||||||||
777 | |||||||||
778 | LLVM_DEBUG(dump())do { } while (false); | ||||||||
779 | if (PrintDAGs) dump(); | ||||||||
780 | if (ViewMISchedDAGs) viewGraph(); | ||||||||
781 | |||||||||
782 | // Initialize the strategy before modifying the DAG. | ||||||||
783 | // This may initialize a DFSResult to be used for queue priority. | ||||||||
784 | SchedImpl->initialize(this); | ||||||||
785 | |||||||||
786 | // Initialize ready queues now that the DAG and priority data are finalized. | ||||||||
787 | initQueues(TopRoots, BotRoots); | ||||||||
788 | |||||||||
789 | bool IsTopNode = false; | ||||||||
790 | while (true) { | ||||||||
791 | LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n")do { } while (false); | ||||||||
792 | SUnit *SU = SchedImpl->pickNode(IsTopNode); | ||||||||
793 | if (!SU) break; | ||||||||
794 | |||||||||
795 | assert(!SU->isScheduled && "Node already scheduled")((void)0); | ||||||||
796 | if (!checkSchedLimit()) | ||||||||
797 | break; | ||||||||
798 | |||||||||
799 | MachineInstr *MI = SU->getInstr(); | ||||||||
800 | if (IsTopNode) { | ||||||||
801 | assert(SU->isTopReady() && "node still has unscheduled dependencies")((void)0); | ||||||||
802 | if (&*CurrentTop == MI) | ||||||||
803 | CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom); | ||||||||
804 | else | ||||||||
805 | moveInstruction(MI, CurrentTop); | ||||||||
806 | } else { | ||||||||
807 | assert(SU->isBottomReady() && "node still has unscheduled dependencies")((void)0); | ||||||||
808 | MachineBasicBlock::iterator priorII = | ||||||||
809 | priorNonDebug(CurrentBottom, CurrentTop); | ||||||||
810 | if (&*priorII == MI) | ||||||||
811 | CurrentBottom = priorII; | ||||||||
812 | else { | ||||||||
813 | if (&*CurrentTop == MI) | ||||||||
814 | CurrentTop = nextIfDebug(++CurrentTop, priorII); | ||||||||
815 | moveInstruction(MI, CurrentBottom); | ||||||||
816 | CurrentBottom = MI; | ||||||||
817 | } | ||||||||
818 | } | ||||||||
819 | // Notify the scheduling strategy before updating the DAG. | ||||||||
820 | // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues | ||||||||
821 | // runs, it can then use the accurate ReadyCycle time to determine whether | ||||||||
822 | // newly released nodes can move to the readyQ. | ||||||||
823 | SchedImpl->schedNode(SU, IsTopNode); | ||||||||
824 | |||||||||
825 | updateQueues(SU, IsTopNode); | ||||||||
826 | } | ||||||||
827 | assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.")((void)0); | ||||||||
828 | |||||||||
829 | placeDebugValues(); | ||||||||
830 | |||||||||
831 | LLVM_DEBUG({do { } while (false) | ||||||||
832 | dbgs() << "*** Final schedule for "do { } while (false) | ||||||||
833 | << printMBBReference(*begin()->getParent()) << " ***\n";do { } while (false) | ||||||||
834 | dumpSchedule();do { } while (false) | ||||||||
835 | dbgs() << '\n';do { } while (false) | ||||||||
836 | })do { } while (false); | ||||||||
837 | } | ||||||||
838 | |||||||||
839 | /// Apply each ScheduleDAGMutation step in order. | ||||||||
840 | void ScheduleDAGMI::postprocessDAG() { | ||||||||
841 | for (auto &m : Mutations) | ||||||||
842 | m->apply(this); | ||||||||
843 | } | ||||||||
844 | |||||||||
845 | void ScheduleDAGMI:: | ||||||||
846 | findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots, | ||||||||
847 | SmallVectorImpl<SUnit*> &BotRoots) { | ||||||||
848 | for (SUnit &SU : SUnits) { | ||||||||
849 | assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits")((void)0); | ||||||||
850 | |||||||||
851 | // Order predecessors so DFSResult follows the critical path. | ||||||||
852 | SU.biasCriticalPath(); | ||||||||
853 | |||||||||
854 | // A SUnit is ready to top schedule if it has no predecessors. | ||||||||
855 | if (!SU.NumPredsLeft) | ||||||||
856 | TopRoots.push_back(&SU); | ||||||||
857 | // A SUnit is ready to bottom schedule if it has no successors. | ||||||||
858 | if (!SU.NumSuccsLeft) | ||||||||
859 | BotRoots.push_back(&SU); | ||||||||
860 | } | ||||||||
861 | ExitSU.biasCriticalPath(); | ||||||||
862 | } | ||||||||
863 | |||||||||
864 | /// Identify DAG roots and setup scheduler queues. | ||||||||
865 | void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots, | ||||||||
866 | ArrayRef<SUnit*> BotRoots) { | ||||||||
867 | NextClusterSucc = nullptr; | ||||||||
868 | NextClusterPred = nullptr; | ||||||||
869 | |||||||||
870 | // Release all DAG roots for scheduling, not including EntrySU/ExitSU. | ||||||||
871 | // | ||||||||
872 | // Nodes with unreleased weak edges can still be roots. | ||||||||
873 | // Release top roots in forward order. | ||||||||
874 | for (SUnit *SU : TopRoots) | ||||||||
875 | SchedImpl->releaseTopNode(SU); | ||||||||
876 | |||||||||
877 | // Release bottom roots in reverse order so the higher priority nodes appear | ||||||||
878 | // first. This is more natural and slightly more efficient. | ||||||||
879 | for (SmallVectorImpl<SUnit*>::const_reverse_iterator | ||||||||
880 | I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) { | ||||||||
881 | SchedImpl->releaseBottomNode(*I); | ||||||||
882 | } | ||||||||
883 | |||||||||
884 | releaseSuccessors(&EntrySU); | ||||||||
885 | releasePredecessors(&ExitSU); | ||||||||
886 | |||||||||
887 | SchedImpl->registerRoots(); | ||||||||
888 | |||||||||
889 | // Advance past initial DebugValues. | ||||||||
890 | CurrentTop = nextIfDebug(RegionBegin, RegionEnd); | ||||||||
891 | CurrentBottom = RegionEnd; | ||||||||
892 | } | ||||||||
893 | |||||||||
894 | /// Update scheduler queues after scheduling an instruction. | ||||||||
895 | void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) { | ||||||||
896 | // Release dependent instructions for scheduling. | ||||||||
897 | if (IsTopNode) | ||||||||
898 | releaseSuccessors(SU); | ||||||||
899 | else | ||||||||
900 | releasePredecessors(SU); | ||||||||
901 | |||||||||
902 | SU->isScheduled = true; | ||||||||
903 | } | ||||||||
904 | |||||||||
905 | /// Reinsert any remaining debug_values, just like the PostRA scheduler. | ||||||||
906 | void ScheduleDAGMI::placeDebugValues() { | ||||||||
907 | // If first instruction was a DBG_VALUE then put it back. | ||||||||
908 | if (FirstDbgValue) { | ||||||||
909 | BB->splice(RegionBegin, BB, FirstDbgValue); | ||||||||
910 | RegionBegin = FirstDbgValue; | ||||||||
911 | } | ||||||||
912 | |||||||||
913 | for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator | ||||||||
914 | DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) { | ||||||||
915 | std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI); | ||||||||
916 | MachineInstr *DbgValue = P.first; | ||||||||
917 | MachineBasicBlock::iterator OrigPrevMI = P.second; | ||||||||
918 | if (&*RegionBegin == DbgValue) | ||||||||
919 | ++RegionBegin; | ||||||||
920 | BB->splice(++OrigPrevMI, BB, DbgValue); | ||||||||
921 | if (OrigPrevMI == std::prev(RegionEnd)) | ||||||||
922 | RegionEnd = DbgValue; | ||||||||
923 | } | ||||||||
924 | DbgValues.clear(); | ||||||||
925 | FirstDbgValue = nullptr; | ||||||||
926 | } | ||||||||
927 | |||||||||
928 | #if !defined(NDEBUG1) || defined(LLVM_ENABLE_DUMP) | ||||||||
929 | LLVM_DUMP_METHOD__attribute__((noinline)) void ScheduleDAGMI::dumpSchedule() const { | ||||||||
930 | for (MachineInstr &MI : *this) { | ||||||||
931 | if (SUnit *SU = getSUnit(&MI)) | ||||||||
932 | dumpNode(*SU); | ||||||||
933 | else | ||||||||
934 | dbgs() << "Missing SUnit\n"; | ||||||||
935 | } | ||||||||
936 | } | ||||||||
937 | #endif | ||||||||
938 | |||||||||
939 | //===----------------------------------------------------------------------===// | ||||||||
940 | // ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals | ||||||||
941 | // preservation. | ||||||||
942 | //===----------------------------------------------------------------------===// | ||||||||
943 | |||||||||
944 | ScheduleDAGMILive::~ScheduleDAGMILive() { | ||||||||
945 | delete DFSResult; | ||||||||
946 | } | ||||||||
947 | |||||||||
948 | void ScheduleDAGMILive::collectVRegUses(SUnit &SU) { | ||||||||
949 | const MachineInstr &MI = *SU.getInstr(); | ||||||||
950 | for (const MachineOperand &MO : MI.operands()) { | ||||||||
951 | if (!MO.isReg()) | ||||||||
952 | continue; | ||||||||
953 | if (!MO.readsReg()) | ||||||||
954 | continue; | ||||||||
955 | if (TrackLaneMasks && !MO.isUse()) | ||||||||
956 | continue; | ||||||||
957 | |||||||||
958 | Register Reg = MO.getReg(); | ||||||||
959 | if (!Register::isVirtualRegister(Reg)) | ||||||||
960 | continue; | ||||||||
961 | |||||||||
962 | // Ignore re-defs. | ||||||||
963 | if (TrackLaneMasks) { | ||||||||
964 | bool FoundDef = false; | ||||||||
965 | for (const MachineOperand &MO2 : MI.operands()) { | ||||||||
966 | if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) { | ||||||||
967 | FoundDef = true; | ||||||||
968 | break; | ||||||||
969 | } | ||||||||
970 | } | ||||||||
971 | if (FoundDef) | ||||||||
972 | continue; | ||||||||
973 | } | ||||||||
974 | |||||||||
975 | // Record this local VReg use. | ||||||||
976 | VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg); | ||||||||
977 | for (; UI != VRegUses.end(); ++UI) { | ||||||||
978 | if (UI->SU == &SU) | ||||||||
979 | break; | ||||||||
980 | } | ||||||||
981 | if (UI == VRegUses.end()) | ||||||||
982 | VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU)); | ||||||||
983 | } | ||||||||
984 | } | ||||||||
985 | |||||||||
986 | /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after | ||||||||
987 | /// crossing a scheduling boundary. [begin, end) includes all instructions in | ||||||||
988 | /// the region, including the boundary itself and single-instruction regions | ||||||||
989 | /// that don't get scheduled. | ||||||||
990 | void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb, | ||||||||
991 | MachineBasicBlock::iterator begin, | ||||||||
992 | MachineBasicBlock::iterator end, | ||||||||
993 | unsigned regioninstrs) | ||||||||
994 | { | ||||||||
995 | // ScheduleDAGMI initializes SchedImpl's per-region policy. | ||||||||
996 | ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs); | ||||||||
997 | |||||||||
998 | // For convenience remember the end of the liveness region. | ||||||||
999 | LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd); | ||||||||
1000 | |||||||||
1001 | SUPressureDiffs.clear(); | ||||||||
1002 | |||||||||
1003 | ShouldTrackPressure = SchedImpl->shouldTrackPressure(); | ||||||||
1004 | ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks(); | ||||||||
1005 | |||||||||
1006 | assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&((void)0) | ||||||||
1007 | "ShouldTrackLaneMasks requires ShouldTrackPressure")((void)0); | ||||||||
1008 | } | ||||||||
1009 | |||||||||
1010 | // Setup the register pressure trackers for the top scheduled and bottom | ||||||||
1011 | // scheduled regions. | ||||||||
1012 | void ScheduleDAGMILive::initRegPressure() { | ||||||||
1013 | VRegUses.clear(); | ||||||||
1014 | VRegUses.setUniverse(MRI.getNumVirtRegs()); | ||||||||
1015 | for (SUnit &SU : SUnits) | ||||||||
1016 | collectVRegUses(SU); | ||||||||
1017 | |||||||||
1018 | TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin, | ||||||||
1019 | ShouldTrackLaneMasks, false); | ||||||||
1020 | BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd, | ||||||||
1021 | ShouldTrackLaneMasks, false); | ||||||||
1022 | |||||||||
1023 | // Close the RPTracker to finalize live ins. | ||||||||
1024 | RPTracker.closeRegion(); | ||||||||
1025 | |||||||||
1026 | LLVM_DEBUG(RPTracker.dump())do { } while (false); | ||||||||
1027 | |||||||||
1028 | // Initialize the live ins and live outs. | ||||||||
1029 | TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs); | ||||||||
1030 | BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs); | ||||||||
1031 | |||||||||
1032 | // Close one end of the tracker so we can call | ||||||||
1033 | // getMaxUpward/DownwardPressureDelta before advancing across any | ||||||||
1034 | // instructions. This converts currently live regs into live ins/outs. | ||||||||
1035 | TopRPTracker.closeTop(); | ||||||||
1036 | BotRPTracker.closeBottom(); | ||||||||
1037 | |||||||||
1038 | BotRPTracker.initLiveThru(RPTracker); | ||||||||
1039 | if (!BotRPTracker.getLiveThru().empty()) { | ||||||||
1040 | TopRPTracker.initLiveThru(BotRPTracker.getLiveThru()); | ||||||||
1041 | LLVM_DEBUG(dbgs() << "Live Thru: ";do { } while (false) | ||||||||
1042 | dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI))do { } while (false); | ||||||||
1043 | }; | ||||||||
1044 | |||||||||
1045 | // For each live out vreg reduce the pressure change associated with other | ||||||||
1046 | // uses of the same vreg below the live-out reaching def. | ||||||||
1047 | updatePressureDiffs(RPTracker.getPressure().LiveOutRegs); | ||||||||
1048 | |||||||||
1049 | // Account for liveness generated by the region boundary. | ||||||||
1050 | if (LiveRegionEnd != RegionEnd) { | ||||||||
1051 | SmallVector<RegisterMaskPair, 8> LiveUses; | ||||||||
1052 | BotRPTracker.recede(&LiveUses); | ||||||||
1053 | updatePressureDiffs(LiveUses); | ||||||||
1054 | } | ||||||||
1055 | |||||||||
1056 | LLVM_DEBUG(dbgs() << "Top Pressure:\n";do { } while (false) | ||||||||
1057 | dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);do { } while (false) | ||||||||
1058 | dbgs() << "Bottom Pressure:\n";do { } while (false) | ||||||||
1059 | dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI);)do { } while (false); | ||||||||
1060 | |||||||||
1061 | assert((BotRPTracker.getPos() == RegionEnd ||((void)0) | ||||||||
1062 | (RegionEnd->isDebugInstr() &&((void)0) | ||||||||
1063 | BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) &&((void)0) | ||||||||
1064 | "Can't find the region bottom")((void)0); | ||||||||
1065 | |||||||||
1066 | // Cache the list of excess pressure sets in this region. This will also track | ||||||||
1067 | // the max pressure in the scheduled code for these sets. | ||||||||
1068 | RegionCriticalPSets.clear(); | ||||||||
1069 | const std::vector<unsigned> &RegionPressure = | ||||||||
1070 | RPTracker.getPressure().MaxSetPressure; | ||||||||
1071 | for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) { | ||||||||
1072 | unsigned Limit = RegClassInfo->getRegPressureSetLimit(i); | ||||||||
1073 | if (RegionPressure[i] > Limit) { | ||||||||
1074 | LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limitdo { } while (false) | ||||||||
1075 | << " Actual " << RegionPressure[i] << "\n")do { } while (false); | ||||||||
1076 | RegionCriticalPSets.push_back(PressureChange(i)); | ||||||||
1077 | } | ||||||||
1078 | } | ||||||||
1079 | LLVM_DEBUG(dbgs() << "Excess PSets: ";do { } while (false) | ||||||||
1080 | for (const PressureChange &RCPSdo { } while (false) | ||||||||
1081 | : RegionCriticalPSets) dbgs()do { } while (false) | ||||||||
1082 | << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";do { } while (false) | ||||||||
1083 | dbgs() << "\n")do { } while (false); | ||||||||
1084 | } | ||||||||
1085 | |||||||||
1086 | void ScheduleDAGMILive:: | ||||||||
1087 | updateScheduledPressure(const SUnit *SU, | ||||||||
1088 | const std::vector<unsigned> &NewMaxPressure) { | ||||||||
1089 | const PressureDiff &PDiff = getPressureDiff(SU); | ||||||||
1090 | unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size(); | ||||||||
1091 | for (const PressureChange &PC : PDiff) { | ||||||||
1092 | if (!PC.isValid()) | ||||||||
1093 | break; | ||||||||
1094 | unsigned ID = PC.getPSet(); | ||||||||
1095 | while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID) | ||||||||
1096 | ++CritIdx; | ||||||||
1097 | if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) { | ||||||||
1098 | if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc() | ||||||||
1099 | && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max()) | ||||||||
1100 | RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]); | ||||||||
1101 | } | ||||||||
1102 | unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID); | ||||||||
1103 | if (NewMaxPressure[ID] >= Limit - 2) { | ||||||||
1104 | LLVM_DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "do { } while (false) | ||||||||
1105 | << NewMaxPressure[ID]do { } while (false) | ||||||||
1106 | << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")do { } while (false) | ||||||||
1107 | << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]do { } while (false) | ||||||||
1108 | << " livethru)\n")do { } while (false); | ||||||||
1109 | } | ||||||||
1110 | } | ||||||||
1111 | } | ||||||||
1112 | |||||||||
1113 | /// Update the PressureDiff array for liveness after scheduling this | ||||||||
1114 | /// instruction. | ||||||||
1115 | void ScheduleDAGMILive::updatePressureDiffs( | ||||||||
1116 | ArrayRef<RegisterMaskPair> LiveUses) { | ||||||||
1117 | for (const RegisterMaskPair &P : LiveUses) { | ||||||||
1118 | Register Reg = P.RegUnit; | ||||||||
1119 | /// FIXME: Currently assuming single-use physregs. | ||||||||
1120 | if (!Register::isVirtualRegister(Reg)) | ||||||||
1121 | continue; | ||||||||
1122 | |||||||||
1123 | if (ShouldTrackLaneMasks) { | ||||||||
1124 | // If the register has just become live then other uses won't change | ||||||||
1125 | // this fact anymore => decrement pressure. | ||||||||
1126 | // If the register has just become dead then other uses make it come | ||||||||
1127 | // back to life => increment pressure. | ||||||||
1128 | bool Decrement = P.LaneMask.any(); | ||||||||
1129 | |||||||||
1130 | for (const VReg2SUnit &V2SU | ||||||||
1131 | : make_range(VRegUses.find(Reg), VRegUses.end())) { | ||||||||
1132 | SUnit &SU = *V2SU.SU; | ||||||||
1133 | if (SU.isScheduled || &SU == &ExitSU) | ||||||||
1134 | continue; | ||||||||
1135 | |||||||||
1136 | PressureDiff &PDiff = getPressureDiff(&SU); | ||||||||
1137 | PDiff.addPressureChange(Reg, Decrement, &MRI); | ||||||||
1138 | LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") "do { } while (false) | ||||||||
1139 | << printReg(Reg, TRI) << ':'do { } while (false) | ||||||||
1140 | << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();do { } while (false) | ||||||||
1141 | dbgs() << " to "; PDiff.dump(*TRI);)do { } while (false); | ||||||||
1142 | } | ||||||||
1143 | } else { | ||||||||
1144 | assert(P.LaneMask.any())((void)0); | ||||||||
1145 | LLVM_DEBUG(dbgs() << " LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n")do { } while (false); | ||||||||
1146 | // This may be called before CurrentBottom has been initialized. However, | ||||||||
1147 | // BotRPTracker must have a valid position. We want the value live into the | ||||||||
1148 | // instruction or live out of the block, so ask for the previous | ||||||||
1149 | // instruction's live-out. | ||||||||
1150 | const LiveInterval &LI = LIS->getInterval(Reg); | ||||||||
1151 | VNInfo *VNI; | ||||||||
1152 | MachineBasicBlock::const_iterator I = | ||||||||
1153 | nextIfDebug(BotRPTracker.getPos(), BB->end()); | ||||||||
1154 | if (I == BB->end()) | ||||||||
1155 | VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB)); | ||||||||
1156 | else { | ||||||||
1157 | LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I)); | ||||||||
1158 | VNI = LRQ.valueIn(); | ||||||||
1159 | } | ||||||||
1160 | // RegisterPressureTracker guarantees that readsReg is true for LiveUses. | ||||||||
1161 | assert(VNI && "No live value at use.")((void)0); | ||||||||
1162 | for (const VReg2SUnit &V2SU | ||||||||
1163 | : make_range(VRegUses.find(Reg), VRegUses.end())) { | ||||||||
1164 | SUnit *SU = V2SU.SU; | ||||||||
1165 | // If this use comes before the reaching def, it cannot be a last use, | ||||||||
1166 | // so decrease its pressure change. | ||||||||
1167 | if (!SU->isScheduled && SU != &ExitSU) { | ||||||||
1168 | LiveQueryResult LRQ = | ||||||||
1169 | LI.Query(LIS->getInstructionIndex(*SU->getInstr())); | ||||||||
1170 | if (LRQ.valueIn() == VNI) { | ||||||||
1171 | PressureDiff &PDiff = getPressureDiff(SU); | ||||||||
1172 | PDiff.addPressureChange(Reg, true, &MRI); | ||||||||
1173 | LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "do { } while (false) | ||||||||
1174 | << *SU->getInstr();do { } while (false) | ||||||||
1175 | dbgs() << " to "; PDiff.dump(*TRI);)do { } while (false); | ||||||||
1176 | } | ||||||||
1177 | } | ||||||||
1178 | } | ||||||||
1179 | } | ||||||||
1180 | } | ||||||||
1181 | } | ||||||||
1182 | |||||||||
1183 | void ScheduleDAGMILive::dump() const { | ||||||||
1184 | #if !defined(NDEBUG1) || defined(LLVM_ENABLE_DUMP) | ||||||||
1185 | if (EntrySU.getInstr() != nullptr) | ||||||||
1186 | dumpNodeAll(EntrySU); | ||||||||
1187 | for (const SUnit &SU : SUnits) { | ||||||||
1188 | dumpNodeAll(SU); | ||||||||
1189 | if (ShouldTrackPressure) { | ||||||||
1190 | dbgs() << " Pressure Diff : "; | ||||||||
1191 | getPressureDiff(&SU).dump(*TRI); | ||||||||
1192 | } | ||||||||
1193 | dbgs() << " Single Issue : "; | ||||||||
1194 | if (SchedModel.mustBeginGroup(SU.getInstr()) && | ||||||||
1195 | SchedModel.mustEndGroup(SU.getInstr())) | ||||||||
1196 | dbgs() << "true;"; | ||||||||
1197 | else | ||||||||
1198 | dbgs() << "false;"; | ||||||||
1199 | dbgs() << '\n'; | ||||||||
1200 | } | ||||||||
1201 | if (ExitSU.getInstr() != nullptr) | ||||||||
1202 | dumpNodeAll(ExitSU); | ||||||||
1203 | #endif | ||||||||
1204 | } | ||||||||
1205 | |||||||||
1206 | /// schedule - Called back from MachineScheduler::runOnMachineFunction | ||||||||
1207 | /// after setting up the current scheduling region. [RegionBegin, RegionEnd) | ||||||||
1208 | /// only includes instructions that have DAG nodes, not scheduling boundaries. | ||||||||
1209 | /// | ||||||||
1210 | /// This is a skeletal driver, with all the functionality pushed into helpers, | ||||||||
1211 | /// so that it can be easily extended by experimental schedulers. Generally, | ||||||||
1212 | /// implementing MachineSchedStrategy should be sufficient to implement a new | ||||||||
1213 | /// scheduling algorithm. However, if a scheduler further subclasses | ||||||||
1214 | /// ScheduleDAGMILive then it will want to override this virtual method in order | ||||||||
1215 | /// to update any specialized state. | ||||||||
1216 | void ScheduleDAGMILive::schedule() { | ||||||||
1217 | LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n")do { } while (false); | ||||||||
1218 | LLVM_DEBUG(SchedImpl->dumpPolicy())do { } while (false); | ||||||||
1219 | buildDAGWithRegPressure(); | ||||||||
1220 | |||||||||
1221 | postprocessDAG(); | ||||||||
1222 | |||||||||
1223 | SmallVector<SUnit*, 8> TopRoots, BotRoots; | ||||||||
1224 | findRootsAndBiasEdges(TopRoots, BotRoots); | ||||||||
1225 | |||||||||
1226 | // Initialize the strategy before modifying the DAG. | ||||||||
1227 | // This may initialize a DFSResult to be used for queue priority. | ||||||||
1228 | SchedImpl->initialize(this); | ||||||||
1229 | |||||||||
1230 | LLVM_DEBUG(dump())do { } while (false); | ||||||||
1231 | if (PrintDAGs) dump(); | ||||||||
1232 | if (ViewMISchedDAGs) viewGraph(); | ||||||||
1233 | |||||||||
1234 | // Initialize ready queues now that the DAG and priority data are finalized. | ||||||||
1235 | initQueues(TopRoots, BotRoots); | ||||||||
1236 | |||||||||
1237 | bool IsTopNode = false; | ||||||||
1238 | while (true) { | ||||||||
1239 | LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n")do { } while (false); | ||||||||
1240 | SUnit *SU = SchedImpl->pickNode(IsTopNode); | ||||||||
1241 | if (!SU) break; | ||||||||
1242 | |||||||||
1243 | assert(!SU->isScheduled && "Node already scheduled")((void)0); | ||||||||
1244 | if (!checkSchedLimit()) | ||||||||
1245 | break; | ||||||||
1246 | |||||||||
1247 | scheduleMI(SU, IsTopNode); | ||||||||
1248 | |||||||||
1249 | if (DFSResult) { | ||||||||
1250 | unsigned SubtreeID = DFSResult->getSubtreeID(SU); | ||||||||
1251 | if (!ScheduledTrees.test(SubtreeID)) { | ||||||||
1252 | ScheduledTrees.set(SubtreeID); | ||||||||
1253 | DFSResult->scheduleTree(SubtreeID); | ||||||||
1254 | SchedImpl->scheduleTree(SubtreeID); | ||||||||
1255 | } | ||||||||
1256 | } | ||||||||
1257 | |||||||||
1258 | // Notify the scheduling strategy after updating the DAG. | ||||||||
1259 | SchedImpl->schedNode(SU, IsTopNode); | ||||||||
1260 | |||||||||
1261 | updateQueues(SU, IsTopNode); | ||||||||
1262 | } | ||||||||
1263 | assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.")((void)0); | ||||||||
1264 | |||||||||
1265 | placeDebugValues(); | ||||||||
1266 | |||||||||
1267 | LLVM_DEBUG({do { } while (false) | ||||||||
1268 | dbgs() << "*** Final schedule for "do { } while (false) | ||||||||
1269 | << printMBBReference(*begin()->getParent()) << " ***\n";do { } while (false) | ||||||||
1270 | dumpSchedule();do { } while (false) | ||||||||
1271 | dbgs() << '\n';do { } while (false) | ||||||||
1272 | })do { } while (false); | ||||||||
1273 | } | ||||||||
1274 | |||||||||
1275 | /// Build the DAG and setup three register pressure trackers. | ||||||||
1276 | void ScheduleDAGMILive::buildDAGWithRegPressure() { | ||||||||
1277 | if (!ShouldTrackPressure) { | ||||||||
1278 | RPTracker.reset(); | ||||||||
1279 | RegionCriticalPSets.clear(); | ||||||||
1280 | buildSchedGraph(AA); | ||||||||
1281 | return; | ||||||||
1282 | } | ||||||||
1283 | |||||||||
1284 | // Initialize the register pressure tracker used by buildSchedGraph. | ||||||||
1285 | RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd, | ||||||||
1286 | ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true); | ||||||||
1287 | |||||||||
1288 | // Account for liveness generate by the region boundary. | ||||||||
1289 | if (LiveRegionEnd != RegionEnd) | ||||||||
1290 | RPTracker.recede(); | ||||||||
1291 | |||||||||
1292 | // Build the DAG, and compute current register pressure. | ||||||||
1293 | buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks); | ||||||||
1294 | |||||||||
1295 | // Initialize top/bottom trackers after computing region pressure. | ||||||||
1296 | initRegPressure(); | ||||||||
1297 | } | ||||||||
1298 | |||||||||
1299 | void ScheduleDAGMILive::computeDFSResult() { | ||||||||
1300 | if (!DFSResult) | ||||||||
1301 | DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize); | ||||||||
1302 | DFSResult->clear(); | ||||||||
1303 | ScheduledTrees.clear(); | ||||||||
1304 | DFSResult->resize(SUnits.size()); | ||||||||
1305 | DFSResult->compute(SUnits); | ||||||||
1306 | ScheduledTrees.resize(DFSResult->getNumSubtrees()); | ||||||||
1307 | } | ||||||||
1308 | |||||||||
1309 | /// Compute the max cyclic critical path through the DAG. The scheduling DAG | ||||||||
1310 | /// only provides the critical path for single block loops. To handle loops that | ||||||||
1311 | /// span blocks, we could use the vreg path latencies provided by | ||||||||
1312 | /// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently | ||||||||
1313 | /// available for use in the scheduler. | ||||||||
1314 | /// | ||||||||
1315 | /// The cyclic path estimation identifies a def-use pair that crosses the back | ||||||||
1316 | /// edge and considers the depth and height of the nodes. For example, consider | ||||||||
1317 | /// the following instruction sequence where each instruction has unit latency | ||||||||
1318 | /// and defines an eponymous virtual register: | ||||||||
1319 | /// | ||||||||
1320 | /// a->b(a,c)->c(b)->d(c)->exit | ||||||||
1321 | /// | ||||||||
1322 | /// The cyclic critical path is a two cycles: b->c->b | ||||||||
1323 | /// The acyclic critical path is four cycles: a->b->c->d->exit | ||||||||
1324 | /// LiveOutHeight = height(c) = len(c->d->exit) = 2 | ||||||||
1325 | /// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3 | ||||||||
1326 | /// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4 | ||||||||
1327 | /// LiveInDepth = depth(b) = len(a->b) = 1 | ||||||||
1328 | /// | ||||||||
1329 | /// LiveOutDepth - LiveInDepth = 3 - 1 = 2 | ||||||||
1330 | /// LiveInHeight - LiveOutHeight = 4 - 2 = 2 | ||||||||
1331 | /// CyclicCriticalPath = min(2, 2) = 2 | ||||||||
1332 | /// | ||||||||
1333 | /// This could be relevant to PostRA scheduling, but is currently implemented | ||||||||
1334 | /// assuming LiveIntervals. | ||||||||
1335 | unsigned ScheduleDAGMILive::computeCyclicCriticalPath() { | ||||||||
1336 | // This only applies to single block loop. | ||||||||
1337 | if (!BB->isSuccessor(BB)) | ||||||||
1338 | return 0; | ||||||||
1339 | |||||||||
1340 | unsigned MaxCyclicLatency = 0; | ||||||||
1341 | // Visit each live out vreg def to find def/use pairs that cross iterations. | ||||||||
1342 | for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) { | ||||||||
1343 | Register Reg = P.RegUnit; | ||||||||
1344 | if (!Register::isVirtualRegister(Reg)) | ||||||||
1345 | continue; | ||||||||
1346 | const LiveInterval &LI = LIS->getInterval(Reg); | ||||||||
1347 | const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB)); | ||||||||
1348 | if (!DefVNI) | ||||||||
1349 | continue; | ||||||||
1350 | |||||||||
1351 | MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def); | ||||||||
1352 | const SUnit *DefSU = getSUnit(DefMI); | ||||||||
1353 | if (!DefSU) | ||||||||
1354 | continue; | ||||||||
1355 | |||||||||
1356 | unsigned LiveOutHeight = DefSU->getHeight(); | ||||||||
1357 | unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency; | ||||||||
1358 | // Visit all local users of the vreg def. | ||||||||
1359 | for (const VReg2SUnit &V2SU | ||||||||
1360 | : make_range(VRegUses.find(Reg), VRegUses.end())) { | ||||||||
1361 | SUnit *SU = V2SU.SU; | ||||||||
1362 | if (SU == &ExitSU) | ||||||||
1363 | continue; | ||||||||
1364 | |||||||||
1365 | // Only consider uses of the phi. | ||||||||
1366 | LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr())); | ||||||||
1367 | if (!LRQ.valueIn()->isPHIDef()) | ||||||||
1368 | continue; | ||||||||
1369 | |||||||||
1370 | // Assume that a path spanning two iterations is a cycle, which could | ||||||||
1371 | // overestimate in strange cases. This allows cyclic latency to be | ||||||||
1372 | // estimated as the minimum slack of the vreg's depth or height. | ||||||||
1373 | unsigned CyclicLatency = 0; | ||||||||
1374 | if (LiveOutDepth > SU->getDepth()) | ||||||||
1375 | CyclicLatency = LiveOutDepth - SU->getDepth(); | ||||||||
1376 | |||||||||
1377 | unsigned LiveInHeight = SU->getHeight() + DefSU->Latency; | ||||||||
1378 | if (LiveInHeight > LiveOutHeight) { | ||||||||
1379 | if (LiveInHeight - LiveOutHeight < CyclicLatency) | ||||||||
1380 | CyclicLatency = LiveInHeight - LiveOutHeight; | ||||||||
1381 | } else | ||||||||
1382 | CyclicLatency = 0; | ||||||||
1383 | |||||||||
1384 | LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("do { } while (false) | ||||||||
1385 | << SU->NodeNum << ") = " << CyclicLatency << "c\n")do { } while (false); | ||||||||
1386 | if (CyclicLatency > MaxCyclicLatency) | ||||||||
1387 | MaxCyclicLatency = CyclicLatency; | ||||||||
1388 | } | ||||||||
1389 | } | ||||||||
1390 | LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n")do { } while (false); | ||||||||
1391 | return MaxCyclicLatency; | ||||||||
1392 | } | ||||||||
1393 | |||||||||
1394 | /// Release ExitSU predecessors and setup scheduler queues. Re-position | ||||||||
1395 | /// the Top RP tracker in case the region beginning has changed. | ||||||||
1396 | void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots, | ||||||||
1397 | ArrayRef<SUnit*> BotRoots) { | ||||||||
1398 | ScheduleDAGMI::initQueues(TopRoots, BotRoots); | ||||||||
1399 | if (ShouldTrackPressure) { | ||||||||
1400 | assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker")((void)0); | ||||||||
1401 | TopRPTracker.setPos(CurrentTop); | ||||||||
1402 | } | ||||||||
1403 | } | ||||||||
1404 | |||||||||
1405 | /// Move an instruction and update register pressure. | ||||||||
1406 | void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) { | ||||||||
1407 | // Move the instruction to its new location in the instruction stream. | ||||||||
1408 | MachineInstr *MI = SU->getInstr(); | ||||||||
1409 | |||||||||
1410 | if (IsTopNode) { | ||||||||
1411 | assert(SU->isTopReady() && "node still has unscheduled dependencies")((void)0); | ||||||||
1412 | if (&*CurrentTop == MI) | ||||||||
1413 | CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom); | ||||||||
1414 | else { | ||||||||
1415 | moveInstruction(MI, CurrentTop); | ||||||||
1416 | TopRPTracker.setPos(MI); | ||||||||
1417 | } | ||||||||
1418 | |||||||||
1419 | if (ShouldTrackPressure) { | ||||||||
1420 | // Update top scheduled pressure. | ||||||||
1421 | RegisterOperands RegOpers; | ||||||||
1422 | RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false); | ||||||||
1423 | if (ShouldTrackLaneMasks) { | ||||||||
1424 | // Adjust liveness and add missing dead+read-undef flags. | ||||||||
1425 | SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot(); | ||||||||
1426 | RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI); | ||||||||
1427 | } else { | ||||||||
1428 | // Adjust for missing dead-def flags. | ||||||||
1429 | RegOpers.detectDeadDefs(*MI, *LIS); | ||||||||
1430 | } | ||||||||
1431 | |||||||||
1432 | TopRPTracker.advance(RegOpers); | ||||||||
1433 | assert(TopRPTracker.getPos() == CurrentTop && "out of sync")((void)0); | ||||||||
1434 | LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure(do { } while (false) | ||||||||
1435 | TopRPTracker.getRegSetPressureAtPos(), TRI);)do { } while (false); | ||||||||
1436 | |||||||||
1437 | updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure); | ||||||||
1438 | } | ||||||||
1439 | } else { | ||||||||
1440 | assert(SU->isBottomReady() && "node still has unscheduled dependencies")((void)0); | ||||||||
1441 | MachineBasicBlock::iterator priorII = | ||||||||
1442 | priorNonDebug(CurrentBottom, CurrentTop); | ||||||||
1443 | if (&*priorII == MI) | ||||||||
1444 | CurrentBottom = priorII; | ||||||||
1445 | else { | ||||||||
1446 | if (&*CurrentTop == MI) { | ||||||||
1447 | CurrentTop = nextIfDebug(++CurrentTop, priorII); | ||||||||
1448 | TopRPTracker.setPos(CurrentTop); | ||||||||
1449 | } | ||||||||
1450 | moveInstruction(MI, CurrentBottom); | ||||||||
1451 | CurrentBottom = MI; | ||||||||
1452 | BotRPTracker.setPos(CurrentBottom); | ||||||||
1453 | } | ||||||||
1454 | if (ShouldTrackPressure) { | ||||||||
1455 | RegisterOperands RegOpers; | ||||||||
1456 | RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false); | ||||||||
1457 | if (ShouldTrackLaneMasks) { | ||||||||
1458 | // Adjust liveness and add missing dead+read-undef flags. | ||||||||
1459 | SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot(); | ||||||||
1460 | RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI); | ||||||||
1461 | } else { | ||||||||
1462 | // Adjust for missing dead-def flags. | ||||||||
1463 | RegOpers.detectDeadDefs(*MI, *LIS); | ||||||||
1464 | } | ||||||||
1465 | |||||||||
1466 | if (BotRPTracker.getPos() != CurrentBottom) | ||||||||
1467 | BotRPTracker.recedeSkipDebugValues(); | ||||||||
1468 | SmallVector<RegisterMaskPair, 8> LiveUses; | ||||||||
1469 | BotRPTracker.recede(RegOpers, &LiveUses); | ||||||||
1470 | assert(BotRPTracker.getPos() == CurrentBottom && "out of sync")((void)0); | ||||||||
1471 | LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure(do { } while (false) | ||||||||
1472 | BotRPTracker.getRegSetPressureAtPos(), TRI);)do { } while (false); | ||||||||
1473 | |||||||||
1474 | updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure); | ||||||||
1475 | updatePressureDiffs(LiveUses); | ||||||||
1476 | } | ||||||||
1477 | } | ||||||||
1478 | } | ||||||||
1479 | |||||||||
1480 | //===----------------------------------------------------------------------===// | ||||||||
1481 | // BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores. | ||||||||
1482 | //===----------------------------------------------------------------------===// | ||||||||
1483 | |||||||||
1484 | namespace { | ||||||||
1485 | |||||||||
1486 | /// Post-process the DAG to create cluster edges between neighboring | ||||||||
1487 | /// loads or between neighboring stores. | ||||||||
1488 | class BaseMemOpClusterMutation : public ScheduleDAGMutation { | ||||||||
1489 | struct MemOpInfo { | ||||||||
1490 | SUnit *SU; | ||||||||
1491 | SmallVector<const MachineOperand *, 4> BaseOps; | ||||||||
1492 | int64_t Offset; | ||||||||
1493 | unsigned Width; | ||||||||
1494 | |||||||||
1495 | MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps, | ||||||||
1496 | int64_t Offset, unsigned Width) | ||||||||
1497 | : SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset), | ||||||||
1498 | Width(Width) {} | ||||||||
1499 | |||||||||
1500 | static bool Compare(const MachineOperand *const &A, | ||||||||
1501 | const MachineOperand *const &B) { | ||||||||
1502 | if (A->getType() != B->getType()) | ||||||||
1503 | return A->getType() < B->getType(); | ||||||||
1504 | if (A->isReg()) | ||||||||
1505 | return A->getReg() < B->getReg(); | ||||||||
1506 | if (A->isFI()) { | ||||||||
1507 | const MachineFunction &MF = *A->getParent()->getParent()->getParent(); | ||||||||
1508 | const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering(); | ||||||||
1509 | bool StackGrowsDown = TFI.getStackGrowthDirection() == | ||||||||
1510 | TargetFrameLowering::StackGrowsDown; | ||||||||
1511 | return StackGrowsDown ? A->getIndex() > B->getIndex() | ||||||||
1512 | : A->getIndex() < B->getIndex(); | ||||||||
1513 | } | ||||||||
1514 | |||||||||
1515 | llvm_unreachable("MemOpClusterMutation only supports register or frame "__builtin_unreachable() | ||||||||
1516 | "index bases.")__builtin_unreachable(); | ||||||||
1517 | } | ||||||||
1518 | |||||||||
1519 | bool operator<(const MemOpInfo &RHS) const { | ||||||||
1520 | // FIXME: Don't compare everything twice. Maybe use C++20 three way | ||||||||
1521 | // comparison instead when it's available. | ||||||||
1522 | if (std::lexicographical_compare(BaseOps.begin(), BaseOps.end(), | ||||||||
1523 | RHS.BaseOps.begin(), RHS.BaseOps.end(), | ||||||||
1524 | Compare)) | ||||||||
1525 | return true; | ||||||||
1526 | if (std::lexicographical_compare(RHS.BaseOps.begin(), RHS.BaseOps.end(), | ||||||||
1527 | BaseOps.begin(), BaseOps.end(), Compare)) | ||||||||
1528 | return false; | ||||||||
1529 | if (Offset != RHS.Offset) | ||||||||
1530 | return Offset < RHS.Offset; | ||||||||
1531 | return SU->NodeNum < RHS.SU->NodeNum; | ||||||||
1532 | } | ||||||||
1533 | }; | ||||||||
1534 | |||||||||
1535 | const TargetInstrInfo *TII; | ||||||||
1536 | const TargetRegisterInfo *TRI; | ||||||||
1537 | bool IsLoad; | ||||||||
1538 | |||||||||
1539 | public: | ||||||||
1540 | BaseMemOpClusterMutation(const TargetInstrInfo *tii, | ||||||||
1541 | const TargetRegisterInfo *tri, bool IsLoad) | ||||||||
1542 | : TII(tii), TRI(tri), IsLoad(IsLoad) {} | ||||||||
1543 | |||||||||
1544 | void apply(ScheduleDAGInstrs *DAGInstrs) override; | ||||||||
1545 | |||||||||
1546 | protected: | ||||||||
1547 | void clusterNeighboringMemOps(ArrayRef<MemOpInfo> MemOps, bool FastCluster, | ||||||||
1548 | ScheduleDAGInstrs *DAG); | ||||||||
1549 | void collectMemOpRecords(std::vector<SUnit> &SUnits, | ||||||||
1550 | SmallVectorImpl<MemOpInfo> &MemOpRecords); | ||||||||
1551 | bool groupMemOps(ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG, | ||||||||
1552 | DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups); | ||||||||
1553 | }; | ||||||||
1554 | |||||||||
1555 | class StoreClusterMutation : public BaseMemOpClusterMutation { | ||||||||
1556 | public: | ||||||||
1557 | StoreClusterMutation(const TargetInstrInfo *tii, | ||||||||
1558 | const TargetRegisterInfo *tri) | ||||||||
1559 | : BaseMemOpClusterMutation(tii, tri, false) {} | ||||||||
1560 | }; | ||||||||
1561 | |||||||||
1562 | class LoadClusterMutation : public BaseMemOpClusterMutation { | ||||||||
1563 | public: | ||||||||
1564 | LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri) | ||||||||
1565 | : BaseMemOpClusterMutation(tii, tri, true) {} | ||||||||
1566 | }; | ||||||||
1567 | |||||||||
1568 | } // end anonymous namespace | ||||||||
1569 | |||||||||
1570 | namespace llvm { | ||||||||
1571 | |||||||||
1572 | std::unique_ptr<ScheduleDAGMutation> | ||||||||
1573 | createLoadClusterDAGMutation(const TargetInstrInfo *TII, | ||||||||
1574 | const TargetRegisterInfo *TRI) { | ||||||||
1575 | return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(TII, TRI) | ||||||||
1576 | : nullptr; | ||||||||
1577 | } | ||||||||
1578 | |||||||||
1579 | std::unique_ptr<ScheduleDAGMutation> | ||||||||
1580 | createStoreClusterDAGMutation(const TargetInstrInfo *TII, | ||||||||
1581 | const TargetRegisterInfo *TRI) { | ||||||||
1582 | return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>(TII, TRI) | ||||||||
1583 | : nullptr; | ||||||||
1584 | } | ||||||||
1585 | |||||||||
1586 | } // end namespace llvm | ||||||||
1587 | |||||||||
1588 | // Sorting all the loads/stores first, then for each load/store, checking the | ||||||||
1589 | // following load/store one by one, until reach the first non-dependent one and | ||||||||
1590 | // call target hook to see if they can cluster. | ||||||||
1591 | // If FastCluster is enabled, we assume that, all the loads/stores have been | ||||||||
1592 | // preprocessed and now, they didn't have dependencies on each other. | ||||||||
1593 | void BaseMemOpClusterMutation::clusterNeighboringMemOps( | ||||||||
1594 | ArrayRef<MemOpInfo> MemOpRecords, bool FastCluster, | ||||||||
1595 | ScheduleDAGInstrs *DAG) { | ||||||||
1596 | // Keep track of the current cluster length and bytes for each SUnit. | ||||||||
1597 | DenseMap<unsigned, std::pair<unsigned, unsigned>> SUnit2ClusterInfo; | ||||||||
1598 | |||||||||
1599 | // At this point, `MemOpRecords` array must hold atleast two mem ops. Try to | ||||||||
1600 | // cluster mem ops collected within `MemOpRecords` array. | ||||||||
1601 | for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) { | ||||||||
1602 | // Decision to cluster mem ops is taken based on target dependent logic | ||||||||
1603 | auto MemOpa = MemOpRecords[Idx]; | ||||||||
1604 | |||||||||
1605 | // Seek for the next load/store to do the cluster. | ||||||||
1606 | unsigned NextIdx = Idx + 1; | ||||||||
1607 | for (; NextIdx < End; ++NextIdx) | ||||||||
1608 | // Skip if MemOpb has been clustered already or has dependency with | ||||||||
1609 | // MemOpa. | ||||||||
1610 | if (!SUnit2ClusterInfo.count(MemOpRecords[NextIdx].SU->NodeNum) && | ||||||||
1611 | (FastCluster || | ||||||||
1612 | (!DAG->IsReachable(MemOpRecords[NextIdx].SU, MemOpa.SU) && | ||||||||
1613 | !DAG->IsReachable(MemOpa.SU, MemOpRecords[NextIdx].SU)))) | ||||||||
1614 | break; | ||||||||
1615 | if (NextIdx == End) | ||||||||
1616 | continue; | ||||||||
1617 | |||||||||
1618 | auto MemOpb = MemOpRecords[NextIdx]; | ||||||||
1619 | unsigned ClusterLength = 2; | ||||||||
1620 | unsigned CurrentClusterBytes = MemOpa.Width + MemOpb.Width; | ||||||||
1621 | if (SUnit2ClusterInfo.count(MemOpa.SU->NodeNum)) { | ||||||||
1622 | ClusterLength = SUnit2ClusterInfo[MemOpa.SU->NodeNum].first + 1; | ||||||||
1623 | CurrentClusterBytes = | ||||||||
1624 | SUnit2ClusterInfo[MemOpa.SU->NodeNum].second + MemOpb.Width; | ||||||||
1625 | } | ||||||||
1626 | |||||||||
1627 | if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps, ClusterLength, | ||||||||
1628 | CurrentClusterBytes)) | ||||||||
1629 | continue; | ||||||||
1630 | |||||||||
1631 | SUnit *SUa = MemOpa.SU; | ||||||||
1632 | SUnit *SUb = MemOpb.SU; | ||||||||
1633 | if (SUa->NodeNum > SUb->NodeNum) | ||||||||
1634 | std::swap(SUa, SUb); | ||||||||
1635 | |||||||||
1636 | // FIXME: Is this check really required? | ||||||||
1637 | if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) | ||||||||
1638 | continue; | ||||||||
1639 | |||||||||
1640 | LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("do { } while (false) | ||||||||
1641 | << SUb->NodeNum << ")\n")do { } while (false); | ||||||||
1642 | ++NumClustered; | ||||||||
1643 | |||||||||
1644 | if (IsLoad) { | ||||||||
1645 | // Copy successor edges from SUa to SUb. Interleaving computation | ||||||||
1646 | // dependent on SUa can prevent load combining due to register reuse. | ||||||||
1647 | // Predecessor edges do not need to be copied from SUb to SUa since | ||||||||
1648 | // nearby loads should have effectively the same inputs. | ||||||||
1649 | for (const SDep &Succ : SUa->Succs) { | ||||||||
1650 | if (Succ.getSUnit() == SUb) | ||||||||
1651 | continue; | ||||||||
1652 | LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNumdo { } while (false) | ||||||||
1653 | << ")\n")do { } while (false); | ||||||||
1654 | DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial)); | ||||||||
1655 | } | ||||||||
1656 | } else { | ||||||||
1657 | // Copy predecessor edges from SUb to SUa to avoid the SUnits that | ||||||||
1658 | // SUb dependent on scheduled in-between SUb and SUa. Successor edges | ||||||||
1659 | // do not need to be copied from SUa to SUb since no one will depend | ||||||||
1660 | // on stores. | ||||||||
1661 | // Notice that, we don't need to care about the memory dependency as | ||||||||
1662 | // we won't try to cluster them if they have any memory dependency. | ||||||||
1663 | for (const SDep &Pred : SUb->Preds) { | ||||||||
1664 | if (Pred.getSUnit() == SUa) | ||||||||
1665 | continue; | ||||||||
1666 | LLVM_DEBUG(dbgs() << " Copy Pred SU(" << Pred.getSUnit()->NodeNumdo { } while (false) | ||||||||
1667 | << ")\n")do { } while (false); | ||||||||
1668 | DAG->addEdge(SUa, SDep(Pred.getSUnit(), SDep::Artificial)); | ||||||||
1669 | } | ||||||||
1670 | } | ||||||||
1671 | |||||||||
1672 | SUnit2ClusterInfo[MemOpb.SU->NodeNum] = {ClusterLength, | ||||||||
1673 | CurrentClusterBytes}; | ||||||||
1674 | |||||||||
1675 | LLVM_DEBUG(dbgs() << " Curr cluster length: " << ClusterLengthdo { } while (false) | ||||||||
1676 | << ", Curr cluster bytes: " << CurrentClusterBytesdo { } while (false) | ||||||||
1677 | << "\n")do { } while (false); | ||||||||
1678 | } | ||||||||
1679 | } | ||||||||
1680 | |||||||||
1681 | void BaseMemOpClusterMutation::collectMemOpRecords( | ||||||||
1682 | std::vector<SUnit> &SUnits, SmallVectorImpl<MemOpInfo> &MemOpRecords) { | ||||||||
1683 | for (auto &SU : SUnits) { | ||||||||
1684 | if ((IsLoad && !SU.getInstr()->mayLoad()) || | ||||||||
1685 | (!IsLoad && !SU.getInstr()->mayStore())) | ||||||||
1686 | continue; | ||||||||
1687 | |||||||||
1688 | const MachineInstr &MI = *SU.getInstr(); | ||||||||
1689 | SmallVector<const MachineOperand *, 4> BaseOps; | ||||||||
1690 | int64_t Offset; | ||||||||
1691 | bool OffsetIsScalable; | ||||||||
1692 | unsigned Width; | ||||||||
1693 | if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset, | ||||||||
1694 | OffsetIsScalable, Width, TRI)) { | ||||||||
1695 | MemOpRecords.push_back(MemOpInfo(&SU, BaseOps, Offset, Width)); | ||||||||
1696 | |||||||||
1697 | LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "do { } while (false) | ||||||||
1698 | << Offset << ", OffsetIsScalable: " << OffsetIsScalabledo { } while (false) | ||||||||
1699 | << ", Width: " << Width << "\n")do { } while (false); | ||||||||
1700 | } | ||||||||
1701 | #ifndef NDEBUG1 | ||||||||
1702 | for (auto *Op : BaseOps) | ||||||||
1703 | assert(Op)((void)0); | ||||||||
1704 | #endif | ||||||||
1705 | } | ||||||||
1706 | } | ||||||||
1707 | |||||||||
1708 | bool BaseMemOpClusterMutation::groupMemOps( | ||||||||
1709 | ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG, | ||||||||
1710 | DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups) { | ||||||||
1711 | bool FastCluster = | ||||||||
1712 | ForceFastCluster || | ||||||||
1713 | MemOps.size() * DAG->SUnits.size() / 1000 > FastClusterThreshold; | ||||||||
1714 | |||||||||
1715 | for (const auto &MemOp : MemOps) { | ||||||||
1716 | unsigned ChainPredID = DAG->SUnits.size(); | ||||||||
1717 | if (FastCluster) { | ||||||||
1718 | for (const SDep &Pred : MemOp.SU->Preds) { | ||||||||
1719 | // We only want to cluster the mem ops that have the same ctrl(non-data) | ||||||||
1720 | // pred so that they didn't have ctrl dependency for each other. But for | ||||||||
1721 | // store instrs, we can still cluster them if the pred is load instr. | ||||||||
1722 | if ((Pred.isCtrl() && | ||||||||
1723 | (IsLoad || | ||||||||
1724 | (Pred.getSUnit() && Pred.getSUnit()->getInstr()->mayStore()))) && | ||||||||
1725 | !Pred.isArtificial()) { | ||||||||
1726 | ChainPredID = Pred.getSUnit()->NodeNum; | ||||||||
1727 | break; | ||||||||
1728 | } | ||||||||
1729 | } | ||||||||
1730 | } else | ||||||||
1731 | ChainPredID = 0; | ||||||||
1732 | |||||||||
1733 | Groups[ChainPredID].push_back(MemOp); | ||||||||
1734 | } | ||||||||
1735 | return FastCluster; | ||||||||
1736 | } | ||||||||
1737 | |||||||||
1738 | /// Callback from DAG postProcessing to create cluster edges for loads/stores. | ||||||||
1739 | void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) { | ||||||||
1740 | // Collect all the clusterable loads/stores | ||||||||
1741 | SmallVector<MemOpInfo, 32> MemOpRecords; | ||||||||
1742 | collectMemOpRecords(DAG->SUnits, MemOpRecords); | ||||||||
1743 | |||||||||
1744 | if (MemOpRecords.size() < 2) | ||||||||
1745 | return; | ||||||||
1746 | |||||||||
1747 | // Put the loads/stores without dependency into the same group with some | ||||||||
1748 | // heuristic if the DAG is too complex to avoid compiling time blow up. | ||||||||
1749 | // Notice that, some fusion pair could be lost with this. | ||||||||
1750 | DenseMap<unsigned, SmallVector<MemOpInfo, 32>> Groups; | ||||||||
1751 | bool FastCluster = groupMemOps(MemOpRecords, DAG, Groups); | ||||||||
1752 | |||||||||
1753 | for (auto &Group : Groups) { | ||||||||
1754 | // Sorting the loads/stores, so that, we can stop the cluster as early as | ||||||||
1755 | // possible. | ||||||||
1756 | llvm::sort(Group.second); | ||||||||
1757 | |||||||||
1758 | // Trying to cluster all the neighboring loads/stores. | ||||||||
1759 | clusterNeighboringMemOps(Group.second, FastCluster, DAG); | ||||||||
1760 | } | ||||||||
1761 | } | ||||||||
1762 | |||||||||
1763 | //===----------------------------------------------------------------------===// | ||||||||
1764 | // CopyConstrain - DAG post-processing to encourage copy elimination. | ||||||||
1765 | //===----------------------------------------------------------------------===// | ||||||||
1766 | |||||||||
1767 | namespace { | ||||||||
1768 | |||||||||
1769 | /// Post-process the DAG to create weak edges from all uses of a copy to | ||||||||
1770 | /// the one use that defines the copy's source vreg, most likely an induction | ||||||||
1771 | /// variable increment. | ||||||||
1772 | class CopyConstrain : public ScheduleDAGMutation { | ||||||||
1773 | // Transient state. | ||||||||
1774 | SlotIndex RegionBeginIdx; | ||||||||
1775 | |||||||||
1776 | // RegionEndIdx is the slot index of the last non-debug instruction in the | ||||||||
1777 | // scheduling region. So we may have RegionBeginIdx == RegionEndIdx. | ||||||||
1778 | SlotIndex RegionEndIdx; | ||||||||
1779 | |||||||||
1780 | public: | ||||||||
1781 | CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {} | ||||||||
1782 | |||||||||
1783 | void apply(ScheduleDAGInstrs *DAGInstrs) override; | ||||||||
1784 | |||||||||
1785 | protected: | ||||||||
1786 | void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG); | ||||||||
1787 | }; | ||||||||
1788 | |||||||||
1789 | } // end anonymous namespace | ||||||||
1790 | |||||||||
1791 | namespace llvm { | ||||||||
1792 | |||||||||
1793 | std::unique_ptr<ScheduleDAGMutation> | ||||||||
1794 | createCopyConstrainDAGMutation(const TargetInstrInfo *TII, | ||||||||
1795 | const TargetRegisterInfo *TRI) { | ||||||||
1796 | return std::make_unique<CopyConstrain>(TII, TRI); | ||||||||
1797 | } | ||||||||
1798 | |||||||||
1799 | } // end namespace llvm | ||||||||
1800 | |||||||||
1801 | /// constrainLocalCopy handles two possibilities: | ||||||||
1802 | /// 1) Local src: | ||||||||
1803 | /// I0: = dst | ||||||||
1804 | /// I1: src = ... | ||||||||
1805 | /// I2: = dst | ||||||||
1806 | /// I3: dst = src (copy) | ||||||||
1807 | /// (create pred->succ edges I0->I1, I2->I1) | ||||||||
1808 | /// | ||||||||
1809 | /// 2) Local copy: | ||||||||
1810 | /// I0: dst = src (copy) | ||||||||
1811 | /// I1: = dst | ||||||||
1812 | /// I2: src = ... | ||||||||
1813 | /// I3: = dst | ||||||||
1814 | /// (create pred->succ edges I1->I2, I3->I2) | ||||||||
1815 | /// | ||||||||
1816 | /// Although the MachineScheduler is currently constrained to single blocks, | ||||||||
1817 | /// this algorithm should handle extended blocks. An EBB is a set of | ||||||||
1818 | /// contiguously numbered blocks such that the previous block in the EBB is | ||||||||
1819 | /// always the single predecessor. | ||||||||
1820 | void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) { | ||||||||
1821 | LiveIntervals *LIS = DAG->getLIS(); | ||||||||
1822 | MachineInstr *Copy = CopySU->getInstr(); | ||||||||
1823 | |||||||||
1824 | // Check for pure vreg copies. | ||||||||
1825 | const MachineOperand &SrcOp = Copy->getOperand(1); | ||||||||
1826 | Register SrcReg = SrcOp.getReg(); | ||||||||
1827 | if (!Register::isVirtualRegister(SrcReg) || !SrcOp.readsReg()) | ||||||||
1828 | return; | ||||||||
1829 | |||||||||
1830 | const MachineOperand &DstOp = Copy->getOperand(0); | ||||||||
1831 | Register DstReg = DstOp.getReg(); | ||||||||
1832 | if (!Register::isVirtualRegister(DstReg) || DstOp.isDead()) | ||||||||
1833 | return; | ||||||||
1834 | |||||||||
1835 | // Check if either the dest or source is local. If it's live across a back | ||||||||
1836 | // edge, it's not local. Note that if both vregs are live across the back | ||||||||
1837 | // edge, we cannot successfully contrain the copy without cyclic scheduling. | ||||||||
1838 | // If both the copy's source and dest are local live intervals, then we | ||||||||
1839 | // should treat the dest as the global for the purpose of adding | ||||||||
1840 | // constraints. This adds edges from source's other uses to the copy. | ||||||||
1841 | unsigned LocalReg = SrcReg; | ||||||||
1842 | unsigned GlobalReg = DstReg; | ||||||||
1843 | LiveInterval *LocalLI = &LIS->getInterval(LocalReg); | ||||||||
1844 | if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) { | ||||||||
1845 | LocalReg = DstReg; | ||||||||
1846 | GlobalReg = SrcReg; | ||||||||
1847 | LocalLI = &LIS->getInterval(LocalReg); | ||||||||
1848 | if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) | ||||||||
1849 | return; | ||||||||
1850 | } | ||||||||
1851 | LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg); | ||||||||
1852 | |||||||||
1853 | // Find the global segment after the start of the local LI. | ||||||||
1854 | LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex()); | ||||||||
1855 | // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a | ||||||||
1856 | // local live range. We could create edges from other global uses to the local | ||||||||
1857 | // start, but the coalescer should have already eliminated these cases, so | ||||||||
1858 | // don't bother dealing with it. | ||||||||
1859 | if (GlobalSegment == GlobalLI->end()) | ||||||||
1860 | return; | ||||||||
1861 | |||||||||
1862 | // If GlobalSegment is killed at the LocalLI->start, the call to find() | ||||||||
1863 | // returned the next global segment. But if GlobalSegment overlaps with | ||||||||
1864 | // LocalLI->start, then advance to the next segment. If a hole in GlobalLI | ||||||||
1865 | // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole. | ||||||||
1866 | if (GlobalSegment->contains(LocalLI->beginIndex())) | ||||||||
1867 | ++GlobalSegment; | ||||||||
1868 | |||||||||
1869 | if (GlobalSegment == GlobalLI->end()) | ||||||||
1870 | return; | ||||||||
1871 | |||||||||
1872 | // Check if GlobalLI contains a hole in the vicinity of LocalLI. | ||||||||
1873 | if (GlobalSegment != GlobalLI->begin()) { | ||||||||
1874 | // Two address defs have no hole. | ||||||||
1875 | if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end, | ||||||||
1876 | GlobalSegment->start)) { | ||||||||
1877 | return; | ||||||||
1878 | } | ||||||||
1879 | // If the prior global segment may be defined by the same two-address | ||||||||
1880 | // instruction that also defines LocalLI, then can't make a hole here. | ||||||||
1881 | if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start, | ||||||||
1882 | LocalLI->beginIndex())) { | ||||||||
1883 | return; | ||||||||
1884 | } | ||||||||
1885 | // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise | ||||||||
1886 | // it would be a disconnected component in the live range. | ||||||||
1887 | assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&((void)0) | ||||||||
1888 | "Disconnected LRG within the scheduling region.")((void)0); | ||||||||
1889 | } | ||||||||
1890 | MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start); | ||||||||
1891 | if (!GlobalDef) | ||||||||
1892 | return; | ||||||||
1893 | |||||||||
1894 | SUnit *GlobalSU = DAG->getSUnit(GlobalDef); | ||||||||
1895 | if (!GlobalSU) | ||||||||
1896 | return; | ||||||||
1897 | |||||||||
1898 | // GlobalDef is the bottom of the GlobalLI hole. Open the hole by | ||||||||
1899 | // constraining the uses of the last local def to precede GlobalDef. | ||||||||
1900 | SmallVector<SUnit*,8> LocalUses; | ||||||||
1901 | const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex()); | ||||||||
1902 | MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def); | ||||||||
1903 | SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef); | ||||||||
1904 | for (const SDep &Succ : LastLocalSU->Succs) { | ||||||||
1905 | if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg) | ||||||||
1906 | continue; | ||||||||
1907 | if (Succ.getSUnit() == GlobalSU) | ||||||||
1908 | continue; | ||||||||
1909 | if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit())) | ||||||||
1910 | return; | ||||||||
1911 | LocalUses.push_back(Succ.getSUnit()); | ||||||||
1912 | } | ||||||||
1913 | // Open the top of the GlobalLI hole by constraining any earlier global uses | ||||||||
1914 | // to precede the start of LocalLI. | ||||||||
1915 | SmallVector<SUnit*,8> GlobalUses; | ||||||||
1916 | MachineInstr *FirstLocalDef = | ||||||||
1917 | LIS->getInstructionFromIndex(LocalLI->beginIndex()); | ||||||||
1918 | SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef); | ||||||||
1919 | for (const SDep &Pred : GlobalSU->Preds) { | ||||||||
1920 | if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg) | ||||||||
1921 | continue; | ||||||||
1922 | if (Pred.getSUnit() == FirstLocalSU) | ||||||||
1923 | continue; | ||||||||
1924 | if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit())) | ||||||||
1925 | return; | ||||||||
1926 | GlobalUses.push_back(Pred.getSUnit()); | ||||||||
1927 | } | ||||||||
1928 | LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n")do { } while (false); | ||||||||
1929 | // Add the weak edges. | ||||||||
1930 | for (SUnit *LU : LocalUses) { | ||||||||
1931 | LLVM_DEBUG(dbgs() << " Local use SU(" << LU->NodeNum << ") -> SU("do { } while (false) | ||||||||
1932 | << GlobalSU->NodeNum << ")\n")do { } while (false); | ||||||||
1933 | DAG->addEdge(GlobalSU, SDep(LU, SDep::Weak)); | ||||||||
1934 | } | ||||||||
1935 | for (SUnit *GU : GlobalUses) { | ||||||||
1936 | LLVM_DEBUG(dbgs() << " Global use SU(" << GU->NodeNum << ") -> SU("do { } while (false) | ||||||||
1937 | << FirstLocalSU->NodeNum << ")\n")do { } while (false); | ||||||||
1938 | DAG->addEdge(FirstLocalSU, SDep(GU, SDep::Weak)); | ||||||||
1939 | } | ||||||||
1940 | } | ||||||||
1941 | |||||||||
1942 | /// Callback from DAG postProcessing to create weak edges to encourage | ||||||||
1943 | /// copy elimination. | ||||||||
1944 | void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) { | ||||||||
1945 | ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs); | ||||||||
1946 | assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals")((void)0); | ||||||||
1947 | |||||||||
1948 | MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end()); | ||||||||
1949 | if (FirstPos == DAG->end()) | ||||||||
1950 | return; | ||||||||
1951 | RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos); | ||||||||
1952 | RegionEndIdx = DAG->getLIS()->getInstructionIndex( | ||||||||
1953 | *priorNonDebug(DAG->end(), DAG->begin())); | ||||||||
1954 | |||||||||
1955 | for (SUnit &SU : DAG->SUnits) { | ||||||||
1956 | if (!SU.getInstr()->isCopy()) | ||||||||
1957 | continue; | ||||||||
1958 | |||||||||
1959 | constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG)); | ||||||||
1960 | } | ||||||||
1961 | } | ||||||||
1962 | |||||||||
1963 | //===----------------------------------------------------------------------===// | ||||||||
1964 | // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler | ||||||||
1965 | // and possibly other custom schedulers. | ||||||||
1966 | //===----------------------------------------------------------------------===// | ||||||||
1967 | |||||||||
1968 | static const unsigned InvalidCycle = ~0U; | ||||||||
1969 | |||||||||
1970 | SchedBoundary::~SchedBoundary() { delete HazardRec; } | ||||||||
1971 | |||||||||
1972 | /// Given a Count of resource usage and a Latency value, return true if a | ||||||||
1973 | /// SchedBoundary becomes resource limited. | ||||||||
1974 | /// If we are checking after scheduling a node, we should return true when | ||||||||
1975 | /// we just reach the resource limit. | ||||||||
1976 | static bool checkResourceLimit(unsigned LFactor, unsigned Count, | ||||||||
1977 | unsigned Latency, bool AfterSchedNode) { | ||||||||
1978 | int ResCntFactor = (int)(Count - (Latency * LFactor)); | ||||||||
1979 | if (AfterSchedNode) | ||||||||
1980 | return ResCntFactor >= (int)LFactor; | ||||||||
1981 | else | ||||||||
1982 | return ResCntFactor > (int)LFactor; | ||||||||
1983 | } | ||||||||
1984 | |||||||||
1985 | void SchedBoundary::reset() { | ||||||||
1986 | // A new HazardRec is created for each DAG and owned by SchedBoundary. | ||||||||
1987 | // Destroying and reconstructing it is very expensive though. So keep | ||||||||
1988 | // invalid, placeholder HazardRecs. | ||||||||
1989 | if (HazardRec && HazardRec->isEnabled()) { | ||||||||
1990 | delete HazardRec; | ||||||||
1991 | HazardRec = nullptr; | ||||||||
1992 | } | ||||||||
1993 | Available.clear(); | ||||||||
1994 | Pending.clear(); | ||||||||
1995 | CheckPending = false; | ||||||||
1996 | CurrCycle = 0; | ||||||||
1997 | CurrMOps = 0; | ||||||||
1998 | MinReadyCycle = std::numeric_limits<unsigned>::max(); | ||||||||
1999 | ExpectedLatency = 0; | ||||||||
2000 | DependentLatency = 0; | ||||||||
2001 | RetiredMOps = 0; | ||||||||
2002 | MaxExecutedResCount = 0; | ||||||||
2003 | ZoneCritResIdx = 0; | ||||||||
2004 | IsResourceLimited = false; | ||||||||
2005 | ReservedCycles.clear(); | ||||||||
2006 | ReservedCyclesIndex.clear(); | ||||||||
2007 | ResourceGroupSubUnitMasks.clear(); | ||||||||
2008 | #ifndef NDEBUG1 | ||||||||
2009 | // Track the maximum number of stall cycles that could arise either from the | ||||||||
2010 | // latency of a DAG edge or the number of cycles that a processor resource is | ||||||||
2011 | // reserved (SchedBoundary::ReservedCycles). | ||||||||
2012 | MaxObservedStall = 0; | ||||||||
2013 | #endif | ||||||||
2014 | // Reserve a zero-count for invalid CritResIdx. | ||||||||
2015 | ExecutedResCounts.resize(1); | ||||||||
2016 | assert(!ExecutedResCounts[0] && "nonzero count for bad resource")((void)0); | ||||||||
2017 | } | ||||||||
2018 | |||||||||
2019 | void SchedRemainder:: | ||||||||
2020 | init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { | ||||||||
2021 | reset(); | ||||||||
2022 | if (!SchedModel->hasInstrSchedModel()) | ||||||||
2023 | return; | ||||||||
2024 | RemainingCounts.resize(SchedModel->getNumProcResourceKinds()); | ||||||||
2025 | for (SUnit &SU : DAG->SUnits) { | ||||||||
2026 | const MCSchedClassDesc *SC = DAG->getSchedClass(&SU); | ||||||||
2027 | RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC) | ||||||||
2028 | * SchedModel->getMicroOpFactor(); | ||||||||
2029 | for (TargetSchedModel::ProcResIter | ||||||||
2030 | PI = SchedModel->getWriteProcResBegin(SC), | ||||||||
2031 | PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { | ||||||||
2032 | unsigned PIdx = PI->ProcResourceIdx; | ||||||||
2033 | unsigned Factor = SchedModel->getResourceFactor(PIdx); | ||||||||
2034 | RemainingCounts[PIdx] += (Factor * PI->Cycles); | ||||||||
2035 | } | ||||||||
2036 | } | ||||||||
2037 | } | ||||||||
2038 | |||||||||
2039 | void SchedBoundary:: | ||||||||
2040 | init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) { | ||||||||
2041 | reset(); | ||||||||
2042 | DAG = dag; | ||||||||
2043 | SchedModel = smodel; | ||||||||
2044 | Rem = rem; | ||||||||
2045 | if (SchedModel->hasInstrSchedModel()) { | ||||||||
2046 | unsigned ResourceCount = SchedModel->getNumProcResourceKinds(); | ||||||||
2047 | ReservedCyclesIndex.resize(ResourceCount); | ||||||||
2048 | ExecutedResCounts.resize(ResourceCount); | ||||||||
2049 | ResourceGroupSubUnitMasks.resize(ResourceCount, APInt(ResourceCount, 0)); | ||||||||
2050 | unsigned NumUnits = 0; | ||||||||
2051 | |||||||||
2052 | for (unsigned i = 0; i < ResourceCount; ++i) { | ||||||||
2053 | ReservedCyclesIndex[i] = NumUnits; | ||||||||
2054 | NumUnits += SchedModel->getProcResource(i)->NumUnits; | ||||||||
2055 | if (isUnbufferedGroup(i)) { | ||||||||
2056 | auto SubUnits = SchedModel->getProcResource(i)->SubUnitsIdxBegin; | ||||||||
2057 | for (unsigned U = 0, UE = SchedModel->getProcResource(i)->NumUnits; | ||||||||
2058 | U != UE; ++U) | ||||||||
2059 | ResourceGroupSubUnitMasks[i].setBit(SubUnits[U]); | ||||||||
2060 | } | ||||||||
2061 | } | ||||||||
2062 | |||||||||
2063 | ReservedCycles.resize(NumUnits, InvalidCycle); | ||||||||
2064 | } | ||||||||
2065 | } | ||||||||
2066 | |||||||||
2067 | /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat | ||||||||
2068 | /// these "soft stalls" differently than the hard stall cycles based on CPU | ||||||||
2069 | /// resources and computed by checkHazard(). A fully in-order model | ||||||||
2070 | /// (MicroOpBufferSize==0) will not make use of this since instructions are not | ||||||||
2071 | /// available for scheduling until they are ready. However, a weaker in-order | ||||||||
2072 | /// model may use this for heuristics. For example, if a processor has in-order | ||||||||
2073 | /// behavior when reading certain resources, this may come into play. | ||||||||
2074 | unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) { | ||||||||
2075 | if (!SU->isUnbuffered) | ||||||||
2076 | return 0; | ||||||||
2077 | |||||||||
2078 | unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle); | ||||||||
2079 | if (ReadyCycle > CurrCycle) | ||||||||
2080 | return ReadyCycle - CurrCycle; | ||||||||
2081 | return 0; | ||||||||
2082 | } | ||||||||
2083 | |||||||||
2084 | /// Compute the next cycle at which the given processor resource unit | ||||||||
2085 | /// can be scheduled. | ||||||||
2086 | unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx, | ||||||||
2087 | unsigned Cycles) { | ||||||||
2088 | unsigned NextUnreserved = ReservedCycles[InstanceIdx]; | ||||||||
2089 | // If this resource has never been used, always return cycle zero. | ||||||||
2090 | if (NextUnreserved == InvalidCycle) | ||||||||
2091 | return 0; | ||||||||
2092 | // For bottom-up scheduling add the cycles needed for the current operation. | ||||||||
2093 | if (!isTop()) | ||||||||
2094 | NextUnreserved += Cycles; | ||||||||
2095 | return NextUnreserved; | ||||||||
2096 | } | ||||||||
2097 | |||||||||
2098 | /// Compute the next cycle at which the given processor resource can be | ||||||||
2099 | /// scheduled. Returns the next cycle and the index of the processor resource | ||||||||
2100 | /// instance in the reserved cycles vector. | ||||||||
2101 | std::pair<unsigned, unsigned> | ||||||||
2102 | SchedBoundary::getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx, | ||||||||
2103 | unsigned Cycles) { | ||||||||
2104 | |||||||||
2105 | unsigned MinNextUnreserved = InvalidCycle; | ||||||||
2106 | unsigned InstanceIdx = 0; | ||||||||
2107 | unsigned StartIndex = ReservedCyclesIndex[PIdx]; | ||||||||
2108 | unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits; | ||||||||
2109 | assert(NumberOfInstances > 0 &&((void)0) | ||||||||
2110 | "Cannot have zero instances of a ProcResource")((void)0); | ||||||||
2111 | |||||||||
2112 | if (isUnbufferedGroup(PIdx)) { | ||||||||
2113 | // If any subunits are used by the instruction, report that the resource | ||||||||
2114 | // group is available at 0, effectively removing the group record from | ||||||||
2115 | // hazarding and basing the hazarding decisions on the subunit records. | ||||||||
2116 | // Otherwise, choose the first available instance from among the subunits. | ||||||||
2117 | // Specifications which assign cycles to both the subunits and the group or | ||||||||
2118 | // which use an unbuffered group with buffered subunits will appear to | ||||||||
2119 | // schedule strangely. In the first case, the additional cycles for the | ||||||||
2120 | // group will be ignored. In the second, the group will be ignored | ||||||||
2121 | // entirely. | ||||||||
2122 | for (const MCWriteProcResEntry &PE : | ||||||||
2123 | make_range(SchedModel->getWriteProcResBegin(SC), | ||||||||
2124 | SchedModel->getWriteProcResEnd(SC))) | ||||||||
2125 | if (ResourceGroupSubUnitMasks[PIdx][PE.ProcResourceIdx]) | ||||||||
2126 | return std::make_pair(0u, StartIndex); | ||||||||
2127 | |||||||||
2128 | auto SubUnits = SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin; | ||||||||
2129 | for (unsigned I = 0, End = NumberOfInstances; I < End; ++I) { | ||||||||
2130 | unsigned NextUnreserved, NextInstanceIdx; | ||||||||
2131 | std::tie(NextUnreserved, NextInstanceIdx) = | ||||||||
2132 | getNextResourceCycle(SC, SubUnits[I], Cycles); | ||||||||
2133 | if (MinNextUnreserved > NextUnreserved) { | ||||||||
2134 | InstanceIdx = NextInstanceIdx; | ||||||||
2135 | MinNextUnreserved = NextUnreserved; | ||||||||
2136 | } | ||||||||
2137 | } | ||||||||
2138 | return std::make_pair(MinNextUnreserved, InstanceIdx); | ||||||||
2139 | } | ||||||||
2140 | |||||||||
2141 | for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End; | ||||||||
2142 | ++I) { | ||||||||
2143 | unsigned NextUnreserved = getNextResourceCycleByInstance(I, Cycles); | ||||||||
2144 | if (MinNextUnreserved > NextUnreserved) { | ||||||||
2145 | InstanceIdx = I; | ||||||||
2146 | MinNextUnreserved = NextUnreserved; | ||||||||
2147 | } | ||||||||
2148 | } | ||||||||
2149 | return std::make_pair(MinNextUnreserved, InstanceIdx); | ||||||||
2150 | } | ||||||||
2151 | |||||||||
2152 | /// Does this SU have a hazard within the current instruction group. | ||||||||
2153 | /// | ||||||||
2154 | /// The scheduler supports two modes of hazard recognition. The first is the | ||||||||
2155 | /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that | ||||||||
2156 | /// supports highly complicated in-order reservation tables | ||||||||
2157 | /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic. | ||||||||
2158 | /// | ||||||||
2159 | /// The second is a streamlined mechanism that checks for hazards based on | ||||||||
2160 | /// simple counters that the scheduler itself maintains. It explicitly checks | ||||||||
2161 | /// for instruction dispatch limitations, including the number of micro-ops that | ||||||||
2162 | /// can dispatch per cycle. | ||||||||
2163 | /// | ||||||||
2164 | /// TODO: Also check whether the SU must start a new group. | ||||||||
2165 | bool SchedBoundary::checkHazard(SUnit *SU) { | ||||||||
2166 | if (HazardRec->isEnabled() | ||||||||
2167 | && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) { | ||||||||
2168 | return true; | ||||||||
2169 | } | ||||||||
2170 | |||||||||
2171 | unsigned uops = SchedModel->getNumMicroOps(SU->getInstr()); | ||||||||
2172 | if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) { | ||||||||
2173 | LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="do { } while (false) | ||||||||
2174 | << SchedModel->getNumMicroOps(SU->getInstr()) << '\n')do { } while (false); | ||||||||
2175 | return true; | ||||||||
2176 | } | ||||||||
2177 | |||||||||
2178 | if (CurrMOps > 0 && | ||||||||
2179 | ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) || | ||||||||
2180 | (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) { | ||||||||
2181 | LLVM_DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must "do { } while (false) | ||||||||
2182 | << (isTop() ? "begin" : "end") << " group\n")do { } while (false); | ||||||||
2183 | return true; | ||||||||
2184 | } | ||||||||
2185 | |||||||||
2186 | if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) { | ||||||||
2187 | const MCSchedClassDesc *SC = DAG->getSchedClass(SU); | ||||||||
2188 | for (const MCWriteProcResEntry &PE : | ||||||||
2189 | make_range(SchedModel->getWriteProcResBegin(SC), | ||||||||
2190 | SchedModel->getWriteProcResEnd(SC))) { | ||||||||
2191 | unsigned ResIdx = PE.ProcResourceIdx; | ||||||||
2192 | unsigned Cycles = PE.Cycles; | ||||||||
2193 | unsigned NRCycle, InstanceIdx; | ||||||||
2194 | std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(SC, ResIdx, Cycles); | ||||||||
2195 | if (NRCycle > CurrCycle) { | ||||||||
2196 | #ifndef NDEBUG1 | ||||||||
2197 | MaxObservedStall = std::max(Cycles, MaxObservedStall); | ||||||||
2198 | #endif | ||||||||
2199 | LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "do { } while (false) | ||||||||
2200 | << SchedModel->getResourceName(ResIdx)do { } while (false) | ||||||||
2201 | << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']'do { } while (false) | ||||||||
2202 | << "=" << NRCycle << "c\n")do { } while (false); | ||||||||
2203 | return true; | ||||||||
2204 | } | ||||||||
2205 | } | ||||||||
2206 | } | ||||||||
2207 | return false; | ||||||||
2208 | } | ||||||||
2209 | |||||||||
2210 | // Find the unscheduled node in ReadySUs with the highest latency. | ||||||||
2211 | unsigned SchedBoundary:: | ||||||||
2212 | findMaxLatency(ArrayRef<SUnit*> ReadySUs) { | ||||||||
2213 | SUnit *LateSU = nullptr; | ||||||||
2214 | unsigned RemLatency = 0; | ||||||||
2215 | for (SUnit *SU : ReadySUs) { | ||||||||
2216 | unsigned L = getUnscheduledLatency(SU); | ||||||||
2217 | if (L > RemLatency) { | ||||||||
2218 | RemLatency = L; | ||||||||
2219 | LateSU = SU; | ||||||||
2220 | } | ||||||||
2221 | } | ||||||||
2222 | if (LateSU) { | ||||||||
2223 | LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("do { } while (false) | ||||||||
2224 | << LateSU->NodeNum << ") " << RemLatency << "c\n")do { } while (false); | ||||||||
2225 | } | ||||||||
2226 | return RemLatency; | ||||||||
2227 | } | ||||||||
2228 | |||||||||
2229 | // Count resources in this zone and the remaining unscheduled | ||||||||
2230 | // instruction. Return the max count, scaled. Set OtherCritIdx to the critical | ||||||||
2231 | // resource index, or zero if the zone is issue limited. | ||||||||
2232 | unsigned SchedBoundary:: | ||||||||
2233 | getOtherResourceCount(unsigned &OtherCritIdx) { | ||||||||
2234 | OtherCritIdx = 0; | ||||||||
2235 | if (!SchedModel->hasInstrSchedModel()) | ||||||||
2236 | return 0; | ||||||||
2237 | |||||||||
2238 | unsigned OtherCritCount = Rem->RemIssueCount | ||||||||
2239 | + (RetiredMOps * SchedModel->getMicroOpFactor()); | ||||||||
2240 | LLVM_DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "do { } while (false) | ||||||||
2241 | << OtherCritCount / SchedModel->getMicroOpFactor() << '\n')do { } while (false); | ||||||||
2242 | for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds(); | ||||||||
2243 | PIdx != PEnd; ++PIdx) { | ||||||||
2244 | unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx]; | ||||||||
2245 | if (OtherCount > OtherCritCount) { | ||||||||
2246 | OtherCritCount = OtherCount; | ||||||||
2247 | OtherCritIdx = PIdx; | ||||||||
2248 | } | ||||||||
2249 | } | ||||||||
2250 | if (OtherCritIdx) { | ||||||||
2251 | LLVM_DEBUG(do { } while (false) | ||||||||
2252 | dbgs() << " " << Available.getName() << " + Remain CritRes: "do { } while (false) | ||||||||
2253 | << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)do { } while (false) | ||||||||
2254 | << " " << SchedModel->getResourceName(OtherCritIdx) << "\n")do { } while (false); | ||||||||
2255 | } | ||||||||
2256 | return OtherCritCount; | ||||||||
2257 | } | ||||||||
2258 | |||||||||
2259 | void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue, | ||||||||
2260 | unsigned Idx) { | ||||||||
2261 | assert(SU->getInstr() && "Scheduled SUnit must have instr")((void)0); | ||||||||
2262 | |||||||||
2263 | #ifndef NDEBUG1 | ||||||||
2264 | // ReadyCycle was been bumped up to the CurrCycle when this node was | ||||||||
2265 | // scheduled, but CurrCycle may have been eagerly advanced immediately after | ||||||||
2266 | // scheduling, so may now be greater than ReadyCycle. | ||||||||
2267 | if (ReadyCycle > CurrCycle) | ||||||||
2268 | MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall); | ||||||||
2269 | #endif | ||||||||
2270 | |||||||||
2271 | if (ReadyCycle < MinReadyCycle) | ||||||||
2272 | MinReadyCycle = ReadyCycle; | ||||||||
2273 | |||||||||
2274 | // Check for interlocks first. For the purpose of other heuristics, an | ||||||||
2275 | // instruction that cannot issue appears as if it's not in the ReadyQueue. | ||||||||
2276 | bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0; | ||||||||
2277 | bool HazardDetected = (!IsBuffered && ReadyCycle > CurrCycle) || | ||||||||
2278 | checkHazard(SU) || (Available.size() >= ReadyListLimit); | ||||||||
2279 | |||||||||
2280 | if (!HazardDetected) { | ||||||||
2281 | Available.push(SU); | ||||||||
2282 | |||||||||
2283 | if (InPQueue) | ||||||||
2284 | Pending.remove(Pending.begin() + Idx); | ||||||||
2285 | return; | ||||||||
2286 | } | ||||||||
2287 | |||||||||
2288 | if (!InPQueue) | ||||||||
2289 | Pending.push(SU); | ||||||||
2290 | } | ||||||||
2291 | |||||||||
2292 | /// Move the boundary of scheduled code by one cycle. | ||||||||
2293 | void SchedBoundary::bumpCycle(unsigned NextCycle) { | ||||||||
2294 | if (SchedModel->getMicroOpBufferSize() == 0) { | ||||||||
2295 | assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&((void)0) | ||||||||
2296 | "MinReadyCycle uninitialized")((void)0); | ||||||||
2297 | if (MinReadyCycle > NextCycle) | ||||||||
2298 | NextCycle = MinReadyCycle; | ||||||||
2299 | } | ||||||||
2300 | // Update the current micro-ops, which will issue in the next cycle. | ||||||||
2301 | unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle); | ||||||||
2302 | CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps; | ||||||||
2303 | |||||||||
2304 | // Decrement DependentLatency based on the next cycle. | ||||||||
2305 | if ((NextCycle - CurrCycle) > DependentLatency) | ||||||||
2306 | DependentLatency = 0; | ||||||||
2307 | else | ||||||||
2308 | DependentLatency -= (NextCycle - CurrCycle); | ||||||||
2309 | |||||||||
2310 | if (!HazardRec->isEnabled()) { | ||||||||
2311 | // Bypass HazardRec virtual calls. | ||||||||
2312 | CurrCycle = NextCycle; | ||||||||
2313 | } else { | ||||||||
2314 | // Bypass getHazardType calls in case of long latency. | ||||||||
2315 | for (; CurrCycle != NextCycle; ++CurrCycle) { | ||||||||
2316 | if (isTop()) | ||||||||
2317 | HazardRec->AdvanceCycle(); | ||||||||
2318 | else | ||||||||
2319 | HazardRec->RecedeCycle(); | ||||||||
2320 | } | ||||||||
2321 | } | ||||||||
2322 | CheckPending = true; | ||||||||
2323 | IsResourceLimited = | ||||||||
2324 | checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(), | ||||||||
2325 | getScheduledLatency(), true); | ||||||||
2326 | |||||||||
2327 | LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()do { } while (false) | ||||||||
2328 | << '\n')do { } while (false); | ||||||||
2329 | } | ||||||||
2330 | |||||||||
2331 | void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) { | ||||||||
2332 | ExecutedResCounts[PIdx] += Count; | ||||||||
2333 | if (ExecutedResCounts[PIdx] > MaxExecutedResCount) | ||||||||
2334 | MaxExecutedResCount = ExecutedResCounts[PIdx]; | ||||||||
2335 | } | ||||||||
2336 | |||||||||
2337 | /// Add the given processor resource to this scheduled zone. | ||||||||
2338 | /// | ||||||||
2339 | /// \param Cycles indicates the number of consecutive (non-pipelined) cycles | ||||||||
2340 | /// during which this resource is consumed. | ||||||||
2341 | /// | ||||||||
2342 | /// \return the next cycle at which the instruction may execute without | ||||||||
2343 | /// oversubscribing resources. | ||||||||
2344 | unsigned SchedBoundary::countResource(const MCSchedClassDesc *SC, unsigned PIdx, | ||||||||
2345 | unsigned Cycles, unsigned NextCycle) { | ||||||||
2346 | unsigned Factor = SchedModel->getResourceFactor(PIdx); | ||||||||
2347 | unsigned Count = Factor * Cycles; | ||||||||
2348 | LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +"do { } while (false) | ||||||||
2349 | << Cycles << "x" << Factor << "u\n")do { } while (false); | ||||||||
2350 | |||||||||
2351 | // Update Executed resources counts. | ||||||||
2352 | incExecutedResources(PIdx, Count); | ||||||||
2353 | assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted")((void)0); | ||||||||
2354 | Rem->RemainingCounts[PIdx] -= Count; | ||||||||
2355 | |||||||||
2356 | // Check if this resource exceeds the current critical resource. If so, it | ||||||||
2357 | // becomes the critical resource. | ||||||||
2358 | if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) { | ||||||||
2359 | ZoneCritResIdx = PIdx; | ||||||||
2360 | LLVM_DEBUG(dbgs() << " *** Critical resource "do { } while (false) | ||||||||
2361 | << SchedModel->getResourceName(PIdx) << ": "do { } while (false) | ||||||||
2362 | << getResourceCount(PIdx) / SchedModel->getLatencyFactor()do { } while (false) | ||||||||
2363 | << "c\n")do { } while (false); | ||||||||
2364 | } | ||||||||
2365 | // For reserved resources, record the highest cycle using the resource. | ||||||||
2366 | unsigned NextAvailable, InstanceIdx; | ||||||||
2367 | std::tie(NextAvailable, InstanceIdx) = getNextResourceCycle(SC, PIdx, Cycles); | ||||||||
2368 | if (NextAvailable > CurrCycle) { | ||||||||
2369 | LLVM_DEBUG(dbgs() << " Resource conflict: "do { } while (false) | ||||||||
2370 | << SchedModel->getResourceName(PIdx)do { } while (false) | ||||||||
2371 | << '[' << InstanceIdx - ReservedCyclesIndex[PIdx] << ']'do { } while (false) | ||||||||
2372 | << " reserved until @" << NextAvailable << "\n")do { } while (false); | ||||||||
2373 | } | ||||||||
2374 | return NextAvailable; | ||||||||
2375 | } | ||||||||
2376 | |||||||||
2377 | /// Move the boundary of scheduled code by one SUnit. | ||||||||
2378 | void SchedBoundary::bumpNode(SUnit *SU) { | ||||||||
2379 | // Update the reservation table. | ||||||||
2380 | if (HazardRec->isEnabled()) { | ||||||||
2381 | if (!isTop() && SU->isCall) { | ||||||||
2382 | // Calls are scheduled with their preceding instructions. For bottom-up | ||||||||
2383 | // scheduling, clear the pipeline state before emitting. | ||||||||
2384 | HazardRec->Reset(); | ||||||||
2385 | } | ||||||||
2386 | HazardRec->EmitInstruction(SU); | ||||||||
2387 | // Scheduling an instruction may have made pending instructions available. | ||||||||
2388 | CheckPending = true; | ||||||||
2389 | } | ||||||||
2390 | // checkHazard should prevent scheduling multiple instructions per cycle that | ||||||||
2391 | // exceed the issue width. | ||||||||
2392 | const MCSchedClassDesc *SC = DAG->getSchedClass(SU); | ||||||||
2393 | unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr()); | ||||||||
2394 | assert(((void)0) | ||||||||
2395 | (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&((void)0) | ||||||||
2396 | "Cannot schedule this instruction's MicroOps in the current cycle.")((void)0); | ||||||||
2397 | |||||||||
2398 | unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle); | ||||||||
2399 | LLVM_DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n")do { } while (false); | ||||||||
2400 | |||||||||
2401 | unsigned NextCycle = CurrCycle; | ||||||||
2402 | switch (SchedModel->getMicroOpBufferSize()) { | ||||||||
2403 | case 0: | ||||||||
2404 | assert(ReadyCycle <= CurrCycle && "Broken PendingQueue")((void)0); | ||||||||
2405 | break; | ||||||||
2406 | case 1: | ||||||||
2407 | if (ReadyCycle > NextCycle) { | ||||||||
2408 | NextCycle = ReadyCycle; | ||||||||
2409 | LLVM_DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n")do { } while (false); | ||||||||
2410 | } | ||||||||
2411 | break; | ||||||||
2412 | default: | ||||||||
2413 | // We don't currently model the OOO reorder buffer, so consider all | ||||||||
2414 | // scheduled MOps to be "retired". We do loosely model in-order resource | ||||||||
2415 | // latency. If this instruction uses an in-order resource, account for any | ||||||||
2416 | // likely stall cycles. | ||||||||
2417 | if (SU->isUnbuffered && ReadyCycle > NextCycle) | ||||||||
2418 | NextCycle = ReadyCycle; | ||||||||
2419 | break; | ||||||||
2420 | } | ||||||||
2421 | RetiredMOps += IncMOps; | ||||||||
2422 | |||||||||
2423 | // Update resource counts and critical resource. | ||||||||
2424 | if (SchedModel->hasInstrSchedModel()) { | ||||||||
2425 | unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor(); | ||||||||
2426 | assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted")((void)0); | ||||||||
2427 | Rem->RemIssueCount -= DecRemIssue; | ||||||||
2428 | if (ZoneCritResIdx) { | ||||||||
2429 | // Scale scheduled micro-ops for comparing with the critical resource. | ||||||||
2430 | unsigned ScaledMOps = | ||||||||
2431 | RetiredMOps * SchedModel->getMicroOpFactor(); | ||||||||
2432 | |||||||||
2433 | // If scaled micro-ops are now more than the previous critical resource by | ||||||||
2434 | // a full cycle, then micro-ops issue becomes critical. | ||||||||
2435 | if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx)) | ||||||||
2436 | >= (int)SchedModel->getLatencyFactor()) { | ||||||||
2437 | ZoneCritResIdx = 0; | ||||||||
2438 | LLVM_DEBUG(dbgs() << " *** Critical resource NumMicroOps: "do { } while (false) | ||||||||
2439 | << ScaledMOps / SchedModel->getLatencyFactor()do { } while (false) | ||||||||
2440 | << "c\n")do { } while (false); | ||||||||
2441 | } | ||||||||
2442 | } | ||||||||
2443 | for (TargetSchedModel::ProcResIter | ||||||||
2444 | PI = SchedModel->getWriteProcResBegin(SC), | ||||||||
2445 | PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { | ||||||||
2446 | unsigned RCycle = | ||||||||
2447 | countResource(SC, PI->ProcResourceIdx, PI->Cycles, NextCycle); | ||||||||
2448 | if (RCycle > NextCycle) | ||||||||
2449 | NextCycle = RCycle; | ||||||||
2450 | } | ||||||||
2451 | if (SU->hasReservedResource) { | ||||||||
2452 | // For reserved resources, record the highest cycle using the resource. | ||||||||
2453 | // For top-down scheduling, this is the cycle in which we schedule this | ||||||||
2454 | // instruction plus the number of cycles the operations reserves the | ||||||||
2455 | // resource. For bottom-up is it simply the instruction's cycle. | ||||||||
2456 | for (TargetSchedModel::ProcResIter | ||||||||
2457 | PI = SchedModel->getWriteProcResBegin(SC), | ||||||||
2458 | PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { | ||||||||
2459 | unsigned PIdx = PI->ProcResourceIdx; | ||||||||
2460 | if (SchedModel->getProcResource(PIdx)->BufferSize == 0) { | ||||||||
2461 | unsigned ReservedUntil, InstanceIdx; | ||||||||
2462 | std::tie(ReservedUntil, InstanceIdx) = | ||||||||
2463 | getNextResourceCycle(SC, PIdx, 0); | ||||||||
2464 | if (isTop()) { | ||||||||
2465 | ReservedCycles[InstanceIdx] = | ||||||||
2466 | std::max(ReservedUntil, NextCycle + PI->Cycles); | ||||||||
2467 | } else | ||||||||
2468 | ReservedCycles[InstanceIdx] = NextCycle; | ||||||||
2469 | } | ||||||||
2470 | } | ||||||||
2471 | } | ||||||||
2472 | } | ||||||||
2473 | // Update ExpectedLatency and DependentLatency. | ||||||||
2474 | unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency; | ||||||||
2475 | unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency; | ||||||||
2476 | if (SU->getDepth() > TopLatency) { | ||||||||
2477 | TopLatency = SU->getDepth(); | ||||||||
2478 | LLVM_DEBUG(dbgs() << " " << Available.getName() << " TopLatency SU("do { } while (false) | ||||||||
2479 | << SU->NodeNum << ") " << TopLatency << "c\n")do { } while (false); | ||||||||
2480 | } | ||||||||
2481 | if (SU->getHeight() > BotLatency) { | ||||||||
2482 | BotLatency = SU->getHeight(); | ||||||||
2483 | LLVM_DEBUG(dbgs() << " " << Available.getName() << " BotLatency SU("do { } while (false) | ||||||||
2484 | << SU->NodeNum << ") " << BotLatency << "c\n")do { } while (false); | ||||||||
2485 | } | ||||||||
2486 | // If we stall for any reason, bump the cycle. | ||||||||
2487 | if (NextCycle > CurrCycle) | ||||||||
2488 | bumpCycle(NextCycle); | ||||||||
2489 | else | ||||||||
2490 | // After updating ZoneCritResIdx and ExpectedLatency, check if we're | ||||||||
2491 | // resource limited. If a stall occurred, bumpCycle does this. | ||||||||
2492 | IsResourceLimited = | ||||||||
2493 | checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(), | ||||||||
2494 | getScheduledLatency(), true); | ||||||||
2495 | |||||||||
2496 | // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle | ||||||||
2497 | // resets CurrMOps. Loop to handle instructions with more MOps than issue in | ||||||||
2498 | // one cycle. Since we commonly reach the max MOps here, opportunistically | ||||||||
2499 | // bump the cycle to avoid uselessly checking everything in the readyQ. | ||||||||
2500 | CurrMOps += IncMOps; | ||||||||
2501 | |||||||||
2502 | // Bump the cycle count for issue group constraints. | ||||||||
2503 | // This must be done after NextCycle has been adjust for all other stalls. | ||||||||
2504 | // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set | ||||||||
2505 | // currCycle to X. | ||||||||
2506 | if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) || | ||||||||
2507 | (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) { | ||||||||
2508 | LLVM_DEBUG(dbgs() << " Bump cycle to " << (isTop() ? "end" : "begin")do { } while (false) | ||||||||
2509 | << " group\n")do { } while (false); | ||||||||
2510 | bumpCycle(++NextCycle); | ||||||||
2511 | } | ||||||||
2512 | |||||||||
2513 | while (CurrMOps >= SchedModel->getIssueWidth()) { | ||||||||
2514 | LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle "do { } while (false) | ||||||||
2515 | << CurrCycle << '\n')do { } while (false); | ||||||||
2516 | bumpCycle(++NextCycle); | ||||||||
2517 | } | ||||||||
2518 | LLVM_DEBUG(dumpScheduledState())do { } while (false); | ||||||||
2519 | } | ||||||||
2520 | |||||||||
2521 | /// Release pending ready nodes in to the available queue. This makes them | ||||||||
2522 | /// visible to heuristics. | ||||||||
2523 | void SchedBoundary::releasePending() { | ||||||||
2524 | // If the available queue is empty, it is safe to reset MinReadyCycle. | ||||||||
2525 | if (Available.empty()) | ||||||||
2526 | MinReadyCycle = std::numeric_limits<unsigned>::max(); | ||||||||
2527 | |||||||||
2528 | // Check to see if any of the pending instructions are ready to issue. If | ||||||||
2529 | // so, add them to the available queue. | ||||||||
2530 | for (unsigned I = 0, E = Pending.size(); I < E; ++I) { | ||||||||
2531 | SUnit *SU = *(Pending.begin() + I); | ||||||||
2532 | unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle; | ||||||||
2533 | |||||||||
2534 | if (ReadyCycle < MinReadyCycle) | ||||||||
2535 | MinReadyCycle = ReadyCycle; | ||||||||
2536 | |||||||||
2537 | if (Available.size() >= ReadyListLimit) | ||||||||
2538 | break; | ||||||||
2539 | |||||||||
2540 | releaseNode(SU, ReadyCycle, true, I); | ||||||||
2541 | if (E != Pending.size()) { | ||||||||
2542 | --I; | ||||||||
2543 | --E; | ||||||||
2544 | } | ||||||||
2545 | } | ||||||||
2546 | CheckPending = false; | ||||||||
2547 | } | ||||||||
2548 | |||||||||
2549 | /// Remove SU from the ready set for this boundary. | ||||||||
2550 | void SchedBoundary::removeReady(SUnit *SU) { | ||||||||
2551 | if (Available.isInQueue(SU)) | ||||||||
2552 | Available.remove(Available.find(SU)); | ||||||||
2553 | else { | ||||||||
2554 | assert(Pending.isInQueue(SU) && "bad ready count")((void)0); | ||||||||
2555 | Pending.remove(Pending.find(SU)); | ||||||||
2556 | } | ||||||||
2557 | } | ||||||||
2558 | |||||||||
2559 | /// If this queue only has one ready candidate, return it. As a side effect, | ||||||||
2560 | /// defer any nodes that now hit a hazard, and advance the cycle until at least | ||||||||
2561 | /// one node is ready. If multiple instructions are ready, return NULL. | ||||||||
2562 | SUnit *SchedBoundary::pickOnlyChoice() { | ||||||||
2563 | if (CheckPending) | ||||||||
2564 | releasePending(); | ||||||||
2565 | |||||||||
2566 | // Defer any ready instrs that now have a hazard. | ||||||||
2567 | for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) { | ||||||||
2568 | if (checkHazard(*I)) { | ||||||||
2569 | Pending.push(*I); | ||||||||
2570 | I = Available.remove(I); | ||||||||
2571 | continue; | ||||||||
2572 | } | ||||||||
2573 | ++I; | ||||||||
2574 | } | ||||||||
2575 | for (unsigned i = 0; Available.empty(); ++i) { | ||||||||
2576 | // FIXME: Re-enable assert once PR20057 is resolved. | ||||||||
2577 | // assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) && | ||||||||
2578 | // "permanent hazard"); | ||||||||
2579 | (void)i; | ||||||||
2580 | bumpCycle(CurrCycle + 1); | ||||||||
2581 | releasePending(); | ||||||||
2582 | } | ||||||||
2583 | |||||||||
2584 | LLVM_DEBUG(Pending.dump())do { } while (false); | ||||||||
2585 | LLVM_DEBUG(Available.dump())do { } while (false); | ||||||||
2586 | |||||||||
2587 | if (Available.size() == 1) | ||||||||
2588 | return *Available.begin(); | ||||||||
2589 | return nullptr; | ||||||||
2590 | } | ||||||||
2591 | |||||||||
2592 | #if !defined(NDEBUG1) || defined(LLVM_ENABLE_DUMP) | ||||||||
2593 | // This is useful information to dump after bumpNode. | ||||||||
2594 | // Note that the Queue contents are more useful before pickNodeFromQueue. | ||||||||
2595 | LLVM_DUMP_METHOD__attribute__((noinline)) void SchedBoundary::dumpScheduledState() const { | ||||||||
2596 | unsigned ResFactor; | ||||||||
2597 | unsigned ResCount; | ||||||||
2598 | if (ZoneCritResIdx) { | ||||||||
2599 | ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx); | ||||||||
2600 | ResCount = getResourceCount(ZoneCritResIdx); | ||||||||
2601 | } else { | ||||||||
2602 | ResFactor = SchedModel->getMicroOpFactor(); | ||||||||
2603 | ResCount = RetiredMOps * ResFactor; | ||||||||
2604 | } | ||||||||
2605 | unsigned LFactor = SchedModel->getLatencyFactor(); | ||||||||
2606 | dbgs() << Available.getName() << " @" << CurrCycle << "c\n" | ||||||||
2607 | << " Retired: " << RetiredMOps; | ||||||||
2608 | dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c"; | ||||||||
2609 | dbgs() << "\n Critical: " << ResCount / LFactor << "c, " | ||||||||
2610 | << ResCount / ResFactor << " " | ||||||||
2611 | << SchedModel->getResourceName(ZoneCritResIdx) | ||||||||
2612 | << "\n ExpectedLatency: " << ExpectedLatency << "c\n" | ||||||||
2613 | << (IsResourceLimited ? " - Resource" : " - Latency") | ||||||||
2614 | << " limited.\n"; | ||||||||
2615 | } | ||||||||
2616 | #endif | ||||||||
2617 | |||||||||
2618 | //===----------------------------------------------------------------------===// | ||||||||
2619 | // GenericScheduler - Generic implementation of MachineSchedStrategy. | ||||||||
2620 | //===----------------------------------------------------------------------===// | ||||||||
2621 | |||||||||
2622 | void GenericSchedulerBase::SchedCandidate:: | ||||||||
2623 | initResourceDelta(const ScheduleDAGMI *DAG, | ||||||||
2624 | const TargetSchedModel *SchedModel) { | ||||||||
2625 | if (!Policy.ReduceResIdx && !Policy.DemandResIdx) | ||||||||
2626 | return; | ||||||||
2627 | |||||||||
2628 | const MCSchedClassDesc *SC = DAG->getSchedClass(SU); | ||||||||
2629 | for (TargetSchedModel::ProcResIter | ||||||||
2630 | PI = SchedModel->getWriteProcResBegin(SC), | ||||||||
2631 | PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { | ||||||||
2632 | if (PI->ProcResourceIdx == Policy.ReduceResIdx) | ||||||||
2633 | ResDelta.CritResources += PI->Cycles; | ||||||||
2634 | if (PI->ProcResourceIdx == Policy.DemandResIdx) | ||||||||
2635 | ResDelta.DemandedResources += PI->Cycles; | ||||||||
2636 | } | ||||||||
2637 | } | ||||||||
2638 | |||||||||
2639 | /// Compute remaining latency. We need this both to determine whether the | ||||||||
2640 | /// overall schedule has become latency-limited and whether the instructions | ||||||||
2641 | /// outside this zone are resource or latency limited. | ||||||||
2642 | /// | ||||||||
2643 | /// The "dependent" latency is updated incrementally during scheduling as the | ||||||||
2644 | /// max height/depth of scheduled nodes minus the cycles since it was | ||||||||
2645 | /// scheduled: | ||||||||
2646 | /// DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone | ||||||||
2647 | /// | ||||||||
2648 | /// The "independent" latency is the max ready queue depth: | ||||||||
2649 | /// ILat = max N.depth for N in Available|Pending | ||||||||
2650 | /// | ||||||||
2651 | /// RemainingLatency is the greater of independent and dependent latency. | ||||||||
2652 | /// | ||||||||
2653 | /// These computations are expensive, especially in DAGs with many edges, so | ||||||||
2654 | /// only do them if necessary. | ||||||||
2655 | static unsigned computeRemLatency(SchedBoundary &CurrZone) { | ||||||||
2656 | unsigned RemLatency = CurrZone.getDependentLatency(); | ||||||||
2657 | RemLatency = std::max(RemLatency, | ||||||||
2658 | CurrZone.findMaxLatency(CurrZone.Available.elements())); | ||||||||
2659 | RemLatency = std::max(RemLatency, | ||||||||
2660 | CurrZone.findMaxLatency(CurrZone.Pending.elements())); | ||||||||
2661 | return RemLatency; | ||||||||
2662 | } | ||||||||
2663 | |||||||||
2664 | /// Returns true if the current cycle plus remaning latency is greater than | ||||||||
2665 | /// the critical path in the scheduling region. | ||||||||
2666 | bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy, | ||||||||
2667 | SchedBoundary &CurrZone, | ||||||||
2668 | bool ComputeRemLatency, | ||||||||
2669 | unsigned &RemLatency) const { | ||||||||
2670 | // The current cycle is already greater than the critical path, so we are | ||||||||
2671 | // already latency limited and don't need to compute the remaining latency. | ||||||||
2672 | if (CurrZone.getCurrCycle() > Rem.CriticalPath) | ||||||||
2673 | return true; | ||||||||
2674 | |||||||||
2675 | // If we haven't scheduled anything yet, then we aren't latency limited. | ||||||||
2676 | if (CurrZone.getCurrCycle() == 0) | ||||||||
2677 | return false; | ||||||||
2678 | |||||||||
2679 | if (ComputeRemLatency) | ||||||||
2680 | RemLatency = computeRemLatency(CurrZone); | ||||||||
2681 | |||||||||
2682 | return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath; | ||||||||
2683 | } | ||||||||
2684 | |||||||||
2685 | /// Set the CandPolicy given a scheduling zone given the current resources and | ||||||||
2686 | /// latencies inside and outside the zone. | ||||||||
2687 | void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA, | ||||||||
2688 | SchedBoundary &CurrZone, | ||||||||
2689 | SchedBoundary *OtherZone) { | ||||||||
2690 | // Apply preemptive heuristics based on the total latency and resources | ||||||||
2691 | // inside and outside this zone. Potential stalls should be considered before | ||||||||
2692 | // following this policy. | ||||||||
2693 | |||||||||
2694 | // Compute the critical resource outside the zone. | ||||||||
2695 | unsigned OtherCritIdx = 0; | ||||||||
2696 | unsigned OtherCount = | ||||||||
2697 | OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0; | ||||||||
2698 | |||||||||
2699 | bool OtherResLimited = false; | ||||||||
2700 | unsigned RemLatency = 0; | ||||||||
2701 | bool RemLatencyComputed = false; | ||||||||
2702 | if (SchedModel->hasInstrSchedModel() && OtherCount != 0) { | ||||||||
2703 | RemLatency = computeRemLatency(CurrZone); | ||||||||
2704 | RemLatencyComputed = true; | ||||||||
2705 | OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(), | ||||||||
2706 | OtherCount, RemLatency, false); | ||||||||
2707 | } | ||||||||
2708 | |||||||||
2709 | // Schedule aggressively for latency in PostRA mode. We don't check for | ||||||||
2710 | // acyclic latency during PostRA, and highly out-of-order processors will | ||||||||
2711 | // skip PostRA scheduling. | ||||||||
2712 | if (!OtherResLimited && | ||||||||
2713 | (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed, | ||||||||
2714 | RemLatency))) { | ||||||||
2715 | Policy.ReduceLatency |= true; | ||||||||
2716 | LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()do { } while (false) | ||||||||
2717 | << " RemainingLatency " << RemLatency << " + "do { } while (false) | ||||||||
2718 | << CurrZone.getCurrCycle() << "c > CritPath "do { } while (false) | ||||||||
2719 | << Rem.CriticalPath << "\n")do { } while (false); | ||||||||
2720 | } | ||||||||
2721 | // If the same resource is limiting inside and outside the zone, do nothing. | ||||||||
2722 | if (CurrZone.getZoneCritResIdx() == OtherCritIdx) | ||||||||
2723 | return; | ||||||||
2724 | |||||||||
2725 | LLVM_DEBUG(if (CurrZone.isResourceLimited()) {do { } while (false) | ||||||||
2726 | dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "do { } while (false) | ||||||||
2727 | << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";do { } while (false) | ||||||||
2728 | } if (OtherResLimited) dbgs()do { } while (false) | ||||||||
2729 | << " RemainingLimit: "do { } while (false) | ||||||||
2730 | << SchedModel->getResourceName(OtherCritIdx) << "\n";do { } while (false) | ||||||||
2731 | if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()do { } while (false) | ||||||||
2732 | << " Latency limited both directions.\n")do { } while (false); | ||||||||
2733 | |||||||||
2734 | if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx) | ||||||||
2735 | Policy.ReduceResIdx = CurrZone.getZoneCritResIdx(); | ||||||||
2736 | |||||||||
2737 | if (OtherResLimited) | ||||||||
2738 | Policy.DemandResIdx = OtherCritIdx; | ||||||||
2739 | } | ||||||||
2740 | |||||||||
2741 | #ifndef NDEBUG1 | ||||||||
2742 | const char *GenericSchedulerBase::getReasonStr( | ||||||||
2743 | GenericSchedulerBase::CandReason Reason) { | ||||||||
2744 | switch (Reason) { | ||||||||
2745 | case NoCand: return "NOCAND "; | ||||||||
2746 | case Only1: return "ONLY1 "; | ||||||||
2747 | case PhysReg: return "PHYS-REG "; | ||||||||
2748 | case RegExcess: return "REG-EXCESS"; | ||||||||
2749 | case RegCritical: return "REG-CRIT "; | ||||||||
2750 | case Stall: return "STALL "; | ||||||||
2751 | case Cluster: return "CLUSTER "; | ||||||||
2752 | case Weak: return "WEAK "; | ||||||||
2753 | case RegMax: return "REG-MAX "; | ||||||||
2754 | case ResourceReduce: return "RES-REDUCE"; | ||||||||
2755 | case ResourceDemand: return "RES-DEMAND"; | ||||||||
2756 | case TopDepthReduce: return "TOP-DEPTH "; | ||||||||
2757 | case TopPathReduce: return "TOP-PATH "; | ||||||||
2758 | case BotHeightReduce:return "BOT-HEIGHT"; | ||||||||
2759 | case BotPathReduce: return "BOT-PATH "; | ||||||||
2760 | case NextDefUse: return "DEF-USE "; | ||||||||
2761 | case NodeOrder: return "ORDER "; | ||||||||
2762 | }; | ||||||||
2763 | llvm_unreachable("Unknown reason!")__builtin_unreachable(); | ||||||||
2764 | } | ||||||||
2765 | |||||||||
2766 | void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) { | ||||||||
2767 | PressureChange P; | ||||||||
2768 | unsigned ResIdx = 0; | ||||||||
2769 | unsigned Latency = 0; | ||||||||
2770 | switch (Cand.Reason) { | ||||||||
2771 | default: | ||||||||
2772 | break; | ||||||||
2773 | case RegExcess: | ||||||||
2774 | P = Cand.RPDelta.Excess; | ||||||||
2775 | break; | ||||||||
2776 | case RegCritical: | ||||||||
2777 | P = Cand.RPDelta.CriticalMax; | ||||||||
2778 | break; | ||||||||
2779 | case RegMax: | ||||||||
2780 | P = Cand.RPDelta.CurrentMax; | ||||||||
2781 | break; | ||||||||
2782 | case ResourceReduce: | ||||||||
2783 | ResIdx = Cand.Policy.ReduceResIdx; | ||||||||
2784 | break; | ||||||||
2785 | case ResourceDemand: | ||||||||
2786 | ResIdx = Cand.Policy.DemandResIdx; | ||||||||
2787 | break; | ||||||||
2788 | case TopDepthReduce: | ||||||||
2789 | Latency = Cand.SU->getDepth(); | ||||||||
2790 | break; | ||||||||
2791 | case TopPathReduce: | ||||||||
2792 | Latency = Cand.SU->getHeight(); | ||||||||
2793 | break; | ||||||||
2794 | case BotHeightReduce: | ||||||||
2795 | Latency = Cand.SU->getHeight(); | ||||||||
2796 | break; | ||||||||
2797 | case BotPathReduce: | ||||||||
2798 | Latency = Cand.SU->getDepth(); | ||||||||
2799 | break; | ||||||||
2800 | } | ||||||||
2801 | dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason); | ||||||||
2802 | if (P.isValid()) | ||||||||
2803 | dbgs() << " " << TRI->getRegPressureSetName(P.getPSet()) | ||||||||
2804 | << ":" << P.getUnitInc() << " "; | ||||||||
2805 | else | ||||||||
2806 | dbgs() << " "; | ||||||||
2807 | if (ResIdx) | ||||||||
2808 | dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " "; | ||||||||
2809 | else | ||||||||
2810 | dbgs() << " "; | ||||||||
2811 | if (Latency) | ||||||||
2812 | dbgs() << " " << Latency << " cycles "; | ||||||||
2813 | else | ||||||||
2814 | dbgs() << " "; | ||||||||
2815 | dbgs() << '\n'; | ||||||||
2816 | } | ||||||||
2817 | #endif | ||||||||
2818 | |||||||||
2819 | namespace llvm { | ||||||||
2820 | /// Return true if this heuristic determines order. | ||||||||
2821 | /// TODO: Consider refactor return type of these functions as integer or enum, | ||||||||
2822 | /// as we may need to differentiate whether TryCand is better than Cand. | ||||||||
2823 | bool tryLess(int TryVal, int CandVal, | ||||||||
2824 | GenericSchedulerBase::SchedCandidate &TryCand, | ||||||||
2825 | GenericSchedulerBase::SchedCandidate &Cand, | ||||||||
2826 | GenericSchedulerBase::CandReason Reason) { | ||||||||
2827 | if (TryVal < CandVal) { | ||||||||
2828 | TryCand.Reason = Reason; | ||||||||
2829 | return true; | ||||||||
2830 | } | ||||||||
2831 | if (TryVal > CandVal) { | ||||||||
2832 | if (Cand.Reason > Reason) | ||||||||
2833 | Cand.Reason = Reason; | ||||||||
2834 | return true; | ||||||||
2835 | } | ||||||||
2836 | return false; | ||||||||
2837 | } | ||||||||
2838 | |||||||||
2839 | bool tryGreater(int TryVal, int CandVal, | ||||||||
2840 | GenericSchedulerBase::SchedCandidate &TryCand, | ||||||||
2841 | GenericSchedulerBase::SchedCandidate &Cand, | ||||||||
2842 | GenericSchedulerBase::CandReason Reason) { | ||||||||
2843 | if (TryVal > CandVal) { | ||||||||
2844 | TryCand.Reason = Reason; | ||||||||
2845 | return true; | ||||||||
2846 | } | ||||||||
2847 | if (TryVal < CandVal) { | ||||||||
2848 | if (Cand.Reason > Reason) | ||||||||
2849 | Cand.Reason = Reason; | ||||||||
2850 | return true; | ||||||||
2851 | } | ||||||||
2852 | return false; | ||||||||
2853 | } | ||||||||
2854 | |||||||||
2855 | bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, | ||||||||
2856 | GenericSchedulerBase::SchedCandidate &Cand, | ||||||||
2857 | SchedBoundary &Zone) { | ||||||||
2858 | if (Zone.isTop()) { | ||||||||
2859 | // Prefer the candidate with the lesser depth, but only if one of them has | ||||||||
2860 | // depth greater than the total latency scheduled so far, otherwise either | ||||||||
2861 | // of them could be scheduled now with no stall. | ||||||||
2862 | if (std::max(TryCand.SU->getDepth(), Cand.SU->getDepth()) > | ||||||||
2863 | Zone.getScheduledLatency()) { | ||||||||
2864 | if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), | ||||||||
2865 | TryCand, Cand, GenericSchedulerBase::TopDepthReduce)) | ||||||||
2866 | return true; | ||||||||
2867 | } | ||||||||
2868 | if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(), | ||||||||
2869 | TryCand, Cand, GenericSchedulerBase::TopPathReduce)) | ||||||||
2870 | return true; | ||||||||
2871 | } else { | ||||||||
2872 | // Prefer the candidate with the lesser height, but only if one of them has | ||||||||
2873 | // height greater than the total latency scheduled so far, otherwise either | ||||||||
2874 | // of them could be scheduled now with no stall. | ||||||||
2875 | if (std::max(TryCand.SU->getHeight(), Cand.SU->getHeight()) > | ||||||||
2876 | Zone.getScheduledLatency()) { | ||||||||
2877 | if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), | ||||||||
2878 | TryCand, Cand, GenericSchedulerBase::BotHeightReduce)) | ||||||||
2879 | return true; | ||||||||
2880 | } | ||||||||
2881 | if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(), | ||||||||
2882 | TryCand, Cand, GenericSchedulerBase::BotPathReduce)) | ||||||||
2883 | return true; | ||||||||
2884 | } | ||||||||
2885 | return false; | ||||||||
2886 | } | ||||||||
2887 | } // end namespace llvm | ||||||||
2888 | |||||||||
2889 | static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) { | ||||||||
2890 | LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")do { } while (false) | ||||||||
2891 | << GenericSchedulerBase::getReasonStr(Reason) << '\n')do { } while (false); | ||||||||
2892 | } | ||||||||
2893 | |||||||||
2894 | static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) { | ||||||||
2895 | tracePick(Cand.Reason, Cand.AtTop); | ||||||||
2896 | } | ||||||||
2897 | |||||||||
2898 | void GenericScheduler::initialize(ScheduleDAGMI *dag) { | ||||||||
2899 | assert(dag->hasVRegLiveness() &&((void)0) | ||||||||
2900 | "(PreRA)GenericScheduler needs vreg liveness")((void)0); | ||||||||
2901 | DAG = static_cast<ScheduleDAGMILive*>(dag); | ||||||||
2902 | SchedModel = DAG->getSchedModel(); | ||||||||
2903 | TRI = DAG->TRI; | ||||||||
2904 | |||||||||
2905 | if (RegionPolicy.ComputeDFSResult) | ||||||||
2906 | DAG->computeDFSResult(); | ||||||||
2907 | |||||||||
2908 | Rem.init(DAG, SchedModel); | ||||||||
2909 | Top.init(DAG, SchedModel, &Rem); | ||||||||
2910 | Bot.init(DAG, SchedModel, &Rem); | ||||||||
2911 | |||||||||
2912 | // Initialize resource counts. | ||||||||
2913 | |||||||||
2914 | // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or | ||||||||
2915 | // are disabled, then these HazardRecs will be disabled. | ||||||||
2916 | const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); | ||||||||
2917 | if (!Top.HazardRec) { | ||||||||
2918 | Top.HazardRec = | ||||||||
2919 | DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer( | ||||||||
2920 | Itin, DAG); | ||||||||
2921 | } | ||||||||
2922 | if (!Bot.HazardRec) { | ||||||||
2923 | Bot.HazardRec = | ||||||||
2924 | DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer( | ||||||||
2925 | Itin, DAG); | ||||||||
2926 | } | ||||||||
2927 | TopCand.SU = nullptr; | ||||||||
2928 | BotCand.SU = nullptr; | ||||||||
2929 | } | ||||||||
2930 | |||||||||
2931 | /// Initialize the per-region scheduling policy. | ||||||||
2932 | void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, | ||||||||
2933 | MachineBasicBlock::iterator End, | ||||||||
2934 | unsigned NumRegionInstrs) { | ||||||||
2935 | const MachineFunction &MF = *Begin->getMF(); | ||||||||
2936 | const TargetLowering *TLI = MF.getSubtarget().getTargetLowering(); | ||||||||
2937 | |||||||||
2938 | // Avoid setting up the register pressure tracker for small regions to save | ||||||||
2939 | // compile time. As a rough heuristic, only track pressure when the number of | ||||||||
2940 | // schedulable instructions exceeds half the integer register file. | ||||||||
2941 | RegionPolicy.ShouldTrackPressure = true; | ||||||||
2942 | for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) { | ||||||||
2943 | MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT; | ||||||||
2944 | if (TLI->isTypeLegal(LegalIntVT)) { | ||||||||
2945 | unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs( | ||||||||
2946 | TLI->getRegClassFor(LegalIntVT)); | ||||||||
2947 | RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2); | ||||||||
2948 | } | ||||||||
2949 | } | ||||||||
2950 | |||||||||
2951 | // For generic targets, we default to bottom-up, because it's simpler and more | ||||||||
2952 | // compile-time optimizations have been implemented in that direction. | ||||||||
2953 | RegionPolicy.OnlyBottomUp = true; | ||||||||
2954 | |||||||||
2955 | // Allow the subtarget to override default policy. | ||||||||
2956 | MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs); | ||||||||
2957 | |||||||||
2958 | // After subtarget overrides, apply command line options. | ||||||||
2959 | if (!EnableRegPressure) { | ||||||||
2960 | RegionPolicy.ShouldTrackPressure = false; | ||||||||
2961 | RegionPolicy.ShouldTrackLaneMasks = false; | ||||||||
2962 | } | ||||||||
2963 | |||||||||
2964 | // Check -misched-topdown/bottomup can force or unforce scheduling direction. | ||||||||
2965 | // e.g. -misched-bottomup=false allows scheduling in both directions. | ||||||||
2966 | assert((!ForceTopDown || !ForceBottomUp) &&((void)0) | ||||||||
2967 | "-misched-topdown incompatible with -misched-bottomup")((void)0); | ||||||||
2968 | if (ForceBottomUp.getNumOccurrences() > 0) { | ||||||||
2969 | RegionPolicy.OnlyBottomUp = ForceBottomUp; | ||||||||
2970 | if (RegionPolicy.OnlyBottomUp) | ||||||||
2971 | RegionPolicy.OnlyTopDown = false; | ||||||||
2972 | } | ||||||||
2973 | if (ForceTopDown.getNumOccurrences() > 0) { | ||||||||
2974 | RegionPolicy.OnlyTopDown = ForceTopDown; | ||||||||
2975 | if (RegionPolicy.OnlyTopDown) | ||||||||
2976 | RegionPolicy.OnlyBottomUp = false; | ||||||||
2977 | } | ||||||||
2978 | } | ||||||||
2979 | |||||||||
2980 | void GenericScheduler::dumpPolicy() const { | ||||||||
2981 | // Cannot completely remove virtual function even in release mode. | ||||||||
2982 | #if !defined(NDEBUG1) || defined(LLVM_ENABLE_DUMP) | ||||||||
2983 | dbgs() << "GenericScheduler RegionPolicy: " | ||||||||
2984 | << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure | ||||||||
2985 | << " OnlyTopDown=" << RegionPolicy.OnlyTopDown | ||||||||
2986 | << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp | ||||||||
2987 | << "\n"; | ||||||||
2988 | #endif | ||||||||
2989 | } | ||||||||
2990 | |||||||||
2991 | /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic | ||||||||
2992 | /// critical path by more cycles than it takes to drain the instruction buffer. | ||||||||
2993 | /// We estimate an upper bounds on in-flight instructions as: | ||||||||
2994 | /// | ||||||||
2995 | /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height ) | ||||||||
2996 | /// InFlightIterations = AcyclicPath / CyclesPerIteration | ||||||||
2997 | /// InFlightResources = InFlightIterations * LoopResources | ||||||||
2998 | /// | ||||||||
2999 | /// TODO: Check execution resources in addition to IssueCount. | ||||||||
3000 | void GenericScheduler::checkAcyclicLatency() { | ||||||||
3001 | if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath) | ||||||||
3002 | return; | ||||||||
3003 | |||||||||
3004 | // Scaled number of cycles per loop iteration. | ||||||||
3005 | unsigned IterCount = | ||||||||
3006 | std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(), | ||||||||
3007 | Rem.RemIssueCount); | ||||||||
3008 | // Scaled acyclic critical path. | ||||||||
3009 | unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor(); | ||||||||
3010 | // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop | ||||||||
3011 | unsigned InFlightCount = | ||||||||
3012 | (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount; | ||||||||
3013 | unsigned BufferLimit = | ||||||||
3014 | SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor(); | ||||||||
3015 | |||||||||
3016 | Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit; | ||||||||
3017 | |||||||||
3018 | LLVM_DEBUG(do { } while (false) | ||||||||
3019 | dbgs() << "IssueCycles="do { } while (false) | ||||||||
3020 | << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "do { } while (false) | ||||||||
3021 | << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()do { } while (false) | ||||||||
3022 | << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCountdo { } while (false) | ||||||||
3023 | << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()do { } while (false) | ||||||||
3024 | << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";do { } while (false) | ||||||||
3025 | if (Rem.IsAcyclicLatencyLimited) dbgs() << " ACYCLIC LATENCY LIMIT\n")do { } while (false); | ||||||||
3026 | } | ||||||||
3027 | |||||||||
3028 | void GenericScheduler::registerRoots() { | ||||||||
3029 | Rem.CriticalPath = DAG->ExitSU.getDepth(); | ||||||||
3030 | |||||||||
3031 | // Some roots may not feed into ExitSU. Check all of them in case. | ||||||||
3032 | for (const SUnit *SU : Bot.Available) { | ||||||||
3033 | if (SU->getDepth() > Rem.CriticalPath) | ||||||||
3034 | Rem.CriticalPath = SU->getDepth(); | ||||||||
3035 | } | ||||||||
3036 | LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n')do { } while (false); | ||||||||
3037 | if (DumpCriticalPathLength) { | ||||||||
3038 | errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n"; | ||||||||
3039 | } | ||||||||
3040 | |||||||||
3041 | if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) { | ||||||||
3042 | Rem.CyclicCritPath = DAG->computeCyclicCriticalPath(); | ||||||||
3043 | checkAcyclicLatency(); | ||||||||
3044 | } | ||||||||
3045 | } | ||||||||
3046 | |||||||||
3047 | namespace llvm { | ||||||||
3048 | bool tryPressure(const PressureChange &TryP, | ||||||||
3049 | const PressureChange &CandP, | ||||||||
3050 | GenericSchedulerBase::SchedCandidate &TryCand, | ||||||||
3051 | GenericSchedulerBase::SchedCandidate &Cand, | ||||||||
3052 | GenericSchedulerBase::CandReason Reason, | ||||||||
3053 | const TargetRegisterInfo *TRI, | ||||||||
3054 | const MachineFunction &MF) { | ||||||||
3055 | // If one candidate decreases and the other increases, go with it. | ||||||||
3056 | // Invalid candidates have UnitInc==0. | ||||||||
3057 | if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand, | ||||||||
3058 | Reason)) { | ||||||||
3059 | return true; | ||||||||
3060 | } | ||||||||
3061 | // Do not compare the magnitude of pressure changes between top and bottom | ||||||||
3062 | // boundary. | ||||||||
3063 | if (Cand.AtTop != TryCand.AtTop) | ||||||||
3064 | return false; | ||||||||
3065 | |||||||||
3066 | // If both candidates affect the same set in the same boundary, go with the | ||||||||
3067 | // smallest increase. | ||||||||
3068 | unsigned TryPSet = TryP.getPSetOrMax(); | ||||||||
3069 | unsigned CandPSet = CandP.getPSetOrMax(); | ||||||||
3070 | if (TryPSet == CandPSet) { | ||||||||
3071 | return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand, | ||||||||
3072 | Reason); | ||||||||
3073 | } | ||||||||
3074 | |||||||||
3075 | int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) : | ||||||||
3076 | std::numeric_limits<int>::max(); | ||||||||
3077 | |||||||||
3078 | int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) : | ||||||||
3079 | std::numeric_limits<int>::max(); | ||||||||
3080 | |||||||||
3081 | // If the candidates are decreasing pressure, reverse priority. | ||||||||
3082 | if (TryP.getUnitInc() < 0) | ||||||||
3083 | std::swap(TryRank, CandRank); | ||||||||
3084 | return tryGreater(TryRank, CandRank, TryCand, Cand, Reason); | ||||||||
3085 | } | ||||||||
3086 | |||||||||
3087 | unsigned getWeakLeft(const SUnit *SU, bool isTop) { | ||||||||
3088 | return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft; | ||||||||
3089 | } | ||||||||
3090 | |||||||||
3091 | /// Minimize physical register live ranges. Regalloc wants them adjacent to | ||||||||
3092 | /// their physreg def/use. | ||||||||
3093 | /// | ||||||||
3094 | /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf | ||||||||
3095 | /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled | ||||||||
3096 | /// with the operation that produces or consumes the physreg. We'll do this when | ||||||||
3097 | /// regalloc has support for parallel copies. | ||||||||
3098 | int biasPhysReg(const SUnit *SU, bool isTop) { | ||||||||
3099 | const MachineInstr *MI = SU->getInstr(); | ||||||||
3100 | |||||||||
3101 | if (MI->isCopy()) { | ||||||||
3102 | unsigned ScheduledOper = isTop ? 1 : 0; | ||||||||
3103 | unsigned UnscheduledOper = isTop ? 0 : 1; | ||||||||
3104 | // If we have already scheduled the physreg produce/consumer, immediately | ||||||||
3105 | // schedule the copy. | ||||||||
3106 | if (Register::isPhysicalRegister(MI->getOperand(ScheduledOper).getReg())) | ||||||||
3107 | return 1; | ||||||||
3108 | // If the physreg is at the boundary, defer it. Otherwise schedule it | ||||||||
3109 | // immediately to free the dependent. We can hoist the copy later. | ||||||||
3110 | bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft; | ||||||||
3111 | if (Register::isPhysicalRegister(MI->getOperand(UnscheduledOper).getReg())) | ||||||||
3112 | return AtBoundary ? -1 : 1; | ||||||||
3113 | } | ||||||||
3114 | |||||||||
3115 | if (MI->isMoveImmediate()) { | ||||||||
3116 | // If we have a move immediate and all successors have been assigned, bias | ||||||||
3117 | // towards scheduling this later. Make sure all register defs are to | ||||||||
3118 | // physical registers. | ||||||||
3119 | bool DoBias = true; | ||||||||
3120 | for (const MachineOperand &Op : MI->defs()) { | ||||||||
3121 | if (Op.isReg() && !Register::isPhysicalRegister(Op.getReg())) { | ||||||||
3122 | DoBias = false; | ||||||||
3123 | break; | ||||||||
3124 | } | ||||||||
3125 | } | ||||||||
3126 | |||||||||
3127 | if (DoBias) | ||||||||
3128 | return isTop ? -1 : 1; | ||||||||
3129 | } | ||||||||
3130 | |||||||||
3131 | return 0; | ||||||||
3132 | } | ||||||||
3133 | } // end namespace llvm | ||||||||
3134 | |||||||||
3135 | void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU, | ||||||||
3136 | bool AtTop, | ||||||||
3137 | const RegPressureTracker &RPTracker, | ||||||||
3138 | RegPressureTracker &TempTracker) { | ||||||||
3139 | Cand.SU = SU; | ||||||||
3140 | Cand.AtTop = AtTop; | ||||||||
3141 | if (DAG->isTrackingPressure()) { | ||||||||
3142 | if (AtTop) { | ||||||||
3143 | TempTracker.getMaxDownwardPressureDelta( | ||||||||
3144 | Cand.SU->getInstr(), | ||||||||
3145 | Cand.RPDelta, | ||||||||
3146 | DAG->getRegionCriticalPSets(), | ||||||||
3147 | DAG->getRegPressure().MaxSetPressure); | ||||||||
3148 | } else { | ||||||||
3149 | if (VerifyScheduling) { | ||||||||
3150 | TempTracker.getMaxUpwardPressureDelta( | ||||||||
3151 | Cand.SU->getInstr(), | ||||||||
3152 | &DAG->getPressureDiff(Cand.SU), | ||||||||
3153 | Cand.RPDelta, | ||||||||
3154 | DAG->getRegionCriticalPSets(), | ||||||||
3155 | DAG->getRegPressure().MaxSetPressure); | ||||||||
3156 | } else { | ||||||||
3157 | RPTracker.getUpwardPressureDelta( | ||||||||
3158 | Cand.SU->getInstr(), | ||||||||
3159 | DAG->getPressureDiff(Cand.SU), | ||||||||
3160 | Cand.RPDelta, | ||||||||
3161 | DAG->getRegionCriticalPSets(), | ||||||||
3162 | DAG->getRegPressure().MaxSetPressure); | ||||||||
3163 | } | ||||||||
3164 | } | ||||||||
3165 | } | ||||||||
3166 | LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()do { } while (false) | ||||||||
3167 | << " Try SU(" << Cand.SU->NodeNum << ") "do { } while (false) | ||||||||
3168 | << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":"do { } while (false) | ||||||||
3169 | << Cand.RPDelta.Excess.getUnitInc() << "\n")do { } while (false); | ||||||||
3170 | } | ||||||||
3171 | |||||||||
3172 | /// Apply a set of heuristics to a new candidate. Heuristics are currently | ||||||||
3173 | /// hierarchical. This may be more efficient than a graduated cost model because | ||||||||
3174 | /// we don't need to evaluate all aspects of the model for each node in the | ||||||||
3175 | /// queue. But it's really done to make the heuristics easier to debug and | ||||||||
3176 | /// statistically analyze. | ||||||||
3177 | /// | ||||||||
3178 | /// \param Cand provides the policy and current best candidate. | ||||||||
3179 | /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized. | ||||||||
3180 | /// \param Zone describes the scheduled zone that we are extending, or nullptr | ||||||||
3181 | /// if Cand is from a different zone than TryCand. | ||||||||
3182 | /// \return \c true if TryCand is better than Cand (Reason is NOT NoCand) | ||||||||
3183 | bool GenericScheduler::tryCandidate(SchedCandidate &Cand, | ||||||||
3184 | SchedCandidate &TryCand, | ||||||||
3185 | SchedBoundary *Zone) const { | ||||||||
3186 | // Initialize the candidate if needed. | ||||||||
3187 | if (!Cand.isValid()) { | ||||||||
3188 | TryCand.Reason = NodeOrder; | ||||||||
3189 | return true; | ||||||||
3190 | } | ||||||||
3191 | |||||||||
3192 | // Bias PhysReg Defs and copies to their uses and defined respectively. | ||||||||
3193 | if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop), | ||||||||
3194 | biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg)) | ||||||||
3195 | return TryCand.Reason != NoCand; | ||||||||
3196 | |||||||||
3197 | // Avoid exceeding the target's limit. | ||||||||
3198 | if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess, | ||||||||
3199 | Cand.RPDelta.Excess, | ||||||||
3200 | TryCand, Cand, RegExcess, TRI, | ||||||||
3201 | DAG->MF)) | ||||||||
3202 | return TryCand.Reason != NoCand; | ||||||||
3203 | |||||||||
3204 | // Avoid increasing the max critical pressure in the scheduled region. | ||||||||
3205 | if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax, | ||||||||
3206 | Cand.RPDelta.CriticalMax, | ||||||||
3207 | TryCand, Cand, RegCritical, TRI, | ||||||||
3208 | DAG->MF)) | ||||||||
3209 | return TryCand.Reason != NoCand; | ||||||||
3210 | |||||||||
3211 | // We only compare a subset of features when comparing nodes between | ||||||||
3212 | // Top and Bottom boundary. Some properties are simply incomparable, in many | ||||||||
3213 | // other instances we should only override the other boundary if something | ||||||||
3214 | // is a clear good pick on one boundary. Skip heuristics that are more | ||||||||
3215 | // "tie-breaking" in nature. | ||||||||
3216 | bool SameBoundary = Zone != nullptr; | ||||||||
3217 | if (SameBoundary) { | ||||||||
3218 | // For loops that are acyclic path limited, aggressively schedule for | ||||||||
3219 | // latency. Within an single cycle, whenever CurrMOps > 0, allow normal | ||||||||
3220 | // heuristics to take precedence. | ||||||||
3221 | if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() && | ||||||||
3222 | tryLatency(TryCand, Cand, *Zone)) | ||||||||
3223 | return TryCand.Reason != NoCand; | ||||||||
3224 | |||||||||
3225 | // Prioritize instructions that read unbuffered resources by stall cycles. | ||||||||
3226 | if (tryLess(Zone->getLatencyStallCycles(TryCand.SU), | ||||||||
3227 | Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) | ||||||||
3228 | return TryCand.Reason != NoCand; | ||||||||
3229 | } | ||||||||
3230 | |||||||||
3231 | // Keep clustered nodes together to encourage downstream peephole | ||||||||
3232 | // optimizations which may reduce resource requirements. | ||||||||
3233 | // | ||||||||
3234 | // This is a best effort to set things up for a post-RA pass. Optimizations | ||||||||
3235 | // like generating loads of multiple registers should ideally be done within | ||||||||
3236 | // the scheduler pass by combining the loads during DAG postprocessing. | ||||||||
3237 | const SUnit *CandNextClusterSU = | ||||||||
3238 | Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred(); | ||||||||
3239 | const SUnit *TryCandNextClusterSU = | ||||||||
3240 | TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred(); | ||||||||
3241 | if (tryGreater(TryCand.SU == TryCandNextClusterSU, | ||||||||
3242 | Cand.SU == CandNextClusterSU, | ||||||||
3243 | TryCand, Cand, Cluster)) | ||||||||
3244 | return TryCand.Reason != NoCand; | ||||||||
3245 | |||||||||
3246 | if (SameBoundary) { | ||||||||
3247 | // Weak edges are for clustering and other constraints. | ||||||||
3248 | if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop), | ||||||||
3249 | getWeakLeft(Cand.SU, Cand.AtTop), | ||||||||
3250 | TryCand, Cand, Weak)) | ||||||||
3251 | return TryCand.Reason != NoCand; | ||||||||
3252 | } | ||||||||
3253 | |||||||||
3254 | // Avoid increasing the max pressure of the entire region. | ||||||||
3255 | if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax, | ||||||||
3256 | Cand.RPDelta.CurrentMax, | ||||||||
3257 | TryCand, Cand, RegMax, TRI, | ||||||||
3258 | DAG->MF)) | ||||||||
3259 | return TryCand.Reason != NoCand; | ||||||||
3260 | |||||||||
3261 | if (SameBoundary) { | ||||||||
3262 | // Avoid critical resource consumption and balance the schedule. | ||||||||
3263 | TryCand.initResourceDelta(DAG, SchedModel); | ||||||||
3264 | if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, | ||||||||
3265 | TryCand, Cand, ResourceReduce)) | ||||||||
3266 | return TryCand.Reason != NoCand; | ||||||||
3267 | if (tryGreater(TryCand.ResDelta.DemandedResources, | ||||||||
3268 | Cand.ResDelta.DemandedResources, | ||||||||
3269 | TryCand, Cand, ResourceDemand)) | ||||||||
3270 | return TryCand.Reason != NoCand; | ||||||||
3271 | |||||||||
3272 | // Avoid serializing long latency dependence chains. | ||||||||
3273 | // For acyclic path limited loops, latency was already checked above. | ||||||||
3274 | if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency && | ||||||||
3275 | !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone)) | ||||||||
3276 | return TryCand.Reason != NoCand; | ||||||||
3277 | |||||||||
3278 | // Fall through to original instruction order. | ||||||||
3279 | if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) | ||||||||
3280 | || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) { | ||||||||
3281 | TryCand.Reason = NodeOrder; | ||||||||
3282 | return true; | ||||||||
3283 | } | ||||||||
3284 | } | ||||||||
3285 | |||||||||
3286 | return false; | ||||||||
3287 | } | ||||||||
3288 | |||||||||
3289 | /// Pick the best candidate from the queue. | ||||||||
3290 | /// | ||||||||
3291 | /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during | ||||||||
3292 | /// DAG building. To adjust for the current scheduling location we need to | ||||||||
3293 | /// maintain the number of vreg uses remaining to be top-scheduled. | ||||||||
3294 | void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone, | ||||||||
3295 | const CandPolicy &ZonePolicy, | ||||||||
3296 | const RegPressureTracker &RPTracker, | ||||||||
3297 | SchedCandidate &Cand) { | ||||||||
3298 | // getMaxPressureDelta temporarily modifies the tracker. | ||||||||
3299 | RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker); | ||||||||
3300 | |||||||||
3301 | ReadyQueue &Q = Zone.Available; | ||||||||
3302 | for (SUnit *SU : Q) { | ||||||||
3303 | |||||||||
3304 | SchedCandidate TryCand(ZonePolicy); | ||||||||
3305 | initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker); | ||||||||
3306 | // Pass SchedBoundary only when comparing nodes from the same boundary. | ||||||||
3307 | SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr; | ||||||||
3308 | if (tryCandidate(Cand, TryCand, ZoneArg)) { | ||||||||
3309 | // Initialize resource delta if needed in case future heuristics query it. | ||||||||
3310 | if (TryCand.ResDelta == SchedResourceDelta()) | ||||||||
3311 | TryCand.initResourceDelta(DAG, SchedModel); | ||||||||
3312 | Cand.setBest(TryCand); | ||||||||
3313 | LLVM_DEBUG(traceCandidate(Cand))do { } while (false); | ||||||||
3314 | } | ||||||||
3315 | } | ||||||||
3316 | } | ||||||||
3317 | |||||||||
3318 | /// Pick the best candidate node from either the top or bottom queue. | ||||||||
3319 | SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) { | ||||||||
3320 | // Schedule as far as possible in the direction of no choice. This is most | ||||||||
3321 | // efficient, but also provides the best heuristics for CriticalPSets. | ||||||||
3322 | if (SUnit *SU = Bot.pickOnlyChoice()) { | ||||||||
3323 | IsTopNode = false; | ||||||||
3324 | tracePick(Only1, false); | ||||||||
3325 | return SU; | ||||||||
3326 | } | ||||||||
3327 | if (SUnit *SU = Top.pickOnlyChoice()) { | ||||||||
3328 | IsTopNode = true; | ||||||||
3329 | tracePick(Only1, true); | ||||||||
3330 | return SU; | ||||||||
3331 | } | ||||||||
3332 | // Set the bottom-up policy based on the state of the current bottom zone and | ||||||||
3333 | // the instructions outside the zone, including the top zone. | ||||||||
3334 | CandPolicy BotPolicy; | ||||||||
3335 | setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top); | ||||||||
3336 | // Set the top-down policy based on the state of the current top zone and | ||||||||
3337 | // the instructions outside the zone, including the bottom zone. | ||||||||
3338 | CandPolicy TopPolicy; | ||||||||
3339 | setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot); | ||||||||
3340 | |||||||||
3341 | // See if BotCand is still valid (because we previously scheduled from Top). | ||||||||
3342 | LLVM_DEBUG(dbgs() << "Picking from Bot:\n")do { } while (false); | ||||||||
3343 | if (!BotCand.isValid() || BotCand.SU->isScheduled || | ||||||||
3344 | BotCand.Policy != BotPolicy) { | ||||||||
3345 | BotCand.reset(CandPolicy()); | ||||||||
3346 | pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand); | ||||||||
3347 | assert(BotCand.Reason != NoCand && "failed to find the first candidate")((void)0); | ||||||||
3348 | } else { | ||||||||
3349 | LLVM_DEBUG(traceCandidate(BotCand))do { } while (false); | ||||||||
3350 | #ifndef NDEBUG1 | ||||||||
3351 | if (VerifyScheduling) { | ||||||||
3352 | SchedCandidate TCand; | ||||||||
3353 | TCand.reset(CandPolicy()); | ||||||||
3354 | pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand); | ||||||||
3355 | assert(TCand.SU == BotCand.SU &&((void)0) | ||||||||
3356 | "Last pick result should correspond to re-picking right now")((void)0); | ||||||||
3357 | } | ||||||||
3358 | #endif | ||||||||
3359 | } | ||||||||
3360 | |||||||||
3361 | // Check if the top Q has a better candidate. | ||||||||
3362 | LLVM_DEBUG(dbgs() << "Picking from Top:\n")do { } while (false); | ||||||||
3363 | if (!TopCand.isValid() || TopCand.SU->isScheduled || | ||||||||
3364 | TopCand.Policy != TopPolicy) { | ||||||||
3365 | TopCand.reset(CandPolicy()); | ||||||||
3366 | pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand); | ||||||||
3367 | assert(TopCand.Reason != NoCand && "failed to find the first candidate")((void)0); | ||||||||
3368 | } else { | ||||||||
3369 | LLVM_DEBUG(traceCandidate(TopCand))do { } while (false); | ||||||||
3370 | #ifndef NDEBUG1 | ||||||||
3371 | if (VerifyScheduling) { | ||||||||
3372 | SchedCandidate TCand; | ||||||||
3373 | TCand.reset(CandPolicy()); | ||||||||
3374 | pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand); | ||||||||
3375 | assert(TCand.SU == TopCand.SU &&((void)0) | ||||||||
3376 | "Last pick result should correspond to re-picking right now")((void)0); | ||||||||
3377 | } | ||||||||
3378 | #endif | ||||||||
3379 | } | ||||||||
3380 | |||||||||
3381 | // Pick best from BotCand and TopCand. | ||||||||
3382 | assert(BotCand.isValid())((void)0); | ||||||||
3383 | assert(TopCand.isValid())((void)0); | ||||||||
3384 | SchedCandidate Cand = BotCand; | ||||||||
3385 | TopCand.Reason = NoCand; | ||||||||
3386 | if (tryCandidate(Cand, TopCand, nullptr)) { | ||||||||
3387 | Cand.setBest(TopCand); | ||||||||
3388 | LLVM_DEBUG(traceCandidate(Cand))do { } while (false); | ||||||||
3389 | } | ||||||||
3390 | |||||||||
3391 | IsTopNode = Cand.AtTop; | ||||||||
3392 | tracePick(Cand); | ||||||||
3393 | return Cand.SU; | ||||||||
3394 | } | ||||||||
3395 | |||||||||
3396 | /// Pick the best node to balance the schedule. Implements MachineSchedStrategy. | ||||||||
3397 | SUnit *GenericScheduler::pickNode(bool &IsTopNode) { | ||||||||
3398 | if (DAG->top() == DAG->bottom()) { | ||||||||
3399 | assert(Top.Available.empty() && Top.Pending.empty() &&((void)0) | ||||||||
3400 | Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage")((void)0); | ||||||||
3401 | return nullptr; | ||||||||
3402 | } | ||||||||
3403 | SUnit *SU; | ||||||||
3404 | do { | ||||||||
3405 | if (RegionPolicy.OnlyTopDown) { | ||||||||
3406 | SU = Top.pickOnlyChoice(); | ||||||||
3407 | if (!SU) { | ||||||||
3408 | CandPolicy NoPolicy; | ||||||||
3409 | TopCand.reset(NoPolicy); | ||||||||
3410 | pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand); | ||||||||
3411 | assert(TopCand.Reason != NoCand && "failed to find a candidate")((void)0); | ||||||||
3412 | tracePick(TopCand); | ||||||||
3413 | SU = TopCand.SU; | ||||||||
3414 | } | ||||||||
3415 | IsTopNode = true; | ||||||||
3416 | } else if (RegionPolicy.OnlyBottomUp) { | ||||||||
3417 | SU = Bot.pickOnlyChoice(); | ||||||||
3418 | if (!SU) { | ||||||||
3419 | CandPolicy NoPolicy; | ||||||||
3420 | BotCand.reset(NoPolicy); | ||||||||
3421 | pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand); | ||||||||
3422 | assert(BotCand.Reason != NoCand && "failed to find a candidate")((void)0); | ||||||||
3423 | tracePick(BotCand); | ||||||||
3424 | SU = BotCand.SU; | ||||||||
3425 | } | ||||||||
3426 | IsTopNode = false; | ||||||||
3427 | } else { | ||||||||
3428 | SU = pickNodeBidirectional(IsTopNode); | ||||||||
3429 | } | ||||||||
3430 | } while (SU->isScheduled); | ||||||||
3431 | |||||||||
3432 | if (SU->isTopReady()) | ||||||||
3433 | Top.removeReady(SU); | ||||||||
3434 | if (SU->isBottomReady()) | ||||||||
3435 | Bot.removeReady(SU); | ||||||||
3436 | |||||||||
3437 | LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "do { } while (false) | ||||||||
3438 | << *SU->getInstr())do { } while (false); | ||||||||
3439 | return SU; | ||||||||
3440 | } | ||||||||
3441 | |||||||||
3442 | void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) { | ||||||||
3443 | MachineBasicBlock::iterator InsertPos = SU->getInstr(); | ||||||||
3444 | if (!isTop) | ||||||||
3445 | ++InsertPos; | ||||||||
3446 | SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs; | ||||||||
3447 | |||||||||
3448 | // Find already scheduled copies with a single physreg dependence and move | ||||||||
3449 | // them just above the scheduled instruction. | ||||||||
3450 | for (SDep &Dep : Deps) { | ||||||||
3451 | if (Dep.getKind() != SDep::Data || | ||||||||
3452 | !Register::isPhysicalRegister(Dep.getReg())) | ||||||||
3453 | continue; | ||||||||
3454 | SUnit *DepSU = Dep.getSUnit(); | ||||||||
3455 | if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1) | ||||||||
3456 | continue; | ||||||||
3457 | MachineInstr *Copy = DepSU->getInstr(); | ||||||||
3458 | if (!Copy->isCopy() && !Copy->isMoveImmediate()) | ||||||||
3459 | continue; | ||||||||
3460 | LLVM_DEBUG(dbgs() << " Rescheduling physreg copy ";do { } while (false) | ||||||||
3461 | DAG->dumpNode(*Dep.getSUnit()))do { } while (false); | ||||||||
3462 | DAG->moveInstruction(Copy, InsertPos); | ||||||||
3463 | } | ||||||||
3464 | } | ||||||||
3465 | |||||||||
3466 | /// Update the scheduler's state after scheduling a node. This is the same node | ||||||||
3467 | /// that was just returned by pickNode(). However, ScheduleDAGMILive needs to | ||||||||
3468 | /// update it's state based on the current cycle before MachineSchedStrategy | ||||||||
3469 | /// does. | ||||||||
3470 | /// | ||||||||
3471 | /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling | ||||||||
3472 | /// them here. See comments in biasPhysReg. | ||||||||
3473 | void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { | ||||||||
3474 | if (IsTopNode) { | ||||||||
3475 | SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle()); | ||||||||
3476 | Top.bumpNode(SU); | ||||||||
3477 | if (SU->hasPhysRegUses) | ||||||||
3478 | reschedulePhysReg(SU, true); | ||||||||
3479 | } else { | ||||||||
3480 | SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle()); | ||||||||
3481 | Bot.bumpNode(SU); | ||||||||
3482 | if (SU->hasPhysRegDefs) | ||||||||
3483 | reschedulePhysReg(SU, false); | ||||||||
3484 | } | ||||||||
3485 | } | ||||||||
3486 | |||||||||
3487 | /// Create the standard converging machine scheduler. This will be used as the | ||||||||
3488 | /// default scheduler if the target does not set a default. | ||||||||
3489 | ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) { | ||||||||
3490 | ScheduleDAGMILive *DAG = | ||||||||
3491 | new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C)); | ||||||||
3492 | // Register DAG post-processors. | ||||||||
3493 | // | ||||||||
3494 | // FIXME: extend the mutation API to allow earlier mutations to instantiate | ||||||||
3495 | // data and pass it to later mutations. Have a single mutation that gathers | ||||||||
3496 | // the interesting nodes in one pass. | ||||||||
3497 | DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI)); | ||||||||
3498 | return DAG; | ||||||||
3499 | } | ||||||||
3500 | |||||||||
3501 | static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) { | ||||||||
3502 | return createGenericSchedLive(C); | ||||||||
3503 | } | ||||||||
3504 | |||||||||
3505 | static MachineSchedRegistry | ||||||||
3506 | GenericSchedRegistry("converge", "Standard converging scheduler.", | ||||||||
3507 | createConvergingSched); | ||||||||
3508 | |||||||||
3509 | //===----------------------------------------------------------------------===// | ||||||||
3510 | // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy. | ||||||||
3511 | //===----------------------------------------------------------------------===// | ||||||||
3512 | |||||||||
3513 | void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) { | ||||||||
3514 | DAG = Dag; | ||||||||
3515 | SchedModel = DAG->getSchedModel(); | ||||||||
3516 | TRI = DAG->TRI; | ||||||||
3517 | |||||||||
3518 | Rem.init(DAG, SchedModel); | ||||||||
3519 | Top.init(DAG, SchedModel, &Rem); | ||||||||
3520 | BotRoots.clear(); | ||||||||
3521 | |||||||||
3522 | // Initialize the HazardRecognizers. If itineraries don't exist, are empty, | ||||||||
3523 | // or are disabled, then these HazardRecs will be disabled. | ||||||||
3524 | const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); | ||||||||
3525 | if (!Top.HazardRec) { | ||||||||
3526 | Top.HazardRec = | ||||||||
3527 | DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer( | ||||||||
3528 | Itin, DAG); | ||||||||
3529 | } | ||||||||
3530 | } | ||||||||
3531 | |||||||||
3532 | void PostGenericScheduler::registerRoots() { | ||||||||
3533 | Rem.CriticalPath = DAG->ExitSU.getDepth(); | ||||||||
3534 | |||||||||
3535 | // Some roots may not feed into ExitSU. Check all of them in case. | ||||||||
3536 | for (const SUnit *SU : BotRoots) { | ||||||||
3537 | if (SU->getDepth() > Rem.CriticalPath) | ||||||||
3538 | Rem.CriticalPath = SU->getDepth(); | ||||||||
3539 | } | ||||||||
3540 | LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n')do { } while (false); | ||||||||
3541 | if (DumpCriticalPathLength) { | ||||||||
3542 | errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n"; | ||||||||
3543 | } | ||||||||
3544 | } | ||||||||
3545 | |||||||||
3546 | /// Apply a set of heuristics to a new candidate for PostRA scheduling. | ||||||||
3547 | /// | ||||||||
3548 | /// \param Cand provides the policy and current best candidate. | ||||||||
3549 | /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized. | ||||||||
3550 | /// \return \c true if TryCand is better than Cand (Reason is NOT NoCand) | ||||||||
3551 | bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand, | ||||||||
3552 | SchedCandidate &TryCand) { | ||||||||
3553 | // Initialize the candidate if needed. | ||||||||
3554 | if (!Cand.isValid()) { | ||||||||
3555 | TryCand.Reason = NodeOrder; | ||||||||
3556 | return true; | ||||||||
3557 | } | ||||||||
3558 | |||||||||
3559 | // Prioritize instructions that read unbuffered resources by stall cycles. | ||||||||
3560 | if (tryLess(Top.getLatencyStallCycles(TryCand.SU), | ||||||||
3561 | Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) | ||||||||
3562 | return TryCand.Reason != NoCand; | ||||||||
3563 | |||||||||
3564 | // Keep clustered nodes together. | ||||||||
3565 | if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(), | ||||||||
3566 | Cand.SU == DAG->getNextClusterSucc(), | ||||||||
3567 | TryCand, Cand, Cluster)) | ||||||||
3568 | return TryCand.Reason != NoCand; | ||||||||
3569 | |||||||||
3570 | // Avoid critical resource consumption and balance the schedule. | ||||||||
3571 | if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, | ||||||||
3572 | TryCand, Cand, ResourceReduce)) | ||||||||
3573 | return TryCand.Reason != NoCand; | ||||||||
3574 | if (tryGreater(TryCand.ResDelta.DemandedResources, | ||||||||
3575 | Cand.ResDelta.DemandedResources, | ||||||||
3576 | TryCand, Cand, ResourceDemand)) | ||||||||
3577 | return TryCand.Reason != NoCand; | ||||||||
3578 | |||||||||
3579 | // Avoid serializing long latency dependence chains. | ||||||||
3580 | if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) { | ||||||||
3581 | return TryCand.Reason != NoCand; | ||||||||
3582 | } | ||||||||
3583 | |||||||||
3584 | // Fall through to original instruction order. | ||||||||
3585 | if (TryCand.SU->NodeNum < Cand.SU->NodeNum) { | ||||||||
3586 | TryCand.Reason = NodeOrder; | ||||||||
3587 | return true; | ||||||||
3588 | } | ||||||||
3589 | |||||||||
3590 | return false; | ||||||||
3591 | } | ||||||||
3592 | |||||||||
3593 | void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) { | ||||||||
3594 | ReadyQueue &Q = Top.Available; | ||||||||
3595 | for (SUnit *SU : Q) { | ||||||||
3596 | SchedCandidate TryCand(Cand.Policy); | ||||||||
3597 | TryCand.SU = SU; | ||||||||
3598 | TryCand.AtTop = true; | ||||||||
3599 | TryCand.initResourceDelta(DAG, SchedModel); | ||||||||
3600 | if (tryCandidate(Cand, TryCand)) { | ||||||||
3601 | Cand.setBest(TryCand); | ||||||||
3602 | LLVM_DEBUG(traceCandidate(Cand))do { } while (false); | ||||||||
3603 | } | ||||||||
3604 | } | ||||||||
3605 | } | ||||||||
3606 | |||||||||
3607 | /// Pick the next node to schedule. | ||||||||
3608 | SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) { | ||||||||
3609 | if (DAG->top() == DAG->bottom()) { | ||||||||
| |||||||||
3610 | assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage")((void)0); | ||||||||
3611 | return nullptr; | ||||||||
3612 | } | ||||||||
3613 | SUnit *SU; | ||||||||
3614 | do { | ||||||||
3615 | SU = Top.pickOnlyChoice(); | ||||||||
3616 | if (SU
| ||||||||
3617 | tracePick(Only1, true); | ||||||||
3618 | } else { | ||||||||
3619 | CandPolicy NoPolicy; | ||||||||
3620 | SchedCandidate TopCand(NoPolicy); | ||||||||
3621 | // Set the top-down policy based on the state of the current top zone and | ||||||||
3622 | // the instructions outside the zone, including the bottom zone. | ||||||||
3623 | setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr); | ||||||||
3624 | pickNodeFromQueue(TopCand); | ||||||||
3625 | assert(TopCand.Reason != NoCand && "failed to find a candidate")((void)0); | ||||||||
3626 | tracePick(TopCand); | ||||||||
3627 | SU = TopCand.SU; | ||||||||
3628 | } | ||||||||
3629 | } while (SU->isScheduled); | ||||||||
| |||||||||
3630 | |||||||||
3631 | IsTopNode = true; | ||||||||
3632 | Top.removeReady(SU); | ||||||||
3633 | |||||||||
3634 | LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "do { } while (false) | ||||||||
3635 | << *SU->getInstr())do { } while (false); | ||||||||
3636 | return SU; | ||||||||
3637 | } | ||||||||
3638 | |||||||||
3639 | /// Called after ScheduleDAGMI has scheduled an instruction and updated | ||||||||
3640 | /// scheduled/remaining flags in the DAG nodes. | ||||||||
3641 | void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { | ||||||||
3642 | SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle()); | ||||||||
3643 | Top.bumpNode(SU); | ||||||||
3644 | } | ||||||||
3645 | |||||||||
3646 | ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) { | ||||||||
3647 | return new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C), | ||||||||
3648 | /*RemoveKillFlags=*/true); | ||||||||
3649 | } | ||||||||
3650 | |||||||||
3651 | //===----------------------------------------------------------------------===// | ||||||||
3652 | // ILP Scheduler. Currently for experimental analysis of heuristics. | ||||||||
3653 | //===----------------------------------------------------------------------===// | ||||||||
3654 | |||||||||
3655 | namespace { | ||||||||
3656 | |||||||||
3657 | /// Order nodes by the ILP metric. | ||||||||
3658 | struct ILPOrder { | ||||||||
3659 | const SchedDFSResult *DFSResult = nullptr; | ||||||||
3660 | const BitVector *ScheduledTrees = nullptr; | ||||||||
3661 | bool MaximizeILP; | ||||||||
3662 | |||||||||
3663 | ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {} | ||||||||
3664 | |||||||||
3665 | /// Apply a less-than relation on node priority. | ||||||||
3666 | /// | ||||||||
3667 | /// (Return true if A comes after B in the Q.) | ||||||||
3668 | bool operator()(const SUnit *A, const SUnit *B) const { | ||||||||
3669 | unsigned SchedTreeA = DFSResult->getSubtreeID(A); | ||||||||
3670 | unsigned SchedTreeB = DFSResult->getSubtreeID(B); | ||||||||
3671 | if (SchedTreeA != SchedTreeB) { | ||||||||
3672 | // Unscheduled trees have lower priority. | ||||||||
3673 | if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB)) | ||||||||
3674 | return ScheduledTrees->test(SchedTreeB); | ||||||||
3675 | |||||||||
3676 | // Trees with shallower connections have have lower priority. | ||||||||
3677 | if (DFSResult->getSubtreeLevel(SchedTreeA) | ||||||||
3678 | != DFSResult->getSubtreeLevel(SchedTreeB)) { | ||||||||
3679 | return DFSResult->getSubtreeLevel(SchedTreeA) | ||||||||
3680 | < DFSResult->getSubtreeLevel(SchedTreeB); | ||||||||
3681 | } | ||||||||
3682 | } | ||||||||
3683 | if (MaximizeILP) | ||||||||
3684 | return DFSResult->getILP(A) < DFSResult->getILP(B); | ||||||||
3685 | else | ||||||||
3686 | return DFSResult->getILP(A) > DFSResult->getILP(B); | ||||||||
3687 | } | ||||||||
3688 | }; | ||||||||
3689 | |||||||||
3690 | /// Schedule based on the ILP metric. | ||||||||
3691 | class ILPScheduler : public MachineSchedStrategy { | ||||||||
3692 | ScheduleDAGMILive *DAG = nullptr; | ||||||||
3693 | ILPOrder Cmp; | ||||||||
3694 | |||||||||
3695 | std::vector<SUnit*> ReadyQ; | ||||||||
3696 | |||||||||
3697 | public: | ||||||||
3698 | ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {} | ||||||||
3699 | |||||||||
3700 | void initialize(ScheduleDAGMI *dag) override { | ||||||||
3701 | assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness")((void)0); | ||||||||
3702 | DAG = static_cast<ScheduleDAGMILive*>(dag); | ||||||||
3703 | DAG->computeDFSResult(); | ||||||||
3704 | Cmp.DFSResult = DAG->getDFSResult(); | ||||||||
3705 | Cmp.ScheduledTrees = &DAG->getScheduledTrees(); | ||||||||
3706 | ReadyQ.clear(); | ||||||||
3707 | } | ||||||||
3708 | |||||||||
3709 | void registerRoots() override { | ||||||||
3710 | // Restore the heap in ReadyQ with the updated DFS results. | ||||||||
3711 | std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); | ||||||||
3712 | } | ||||||||
3713 | |||||||||
3714 | /// Implement MachineSchedStrategy interface. | ||||||||
3715 | /// ----------------------------------------- | ||||||||
3716 | |||||||||
3717 | /// Callback to select the highest priority node from the ready Q. | ||||||||
3718 | SUnit *pickNode(bool &IsTopNode) override { | ||||||||
3719 | if (ReadyQ.empty()) return nullptr; | ||||||||
3720 | std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); | ||||||||
3721 | SUnit *SU = ReadyQ.back(); | ||||||||
3722 | ReadyQ.pop_back(); | ||||||||
3723 | IsTopNode = false; | ||||||||
3724 | LLVM_DEBUG(dbgs() << "Pick node "do { } while (false) | ||||||||
3725 | << "SU(" << SU->NodeNum << ") "do { } while (false) | ||||||||
3726 | << " ILP: " << DAG->getDFSResult()->getILP(SU)do { } while (false) | ||||||||
3727 | << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)do { } while (false) | ||||||||
3728 | << " @"do { } while (false) | ||||||||
3729 | << DAG->getDFSResult()->getSubtreeLevel(do { } while (false) | ||||||||
3730 | DAG->getDFSResult()->getSubtreeID(SU))do { } while (false) | ||||||||
3731 | << '\n'do { } while (false) | ||||||||
3732 | << "Scheduling " << *SU->getInstr())do { } while (false); | ||||||||
3733 | return SU; | ||||||||
3734 | } | ||||||||
3735 | |||||||||
3736 | /// Scheduler callback to notify that a new subtree is scheduled. | ||||||||
3737 | void scheduleTree(unsigned SubtreeID) override { | ||||||||
3738 | std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); | ||||||||
3739 | } | ||||||||
3740 | |||||||||
3741 | /// Callback after a node is scheduled. Mark a newly scheduled tree, notify | ||||||||
3742 | /// DFSResults, and resort the priority Q. | ||||||||
3743 | void schedNode(SUnit *SU, bool IsTopNode) override { | ||||||||
3744 | assert(!IsTopNode && "SchedDFSResult needs bottom-up")((void)0); | ||||||||
3745 | } | ||||||||
3746 | |||||||||
3747 | void releaseTopNode(SUnit *) override { /*only called for top roots*/ } | ||||||||
3748 | |||||||||
3749 | void releaseBottomNode(SUnit *SU) override { | ||||||||
3750 | ReadyQ.push_back(SU); | ||||||||
3751 | std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); | ||||||||
3752 | } | ||||||||
3753 | }; | ||||||||
3754 | |||||||||
3755 | } // end anonymous namespace | ||||||||
3756 | |||||||||
3757 | static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) { | ||||||||
3758 | return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(true)); | ||||||||
3759 | } | ||||||||
3760 | static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) { | ||||||||
3761 | return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(false)); | ||||||||
3762 | } | ||||||||
3763 | |||||||||
3764 | static MachineSchedRegistry ILPMaxRegistry( | ||||||||
3765 | "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler); | ||||||||
3766 | static MachineSchedRegistry ILPMinRegistry( | ||||||||
3767 | "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler); | ||||||||
3768 | |||||||||
3769 | //===----------------------------------------------------------------------===// | ||||||||
3770 | // Machine Instruction Shuffler for Correctness Testing | ||||||||
3771 | //===----------------------------------------------------------------------===// | ||||||||
3772 | |||||||||
3773 | #ifndef NDEBUG1 | ||||||||
3774 | namespace { | ||||||||
3775 | |||||||||
3776 | /// Apply a less-than relation on the node order, which corresponds to the | ||||||||
3777 | /// instruction order prior to scheduling. IsReverse implements greater-than. | ||||||||
3778 | template<bool IsReverse> | ||||||||
3779 | struct SUnitOrder { | ||||||||
3780 | bool operator()(SUnit *A, SUnit *B) const { | ||||||||
3781 | if (IsReverse) | ||||||||
3782 | return A->NodeNum > B->NodeNum; | ||||||||
3783 | else | ||||||||
3784 | return A->NodeNum < B->NodeNum; | ||||||||
3785 | } | ||||||||
3786 | }; | ||||||||
3787 | |||||||||
3788 | /// Reorder instructions as much as possible. | ||||||||
3789 | class InstructionShuffler : public MachineSchedStrategy { | ||||||||
3790 | bool IsAlternating; | ||||||||
3791 | bool IsTopDown; | ||||||||
3792 | |||||||||
3793 | // Using a less-than relation (SUnitOrder<false>) for the TopQ priority | ||||||||
3794 | // gives nodes with a higher number higher priority causing the latest | ||||||||
3795 | // instructions to be scheduled first. | ||||||||
3796 | PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>> | ||||||||
3797 | TopQ; | ||||||||
3798 | |||||||||
3799 | // When scheduling bottom-up, use greater-than as the queue priority. | ||||||||
3800 | PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>> | ||||||||
3801 | BottomQ; | ||||||||
3802 | |||||||||
3803 | public: | ||||||||
3804 | InstructionShuffler(bool alternate, bool topdown) | ||||||||
3805 | : IsAlternating(alternate), IsTopDown(topdown) {} | ||||||||
3806 | |||||||||
3807 | void initialize(ScheduleDAGMI*) override { | ||||||||
3808 | TopQ.clear(); | ||||||||
3809 | BottomQ.clear(); | ||||||||
3810 | } | ||||||||
3811 | |||||||||
3812 | /// Implement MachineSchedStrategy interface. | ||||||||
3813 | /// ----------------------------------------- | ||||||||
3814 | |||||||||
3815 | SUnit *pickNode(bool &IsTopNode) override { | ||||||||
3816 | SUnit *SU; | ||||||||
3817 | if (IsTopDown) { | ||||||||
3818 | do { | ||||||||
3819 | if (TopQ.empty()) return nullptr; | ||||||||
3820 | SU = TopQ.top(); | ||||||||
3821 | TopQ.pop(); | ||||||||
3822 | } while (SU->isScheduled); | ||||||||
3823 | IsTopNode = true; | ||||||||
3824 | } else { | ||||||||
3825 | do { | ||||||||
3826 | if (BottomQ.empty()) return nullptr; | ||||||||
3827 | SU = BottomQ.top(); | ||||||||
3828 | BottomQ.pop(); | ||||||||
3829 | } while (SU->isScheduled); | ||||||||
3830 | IsTopNode = false; | ||||||||
3831 | } | ||||||||
3832 | if (IsAlternating) | ||||||||
3833 | IsTopDown = !IsTopDown; | ||||||||
3834 | return SU; | ||||||||
3835 | } | ||||||||
3836 | |||||||||
3837 | void schedNode(SUnit *SU, bool IsTopNode) override {} | ||||||||
3838 | |||||||||
3839 | void releaseTopNode(SUnit *SU) override { | ||||||||
3840 | TopQ.push(SU); | ||||||||
3841 | } | ||||||||
3842 | void releaseBottomNode(SUnit *SU) override { | ||||||||
3843 | BottomQ.push(SU); | ||||||||
3844 | } | ||||||||
3845 | }; | ||||||||
3846 | |||||||||
3847 | } // end anonymous namespace | ||||||||
3848 | |||||||||
3849 | static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) { | ||||||||
3850 | bool Alternate = !ForceTopDown && !ForceBottomUp; | ||||||||
3851 | bool TopDown = !ForceBottomUp; | ||||||||
3852 | assert((TopDown || !ForceTopDown) &&((void)0) | ||||||||
3853 | "-misched-topdown incompatible with -misched-bottomup")((void)0); | ||||||||
3854 | return new ScheduleDAGMILive( | ||||||||
3855 | C, std::make_unique<InstructionShuffler>(Alternate, TopDown)); | ||||||||
3856 | } | ||||||||
3857 | |||||||||
3858 | static MachineSchedRegistry ShufflerRegistry( | ||||||||
3859 | "shuffle", "Shuffle machine instructions alternating directions", | ||||||||
3860 | createInstructionShuffler); | ||||||||
3861 | #endif // !NDEBUG | ||||||||
3862 | |||||||||
3863 | //===----------------------------------------------------------------------===// | ||||||||
3864 | // GraphWriter support for ScheduleDAGMILive. | ||||||||
3865 | //===----------------------------------------------------------------------===// | ||||||||
3866 | |||||||||
3867 | #ifndef NDEBUG1 | ||||||||
3868 | namespace llvm { | ||||||||
3869 | |||||||||
3870 | template<> struct GraphTraits< | ||||||||
3871 | ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {}; | ||||||||
3872 | |||||||||
3873 | template<> | ||||||||
3874 | struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits { | ||||||||
3875 | DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {} | ||||||||
3876 | |||||||||
3877 | static std::string getGraphName(const ScheduleDAG *G) { | ||||||||
3878 | return std::string(G->MF.getName()); | ||||||||
3879 | } | ||||||||
3880 | |||||||||
3881 | static bool renderGraphFromBottomUp() { | ||||||||
3882 | return true; | ||||||||
3883 | } | ||||||||
3884 | |||||||||
3885 | static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) { | ||||||||
3886 | if (ViewMISchedCutoff == 0) | ||||||||
3887 | return false; | ||||||||
3888 | return (Node->Preds.size() > ViewMISchedCutoff | ||||||||
3889 | || Node->Succs.size() > ViewMISchedCutoff); | ||||||||
3890 | } | ||||||||
3891 | |||||||||
3892 | /// If you want to override the dot attributes printed for a particular | ||||||||
3893 | /// edge, override this method. | ||||||||
3894 | static std::string getEdgeAttributes(const SUnit *Node, | ||||||||
3895 | SUnitIterator EI, | ||||||||
3896 | const ScheduleDAG *Graph) { | ||||||||
3897 | if (EI.isArtificialDep()) | ||||||||
3898 | return "color=cyan,style=dashed"; | ||||||||
3899 | if (EI.isCtrlDep()) | ||||||||
3900 | return "color=blue,style=dashed"; | ||||||||
3901 | return ""; | ||||||||
3902 | } | ||||||||
3903 | |||||||||
3904 | static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) { | ||||||||
3905 | std::string Str; | ||||||||
3906 | raw_string_ostream SS(Str); | ||||||||
3907 | const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G); | ||||||||
3908 | const SchedDFSResult *DFS = DAG->hasVRegLiveness() ? | ||||||||
3909 | static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr; | ||||||||
3910 | SS << "SU:" << SU->NodeNum; | ||||||||
3911 | if (DFS) | ||||||||
3912 | SS << " I:" << DFS->getNumInstrs(SU); | ||||||||
3913 | return SS.str(); | ||||||||
3914 | } | ||||||||
3915 | |||||||||
3916 | static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) { | ||||||||
3917 | return G->getGraphNodeLabel(SU); | ||||||||
3918 | } | ||||||||
3919 | |||||||||
3920 | static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) { | ||||||||
3921 | std::string Str("shape=Mrecord"); | ||||||||
3922 | const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G); | ||||||||
3923 | const SchedDFSResult *DFS = DAG->hasVRegLiveness() ? | ||||||||
3924 | static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr; | ||||||||
3925 | if (DFS) { | ||||||||
3926 | Str += ",style=filled,fillcolor=\"#"; | ||||||||
3927 | Str += DOT::getColorString(DFS->getSubtreeID(N)); | ||||||||
3928 | Str += '"'; | ||||||||
3929 | } | ||||||||
3930 | return Str; | ||||||||
3931 | } | ||||||||
3932 | }; | ||||||||
3933 | |||||||||
3934 | } // end namespace llvm | ||||||||
3935 | #endif // NDEBUG | ||||||||
3936 | |||||||||
3937 | /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG | ||||||||
3938 | /// rendered using 'dot'. | ||||||||
3939 | void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) { | ||||||||
3940 | #ifndef NDEBUG1 | ||||||||
3941 | ViewGraph(this, Name, false, Title); | ||||||||
3942 | #else | ||||||||
3943 | errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on " | ||||||||
3944 | << "systems with Graphviz or gv!\n"; | ||||||||
3945 | #endif // NDEBUG | ||||||||
3946 | } | ||||||||
3947 | |||||||||
3948 | /// Out-of-line implementation with no arguments is handy for gdb. | ||||||||
3949 | void ScheduleDAGMI::viewGraph() { | ||||||||
3950 | viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName()); | ||||||||
3951 | } |
1 | //===- llvm/CodeGen/MachineInstrBundleIterator.h ----------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Defines an iterator class that bundles MachineInstr. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #ifndef LLVM_CODEGEN_MACHINEINSTRBUNDLEITERATOR_H |
14 | #define LLVM_CODEGEN_MACHINEINSTRBUNDLEITERATOR_H |
15 | |
16 | #include "llvm/ADT/ilist.h" |
17 | #include "llvm/ADT/simple_ilist.h" |
18 | #include <cassert> |
19 | #include <iterator> |
20 | #include <type_traits> |
21 | |
22 | namespace llvm { |
23 | |
24 | template <class T, bool IsReverse> struct MachineInstrBundleIteratorTraits; |
25 | template <class T> struct MachineInstrBundleIteratorTraits<T, false> { |
26 | using list_type = simple_ilist<T, ilist_sentinel_tracking<true>>; |
27 | using instr_iterator = typename list_type::iterator; |
28 | using nonconst_instr_iterator = typename list_type::iterator; |
29 | using const_instr_iterator = typename list_type::const_iterator; |
30 | }; |
31 | template <class T> struct MachineInstrBundleIteratorTraits<T, true> { |
32 | using list_type = simple_ilist<T, ilist_sentinel_tracking<true>>; |
33 | using instr_iterator = typename list_type::reverse_iterator; |
34 | using nonconst_instr_iterator = typename list_type::reverse_iterator; |
35 | using const_instr_iterator = typename list_type::const_reverse_iterator; |
36 | }; |
37 | template <class T> struct MachineInstrBundleIteratorTraits<const T, false> { |
38 | using list_type = simple_ilist<T, ilist_sentinel_tracking<true>>; |
39 | using instr_iterator = typename list_type::const_iterator; |
40 | using nonconst_instr_iterator = typename list_type::iterator; |
41 | using const_instr_iterator = typename list_type::const_iterator; |
42 | }; |
43 | template <class T> struct MachineInstrBundleIteratorTraits<const T, true> { |
44 | using list_type = simple_ilist<T, ilist_sentinel_tracking<true>>; |
45 | using instr_iterator = typename list_type::const_reverse_iterator; |
46 | using nonconst_instr_iterator = typename list_type::reverse_iterator; |
47 | using const_instr_iterator = typename list_type::const_reverse_iterator; |
48 | }; |
49 | |
50 | template <bool IsReverse> struct MachineInstrBundleIteratorHelper; |
51 | template <> struct MachineInstrBundleIteratorHelper<false> { |
52 | /// Get the beginning of the current bundle. |
53 | template <class Iterator> static Iterator getBundleBegin(Iterator I) { |
54 | if (!I.isEnd()) |
55 | while (I->isBundledWithPred()) |
56 | --I; |
57 | return I; |
58 | } |
59 | |
60 | /// Get the final node of the current bundle. |
61 | template <class Iterator> static Iterator getBundleFinal(Iterator I) { |
62 | if (!I.isEnd()) |
63 | while (I->isBundledWithSucc()) |
64 | ++I; |
65 | return I; |
66 | } |
67 | |
68 | /// Increment forward ilist iterator. |
69 | template <class Iterator> static void increment(Iterator &I) { |
70 | I = std::next(getBundleFinal(I)); |
71 | } |
72 | |
73 | /// Decrement forward ilist iterator. |
74 | template <class Iterator> static void decrement(Iterator &I) { |
75 | I = getBundleBegin(std::prev(I)); |
76 | } |
77 | }; |
78 | |
79 | template <> struct MachineInstrBundleIteratorHelper<true> { |
80 | /// Get the beginning of the current bundle. |
81 | template <class Iterator> static Iterator getBundleBegin(Iterator I) { |
82 | return MachineInstrBundleIteratorHelper<false>::getBundleBegin( |
83 | I.getReverse()) |
84 | .getReverse(); |
85 | } |
86 | |
87 | /// Get the final node of the current bundle. |
88 | template <class Iterator> static Iterator getBundleFinal(Iterator I) { |
89 | return MachineInstrBundleIteratorHelper<false>::getBundleFinal( |
90 | I.getReverse()) |
91 | .getReverse(); |
92 | } |
93 | |
94 | /// Increment reverse ilist iterator. |
95 | template <class Iterator> static void increment(Iterator &I) { |
96 | I = getBundleBegin(std::next(I)); |
97 | } |
98 | |
99 | /// Decrement reverse ilist iterator. |
100 | template <class Iterator> static void decrement(Iterator &I) { |
101 | I = std::prev(getBundleFinal(I)); |
102 | } |
103 | }; |
104 | |
105 | /// MachineBasicBlock iterator that automatically skips over MIs that are |
106 | /// inside bundles (i.e. walk top level MIs only). |
107 | template <typename Ty, bool IsReverse = false> |
108 | class MachineInstrBundleIterator : MachineInstrBundleIteratorHelper<IsReverse> { |
109 | using Traits = MachineInstrBundleIteratorTraits<Ty, IsReverse>; |
110 | using instr_iterator = typename Traits::instr_iterator; |
111 | |
112 | instr_iterator MII; |
113 | |
114 | public: |
115 | using value_type = typename instr_iterator::value_type; |
116 | using difference_type = typename instr_iterator::difference_type; |
117 | using pointer = typename instr_iterator::pointer; |
118 | using reference = typename instr_iterator::reference; |
119 | using const_pointer = typename instr_iterator::const_pointer; |
120 | using const_reference = typename instr_iterator::const_reference; |
121 | using iterator_category = std::bidirectional_iterator_tag; |
122 | |
123 | private: |
124 | using nonconst_instr_iterator = typename Traits::nonconst_instr_iterator; |
125 | using const_instr_iterator = typename Traits::const_instr_iterator; |
126 | using nonconst_iterator = |
127 | MachineInstrBundleIterator<typename nonconst_instr_iterator::value_type, |
128 | IsReverse>; |
129 | using reverse_iterator = MachineInstrBundleIterator<Ty, !IsReverse>; |
130 | |
131 | public: |
132 | MachineInstrBundleIterator(instr_iterator MI) : MII(MI) { |
133 | assert((!MI.getNodePtr() || MI.isEnd() || !MI->isBundledWithPred()) &&((void)0) |
134 | "It's not legal to initialize MachineInstrBundleIterator with a "((void)0) |
135 | "bundled MI")((void)0); |
136 | } |
137 | |
138 | MachineInstrBundleIterator(reference MI) : MII(MI) { |
139 | assert(!MI.isBundledWithPred() && "It's not legal to initialize "((void)0) |
140 | "MachineInstrBundleIterator with a "((void)0) |
141 | "bundled MI")((void)0); |
142 | } |
143 | |
144 | MachineInstrBundleIterator(pointer MI) : MII(MI) { |
145 | // FIXME: This conversion should be explicit. |
146 | assert((!MI || !MI->isBundledWithPred()) && "It's not legal to initialize "((void)0) |
147 | "MachineInstrBundleIterator "((void)0) |
148 | "with a bundled MI")((void)0); |
149 | } |
150 | |
151 | // Template allows conversion from const to nonconst. |
152 | template <class OtherTy> |
153 | MachineInstrBundleIterator( |
154 | const MachineInstrBundleIterator<OtherTy, IsReverse> &I, |
155 | std::enable_if_t<std::is_convertible<OtherTy *, Ty *>::value, void *> = |
156 | nullptr) |
157 | : MII(I.getInstrIterator()) {} |
158 | |
159 | MachineInstrBundleIterator() : MII(nullptr) {} |
160 | |
161 | /// Explicit conversion between forward/reverse iterators. |
162 | /// |
163 | /// Translate between forward and reverse iterators without changing range |
164 | /// boundaries. The resulting iterator will dereference (and have a handle) |
165 | /// to the previous node, which is somewhat unexpected; but converting the |
166 | /// two endpoints in a range will give the same range in reverse. |
167 | /// |
168 | /// This matches std::reverse_iterator conversions. |
169 | explicit MachineInstrBundleIterator( |
170 | const MachineInstrBundleIterator<Ty, !IsReverse> &I) |
171 | : MachineInstrBundleIterator(++I.getReverse()) {} |
172 | |
173 | /// Get the bundle iterator for the given instruction's bundle. |
174 | static MachineInstrBundleIterator getAtBundleBegin(instr_iterator MI) { |
175 | return MachineInstrBundleIteratorHelper<IsReverse>::getBundleBegin(MI); |
176 | } |
177 | |
178 | reference operator*() const { return *MII; } |
179 | pointer operator->() const { return &operator*(); } |
180 | |
181 | /// Check for null. |
182 | bool isValid() const { return MII.getNodePtr(); } |
183 | |
184 | friend bool operator==(const MachineInstrBundleIterator &L, |
185 | const MachineInstrBundleIterator &R) { |
186 | return L.MII == R.MII; |
187 | } |
188 | friend bool operator==(const MachineInstrBundleIterator &L, |
189 | const const_instr_iterator &R) { |
190 | return L.MII == R; // Avoid assertion about validity of R. |
191 | } |
192 | friend bool operator==(const const_instr_iterator &L, |
193 | const MachineInstrBundleIterator &R) { |
194 | return L == R.MII; // Avoid assertion about validity of L. |
195 | } |
196 | friend bool operator==(const MachineInstrBundleIterator &L, |
197 | const nonconst_instr_iterator &R) { |
198 | return L.MII == R; // Avoid assertion about validity of R. |
199 | } |
200 | friend bool operator==(const nonconst_instr_iterator &L, |
201 | const MachineInstrBundleIterator &R) { |
202 | return L == R.MII; // Avoid assertion about validity of L. |
203 | } |
204 | friend bool operator==(const MachineInstrBundleIterator &L, const_pointer R) { |
205 | return L == const_instr_iterator(R); // Avoid assertion about validity of R. |
206 | } |
207 | friend bool operator==(const_pointer L, const MachineInstrBundleIterator &R) { |
208 | return const_instr_iterator(L) == R; // Avoid assertion about validity of L. |
209 | } |
210 | friend bool operator==(const MachineInstrBundleIterator &L, |
211 | const_reference R) { |
212 | return L == &R; // Avoid assertion about validity of R. |
213 | } |
214 | friend bool operator==(const_reference L, |
215 | const MachineInstrBundleIterator &R) { |
216 | return &L == R; // Avoid assertion about validity of L. |
217 | } |
218 | |
219 | friend bool operator!=(const MachineInstrBundleIterator &L, |
220 | const MachineInstrBundleIterator &R) { |
221 | return !(L == R); |
222 | } |
223 | friend bool operator!=(const MachineInstrBundleIterator &L, |
224 | const const_instr_iterator &R) { |
225 | return !(L == R); |
226 | } |
227 | friend bool operator!=(const const_instr_iterator &L, |
228 | const MachineInstrBundleIterator &R) { |
229 | return !(L == R); |
230 | } |
231 | friend bool operator!=(const MachineInstrBundleIterator &L, |
232 | const nonconst_instr_iterator &R) { |
233 | return !(L == R); |
234 | } |
235 | friend bool operator!=(const nonconst_instr_iterator &L, |
236 | const MachineInstrBundleIterator &R) { |
237 | return !(L == R); |
238 | } |
239 | friend bool operator!=(const MachineInstrBundleIterator &L, const_pointer R) { |
240 | return !(L == R); |
241 | } |
242 | friend bool operator!=(const_pointer L, const MachineInstrBundleIterator &R) { |
243 | return !(L == R); |
244 | } |
245 | friend bool operator!=(const MachineInstrBundleIterator &L, |
246 | const_reference R) { |
247 | return !(L == R); |
248 | } |
249 | friend bool operator!=(const_reference L, |
250 | const MachineInstrBundleIterator &R) { |
251 | return !(L == R); |
252 | } |
253 | |
254 | // Increment and decrement operators... |
255 | MachineInstrBundleIterator &operator--() { |
256 | this->decrement(MII); |
257 | return *this; |
258 | } |
259 | MachineInstrBundleIterator &operator++() { |
260 | this->increment(MII); |
261 | return *this; |
262 | } |
263 | MachineInstrBundleIterator operator--(int) { |
264 | MachineInstrBundleIterator Temp = *this; |
265 | --*this; |
266 | return Temp; |
267 | } |
268 | MachineInstrBundleIterator operator++(int) { |
269 | MachineInstrBundleIterator Temp = *this; |
270 | ++*this; |
271 | return Temp; |
272 | } |
273 | |
274 | instr_iterator getInstrIterator() const { return MII; } |
275 | |
276 | nonconst_iterator getNonConstIterator() const { return MII.getNonConst(); } |
277 | |
278 | /// Get a reverse iterator to the same node. |
279 | /// |
280 | /// Gives a reverse iterator that will dereference (and have a handle) to the |
281 | /// same node. Converting the endpoint iterators in a range will give a |
282 | /// different range; for range operations, use the explicit conversions. |
283 | reverse_iterator getReverse() const { return MII.getReverse(); } |
284 | }; |
285 | |
286 | } // end namespace llvm |
287 | |
288 | #endif // LLVM_CODEGEN_MACHINEINSTRBUNDLEITERATOR_H |
1 | //===- llvm/ADT/ilist_iterator.h - Intrusive List Iterator ------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLVM_ADT_ILIST_ITERATOR_H |
10 | #define LLVM_ADT_ILIST_ITERATOR_H |
11 | |
12 | #include "llvm/ADT/ilist_node.h" |
13 | #include <cassert> |
14 | #include <cstddef> |
15 | #include <iterator> |
16 | #include <type_traits> |
17 | |
18 | namespace llvm { |
19 | |
20 | namespace ilist_detail { |
21 | |
22 | /// Find const-correct node types. |
23 | template <class OptionsT, bool IsConst> struct IteratorTraits; |
24 | template <class OptionsT> struct IteratorTraits<OptionsT, false> { |
25 | using value_type = typename OptionsT::value_type; |
26 | using pointer = typename OptionsT::pointer; |
27 | using reference = typename OptionsT::reference; |
28 | using node_pointer = ilist_node_impl<OptionsT> *; |
29 | using node_reference = ilist_node_impl<OptionsT> &; |
30 | }; |
31 | template <class OptionsT> struct IteratorTraits<OptionsT, true> { |
32 | using value_type = const typename OptionsT::value_type; |
33 | using pointer = typename OptionsT::const_pointer; |
34 | using reference = typename OptionsT::const_reference; |
35 | using node_pointer = const ilist_node_impl<OptionsT> *; |
36 | using node_reference = const ilist_node_impl<OptionsT> &; |
37 | }; |
38 | |
39 | template <bool IsReverse> struct IteratorHelper; |
40 | template <> struct IteratorHelper<false> : ilist_detail::NodeAccess { |
41 | using Access = ilist_detail::NodeAccess; |
42 | |
43 | template <class T> static void increment(T *&I) { I = Access::getNext(*I); } |
44 | template <class T> static void decrement(T *&I) { I = Access::getPrev(*I); } |
45 | }; |
46 | template <> struct IteratorHelper<true> : ilist_detail::NodeAccess { |
47 | using Access = ilist_detail::NodeAccess; |
48 | |
49 | template <class T> static void increment(T *&I) { I = Access::getPrev(*I); } |
50 | template <class T> static void decrement(T *&I) { I = Access::getNext(*I); } |
51 | }; |
52 | |
53 | } // end namespace ilist_detail |
54 | |
55 | /// Iterator for intrusive lists based on ilist_node. |
56 | template <class OptionsT, bool IsReverse, bool IsConst> |
57 | class ilist_iterator : ilist_detail::SpecificNodeAccess<OptionsT> { |
58 | friend ilist_iterator<OptionsT, IsReverse, !IsConst>; |
59 | friend ilist_iterator<OptionsT, !IsReverse, IsConst>; |
60 | friend ilist_iterator<OptionsT, !IsReverse, !IsConst>; |
61 | |
62 | using Traits = ilist_detail::IteratorTraits<OptionsT, IsConst>; |
63 | using Access = ilist_detail::SpecificNodeAccess<OptionsT>; |
64 | |
65 | public: |
66 | using value_type = typename Traits::value_type; |
67 | using pointer = typename Traits::pointer; |
68 | using reference = typename Traits::reference; |
69 | using difference_type = ptrdiff_t; |
70 | using iterator_category = std::bidirectional_iterator_tag; |
71 | using const_pointer = typename OptionsT::const_pointer; |
72 | using const_reference = typename OptionsT::const_reference; |
73 | |
74 | private: |
75 | using node_pointer = typename Traits::node_pointer; |
76 | using node_reference = typename Traits::node_reference; |
77 | |
78 | node_pointer NodePtr = nullptr; |
79 | |
80 | public: |
81 | /// Create from an ilist_node. |
82 | explicit ilist_iterator(node_reference N) : NodePtr(&N) {} |
83 | |
84 | explicit ilist_iterator(pointer NP) : NodePtr(Access::getNodePtr(NP)) {} |
85 | explicit ilist_iterator(reference NR) : NodePtr(Access::getNodePtr(&NR)) {} |
86 | ilist_iterator() = default; |
87 | |
88 | // This is templated so that we can allow constructing a const iterator from |
89 | // a nonconst iterator... |
90 | template <bool RHSIsConst> |
91 | ilist_iterator(const ilist_iterator<OptionsT, IsReverse, RHSIsConst> &RHS, |
92 | std::enable_if_t<IsConst || !RHSIsConst, void *> = nullptr) |
93 | : NodePtr(RHS.NodePtr) {} |
94 | |
95 | // This is templated so that we can allow assigning to a const iterator from |
96 | // a nonconst iterator... |
97 | template <bool RHSIsConst> |
98 | std::enable_if_t<IsConst || !RHSIsConst, ilist_iterator &> |
99 | operator=(const ilist_iterator<OptionsT, IsReverse, RHSIsConst> &RHS) { |
100 | NodePtr = RHS.NodePtr; |
101 | return *this; |
102 | } |
103 | |
104 | /// Explicit conversion between forward/reverse iterators. |
105 | /// |
106 | /// Translate between forward and reverse iterators without changing range |
107 | /// boundaries. The resulting iterator will dereference (and have a handle) |
108 | /// to the previous node, which is somewhat unexpected; but converting the |
109 | /// two endpoints in a range will give the same range in reverse. |
110 | /// |
111 | /// This matches std::reverse_iterator conversions. |
112 | explicit ilist_iterator( |
113 | const ilist_iterator<OptionsT, !IsReverse, IsConst> &RHS) |
114 | : ilist_iterator(++RHS.getReverse()) {} |
115 | |
116 | /// Get a reverse iterator to the same node. |
117 | /// |
118 | /// Gives a reverse iterator that will dereference (and have a handle) to the |
119 | /// same node. Converting the endpoint iterators in a range will give a |
120 | /// different range; for range operations, use the explicit conversions. |
121 | ilist_iterator<OptionsT, !IsReverse, IsConst> getReverse() const { |
122 | if (NodePtr) |
123 | return ilist_iterator<OptionsT, !IsReverse, IsConst>(*NodePtr); |
124 | return ilist_iterator<OptionsT, !IsReverse, IsConst>(); |
125 | } |
126 | |
127 | /// Const-cast. |
128 | ilist_iterator<OptionsT, IsReverse, false> getNonConst() const { |
129 | if (NodePtr) |
130 | return ilist_iterator<OptionsT, IsReverse, false>( |
131 | const_cast<typename ilist_iterator<OptionsT, IsReverse, |
132 | false>::node_reference>(*NodePtr)); |
133 | return ilist_iterator<OptionsT, IsReverse, false>(); |
134 | } |
135 | |
136 | // Accessors... |
137 | reference operator*() const { |
138 | assert(!NodePtr->isKnownSentinel())((void)0); |
139 | return *Access::getValuePtr(NodePtr); |
140 | } |
141 | pointer operator->() const { return &operator*(); } |
142 | |
143 | // Comparison operators |
144 | friend bool operator==(const ilist_iterator &LHS, const ilist_iterator &RHS) { |
145 | return LHS.NodePtr == RHS.NodePtr; |
146 | } |
147 | friend bool operator!=(const ilist_iterator &LHS, const ilist_iterator &RHS) { |
148 | return LHS.NodePtr != RHS.NodePtr; |
149 | } |
150 | |
151 | // Increment and decrement operators... |
152 | ilist_iterator &operator--() { |
153 | NodePtr = IsReverse ? NodePtr->getNext() : NodePtr->getPrev(); |
154 | return *this; |
155 | } |
156 | ilist_iterator &operator++() { |
157 | NodePtr = IsReverse ? NodePtr->getPrev() : NodePtr->getNext(); |
158 | return *this; |
159 | } |
160 | ilist_iterator operator--(int) { |
161 | ilist_iterator tmp = *this; |
162 | --*this; |
163 | return tmp; |
164 | } |
165 | ilist_iterator operator++(int) { |
166 | ilist_iterator tmp = *this; |
167 | ++*this; |
168 | return tmp; |
169 | } |
170 | |
171 | /// Get the underlying ilist_node. |
172 | node_pointer getNodePtr() const { return static_cast<node_pointer>(NodePtr); } |
173 | |
174 | /// Check for end. Only valid if ilist_sentinel_tracking<true>. |
175 | bool isEnd() const { return NodePtr ? NodePtr->isSentinel() : false; } |
176 | }; |
177 | |
178 | template <typename From> struct simplify_type; |
179 | |
180 | /// Allow ilist_iterators to convert into pointers to a node automatically when |
181 | /// used by the dyn_cast, cast, isa mechanisms... |
182 | /// |
183 | /// FIXME: remove this, since there is no implicit conversion to NodeTy. |
184 | template <class OptionsT, bool IsConst> |
185 | struct simplify_type<ilist_iterator<OptionsT, false, IsConst>> { |
186 | using iterator = ilist_iterator<OptionsT, false, IsConst>; |
187 | using SimpleType = typename iterator::pointer; |
188 | |
189 | static SimpleType getSimplifiedValue(const iterator &Node) { return &*Node; } |
190 | }; |
191 | template <class OptionsT, bool IsConst> |
192 | struct simplify_type<const ilist_iterator<OptionsT, false, IsConst>> |
193 | : simplify_type<ilist_iterator<OptionsT, false, IsConst>> {}; |
194 | |
195 | } // end namespace llvm |
196 | |
197 | #endif // LLVM_ADT_ILIST_ITERATOR_H |
1 | //===- MachineScheduler.h - MachineInstr Scheduling Pass --------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file provides an interface for customizing the standard MachineScheduler |
10 | // pass. Note that the entire pass may be replaced as follows: |
11 | // |
12 | // <Target>TargetMachine::createPassConfig(PassManagerBase &PM) { |
13 | // PM.substitutePass(&MachineSchedulerID, &CustomSchedulerPassID); |
14 | // ...} |
15 | // |
16 | // The MachineScheduler pass is only responsible for choosing the regions to be |
17 | // scheduled. Targets can override the DAG builder and scheduler without |
18 | // replacing the pass as follows: |
19 | // |
20 | // ScheduleDAGInstrs *<Target>PassConfig:: |
21 | // createMachineScheduler(MachineSchedContext *C) { |
22 | // return new CustomMachineScheduler(C); |
23 | // } |
24 | // |
25 | // The default scheduler, ScheduleDAGMILive, builds the DAG and drives list |
26 | // scheduling while updating the instruction stream, register pressure, and live |
27 | // intervals. Most targets don't need to override the DAG builder and list |
28 | // scheduler, but subtargets that require custom scheduling heuristics may |
29 | // plugin an alternate MachineSchedStrategy. The strategy is responsible for |
30 | // selecting the highest priority node from the list: |
31 | // |
32 | // ScheduleDAGInstrs *<Target>PassConfig:: |
33 | // createMachineScheduler(MachineSchedContext *C) { |
34 | // return new ScheduleDAGMILive(C, CustomStrategy(C)); |
35 | // } |
36 | // |
37 | // The DAG builder can also be customized in a sense by adding DAG mutations |
38 | // that will run after DAG building and before list scheduling. DAG mutations |
39 | // can adjust dependencies based on target-specific knowledge or add weak edges |
40 | // to aid heuristics: |
41 | // |
42 | // ScheduleDAGInstrs *<Target>PassConfig:: |
43 | // createMachineScheduler(MachineSchedContext *C) { |
44 | // ScheduleDAGMI *DAG = createGenericSchedLive(C); |
45 | // DAG->addMutation(new CustomDAGMutation(...)); |
46 | // return DAG; |
47 | // } |
48 | // |
49 | // A target that supports alternative schedulers can use the |
50 | // MachineSchedRegistry to allow command line selection. This can be done by |
51 | // implementing the following boilerplate: |
52 | // |
53 | // static ScheduleDAGInstrs *createCustomMachineSched(MachineSchedContext *C) { |
54 | // return new CustomMachineScheduler(C); |
55 | // } |
56 | // static MachineSchedRegistry |
57 | // SchedCustomRegistry("custom", "Run my target's custom scheduler", |
58 | // createCustomMachineSched); |
59 | // |
60 | // |
61 | // Finally, subtargets that don't need to implement custom heuristics but would |
62 | // like to configure the GenericScheduler's policy for a given scheduler region, |
63 | // including scheduling direction and register pressure tracking policy, can do |
64 | // this: |
65 | // |
66 | // void <SubTarget>Subtarget:: |
67 | // overrideSchedPolicy(MachineSchedPolicy &Policy, |
68 | // unsigned NumRegionInstrs) const { |
69 | // Policy.<Flag> = true; |
70 | // } |
71 | // |
72 | //===----------------------------------------------------------------------===// |
73 | |
74 | #ifndef LLVM_CODEGEN_MACHINESCHEDULER_H |
75 | #define LLVM_CODEGEN_MACHINESCHEDULER_H |
76 | |
77 | #include "llvm/ADT/APInt.h" |
78 | #include "llvm/ADT/ArrayRef.h" |
79 | #include "llvm/ADT/BitVector.h" |
80 | #include "llvm/ADT/STLExtras.h" |
81 | #include "llvm/ADT/SmallVector.h" |
82 | #include "llvm/ADT/StringRef.h" |
83 | #include "llvm/ADT/Twine.h" |
84 | #include "llvm/CodeGen/MachineBasicBlock.h" |
85 | #include "llvm/CodeGen/MachinePassRegistry.h" |
86 | #include "llvm/CodeGen/RegisterPressure.h" |
87 | #include "llvm/CodeGen/ScheduleDAG.h" |
88 | #include "llvm/CodeGen/ScheduleDAGInstrs.h" |
89 | #include "llvm/CodeGen/ScheduleDAGMutation.h" |
90 | #include "llvm/CodeGen/TargetSchedule.h" |
91 | #include "llvm/Support/CommandLine.h" |
92 | #include "llvm/Support/ErrorHandling.h" |
93 | #include <algorithm> |
94 | #include <cassert> |
95 | #include <memory> |
96 | #include <string> |
97 | #include <vector> |
98 | |
99 | namespace llvm { |
100 | |
101 | extern cl::opt<bool> ForceTopDown; |
102 | extern cl::opt<bool> ForceBottomUp; |
103 | extern cl::opt<bool> VerifyScheduling; |
104 | |
105 | class AAResults; |
106 | class LiveIntervals; |
107 | class MachineDominatorTree; |
108 | class MachineFunction; |
109 | class MachineInstr; |
110 | class MachineLoopInfo; |
111 | class RegisterClassInfo; |
112 | class SchedDFSResult; |
113 | class ScheduleHazardRecognizer; |
114 | class TargetInstrInfo; |
115 | class TargetPassConfig; |
116 | class TargetRegisterInfo; |
117 | |
118 | /// MachineSchedContext provides enough context from the MachineScheduler pass |
119 | /// for the target to instantiate a scheduler. |
120 | struct MachineSchedContext { |
121 | MachineFunction *MF = nullptr; |
122 | const MachineLoopInfo *MLI = nullptr; |
123 | const MachineDominatorTree *MDT = nullptr; |
124 | const TargetPassConfig *PassConfig = nullptr; |
125 | AAResults *AA = nullptr; |
126 | LiveIntervals *LIS = nullptr; |
127 | |
128 | RegisterClassInfo *RegClassInfo; |
129 | |
130 | MachineSchedContext(); |
131 | virtual ~MachineSchedContext(); |
132 | }; |
133 | |
134 | /// MachineSchedRegistry provides a selection of available machine instruction |
135 | /// schedulers. |
136 | class MachineSchedRegistry |
137 | : public MachinePassRegistryNode< |
138 | ScheduleDAGInstrs *(*)(MachineSchedContext *)> { |
139 | public: |
140 | using ScheduleDAGCtor = ScheduleDAGInstrs *(*)(MachineSchedContext *); |
141 | |
142 | // RegisterPassParser requires a (misnamed) FunctionPassCtor type. |
143 | using FunctionPassCtor = ScheduleDAGCtor; |
144 | |
145 | static MachinePassRegistry<ScheduleDAGCtor> Registry; |
146 | |
147 | MachineSchedRegistry(const char *N, const char *D, ScheduleDAGCtor C) |
148 | : MachinePassRegistryNode(N, D, C) { |
149 | Registry.Add(this); |
150 | } |
151 | |
152 | ~MachineSchedRegistry() { Registry.Remove(this); } |
153 | |
154 | // Accessors. |
155 | // |
156 | MachineSchedRegistry *getNext() const { |
157 | return (MachineSchedRegistry *)MachinePassRegistryNode::getNext(); |
158 | } |
159 | |
160 | static MachineSchedRegistry *getList() { |
161 | return (MachineSchedRegistry *)Registry.getList(); |
162 | } |
163 | |
164 | static void setListener(MachinePassRegistryListener<FunctionPassCtor> *L) { |
165 | Registry.setListener(L); |
166 | } |
167 | }; |
168 | |
169 | class ScheduleDAGMI; |
170 | |
171 | /// Define a generic scheduling policy for targets that don't provide their own |
172 | /// MachineSchedStrategy. This can be overriden for each scheduling region |
173 | /// before building the DAG. |
174 | struct MachineSchedPolicy { |
175 | // Allow the scheduler to disable register pressure tracking. |
176 | bool ShouldTrackPressure = false; |
177 | /// Track LaneMasks to allow reordering of independent subregister writes |
178 | /// of the same vreg. \sa MachineSchedStrategy::shouldTrackLaneMasks() |
179 | bool ShouldTrackLaneMasks = false; |
180 | |
181 | // Allow the scheduler to force top-down or bottom-up scheduling. If neither |
182 | // is true, the scheduler runs in both directions and converges. |
183 | bool OnlyTopDown = false; |
184 | bool OnlyBottomUp = false; |
185 | |
186 | // Disable heuristic that tries to fetch nodes from long dependency chains |
187 | // first. |
188 | bool DisableLatencyHeuristic = false; |
189 | |
190 | // Compute DFSResult for use in scheduling heuristics. |
191 | bool ComputeDFSResult = false; |
192 | |
193 | MachineSchedPolicy() = default; |
194 | }; |
195 | |
196 | /// MachineSchedStrategy - Interface to the scheduling algorithm used by |
197 | /// ScheduleDAGMI. |
198 | /// |
199 | /// Initialization sequence: |
200 | /// initPolicy -> shouldTrackPressure -> initialize(DAG) -> registerRoots |
201 | class MachineSchedStrategy { |
202 | virtual void anchor(); |
203 | |
204 | public: |
205 | virtual ~MachineSchedStrategy() = default; |
206 | |
207 | /// Optionally override the per-region scheduling policy. |
208 | virtual void initPolicy(MachineBasicBlock::iterator Begin, |
209 | MachineBasicBlock::iterator End, |
210 | unsigned NumRegionInstrs) {} |
211 | |
212 | virtual void dumpPolicy() const {} |
213 | |
214 | /// Check if pressure tracking is needed before building the DAG and |
215 | /// initializing this strategy. Called after initPolicy. |
216 | virtual bool shouldTrackPressure() const { return true; } |
217 | |
218 | /// Returns true if lanemasks should be tracked. LaneMask tracking is |
219 | /// necessary to reorder independent subregister defs for the same vreg. |
220 | /// This has to be enabled in combination with shouldTrackPressure(). |
221 | virtual bool shouldTrackLaneMasks() const { return false; } |
222 | |
223 | // If this method returns true, handling of the scheduling regions |
224 | // themselves (in case of a scheduling boundary in MBB) will be done |
225 | // beginning with the topmost region of MBB. |
226 | virtual bool doMBBSchedRegionsTopDown() const { return false; } |
227 | |
228 | /// Initialize the strategy after building the DAG for a new region. |
229 | virtual void initialize(ScheduleDAGMI *DAG) = 0; |
230 | |
231 | /// Tell the strategy that MBB is about to be processed. |
232 | virtual void enterMBB(MachineBasicBlock *MBB) {}; |
233 | |
234 | /// Tell the strategy that current MBB is done. |
235 | virtual void leaveMBB() {}; |
236 | |
237 | /// Notify this strategy that all roots have been released (including those |
238 | /// that depend on EntrySU or ExitSU). |
239 | virtual void registerRoots() {} |
240 | |
241 | /// Pick the next node to schedule, or return NULL. Set IsTopNode to true to |
242 | /// schedule the node at the top of the unscheduled region. Otherwise it will |
243 | /// be scheduled at the bottom. |
244 | virtual SUnit *pickNode(bool &IsTopNode) = 0; |
245 | |
246 | /// Scheduler callback to notify that a new subtree is scheduled. |
247 | virtual void scheduleTree(unsigned SubtreeID) {} |
248 | |
249 | /// Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an |
250 | /// instruction and updated scheduled/remaining flags in the DAG nodes. |
251 | virtual void schedNode(SUnit *SU, bool IsTopNode) = 0; |
252 | |
253 | /// When all predecessor dependencies have been resolved, free this node for |
254 | /// top-down scheduling. |
255 | virtual void releaseTopNode(SUnit *SU) = 0; |
256 | |
257 | /// When all successor dependencies have been resolved, free this node for |
258 | /// bottom-up scheduling. |
259 | virtual void releaseBottomNode(SUnit *SU) = 0; |
260 | }; |
261 | |
262 | /// ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply |
263 | /// schedules machine instructions according to the given MachineSchedStrategy |
264 | /// without much extra book-keeping. This is the common functionality between |
265 | /// PreRA and PostRA MachineScheduler. |
266 | class ScheduleDAGMI : public ScheduleDAGInstrs { |
267 | protected: |
268 | AAResults *AA; |
269 | LiveIntervals *LIS; |
270 | std::unique_ptr<MachineSchedStrategy> SchedImpl; |
271 | |
272 | /// Ordered list of DAG postprocessing steps. |
273 | std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations; |
274 | |
275 | /// The top of the unscheduled zone. |
276 | MachineBasicBlock::iterator CurrentTop; |
277 | |
278 | /// The bottom of the unscheduled zone. |
279 | MachineBasicBlock::iterator CurrentBottom; |
280 | |
281 | /// Record the next node in a scheduled cluster. |
282 | const SUnit *NextClusterPred = nullptr; |
283 | const SUnit *NextClusterSucc = nullptr; |
284 | |
285 | #ifndef NDEBUG1 |
286 | /// The number of instructions scheduled so far. Used to cut off the |
287 | /// scheduler at the point determined by misched-cutoff. |
288 | unsigned NumInstrsScheduled = 0; |
289 | #endif |
290 | |
291 | public: |
292 | ScheduleDAGMI(MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S, |
293 | bool RemoveKillFlags) |
294 | : ScheduleDAGInstrs(*C->MF, C->MLI, RemoveKillFlags), AA(C->AA), |
295 | LIS(C->LIS), SchedImpl(std::move(S)) {} |
296 | |
297 | // Provide a vtable anchor |
298 | ~ScheduleDAGMI() override; |
299 | |
300 | /// If this method returns true, handling of the scheduling regions |
301 | /// themselves (in case of a scheduling boundary in MBB) will be done |
302 | /// beginning with the topmost region of MBB. |
303 | bool doMBBSchedRegionsTopDown() const override { |
304 | return SchedImpl->doMBBSchedRegionsTopDown(); |
305 | } |
306 | |
307 | // Returns LiveIntervals instance for use in DAG mutators and such. |
308 | LiveIntervals *getLIS() const { return LIS; } |
309 | |
310 | /// Return true if this DAG supports VReg liveness and RegPressure. |
311 | virtual bool hasVRegLiveness() const { return false; } |
312 | |
313 | /// Add a postprocessing step to the DAG builder. |
314 | /// Mutations are applied in the order that they are added after normal DAG |
315 | /// building and before MachineSchedStrategy initialization. |
316 | /// |
317 | /// ScheduleDAGMI takes ownership of the Mutation object. |
318 | void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) { |
319 | if (Mutation) |
320 | Mutations.push_back(std::move(Mutation)); |
321 | } |
322 | |
323 | MachineBasicBlock::iterator top() const { return CurrentTop; } |
324 | MachineBasicBlock::iterator bottom() const { return CurrentBottom; } |
325 | |
326 | /// Implement the ScheduleDAGInstrs interface for handling the next scheduling |
327 | /// region. This covers all instructions in a block, while schedule() may only |
328 | /// cover a subset. |
329 | void enterRegion(MachineBasicBlock *bb, |
330 | MachineBasicBlock::iterator begin, |
331 | MachineBasicBlock::iterator end, |
332 | unsigned regioninstrs) override; |
333 | |
334 | /// Implement ScheduleDAGInstrs interface for scheduling a sequence of |
335 | /// reorderable instructions. |
336 | void schedule() override; |
337 | |
338 | void startBlock(MachineBasicBlock *bb) override; |
339 | void finishBlock() override; |
340 | |
341 | /// Change the position of an instruction within the basic block and update |
342 | /// live ranges and region boundary iterators. |
343 | void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos); |
344 | |
345 | const SUnit *getNextClusterPred() const { return NextClusterPred; } |
346 | |
347 | const SUnit *getNextClusterSucc() const { return NextClusterSucc; } |
348 | |
349 | void viewGraph(const Twine &Name, const Twine &Title) override; |
350 | void viewGraph() override; |
351 | |
352 | protected: |
353 | // Top-Level entry points for the schedule() driver... |
354 | |
355 | /// Apply each ScheduleDAGMutation step in order. This allows different |
356 | /// instances of ScheduleDAGMI to perform custom DAG postprocessing. |
357 | void postprocessDAG(); |
358 | |
359 | /// Release ExitSU predecessors and setup scheduler queues. |
360 | void initQueues(ArrayRef<SUnit*> TopRoots, ArrayRef<SUnit*> BotRoots); |
361 | |
362 | /// Update scheduler DAG and queues after scheduling an instruction. |
363 | void updateQueues(SUnit *SU, bool IsTopNode); |
364 | |
365 | /// Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues. |
366 | void placeDebugValues(); |
367 | |
368 | /// dump the scheduled Sequence. |
369 | void dumpSchedule() const; |
370 | |
371 | // Lesser helpers... |
372 | bool checkSchedLimit(); |
373 | |
374 | void findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots, |
375 | SmallVectorImpl<SUnit*> &BotRoots); |
376 | |
377 | void releaseSucc(SUnit *SU, SDep *SuccEdge); |
378 | void releaseSuccessors(SUnit *SU); |
379 | void releasePred(SUnit *SU, SDep *PredEdge); |
380 | void releasePredecessors(SUnit *SU); |
381 | }; |
382 | |
383 | /// ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules |
384 | /// machine instructions while updating LiveIntervals and tracking regpressure. |
385 | class ScheduleDAGMILive : public ScheduleDAGMI { |
386 | protected: |
387 | RegisterClassInfo *RegClassInfo; |
388 | |
389 | /// Information about DAG subtrees. If DFSResult is NULL, then SchedulerTrees |
390 | /// will be empty. |
391 | SchedDFSResult *DFSResult = nullptr; |
392 | BitVector ScheduledTrees; |
393 | |
394 | MachineBasicBlock::iterator LiveRegionEnd; |
395 | |
396 | /// Maps vregs to the SUnits of their uses in the current scheduling region. |
397 | VReg2SUnitMultiMap VRegUses; |
398 | |
399 | // Map each SU to its summary of pressure changes. This array is updated for |
400 | // liveness during bottom-up scheduling. Top-down scheduling may proceed but |
401 | // has no affect on the pressure diffs. |
402 | PressureDiffs SUPressureDiffs; |
403 | |
404 | /// Register pressure in this region computed by initRegPressure. |
405 | bool ShouldTrackPressure = false; |
406 | bool ShouldTrackLaneMasks = false; |
407 | IntervalPressure RegPressure; |
408 | RegPressureTracker RPTracker; |
409 | |
410 | /// List of pressure sets that exceed the target's pressure limit before |
411 | /// scheduling, listed in increasing set ID order. Each pressure set is paired |
412 | /// with its max pressure in the currently scheduled regions. |
413 | std::vector<PressureChange> RegionCriticalPSets; |
414 | |
415 | /// The top of the unscheduled zone. |
416 | IntervalPressure TopPressure; |
417 | RegPressureTracker TopRPTracker; |
418 | |
419 | /// The bottom of the unscheduled zone. |
420 | IntervalPressure BotPressure; |
421 | RegPressureTracker BotRPTracker; |
422 | |
423 | /// True if disconnected subregister components are already renamed. |
424 | /// The renaming is only done on demand if lane masks are tracked. |
425 | bool DisconnectedComponentsRenamed = false; |
426 | |
427 | public: |
428 | ScheduleDAGMILive(MachineSchedContext *C, |
429 | std::unique_ptr<MachineSchedStrategy> S) |
430 | : ScheduleDAGMI(C, std::move(S), /*RemoveKillFlags=*/false), |
431 | RegClassInfo(C->RegClassInfo), RPTracker(RegPressure), |
432 | TopRPTracker(TopPressure), BotRPTracker(BotPressure) {} |
433 | |
434 | ~ScheduleDAGMILive() override; |
435 | |
436 | /// Return true if this DAG supports VReg liveness and RegPressure. |
437 | bool hasVRegLiveness() const override { return true; } |
438 | |
439 | /// Return true if register pressure tracking is enabled. |
440 | bool isTrackingPressure() const { return ShouldTrackPressure; } |
441 | |
442 | /// Get current register pressure for the top scheduled instructions. |
443 | const IntervalPressure &getTopPressure() const { return TopPressure; } |
444 | const RegPressureTracker &getTopRPTracker() const { return TopRPTracker; } |
445 | |
446 | /// Get current register pressure for the bottom scheduled instructions. |
447 | const IntervalPressure &getBotPressure() const { return BotPressure; } |
448 | const RegPressureTracker &getBotRPTracker() const { return BotRPTracker; } |
449 | |
450 | /// Get register pressure for the entire scheduling region before scheduling. |
451 | const IntervalPressure &getRegPressure() const { return RegPressure; } |
452 | |
453 | const std::vector<PressureChange> &getRegionCriticalPSets() const { |
454 | return RegionCriticalPSets; |
455 | } |
456 | |
457 | PressureDiff &getPressureDiff(const SUnit *SU) { |
458 | return SUPressureDiffs[SU->NodeNum]; |
459 | } |
460 | const PressureDiff &getPressureDiff(const SUnit *SU) const { |
461 | return SUPressureDiffs[SU->NodeNum]; |
462 | } |
463 | |
464 | /// Compute a DFSResult after DAG building is complete, and before any |
465 | /// queue comparisons. |
466 | void computeDFSResult(); |
467 | |
468 | /// Return a non-null DFS result if the scheduling strategy initialized it. |
469 | const SchedDFSResult *getDFSResult() const { return DFSResult; } |
470 | |
471 | BitVector &getScheduledTrees() { return ScheduledTrees; } |
472 | |
473 | /// Implement the ScheduleDAGInstrs interface for handling the next scheduling |
474 | /// region. This covers all instructions in a block, while schedule() may only |
475 | /// cover a subset. |
476 | void enterRegion(MachineBasicBlock *bb, |
477 | MachineBasicBlock::iterator begin, |
478 | MachineBasicBlock::iterator end, |
479 | unsigned regioninstrs) override; |
480 | |
481 | /// Implement ScheduleDAGInstrs interface for scheduling a sequence of |
482 | /// reorderable instructions. |
483 | void schedule() override; |
484 | |
485 | /// Compute the cyclic critical path through the DAG. |
486 | unsigned computeCyclicCriticalPath(); |
487 | |
488 | void dump() const override; |
489 | |
490 | protected: |
491 | // Top-Level entry points for the schedule() driver... |
492 | |
493 | /// Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking |
494 | /// enabled. This sets up three trackers. RPTracker will cover the entire DAG |
495 | /// region, TopTracker and BottomTracker will be initialized to the top and |
496 | /// bottom of the DAG region without covereing any unscheduled instruction. |
497 | void buildDAGWithRegPressure(); |
498 | |
499 | /// Release ExitSU predecessors and setup scheduler queues. Re-position |
500 | /// the Top RP tracker in case the region beginning has changed. |
501 | void initQueues(ArrayRef<SUnit*> TopRoots, ArrayRef<SUnit*> BotRoots); |
502 | |
503 | /// Move an instruction and update register pressure. |
504 | void scheduleMI(SUnit *SU, bool IsTopNode); |
505 | |
506 | // Lesser helpers... |
507 | |
508 | void initRegPressure(); |
509 | |
510 | void updatePressureDiffs(ArrayRef<RegisterMaskPair> LiveUses); |
511 | |
512 | void updateScheduledPressure(const SUnit *SU, |
513 | const std::vector<unsigned> &NewMaxPressure); |
514 | |
515 | void collectVRegUses(SUnit &SU); |
516 | }; |
517 | |
518 | //===----------------------------------------------------------------------===// |
519 | /// |
520 | /// Helpers for implementing custom MachineSchedStrategy classes. These take |
521 | /// care of the book-keeping associated with list scheduling heuristics. |
522 | /// |
523 | //===----------------------------------------------------------------------===// |
524 | |
525 | /// ReadyQueue encapsulates vector of "ready" SUnits with basic convenience |
526 | /// methods for pushing and removing nodes. ReadyQueue's are uniquely identified |
527 | /// by an ID. SUnit::NodeQueueId is a mask of the ReadyQueues the SUnit is in. |
528 | /// |
529 | /// This is a convenience class that may be used by implementations of |
530 | /// MachineSchedStrategy. |
531 | class ReadyQueue { |
532 | unsigned ID; |
533 | std::string Name; |
534 | std::vector<SUnit*> Queue; |
535 | |
536 | public: |
537 | ReadyQueue(unsigned id, const Twine &name): ID(id), Name(name.str()) {} |
538 | |
539 | unsigned getID() const { return ID; } |
540 | |
541 | StringRef getName() const { return Name; } |
542 | |
543 | // SU is in this queue if it's NodeQueueID is a superset of this ID. |
544 | bool isInQueue(SUnit *SU) const { return (SU->NodeQueueId & ID); } |
545 | |
546 | bool empty() const { return Queue.empty(); } |
547 | |
548 | void clear() { Queue.clear(); } |
549 | |
550 | unsigned size() const { return Queue.size(); } |
551 | |
552 | using iterator = std::vector<SUnit*>::iterator; |
553 | |
554 | iterator begin() { return Queue.begin(); } |
555 | |
556 | iterator end() { return Queue.end(); } |
557 | |
558 | ArrayRef<SUnit*> elements() { return Queue; } |
559 | |
560 | iterator find(SUnit *SU) { return llvm::find(Queue, SU); } |
561 | |
562 | void push(SUnit *SU) { |
563 | Queue.push_back(SU); |
564 | SU->NodeQueueId |= ID; |
565 | } |
566 | |
567 | iterator remove(iterator I) { |
568 | (*I)->NodeQueueId &= ~ID; |
569 | *I = Queue.back(); |
570 | unsigned idx = I - Queue.begin(); |
571 | Queue.pop_back(); |
572 | return Queue.begin() + idx; |
573 | } |
574 | |
575 | void dump() const; |
576 | }; |
577 | |
578 | /// Summarize the unscheduled region. |
579 | struct SchedRemainder { |
580 | // Critical path through the DAG in expected latency. |
581 | unsigned CriticalPath; |
582 | unsigned CyclicCritPath; |
583 | |
584 | // Scaled count of micro-ops left to schedule. |
585 | unsigned RemIssueCount; |
586 | |
587 | bool IsAcyclicLatencyLimited; |
588 | |
589 | // Unscheduled resources |
590 | SmallVector<unsigned, 16> RemainingCounts; |
591 | |
592 | SchedRemainder() { reset(); } |
593 | |
594 | void reset() { |
595 | CriticalPath = 0; |
596 | CyclicCritPath = 0; |
597 | RemIssueCount = 0; |
598 | IsAcyclicLatencyLimited = false; |
599 | RemainingCounts.clear(); |
600 | } |
601 | |
602 | void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel); |
603 | }; |
604 | |
605 | /// Each Scheduling boundary is associated with ready queues. It tracks the |
606 | /// current cycle in the direction of movement, and maintains the state |
607 | /// of "hazards" and other interlocks at the current cycle. |
608 | class SchedBoundary { |
609 | public: |
610 | /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both) |
611 | enum { |
612 | TopQID = 1, |
613 | BotQID = 2, |
614 | LogMaxQID = 2 |
615 | }; |
616 | |
617 | ScheduleDAGMI *DAG = nullptr; |
618 | const TargetSchedModel *SchedModel = nullptr; |
619 | SchedRemainder *Rem = nullptr; |
620 | |
621 | ReadyQueue Available; |
622 | ReadyQueue Pending; |
623 | |
624 | ScheduleHazardRecognizer *HazardRec = nullptr; |
625 | |
626 | private: |
627 | /// True if the pending Q should be checked/updated before scheduling another |
628 | /// instruction. |
629 | bool CheckPending; |
630 | |
631 | /// Number of cycles it takes to issue the instructions scheduled in this |
632 | /// zone. It is defined as: scheduled-micro-ops / issue-width + stalls. |
633 | /// See getStalls(). |
634 | unsigned CurrCycle; |
635 | |
636 | /// Micro-ops issued in the current cycle |
637 | unsigned CurrMOps; |
638 | |
639 | /// MinReadyCycle - Cycle of the soonest available instruction. |
640 | unsigned MinReadyCycle; |
641 | |
642 | // The expected latency of the critical path in this scheduled zone. |
643 | unsigned ExpectedLatency; |
644 | |
645 | // The latency of dependence chains leading into this zone. |
646 | // For each node scheduled bottom-up: DLat = max DLat, N.Depth. |
647 | // For each cycle scheduled: DLat -= 1. |
648 | unsigned DependentLatency; |
649 | |
650 | /// Count the scheduled (issued) micro-ops that can be retired by |
651 | /// time=CurrCycle assuming the first scheduled instr is retired at time=0. |
652 | unsigned RetiredMOps; |
653 | |
654 | // Count scheduled resources that have been executed. Resources are |
655 | // considered executed if they become ready in the time that it takes to |
656 | // saturate any resource including the one in question. Counts are scaled |
657 | // for direct comparison with other resources. Counts can be compared with |
658 | // MOps * getMicroOpFactor and Latency * getLatencyFactor. |
659 | SmallVector<unsigned, 16> ExecutedResCounts; |
660 | |
661 | /// Cache the max count for a single resource. |
662 | unsigned MaxExecutedResCount; |
663 | |
664 | // Cache the critical resources ID in this scheduled zone. |
665 | unsigned ZoneCritResIdx; |
666 | |
667 | // Is the scheduled region resource limited vs. latency limited. |
668 | bool IsResourceLimited; |
669 | |
670 | // Record the highest cycle at which each resource has been reserved by a |
671 | // scheduled instruction. |
672 | SmallVector<unsigned, 16> ReservedCycles; |
673 | |
674 | // For each PIdx, stores first index into ReservedCycles that corresponds to |
675 | // it. |
676 | SmallVector<unsigned, 16> ReservedCyclesIndex; |
677 | |
678 | // For each PIdx, stores the resource group IDs of its subunits |
679 | SmallVector<APInt, 16> ResourceGroupSubUnitMasks; |
680 | |
681 | #ifndef NDEBUG1 |
682 | // Remember the greatest possible stall as an upper bound on the number of |
683 | // times we should retry the pending queue because of a hazard. |
684 | unsigned MaxObservedStall; |
685 | #endif |
686 | |
687 | public: |
688 | /// Pending queues extend the ready queues with the same ID and the |
689 | /// PendingFlag set. |
690 | SchedBoundary(unsigned ID, const Twine &Name): |
691 | Available(ID, Name+".A"), Pending(ID << LogMaxQID, Name+".P") { |
692 | reset(); |
693 | } |
694 | |
695 | ~SchedBoundary(); |
696 | |
697 | void reset(); |
698 | |
699 | void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, |
700 | SchedRemainder *rem); |
701 | |
702 | bool isTop() const { |
703 | return Available.getID() == TopQID; |
704 | } |
705 | |
706 | /// Number of cycles to issue the instructions scheduled in this zone. |
707 | unsigned getCurrCycle() const { return CurrCycle; } |
708 | |
709 | /// Micro-ops issued in the current cycle |
710 | unsigned getCurrMOps() const { return CurrMOps; } |
711 | |
712 | // The latency of dependence chains leading into this zone. |
713 | unsigned getDependentLatency() const { return DependentLatency; } |
714 | |
715 | /// Get the number of latency cycles "covered" by the scheduled |
716 | /// instructions. This is the larger of the critical path within the zone |
717 | /// and the number of cycles required to issue the instructions. |
718 | unsigned getScheduledLatency() const { |
719 | return std::max(ExpectedLatency, CurrCycle); |
720 | } |
721 | |
722 | unsigned getUnscheduledLatency(SUnit *SU) const { |
723 | return isTop() ? SU->getHeight() : SU->getDepth(); |
724 | } |
725 | |
726 | unsigned getResourceCount(unsigned ResIdx) const { |
727 | return ExecutedResCounts[ResIdx]; |
728 | } |
729 | |
730 | /// Get the scaled count of scheduled micro-ops and resources, including |
731 | /// executed resources. |
732 | unsigned getCriticalCount() const { |
733 | if (!ZoneCritResIdx) |
734 | return RetiredMOps * SchedModel->getMicroOpFactor(); |
735 | return getResourceCount(ZoneCritResIdx); |
736 | } |
737 | |
738 | /// Get a scaled count for the minimum execution time of the scheduled |
739 | /// micro-ops that are ready to execute by getExecutedCount. Notice the |
740 | /// feedback loop. |
741 | unsigned getExecutedCount() const { |
742 | return std::max(CurrCycle * SchedModel->getLatencyFactor(), |
743 | MaxExecutedResCount); |
744 | } |
745 | |
746 | unsigned getZoneCritResIdx() const { return ZoneCritResIdx; } |
747 | |
748 | // Is the scheduled region resource limited vs. latency limited. |
749 | bool isResourceLimited() const { return IsResourceLimited; } |
750 | |
751 | /// Get the difference between the given SUnit's ready time and the current |
752 | /// cycle. |
753 | unsigned getLatencyStallCycles(SUnit *SU); |
754 | |
755 | unsigned getNextResourceCycleByInstance(unsigned InstanceIndex, |
756 | unsigned Cycles); |
757 | |
758 | std::pair<unsigned, unsigned> getNextResourceCycle(const MCSchedClassDesc *SC, |
759 | unsigned PIdx, |
760 | unsigned Cycles); |
761 | |
762 | bool isUnbufferedGroup(unsigned PIdx) const { |
763 | return SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin && |
764 | !SchedModel->getProcResource(PIdx)->BufferSize; |
765 | } |
766 | |
767 | bool checkHazard(SUnit *SU); |
768 | |
769 | unsigned findMaxLatency(ArrayRef<SUnit*> ReadySUs); |
770 | |
771 | unsigned getOtherResourceCount(unsigned &OtherCritIdx); |
772 | |
773 | /// Release SU to make it ready. If it's not in hazard, remove it from |
774 | /// pending queue (if already in) and push into available queue. |
775 | /// Otherwise, push the SU into pending queue. |
776 | /// |
777 | /// @param SU The unit to be released. |
778 | /// @param ReadyCycle Until which cycle the unit is ready. |
779 | /// @param InPQueue Whether SU is already in pending queue. |
780 | /// @param Idx Position offset in pending queue (if in it). |
781 | void releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue, |
782 | unsigned Idx = 0); |
783 | |
784 | void bumpCycle(unsigned NextCycle); |
785 | |
786 | void incExecutedResources(unsigned PIdx, unsigned Count); |
787 | |
788 | unsigned countResource(const MCSchedClassDesc *SC, unsigned PIdx, |
789 | unsigned Cycles, unsigned ReadyCycle); |
790 | |
791 | void bumpNode(SUnit *SU); |
792 | |
793 | void releasePending(); |
794 | |
795 | void removeReady(SUnit *SU); |
796 | |
797 | /// Call this before applying any other heuristics to the Available queue. |
798 | /// Updates the Available/Pending Q's if necessary and returns the single |
799 | /// available instruction, or NULL if there are multiple candidates. |
800 | SUnit *pickOnlyChoice(); |
801 | |
802 | void dumpScheduledState() const; |
803 | }; |
804 | |
805 | /// Base class for GenericScheduler. This class maintains information about |
806 | /// scheduling candidates based on TargetSchedModel making it easy to implement |
807 | /// heuristics for either preRA or postRA scheduling. |
808 | class GenericSchedulerBase : public MachineSchedStrategy { |
809 | public: |
810 | /// Represent the type of SchedCandidate found within a single queue. |
811 | /// pickNodeBidirectional depends on these listed by decreasing priority. |
812 | enum CandReason : uint8_t { |
813 | NoCand, Only1, PhysReg, RegExcess, RegCritical, Stall, Cluster, Weak, |
814 | RegMax, ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce, |
815 | TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder}; |
816 | |
817 | #ifndef NDEBUG1 |
818 | static const char *getReasonStr(GenericSchedulerBase::CandReason Reason); |
819 | #endif |
820 | |
821 | /// Policy for scheduling the next instruction in the candidate's zone. |
822 | struct CandPolicy { |
823 | bool ReduceLatency = false; |
824 | unsigned ReduceResIdx = 0; |
825 | unsigned DemandResIdx = 0; |
826 | |
827 | CandPolicy() = default; |
828 | |
829 | bool operator==(const CandPolicy &RHS) const { |
830 | return ReduceLatency == RHS.ReduceLatency && |
831 | ReduceResIdx == RHS.ReduceResIdx && |
832 | DemandResIdx == RHS.DemandResIdx; |
833 | } |
834 | bool operator!=(const CandPolicy &RHS) const { |
835 | return !(*this == RHS); |
836 | } |
837 | }; |
838 | |
839 | /// Status of an instruction's critical resource consumption. |
840 | struct SchedResourceDelta { |
841 | // Count critical resources in the scheduled region required by SU. |
842 | unsigned CritResources = 0; |
843 | |
844 | // Count critical resources from another region consumed by SU. |
845 | unsigned DemandedResources = 0; |
846 | |
847 | SchedResourceDelta() = default; |
848 | |
849 | bool operator==(const SchedResourceDelta &RHS) const { |
850 | return CritResources == RHS.CritResources |
851 | && DemandedResources == RHS.DemandedResources; |
852 | } |
853 | bool operator!=(const SchedResourceDelta &RHS) const { |
854 | return !operator==(RHS); |
855 | } |
856 | }; |
857 | |
858 | /// Store the state used by GenericScheduler heuristics, required for the |
859 | /// lifetime of one invocation of pickNode(). |
860 | struct SchedCandidate { |
861 | CandPolicy Policy; |
862 | |
863 | // The best SUnit candidate. |
864 | SUnit *SU; |
865 | |
866 | // The reason for this candidate. |
867 | CandReason Reason; |
868 | |
869 | // Whether this candidate should be scheduled at top/bottom. |
870 | bool AtTop; |
871 | |
872 | // Register pressure values for the best candidate. |
873 | RegPressureDelta RPDelta; |
874 | |
875 | // Critical resource consumption of the best candidate. |
876 | SchedResourceDelta ResDelta; |
877 | |
878 | SchedCandidate() { reset(CandPolicy()); } |
879 | SchedCandidate(const CandPolicy &Policy) { reset(Policy); } |
880 | |
881 | void reset(const CandPolicy &NewPolicy) { |
882 | Policy = NewPolicy; |
883 | SU = nullptr; |
884 | Reason = NoCand; |
885 | AtTop = false; |
886 | RPDelta = RegPressureDelta(); |
887 | ResDelta = SchedResourceDelta(); |
888 | } |
889 | |
890 | bool isValid() const { return SU; } |
891 | |
892 | // Copy the status of another candidate without changing policy. |
893 | void setBest(SchedCandidate &Best) { |
894 | assert(Best.Reason != NoCand && "uninitialized Sched candidate")((void)0); |
895 | SU = Best.SU; |
896 | Reason = Best.Reason; |
897 | AtTop = Best.AtTop; |
898 | RPDelta = Best.RPDelta; |
899 | ResDelta = Best.ResDelta; |
900 | } |
901 | |
902 | void initResourceDelta(const ScheduleDAGMI *DAG, |
903 | const TargetSchedModel *SchedModel); |
904 | }; |
905 | |
906 | protected: |
907 | const MachineSchedContext *Context; |
908 | const TargetSchedModel *SchedModel = nullptr; |
909 | const TargetRegisterInfo *TRI = nullptr; |
910 | |
911 | SchedRemainder Rem; |
912 | |
913 | GenericSchedulerBase(const MachineSchedContext *C) : Context(C) {} |
914 | |
915 | void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, |
916 | SchedBoundary *OtherZone); |
917 | |
918 | #ifndef NDEBUG1 |
919 | void traceCandidate(const SchedCandidate &Cand); |
920 | #endif |
921 | |
922 | private: |
923 | bool shouldReduceLatency(const CandPolicy &Policy, SchedBoundary &CurrZone, |
924 | bool ComputeRemLatency, unsigned &RemLatency) const; |
925 | }; |
926 | |
927 | // Utility functions used by heuristics in tryCandidate(). |
928 | bool tryLess(int TryVal, int CandVal, |
929 | GenericSchedulerBase::SchedCandidate &TryCand, |
930 | GenericSchedulerBase::SchedCandidate &Cand, |
931 | GenericSchedulerBase::CandReason Reason); |
932 | bool tryGreater(int TryVal, int CandVal, |
933 | GenericSchedulerBase::SchedCandidate &TryCand, |
934 | GenericSchedulerBase::SchedCandidate &Cand, |
935 | GenericSchedulerBase::CandReason Reason); |
936 | bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, |
937 | GenericSchedulerBase::SchedCandidate &Cand, |
938 | SchedBoundary &Zone); |
939 | bool tryPressure(const PressureChange &TryP, |
940 | const PressureChange &CandP, |
941 | GenericSchedulerBase::SchedCandidate &TryCand, |
942 | GenericSchedulerBase::SchedCandidate &Cand, |
943 | GenericSchedulerBase::CandReason Reason, |
944 | const TargetRegisterInfo *TRI, |
945 | const MachineFunction &MF); |
946 | unsigned getWeakLeft(const SUnit *SU, bool isTop); |
947 | int biasPhysReg(const SUnit *SU, bool isTop); |
948 | |
949 | /// GenericScheduler shrinks the unscheduled zone using heuristics to balance |
950 | /// the schedule. |
951 | class GenericScheduler : public GenericSchedulerBase { |
952 | public: |
953 | GenericScheduler(const MachineSchedContext *C): |
954 | GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ"), |
955 | Bot(SchedBoundary::BotQID, "BotQ") {} |
956 | |
957 | void initPolicy(MachineBasicBlock::iterator Begin, |
958 | MachineBasicBlock::iterator End, |
959 | unsigned NumRegionInstrs) override; |
960 | |
961 | void dumpPolicy() const override; |
962 | |
963 | bool shouldTrackPressure() const override { |
964 | return RegionPolicy.ShouldTrackPressure; |
965 | } |
966 | |
967 | bool shouldTrackLaneMasks() const override { |
968 | return RegionPolicy.ShouldTrackLaneMasks; |
969 | } |
970 | |
971 | void initialize(ScheduleDAGMI *dag) override; |
972 | |
973 | SUnit *pickNode(bool &IsTopNode) override; |
974 | |
975 | void schedNode(SUnit *SU, bool IsTopNode) override; |
976 | |
977 | void releaseTopNode(SUnit *SU) override { |
978 | if (SU->isScheduled) |
979 | return; |
980 | |
981 | Top.releaseNode(SU, SU->TopReadyCycle, false); |
982 | TopCand.SU = nullptr; |
983 | } |
984 | |
985 | void releaseBottomNode(SUnit *SU) override { |
986 | if (SU->isScheduled) |
987 | return; |
988 | |
989 | Bot.releaseNode(SU, SU->BotReadyCycle, false); |
990 | BotCand.SU = nullptr; |
991 | } |
992 | |
993 | void registerRoots() override; |
994 | |
995 | protected: |
996 | ScheduleDAGMILive *DAG = nullptr; |
997 | |
998 | MachineSchedPolicy RegionPolicy; |
999 | |
1000 | // State of the top and bottom scheduled instruction boundaries. |
1001 | SchedBoundary Top; |
1002 | SchedBoundary Bot; |
1003 | |
1004 | /// Candidate last picked from Top boundary. |
1005 | SchedCandidate TopCand; |
1006 | /// Candidate last picked from Bot boundary. |
1007 | SchedCandidate BotCand; |
1008 | |
1009 | void checkAcyclicLatency(); |
1010 | |
1011 | void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, |
1012 | const RegPressureTracker &RPTracker, |
1013 | RegPressureTracker &TempTracker); |
1014 | |
1015 | virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, |
1016 | SchedBoundary *Zone) const; |
1017 | |
1018 | SUnit *pickNodeBidirectional(bool &IsTopNode); |
1019 | |
1020 | void pickNodeFromQueue(SchedBoundary &Zone, |
1021 | const CandPolicy &ZonePolicy, |
1022 | const RegPressureTracker &RPTracker, |
1023 | SchedCandidate &Candidate); |
1024 | |
1025 | void reschedulePhysReg(SUnit *SU, bool isTop); |
1026 | }; |
1027 | |
1028 | /// PostGenericScheduler - Interface to the scheduling algorithm used by |
1029 | /// ScheduleDAGMI. |
1030 | /// |
1031 | /// Callbacks from ScheduleDAGMI: |
1032 | /// initPolicy -> initialize(DAG) -> registerRoots -> pickNode ... |
1033 | class PostGenericScheduler : public GenericSchedulerBase { |
1034 | protected: |
1035 | ScheduleDAGMI *DAG = nullptr; |
1036 | SchedBoundary Top; |
1037 | SmallVector<SUnit*, 8> BotRoots; |
1038 | |
1039 | public: |
1040 | PostGenericScheduler(const MachineSchedContext *C): |
1041 | GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ") {} |
1042 | |
1043 | ~PostGenericScheduler() override = default; |
1044 | |
1045 | void initPolicy(MachineBasicBlock::iterator Begin, |
1046 | MachineBasicBlock::iterator End, |
1047 | unsigned NumRegionInstrs) override { |
1048 | /* no configurable policy */ |
1049 | } |
1050 | |
1051 | /// PostRA scheduling does not track pressure. |
1052 | bool shouldTrackPressure() const override { return false; } |
1053 | |
1054 | void initialize(ScheduleDAGMI *Dag) override; |
1055 | |
1056 | void registerRoots() override; |
1057 | |
1058 | SUnit *pickNode(bool &IsTopNode) override; |
1059 | |
1060 | void scheduleTree(unsigned SubtreeID) override { |
1061 | llvm_unreachable("PostRA scheduler does not support subtree analysis.")__builtin_unreachable(); |
1062 | } |
1063 | |
1064 | void schedNode(SUnit *SU, bool IsTopNode) override; |
1065 | |
1066 | void releaseTopNode(SUnit *SU) override { |
1067 | if (SU->isScheduled) |
1068 | return; |
1069 | Top.releaseNode(SU, SU->TopReadyCycle, false); |
1070 | } |
1071 | |
1072 | // Only called for roots. |
1073 | void releaseBottomNode(SUnit *SU) override { |
1074 | BotRoots.push_back(SU); |
1075 | } |
1076 | |
1077 | protected: |
1078 | virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand); |
1079 | |
1080 | void pickNodeFromQueue(SchedCandidate &Cand); |
1081 | }; |
1082 | |
1083 | /// Create the standard converging machine scheduler. This will be used as the |
1084 | /// default scheduler if the target does not set a default. |
1085 | /// Adds default DAG mutations. |
1086 | ScheduleDAGMILive *createGenericSchedLive(MachineSchedContext *C); |
1087 | |
1088 | /// Create a generic scheduler with no vreg liveness or DAG mutation passes. |
1089 | ScheduleDAGMI *createGenericSchedPostRA(MachineSchedContext *C); |
1090 | |
1091 | std::unique_ptr<ScheduleDAGMutation> |
1092 | createLoadClusterDAGMutation(const TargetInstrInfo *TII, |
1093 | const TargetRegisterInfo *TRI); |
1094 | |
1095 | std::unique_ptr<ScheduleDAGMutation> |
1096 | createStoreClusterDAGMutation(const TargetInstrInfo *TII, |
1097 | const TargetRegisterInfo *TRI); |
1098 | |
1099 | std::unique_ptr<ScheduleDAGMutation> |
1100 | createCopyConstrainDAGMutation(const TargetInstrInfo *TII, |
1101 | const TargetRegisterInfo *TRI); |
1102 | |
1103 | } // end namespace llvm |
1104 | |
1105 | #endif // LLVM_CODEGEN_MACHINESCHEDULER_H |