File: | src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Support/Alignment.h |
Warning: | line 85, column 47 The result of the left shift is undefined due to shifting by '255', which is greater or equal to the width of type 'uint64_t' |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===- GVNSink.cpp - sink expressions into successors ---------------------===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | /// \file GVNSink.cpp | |||
10 | /// This pass attempts to sink instructions into successors, reducing static | |||
11 | /// instruction count and enabling if-conversion. | |||
12 | /// | |||
13 | /// We use a variant of global value numbering to decide what can be sunk. | |||
14 | /// Consider: | |||
15 | /// | |||
16 | /// [ %a1 = add i32 %b, 1 ] [ %c1 = add i32 %d, 1 ] | |||
17 | /// [ %a2 = xor i32 %a1, 1 ] [ %c2 = xor i32 %c1, 1 ] | |||
18 | /// \ / | |||
19 | /// [ %e = phi i32 %a2, %c2 ] | |||
20 | /// [ add i32 %e, 4 ] | |||
21 | /// | |||
22 | /// | |||
23 | /// GVN would number %a1 and %c1 differently because they compute different | |||
24 | /// results - the VN of an instruction is a function of its opcode and the | |||
25 | /// transitive closure of its operands. This is the key property for hoisting | |||
26 | /// and CSE. | |||
27 | /// | |||
28 | /// What we want when sinking however is for a numbering that is a function of | |||
29 | /// the *uses* of an instruction, which allows us to answer the question "if I | |||
30 | /// replace %a1 with %c1, will it contribute in an equivalent way to all | |||
31 | /// successive instructions?". The PostValueTable class in GVN provides this | |||
32 | /// mapping. | |||
33 | // | |||
34 | //===----------------------------------------------------------------------===// | |||
35 | ||||
36 | #include "llvm/ADT/ArrayRef.h" | |||
37 | #include "llvm/ADT/DenseMap.h" | |||
38 | #include "llvm/ADT/DenseMapInfo.h" | |||
39 | #include "llvm/ADT/DenseSet.h" | |||
40 | #include "llvm/ADT/Hashing.h" | |||
41 | #include "llvm/ADT/None.h" | |||
42 | #include "llvm/ADT/Optional.h" | |||
43 | #include "llvm/ADT/PostOrderIterator.h" | |||
44 | #include "llvm/ADT/STLExtras.h" | |||
45 | #include "llvm/ADT/SmallPtrSet.h" | |||
46 | #include "llvm/ADT/SmallVector.h" | |||
47 | #include "llvm/ADT/Statistic.h" | |||
48 | #include "llvm/ADT/StringExtras.h" | |||
49 | #include "llvm/Analysis/GlobalsModRef.h" | |||
50 | #include "llvm/IR/BasicBlock.h" | |||
51 | #include "llvm/IR/CFG.h" | |||
52 | #include "llvm/IR/Constants.h" | |||
53 | #include "llvm/IR/Function.h" | |||
54 | #include "llvm/IR/InstrTypes.h" | |||
55 | #include "llvm/IR/Instruction.h" | |||
56 | #include "llvm/IR/Instructions.h" | |||
57 | #include "llvm/IR/PassManager.h" | |||
58 | #include "llvm/IR/Type.h" | |||
59 | #include "llvm/IR/Use.h" | |||
60 | #include "llvm/IR/Value.h" | |||
61 | #include "llvm/InitializePasses.h" | |||
62 | #include "llvm/Pass.h" | |||
63 | #include "llvm/Support/Allocator.h" | |||
64 | #include "llvm/Support/ArrayRecycler.h" | |||
65 | #include "llvm/Support/AtomicOrdering.h" | |||
66 | #include "llvm/Support/Casting.h" | |||
67 | #include "llvm/Support/Compiler.h" | |||
68 | #include "llvm/Support/Debug.h" | |||
69 | #include "llvm/Support/raw_ostream.h" | |||
70 | #include "llvm/Transforms/Scalar.h" | |||
71 | #include "llvm/Transforms/Scalar/GVN.h" | |||
72 | #include "llvm/Transforms/Scalar/GVNExpression.h" | |||
73 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" | |||
74 | #include "llvm/Transforms/Utils/Local.h" | |||
75 | #include <algorithm> | |||
76 | #include <cassert> | |||
77 | #include <cstddef> | |||
78 | #include <cstdint> | |||
79 | #include <iterator> | |||
80 | #include <utility> | |||
81 | ||||
82 | using namespace llvm; | |||
83 | ||||
84 | #define DEBUG_TYPE"gvn-sink" "gvn-sink" | |||
85 | ||||
86 | STATISTIC(NumRemoved, "Number of instructions removed")static llvm::Statistic NumRemoved = {"gvn-sink", "NumRemoved" , "Number of instructions removed"}; | |||
87 | ||||
88 | namespace llvm { | |||
89 | namespace GVNExpression { | |||
90 | ||||
91 | LLVM_DUMP_METHOD__attribute__((noinline)) void Expression::dump() const { | |||
92 | print(dbgs()); | |||
93 | dbgs() << "\n"; | |||
94 | } | |||
95 | ||||
96 | } // end namespace GVNExpression | |||
97 | } // end namespace llvm | |||
98 | ||||
99 | namespace { | |||
100 | ||||
101 | static bool isMemoryInst(const Instruction *I) { | |||
102 | return isa<LoadInst>(I) || isa<StoreInst>(I) || | |||
103 | (isa<InvokeInst>(I) && !cast<InvokeInst>(I)->doesNotAccessMemory()) || | |||
104 | (isa<CallInst>(I) && !cast<CallInst>(I)->doesNotAccessMemory()); | |||
105 | } | |||
106 | ||||
/// Iterates through instructions in a set of blocks in reverse order from the
/// first non-terminator. For example (assume all blocks have size n):
///   LockstepReverseIterator I([B1, B2, B3]);
///   *I-- = [B1[n], B2[n], B3[n]];
///   *I-- = [B1[n-1], B2[n-1], B3[n-1]];
///   *I-- = [B1[n-2], B2[n-2], B3[n-2]];
///   ...
///
/// It continues until all blocks have been exhausted. Use \c getActiveBlocks()
/// to determine which blocks are still going and the order they appear in the
/// list returned by operator*.
class LockstepReverseIterator {
  ArrayRef<BasicBlock *> Blocks;
  SmallSetVector<BasicBlock *, 4> ActiveBlocks; // Blocks not yet exhausted.
  SmallVector<Instruction *, 4> Insts; // Current instruction per active block.
  bool Fail; // True once no active block has an instruction left.

public:
  LockstepReverseIterator(ArrayRef<BasicBlock *> Blocks) : Blocks(Blocks) {
    reset();
  }

  /// Rewind to the last non-terminator instruction of every block. Blocks
  /// containing only a terminator are dropped from the active set immediately.
  void reset() {
    Fail = false;
    ActiveBlocks.clear();
    for (BasicBlock *BB : Blocks)
      ActiveBlocks.insert(BB);
    Insts.clear();
    for (BasicBlock *BB : Blocks) {
      if (BB->size() <= 1) {
        // Block wasn't big enough - only contained a terminator.
        ActiveBlocks.remove(BB);
        continue;
      }
      Insts.push_back(BB->getTerminator()->getPrevNode());
    }
    if (Insts.empty())
      Fail = true;
  }

  bool isValid() const { return !Fail; }
  ArrayRef<Instruction *> operator*() const { return Insts; }

  // Note: This needs to return a SmallSetVector as the elements of
  // ActiveBlocks will be later copied to Blocks using std::copy. The
  // resultant order of elements in Blocks needs to be deterministic.
  // Using SmallPtrSet instead causes non-deterministic order while
  // copying. And we cannot simply sort Blocks as they need to match the
  // corresponding Values.
  SmallSetVector<BasicBlock *, 4> &getActiveBlocks() { return ActiveBlocks; }

  /// Drop every tracked instruction (and deactivate its block) whose parent
  /// is not in \p Blocks. Insts and ActiveBlocks shrink in lockstep.
  void restrictToBlocks(SmallSetVector<BasicBlock *, 4> &Blocks) {
    for (auto II = Insts.begin(); II != Insts.end();) {
      if (!llvm::is_contained(Blocks, (*II)->getParent())) {
        ActiveBlocks.remove((*II)->getParent());
        II = Insts.erase(II);
      } else {
        ++II;
      }
    }
  }

  /// Step every active block one instruction backwards. A block whose first
  /// instruction has just been consumed becomes inactive; when all blocks are
  /// exhausted the iterator becomes invalid.
  void operator--() {
    if (Fail)
      return;
    SmallVector<Instruction *, 4> NewInsts;
    for (auto *Inst : Insts) {
      if (Inst == &Inst->getParent()->front())
        ActiveBlocks.remove(Inst->getParent());
      else
        NewInsts.push_back(Inst->getPrevNode());
    }
    if (NewInsts.empty()) {
      Fail = true;
      return;
    }
    Insts = NewInsts;
  }
};
187 | ||||
188 | //===----------------------------------------------------------------------===// | |||
189 | ||||
/// Candidate solution for sinking. There may be different ways to
/// sink instructions, differing in the number of instructions sunk,
/// the number of predecessors sunk from and the number of PHIs
/// required.
struct SinkingInstructionCandidate {
  unsigned NumBlocks;       // Number of predecessor blocks sunk from.
  unsigned NumInstructions; // Number of instructions this candidate sinks.
  unsigned NumPHIs;         // PHIs needed if this candidate is chosen.
  unsigned NumMemoryInsts;  // Memory instructions among the sunk set.
  int Cost = -1;            // Higher is better; -1 means not yet computed.
  SmallVector<BasicBlock *, 4> Blocks;

  /// Cost model: each sunk instruction saves (NumBlocks - 1) duplicate
  /// copies; each extra PHI we would have to create costs quadratically; and
  /// sinking from a strict subset of the original predecessors requires edge
  /// splitting, a flat penalty of 2.
  void calculateCost(unsigned NumOrigPHIs, unsigned NumOrigBlocks) {
    unsigned NumExtraPHIs = NumPHIs - NumOrigPHIs;
    unsigned SplitEdgeCost = (NumOrigBlocks > NumBlocks) ? 2 : 0;
    Cost = (NumInstructions * (NumBlocks - 1)) -
           (NumExtraPHIs *
            NumExtraPHIs) // PHIs are expensive, so make sure they're worth it.
           - SplitEdgeCost;
  }

  bool operator>(const SinkingInstructionCandidate &Other) const {
    return Cost > Other.Cost;
  }
};
215 | ||||
216 | #ifndef NDEBUG1 | |||
217 | raw_ostream &operator<<(raw_ostream &OS, const SinkingInstructionCandidate &C) { | |||
218 | OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks | |||
219 | << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">"; | |||
220 | return OS; | |||
221 | } | |||
222 | #endif | |||
223 | ||||
224 | //===----------------------------------------------------------------------===// | |||
225 | ||||
/// Describes a PHI node that may or may not exist. These track the PHIs
/// that must be created if we sunk a sequence of instructions. It provides
/// a hash function for efficient equality comparisons.
class ModelledPHI {
  // Values and Blocks are parallel arrays: Values[i] is the value incoming
  // from Blocks[i].
  SmallVector<Value *, 4> Values;
  SmallVector<BasicBlock *, 4> Blocks;

public:
  ModelledPHI() = default;

  /// Model an existing PHI node, canonicalizing its incoming edges by sorting
  /// so that two PHIs with the same (block, value) pairs compare equal.
  ModelledPHI(const PHINode *PN) {
    // BasicBlock comes first so we sort by basic block pointer order, then by value pointer order.
    SmallVector<std::pair<BasicBlock *, Value *>, 4> Ops;
    for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I)
      Ops.push_back({PN->getIncomingBlock(I), PN->getIncomingValue(I)});
    llvm::sort(Ops);
    for (auto &P : Ops) {
      Blocks.push_back(P.first);
      Values.push_back(P.second);
    }
  }

  /// Create a dummy ModelledPHI that will compare unequal to any other ModelledPHI
  /// without the same ID.
  /// \note This is specifically for DenseMapInfo - do not use this!
  static ModelledPHI createDummy(size_t ID) {
    ModelledPHI M;
    M.Values.push_back(reinterpret_cast<Value*>(ID));
    return M;
  }

  /// Create a PHI from an array of incoming values and incoming blocks.
  template <typename VArray, typename BArray>
  ModelledPHI(const VArray &V, const BArray &B) {
    llvm::copy(V, std::back_inserter(Values));
    llvm::copy(B, std::back_inserter(Blocks));
  }

  /// Create a PHI from [I[OpNum] for I in Insts].
  template <typename BArray>
  ModelledPHI(ArrayRef<Instruction *> Insts, unsigned OpNum, const BArray &B) {
    llvm::copy(B, std::back_inserter(Blocks));
    for (auto *I : Insts)
      Values.push_back(I->getOperand(OpNum));
  }

  /// Restrict the PHI's contents down to only \c NewBlocks.
  /// \c NewBlocks must be a subset of \c this->Blocks.
  void restrictToBlocks(const SmallSetVector<BasicBlock *, 4> &NewBlocks) {
    auto BI = Blocks.begin();
    auto VI = Values.begin();
    while (BI != Blocks.end()) {
      assert(VI != Values.end());
      if (!llvm::is_contained(NewBlocks, *BI)) {
        // Erase from both arrays in lockstep to keep them parallel.
        BI = Blocks.erase(BI);
        VI = Values.erase(VI);
      } else {
        ++BI;
        ++VI;
      }
    }
    assert(Blocks.size() == NewBlocks.size());
  }

  ArrayRef<Value *> getValues() const { return Values; }

  bool areAllIncomingValuesSame() const {
    return llvm::all_of(Values, [&](Value *V) { return V == Values[0]; });
  }

  bool areAllIncomingValuesSameType() const {
    return llvm::all_of(
        Values, [&](Value *V) { return V->getType() == Values[0]->getType(); });
  }

  bool areAnyIncomingValuesConstant() const {
    return llvm::any_of(Values, [&](Value *V) { return isa<Constant>(V); });
  }

  // Hash functor. Only Values participate in the hash; equality also compares
  // Blocks, which is valid for hashing (equal keys still hash equally).
  unsigned hash() const {
    return (unsigned)hash_combine_range(Values.begin(), Values.end());
  }

  bool operator==(const ModelledPHI &Other) const {
    return Values == Other.Values && Blocks == Other.Blocks;
  }
};
314 | ||||
/// DenseMap traits for ModelledPHI, allowing it to be used as a DenseSet key.
/// The empty/tombstone keys are dummies distinguished only by the sentinel ID
/// stuffed into their Values array (see ModelledPHI::createDummy).
template <typename ModelledPHI> struct DenseMapInfo {
  static inline ModelledPHI &getEmptyKey() {
    static ModelledPHI Dummy = ModelledPHI::createDummy(0);
    return Dummy;
  }

  static inline ModelledPHI &getTombstoneKey() {
    static ModelledPHI Dummy = ModelledPHI::createDummy(1);
    return Dummy;
  }

  static unsigned getHashValue(const ModelledPHI &V) { return V.hash(); }

  static bool isEqual(const ModelledPHI &LHS, const ModelledPHI &RHS) {
    return LHS == RHS;
  }
};

using ModelledPHISet = DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>>;
334 | ||||
335 | //===----------------------------------------------------------------------===// | |||
336 | // ValueTable | |||
337 | //===----------------------------------------------------------------------===// | |||
338 | // This is a value number table where the value number is a function of the | |||
339 | // *uses* of a value, rather than its operands. Thus, if VN(A) == VN(B) we know | |||
340 | // that the program would be equivalent if we replaced A with PHI(A, B). | |||
341 | //===----------------------------------------------------------------------===// | |||
342 | ||||
/// A GVN expression describing how an instruction is used. The operands
/// field of BasicExpression is used to store uses, not operands.
///
/// This class also contains fields for discriminators used when determining
/// equivalence of instructions with sideeffects.
class InstructionUseExpr : public GVNExpression::BasicExpression {
  unsigned MemoryUseOrder = -1; // See ValueTable::getMemoryUseOrder().
  bool Volatile = false;        // Discriminates volatile from normal accesses.
  ArrayRef<int> ShuffleMask;    // Non-empty only for shufflevector.

public:
  /// Build the expression from \p I's *users* (not operands); users are
  /// sorted so the expression is independent of use-list order.
  InstructionUseExpr(Instruction *I, ArrayRecycler<Value *> &R,
                     BumpPtrAllocator &A)
      : GVNExpression::BasicExpression(I->getNumUses()) {
    allocateOperands(R, A);
    setOpcode(I->getOpcode());
    setType(I->getType());

    // The mask must discriminate shuffles; copy it into the allocator so it
    // outlives the instruction.
    if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
      ShuffleMask = SVI->getShuffleMask().copy(A);

    for (auto &U : I->uses())
      op_push_back(U.getUser());
    llvm::sort(op_begin(), op_end());
  }

  void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; }
  void setVolatile(bool V) { Volatile = V; }

  hash_code getHashValue() const override {
    return hash_combine(GVNExpression::BasicExpression::getHashValue(),
                        MemoryUseOrder, Volatile, ShuffleMask);
  }

  /// Hash with each stored user first mapped through \p MapFn (used to hash
  /// users by value number rather than by pointer identity).
  template <typename Function> hash_code getHashValue(Function MapFn) {
    hash_code H = hash_combine(getOpcode(), getType(), MemoryUseOrder, Volatile,
                               ShuffleMask);
    for (auto *V : operands())
      H = hash_combine(H, MapFn(V));
    return H;
  }
};
385 | ||||
class ValueTable {
  DenseMap<Value *, uint32_t> ValueNumbering;
  DenseMap<GVNExpression::Expression *, uint32_t> ExpressionNumbering;
  DenseMap<size_t, uint32_t> HashNumbering;
  BumpPtrAllocator Allocator;
  ArrayRecycler<Value *> Recycler;
  uint32_t nextValueNumber = 1; // 0 is reserved (see getMemoryUseOrder).

  /// Create an expression for I based on its opcode and its uses. If I
  /// touches or reads memory, the expression is also based upon its memory
  /// order - see \c getMemoryUseOrder().
  InstructionUseExpr *createExpr(Instruction *I) {
    InstructionUseExpr *E =
        new (Allocator) InstructionUseExpr(I, Recycler, Allocator);
    if (isMemoryInst(I))
      E->setMemoryUseOrder(getMemoryUseOrder(I));

    if (CmpInst *C = dyn_cast<CmpInst>(I)) {
      // Fold the predicate into the opcode so that, e.g., icmp eq and
      // icmp ne never receive the same number.
      CmpInst::Predicate Predicate = C->getPredicate();
      E->setOpcode((C->getOpcode() << 8) | Predicate);
    }
    return E;
  }

  /// Helper to compute the value number for a memory instruction
  /// (LoadInst/StoreInst), including checking the memory ordering and
  /// volatility.
  template <class Inst> InstructionUseExpr *createMemoryExpr(Inst *I) {
    // Ordered or atomic accesses are never candidates for sinking.
    if (isStrongerThanUnordered(I->getOrdering()) || I->isAtomic())
      return nullptr;
    InstructionUseExpr *E = createExpr(I);
    E->setVolatile(I->isVolatile());
    return E;
  }

public:
  ValueTable() = default;

  /// Returns the value number for the specified value, assigning
  /// it a new number if it did not have one before.
  uint32_t lookupOrAdd(Value *V) {
    auto VI = ValueNumbering.find(V);
    if (VI != ValueNumbering.end())
      return VI->second;

    // Non-instructions (arguments, constants, ...) each get a fresh number.
    if (!isa<Instruction>(V)) {
      ValueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
    }

    Instruction *I = cast<Instruction>(V);
    InstructionUseExpr *exp = nullptr;
    switch (I->getOpcode()) {
    case Instruction::Load:
      exp = createMemoryExpr(cast<LoadInst>(I));
      break;
    case Instruction::Store:
      exp = createMemoryExpr(cast<StoreInst>(I));
      break;
    case Instruction::Call:
    case Instruction::Invoke:
    case Instruction::FNeg:
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::ICmp:
    case Instruction::FCmp:
    case Instruction::Trunc:
    case Instruction::ZExt:
    case Instruction::SExt:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::UIToFP:
    case Instruction::SIToFP:
    case Instruction::FPTrunc:
    case Instruction::FPExt:
    case Instruction::PtrToInt:
    case Instruction::IntToPtr:
    case Instruction::BitCast:
    case Instruction::AddrSpaceCast:
    case Instruction::Select:
    case Instruction::ExtractElement:
    case Instruction::InsertElement:
    case Instruction::ShuffleVector:
    case Instruction::InsertValue:
    case Instruction::GetElementPtr:
      exp = createExpr(I);
      break;
    default:
      break;
    }

    // Unmodelled opcodes (and rejected memory ops) get unique numbers, so
    // they never compare equal to anything else.
    if (!exp) {
      ValueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
    }

    uint32_t e = ExpressionNumbering[exp];
    if (!e) {
      // Hash the expression with each stored user mapped through its own
      // value number, so structurally-equivalent uses number identically.
      hash_code H = exp->getHashValue([=](Value *V) { return lookupOrAdd(V); });
      auto I = HashNumbering.find(H);
      if (I != HashNumbering.end()) {
        e = I->second;
      } else {
        e = nextValueNumber++;
        HashNumbering[H] = e;
        ExpressionNumbering[exp] = e;
      }
    }
    ValueNumbering[V] = e;
    return e;
  }

  /// Returns the value number of the specified value. Fails if the value has
  /// not yet been numbered.
  uint32_t lookup(Value *V) const {
    auto VI = ValueNumbering.find(V);
    assert(VI != ValueNumbering.end() && "Value not numbered?");
    return VI->second;
  }

  /// Removes all value numberings and resets the value table.
  void clear() {
    ValueNumbering.clear();
    ExpressionNumbering.clear();
    HashNumbering.clear();
    Recycler.clear(Allocator);
    nextValueNumber = 1;
  }

  /// \c Inst uses or touches memory. Return an ID describing the memory state
  /// at \c Inst such that if getMemoryUseOrder(I1) == getMemoryUseOrder(I2),
  /// the exact same memory operations happen after I1 and I2.
  ///
  /// This is a very hard problem in general, so we use domain-specific
  /// knowledge that we only ever check for equivalence between blocks sharing a
  /// single immediate successor that is common, and when determining if I1 ==
  /// I2 we will have already determined that next(I1) == next(I2). This
  /// inductive property allows us to simply return the value number of the next
  /// instruction that defines memory.
  uint32_t getMemoryUseOrder(Instruction *Inst) {
    auto *BB = Inst->getParent();
    for (auto I = std::next(Inst->getIterator()), E = BB->end();
         I != E && !I->isTerminator(); ++I) {
      // Skip instructions that only read memory - they cannot define it.
      if (!isMemoryInst(&*I))
        continue;
      if (isa<LoadInst>(&*I))
        continue;
      CallInst *CI = dyn_cast<CallInst>(&*I);
      if (CI && CI->onlyReadsMemory())
        continue;
      InvokeInst *II = dyn_cast<InvokeInst>(&*I);
      if (II && II->onlyReadsMemory())
        continue;
      return lookupOrAdd(&*I);
    }
    // No memory-defining instruction before the terminator.
    return 0;
  }
};
560 | ||||
561 | //===----------------------------------------------------------------------===// | |||
562 | ||||
class GVNSink {
public:
  GVNSink() = default;

  /// Run sinking over every block of \p F in reverse post-order.
  /// \returns true if any instruction was sunk (the IR changed).
  bool run(Function &F) {
    LLVM_DEBUG(dbgs() << "GVNSink: running on function @" << F.getName()
                      << "\n");

    unsigned NumSunk = 0;
    ReversePostOrderTraversal<Function*> RPOT(&F);
    for (auto *N : RPOT)
      NumSunk += sinkBB(N);

    return NumSunk > 0;
  }

private:
  ValueTable VN; // Use-based value numbering shared across all blocks.

  /// Returns true if \p I must not be moved: PHIs, EH pads, allocas and
  /// token-typed values are position-sensitive.
  bool shouldAvoidSinkingInstruction(Instruction *I) {
    // These instructions may change or break semantics if moved.
    if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
        I->getType()->isTokenTy())
      return true;
    return false;
  }

  /// The main heuristic function. Analyze the set of instructions pointed to by
  /// LRI and return a candidate solution if these instructions can be sunk, or
  /// None otherwise.
  Optional<SinkingInstructionCandidate> analyzeInstructionForSinking(
      LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
      ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents);

  /// Create a ModelledPHI for each PHI in BB, adding to PHIs.
  void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs,
                          SmallPtrSetImpl<Value *> &PHIContents) {
    for (PHINode &PN : BB->phis()) {
      auto MPHI = ModelledPHI(&PN);
      PHIs.insert(MPHI);
      for (auto *V : MPHI.getValues())
        PHIContents.insert(V);
    }
  }

  /// The main instruction sinking driver. Set up state and try and sink
  /// instructions into BBEnd from its predecessors.
  unsigned sinkBB(BasicBlock *BBEnd);

  /// Perform the actual mechanics of sinking an instruction from Blocks into
  /// BBEnd, which is their only successor.
  void sinkLastInstruction(ArrayRef<BasicBlock *> Blocks, BasicBlock *BBEnd);

  /// Remove PHIs that all have the same incoming value.
  void foldPointlessPHINodes(BasicBlock *BB) {
    auto I = BB->begin();
    while (PHINode *PN = dyn_cast<PHINode>(I++)) {
      if (!llvm::all_of(PN->incoming_values(), [&](const Value *V) {
            return V == PN->getIncomingValue(0);
          }))
        continue;
      // A self-referential PHI with no other incoming value is dead; replace
      // its uses with undef rather than with itself.
      if (PN->getIncomingValue(0) != PN)
        PN->replaceAllUsesWith(PN->getIncomingValue(0));
      else
        PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
      PN->eraseFromParent();
    }
  }
};
632 | ||||
Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking(
    LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
    ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents) {
  auto Insts = *LRI;
  LLVM_DEBUG(dbgs() << " -- Analyzing instruction set: [\n"; for (auto *I
                                                                  : Insts) {
    I->dump();
  } dbgs() << " ]\n";);

  // Count how many instructions share each value number; the most popular
  // number is the one we will try to sink.
  DenseMap<uint32_t, unsigned> VNums;
  for (auto *I : Insts) {
    uint32_t N = VN.lookupOrAdd(I);
    LLVM_DEBUG(dbgs() << " VN=" << Twine::utohexstr(N) << " for" << *I << "\n");
    if (N == ~0U)
      return None;
    VNums[N]++;
  }
  unsigned VNumToSink =
      std::max_element(VNums.begin(), VNums.end(),
                       [](const std::pair<uint32_t, unsigned> &I,
                          const std::pair<uint32_t, unsigned> &J) {
                         return I.second < J.second;
                       })
          ->first;

  if (VNums[VNumToSink] == 1)
    // Can't sink anything!
    return None;

  // Now restrict the number of incoming blocks down to only those with
  // VNumToSink.
  auto &ActivePreds = LRI.getActiveBlocks();
  unsigned InitialActivePredSize = ActivePreds.size();
  SmallVector<Instruction *, 4> NewInsts;
  for (auto *I : Insts) {
    if (VN.lookup(I) != VNumToSink)
      ActivePreds.remove(I->getParent());
    else
      NewInsts.push_back(I);
  }
  for (auto *I : NewInsts)
    if (shouldAvoidSinkingInstruction(I))
      return None;

  // If we've restricted the incoming blocks, restrict all needed PHIs also
  // to that set.
  bool RecomputePHIContents = false;
  if (ActivePreds.size() != InitialActivePredSize) {
    ModelledPHISet NewNeededPHIs;
    for (auto P : NeededPHIs) {
      P.restrictToBlocks(ActivePreds);
      NewNeededPHIs.insert(P);
    }
    NeededPHIs = NewNeededPHIs;
    LRI.restrictToBlocks(ActivePreds);
    RecomputePHIContents = true;
  }

  // The sunk instruction's results.
  ModelledPHI NewPHI(NewInsts, ActivePreds);

  // Does sinking this instruction render previous PHIs redundant?
  if (NeededPHIs.erase(NewPHI))
    RecomputePHIContents = true;

  if (RecomputePHIContents) {
    // The needed PHIs have changed, so recompute the set of all needed
    // values.
    PHIContents.clear();
    for (auto &PHI : NeededPHIs)
      PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
  }

  // Is this instruction required by a later PHI that doesn't match this PHI?
  // if so, we can't sink this instruction.
  for (auto *V : NewPHI.getValues())
    if (PHIContents.count(V))
      // V exists in this PHI, but the whole PHI is different to NewPHI
      // (else it would have been removed earlier). We cannot continue
      // because this isn't representable.
      return None;

  // Which operands need PHIs?
  // FIXME: If any of these fail, we should partition up the candidates to
  // try and continue making progress.
  Instruction *I0 = NewInsts[0];

  // If all instructions that are going to participate don't have the same
  // number of operands, we can't do any useful PHI analysis for all operands.
  auto hasDifferentNumOperands = [&I0](Instruction *I) {
    return I->getNumOperands() != I0->getNumOperands();
  };
  if (any_of(NewInsts, hasDifferentNumOperands))
    return None;

  for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) {
    ModelledPHI PHI(NewInsts, OpNum, ActivePreds);
    if (PHI.areAllIncomingValuesSame())
      continue;
    if (!canReplaceOperandWithVariable(I0, OpNum))
      // We can't create a PHI from this instruction!
      return None;
    if (NeededPHIs.count(PHI))
      continue;
    if (!PHI.areAllIncomingValuesSameType())
      return None;
    // Don't create indirect calls! The called value is the final operand.
    if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OpNum == E - 1 &&
        PHI.areAnyIncomingValuesConstant())
      return None;

    NeededPHIs.reserve(NeededPHIs.size());
    NeededPHIs.insert(PHI);
    PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
  }

  if (isMemoryInst(NewInsts[0]))
    ++MemoryInstNum;

  // Record the candidate; InstNum accumulates across successive calls as the
  // caller walks further back through the blocks.
  SinkingInstructionCandidate Cand;
  Cand.NumInstructions = ++InstNum;
  Cand.NumMemoryInsts = MemoryInstNum;
  Cand.NumBlocks = ActivePreds.size();
  Cand.NumPHIs = NeededPHIs.size();
  append_range(Cand.Blocks, ActivePreds);

  return Cand;
}
761 | ||||
// Attempt to sink common instructions out of the predecessors of \p BBEnd.
// Returns the number of instructions actually sunk (0 if sinking was not
// possible or not profitable).
unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
  LLVM_DEBUG(dbgs() << "GVNSink: running on basic block ";do { } while (false)
             BBEnd->printAsOperand(dbgs()); dbgs() << "\n")do { } while (false);
  // Collect predecessors; bail out entirely if any predecessor ends in
  // something other than a branch or switch (we cannot sink across those).
  SmallVector<BasicBlock *, 4> Preds;
  for (auto *B : predecessors(BBEnd)) {
    auto *T = B->getTerminator();
    if (isa<BranchInst>(T) || isa<SwitchInst>(T))
      Preds.push_back(B);
    else
      return 0;
  }
  // Sinking only makes sense with at least two predecessors to merge from.
  if (Preds.size() < 2)
    return 0;
  // Sort for a deterministic processing order.
  llvm::sort(Preds);

  unsigned NumOrigPreds = Preds.size();
  // We can only sink instructions through unconditional branches.
  for (auto I = Preds.begin(); I != Preds.end();) {
    if ((*I)->getTerminator()->getNumSuccessors() != 1)
      I = Preds.erase(I);
    else
      ++I;
  }

  // Walk the remaining predecessors in lockstep, bottom-up, accumulating
  // candidate groups of instructions that could be sunk together.
  LockstepReverseIterator LRI(Preds);
  SmallVector<SinkingInstructionCandidate, 4> Candidates;
  unsigned InstNum = 0, MemoryInstNum = 0;
  ModelledPHISet NeededPHIs;
  SmallPtrSet<Value *, 4> PHIContents;
  analyzeInitialPHIs(BBEnd, NeededPHIs, PHIContents);
  unsigned NumOrigPHIs = NeededPHIs.size();

  while (LRI.isValid()) {
    auto Cand = analyzeInstructionForSinking(LRI, InstNum, MemoryInstNum,
                                             NeededPHIs, PHIContents);
    if (!Cand)
      break;
    Cand->calculateCost(NumOrigPHIs, Preds.size());
    Candidates.emplace_back(*Cand);
    --LRI;
  }

  // Highest benefit first; stable sort keeps ties deterministic.
  llvm::stable_sort(Candidates, std::greater<SinkingInstructionCandidate>());
  LLVM_DEBUG(dbgs() << " -- Sinking candidates:\n"; for (auto &Cdo { } while (false)
                                                        : Candidates) dbgs()do { } while (false)
             << " " << C << "\n";)do { } while (false);

  // Pick the top candidate, as long it is positive!
  if (Candidates.empty() || Candidates.front().Cost <= 0)
    return 0;
  auto C = Candidates.front();

  LLVM_DEBUG(dbgs() << " -- Sinking: " << C << "\n")do { } while (false);
  BasicBlock *InsertBB = BBEnd;
  // If only a subset of predecessors participate, split their edges so the
  // sunk instructions execute only on those paths.
  if (C.Blocks.size() < NumOrigPreds) {
    LLVM_DEBUG(dbgs() << " -- Splitting edge to ";do { } while (false)
               BBEnd->printAsOperand(dbgs()); dbgs() << "\n")do { } while (false);
    InsertBB = SplitBlockPredecessors(BBEnd, C.Blocks, ".gvnsink.split");
    if (!InsertBB) {
      LLVM_DEBUG(dbgs() << " -- FAILED to split edge!\n")do { } while (false);
      // Edge couldn't be split.
      return 0;
    }
  }

  // Sink the chosen number of instructions, one "row" at a time.
  for (unsigned I = 0; I < C.NumInstructions; ++I)
    sinkLastInstruction(C.Blocks, InsertBB);

  return C.NumInstructions;
}
832 | ||||
// Sink the last non-terminator instruction of every block in \p Blocks into
// \p BBEnd, merging the copies into a single instruction and introducing PHI
// nodes for any operands that differ between them.
void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
                                  BasicBlock *BBEnd) {
  // Gather the instruction immediately before each block's terminator.
  SmallVector<Instruction *, 4> Insts;
  for (BasicBlock *BB : Blocks)
    Insts.push_back(BB->getTerminator()->getPrevNode());
  Instruction *I0 = Insts.front();

  // Build the operand list for the merged instruction: reuse an operand when
  // all copies agree on it, otherwise merge the variants through a new PHI.
  SmallVector<Value *, 4> NewOperands;
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
    bool NeedPHI = llvm::any_of(Insts, [&I0, O](const Instruction *I) {
      return I->getOperand(O) != I0->getOperand(O);
    });
    if (!NeedPHI) {
      NewOperands.push_back(I0->getOperand(O));
      continue;
    }

    // Create a new PHI in the successor block and populate it.
    auto *Op = I0->getOperand(O);
    assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!")((void)0);
    auto *PN = PHINode::Create(Op->getType(), Insts.size(),
                               Op->getName() + ".sink", &BBEnd->front());
    for (auto *I : Insts)
      PN->addIncoming(I->getOperand(O), I->getParent());
    NewOperands.push_back(PN);
  }

  // Arbitrarily use I0 as the new "common" instruction; remap its operands
  // and move it to the start of the successor block.
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
    I0->getOperandUse(O).set(NewOperands[O]);
  I0->moveBefore(&*BBEnd->getFirstInsertionPt());

  // Update metadata and IR flags.
  for (auto *I : Insts)
    if (I != I0) {
      combineMetadataForCSE(I0, I, true);
      I0->andIRFlags(I);
    }

  // Redirect users of the duplicate copies to the surviving instruction, then
  // clean up any PHIs that became trivially redundant as a result.
  for (auto *I : Insts)
    if (I != I0)
      I->replaceAllUsesWith(I0);
  foldPointlessPHINodes(BBEnd);

  // Finally nuke all instructions apart from the common instruction.
  for (auto *I : Insts)
    if (I != I0)
      I->eraseFromParent();

  NumRemoved += Insts.size() - 1;
}
885 | ||||
886 | //////////////////////////////////////////////////////////////////////////////// | |||
887 | // Pass machinery / boilerplate | |||
888 | ||||
// Legacy pass-manager wrapper around the GVNSink implementation.
class GVNSinkLegacyPass : public FunctionPass {
public:
  static char ID; // Pass identification.

  GVNSinkLegacyPass() : FunctionPass(ID) {
    initializeGVNSinkLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  // Run the GVNSink transform over \p F; returns true if the IR was changed.
  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;
    GVNSink G;
    return G.run(F);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addPreserved<GlobalsAAWrapperPass>();
  }
};
908 | ||||
909 | } // end anonymous namespace | |||
910 | ||||
911 | PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) { | |||
912 | GVNSink G; | |||
913 | if (!G.run(F)) | |||
914 | return PreservedAnalyses::all(); | |||
915 | return PreservedAnalyses::none(); | |||
916 | } | |||
917 | ||||
// Pass ID storage; its address is the unique identifier for this pass.
char GVNSinkLegacyPass::ID = 0;

// Legacy pass-manager registration boilerplate (macro invocations shown with
// their expansions by the code browser).
INITIALIZE_PASS_BEGIN(GVNSinkLegacyPass, "gvn-sink",static void *initializeGVNSinkLegacyPassPassOnce(PassRegistry &Registry) {
                      "Early GVN sinking of Expressions", false, false)static void *initializeGVNSinkLegacyPassPassOnce(PassRegistry &Registry) {
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)initializeDominatorTreeWrapperPassPass(Registry);
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)initializePostDominatorTreeWrapperPassPass(Registry);
INITIALIZE_PASS_END(GVNSinkLegacyPass, "gvn-sink",PassInfo *PI = new PassInfo( "Early GVN sinking of Expressions" , "gvn-sink", &GVNSinkLegacyPass::ID, PassInfo::NormalCtor_t (callDefaultCtor<GVNSinkLegacyPass>), false, false); Registry .registerPass(*PI, true); return PI; } static llvm::once_flag InitializeGVNSinkLegacyPassPassFlag; void llvm::initializeGVNSinkLegacyPassPass (PassRegistry &Registry) { llvm::call_once(InitializeGVNSinkLegacyPassPassFlag , initializeGVNSinkLegacyPassPassOnce, std::ref(Registry)); }
                    "Early GVN sinking of Expressions", false, false)PassInfo *PI = new PassInfo( "Early GVN sinking of Expressions" , "gvn-sink", &GVNSinkLegacyPass::ID, PassInfo::NormalCtor_t (callDefaultCtor<GVNSinkLegacyPass>), false, false); Registry .registerPass(*PI, true); return PI; } static llvm::once_flag InitializeGVNSinkLegacyPassPassFlag; void llvm::initializeGVNSinkLegacyPassPass (PassRegistry &Registry) { llvm::call_once(InitializeGVNSinkLegacyPassPassFlag , initializeGVNSinkLegacyPassPassOnce, std::ref(Registry)); }

// Factory used by the legacy pass manager to instantiate this pass.
FunctionPass *llvm::createGVNSinkPass() { return new GVNSinkLegacyPass(); }
1 | //===- Allocator.h - Simple memory allocation abstraction -------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// \file |
9 | /// |
10 | /// This file defines the BumpPtrAllocator interface. BumpPtrAllocator conforms |
11 | /// to the LLVM "Allocator" concept and is similar to MallocAllocator, but |
12 | /// objects cannot be deallocated. Their lifetime is tied to the lifetime of the |
13 | /// allocator. |
14 | /// |
15 | //===----------------------------------------------------------------------===// |
16 | |
17 | #ifndef LLVM_SUPPORT_ALLOCATOR_H |
18 | #define LLVM_SUPPORT_ALLOCATOR_H |
19 | |
20 | #include "llvm/ADT/Optional.h" |
21 | #include "llvm/ADT/SmallVector.h" |
22 | #include "llvm/Support/Alignment.h" |
23 | #include "llvm/Support/AllocatorBase.h" |
24 | #include "llvm/Support/Compiler.h" |
25 | #include "llvm/Support/ErrorHandling.h" |
26 | #include "llvm/Support/MathExtras.h" |
27 | #include "llvm/Support/MemAlloc.h" |
28 | #include <algorithm> |
29 | #include <cassert> |
30 | #include <cstddef> |
31 | #include <cstdint> |
32 | #include <cstdlib> |
33 | #include <iterator> |
34 | #include <type_traits> |
35 | #include <utility> |
36 | |
37 | namespace llvm { |
38 | |
39 | namespace detail { |
40 | |
41 | // We call out to an external function to actually print the message as the |
42 | // printing code uses Allocator.h in its implementation. |
43 | void printBumpPtrAllocatorStats(unsigned NumSlabs, size_t BytesAllocated, |
44 | size_t TotalMemory); |
45 | |
46 | } // end namespace detail |
47 | |
48 | /// Allocate memory in an ever growing pool, as if by bump-pointer. |
49 | /// |
50 | /// This isn't strictly a bump-pointer allocator as it uses backing slabs of |
51 | /// memory rather than relying on a boundless contiguous heap. However, it has |
52 | /// bump-pointer semantics in that it is a monotonically growing pool of memory |
53 | /// where every allocation is found by merely allocating the next N bytes in |
54 | /// the slab, or the next N bytes in the next slab. |
55 | /// |
56 | /// Note that this also has a threshold for forcing allocations above a certain |
57 | /// size into their own slab. |
58 | /// |
59 | /// The BumpPtrAllocatorImpl template defaults to using a MallocAllocator |
60 | /// object, which wraps malloc, to allocate memory, but it can be changed to |
61 | /// use a custom allocator. |
62 | /// |
63 | /// The GrowthDelay specifies after how many allocated slabs the allocator |
64 | /// increases the size of the slabs. |
template <typename AllocatorT = MallocAllocator, size_t SlabSize = 4096,
          size_t SizeThreshold = SlabSize, size_t GrowthDelay = 128>
class BumpPtrAllocatorImpl
    : public AllocatorBase<BumpPtrAllocatorImpl<AllocatorT, SlabSize,
                                                SizeThreshold, GrowthDelay>>,
      private AllocatorT {
public:
  static_assert(SizeThreshold <= SlabSize,
                "The SizeThreshold must be at most the SlabSize to ensure "
                "that objects larger than a slab go into their own memory "
                "allocation.");
  static_assert(GrowthDelay > 0,
                "GrowthDelay must be at least 1 which already increases the"
                "slab size after each allocated slab.");

  BumpPtrAllocatorImpl() = default;

  // Forwarding constructor: wrap an existing underlying allocator instance.
  template <typename T>
  BumpPtrAllocatorImpl(T &&Allocator)
      : AllocatorT(std::forward<T &&>(Allocator)) {}

  // Manually implement a move constructor as we must clear the old allocator's
  // slabs as a matter of correctness.
  BumpPtrAllocatorImpl(BumpPtrAllocatorImpl &&Old)
      : AllocatorT(static_cast<AllocatorT &&>(Old)), CurPtr(Old.CurPtr),
        End(Old.End), Slabs(std::move(Old.Slabs)),
        CustomSizedSlabs(std::move(Old.CustomSizedSlabs)),
        BytesAllocated(Old.BytesAllocated), RedZoneSize(Old.RedZoneSize) {
    Old.CurPtr = Old.End = nullptr;
    Old.BytesAllocated = 0;
    Old.Slabs.clear();
    Old.CustomSizedSlabs.clear();
  }

  ~BumpPtrAllocatorImpl() {
    DeallocateSlabs(Slabs.begin(), Slabs.end());
    DeallocateCustomSizedSlabs();
  }

  // Move assignment: release our own slabs first, then steal RHS's state and
  // leave RHS empty so its destructor frees nothing.
  BumpPtrAllocatorImpl &operator=(BumpPtrAllocatorImpl &&RHS) {
    DeallocateSlabs(Slabs.begin(), Slabs.end());
    DeallocateCustomSizedSlabs();

    CurPtr = RHS.CurPtr;
    End = RHS.End;
    BytesAllocated = RHS.BytesAllocated;
    RedZoneSize = RHS.RedZoneSize;
    Slabs = std::move(RHS.Slabs);
    CustomSizedSlabs = std::move(RHS.CustomSizedSlabs);
    AllocatorT::operator=(static_cast<AllocatorT &&>(RHS));

    RHS.CurPtr = RHS.End = nullptr;
    RHS.BytesAllocated = 0;
    RHS.Slabs.clear();
    RHS.CustomSizedSlabs.clear();
    return *this;
  }

  /// Deallocate all but the current slab and reset the current pointer
  /// to the beginning of it, freeing all memory allocated so far.
  void Reset() {
    // Deallocate all but the first slab, and deallocate all custom-sized slabs.
    DeallocateCustomSizedSlabs();
    CustomSizedSlabs.clear();

    if (Slabs.empty())
      return;

    // Reset the state.
    BytesAllocated = 0;
    CurPtr = (char *)Slabs.front();
    End = CurPtr + SlabSize;

    __asan_poison_memory_region(*Slabs.begin(), computeSlabSize(0));
    DeallocateSlabs(std::next(Slabs.begin()), Slabs.end());
    Slabs.erase(std::next(Slabs.begin()), Slabs.end());
  }

  /// Allocate space at the specified alignment.
  LLVM_ATTRIBUTE_RETURNS_NONNULL__attribute__((returns_nonnull)) LLVM_ATTRIBUTE_RETURNS_NOALIAS__attribute__((__malloc__)) void *
  Allocate(size_t Size, Align Alignment) {
    // Keep track of how many bytes we've allocated.
    BytesAllocated += Size;

    // Pad bytes needed to bring CurPtr up to the requested alignment.
    size_t Adjustment = offsetToAlignedAddr(CurPtr, Alignment);
    assert(Adjustment + Size >= Size && "Adjustment + Size must not overflow")((void)0);

    size_t SizeToAllocate = Size;
#if LLVM_ADDRESS_SANITIZER_BUILD0
    // Add trailing bytes as a "red zone" under ASan.
    SizeToAllocate += RedZoneSize;
#endif

    // Check if we have enough space.
    if (Adjustment + SizeToAllocate <= size_t(End - CurPtr)) {
      char *AlignedPtr = CurPtr + Adjustment;
      CurPtr = AlignedPtr + SizeToAllocate;
      // Update the allocation point of this memory block in MemorySanitizer.
      // Without this, MemorySanitizer messages for values originated from here
      // will point to the allocation of the entire slab.
      __msan_allocated_memory(AlignedPtr, Size);
      // Similarly, tell ASan about this space.
      __asan_unpoison_memory_region(AlignedPtr, Size);
      return AlignedPtr;
    }

    // If Size is really big, allocate a separate slab for it.
    size_t PaddedSize = SizeToAllocate + Alignment.value() - 1;
    if (PaddedSize > SizeThreshold) {
      void *NewSlab =
          AllocatorT::Allocate(PaddedSize, alignof(std::max_align_t));
      // We own the new slab and don't want anyone reading anything other than
      // pieces returned from this method. So poison the whole slab.
      __asan_poison_memory_region(NewSlab, PaddedSize);
      CustomSizedSlabs.push_back(std::make_pair(NewSlab, PaddedSize));

      uintptr_t AlignedAddr = alignAddr(NewSlab, Alignment);
      assert(AlignedAddr + Size <= (uintptr_t)NewSlab + PaddedSize)((void)0);
      char *AlignedPtr = (char*)AlignedAddr;
      __msan_allocated_memory(AlignedPtr, Size);
      __asan_unpoison_memory_region(AlignedPtr, Size);
      return AlignedPtr;
    }

    // Otherwise, start a new slab and try again.
    StartNewSlab();
    uintptr_t AlignedAddr = alignAddr(CurPtr, Alignment);
    assert(AlignedAddr + SizeToAllocate <= (uintptr_t)End &&((void)0)
           "Unable to allocate memory!")((void)0);
    char *AlignedPtr = (char*)AlignedAddr;
    CurPtr = AlignedPtr + SizeToAllocate;
    __msan_allocated_memory(AlignedPtr, Size);
    __asan_unpoison_memory_region(AlignedPtr, Size);
    return AlignedPtr;
  }

  // Convenience overload taking a raw integral alignment.
  inline LLVM_ATTRIBUTE_RETURNS_NONNULL__attribute__((returns_nonnull)) LLVM_ATTRIBUTE_RETURNS_NOALIAS__attribute__((__malloc__)) void *
  Allocate(size_t Size, size_t Alignment) {
    assert(Alignment > 0 && "0-byte alignment is not allowed. Use 1 instead.")((void)0);
    return Allocate(Size, Align(Alignment));
  }

  // Pull in base class overloads.
  using AllocatorBase<BumpPtrAllocatorImpl>::Allocate;

  // Bump pointer allocators are expected to never free their storage; and
  // clients expect pointers to remain valid for non-dereferencing uses even
  // after deallocation.
  void Deallocate(const void *Ptr, size_t Size, size_t /*Alignment*/) {
    __asan_poison_memory_region(Ptr, Size);
  }

  // Pull in base class overloads.
  using AllocatorBase<BumpPtrAllocatorImpl>::Deallocate;

  size_t GetNumSlabs() const { return Slabs.size() + CustomSizedSlabs.size(); }

  /// \return An index uniquely and reproducibly identifying
  /// an input pointer \p Ptr in the given allocator.
  /// The returned value is negative iff the object is inside a custom-size
  /// slab.
  /// Returns an empty optional if the pointer is not found in the allocator.
  llvm::Optional<int64_t> identifyObject(const void *Ptr) {
    const char *P = static_cast<const char *>(Ptr);
    int64_t InSlabIdx = 0;
    for (size_t Idx = 0, E = Slabs.size(); Idx < E; Idx++) {
      const char *S = static_cast<const char *>(Slabs[Idx]);
      if (P >= S && P < S + computeSlabSize(Idx))
        return InSlabIdx + static_cast<int64_t>(P - S);
      InSlabIdx += static_cast<int64_t>(computeSlabSize(Idx));
    }

    // Use negative index to denote custom sized slabs.
    int64_t InCustomSizedSlabIdx = -1;
    for (size_t Idx = 0, E = CustomSizedSlabs.size(); Idx < E; Idx++) {
      const char *S = static_cast<const char *>(CustomSizedSlabs[Idx].first);
      size_t Size = CustomSizedSlabs[Idx].second;
      if (P >= S && P < S + Size)
        return InCustomSizedSlabIdx - static_cast<int64_t>(P - S);
      InCustomSizedSlabIdx -= static_cast<int64_t>(Size);
    }
    return None;
  }

  /// A wrapper around identifyObject that additionally asserts that
  /// the object is indeed within the allocator.
  /// \return An index uniquely and reproducibly identifying
  /// an input pointer \p Ptr in the given allocator.
  int64_t identifyKnownObject(const void *Ptr) {
    Optional<int64_t> Out = identifyObject(Ptr);
    assert(Out && "Wrong allocator used")((void)0);
    return *Out;
  }

  /// A wrapper around identifyKnownObject. Accepts type information
  /// about the object and produces a smaller identifier by relying on
  /// the alignment information. Note that sub-classes may have different
  /// alignment, so the most base class should be passed as template parameter
  /// in order to obtain correct results. For that reason automatic template
  /// parameter deduction is disabled.
  /// \return An index uniquely and reproducibly identifying
  /// an input pointer \p Ptr in the given allocator. This identifier is
  /// different from the ones produced by identifyObject and
  /// identifyAlignedObject.
  template <typename T>
  int64_t identifyKnownAlignedObject(const void *Ptr) {
    int64_t Out = identifyKnownObject(Ptr);
    assert(Out % alignof(T) == 0 && "Wrong alignment information")((void)0);
    return Out / alignof(T);
  }

  // Total bytes held by the allocator (all slabs), not just bytes handed out.
  size_t getTotalMemory() const {
    size_t TotalMemory = 0;
    for (auto I = Slabs.begin(), E = Slabs.end(); I != E; ++I)
      TotalMemory += computeSlabSize(std::distance(Slabs.begin(), I));
    for (auto &PtrAndSize : CustomSizedSlabs)
      TotalMemory += PtrAndSize.second;
    return TotalMemory;
  }

  size_t getBytesAllocated() const { return BytesAllocated; }

  void setRedZoneSize(size_t NewSize) {
    RedZoneSize = NewSize;
  }

  void PrintStats() const {
    detail::printBumpPtrAllocatorStats(Slabs.size(), BytesAllocated,
                                       getTotalMemory());
  }

private:
  /// The current pointer into the current slab.
  ///
  /// This points to the next free byte in the slab.
  char *CurPtr = nullptr;

  /// The end of the current slab.
  char *End = nullptr;

  /// The slabs allocated so far.
  SmallVector<void *, 4> Slabs;

  /// Custom-sized slabs allocated for too-large allocation requests.
  SmallVector<std::pair<void *, size_t>, 0> CustomSizedSlabs;

  /// How many bytes we've allocated.
  ///
  /// Used so that we can compute how much space was wasted.
  size_t BytesAllocated = 0;

  /// The number of bytes to put between allocations when running under
  /// a sanitizer.
  size_t RedZoneSize = 1;

  static size_t computeSlabSize(unsigned SlabIdx) {
    // Scale the actual allocated slab size based on the number of slabs
    // allocated. Every GrowthDelay slabs allocated, we double
    // the allocated size to reduce allocation frequency, but saturate at
    // multiplying the slab size by 2^30.
    return SlabSize *
           ((size_t)1 << std::min<size_t>(30, SlabIdx / GrowthDelay));
  }

  /// Allocate a new slab and move the bump pointers over into the new
  /// slab, modifying CurPtr and End.
  void StartNewSlab() {
    size_t AllocatedSlabSize = computeSlabSize(Slabs.size());

    void *NewSlab =
        AllocatorT::Allocate(AllocatedSlabSize, alignof(std::max_align_t));
    // We own the new slab and don't want anyone reading anything other than
    // pieces returned from this method. So poison the whole slab.
    __asan_poison_memory_region(NewSlab, AllocatedSlabSize);

    Slabs.push_back(NewSlab);
    CurPtr = (char *)(NewSlab);
    End = ((char *)NewSlab) + AllocatedSlabSize;
  }

  /// Deallocate a sequence of slabs.
  void DeallocateSlabs(SmallVectorImpl<void *>::iterator I,
                       SmallVectorImpl<void *>::iterator E) {
    for (; I != E; ++I) {
      size_t AllocatedSlabSize =
          computeSlabSize(std::distance(Slabs.begin(), I));
      AllocatorT::Deallocate(*I, AllocatedSlabSize, alignof(std::max_align_t));
    }
  }

  /// Deallocate all memory for custom sized slabs.
  void DeallocateCustomSizedSlabs() {
    for (auto &PtrAndSize : CustomSizedSlabs) {
      void *Ptr = PtrAndSize.first;
      size_t Size = PtrAndSize.second;
      AllocatorT::Deallocate(Ptr, Size, alignof(std::max_align_t));
    }
  }

  template <typename T> friend class SpecificBumpPtrAllocator;
};
366 | |
/// The standard BumpPtrAllocator which just uses the default template
/// parameters.
typedef BumpPtrAllocatorImpl<> BumpPtrAllocator;

/// A BumpPtrAllocator that allows only elements of a specific type to be
/// allocated.
///
/// This allows calling the destructor in DestroyAll() and when the allocator is
/// destroyed.
template <typename T> class SpecificBumpPtrAllocator {
  BumpPtrAllocator Allocator;

public:
  SpecificBumpPtrAllocator() {
    // Because SpecificBumpPtrAllocator walks the memory to call destructors,
    // it can't have red zones between allocations.
    Allocator.setRedZoneSize(0);
  }
  SpecificBumpPtrAllocator(SpecificBumpPtrAllocator &&Old)
      : Allocator(std::move(Old.Allocator)) {}
  ~SpecificBumpPtrAllocator() { DestroyAll(); }

  SpecificBumpPtrAllocator &operator=(SpecificBumpPtrAllocator &&RHS) {
    Allocator = std::move(RHS.Allocator);
    return *this;
  }

  /// Call the destructor of each allocated object and deallocate all but the
  /// current slab and reset the current pointer to the beginning of it, freeing
  /// all memory allocated so far.
  void DestroyAll() {
    // Destroy every T laid out in [Begin, End).
    auto DestroyElements = [](char *Begin, char *End) {
      assert(Begin == (char *)alignAddr(Begin, Align::Of<T>()))((void)0);
      for (char *Ptr = Begin; Ptr + sizeof(T) <= End; Ptr += sizeof(T))
        reinterpret_cast<T *>(Ptr)->~T();
    };

    for (auto I = Allocator.Slabs.begin(), E = Allocator.Slabs.end(); I != E;
         ++I) {
      size_t AllocatedSlabSize = BumpPtrAllocator::computeSlabSize(
          std::distance(Allocator.Slabs.begin(), I));
      char *Begin = (char *)alignAddr(*I, Align::Of<T>());
      // The last slab is filled only up to CurPtr; earlier slabs are full.
      char *End = *I == Allocator.Slabs.back() ? Allocator.CurPtr
                                               : (char *)*I + AllocatedSlabSize;

      DestroyElements(Begin, End);
    }

    for (auto &PtrAndSize : Allocator.CustomSizedSlabs) {
      void *Ptr = PtrAndSize.first;
      size_t Size = PtrAndSize.second;
      DestroyElements((char *)alignAddr(Ptr, Align::Of<T>()),
                      (char *)Ptr + Size);
    }

    Allocator.Reset();
  }

  /// Allocate space for an array of objects without constructing them.
  T *Allocate(size_t num = 1) { return Allocator.Allocate<T>(num); }
};
428 | |
429 | } // end namespace llvm |
430 | |
431 | template <typename AllocatorT, size_t SlabSize, size_t SizeThreshold, |
432 | size_t GrowthDelay> |
433 | void * |
434 | operator new(size_t Size, |
435 | llvm::BumpPtrAllocatorImpl<AllocatorT, SlabSize, SizeThreshold, |
436 | GrowthDelay> &Allocator) { |
437 | return Allocator.Allocate(Size, std::min((size_t)llvm::NextPowerOf2(Size), |
438 | alignof(std::max_align_t))); |
439 | } |
440 | |
/// Matching placement operator delete for the operator new above. Deliberately
/// a no-op: bump-pointer storage is only reclaimed when the allocator itself
/// is reset or destroyed.
template <typename AllocatorT, size_t SlabSize, size_t SizeThreshold,
          size_t GrowthDelay>
void operator delete(void *,
                     llvm::BumpPtrAllocatorImpl<AllocatorT, SlabSize,
                                                SizeThreshold, GrowthDelay> &) {
}
447 | |
448 | #endif // LLVM_SUPPORT_ALLOCATOR_H |
1 | //===-- llvm/Support/Alignment.h - Useful alignment functions ---*- C++ -*-===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | // This file contains types to represent alignments. | |||
10 | // They are instrumented to guarantee some invariants are preserved and prevent | |||
11 | // invalid manipulations. | |||
12 | // | |||
13 | // - Align represents an alignment in bytes, it is always set and always a valid | |||
14 | // power of two, its minimum value is 1 which means no alignment requirements. | |||
15 | // | |||
16 | // - MaybeAlign is an optional type, it may be undefined or set. When it's set | |||
17 | // you can get the underlying Align type by using the getValue() method. | |||
18 | // | |||
19 | //===----------------------------------------------------------------------===// | |||
20 | ||||
21 | #ifndef LLVM_SUPPORT_ALIGNMENT_H_ | |||
22 | #define LLVM_SUPPORT_ALIGNMENT_H_ | |||
23 | ||||
24 | #include "llvm/ADT/Optional.h" | |||
25 | #include "llvm/Support/MathExtras.h" | |||
26 | #include <cassert> | |||
27 | #ifndef NDEBUG1 | |||
28 | #include <string> | |||
29 | #endif // NDEBUG | |||
30 | ||||
31 | namespace llvm { | |||
32 | ||||
33 | #define ALIGN_CHECK_ISPOSITIVE(decl) \ | |||
34 | assert(decl > 0 && (#decl " should be defined"))((void)0) | |||
35 | ||||
36 | /// This struct is a compact representation of a valid (non-zero power of two) | |||
37 | /// alignment. | |||
38 | /// It is suitable for use as static global constants. | |||
struct Align {
private:
  uint8_t ShiftValue = 0; /// The log2 of the required alignment.
                          /// ShiftValue is less than 64 by construction.

  friend struct MaybeAlign;
  friend unsigned Log2(Align);
  friend bool operator==(Align Lhs, Align Rhs);
  friend bool operator!=(Align Lhs, Align Rhs);
  friend bool operator<=(Align Lhs, Align Rhs);
  friend bool operator>=(Align Lhs, Align Rhs);
  friend bool operator<(Align Lhs, Align Rhs);
  friend bool operator>(Align Lhs, Align Rhs);
  friend unsigned encode(struct MaybeAlign A);
  friend struct MaybeAlign decodeMaybeAlign(unsigned Value);

  /// A trivial type to allow construction of constexpr Align.
  /// This is currently needed to workaround a bug in GCC 5.3 which prevents
  /// definition of constexpr assign operators.
  /// https://stackoverflow.com/questions/46756288/explicitly-defaulted-function-cannot-be-declared-as-constexpr-because-the-implic
  /// FIXME: Remove this, make all assign operators constexpr and introduce user
  /// defined literals when we don't have to support GCC 5.3 anymore.
  /// https://llvm.org/docs/GettingStarted.html#getting-a-modern-host-c-toolchain
  struct LogValue {
    uint8_t Log;
  };

public:
  /// Default is byte-aligned.
  constexpr Align() = default;
  /// Do not perform checks in case of copy/move construct/assign, because the
  /// checks have been performed when building `Other`.
  constexpr Align(const Align &Other) = default;
  constexpr Align(Align &&Other) = default;
  Align &operator=(const Align &Other) = default;
  Align &operator=(Align &&Other) = default;

  /// Construct from a byte count; \p Value must be a non-zero power of two.
  explicit Align(uint64_t Value) {
    assert(Value > 0 && "Value must not be 0")((void)0);
    assert(llvm::isPowerOf2_64(Value) && "Alignment is not a power of 2")((void)0);
    ShiftValue = Log2_64(Value);
    assert(ShiftValue < 64 && "Broken invariant")((void)0);
  }

  /// This is a hole in the type system and should not be abused.
  /// Needed to interact with C for instance.
  /// NOTE(review): static analyzers report this shift as potentially undefined
  /// (e.g. "shifting by 255"). The constructor asserts ShiftValue < 64 and the
  /// only other writers are the default initializer (0) and the checked
  /// LogValue path, so the shift is well defined for any validly constructed
  /// Align; the analyzer cannot see that cross-call invariant.
  uint64_t value() const { return uint64_t(1) << ShiftValue; }


  /// Allow constructions of constexpr Align.
  template <size_t kValue> constexpr static LogValue Constant() {
    return LogValue{static_cast<uint8_t>(CTLog2<kValue>())};
  }

  /// Allow constructions of constexpr Align from types.
  /// Compile time equivalent to Align(alignof(T)).
  template <typename T> constexpr static LogValue Of() {
    return Constant<std::alignment_of<T>::value>();
  }

  /// Constexpr constructor from LogValue type.
  constexpr Align(LogValue CA) : ShiftValue(CA.Log) {}
};
101 | ||||
102 | /// Treats the value 0 as a 1, so Align is always at least 1. | |||
103 | inline Align assumeAligned(uint64_t Value) { | |||
104 | return Value ? Align(Value) : Align(); | |||
105 | } | |||
106 | ||||
/// This struct is a compact representation of a valid (power of two) or
/// undefined (0) alignment.
struct MaybeAlign : public llvm::Optional<Align> {
private:
  // Shorthand for the base class, used to inherit its constructors.
  using UP = llvm::Optional<Align>;

public:
  /// Default is undefined.
  MaybeAlign() = default;
  /// Do not perform checks in case of copy/move construct/assign, because the
  /// checks have been performed when building `Other`.
  MaybeAlign(const MaybeAlign &Other) = default;
  MaybeAlign &operator=(const MaybeAlign &Other) = default;
  MaybeAlign(MaybeAlign &&Other) = default;
  MaybeAlign &operator=(MaybeAlign &&Other) = default;

  /// Use llvm::Optional<Align> constructor.
  using UP::UP;

  /// Builds from a byte count; 0 yields an undefined (empty) alignment.
  explicit MaybeAlign(uint64_t Value) {
    assert((Value == 0 || llvm::isPowerOf2_64(Value)) &&
           "Alignment is neither 0 nor a power of 2");
    if (Value)
      emplace(Value); // delegates validation to Align(uint64_t).
  }

  /// For convenience, returns a valid alignment or 1 if undefined.
  Align valueOrOne() const { return hasValue() ? getValue() : Align(); }
};
136 | ||||
137 | /// Checks that SizeInBytes is a multiple of the alignment. | |||
138 | inline bool isAligned(Align Lhs, uint64_t SizeInBytes) { | |||
139 | return SizeInBytes % Lhs.value() == 0; | |||
140 | } | |||
141 | ||||
142 | /// Checks that Addr is a multiple of the alignment. | |||
143 | inline bool isAddrAligned(Align Lhs, const void *Addr) { | |||
144 | return isAligned(Lhs, reinterpret_cast<uintptr_t>(Addr)); | |||
145 | } | |||
146 | ||||
147 | /// Returns a multiple of A needed to store `Size` bytes. | |||
148 | inline uint64_t alignTo(uint64_t Size, Align A) { | |||
149 | const uint64_t Value = A.value(); | |||
150 | // The following line is equivalent to `(Size + Value - 1) / Value * Value`. | |||
151 | ||||
152 | // The division followed by a multiplication can be thought of as a right | |||
153 | // shift followed by a left shift which zeros out the extra bits produced in | |||
154 | // the bump; `~(Value - 1)` is a mask where all those bits being zeroed out | |||
155 | // are just zero. | |||
156 | ||||
157 | // Most compilers can generate this code but the pattern may be missed when | |||
158 | // multiple functions gets inlined. | |||
159 | return (Size + Value - 1) & ~(Value - 1U); | |||
160 | } | |||
161 | ||||
162 | /// If non-zero \p Skew is specified, the return value will be a minimal integer | |||
163 | /// that is greater than or equal to \p Size and equal to \p A * N + \p Skew for | |||
164 | /// some integer N. If \p Skew is larger than \p A, its value is adjusted to '\p | |||
165 | /// Skew mod \p A'. | |||
166 | /// | |||
167 | /// Examples: | |||
168 | /// \code | |||
169 | /// alignTo(5, Align(8), 7) = 7 | |||
170 | /// alignTo(17, Align(8), 1) = 17 | |||
171 | /// alignTo(~0LL, Align(8), 3) = 3 | |||
172 | /// \endcode | |||
173 | inline uint64_t alignTo(uint64_t Size, Align A, uint64_t Skew) { | |||
174 | const uint64_t Value = A.value(); | |||
175 | Skew %= Value; | |||
176 | return ((Size + Value - 1 - Skew) & ~(Value - 1U)) + Skew; | |||
177 | } | |||
178 | ||||
179 | /// Returns a multiple of A needed to store `Size` bytes. | |||
180 | /// Returns `Size` if current alignment is undefined. | |||
181 | inline uint64_t alignTo(uint64_t Size, MaybeAlign A) { | |||
182 | return A ? alignTo(Size, A.getValue()) : Size; | |||
183 | } | |||
184 | ||||
/// Aligns `Addr` to `Alignment` bytes, rounding up.
inline uintptr_t alignAddr(const void *Addr, Align Alignment) {
  uintptr_t ArithAddr = reinterpret_cast<uintptr_t>(Addr);
  // Rounding up must not wrap around the end of the address space.
  assert(static_cast<uintptr_t>(ArithAddr + Alignment.value() - 1) >=
             ArithAddr &&
         "Overflow");
  return alignTo(ArithAddr, Alignment);
}
193 | ||||
194 | /// Returns the offset to the next integer (mod 2**64) that is greater than | |||
195 | /// or equal to \p Value and is a multiple of \p Align. | |||
196 | inline uint64_t offsetToAlignment(uint64_t Value, Align Alignment) { | |||
197 | return alignTo(Value, Alignment) - Value; | |||
198 | } | |||
199 | ||||
200 | /// Returns the necessary adjustment for aligning `Addr` to `Alignment` | |||
201 | /// bytes, rounding up. | |||
202 | inline uint64_t offsetToAlignedAddr(const void *Addr, Align Alignment) { | |||
203 | return offsetToAlignment(reinterpret_cast<uintptr_t>(Addr), Alignment); | |||
204 | } | |||
205 | ||||
/// Returns the log2 of the alignment.
/// Reads Align's stored shift amount directly — no computation needed.
inline unsigned Log2(Align A) { return A.ShiftValue; }
208 | ||||
209 | /// Returns the alignment that satisfies both alignments. | |||
210 | /// Same semantic as MinAlign. | |||
211 | inline Align commonAlignment(Align A, Align B) { return std::min(A, B); } | |||
212 | ||||
/// Returns the alignment that satisfies both alignments.
/// Same semantic as MinAlign.
inline Align commonAlignment(Align A, uint64_t Offset) {
  // MinAlign (MathExtras.h) is presumed to yield a power of two here;
  // Align's constructor asserts that property in any case.
  return Align(MinAlign(A.value(), Offset));
}
218 | ||||
219 | /// Returns the alignment that satisfies both alignments. | |||
220 | /// Same semantic as MinAlign. | |||
221 | inline MaybeAlign commonAlignment(MaybeAlign A, MaybeAlign B) { | |||
222 | return A && B ? commonAlignment(*A, *B) : A ? A : B; | |||
223 | } | |||
224 | ||||
/// Returns the alignment that satisfies both alignments.
/// Same semantic as MinAlign.
/// NOTE(review): `*A` is dereferenced unconditionally — unlike the
/// MaybeAlign/MaybeAlign overload above, passing an undefined alignment
/// here is invalid. Callers must guarantee A has a value; confirm intended.
inline MaybeAlign commonAlignment(MaybeAlign A, uint64_t Offset) {
  return MaybeAlign(MinAlign((*A).value(), Offset));
}
230 | ||||
/// Returns a representation of the alignment that encodes undefined as 0.
/// Defined alignments map to ShiftValue + 1, so valid codes are 0..64.
inline unsigned encode(MaybeAlign A) { return A ? A->ShiftValue + 1 : 0; }
233 | ||||
234 | /// Dual operation of the encode function above. | |||
235 | inline MaybeAlign decodeMaybeAlign(unsigned Value) { | |||
236 | if (Value == 0) | |||
237 | return MaybeAlign(); | |||
238 | Align Out; | |||
239 | Out.ShiftValue = Value - 1; | |||
240 | return Out; | |||
241 | } | |||
242 | ||||
/// Returns a representation of the alignment, the encoded value is positive by
/// definition.
/// Convenience overload: a plain Align is never undefined, so the code is
/// always in 1..64.
inline unsigned encode(Align A) { return encode(MaybeAlign(A)); }
246 | ||||
/// Comparisons between Align and scalars. Rhs must be positive.
/// NOTE(review): ALIGN_CHECK_ISPOSITIVE is #undef'd at the end of this
/// header, so it is presumably a debug-only validity check defined earlier
/// in the file — confirm against the full header.
inline bool operator==(Align Lhs, uint64_t Rhs) {
  ALIGN_CHECK_ISPOSITIVE(Rhs);
  return Lhs.value() == Rhs;
}
inline bool operator!=(Align Lhs, uint64_t Rhs) {
  ALIGN_CHECK_ISPOSITIVE(Rhs);
  return Lhs.value() != Rhs;
}
inline bool operator<=(Align Lhs, uint64_t Rhs) {
  ALIGN_CHECK_ISPOSITIVE(Rhs);
  return Lhs.value() <= Rhs;
}
inline bool operator>=(Align Lhs, uint64_t Rhs) {
  ALIGN_CHECK_ISPOSITIVE(Rhs);
  return Lhs.value() >= Rhs;
}
inline bool operator<(Align Lhs, uint64_t Rhs) {
  ALIGN_CHECK_ISPOSITIVE(Rhs);
  return Lhs.value() < Rhs;
}
inline bool operator>(Align Lhs, uint64_t Rhs) {
  ALIGN_CHECK_ISPOSITIVE(Rhs);
  return Lhs.value() > Rhs;
}
272 | ||||
273 | /// Comparisons between MaybeAlign and scalars. | |||
274 | inline bool operator==(MaybeAlign Lhs, uint64_t Rhs) { | |||
275 | return Lhs ? (*Lhs).value() == Rhs : Rhs == 0; | |||
276 | } | |||
277 | inline bool operator!=(MaybeAlign Lhs, uint64_t Rhs) { | |||
278 | return Lhs ? (*Lhs).value() != Rhs : Rhs != 0; | |||
279 | } | |||
280 | ||||
/// Comparisons operators between Align.
/// Comparing the stored log2 values is equivalent to comparing the actual
/// alignments, since x -> 2^x is strictly increasing.
inline bool operator==(Align Lhs, Align Rhs) {
  return Lhs.ShiftValue == Rhs.ShiftValue;
}
inline bool operator!=(Align Lhs, Align Rhs) {
  return Lhs.ShiftValue != Rhs.ShiftValue;
}
inline bool operator<=(Align Lhs, Align Rhs) {
  return Lhs.ShiftValue <= Rhs.ShiftValue;
}
inline bool operator>=(Align Lhs, Align Rhs) {
  return Lhs.ShiftValue >= Rhs.ShiftValue;
}
inline bool operator<(Align Lhs, Align Rhs) {
  return Lhs.ShiftValue < Rhs.ShiftValue;
}
inline bool operator>(Align Lhs, Align Rhs) {
  return Lhs.ShiftValue > Rhs.ShiftValue;
}
300 | ||||
// Don't allow relational comparisons with MaybeAlign.
// An undefined alignment cannot be meaningfully ordered against anything,
// so all relational operators involving MaybeAlign are explicitly deleted.
bool operator<=(Align Lhs, MaybeAlign Rhs) = delete;
bool operator>=(Align Lhs, MaybeAlign Rhs) = delete;
bool operator<(Align Lhs, MaybeAlign Rhs) = delete;
bool operator>(Align Lhs, MaybeAlign Rhs) = delete;

bool operator<=(MaybeAlign Lhs, Align Rhs) = delete;
bool operator>=(MaybeAlign Lhs, Align Rhs) = delete;
bool operator<(MaybeAlign Lhs, Align Rhs) = delete;
bool operator>(MaybeAlign Lhs, Align Rhs) = delete;

bool operator<=(MaybeAlign Lhs, MaybeAlign Rhs) = delete;
bool operator>=(MaybeAlign Lhs, MaybeAlign Rhs) = delete;
bool operator<(MaybeAlign Lhs, MaybeAlign Rhs) = delete;
bool operator>(MaybeAlign Lhs, MaybeAlign Rhs) = delete;

/// Scales the alignment by Rhs.
/// If the product is not a power of two, Align's constructor will assert.
inline Align operator*(Align Lhs, uint64_t Rhs) {
  assert(Rhs > 0 && "Rhs must be positive");
  return Align(Lhs.value() * Rhs);
}

/// Scales the alignment by Rhs; an undefined alignment stays undefined.
inline MaybeAlign operator*(MaybeAlign Lhs, uint64_t Rhs) {
  assert(Rhs > 0 && "Rhs must be positive");
  return Lhs ? Lhs.getValue() * Rhs : MaybeAlign();
}

/// Divides the alignment by a power-of-two Divisor.
/// Halving byte alignment (Align(1)) is rejected: the result would not be
/// a valid alignment.
inline Align operator/(Align Lhs, uint64_t Divisor) {
  assert(llvm::isPowerOf2_64(Divisor) &&
         "Divisor must be positive and a power of 2");
  assert(Lhs != 1 && "Can't halve byte alignment");
  return Align(Lhs.value() / Divisor);
}

/// Divides the alignment; an undefined alignment stays undefined.
inline MaybeAlign operator/(MaybeAlign Lhs, uint64_t Divisor) {
  assert(llvm::isPowerOf2_64(Divisor) &&
         "Divisor must be positive and a power of 2");
  return Lhs ? Lhs.getValue() / Divisor : MaybeAlign();
}
339 | ||||
340 | inline Align max(MaybeAlign Lhs, Align Rhs) { | |||
341 | return Lhs && *Lhs > Rhs ? *Lhs : Rhs; | |||
342 | } | |||
343 | ||||
344 | inline Align max(Align Lhs, MaybeAlign Rhs) { | |||
345 | return Rhs && *Rhs > Lhs ? *Rhs : Lhs; | |||
346 | } | |||
347 | ||||
348 | #ifndef NDEBUG1 | |||
349 | // For usage in LLVM_DEBUG macros. | |||
350 | inline std::string DebugStr(const Align &A) { | |||
351 | return std::to_string(A.value()); | |||
352 | } | |||
353 | // For usage in LLVM_DEBUG macros. | |||
354 | inline std::string DebugStr(const MaybeAlign &MA) { | |||
355 | if (MA) | |||
356 | return std::to_string(MA->value()); | |||
357 | return "None"; | |||
358 | } | |||
359 | #endif // NDEBUG | |||
360 | ||||
361 | #undef ALIGN_CHECK_ISPOSITIVE | |||
362 | ||||
363 | } // namespace llvm | |||
364 | ||||
365 | #endif // LLVM_SUPPORT_ALIGNMENT_H_ |