File: src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Support/Alignment.h
Warning: line 85, column 47: The result of the left shift is undefined due to shifting by '255', which is greater or equal to the width of type 'uint64_t'
1 | //===- SROA.cpp - Scalar Replacement Of Aggregates ------------------------===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | /// \file | |||
9 | /// This transformation implements the well known scalar replacement of | |||
10 | /// aggregates transformation. It tries to identify promotable elements of an | |||
11 | /// aggregate alloca, and promote them to registers. It will also try to | |||
12 | /// convert uses of an element (or set of elements) of an alloca into a vector | |||
13 | /// or bitfield-style integer scalar if appropriate. | |||
14 | /// | |||
15 | /// It works to do this with minimal slicing of the alloca so that regions | |||
16 | /// which are merely transferred in and out of external memory remain unchanged | |||
17 | /// and are not decomposed to scalar code. | |||
18 | /// | |||
19 | /// Because this also performs alloca promotion, it can be thought of as also | |||
20 | /// serving the purpose of SSA formation. The algorithm iterates on the | |||
21 | /// function until all opportunities for promotion have been realized. | |||
22 | /// | |||
23 | //===----------------------------------------------------------------------===// | |||
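// Illustrative sketch (not from the upstream source): given IR of the form
//   %p = alloca i32
//   store i32 %v, i32* %p
//   %x = load i32* %p
// promoting the alloca lets every use of %x be rewritten to use %v directly,
// after which the alloca, the store, and the load are all deleted. Aggregate
// allocas are first sliced into such independently promotable pieces.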
24 | ||||
25 | #include "llvm/Transforms/Scalar/SROA.h" | |||
26 | #include "llvm/ADT/APInt.h" | |||
27 | #include "llvm/ADT/ArrayRef.h" | |||
28 | #include "llvm/ADT/DenseMap.h" | |||
29 | #include "llvm/ADT/PointerIntPair.h" | |||
30 | #include "llvm/ADT/STLExtras.h" | |||
31 | #include "llvm/ADT/SetVector.h" | |||
32 | #include "llvm/ADT/SmallBitVector.h" | |||
33 | #include "llvm/ADT/SmallPtrSet.h" | |||
34 | #include "llvm/ADT/SmallVector.h" | |||
35 | #include "llvm/ADT/Statistic.h" | |||
36 | #include "llvm/ADT/StringRef.h" | |||
37 | #include "llvm/ADT/Twine.h" | |||
38 | #include "llvm/ADT/iterator.h" | |||
39 | #include "llvm/ADT/iterator_range.h" | |||
40 | #include "llvm/Analysis/AssumptionCache.h" | |||
41 | #include "llvm/Analysis/GlobalsModRef.h" | |||
42 | #include "llvm/Analysis/Loads.h" | |||
43 | #include "llvm/Analysis/PtrUseVisitor.h" | |||
44 | #include "llvm/Config/llvm-config.h" | |||
45 | #include "llvm/IR/BasicBlock.h" | |||
46 | #include "llvm/IR/Constant.h" | |||
47 | #include "llvm/IR/ConstantFolder.h" | |||
48 | #include "llvm/IR/Constants.h" | |||
49 | #include "llvm/IR/DIBuilder.h" | |||
50 | #include "llvm/IR/DataLayout.h" | |||
51 | #include "llvm/IR/DebugInfoMetadata.h" | |||
52 | #include "llvm/IR/DerivedTypes.h" | |||
53 | #include "llvm/IR/Dominators.h" | |||
54 | #include "llvm/IR/Function.h" | |||
55 | #include "llvm/IR/GetElementPtrTypeIterator.h" | |||
56 | #include "llvm/IR/GlobalAlias.h" | |||
57 | #include "llvm/IR/IRBuilder.h" | |||
58 | #include "llvm/IR/InstVisitor.h" | |||
59 | #include "llvm/IR/InstrTypes.h" | |||
60 | #include "llvm/IR/Instruction.h" | |||
61 | #include "llvm/IR/Instructions.h" | |||
62 | #include "llvm/IR/IntrinsicInst.h" | |||
63 | #include "llvm/IR/Intrinsics.h" | |||
64 | #include "llvm/IR/LLVMContext.h" | |||
65 | #include "llvm/IR/Metadata.h" | |||
66 | #include "llvm/IR/Module.h" | |||
67 | #include "llvm/IR/Operator.h" | |||
68 | #include "llvm/IR/PassManager.h" | |||
69 | #include "llvm/IR/Type.h" | |||
70 | #include "llvm/IR/Use.h" | |||
71 | #include "llvm/IR/User.h" | |||
72 | #include "llvm/IR/Value.h" | |||
73 | #include "llvm/InitializePasses.h" | |||
74 | #include "llvm/Pass.h" | |||
75 | #include "llvm/Support/Casting.h" | |||
76 | #include "llvm/Support/CommandLine.h" | |||
77 | #include "llvm/Support/Compiler.h" | |||
78 | #include "llvm/Support/Debug.h" | |||
79 | #include "llvm/Support/ErrorHandling.h" | |||
80 | #include "llvm/Support/MathExtras.h" | |||
81 | #include "llvm/Support/raw_ostream.h" | |||
82 | #include "llvm/Transforms/Scalar.h" | |||
83 | #include "llvm/Transforms/Utils/Local.h" | |||
84 | #include "llvm/Transforms/Utils/PromoteMemToReg.h" | |||
85 | #include <algorithm> | |||
86 | #include <cassert> | |||
87 | #include <chrono> | |||
88 | #include <cstddef> | |||
89 | #include <cstdint> | |||
90 | #include <cstring> | |||
91 | #include <iterator> | |||
92 | #include <string> | |||
93 | #include <tuple> | |||
94 | #include <utility> | |||
95 | #include <vector> | |||
96 | ||||
97 | using namespace llvm; | |||
98 | using namespace llvm::sroa; | |||
99 | ||||
100 | #define DEBUG_TYPE "sroa" | |||
101 | ||||
102 | STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement"); | |||
103 | STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed"); | |||
104 | STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions per alloca"); | |||
105 | STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses rewritten"); | |||
106 | STATISTIC(MaxUsesPerAllocaPartition, "Maximum number of uses of a partition"); | |||
107 | STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced"); | |||
108 | STATISTIC(NumPromoted, "Number of allocas promoted to SSA values"); | |||
109 | STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion"); | |||
110 | STATISTIC(NumDeleted, "Number of instructions deleted"); | |||
111 | STATISTIC(NumVectorized, "Number of vectorized aggregates"); | |||
112 | ||||
113 | /// Hidden option to experiment with completely strict handling of inbounds | |||
114 | /// GEPs. | |||
115 | static cl::opt<bool> SROAStrictInbounds("sroa-strict-inbounds", cl::init(false), | |||
116 | cl::Hidden); | |||
117 | ||||
118 | namespace { | |||
119 | ||||
120 | /// A custom IRBuilder inserter which prefixes all names, but only in | |||
121 | /// Assert builds. | |||
122 | class IRBuilderPrefixedInserter final : public IRBuilderDefaultInserter { | |||
123 | std::string Prefix; | |||
124 | ||||
125 | const Twine getNameWithPrefix(const Twine &Name) const { | |||
126 | return Name.isTriviallyEmpty() ? Name : Prefix + Name; | |||
127 | } | |||
128 | ||||
129 | public: | |||
130 | void SetNamePrefix(const Twine &P) { Prefix = P.str(); } | |||
131 | ||||
132 | void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB, | |||
133 | BasicBlock::iterator InsertPt) const override { | |||
134 | IRBuilderDefaultInserter::InsertHelper(I, getNameWithPrefix(Name), BB, | |||
135 | InsertPt); | |||
136 | } | |||
137 | }; | |||
138 | ||||
139 | /// Provide a type for IRBuilder that drops names in release builds. | |||
140 | using IRBuilderTy = IRBuilder<ConstantFolder, IRBuilderPrefixedInserter>; | |||
141 | ||||
142 | /// A used slice of an alloca. | |||
143 | /// | |||
144 | /// This structure represents a slice of an alloca used by some instruction. It | |||
145 | /// stores both the begin and end offsets of this use, a pointer to the use | |||
146 | /// itself, and a flag indicating whether we can classify the use as splittable | |||
147 | /// or not when forming partitions of the alloca. | |||
148 | class Slice { | |||
149 | /// The beginning offset of the range. | |||
150 | uint64_t BeginOffset = 0; | |||
151 | ||||
152 | /// The ending offset, not included in the range. | |||
153 | uint64_t EndOffset = 0; | |||
154 | ||||
155 | /// Storage for both the use of this slice and whether it can be | |||
156 | /// split. | |||
157 | PointerIntPair<Use *, 1, bool> UseAndIsSplittable; | |||
158 | ||||
159 | public: | |||
160 | Slice() = default; | |||
161 | ||||
162 | Slice(uint64_t BeginOffset, uint64_t EndOffset, Use *U, bool IsSplittable) | |||
163 | : BeginOffset(BeginOffset), EndOffset(EndOffset), | |||
164 | UseAndIsSplittable(U, IsSplittable) {} | |||
165 | ||||
166 | uint64_t beginOffset() const { return BeginOffset; } | |||
167 | uint64_t endOffset() const { return EndOffset; } | |||
168 | ||||
169 | bool isSplittable() const { return UseAndIsSplittable.getInt(); } | |||
170 | void makeUnsplittable() { UseAndIsSplittable.setInt(false); } | |||
171 | ||||
172 | Use *getUse() const { return UseAndIsSplittable.getPointer(); } | |||
173 | ||||
174 | bool isDead() const { return getUse() == nullptr; } | |||
175 | void kill() { UseAndIsSplittable.setPointer(nullptr); } | |||
176 | ||||
177 | /// Support for ordering ranges. | |||
178 | /// | |||
179 | /// This provides an ordering over ranges such that start offsets are | |||
180 | /// always increasing, and within equal start offsets, the end offsets are | |||
181 | /// decreasing. Thus the spanning range comes first in a cluster with the | |||
182 | /// same start position. | |||
183 | bool operator<(const Slice &RHS) const { | |||
184 | if (beginOffset() < RHS.beginOffset()) | |||
185 | return true; | |||
186 | if (beginOffset() > RHS.beginOffset()) | |||
187 | return false; | |||
188 | if (isSplittable() != RHS.isSplittable()) | |||
189 | return !isSplittable(); | |||
190 | if (endOffset() > RHS.endOffset()) | |||
191 | return true; | |||
192 | return false; | |||
193 | } | |||
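// Worked example (illustrative, not from the upstream source): for slices
// A = [0,16) splittable, B = [0,8) splittable and C = [0,8) unsplittable,
// this ordering yields C < A < B: at an equal begin offset an unsplittable
// slice sorts first, and among slices of equal splittability the larger
// (spanning) range precedes the smaller one.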
194 | ||||
195 | /// Support comparison with a single offset to allow binary searches. | |||
196 | friend LLVM_ATTRIBUTE_UNUSED bool operator<(const Slice &LHS, | |||
197 | uint64_t RHSOffset) { | |||
198 | return LHS.beginOffset() < RHSOffset; | |||
199 | } | |||
200 | friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset, | |||
201 | const Slice &RHS) { | |||
202 | return LHSOffset < RHS.beginOffset(); | |||
203 | } | |||
204 | ||||
205 | bool operator==(const Slice &RHS) const { | |||
206 | return isSplittable() == RHS.isSplittable() && | |||
207 | beginOffset() == RHS.beginOffset() && endOffset() == RHS.endOffset(); | |||
208 | } | |||
209 | bool operator!=(const Slice &RHS) const { return !operator==(RHS); } | |||
210 | }; | |||
211 | ||||
212 | } // end anonymous namespace | |||
213 | ||||
214 | /// Representation of the alloca slices. | |||
215 | /// | |||
216 | /// This class represents the slices of an alloca which are formed by its | |||
217 | /// various uses. If a pointer escapes, we can't fully build a representation | |||
218 | /// for the slices used and we reflect that in this structure. The uses are | |||
219 | /// stored, sorted by increasing beginning offset and with unsplittable slices | |||
220 | /// starting at a particular offset before splittable slices. | |||
221 | class llvm::sroa::AllocaSlices { | |||
222 | public: | |||
223 | /// Construct the slices of a particular alloca. | |||
224 | AllocaSlices(const DataLayout &DL, AllocaInst &AI); | |||
225 | ||||
226 | /// Test whether a pointer to the allocation escapes our analysis. | |||
227 | /// | |||
228 | /// If this is true, the slices are never fully built and should be | |||
229 | /// ignored. | |||
230 | bool isEscaped() const { return PointerEscapingInstr; } | |||
231 | ||||
232 | /// Support for iterating over the slices. | |||
233 | /// @{ | |||
234 | using iterator = SmallVectorImpl<Slice>::iterator; | |||
235 | using range = iterator_range<iterator>; | |||
236 | ||||
237 | iterator begin() { return Slices.begin(); } | |||
238 | iterator end() { return Slices.end(); } | |||
239 | ||||
240 | using const_iterator = SmallVectorImpl<Slice>::const_iterator; | |||
241 | using const_range = iterator_range<const_iterator>; | |||
242 | ||||
243 | const_iterator begin() const { return Slices.begin(); } | |||
244 | const_iterator end() const { return Slices.end(); } | |||
245 | /// @} | |||
246 | ||||
247 | /// Erase a range of slices. | |||
248 | void erase(iterator Start, iterator Stop) { Slices.erase(Start, Stop); } | |||
249 | ||||
250 | /// Insert new slices for this alloca. | |||
251 | /// | |||
252 | /// This moves the slices into the alloca's slices collection, and re-sorts | |||
253 | /// everything so that the usual ordering properties of the alloca's slices | |||
254 | /// hold. | |||
255 | void insert(ArrayRef<Slice> NewSlices) { | |||
256 | int OldSize = Slices.size(); | |||
257 | Slices.append(NewSlices.begin(), NewSlices.end()); | |||
258 | auto SliceI = Slices.begin() + OldSize; | |||
259 | llvm::sort(SliceI, Slices.end()); | |||
260 | std::inplace_merge(Slices.begin(), SliceI, Slices.end()); | |||
261 | } | |||
262 | ||||
263 | // Forward declare the iterator and range accessor for walking the | |||
264 | // partitions. | |||
265 | class partition_iterator; | |||
266 | iterator_range<partition_iterator> partitions(); | |||
267 | ||||
268 | /// Access the dead users for this alloca. | |||
269 | ArrayRef<Instruction *> getDeadUsers() const { return DeadUsers; } | |||
270 | ||||
271 | /// Access Uses that should be dropped if the alloca is promotable. | |||
272 | ArrayRef<Use *> getDeadUsesIfPromotable() const { | |||
273 | return DeadUseIfPromotable; | |||
274 | } | |||
275 | ||||
276 | /// Access the dead operands referring to this alloca. | |||
277 | /// | |||
278 | /// These are operands which cannot actually be used to refer to the | |||
279 | /// alloca as they are outside its range and the user doesn't correct for | |||
280 | /// that. These mostly consist of PHI node inputs and the like which we just | |||
281 | /// need to replace with undef. | |||
282 | ArrayRef<Use *> getDeadOperands() const { return DeadOperands; } | |||
283 | ||||
284 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | |||
285 | void print(raw_ostream &OS, const_iterator I, StringRef Indent = " ") const; | |||
286 | void printSlice(raw_ostream &OS, const_iterator I, | |||
287 | StringRef Indent = " ") const; | |||
288 | void printUse(raw_ostream &OS, const_iterator I, | |||
289 | StringRef Indent = " ") const; | |||
290 | void print(raw_ostream &OS) const; | |||
291 | void dump(const_iterator I) const; | |||
292 | void dump() const; | |||
293 | #endif | |||
294 | ||||
295 | private: | |||
296 | template <typename DerivedT, typename RetT = void> class BuilderBase; | |||
297 | class SliceBuilder; | |||
298 | ||||
299 | friend class AllocaSlices::SliceBuilder; | |||
300 | ||||
301 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | |||
302 | /// Handle to alloca instruction to simplify method interfaces. | |||
303 | AllocaInst &AI; | |||
304 | #endif | |||
305 | ||||
306 | /// The instruction responsible for this alloca not having a known set | |||
307 | /// of slices. | |||
308 | /// | |||
309 | /// When an instruction (potentially) escapes the pointer to the alloca, we | |||
310 | /// store a pointer to that here and abort trying to form slices of the | |||
311 | /// alloca. This will be null if the alloca slices are analyzed successfully. | |||
312 | Instruction *PointerEscapingInstr; | |||
313 | ||||
314 | /// The slices of the alloca. | |||
315 | /// | |||
316 | /// We store a vector of the slices formed by uses of the alloca here. This | |||
317 | /// vector is sorted by increasing begin offset, and then the unsplittable | |||
318 | /// slices before the splittable ones. See the Slice inner class for more | |||
319 | /// details. | |||
320 | SmallVector<Slice, 8> Slices; | |||
321 | ||||
322 | /// Instructions which will become dead if we rewrite the alloca. | |||
323 | /// | |||
324 | /// Note that these are not separated by slice. This is because we expect an | |||
325 | /// alloca to be completely rewritten or not rewritten at all. If rewritten, | |||
326 | /// all these instructions can simply be removed and replaced with undef as | |||
327 | /// they come from outside of the allocated space. | |||
328 | SmallVector<Instruction *, 8> DeadUsers; | |||
329 | ||||
330 | /// Uses which will become dead if we can promote the alloca. | |||
331 | SmallVector<Use *, 8> DeadUseIfPromotable; | |||
332 | ||||
333 | /// Operands which will become dead if we rewrite the alloca. | |||
334 | /// | |||
335 | /// These are operands that in their particular use can be replaced with | |||
336 | /// undef when we rewrite the alloca. These show up in out-of-bounds inputs | |||
337 | /// to PHI nodes and the like. They aren't entirely dead (there might be | |||
338 | /// a GEP back into the bounds using it elsewhere), nor is the PHI, but we | |||
339 | /// want to swap this particular input for undef to simplify the use lists of | |||
340 | /// the alloca. | |||
341 | SmallVector<Use *, 8> DeadOperands; | |||
342 | }; | |||
343 | ||||
344 | /// A partition of the slices. | |||
345 | /// | |||
346 | /// An ephemeral representation for a range of slices which can be viewed as | |||
347 | /// a partition of the alloca. This range represents a span of the alloca's | |||
348 | /// memory which cannot be split, and provides access to all of the slices | |||
349 | /// overlapping some part of the partition. | |||
350 | /// | |||
351 | /// Objects of this type are produced by traversing the alloca's slices, but | |||
352 | /// are only ephemeral and not persistent. | |||
353 | class llvm::sroa::Partition { | |||
354 | private: | |||
355 | friend class AllocaSlices; | |||
356 | friend class AllocaSlices::partition_iterator; | |||
357 | ||||
358 | using iterator = AllocaSlices::iterator; | |||
359 | ||||
360 | /// The beginning and ending offsets of the alloca for this | |||
361 | /// partition. | |||
362 | uint64_t BeginOffset = 0, EndOffset = 0; | |||
363 | ||||
364 | /// The start and end iterators of this partition. | |||
365 | iterator SI, SJ; | |||
366 | ||||
367 | /// A collection of split slice tails overlapping the partition. | |||
368 | SmallVector<Slice *, 4> SplitTails; | |||
369 | ||||
370 | /// Raw constructor builds an empty partition starting and ending at | |||
371 | /// the given iterator. | |||
372 | Partition(iterator SI) : SI(SI), SJ(SI) {} | |||
373 | ||||
374 | public: | |||
375 | /// The start offset of this partition. | |||
376 | /// | |||
377 | /// All of the contained slices start at or after this offset. | |||
378 | uint64_t beginOffset() const { return BeginOffset; } | |||
379 | ||||
380 | /// The end offset of this partition. | |||
381 | /// | |||
382 | /// All of the contained slices end at or before this offset. | |||
383 | uint64_t endOffset() const { return EndOffset; } | |||
384 | ||||
385 | /// The size of the partition. | |||
386 | /// | |||
387 | /// Note that this can never be zero. | |||
388 | uint64_t size() const { | |||
389 | assert(BeginOffset < EndOffset && "Partitions must span some bytes!"); | |||
390 | return EndOffset - BeginOffset; | |||
391 | } | |||
392 | ||||
393 | /// Test whether this partition contains no slices, and merely spans | |||
394 | /// a region occupied by split slices. | |||
395 | bool empty() const { return SI == SJ; } | |||
396 | ||||
397 | /// \name Iterate slices that start within the partition. | |||
398 | /// These may be splittable or unsplittable. They have a begin offset >= the | |||
399 | /// partition begin offset. | |||
400 | /// @{ | |||
401 | // FIXME: We should probably define a "concat_iterator" helper and use that | |||
402 | // to stitch together pointee_iterators over the split tails and the | |||
403 | // contiguous iterators of the partition. That would give a much nicer | |||
404 | // interface here. We could then additionally expose filtered iterators for | |||
405 | // split, unsplit, and unsplittable slices based on the usage patterns. | |||
406 | iterator begin() const { return SI; } | |||
407 | iterator end() const { return SJ; } | |||
408 | /// @} | |||
409 | ||||
410 | /// Get the sequence of split slice tails. | |||
411 | /// | |||
412 | /// These tails are of slices which start before this partition but are | |||
413 | /// split and overlap into the partition. We accumulate these while forming | |||
414 | /// partitions. | |||
415 | ArrayRef<Slice *> splitSliceTails() const { return SplitTails; } | |||
416 | }; | |||
417 | ||||
418 | /// An iterator over partitions of the alloca's slices. | |||
419 | /// | |||
420 | /// This iterator implements the core algorithm for partitioning the alloca's | |||
421 | /// slices. It is a forward iterator as we don't support backtracking for | |||
422 | /// efficiency reasons, and re-use a single storage area to maintain the | |||
423 | /// current set of split slices. | |||
424 | /// | |||
425 | /// It is templated on the slice iterator type to use so that it can operate | |||
426 | /// with either const or non-const slice iterators. | |||
427 | class AllocaSlices::partition_iterator | |||
428 | : public iterator_facade_base<partition_iterator, std::forward_iterator_tag, | |||
429 | Partition> { | |||
430 | friend class AllocaSlices; | |||
431 | ||||
432 | /// Most of the state for walking the partitions is held in a class | |||
433 | /// with a nice interface for examining them. | |||
434 | Partition P; | |||
435 | ||||
436 | /// We need to keep the end of the slices to know when to stop. | |||
437 | AllocaSlices::iterator SE; | |||
438 | ||||
439 | /// We also need to keep track of the maximum split end offset seen. | |||
440 | /// FIXME: Do we really? | |||
441 | uint64_t MaxSplitSliceEndOffset = 0; | |||
442 | ||||
443 | /// Sets the partition to be empty at given iterator, and sets the | |||
444 | /// end iterator. | |||
445 | partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE) | |||
446 | : P(SI), SE(SE) { | |||
447 | // If not already at the end, advance our state to form the initial | |||
448 | // partition. | |||
449 | if (SI != SE) | |||
450 | advance(); | |||
451 | } | |||
452 | ||||
453 | /// Advance the iterator to the next partition. | |||
454 | /// | |||
455 | /// Requires that the iterator not be at the end of the slices. | |||
456 | void advance() { | |||
457 | assert((P.SI != SE || !P.SplitTails.empty()) && | |||
458 | "Cannot advance past the end of the slices!"); | |||
459 | ||||
460 | // Clear out any split uses which have ended. | |||
461 | if (!P.SplitTails.empty()) { | |||
462 | if (P.EndOffset >= MaxSplitSliceEndOffset) { | |||
463 | // If we've finished all splits, this is easy. | |||
464 | P.SplitTails.clear(); | |||
465 | MaxSplitSliceEndOffset = 0; | |||
466 | } else { | |||
467 | // Remove the uses which have ended in the prior partition. This | |||
468 | // cannot change the max split slice end because we just checked that | |||
469 | // the prior partition ended prior to that max. | |||
470 | llvm::erase_if(P.SplitTails, | |||
471 | [&](Slice *S) { return S->endOffset() <= P.EndOffset; }); | |||
472 | assert(llvm::any_of(P.SplitTails, | |||
473 | [&](Slice *S) { | |||
474 | return S->endOffset() == MaxSplitSliceEndOffset; | |||
475 | }) && | |||
476 | "Could not find the current max split slice offset!"); | |||
477 | assert(llvm::all_of(P.SplitTails, | |||
478 | [&](Slice *S) { | |||
479 | return S->endOffset() <= MaxSplitSliceEndOffset; | |||
480 | }) && | |||
481 | "Max split slice end offset is not actually the max!"); | |||
482 | } | |||
483 | } | |||
484 | ||||
485 | // If P.SI is already at the end, then we've cleared the split tail and | |||
486 | // now have an end iterator. | |||
487 | if (P.SI == SE) { | |||
488 | assert(P.SplitTails.empty() && "Failed to clear the split slices!"); | |||
489 | return; | |||
490 | } | |||
491 | ||||
492 | // If we had a non-empty partition previously, set up the state for | |||
493 | // subsequent partitions. | |||
494 | if (P.SI != P.SJ) { | |||
495 | // Accumulate all the splittable slices which started in the old | |||
496 | // partition into the split list. | |||
497 | for (Slice &S : P) | |||
498 | if (S.isSplittable() && S.endOffset() > P.EndOffset) { | |||
499 | P.SplitTails.push_back(&S); | |||
500 | MaxSplitSliceEndOffset = | |||
501 | std::max(S.endOffset(), MaxSplitSliceEndOffset); | |||
502 | } | |||
503 | ||||
504 | // Start from the end of the previous partition. | |||
505 | P.SI = P.SJ; | |||
506 | ||||
507 | // If P.SI is now at the end, we at most have a tail of split slices. | |||
508 | if (P.SI == SE) { | |||
509 | P.BeginOffset = P.EndOffset; | |||
510 | P.EndOffset = MaxSplitSliceEndOffset; | |||
511 | return; | |||
512 | } | |||
513 | ||||
514 | // If we have split slices and the next slice is after a gap and is | |||
515 | // not splittable, immediately form an empty partition for the split | |||
516 | // slices up until the next slice begins. | |||
517 | if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset && | |||
518 | !P.SI->isSplittable()) { | |||
519 | P.BeginOffset = P.EndOffset; | |||
520 | P.EndOffset = P.SI->beginOffset(); | |||
521 | return; | |||
522 | } | |||
523 | } | |||
524 | ||||
525 | // OK, we need to consume new slices. Set the end offset based on the | |||
526 | // current slice, and step SJ past it. The beginning offset of the | |||
527 | // partition is the beginning offset of the next slice unless we have | |||
528 | // pre-existing split slices that are continuing, in which case we begin | |||
529 | // at the prior end offset. | |||
530 | P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset; | |||
531 | P.EndOffset = P.SI->endOffset(); | |||
532 | ++P.SJ; | |||
533 | ||||
534 | // There are two strategies to form a partition based on whether the | |||
535 | // partition starts with an unsplittable slice or a splittable slice. | |||
536 | if (!P.SI->isSplittable()) { | |||
537 | // When we're forming an unsplittable region, it must always start at | |||
538 | // the first slice and will extend through its end. | |||
539 | assert(P.BeginOffset == P.SI->beginOffset()); | |||
540 | ||||
541 | // Form a partition including all of the overlapping slices with this | |||
542 | // unsplittable slice. | |||
543 | while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) { | |||
544 | if (!P.SJ->isSplittable()) | |||
545 | P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset()); | |||
546 | ++P.SJ; | |||
547 | } | |||
548 | ||||
549 | // We have a partition across a set of overlapping unsplittable | |||
550 | // slices. | |||
551 | return; | |||
552 | } | |||
553 | ||||
554 | // If we're starting with a splittable slice, then we need to form | |||
555 | // a synthetic partition spanning it and any other overlapping splittable | |||
556 | // slices. | |||
557 | assert(P.SI->isSplittable() && "Forming a splittable partition!"); | |||
558 | ||||
559 | // Collect all of the overlapping splittable slices. | |||
560 | while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset && | |||
561 | P.SJ->isSplittable()) { | |||
562 | P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset()); | |||
563 | ++P.SJ; | |||
564 | } | |||
565 | ||||
566 | // Back up P.EndOffset if we ended the span early when encountering an | |||
567 | // unsplittable slice. This synthesizes the early end offset of | |||
568 | // a partition spanning only splittable slices. | |||
569 | if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) { | |||
570 | assert(!P.SJ->isSplittable()); | |||
571 | P.EndOffset = P.SJ->beginOffset(); | |||
572 | } | |||
573 | } | |||
574 | ||||
575 | public: | |||
576 | bool operator==(const partition_iterator &RHS) const { | |||
577 | assert(SE == RHS.SE && | |||
578 | "End iterators don't match between compared partition iterators!"); | |||
579 | ||||
580 | // The observed positions of partitions are marked by the P.SI iterator and | |||
581 | // the emptiness of the split slices. The latter is only relevant when | |||
582 | // P.SI == SE, as the end iterator will additionally have an empty split | |||
583 | // slices list, but the prior may have the same P.SI and a tail of split | |||
584 | // slices. | |||
585 | if (P.SI == RHS.P.SI && P.SplitTails.empty() == RHS.P.SplitTails.empty()) { | |||
586 | assert(P.SJ == RHS.P.SJ && | |||
587 | "Same set of slices formed two different sized partitions!"); | |||
588 | assert(P.SplitTails.size() == RHS.P.SplitTails.size() && | |||
589 | "Same slice position with differently sized non-empty split " | |||
590 | "slice tails!"); | |||
591 | return true; | |||
592 | } | |||
593 | return false; | |||
594 | } | |||
595 | ||||
596 | partition_iterator &operator++() { | |||
597 | advance(); | |||
598 | return *this; | |||
599 | } | |||
600 | ||||
601 | Partition &operator*() { return P; } | |||
602 | }; | |||
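// Worked example (illustrative, not from the upstream source): for sorted
// slices S1 = [0,4) unsplittable, S2 = [8,12) unsplittable and
// S3 = [8,16) splittable, iteration visits three partitions: [0,4)
// containing S1, [8,12) containing S2 and S3, and finally the empty
// partition [12,16) whose only content is the split tail of S3.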
603 | ||||
604 | /// A forward range over the partitions of the alloca's slices. | |||
605 | /// | |||
606 | /// This accesses an iterator range over the partitions of the alloca's | |||
607 | /// slices. It computes these partitions on the fly based on the overlapping | |||
608 | /// offsets of the slices and the ability to split them. It will visit "empty" | |||
609 | /// partitions to cover regions of the alloca only accessed via split | |||
610 | /// slices. | |||
611 | iterator_range<AllocaSlices::partition_iterator> AllocaSlices::partitions() { | |||
612 | return make_range(partition_iterator(begin(), end()), | |||
613 | partition_iterator(end(), end())); | |||
614 | } | |||
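// Typical use, as a minimal sketch (assumes a DataLayout DL and an
// AllocaInst AI are in scope; not taken from the upstream source):
//
//   AllocaSlices AS(DL, AI);
//   if (!AS.isEscaped())
//     for (Partition &P : AS.partitions()) {
//       // Inspect P.beginOffset(), P.endOffset(), the slices in P, and
//       // P.splitSliceTails() to decide how to rewrite this byte range.
//     }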
615 | ||||
616 | static Value *foldSelectInst(SelectInst &SI) { | |||
617 | // If the condition being selected on is a constant or the same value is | |||
618 | // being selected between, fold the select. Yes this does (rarely) happen | |||
619 | // early on. | |||
620 | if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition())) | |||
621 | return SI.getOperand(1 + CI->isZero()); | |||
622 | if (SI.getOperand(1) == SI.getOperand(2)) | |||
623 | return SI.getOperand(1); | |||
624 | ||||
625 | return nullptr; | |||
626 | } | |||
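// For example (illustrative): "select i1 true, i32* %a, i32* %b" folds to %a
// because the condition is the constant true, and "select i1 %c, i32* %a,
// i32* %a" folds to %a because both arms are the same value.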
627 | ||||
628 | /// A helper that folds a PHI node or a select. | |||
629 | static Value *foldPHINodeOrSelectInst(Instruction &I) { | |||
630 | if (PHINode *PN = dyn_cast<PHINode>(&I)) { | |||
631 | // If PN merges together the same value, return that value. | |||
632 | return PN->hasConstantValue(); | |||
633 | } | |||
634 | return foldSelectInst(cast<SelectInst>(I)); | |||
635 | } | |||
636 | ||||
637 | /// Builder for the alloca slices. | |||
638 | /// | |||
639 | /// This class builds a set of alloca slices by recursively visiting the uses | |||
640 | /// of an alloca and making a slice for each load and store at each offset. | |||
641 | class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> { | |||
642 | friend class PtrUseVisitor<SliceBuilder>; | |||
643 | friend class InstVisitor<SliceBuilder>; | |||
644 | ||||
645 | using Base = PtrUseVisitor<SliceBuilder>; | |||
646 | ||||
647 | const uint64_t AllocSize; | |||
648 | AllocaSlices &AS; | |||
649 | ||||
650 | SmallDenseMap<Instruction *, unsigned> MemTransferSliceMap; | |||
651 | SmallDenseMap<Instruction *, uint64_t> PHIOrSelectSizes; | |||
652 | ||||
653 | /// Set to de-duplicate dead instructions found in the use walk. | |||
654 | SmallPtrSet<Instruction *, 4> VisitedDeadInsts; | |||
655 | ||||
656 | public: | |||
657 | SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &AS) | |||
658 | : PtrUseVisitor<SliceBuilder>(DL), | |||
659 | AllocSize(DL.getTypeAllocSize(AI.getAllocatedType()).getFixedSize()), | |||
660 | AS(AS) {} | |||
661 | ||||
662 | private: | |||
663 | void markAsDead(Instruction &I) { | |||
664 | if (VisitedDeadInsts.insert(&I).second) | |||
665 | AS.DeadUsers.push_back(&I); | |||
666 | } | |||
667 | ||||
668 | void insertUse(Instruction &I, const APInt &Offset, uint64_t Size, | |||
669 | bool IsSplittable = false) { | |||
670 | // Completely skip uses which have a zero size or start either before or | |||
671 | // past the end of the allocation. | |||
672 | if (Size == 0 || Offset.uge(AllocSize)) { | |||
673 | LLVM_DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @" | |||
674 | << Offset | |||
675 | << " which has zero size or starts outside of the " | |||
676 | << AllocSize << " byte alloca:\n" | |||
677 | << " alloca: " << AS.AI << "\n" | |||
678 | << " use: " << I << "\n"); | |||
679 | return markAsDead(I); | |||
680 | } | |||
681 | ||||
682 | uint64_t BeginOffset = Offset.getZExtValue(); | |||
683 | uint64_t EndOffset = BeginOffset + Size; | |||
684 | ||||
685 | // Clamp the end offset to the end of the allocation. Note that this is | |||
686 | // formulated to handle even the case where "BeginOffset + Size" overflows. | |||
687 | // This may appear superficially to be something we could ignore entirely, | |||
688 | // but that is not so! There may be widened loads or PHI-node uses where | |||
689 | // some instructions are dead but not others. We can't completely ignore | |||
690 | // them, and so have to record at least the information here. | |||
691 | assert(AllocSize >= BeginOffset); // Established above. | |||
692 | if (Size > AllocSize - BeginOffset) { | |||
693 | LLVM_DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" | |||
694 | << Offset << " to remain within the " << AllocSize | |||
695 | << " byte alloca:\n" | |||
696 | << " alloca: " << AS.AI << "\n" | |||
697 | << " use: " << I << "\n"); | |||
698 | EndOffset = AllocSize; | |||
699 | } | |||
700 | ||||
701 | AS.Slices.push_back(Slice(BeginOffset, EndOffset, U, IsSplittable)); | |||
702 | } | |||
703 | ||||
704 | void visitBitCastInst(BitCastInst &BC) { | |||
705 | if (BC.use_empty()) | |||
706 | return markAsDead(BC); | |||
707 | ||||
708 | return Base::visitBitCastInst(BC); | |||
709 | } | |||
710 | ||||
711 | void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) { | |||
712 | if (ASC.use_empty()) | |||
713 | return markAsDead(ASC); | |||
714 | ||||
715 | return Base::visitAddrSpaceCastInst(ASC); | |||
716 | } | |||
717 | ||||
718 | void visitGetElementPtrInst(GetElementPtrInst &GEPI) { | |||
719 | if (GEPI.use_empty()) | |||
720 | return markAsDead(GEPI); | |||
721 | ||||
722 | if (SROAStrictInbounds && GEPI.isInBounds()) { | |||
723 | // FIXME: This is a manually un-factored variant of the basic code inside | |||
724 | // of GEPs with checking of the inbounds invariant specified in the | |||
725 | // langref in a very strict sense. If we ever want to enable | |||
726 | // SROAStrictInbounds, this code should be factored cleanly into | |||
727 | // PtrUseVisitor, but it is easier to experiment with SROAStrictInbounds | |||
728 | // by writing out the code here where we have the underlying allocation | |||
729 | // size readily available. | |||
730 | APInt GEPOffset = Offset; | |||
731 | const DataLayout &DL = GEPI.getModule()->getDataLayout(); | |||
732 | for (gep_type_iterator GTI = gep_type_begin(GEPI), | |||
733 | GTE = gep_type_end(GEPI); | |||
734 | GTI != GTE; ++GTI) { | |||
735 | ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand()); | |||
736 | if (!OpC) | |||
737 | break; | |||
738 | ||||
739 | // Handle a struct index, which adds its field offset to the pointer. | |||
740 | if (StructType *STy = GTI.getStructTypeOrNull()) { | |||
741 | unsigned ElementIdx = OpC->getZExtValue(); | |||
742 | const StructLayout *SL = DL.getStructLayout(STy); | |||
743 | GEPOffset += | |||
744 | APInt(Offset.getBitWidth(), SL->getElementOffset(ElementIdx)); | |||
745 | } else { | |||
746 | // For array or vector indices, scale the index by the size of the | |||
747 | // type. | |||
748 | APInt Index = OpC->getValue().sextOrTrunc(Offset.getBitWidth()); | |||
749 | GEPOffset += | |||
750 | Index * | |||
751 | APInt(Offset.getBitWidth(), | |||
752 | DL.getTypeAllocSize(GTI.getIndexedType()).getFixedSize()); | |||
753 | } | |||
754 | ||||
755 | // If this index has computed an intermediate pointer which is not | |||
756 | // inbounds, then the result of the GEP is a poison value and we can | |||
757 | // delete it and all uses. | |||
758 | if (GEPOffset.ugt(AllocSize)) | |||
759 | return markAsDead(GEPI); | |||
760 | } | |||
761 | } | |||
762 | ||||
763 | return Base::visitGetElementPtrInst(GEPI); | |||
764 | } | |||
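// Illustrative consequence (not from the upstream source): with a 4-byte
// alloca, an inbounds GEP whose constant indices reach byte offset 8
// computes an out-of-bounds intermediate pointer, so under
// -sroa-strict-inbounds the GEP (and transitively its uses) is treated as
// dead.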
765 | ||||
766 | void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset, | |||
767 | uint64_t Size, bool IsVolatile) { | |||
768 | // We allow splitting of non-volatile loads and stores where the type is an | |||
769 | // integer type. These may be used to implement 'memcpy' or other "transfer | |||
770 | // of bits" patterns. | |||
771 | bool IsSplittable = | |||
772 | Ty->isIntegerTy() && !IsVolatile && DL.typeSizeEqualsStoreSize(Ty); | |||
773 | ||||
774 | insertUse(I, Offset, Size, IsSplittable); | |||
775 | } | |||
776 | ||||
777 | void visitLoadInst(LoadInst &LI) { | |||
778 | assert((!LI.isSimple() || LI.getType()->isSingleValueType()) && | |||
779 | "All simple FCA loads should have been pre-split"); | |||
780 | ||||
781 | if (!IsOffsetKnown) | |||
782 | return PI.setAborted(&LI); | |||
783 | ||||
784 | if (LI.isVolatile() && | |||
785 | LI.getPointerAddressSpace() != DL.getAllocaAddrSpace()) | |||
786 | return PI.setAborted(&LI); | |||
787 | ||||
788 | if (isa<ScalableVectorType>(LI.getType())) | |||
789 | return PI.setAborted(&LI); | |||
790 | ||||
791 | uint64_t Size = DL.getTypeStoreSize(LI.getType()).getFixedSize(); | |||
792 | return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile()); | |||
793 | } | |||
794 | ||||
795 | void visitStoreInst(StoreInst &SI) { | |||
796 | Value *ValOp = SI.getValueOperand(); | |||
797 | if (ValOp == *U) | |||
798 | return PI.setEscapedAndAborted(&SI); | |||
799 | if (!IsOffsetKnown) | |||
800 | return PI.setAborted(&SI); | |||
801 | ||||
802 | if (SI.isVolatile() && | |||
803 | SI.getPointerAddressSpace() != DL.getAllocaAddrSpace()) | |||
804 | return PI.setAborted(&SI); | |||
805 | ||||
806 | if (isa<ScalableVectorType>(ValOp->getType())) | |||
807 | return PI.setAborted(&SI); | |||
808 | ||||
809 | uint64_t Size = DL.getTypeStoreSize(ValOp->getType()).getFixedSize(); | |||
810 | ||||
811 | // If this memory access can be shown to *statically* extend outside the | |||
812 | // bounds of the allocation, its behavior is undefined, so simply | |||
813 | // ignore it. Note that this is more strict than the generic clamping | |||
814 | // behavior of insertUse. We also try to handle cases which might run the | |||
815 | // risk of overflow. | |||
816 | // FIXME: We should instead consider the pointer to have escaped if this | |||
817 | // function is being instrumented for addressing bugs or race conditions. | |||
818 | if (Size > AllocSize || Offset.ugt(AllocSize - Size)) { | |||
819 | LLVM_DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @" | |||
820 | << Offset << " which extends past the end of the " | |||
821 | << AllocSize << " byte alloca:\n" | |||
822 | << " alloca: " << AS.AI << "\n" | |||
823 | << " use: " << SI << "\n"); | |||
824 | return markAsDead(SI); | |||
825 | } | |||
826 | ||||
827 | assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) && | |||
828 | "All simple FCA stores should have been pre-split"); | |||
829 | handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile()); | |||
830 | } | |||
831 | ||||
832 | void visitMemSetInst(MemSetInst &II) { | |||
833 | assert(II.getRawDest() == *U && "Pointer use is not the destination?"); | |||
834 | ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength()); | |||
835 | if ((Length && Length->getValue() == 0) || | |||
836 | (IsOffsetKnown && Offset.uge(AllocSize))) | |||
837 | // Zero-length mem transfer intrinsics can be ignored entirely. | |||
838 | return markAsDead(II); | |||
839 | ||||
840 | if (!IsOffsetKnown) | |||
841 | return PI.setAborted(&II); | |||
842 | ||||
843 | // Don't replace this with a store with a different address space. TODO: | |||
844 | // Use a store with the casted new alloca? | |||
845 | if (II.isVolatile() && II.getDestAddressSpace() != DL.getAllocaAddrSpace()) | |||
846 | return PI.setAborted(&II); | |||
847 | ||||
848 | insertUse(II, Offset, Length ? Length->getLimitedValue() | |||
849 | : AllocSize - Offset.getLimitedValue(), | |||
850 | (bool)Length); | |||
851 | } | |||
852 | ||||
853 | void visitMemTransferInst(MemTransferInst &II) { | |||
854 | ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength()); | |||
855 | if (Length && Length->getValue() == 0) | |||
856 | // Zero-length mem transfer intrinsics can be ignored entirely. | |||
857 | return markAsDead(II); | |||
858 | ||||
859 | // Because we can visit these intrinsics twice, also check to see if the | |||
860 | // first time marked this instruction as dead. If so, skip it. | |||
861 | if (VisitedDeadInsts.count(&II)) | |||
862 | return; | |||
863 | ||||
864 | if (!IsOffsetKnown) | |||
865 | return PI.setAborted(&II); | |||
866 | ||||
867 | // Don't replace this with a load/store with a different address space. | |||
868 | // TODO: Use a store with the casted new alloca? | |||
869 | if (II.isVolatile() && | |||
870 | (II.getDestAddressSpace() != DL.getAllocaAddrSpace() || | |||
871 | II.getSourceAddressSpace() != DL.getAllocaAddrSpace())) | |||
872 | return PI.setAborted(&II); | |||
873 | ||||
874 | // This side of the transfer is completely out-of-bounds, and so we can | |||
875 | // nuke the entire transfer. However, we also need to nuke the other side | |||
876 | // if already added to our partitions. | |||
877 | // FIXME: Yet another place we really should bypass this when | |||
878 | // instrumenting for ASan. | |||
879 | if (Offset.uge(AllocSize)) { | |||
880 | SmallDenseMap<Instruction *, unsigned>::iterator MTPI = | |||
881 | MemTransferSliceMap.find(&II); | |||
882 | if (MTPI != MemTransferSliceMap.end()) | |||
883 | AS.Slices[MTPI->second].kill(); | |||
884 | return markAsDead(II); | |||
885 | } | |||
886 | ||||
887 | uint64_t RawOffset = Offset.getLimitedValue(); | |||
888 | uint64_t Size = Length ? Length->getLimitedValue() : AllocSize - RawOffset; | |||
889 | ||||
890 | // Check for the special case where the same exact value is used for both | |||
891 | // source and dest. | |||
892 | if (*U == II.getRawDest() && *U == II.getRawSource()) { | |||
893 | // For non-volatile transfers this is a no-op. | |||
894 | if (!II.isVolatile()) | |||
895 | return markAsDead(II); | |||
896 | ||||
897 | return insertUse(II, Offset, Size, /*IsSplittable=*/false); | |||
898 | } | |||
899 | ||||
900 | // If we have seen both source and destination for a mem transfer, then | |||
901 | // they both point to the same alloca. | |||
902 | bool Inserted; | |||
903 | SmallDenseMap<Instruction *, unsigned>::iterator MTPI; | |||
904 | std::tie(MTPI, Inserted) = | |||
905 | MemTransferSliceMap.insert(std::make_pair(&II, AS.Slices.size())); | |||
906 | unsigned PrevIdx = MTPI->second; | |||
907 | if (!Inserted) { | |||
908 | Slice &PrevP = AS.Slices[PrevIdx]; | |||
909 | ||||
910 | // Check if the begin offsets match and this is a non-volatile transfer. | |||
911 | // In that case, we can completely elide the transfer. | |||
912 | if (!II.isVolatile() && PrevP.beginOffset() == RawOffset) { | |||
913 | PrevP.kill(); | |||
914 | return markAsDead(II); | |||
915 | } | |||
916 | ||||
917 | // Otherwise we have an offset transfer within the same alloca. We can't | |||
918 | // split those. | |||
919 | PrevP.makeUnsplittable(); | |||
920 | } | |||
921 | ||||
922 | // Insert the use now that we've fixed up the splittable nature. | |||
923 | insertUse(II, Offset, Size, /*IsSplittable=*/Inserted && Length); | |||
924 | ||||
925 | // Check that we ended up with a valid index in the map. | |||
926 | assert(AS.Slices[PrevIdx].getUse()->getUser() == &II && | |||
927 | "Map index doesn't point back to a slice with this user."); | |||
928 | } | |||
929 | ||||
930 | // Disable SRoA for any intrinsics except for lifetime invariants and | |||
931 | // invariant group. | |||
932 | // FIXME: What about debug intrinsics? This matches old behavior, but | |||
933 | // doesn't make sense. | |||
934 | void visitIntrinsicInst(IntrinsicInst &II) { | |||
935 | if (II.isDroppable()) { | |||
936 | AS.DeadUseIfPromotable.push_back(U); | |||
937 | return; | |||
938 | } | |||
939 | ||||
940 | if (!IsOffsetKnown) | |||
941 | return PI.setAborted(&II); | |||
942 | ||||
943 | if (II.isLifetimeStartOrEnd()) { | |||
944 | ConstantInt *Length = cast<ConstantInt>(II.getArgOperand(0)); | |||
945 | uint64_t Size = std::min(AllocSize - Offset.getLimitedValue(), | |||
946 | Length->getLimitedValue()); | |||
947 | insertUse(II, Offset, Size, true); | |||
948 | return; | |||
949 | } | |||
950 | ||||
951 | if (II.isLaunderOrStripInvariantGroup()) { | |||
952 | enqueueUsers(II); | |||
953 | return; | |||
954 | } | |||
955 | ||||
956 | Base::visitIntrinsicInst(II); | |||
957 | } | |||
958 | ||||
959 | Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &Size) { | |||
960 | // We consider any PHI or select that results in a direct load or store of | |||
961 | // the same offset to be a viable use for slicing purposes. These uses | |||
962 | // are considered unsplittable and the size is the maximum loaded or stored | |||
963 | // size. | |||
964 | SmallPtrSet<Instruction *, 4> Visited; | |||
965 | SmallVector<std::pair<Instruction *, Instruction *>, 4> Uses; | |||
966 | Visited.insert(Root); | |||
967 | Uses.push_back(std::make_pair(cast<Instruction>(*U), Root)); | |||
968 | const DataLayout &DL = Root->getModule()->getDataLayout(); | |||
969 | // If there are no loads or stores, the access is dead. We mark that as | |||
970 | // a size zero access. | |||
971 | Size = 0; | |||
972 | do { | |||
973 | Instruction *I, *UsedI; | |||
974 | std::tie(UsedI, I) = Uses.pop_back_val(); | |||
975 | ||||
976 | if (LoadInst *LI = dyn_cast<LoadInst>(I)) { | |||
977 | Size = std::max(Size, | |||
978 | DL.getTypeStoreSize(LI->getType()).getFixedSize()); | |||
979 | continue; | |||
980 | } | |||
981 | if (StoreInst *SI = dyn_cast<StoreInst>(I)) { | |||
982 | Value *Op = SI->getOperand(0); | |||
983 | if (Op == UsedI) | |||
984 | return SI; | |||
985 | Size = std::max(Size, | |||
986 | DL.getTypeStoreSize(Op->getType()).getFixedSize()); | |||
987 | continue; | |||
988 | } | |||
989 | ||||
990 | if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { | |||
991 | if (!GEP->hasAllZeroIndices()) | |||
992 | return GEP; | |||
993 | } else if (!isa<BitCastInst>(I) && !isa<PHINode>(I) && | |||
994 | !isa<SelectInst>(I) && !isa<AddrSpaceCastInst>(I)) { | |||
995 | return I; | |||
996 | } | |||
997 | ||||
998 | for (User *U : I->users()) | |||
999 | if (Visited.insert(cast<Instruction>(U)).second) | |||
1000 | Uses.push_back(std::make_pair(I, cast<Instruction>(U))); | |||
1001 | } while (!Uses.empty()); | |||
1002 | ||||
1003 | return nullptr; | |||
1004 | } | |||
1005 | ||||
1006 | void visitPHINodeOrSelectInst(Instruction &I) { | |||
1007 | assert(isa<PHINode>(I) || isa<SelectInst>(I)); | |||
1008 | if (I.use_empty()) | |||
1009 | return markAsDead(I); | |||
1010 | ||||
1011 | // TODO: We could use SimplifyInstruction here to fold PHINodes and | |||
1012 | // SelectInsts. However, doing so requires to change the current | |||
1013 | // dead-operand-tracking mechanism. For instance, suppose neither loading | |||
1014 | // from %U nor %other traps. Then "load (select undef, %U, %other)" does not | |||
1015 | // trap either. However, if we simply replace %U with undef using the | |||
1016 | // current dead-operand-tracking mechanism, "load (select undef, undef, | |||
1017 | // %other)" may trap because the select may return the first operand | |||
1018 | // "undef". | |||
1019 | if (Value *Result = foldPHINodeOrSelectInst(I)) { | |||
1020 | if (Result == *U) | |||
1021 | // If the result of the constant fold will be the pointer, recurse | |||
1022 | // through the PHI/select as if we had RAUW'ed it. | |||
1023 | enqueueUsers(I); | |||
1024 | else | |||
1025 | // Otherwise the operand to the PHI/select is dead, and we can replace | |||
1026 | // it with undef. | |||
1027 | AS.DeadOperands.push_back(U); | |||
1028 | ||||
1029 | return; | |||
1030 | } | |||
1031 | ||||
1032 | if (!IsOffsetKnown) | |||
1033 | return PI.setAborted(&I); | |||
1034 | ||||
1035 | // See if we already have computed info on this node. | |||
1036 | uint64_t &Size = PHIOrSelectSizes[&I]; | |||
1037 | if (!Size) { | |||
1038 | // This is a new PHI/Select, check for an unsafe use of it. | |||
1039 | if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&I, Size)) | |||
1040 | return PI.setAborted(UnsafeI); | |||
1041 | } | |||
1042 | ||||
1043 | // For PHI and select operands outside the alloca, we can't nuke the entire | |||
1044 | // phi or select -- the other side might still be relevant, so we special | |||
1045 | // case them here and use a separate structure to track the operands | |||
1046 | // themselves which should be replaced with undef. | |||
1047 | // FIXME: This should instead be escaped in the event we're instrumenting | |||
1048 | // for address sanitization. | |||
1049 | if (Offset.uge(AllocSize)) { | |||
1050 | AS.DeadOperands.push_back(U); | |||
1051 | return; | |||
1052 | } | |||
1053 | ||||
1054 | insertUse(I, Offset, Size); | |||
1055 | } | |||
1056 | ||||
1057 | void visitPHINode(PHINode &PN) { visitPHINodeOrSelectInst(PN); } | |||
1058 | ||||
1059 | void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); } | |||
1060 | ||||
1061 | /// Disable SROA entirely if there are unhandled users of the alloca. | |||
1062 | void visitInstruction(Instruction &I) { PI.setAborted(&I); } | |||
1063 | }; | |||
1064 | ||||
1065 | AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI) | |||
1066 | : | |||
1067 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | |||
1068 | AI(AI), | |||
1069 | #endif | |||
1070 | PointerEscapingInstr(nullptr) { | |||
1071 | SliceBuilder PB(DL, AI, *this); | |||
1072 | SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI); | |||
1073 | if (PtrI.isEscaped() || PtrI.isAborted()) { | |||
1074 | // FIXME: We should sink the escape vs. abort info into the caller nicely, | |||
1075 | // possibly by just storing the PtrInfo in the AllocaSlices. | |||
1076 | PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst() | |||
1077 | : PtrI.getAbortingInst(); | |||
1078 | assert(PointerEscapingInstr && "Did not track a bad instruction"); | |||
1079 | return; | |||
1080 | } | |||
1081 | ||||
1082 | llvm::erase_if(Slices, [](const Slice &S) { return S.isDead(); }); | |||
1083 | ||||
1084 | // Sort the uses. This arranges for the offsets to be in ascending order, | |||
1085 | // and the sizes to be in descending order. | |||
1086 | llvm::stable_sort(Slices); | |||
1087 | } | |||
1088 | ||||
1089 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | |||
1090 | ||||
1091 | void AllocaSlices::print(raw_ostream &OS, const_iterator I, | |||
1092 | StringRef Indent) const { | |||
1093 | printSlice(OS, I, Indent); | |||
1094 | OS << "\n"; | |||
1095 | printUse(OS, I, Indent); | |||
1096 | } | |||
1097 | ||||
1098 | void AllocaSlices::printSlice(raw_ostream &OS, const_iterator I, | |||
1099 | StringRef Indent) const { | |||
1100 | OS << Indent << "[" << I->beginOffset() << "," << I->endOffset() << ")" | |||
1101 | << " slice #" << (I - begin()) | |||
1102 | << (I->isSplittable() ? " (splittable)" : ""); | |||
1103 | } | |||
1104 | ||||
1105 | void AllocaSlices::printUse(raw_ostream &OS, const_iterator I, | |||
1106 | StringRef Indent) const { | |||
1107 | OS << Indent << " used by: " << *I->getUse()->getUser() << "\n"; | |||
1108 | } | |||
1109 | ||||
1110 | void AllocaSlices::print(raw_ostream &OS) const { | |||
1111 | if (PointerEscapingInstr) { | |||
1112 | OS << "Can't analyze slices for alloca: " << AI << "\n" | |||
1113 | << " A pointer to this alloca escaped by:\n" | |||
1114 | << " " << *PointerEscapingInstr << "\n"; | |||
1115 | return; | |||
1116 | } | |||
1117 | ||||
1118 | OS << "Slices of alloca: " << AI << "\n"; | |||
1119 | for (const_iterator I = begin(), E = end(); I != E; ++I) | |||
1120 | print(OS, I); | |||
1121 | } | |||
1122 | ||||
1123 | LLVM_DUMP_METHOD void AllocaSlices::dump(const_iterator I) const { | |||
1124 | print(dbgs(), I); | |||
1125 | } | |||
1126 | LLVM_DUMP_METHOD void AllocaSlices::dump() const { print(dbgs()); } | |||
1127 | ||||
1128 | #endif // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | |||
1129 | ||||
1130 | /// Walk the range of a partitioning looking for a common type to cover this | |||
1131 | /// sequence of slices. | |||
1132 | static std::pair<Type *, IntegerType *> | |||
1133 | findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E, | |||
1134 | uint64_t EndOffset) { | |||
1135 | Type *Ty = nullptr; | |||
1136 | bool TyIsCommon = true; | |||
1137 | IntegerType *ITy = nullptr; | |||
1138 | ||||
1139 | // Note that we need to look at *every* alloca slice's Use to ensure we | |||
1140 | // always get consistent results regardless of the order of slices. | |||
1141 | for (AllocaSlices::const_iterator I = B; I != E; ++I) { | |||
1142 | Use *U = I->getUse(); | |||
1143 | if (isa<IntrinsicInst>(*U->getUser())) | |||
1144 | continue; | |||
1145 | if (I->beginOffset() != B->beginOffset() || I->endOffset() != EndOffset) | |||
1146 | continue; | |||
1147 | ||||
1148 | Type *UserTy = nullptr; | |||
1149 | if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) { | |||
1150 | UserTy = LI->getType(); | |||
1151 | } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) { | |||
1152 | UserTy = SI->getValueOperand()->getType(); | |||
1153 | } | |||
1154 | ||||
1155 | if (IntegerType *UserITy = dyn_cast_or_null<IntegerType>(UserTy)) { | |||
1156 | // If the type is larger than the partition, skip it. We only encounter | |||
1157 | // this for split integer operations where we want to use the type of the | |||
1158 | // entity causing the split. Also skip if the type is not a byte width | |||
1159 | // multiple. | |||
1160 | if (UserITy->getBitWidth() % 8 != 0 || | |||
1161 | UserITy->getBitWidth() / 8 > (EndOffset - B->beginOffset())) | |||
1162 | continue; | |||
1163 | ||||
1164 | // Track the largest bitwidth integer type used in this way in case there | |||
1165 | // is no common type. | |||
1166 | if (!ITy || ITy->getBitWidth() < UserITy->getBitWidth()) | |||
1167 | ITy = UserITy; | |||
1168 | } | |||
1169 | ||||
1170 | // To avoid depending on the order of slices, Ty and TyIsCommon must not | |||
1171 | // depend on types skipped above. | |||
1172 | if (!UserTy || (Ty && Ty != UserTy)) | |||
1173 | TyIsCommon = false; // Give up on anything but an iN type. | |||
1174 | else | |||
1175 | Ty = UserTy; | |||
1176 | } | |||
1177 | ||||
1178 | return {TyIsCommon ? Ty : nullptr, ITy}; | |||
1179 | } | |||
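// Example (illustrative): if every use spanning the whole partition loads or
// stores i64, the first element of the returned pair is i64. If one spanning
// use is a float load and another is an i32 store, no common type is found
// (the first element is null), but i32 is still reported via the second
// element as the widest byte-width integer type used.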
1180 | ||||
1181 | /// PHI instructions that use an alloca and are subsequently loaded can be | |||
1182 | /// rewritten to load both input pointers in the pred blocks and then PHI the | |||
1183 | /// results, allowing the load of the alloca to be promoted. | |||
1184 | /// From this: | |||
1185 | /// %P2 = phi [i32* %Alloca, i32* %Other] | |||
1186 | /// %V = load i32* %P2 | |||
1187 | /// to: | |||
1188 | /// %V1 = load i32* %Alloca -> will be mem2reg'd | |||
1189 | /// ... | |||
1190 | /// %V2 = load i32* %Other | |||
1191 | /// ... | |||
1192 | /// %V = phi [i32 %V1, i32 %V2] | |||
1193 | /// | |||
1194 | /// We can do this to a select if its only uses are loads and if the operands | |||
1195 | /// to the select can be loaded unconditionally. | |||
1196 | /// | |||
1197 | /// FIXME: This should be hoisted into a generic utility, likely in | |||
1198 | /// Transforms/Util/Local.h | |||
1199 | static bool isSafePHIToSpeculate(PHINode &PN) { | |||
1200 | const DataLayout &DL = PN.getModule()->getDataLayout(); | |||
1201 | ||||
1202 | // For now, we can only do this promotion if the load is in the same block | |||
1203 | // as the PHI, and if there are no stores between the phi and load. | |||
1204 | // TODO: Allow recursive phi users. | |||
1205 | // TODO: Allow stores. | |||
1206 | BasicBlock *BB = PN.getParent(); | |||
1207 | Align MaxAlign; | |||
1208 | uint64_t APWidth = DL.getIndexTypeSizeInBits(PN.getType()); | |||
1209 | APInt MaxSize(APWidth, 0); | |||
1210 | bool HaveLoad = false; | |||
1211 | for (User *U : PN.users()) { | |||
1212 | LoadInst *LI = dyn_cast<LoadInst>(U); | |||
1213 | if (!LI || !LI->isSimple()) | |||
1214 | return false; | |||
1215 | ||||
1216 | // For now we only allow loads in the same block as the PHI. This is | |||
1217 | // a common case that happens when instcombine merges two loads through | |||
1218 | // a PHI. | |||
1219 | if (LI->getParent() != BB) | |||
1220 | return false; | |||
1221 | ||||
1222 | // Ensure that there are no instructions between the PHI and the load that | |||
1223 | // could store. | |||
1224 | for (BasicBlock::iterator BBI(PN); &*BBI != LI; ++BBI) | |||
1225 | if (BBI->mayWriteToMemory()) | |||
1226 | return false; | |||
1227 | ||||
1228 | uint64_t Size = DL.getTypeStoreSize(LI->getType()).getFixedSize(); | |||
1229 | MaxAlign = std::max(MaxAlign, LI->getAlign()); | |||
1230 | MaxSize = MaxSize.ult(Size) ? APInt(APWidth, Size) : MaxSize; | |||
1231 | HaveLoad = true; | |||
1232 | } | |||
1233 | ||||
1234 | if (!HaveLoad) | |||
1235 | return false; | |||
1236 | ||||
1237 | // We can only transform this if it is safe to push the loads into the | |||
1238 | // predecessor blocks. The only thing to watch out for is that we can't put | |||
1239 | // a possibly trapping load in the predecessor if it is a critical edge. | |||
1240 | for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) { | |||
1241 | Instruction *TI = PN.getIncomingBlock(Idx)->getTerminator(); | |||
1242 | Value *InVal = PN.getIncomingValue(Idx); | |||
1243 | ||||
1244 | // If the value is produced by the terminator of the predecessor (an | |||
1245 | // invoke) or it has side-effects, there is no valid place to put a load | |||
1246 | // in the predecessor. | |||
1247 | if (TI == InVal || TI->mayHaveSideEffects()) | |||
1248 | return false; | |||
1249 | ||||
1250 | // If the predecessor has a single successor, then the edge isn't | |||
1251 | // critical. | |||
1252 | if (TI->getNumSuccessors() == 1) | |||
1253 | continue; | |||
1254 | ||||
1255 | // If this pointer is always safe to load, or if we can prove that there | |||
1256 | // is already a load in the block, then we can move the load to the pred | |||
1257 | // block. | |||
1258 | if (isSafeToLoadUnconditionally(InVal, MaxAlign, MaxSize, DL, TI)) | |||
1259 | continue; | |||
1260 | ||||
1261 | return false; | |||
1262 | } | |||
1263 | ||||
1264 | return true; | |||
1265 | } | |||
1266 | ||||
1267 | static void speculatePHINodeLoads(PHINode &PN) { | |||
1268 | LLVM_DEBUG(dbgs() << " original: " << PN << "\n"); | |||
1269 | ||||
1270 | LoadInst *SomeLoad = cast<LoadInst>(PN.user_back()); | |||
1271 | Type *LoadTy = SomeLoad->getType(); | |||
1272 | IRBuilderTy PHIBuilder(&PN); | |||
1273 | PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(), | |||
1274 | PN.getName() + ".sroa.speculated"); | |||
1275 | ||||
1276 | // Get the AA tags and alignment to use from one of the loads. It does not | |||
1277 | // matter which one we get and if any differ. | |||
1278 | AAMDNodes AATags; | |||
1279 | SomeLoad->getAAMetadata(AATags); | |||
1280 | Align Alignment = SomeLoad->getAlign(); | |||
1281 | ||||
1282 | // Rewrite all loads of the PN to use the new PHI. | |||
1283 | while (!PN.use_empty()) { | |||
1284 | LoadInst *LI = cast<LoadInst>(PN.user_back()); | |||
1285 | LI->replaceAllUsesWith(NewPN); | |||
1286 | LI->eraseFromParent(); | |||
1287 | } | |||
1288 | ||||
1289 | // Inject loads into all of the pred blocks. | |||
1290 | DenseMap<BasicBlock*, Value*> InjectedLoads; | |||
1291 | for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) { | |||
1292 | BasicBlock *Pred = PN.getIncomingBlock(Idx); | |||
1293 | Value *InVal = PN.getIncomingValue(Idx); | |||
1294 | ||||
1295 | // A PHI node is allowed to have multiple (duplicated) entries for the same | |||
1296 | // basic block, as long as the value is the same. So if we already injected | |||
1297 | // a load in the predecessor, then we should reuse the same load for all | |||
1298 | // duplicated entries. | |||
1299 | if (Value* V = InjectedLoads.lookup(Pred)) { | |||
1300 | NewPN->addIncoming(V, Pred); | |||
1301 | continue; | |||
1302 | } | |||
1303 | ||||
1304 | Instruction *TI = Pred->getTerminator(); | |||
1305 | IRBuilderTy PredBuilder(TI); | |||
1306 | ||||
1307 | LoadInst *Load = PredBuilder.CreateAlignedLoad( | |||
1308 | LoadTy, InVal, Alignment, | |||
1309 | (PN.getName() + ".sroa.speculate.load." + Pred->getName())); | |||
1310 | ++NumLoadsSpeculated; | |||
1311 | if (AATags) | |||
1312 | Load->setAAMetadata(AATags); | |||
1313 | NewPN->addIncoming(Load, Pred); | |||
1314 | InjectedLoads[Pred] = Load; | |||
1315 | } | |||
1316 | ||||
1317 | LLVM_DEBUG(dbgs() << " speculated to: " << *NewPN << "\n"); | |||
1318 | PN.eraseFromParent(); | |||
1319 | } | |||
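// A minimal sketch of the rewrite above on hypothetical IR (block and value
// names %pred1, %pred2, %a, %o, %p are illustrative only):
//   pred1: br label %bb                     pred2: br label %bb
//   bb:    %p = phi i32* [ %a, %pred1 ], [ %o, %pred2 ]
//          %v = load i32, i32* %p
// becomes
//   pred1: %p.sroa.speculate.load.pred1 = load i32, i32* %a
//   pred2: %p.sroa.speculate.load.pred2 = load i32, i32* %o
//   bb:    %p.sroa.speculated = phi i32 [ %p.sroa.speculate.load.pred1, %pred1 ],
//                                       [ %p.sroa.speculate.load.pred2, %pred2 ]
// Duplicate incoming entries for the same predecessor reuse one injected load
// via the InjectedLoads map.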
1320 | ||||
1321 | /// Select instructions that use an alloca and are subsequently loaded can be | |||
1322 | /// rewritten to load both input pointers and then select between the result, | |||
1323 | /// allowing the load of the alloca to be promoted. | |||
1324 | /// From this: | |||
1325 | /// %P2 = select i1 %cond, i32* %Alloca, i32* %Other | |||
1326 | /// %V = load i32* %P2 | |||
1327 | /// to: | |||
1328 | /// %V1 = load i32* %Alloca -> will be mem2reg'd | |||
1329 | /// %V2 = load i32* %Other | |||
1330 | /// %V = select i1 %cond, i32 %V1, i32 %V2 | |||
1331 | /// | |||
1332 | /// We can do this to a select if its only uses are loads and if the operand | |||
1333 | /// to the select can be loaded unconditionally. | |||
1334 | static bool isSafeSelectToSpeculate(SelectInst &SI) { | |||
1335 | Value *TValue = SI.getTrueValue(); | |||
1336 | Value *FValue = SI.getFalseValue(); | |||
1337 | const DataLayout &DL = SI.getModule()->getDataLayout(); | |||
1338 | ||||
1339 | for (User *U : SI.users()) { | |||
1340 | LoadInst *LI = dyn_cast<LoadInst>(U); | |||
1341 | if (!LI || !LI->isSimple()) | |||
1342 | return false; | |||
1343 | ||||
1344 | // Both operands to the select need to be dereferenceable, either | |||
1345 | // absolutely (e.g. allocas) or at this point because we can see other | |||
1346 | // accesses to it. | |||
1347 | if (!isSafeToLoadUnconditionally(TValue, LI->getType(), | |||
1348 | LI->getAlign(), DL, LI)) | |||
1349 | return false; | |||
1350 | if (!isSafeToLoadUnconditionally(FValue, LI->getType(), | |||
1351 | LI->getAlign(), DL, LI)) | |||
1352 | return false; | |||
1353 | } | |||
1354 | ||||
1355 | return true; | |||
1356 | } | |||
1357 | ||||
1358 | static void speculateSelectInstLoads(SelectInst &SI) { | |||
1359 | LLVM_DEBUG(dbgs() << " original: " << SI << "\n"); | |||
1360 | ||||
1361 | IRBuilderTy IRB(&SI); | |||
1362 | Value *TV = SI.getTrueValue(); | |||
1363 | Value *FV = SI.getFalseValue(); | |||
1364 | // Replace the loads of the select with a select of two loads. | |||
1365 | while (!SI.use_empty()) { | |||
1366 | LoadInst *LI = cast<LoadInst>(SI.user_back()); | |||
1367 | assert(LI->isSimple() && "We only speculate simple loads"); | |||
1368 | ||||
1369 | IRB.SetInsertPoint(LI); | |||
1370 | LoadInst *TL = IRB.CreateLoad(LI->getType(), TV, | |||
1371 | LI->getName() + ".sroa.speculate.load.true"); | |||
1372 | LoadInst *FL = IRB.CreateLoad(LI->getType(), FV, | |||
1373 | LI->getName() + ".sroa.speculate.load.false"); | |||
1374 | NumLoadsSpeculated += 2; | |||
1375 | ||||
1376 | // Transfer alignment and AA info if present. | |||
1377 | TL->setAlignment(LI->getAlign()); | |||
1378 | FL->setAlignment(LI->getAlign()); | |||
1379 | ||||
1380 | AAMDNodes Tags; | |||
1381 | LI->getAAMetadata(Tags); | |||
1382 | if (Tags) { | |||
1383 | TL->setAAMetadata(Tags); | |||
1384 | FL->setAAMetadata(Tags); | |||
1385 | } | |||
1386 | ||||
1387 | Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL, | |||
1388 | LI->getName() + ".sroa.speculated"); | |||
1389 | ||||
1390 | LLVM_DEBUG(dbgs() << " speculated to: " << *V << "\n"); | |||
1391 | LI->replaceAllUsesWith(V); | |||
1392 | LI->eraseFromParent(); | |||
1393 | } | |||
1394 | SI.eraseFromParent(); | |||
1395 | } | |||
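// A minimal sketch of the select rewrite on hypothetical IR (%c, %a, %b, %v
// are illustrative names):
//   %p = select i1 %c, i32* %a, i32* %b
//   %v = load i32, i32* %p
// becomes
//   %v.sroa.speculate.load.true  = load i32, i32* %a
//   %v.sroa.speculate.load.false = load i32, i32* %b
//   %v.sroa.speculated = select i1 %c, i32 %v.sroa.speculate.load.true,
//                                      i32 %v.sroa.speculate.load.false
// after which the original load and select are erased.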
1396 | ||||
1397 | /// Build a GEP out of a base pointer and indices. | |||
1398 | /// | |||
1399 | /// This will return the BasePtr if that is valid, or build a new GEP | |||
1400 | /// instruction using the IRBuilder if GEP-ing is needed. | |||
1401 | static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr, | |||
1402 | SmallVectorImpl<Value *> &Indices, | |||
1403 | const Twine &NamePrefix) { | |||
1404 | if (Indices.empty()) | |||
1405 | return BasePtr; | |||
1406 | ||||
1407 | // A single zero index is a no-op, so check for this and avoid building a GEP | |||
1408 | // in that case. | |||
1409 | if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero()) | |||
1410 | return BasePtr; | |||
1411 | ||||
1412 | return IRB.CreateInBoundsGEP(BasePtr->getType()->getPointerElementType(), | |||
1413 | BasePtr, Indices, NamePrefix + "sroa_idx"); | |||
1414 | } | |||
1415 | ||||
1416 | /// Get a natural GEP off of the BasePtr walking through Ty toward | |||
1417 | /// TargetTy without changing the offset of the pointer. | |||
1418 | /// | |||
1419 | /// This routine assumes we've already established a properly offset GEP with | |||
1420 | /// Indices, and arrived at the Ty type. The goal is to continue to GEP with | |||
1421 | /// zero-indices down through type layers until we find one the same as | |||
1422 | /// TargetTy. If we can't find one with the same type, we at least try to use | |||
1423 | /// one with the same size. If none of that works, we just produce the GEP as | |||
1424 | /// indicated by Indices to have the correct offset. | |||
1425 | static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL, | |||
1426 | Value *BasePtr, Type *Ty, Type *TargetTy, | |||
1427 | SmallVectorImpl<Value *> &Indices, | |||
1428 | const Twine &NamePrefix) { | |||
1429 | if (Ty == TargetTy) | |||
1430 | return buildGEP(IRB, BasePtr, Indices, NamePrefix); | |||
1431 | ||||
1432 | // Offset size to use for the indices. | |||
1433 | unsigned OffsetSize = DL.getIndexTypeSizeInBits(BasePtr->getType()); | |||
1434 | ||||
1435 | // See if we can descend into a struct and locate a field with the correct | |||
1436 | // type. | |||
1437 | unsigned NumLayers = 0; | |||
1438 | Type *ElementTy = Ty; | |||
1439 | do { | |||
1440 | if (ElementTy->isPointerTy()) | |||
1441 | break; | |||
1442 | ||||
1443 | if (ArrayType *ArrayTy = dyn_cast<ArrayType>(ElementTy)) { | |||
1444 | ElementTy = ArrayTy->getElementType(); | |||
1445 | Indices.push_back(IRB.getIntN(OffsetSize, 0)); | |||
1446 | } else if (VectorType *VectorTy = dyn_cast<VectorType>(ElementTy)) { | |||
1447 | ElementTy = VectorTy->getElementType(); | |||
1448 | Indices.push_back(IRB.getInt32(0)); | |||
1449 | } else if (StructType *STy = dyn_cast<StructType>(ElementTy)) { | |||
1450 | if (STy->element_begin() == STy->element_end()) | |||
1451 | break; // Nothing left to descend into. | |||
1452 | ElementTy = *STy->element_begin(); | |||
1453 | Indices.push_back(IRB.getInt32(0)); | |||
1454 | } else { | |||
1455 | break; | |||
1456 | } | |||
1457 | ++NumLayers; | |||
1458 | } while (ElementTy != TargetTy); | |||
1459 | if (ElementTy != TargetTy) | |||
1460 | Indices.erase(Indices.end() - NumLayers, Indices.end()); | |||
1461 | ||||
1462 | return buildGEP(IRB, BasePtr, Indices, NamePrefix); | |||
1463 | } | |||
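// Added illustration with assumed types (not from the upstream source): with
// Ty = { [4 x float] } and TargetTy = float, the loop above appends two zero
// indices (struct field 0, then array element 0) and buildGEP emits a single
// inbounds GEP. If TargetTy were never reached, the speculatively pushed
// indices would be erased and the GEP built from the original Indices alone.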
1464 | ||||
1465 | /// Recursively compute indices for a natural GEP. | |||
1466 | /// | |||
1467 | /// This is the recursive step for getNaturalGEPWithOffset that walks down the | |||
1468 | /// element types adding appropriate indices for the GEP. | |||
1469 | static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL, | |||
1470 | Value *Ptr, Type *Ty, APInt &Offset, | |||
1471 | Type *TargetTy, | |||
1472 | SmallVectorImpl<Value *> &Indices, | |||
1473 | const Twine &NamePrefix) { | |||
1474 | if (Offset == 0) | |||
1475 | return getNaturalGEPWithType(IRB, DL, Ptr, Ty, TargetTy, Indices, | |||
1476 | NamePrefix); | |||
1477 | ||||
1478 | // We can't recurse through pointer types. | |||
1479 | if (Ty->isPointerTy()) | |||
1480 | return nullptr; | |||
1481 | ||||
1482 | // We try to analyze GEPs over vectors here, but note that these GEPs are | |||
1483 | // extremely poorly defined currently. The long-term goal is to remove GEPing | |||
1484 | // over a vector from the IR completely. | |||
1485 | if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) { | |||
1486 | unsigned ElementSizeInBits = | |||
1487 | DL.getTypeSizeInBits(VecTy->getScalarType()).getFixedSize(); | |||
1488 | if (ElementSizeInBits % 8 != 0) { | |||
1489 | // GEPs over non-multiple of 8 size vector elements are invalid. | |||
1490 | return nullptr; | |||
1491 | } | |||
1492 | APInt ElementSize(Offset.getBitWidth(), ElementSizeInBits / 8); | |||
1493 | APInt NumSkippedElements = Offset.sdiv(ElementSize); | |||
1494 | if (NumSkippedElements.ugt(cast<FixedVectorType>(VecTy)->getNumElements())) | |||
1495 | return nullptr; | |||
1496 | Offset -= NumSkippedElements * ElementSize; | |||
1497 | Indices.push_back(IRB.getInt(NumSkippedElements)); | |||
1498 | return getNaturalGEPRecursively(IRB, DL, Ptr, VecTy->getElementType(), | |||
1499 | Offset, TargetTy, Indices, NamePrefix); | |||
1500 | } | |||
1501 | ||||
1502 | if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) { | |||
1503 | Type *ElementTy = ArrTy->getElementType(); | |||
1504 | APInt ElementSize(Offset.getBitWidth(), | |||
1505 | DL.getTypeAllocSize(ElementTy).getFixedSize()); | |||
1506 | APInt NumSkippedElements = Offset.sdiv(ElementSize); | |||
1507 | if (NumSkippedElements.ugt(ArrTy->getNumElements())) | |||
1508 | return nullptr; | |||
1509 | ||||
1510 | Offset -= NumSkippedElements * ElementSize; | |||
1511 | Indices.push_back(IRB.getInt(NumSkippedElements)); | |||
1512 | return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy, | |||
1513 | Indices, NamePrefix); | |||
1514 | } | |||
1515 | ||||
1516 | StructType *STy = dyn_cast<StructType>(Ty); | |||
1517 | if (!STy) | |||
1518 | return nullptr; | |||
1519 | ||||
1520 | const StructLayout *SL = DL.getStructLayout(STy); | |||
1521 | uint64_t StructOffset = Offset.getZExtValue(); | |||
1522 | if (StructOffset >= SL->getSizeInBytes()) | |||
1523 | return nullptr; | |||
1524 | unsigned Index = SL->getElementContainingOffset(StructOffset); | |||
1525 | Offset -= APInt(Offset.getBitWidth(), SL->getElementOffset(Index)); | |||
1526 | Type *ElementTy = STy->getElementType(Index); | |||
1527 | if (Offset.uge(DL.getTypeAllocSize(ElementTy).getFixedSize())) | |||
1528 | return nullptr; // The offset points into alignment padding. | |||
1529 | ||||
1530 | Indices.push_back(IRB.getInt32(Index)); | |||
1531 | return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy, | |||
1532 | Indices, NamePrefix); | |||
1533 | } | |||
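// Worked example, added for illustration (types and layout are hypothetical):
// recursing with Ty = [10 x i32] and Offset = 12 gives ElementSize = 4 and
// NumSkippedElements = 3, pushes index 3, and recurses on i32 with Offset = 0,
// which falls through to getNaturalGEPWithType. For Ty = { i32, i64 } and
// Offset = 8 under a typical layout, the struct branch selects field 1 and
// recurses with a remaining offset of 0.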
1534 | ||||
1535 | /// Get a natural GEP from a base pointer to a particular offset and | |||
1536 | /// resulting in a particular type. | |||
1537 | /// | |||
1538 | /// The goal is to produce a "natural" looking GEP that works with the existing | |||
1539 | /// composite types to arrive at the appropriate offset and element type for | |||
1540 | /// a pointer. TargetTy is the element type the returned GEP should point-to if | |||
1541 | /// possible. We recurse by decreasing Offset, adding the appropriate index to | |||
1542 | /// Indices, and setting Ty to the result subtype. | |||
1543 | /// | |||
1544 | /// If no natural GEP can be constructed, this function returns null. | |||
1545 | static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL, | |||
1546 | Value *Ptr, APInt Offset, Type *TargetTy, | |||
1547 | SmallVectorImpl<Value *> &Indices, | |||
1548 | const Twine &NamePrefix) { | |||
1549 | PointerType *Ty = cast<PointerType>(Ptr->getType()); | |||
1550 | ||||
1551 | // Don't consider any GEPs through an i8* as natural unless the TargetTy is | |||
1552 | // an i8. | |||
1553 | if (Ty == IRB.getInt8PtrTy(Ty->getAddressSpace()) && TargetTy->isIntegerTy(8)) | |||
1554 | return nullptr; | |||
1555 | ||||
1556 | Type *ElementTy = Ty->getElementType(); | |||
1557 | if (!ElementTy->isSized()) | |||
1558 | return nullptr; // We can't GEP through an unsized element. | |||
1559 | if (isa<ScalableVectorType>(ElementTy)) | |||
1560 | return nullptr; | |||
1561 | APInt ElementSize(Offset.getBitWidth(), | |||
1562 | DL.getTypeAllocSize(ElementTy).getFixedSize()); | |||
1563 | if (ElementSize == 0) | |||
1564 | return nullptr; // Zero-length arrays can't help us build a natural GEP. | |||
1565 | APInt NumSkippedElements = Offset.sdiv(ElementSize); | |||
1566 | ||||
1567 | Offset -= NumSkippedElements * ElementSize; | |||
1568 | Indices.push_back(IRB.getInt(NumSkippedElements)); | |||
1569 | return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy, | |||
1570 | Indices, NamePrefix); | |||
1571 | } | |||
1572 | ||||
1573 | /// Compute an adjusted pointer from Ptr by Offset bytes where the | |||
1574 | /// resulting pointer has PointerTy. | |||
1575 | /// | |||
1576 | /// This tries very hard to compute a "natural" GEP which arrives at the offset | |||
1577 | /// and produces the pointer type desired. Where it cannot, it will try to use | |||
1578 | /// the natural GEP to arrive at the offset and bitcast to the type. Where that | |||
1579 | /// fails, it will try to use an existing i8* and GEP to the byte offset and | |||
1580 | /// bitcast to the type. | |||
1581 | /// | |||
1582 | /// The strategy for finding the more natural GEPs is to peel off layers of the | |||
1583 | /// pointer, walking back through bit casts and GEPs, searching for a base | |||
1584 | /// pointer from which we can compute a natural GEP with the desired | |||
1585 | /// properties. The algorithm tries to fold as many constant indices into | |||
1586 | /// a single GEP as possible, thus making each GEP more independent of the | |||
1587 | /// surrounding code. | |||
1588 | static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr, | |||
1589 | APInt Offset, Type *PointerTy, | |||
1590 | const Twine &NamePrefix) { | |||
1591 | // Even though we don't look through PHI nodes, we could be called on an | |||
1592 | // instruction in an unreachable block, which may be on a cycle. | |||
1593 | SmallPtrSet<Value *, 4> Visited; | |||
1594 | Visited.insert(Ptr); | |||
1595 | SmallVector<Value *, 4> Indices; | |||
1596 | ||||
1597 | // We may end up computing an offset pointer that has the wrong type. If we | |||
1598 | // never are able to compute one directly that has the correct type, we'll | |||
1599 | // fall back to it, so keep it and the base it was computed from around here. | |||
1600 | Value *OffsetPtr = nullptr; | |||
1601 | Value *OffsetBasePtr; | |||
1602 | ||||
1603 | // Remember any i8 pointer we come across to re-use if we need to do a raw | |||
1604 | // byte offset. | |||
1605 | Value *Int8Ptr = nullptr; | |||
1606 | APInt Int8PtrOffset(Offset.getBitWidth(), 0); | |||
1607 | ||||
1608 | PointerType *TargetPtrTy = cast<PointerType>(PointerTy); | |||
1609 | Type *TargetTy = TargetPtrTy->getElementType(); | |||
1610 | ||||
1611 | // As `addrspacecast` is not always a no-op cast, `Ptr` (the storage pointer) | |||
1612 | // may have a different address space from the expected `PointerTy` (the pointer | |||
1613 | // to be used). Adjust the pointer type based on the original storage pointer. | |||
1614 | auto AS = cast<PointerType>(Ptr->getType())->getAddressSpace(); | |||
1615 | PointerTy = TargetTy->getPointerTo(AS); | |||
1616 | ||||
1617 | do { | |||
1618 | // First fold any existing GEPs into the offset. | |||
1619 | while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) { | |||
1620 | APInt GEPOffset(Offset.getBitWidth(), 0); | |||
1621 | if (!GEP->accumulateConstantOffset(DL, GEPOffset)) | |||
1622 | break; | |||
1623 | Offset += GEPOffset; | |||
1624 | Ptr = GEP->getPointerOperand(); | |||
1625 | if (!Visited.insert(Ptr).second) | |||
1626 | break; | |||
1627 | } | |||
1628 | ||||
1629 | // See if we can perform a natural GEP here. | |||
1630 | Indices.clear(); | |||
1631 | if (Value *P = getNaturalGEPWithOffset(IRB, DL, Ptr, Offset, TargetTy, | |||
1632 | Indices, NamePrefix)) { | |||
1633 | // If we have a new natural pointer at the offset, clear out any old | |||
1634 | // offset pointer we computed. Unless it is the base pointer or | |||
1635 | // a non-instruction, we built a GEP we don't need. Zap it. | |||
1636 | if (OffsetPtr && OffsetPtr != OffsetBasePtr) | |||
1637 | if (Instruction *I = dyn_cast<Instruction>(OffsetPtr)) { | |||
1638 | assert(I->use_empty() && "Built a GEP with uses somehow!"); | |||
1639 | I->eraseFromParent(); | |||
1640 | } | |||
1641 | OffsetPtr = P; | |||
1642 | OffsetBasePtr = Ptr; | |||
1643 | // If we also found a pointer of the right type, we're done. | |||
1644 | if (P->getType() == PointerTy) | |||
1645 | break; | |||
1646 | } | |||
1647 | ||||
1648 | // Stash this pointer if we've found an i8*. | |||
1649 | if (Ptr->getType()->isIntegerTy(8)) { | |||
1650 | Int8Ptr = Ptr; | |||
1651 | Int8PtrOffset = Offset; | |||
1652 | } | |||
1653 | ||||
1654 | // Peel off a layer of the pointer and update the offset appropriately. | |||
1655 | if (Operator::getOpcode(Ptr) == Instruction::BitCast) { | |||
1656 | Ptr = cast<Operator>(Ptr)->getOperand(0); | |||
1657 | } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) { | |||
1658 | if (GA->isInterposable()) | |||
1659 | break; | |||
1660 | Ptr = GA->getAliasee(); | |||
1661 | } else { | |||
1662 | break; | |||
1663 | } | |||
1664 | assert(Ptr->getType()->isPointerTy() && "Unexpected operand type!"); | |||
1665 | } while (Visited.insert(Ptr).second); | |||
1666 | ||||
1667 | if (!OffsetPtr) { | |||
1668 | if (!Int8Ptr) { | |||
1669 | Int8Ptr = IRB.CreateBitCast( | |||
1670 | Ptr, IRB.getInt8PtrTy(PointerTy->getPointerAddressSpace()), | |||
1671 | NamePrefix + "sroa_raw_cast"); | |||
1672 | Int8PtrOffset = Offset; | |||
1673 | } | |||
1674 | ||||
1675 | OffsetPtr = Int8PtrOffset == 0 | |||
1676 | ? Int8Ptr | |||
1677 | : IRB.CreateInBoundsGEP(IRB.getInt8Ty(), Int8Ptr, | |||
1678 | IRB.getInt(Int8PtrOffset), | |||
1679 | NamePrefix + "sroa_raw_idx"); | |||
1680 | } | |||
1681 | Ptr = OffsetPtr; | |||
1682 | ||||
1683 | // On the off chance we were targeting i8*, guard the bitcast here. | |||
1684 | if (cast<PointerType>(Ptr->getType()) != TargetPtrTy) { | |||
1685 | Ptr = IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr, | |||
1686 | TargetPtrTy, | |||
1687 | NamePrefix + "sroa_cast"); | |||
1688 | } | |||
1689 | ||||
1690 | return Ptr; | |||
1691 | } | |||
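// A minimal sketch of the fallback path above, with hypothetical names: when
// no natural GEP of the right type exists, e.g. adjusting an i64* %p by 4
// bytes to obtain an i32*, the code emits roughly
//   %p.sroa_raw_cast = bitcast i64* %p to i8*
//   %p.sroa_raw_idx  = getelementptr inbounds i8, i8* %p.sroa_raw_cast, i64 4
//   %p.sroa_cast     = bitcast i8* %p.sroa_raw_idx to i32*
// with the actual names carrying the caller-supplied NamePrefix.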
1692 | ||||
1693 | /// Compute the adjusted alignment for a load or store from an offset. | |||
1694 | static Align getAdjustedAlignment(Instruction *I, uint64_t Offset) { | |||
1695 | return commonAlignment(getLoadStoreAlignment(I), Offset); | |||
1696 | } | |||
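// Added note: commonAlignment returns the largest alignment still guaranteed
// once the access is re-based at Offset. For example (illustrative values),
// an align-8 access at byte offset 4 yields align 4, at offset 6 yields
// align 2, and at offset 0 keeps align 8.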
1697 | ||||
1698 | /// Test whether we can convert a value from the old to the new type. | |||
1699 | /// | |||
1700 | /// This predicate should be used to guard calls to convertValue in order to | |||
1701 | /// ensure that we only try to convert viable values. The strategy is that we | |||
1702 | /// will peel off single element struct and array wrappings to get to an | |||
1703 | /// underlying value, and convert that value. | |||
1704 | static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) { | |||
1705 | if (OldTy == NewTy) | |||
1706 | return true; | |||
1707 | ||||
1708 | // For integer types, we can't handle any bit-width differences. This would | |||
1709 | // break both vector conversions with extension and introduce endianness | |||
1710 | // issues when in conjunction with loads and stores. | |||
1711 | if (isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) { | |||
1712 | assert(cast<IntegerType>(OldTy)->getBitWidth() != | |||
1713 | cast<IntegerType>(NewTy)->getBitWidth() && | |||
1714 | "We can't have the same bitwidth for different int types"); | |||
1715 | return false; | |||
1716 | } | |||
1717 | ||||
1718 | if (DL.getTypeSizeInBits(NewTy).getFixedSize() != | |||
1719 | DL.getTypeSizeInBits(OldTy).getFixedSize()) | |||
1720 | return false; | |||
1721 | if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType()) | |||
1722 | return false; | |||
1723 | ||||
1724 | // We can convert pointers to integers and vice-versa. Same for vectors | |||
1725 | // of pointers and integers. | |||
1726 | OldTy = OldTy->getScalarType(); | |||
1727 | NewTy = NewTy->getScalarType(); | |||
1728 | if (NewTy->isPointerTy() || OldTy->isPointerTy()) { | |||
1729 | if (NewTy->isPointerTy() && OldTy->isPointerTy()) { | |||
1730 | unsigned OldAS = OldTy->getPointerAddressSpace(); | |||
1731 | unsigned NewAS = NewTy->getPointerAddressSpace(); | |||
1732 | // Convert pointers if they are pointers from the same address space or | |||
1733 | // different integral (not non-integral) address spaces with the same | |||
1734 | // pointer size. | |||
1735 | return OldAS == NewAS || | |||
1736 | (!DL.isNonIntegralAddressSpace(OldAS) && | |||
1737 | !DL.isNonIntegralAddressSpace(NewAS) && | |||
1738 | DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS)); | |||
1739 | } | |||
1740 | ||||
1741 | // We can convert integers to integral pointers, but not to non-integral | |||
1742 | // pointers. | |||
1743 | if (OldTy->isIntegerTy()) | |||
1744 | return !DL.isNonIntegralPointerType(NewTy); | |||
1745 | ||||
1746 | // We can convert integral pointers to integers, but non-integral pointers | |||
1747 | // need to remain pointers. | |||
1748 | if (!DL.isNonIntegralPointerType(OldTy)) | |||
1749 | return NewTy->isIntegerTy(); | |||
1750 | ||||
1751 | return false; | |||
1752 | } | |||
1753 | ||||
1754 | return true; | |||
1755 | } | |||
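// Added examples under a typical 64-bit DataLayout (illustrative, not
// exhaustive): i64 <-> i8* and <2 x i32> <-> i64 are convertible (equal size,
// single-value types, integral pointers), while i32 <-> i64 is rejected by the
// bit-width check and {i32} <-> i32 is rejected because a struct is not a
// single-value type.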
1756 | ||||
1757 | /// Generic routine to convert an SSA value to a value of a different | |||
1758 | /// type. | |||
1759 | /// | |||
1760 | /// This will try various different casting techniques, such as bitcasts, | |||
1761 | /// inttoptr, and ptrtoint casts. Use the \c canConvertValue predicate to test | |||
1762 | /// two types for viability with this routine. | |||
1763 | static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V, | |||
1764 | Type *NewTy) { | |||
1765 | Type *OldTy = V->getType(); | |||
1766 | assert(canConvertValue(DL, OldTy, NewTy) && "Value not convertible to type"); | |||
1767 | ||||
1768 | if (OldTy == NewTy) | |||
1769 | return V; | |||
1770 | ||||
1771 | assert(!(isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) && | |||
1772 | "Integer types must be the exact same to convert."); | |||
1773 | ||||
1774 | // See if we need inttoptr for this type pair. May require additional bitcast. | |||
1775 | if (OldTy->isIntOrIntVectorTy() && NewTy->isPtrOrPtrVectorTy()) { | |||
1776 | // Expand <2 x i32> to i8* --> <2 x i32> to i64 to i8* | |||
1777 | // Expand i128 to <2 x i8*> --> i128 to <2 x i64> to <2 x i8*> | |||
1778 | // Expand <4 x i32> to <2 x i8*> --> <4 x i32> to <2 x i64> to <2 x i8*> | |||
1779 | // Directly handle i64 to i8* | |||
1780 | return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.getIntPtrType(NewTy)), | |||
1781 | NewTy); | |||
1782 | } | |||
1783 | ||||
1784 | // See if we need ptrtoint for this type pair. May require additional bitcast. | |||
1785 | if (OldTy->isPtrOrPtrVectorTy() && NewTy->isIntOrIntVectorTy()) { | |||
1786 | // Expand <2 x i8*> to i128 --> <2 x i8*> to <2 x i64> to i128 | |||
1787 | // Expand i8* to <2 x i32> --> i8* to i64 to <2 x i32> | |||
1788 | // Expand <2 x i8*> to <4 x i32> --> <2 x i8*> to <2 x i64> to <4 x i32> | |||
1789 | // Expand i8* to i64 --> i8* to i64 to i64 | |||
1790 | return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)), | |||
1791 | NewTy); | |||
1792 | } | |||
1793 | ||||
1794 | if (OldTy->isPtrOrPtrVectorTy() && NewTy->isPtrOrPtrVectorTy()) { | |||
1795 | unsigned OldAS = OldTy->getPointerAddressSpace(); | |||
1796 | unsigned NewAS = NewTy->getPointerAddressSpace(); | |||
1797 | // To convert pointers with different address spaces (already checked to be | |||
1798 | // convertible, i.e. they have the same pointer size), so far we cannot use | |||
1799 | // `bitcast` (which is restricted to the same address space) or | |||
1800 | // `addrspacecast` (which is not always a no-op cast). Instead, use a pair | |||
1801 | // of no-op `ptrtoint`/`inttoptr` casts through an integer with the same bit | |||
1802 | // size. | |||
1803 | if (OldAS != NewAS) { | |||
1804 | assert(DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS)); | |||
1805 | return IRB.CreateIntToPtr(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)), | |||
1806 | NewTy); | |||
1807 | } | |||
1808 | } | |||
1809 | ||||
1810 | return IRB.CreateBitCast(V, NewTy); | |||
1811 | } | |||
1812 | ||||
1813 | /// Test whether the given slice use can be promoted to a vector. | |||
1814 | /// | |||
1815 | /// This function is called to test each entry in a partition which is slated | |||
1816 | /// for a single slice. | |||
1817 | static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S, | |||
1818 | VectorType *Ty, | |||
1819 | uint64_t ElementSize, | |||
1820 | const DataLayout &DL) { | |||
1821 | // First validate the slice offsets. | |||
1822 | uint64_t BeginOffset = | |||
1823 | std::max(S.beginOffset(), P.beginOffset()) - P.beginOffset(); | |||
1824 | uint64_t BeginIndex = BeginOffset / ElementSize; | |||
1825 | if (BeginIndex * ElementSize != BeginOffset || | |||
1826 | BeginIndex >= cast<FixedVectorType>(Ty)->getNumElements()) | |||
1827 | return false; | |||
1828 | uint64_t EndOffset = | |||
1829 | std::min(S.endOffset(), P.endOffset()) - P.beginOffset(); | |||
1830 | uint64_t EndIndex = EndOffset / ElementSize; | |||
1831 | if (EndIndex * ElementSize != EndOffset || | |||
1832 | EndIndex > cast<FixedVectorType>(Ty)->getNumElements()) | |||
1833 | return false; | |||
1834 | ||||
1835 | assert(EndIndex > BeginIndex && "Empty vector!"); | |||
1836 | uint64_t NumElements = EndIndex - BeginIndex; | |||
1837 | Type *SliceTy = (NumElements == 1) | |||
1838 | ? Ty->getElementType() | |||
1839 | : FixedVectorType::get(Ty->getElementType(), NumElements); | |||
1840 | ||||
1841 | Type *SplitIntTy = | |||
1842 | Type::getIntNTy(Ty->getContext(), NumElements * ElementSize * 8); | |||
1843 | ||||
1844 | Use *U = S.getUse(); | |||
1845 | ||||
1846 | if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) { | |||
1847 | if (MI->isVolatile()) | |||
1848 | return false; | |||
1849 | if (!S.isSplittable()) | |||
1850 | return false; // Skip any unsplittable intrinsics. | |||
1851 | } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) { | |||
1852 | if (!II->isLifetimeStartOrEnd() && !II->isDroppable()) | |||
1853 | return false; | |||
1854 | } else if (U->get()->getType()->getPointerElementType()->isStructTy()) { | |||
1855 | // Disable vector promotion when there are loads or stores of an FCA. | |||
1856 | return false; | |||
1857 | } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) { | |||
1858 | if (LI->isVolatile()) | |||
1859 | return false; | |||
1860 | Type *LTy = LI->getType(); | |||
1861 | if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) { | |||
1862 | assert(LTy->isIntegerTy()); | |||
1863 | LTy = SplitIntTy; | |||
1864 | } | |||
1865 | if (!canConvertValue(DL, SliceTy, LTy)) | |||
1866 | return false; | |||
1867 | } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) { | |||
1868 | if (SI->isVolatile()) | |||
1869 | return false; | |||
1870 | Type *STy = SI->getValueOperand()->getType(); | |||
1871 | if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) { | |||
1872 | assert(STy->isIntegerTy()); | |||
1873 | STy = SplitIntTy; | |||
1874 | } | |||
1875 | if (!canConvertValue(DL, STy, SliceTy)) | |||
1876 | return false; | |||
1877 | } else { | |||
1878 | return false; | |||
1879 | } | |||
1880 | ||||
1881 | return true; | |||
1882 | } | |||
1883 | ||||
1884 | /// Test whether the given alloca partitioning and range of slices can be | |||
1885 | /// promoted to a vector. | |||
1886 | /// | |||
1887 | /// This is a quick test to check whether we can rewrite a particular alloca | |||
1888 | /// partition (and its newly formed alloca) into a vector alloca with only | |||
1889 | /// whole-vector loads and stores such that it could be promoted to a vector | |||
1890 | /// SSA value. We only can ensure this for a limited set of operations, and we | |||
1891 | /// don't want to do the rewrites unless we are confident that the result will | |||
1892 | /// be promotable, so we have an early test here. | |||
1893 | static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) { | |||
1894 | // Collect the candidate types for vector-based promotion. Also track whether | |||
1895 | // we have different element types. | |||
1896 | SmallVector<VectorType *, 4> CandidateTys; | |||
1897 | Type *CommonEltTy = nullptr; | |||
1898 | bool HaveCommonEltTy = true; | |||
1899 | auto CheckCandidateType = [&](Type *Ty) { | |||
1900 | if (auto *VTy = dyn_cast<VectorType>(Ty)) { | |||
1901 | // Bail out, dropping all candidates, if this vector type differs in total size in bits from those already collected. | |||
1902 | if (!CandidateTys.empty()) { | |||
1903 | VectorType *V = CandidateTys[0]; | |||
1904 | if (DL.getTypeSizeInBits(VTy).getFixedSize() != | |||
1905 | DL.getTypeSizeInBits(V).getFixedSize()) { | |||
1906 | CandidateTys.clear(); | |||
1907 | return; | |||
1908 | } | |||
1909 | } | |||
1910 | CandidateTys.push_back(VTy); | |||
1911 | if (!CommonEltTy) | |||
1912 | CommonEltTy = VTy->getElementType(); | |||
1913 | else if (CommonEltTy != VTy->getElementType()) | |||
1914 | HaveCommonEltTy = false; | |||
1915 | } | |||
1916 | }; | |||
1917 | // Consider any loads or stores that are the exact size of the slice. | |||
1918 | for (const Slice &S : P) | |||
1919 | if (S.beginOffset() == P.beginOffset() && | |||
1920 | S.endOffset() == P.endOffset()) { | |||
1921 | if (auto *LI = dyn_cast<LoadInst>(S.getUse()->getUser())) | |||
1922 | CheckCandidateType(LI->getType()); | |||
1923 | else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser())) | |||
1924 | CheckCandidateType(SI->getValueOperand()->getType()); | |||
1925 | } | |||
1926 | ||||
1927 | // If we didn't find a vector type, nothing to do here. | |||
1928 | if (CandidateTys.empty()) | |||
1929 | return nullptr; | |||
1930 | ||||
1931 | // Remove non-integer vector types if we had multiple common element types. | |||
1932 | // FIXME: It'd be nice to replace them with integer vector types, but we can't | |||
1933 | // do that until all the backends are known to produce good code for all | |||
1934 | // integer vector types. | |||
1935 | if (!HaveCommonEltTy) { | |||
1936 | llvm::erase_if(CandidateTys, [](VectorType *VTy) { | |||
1937 | return !VTy->getElementType()->isIntegerTy(); | |||
1938 | }); | |||
1939 | ||||
1940 | // If there were no integer vector types, give up. | |||
1941 | if (CandidateTys.empty()) | |||
1942 | return nullptr; | |||
1943 | ||||
1944 | // Rank the remaining candidate vector types. This is easy because we know | |||
1945 | // they're all integer vectors. We sort by ascending number of elements. | |||
1946 | auto RankVectorTypes = [&DL](VectorType *RHSTy, VectorType *LHSTy) { | |||
1947 | (void)DL; | |||
1948 | assert(DL.getTypeSizeInBits(RHSTy).getFixedSize() == | |||
1949 | DL.getTypeSizeInBits(LHSTy).getFixedSize() && | |||
1950 | "Cannot have vector types of different sizes!"); | |||
1951 | assert(RHSTy->getElementType()->isIntegerTy() && | |||
1952 | "All non-integer types eliminated!"); | |||
1953 | assert(LHSTy->getElementType()->isIntegerTy() && | |||
1954 | "All non-integer types eliminated!"); | |||
1955 | return cast<FixedVectorType>(RHSTy)->getNumElements() < | |||
1956 | cast<FixedVectorType>(LHSTy)->getNumElements(); | |||
1957 | }; | |||
1958 | llvm::sort(CandidateTys, RankVectorTypes); | |||
1959 | CandidateTys.erase( | |||
1960 | std::unique(CandidateTys.begin(), CandidateTys.end(), RankVectorTypes), | |||
1961 | CandidateTys.end()); | |||
1962 | } else { | |||
1963 | // The only way to have the same element type in every vector type is to | |||
1964 | // have the same vector type. Check that and remove all but one. | |||
1965 | #ifndef NDEBUG | |||
1966 | for (VectorType *VTy : CandidateTys) { | |||
1967 | assert(VTy->getElementType() == CommonEltTy && | |||
1968 | "Unaccounted for element type!"); | |||
1969 | assert(VTy == CandidateTys[0] && | |||
1970 | "Different vector types with the same element type!"); | |||
1971 | } | |||
1972 | #endif | |||
1973 | CandidateTys.resize(1); | |||
1974 | } | |||
1975 | ||||
1976 | // Try each vector type, and return the one which works. | |||
1977 | auto CheckVectorTypeForPromotion = [&](VectorType *VTy) { | |||
1978 | uint64_t ElementSize = | |||
1979 | DL.getTypeSizeInBits(VTy->getElementType()).getFixedSize(); | |||
1980 | ||||
1981 | // While the definition of LLVM vectors is bitpacked, we don't support sizes | |||
1982 | // that aren't byte sized. | |||
1983 | if (ElementSize % 8) | |||
1984 | return false; | |||
1985 | assert((DL.getTypeSizeInBits(VTy).getFixedSize() % 8) == 0 && | |||
1986 | "vector size not a multiple of element size?"); | |||
1987 | ElementSize /= 8; | |||
1988 | ||||
1989 | for (const Slice &S : P) | |||
1990 | if (!isVectorPromotionViableForSlice(P, S, VTy, ElementSize, DL)) | |||
1991 | return false; | |||
1992 | ||||
1993 | for (const Slice *S : P.splitSliceTails()) | |||
1994 | if (!isVectorPromotionViableForSlice(P, *S, VTy, ElementSize, DL)) | |||
1995 | return false; | |||
1996 | ||||
1997 | return true; | |||
1998 | }; | |||
1999 | for (VectorType *VTy : CandidateTys) | |||
2000 | if (CheckVectorTypeForPromotion(VTy)) | |||
2001 | return VTy; | |||
2002 | ||||
2003 | return nullptr; | |||
2004 | } | |||
2005 | ||||
2006 | /// Test whether a slice of an alloca is valid for integer widening. | |||
2007 | /// | |||
2008 | /// This implements the necessary checking for the \c isIntegerWideningViable | |||
2009 | /// test below on a single slice of the alloca. | |||
2010 | static bool isIntegerWideningViableForSlice(const Slice &S, | |||
2011 | uint64_t AllocBeginOffset, | |||
2012 | Type *AllocaTy, | |||
2013 | const DataLayout &DL, | |||
2014 | bool &WholeAllocaOp) { | |||
2015 | uint64_t Size = DL.getTypeStoreSize(AllocaTy).getFixedSize(); | |||
2016 | ||||
2017 | uint64_t RelBegin = S.beginOffset() - AllocBeginOffset; | |||
2018 | uint64_t RelEnd = S.endOffset() - AllocBeginOffset; | |||
2019 | ||||
2020 | // We can't reasonably handle cases where the load or store extends past | |||
2021 | // the end of the alloca's type and into its padding. | |||
2022 | if (RelEnd > Size) | |||
2023 | return false; | |||
2024 | ||||
2025 | Use *U = S.getUse(); | |||
2026 | ||||
2027 | if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) { | |||
2028 | if (LI->isVolatile()) | |||
2029 | return false; | |||
2030 | // We can't handle loads that extend past the allocated memory. | |||
2031 | if (DL.getTypeStoreSize(LI->getType()).getFixedSize() > Size) | |||
2032 | return false; | |||
2033 | // So far, AllocaSliceRewriter does not support widening split slice tails | |||
2034 | // in rewriteIntegerLoad. | |||
2035 | if (S.beginOffset() < AllocBeginOffset) | |||
2036 | return false; | |||
2037 | // Note that we don't count vector loads or stores as whole-alloca | |||
2038 | // operations which enable integer widening because we would prefer to use | |||
2039 | // vector widening instead. | |||
2040 | if (!isa<VectorType>(LI->getType()) && RelBegin == 0 && RelEnd == Size) | |||
2041 | WholeAllocaOp = true; | |||
2042 | if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) { | |||
2043 | if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedSize()) | |||
2044 | return false; | |||
2045 | } else if (RelBegin != 0 || RelEnd != Size || | |||
2046 | !canConvertValue(DL, AllocaTy, LI->getType())) { | |||
2047 | // Non-integer loads need to be convertible from the alloca type so that | |||
2048 | // they are promotable. | |||
2049 | return false; | |||
2050 | } | |||
2051 | } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) { | |||
2052 | Type *ValueTy = SI->getValueOperand()->getType(); | |||
2053 | if (SI->isVolatile()) | |||
2054 | return false; | |||
2055 | // We can't handle stores that extend past the allocated memory. | |||
2056 | if (DL.getTypeStoreSize(ValueTy).getFixedSize() > Size) | |||
2057 | return false; | |||
2058 | // So far, AllocaSliceRewriter does not support widening split slice tails | |||
2059 | // in rewriteIntegerStore. | |||
2060 | if (S.beginOffset() < AllocBeginOffset) | |||
2061 | return false; | |||
2062 | // Note that we don't count vector loads or stores as whole-alloca | |||
2063 | // operations which enable integer widening because we would prefer to use | |||
2064 | // vector widening instead. | |||
2065 | if (!isa<VectorType>(ValueTy) && RelBegin == 0 && RelEnd == Size) | |||
2066 | WholeAllocaOp = true; | |||
2067 | if (IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) { | |||
2068 | if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedSize()) | |||
2069 | return false; | |||
2070 | } else if (RelBegin != 0 || RelEnd != Size || | |||
2071 | !canConvertValue(DL, ValueTy, AllocaTy)) { | |||
2072 | // Non-integer stores need to be convertible to the alloca type so that | |||
2073 | // they are promotable. | |||
2074 | return false; | |||
2075 | } | |||
2076 | } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) { | |||
2077 | if (MI->isVolatile() || !isa<Constant>(MI->getLength())) | |||
2078 | return false; | |||
2079 | if (!S.isSplittable()) | |||
2080 | return false; // Skip any unsplittable intrinsics. | |||
2081 | } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) { | |||
2082 | if (!II->isLifetimeStartOrEnd() && !II->isDroppable()) | |||
2083 | return false; | |||
2084 | } else { | |||
2085 | return false; | |||
2086 | } | |||
2087 | ||||
2088 | return true; | |||
2089 | } | |||
2090 | ||||
2091 | /// Test whether the given alloca partition's integer operations can be | |||
2092 | /// widened to promotable ones. | |||
2093 | /// | |||
2094 | /// This is a quick test to check whether we can rewrite the integer loads and | |||
2095 | /// stores to a particular alloca into wider loads and stores and be able to | |||
2096 | /// promote the resulting alloca. | |||
2097 | static bool isIntegerWideningViable(Partition &P, Type *AllocaTy, | |||
2098 | const DataLayout &DL) { | |||
2099 | uint64_t SizeInBits = DL.getTypeSizeInBits(AllocaTy).getFixedSize(); | |||
2100 | // Don't create integer types larger than the maximum bitwidth. | |||
2101 | if (SizeInBits > IntegerType::MAX_INT_BITS) | |||
2102 | return false; | |||
2103 | ||||
2104 | // Don't try to handle allocas with bit-padding. | |||
2105 | if (SizeInBits != DL.getTypeStoreSizeInBits(AllocaTy).getFixedSize()) | |||
2106 | return false; | |||
2107 | ||||
2108 | // We need to ensure that an integer type with the appropriate bitwidth can | |||
2109 | // be converted to the alloca type, whatever that is. We don't want to force | |||
2110 | // the alloca itself to have an integer type if there is a more suitable one. | |||
2111 | Type *IntTy = Type::getIntNTy(AllocaTy->getContext(), SizeInBits); | |||
2112 | if (!canConvertValue(DL, AllocaTy, IntTy) || | |||
2113 | !canConvertValue(DL, IntTy, AllocaTy)) | |||
2114 | return false; | |||
2115 | ||||
2116 | // While examining uses, we ensure that the alloca has a covering load or | |||
2117 | // store. We don't want to widen the integer operations only to fail to | |||
2118 | // promote due to some other unsplittable entry (which we may make splittable | |||
2119 | // later). However, if there are only splittable uses, go ahead and assume | |||
2120 | // that we cover the alloca. | |||
2121 | // FIXME: We shouldn't consider split slices that happen to start in the | |||
2122 | // partition here... | |||
2123 | bool WholeAllocaOp = P.empty() && DL.isLegalInteger(SizeInBits); | |||
2124 | ||||
2125 | for (const Slice &S : P) | |||
2126 | if (!isIntegerWideningViableForSlice(S, P.beginOffset(), AllocaTy, DL, | |||
2127 | WholeAllocaOp)) | |||
2128 | return false; | |||
2129 | ||||
2130 | for (const Slice *S : P.splitSliceTails()) | |||
2131 | if (!isIntegerWideningViableForSlice(*S, P.beginOffset(), AllocaTy, DL, | |||
2132 | WholeAllocaOp)) | |||
2133 | return false; | |||
2134 | ||||
2135 | return WholeAllocaOp; | |||
2136 | } | |||
2137 | ||||
2138 | static Value *extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V, | |||
2139 | IntegerType *Ty, uint64_t Offset, | |||
2140 | const Twine &Name) { | |||
2141 | LLVM_DEBUG(dbgs() << " start: " << *V << "\n"); | |||
2142 | IntegerType *IntTy = cast<IntegerType>(V->getType()); | |||
2143 | assert(DL.getTypeStoreSize(Ty).getFixedSize() + Offset <= | |||
2144 | DL.getTypeStoreSize(IntTy).getFixedSize() && | |||
2145 | "Element extends past full value"); | |||
2146 | uint64_t ShAmt = 8 * Offset; | |||
2147 | if (DL.isBigEndian()) | |||
2148 | ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedSize() - | |||
2149 | DL.getTypeStoreSize(Ty).getFixedSize() - Offset); | |||
2150 | if (ShAmt) { | |||
2151 | V = IRB.CreateLShr(V, ShAmt, Name + ".shift"); | |||
2152 | LLVM_DEBUG(dbgs() << " shifted: " << *V << "\n"); | |||
2153 | } | |||
2154 | assert(Ty->getBitWidth() <= IntTy->getBitWidth() && | |||
2155 | "Cannot extract to a larger integer!"); | |||
2156 | if (Ty != IntTy) { | |||
2157 | V = IRB.CreateTrunc(V, Ty, Name + ".trunc"); | |||
2158 | LLVM_DEBUG(dbgs() << " trunced: " << *V << "\n"); | |||
2159 | } | |||
2160 | return V; | |||
2161 | } | |||
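// Worked example, added for illustration (assumed little-endian unless noted):
// extracting an i8 at Offset 1 from an i32 value lshr's by 8 and truncates to
// i8; on a big-endian target the shift amount is 8 * (4 - 1 - 1) = 16, so the
// same byte of storage is selected.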
2162 | ||||
2163 | static Value *insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old, | |||
2164 | Value *V, uint64_t Offset, const Twine &Name) { | |||
2165 | IntegerType *IntTy = cast<IntegerType>(Old->getType()); | |||
2166 | IntegerType *Ty = cast<IntegerType>(V->getType()); | |||
2167 | assert(Ty->getBitWidth() <= IntTy->getBitWidth() && | |||
2168 | "Cannot insert a larger integer!"); | |||
2169 | LLVM_DEBUG(dbgs() << " start: " << *V << "\n"); | |||
2170 | if (Ty != IntTy) { | |||
2171 | V = IRB.CreateZExt(V, IntTy, Name + ".ext"); | |||
2172 | LLVM_DEBUG(dbgs() << " extended: " << *V << "\n"); | |||
2173 | } | |||
2174 | assert(DL.getTypeStoreSize(Ty).getFixedSize() + Offset <= | |||
2175 | DL.getTypeStoreSize(IntTy).getFixedSize() && | |||
2176 | "Element store outside of alloca store"); | |||
2177 | uint64_t ShAmt = 8 * Offset; | |||
2178 | if (DL.isBigEndian()) | |||
2179 | ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedSize() - | |||
2180 | DL.getTypeStoreSize(Ty).getFixedSize() - Offset); | |||
2181 | if (ShAmt) { | |||
2182 | V = IRB.CreateShl(V, ShAmt, Name + ".shift"); | |||
2183 | LLVM_DEBUG(dbgs() << " shifted: " << *V << "\n"); | |||
2184 | } | |||
2185 | ||||
2186 | if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) { | |||
2187 | APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt); | |||
2188 | Old = IRB.CreateAnd(Old, Mask, Name + ".mask"); | |||
2189 | LLVM_DEBUG(dbgs() << " masked: " << *Old << "\n"); | |||
2190 | V = IRB.CreateOr(Old, V, Name + ".insert"); | |||
2191 | LLVM_DEBUG(dbgs() << " inserted: " << *V << "\n"); | |||
2192 | } | |||
2193 | return V; | |||
2194 | } | |||
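// Worked example, added for illustration (little-endian, illustrative
// constants): inserting an i8 at Offset 1 into an i32 zext's the value, shl's
// it by 8, masks the old value with ~(0xFF << 8) = 0xFFFF00FF, and or's the
// two together.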
2195 | ||||
2196 | static Value *extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex, | |||
2197 | unsigned EndIndex, const Twine &Name) { | |||
2198 | auto *VecTy = cast<FixedVectorType>(V->getType()); | |||
2199 | unsigned NumElements = EndIndex - BeginIndex; | |||
2200 | assert(NumElements <= VecTy->getNumElements() && "Too many elements!"); | |||
2201 | ||||
2202 | if (NumElements == VecTy->getNumElements()) | |||
2203 | return V; | |||
2204 | ||||
2205 | if (NumElements == 1) { | |||
2206 | V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex), | |||
2207 | Name + ".extract"); | |||
2208 | LLVM_DEBUG(dbgs() << " extract: " << *V << "\n"); | |||
2209 | return V; | |||
2210 | } | |||
2211 | ||||
2212 | SmallVector<int, 8> Mask; | |||
2213 | Mask.reserve(NumElements); | |||
2214 | for (unsigned i = BeginIndex; i != EndIndex; ++i) | |||
2215 | Mask.push_back(i); | |||
2216 | V = IRB.CreateShuffleVector(V, Mask, Name + ".extract"); | |||
2217 | LLVM_DEBUG(dbgs() << " shuffle: " << *V << "\n"); | |||
2218 | return V; | |||
2219 | } | |||
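// Added illustration: extracting elements [1, 3) from a <4 x i32> value emits
// a shufflevector with mask <1, 2> yielding a <2 x i32>; a single element
// (e.g. [2, 3)) uses extractelement instead, and a full-width extract returns
// the value unchanged.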
2220 | ||||
2221 | static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V, | |||
2222 | unsigned BeginIndex, const Twine &Name) { | |||
2223 | VectorType *VecTy = cast<VectorType>(Old->getType()); | |||
2224 | assert(VecTy && "Can only insert a vector into a vector"); | |||
2225 | ||||
2226 | VectorType *Ty = dyn_cast<VectorType>(V->getType()); | |||
2227 | if (!Ty) { | |||
2228 | // Single element to insert. | |||
2229 | V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex), | |||
2230 | Name + ".insert"); | |||
2231 | LLVM_DEBUG(dbgs() << " insert: " << *V << "\n"); | |||
2232 | return V; | |||
2233 | } | |||
2234 | ||||
2235 | assert(cast<FixedVectorType>(Ty)->getNumElements() <= | |||
2236 | cast<FixedVectorType>(VecTy)->getNumElements() && | |||
2237 | "Too many elements!"); | |||
2238 | if (cast<FixedVectorType>(Ty)->getNumElements() == | |||
2239 | cast<FixedVectorType>(VecTy)->getNumElements()) { | |||
2240 | assert(V->getType() == VecTy && "Vector type mismatch"); | |||
2241 | return V; | |||
2242 | } | |||
2243 | unsigned EndIndex = BeginIndex + cast<FixedVectorType>(Ty)->getNumElements(); | |||
2244 | ||||
2245 | // When inserting a smaller vector into the larger to store, we first | |||
2246 | // use a shuffle vector to widen it with undef elements, and then | |||
2247 | // a second shuffle vector to select between the loaded vector and the | |||
2248 | // incoming vector. | |||
2249 | SmallVector<int, 8> Mask; | |||
2250 | Mask.reserve(cast<FixedVectorType>(VecTy)->getNumElements()); | |||
2251 | for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i) | |||
2252 | if (i >= BeginIndex && i < EndIndex) | |||
2253 | Mask.push_back(i - BeginIndex); | |||
2254 | else | |||
2255 | Mask.push_back(-1); | |||
2256 | V = IRB.CreateShuffleVector(V, Mask, Name + ".expand"); | |||
2257 | LLVM_DEBUG(dbgs() << " shuffle: " << *V << "\n"); | |||
2258 | ||||
2259 | SmallVector<Constant *, 8> Mask2; | |||
2260 | Mask2.reserve(cast<FixedVectorType>(VecTy)->getNumElements()); | |||
2261 | for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i) | |||
2262 | Mask2.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex)); | |||
2263 | ||||
2264 | V = IRB.CreateSelect(ConstantVector::get(Mask2), V, Old, Name + "blend"); | |||
2265 | ||||
2266 | LLVM_DEBUG(dbgs() << " blend: " << *V << "\n"); | |||
2267 | return V; | |||
2268 | } | |||
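// Added illustration: inserting a <2 x i32> at BeginIndex 1 of a <4 x i32>
// first widens it with shuffle mask <undef, 0, 1, undef> (the -1 lanes become
// undef) and then selects lanes with the constant mask
// <i1 false, i1 true, i1 true, i1 false>, keeping Old in the untouched lanes.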
2269 | ||||
2270 | /// Visitor to rewrite instructions using a particular slice of an alloca | |||
2271 | /// to use a new alloca. | |||
2272 | /// | |||
2273 | /// Also implements the rewriting to vector-based accesses when the partition | |||
2274 | /// passes the isVectorPromotionViable predicate. Most of the rewriting logic | |||
2275 | /// lives here. | |||
2276 | class llvm::sroa::AllocaSliceRewriter | |||
2277 | : public InstVisitor<AllocaSliceRewriter, bool> { | |||
2278 | // Befriend the base class so it can delegate to private visit methods. | |||
2279 | friend class InstVisitor<AllocaSliceRewriter, bool>; | |||
2280 | ||||
2281 | using Base = InstVisitor<AllocaSliceRewriter, bool>; | |||
2282 | ||||
2283 | const DataLayout &DL; | |||
2284 | AllocaSlices &AS; | |||
2285 | SROA &Pass; | |||
2286 | AllocaInst &OldAI, &NewAI; | |||
2287 | const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset; | |||
2288 | Type *NewAllocaTy; | |||
2289 | ||||
2290 | // This is a convenience and flag variable that will be null unless the new | |||
2291 | // alloca's integer operations should be widened to this integer type due to | |||
2292 | // passing isIntegerWideningViable above. If it is non-null, the desired | |||
2293 | // integer type will be stored here for easy access during rewriting. | |||
2294 | IntegerType *IntTy; | |||
2295 | ||||
2296 | // If we are rewriting an alloca partition which can be written as pure | |||
2297 | // vector operations, we stash extra information here. When VecTy is | |||
2298 | // non-null, we have some strict guarantees about the rewritten alloca: | |||
2299 | // - The new alloca is exactly the size of the vector type here. | |||
2300 | // - The accesses all either map to the entire vector or to a single | |||
2301 | // element. | |||
2302 | // - The set of accessing instructions is only one of those handled above | |||
2303 | // in isVectorPromotionViable. Generally these are the same access kinds | |||
2304 | // which are promotable via mem2reg. | |||
2305 | VectorType *VecTy; | |||
2306 | Type *ElementTy; | |||
2307 | uint64_t ElementSize; | |||
2308 | ||||
2309 | // The original offset of the slice currently being rewritten relative to | |||
2310 | // the original alloca. | |||
2311 | uint64_t BeginOffset = 0; | |||
2312 | uint64_t EndOffset = 0; | |||
2313 | ||||
2314 | // The new offsets of the slice currently being rewritten relative to the | |||
2315 | // original alloca. | |||
2316 | uint64_t NewBeginOffset = 0, NewEndOffset = 0; | |||
2317 | ||||
2318 | uint64_t SliceSize = 0; | |||
2319 | bool IsSplittable = false; | |||
2320 | bool IsSplit = false; | |||
2321 | Use *OldUse = nullptr; | |||
2322 | Instruction *OldPtr = nullptr; | |||
2323 | ||||
2324 | // Track post-rewrite users which are PHI nodes and Selects. | |||
2325 | SmallSetVector<PHINode *, 8> &PHIUsers; | |||
2326 | SmallSetVector<SelectInst *, 8> &SelectUsers; | |||
2327 | ||||
2328 | // Utility IR builder, whose name prefix is set up for each visited use, and | |||
2329 | // the insertion point is set to point to the user. | |||
2330 | IRBuilderTy IRB; | |||
2331 | ||||
2332 | public: | |||
2333 | AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROA &Pass, | |||
2334 | AllocaInst &OldAI, AllocaInst &NewAI, | |||
2335 | uint64_t NewAllocaBeginOffset, | |||
2336 | uint64_t NewAllocaEndOffset, bool IsIntegerPromotable, | |||
2337 | VectorType *PromotableVecTy, | |||
2338 | SmallSetVector<PHINode *, 8> &PHIUsers, | |||
2339 | SmallSetVector<SelectInst *, 8> &SelectUsers) | |||
2340 | : DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI), | |||
2341 | NewAllocaBeginOffset(NewAllocaBeginOffset), | |||
2342 | NewAllocaEndOffset(NewAllocaEndOffset), | |||
2343 | NewAllocaTy(NewAI.getAllocatedType()), | |||
2344 | IntTy( | |||
2345 | IsIntegerPromotable | |||
2346 | ? Type::getIntNTy(NewAI.getContext(), | |||
2347 | DL.getTypeSizeInBits(NewAI.getAllocatedType()) | |||
2348 | .getFixedSize()) | |||
2349 | : nullptr), | |||
2350 | VecTy(PromotableVecTy), | |||
2351 | ElementTy(VecTy ? VecTy->getElementType() : nullptr), | |||
2352 | ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy).getFixedSize() / 8 | |||
2353 | : 0), | |||
2354 | PHIUsers(PHIUsers), SelectUsers(SelectUsers), | |||
2355 | IRB(NewAI.getContext(), ConstantFolder()) { | |||
2356 | if (VecTy) { | |||
2357 | assert((DL.getTypeSizeInBits(ElementTy).getFixedSize() % 8) == 0 &&((void)0) | |||
2358 | "Only multiple-of-8 sized vector elements are viable")((void)0); | |||
2359 | ++NumVectorized; | |||
2360 | } | |||
2361 | assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy))((void)0); | |||
2362 | } | |||
2363 | ||||
2364 | bool visit(AllocaSlices::const_iterator I) { | |||
2365 | bool CanSROA = true; | |||
2366 | BeginOffset = I->beginOffset(); | |||
2367 | EndOffset = I->endOffset(); | |||
2368 | IsSplittable = I->isSplittable(); | |||
2369 | IsSplit = | |||
2370 | BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset; | |||
2371 | LLVM_DEBUG(dbgs() << " rewriting " << (IsSplit ? "split " : ""))do { } while (false); | |||
2372 | LLVM_DEBUG(AS.printSlice(dbgs(), I, ""))do { } while (false); | |||
2373 | LLVM_DEBUG(dbgs() << "\n")do { } while (false); | |||
2374 | ||||
2375 | // Compute the intersecting offset range. | |||
2376 | assert(BeginOffset < NewAllocaEndOffset)((void)0); | |||
2377 | assert(EndOffset > NewAllocaBeginOffset)((void)0); | |||
2378 | NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset); | |||
2379 | NewEndOffset = std::min(EndOffset, NewAllocaEndOffset); | |||
2380 | ||||
2381 | SliceSize = NewEndOffset - NewBeginOffset; | |||
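// For example (illustrative numbers): rewriting a slice covering [4, 12) of
// the original alloca against a new partition covering [8, 16) yields
// NewBeginOffset = 8, NewEndOffset = 12 and SliceSize = 4, and the slice is
// treated as split.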
2382 | ||||
2383 | OldUse = I->getUse(); | |||
2384 | OldPtr = cast<Instruction>(OldUse->get()); | |||
2385 | ||||
2386 | Instruction *OldUserI = cast<Instruction>(OldUse->getUser()); | |||
2387 | IRB.SetInsertPoint(OldUserI); | |||
2388 | IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc()); | |||
2389 | IRB.getInserter().SetNamePrefix( | |||
2390 | Twine(NewAI.getName()) + "." + Twine(BeginOffset) + "."); | |||
2391 | ||||
2392 | CanSROA &= visit(cast<Instruction>(OldUse->getUser())); | |||
2393 | if (VecTy || IntTy) | |||
2394 | assert(CanSROA)((void)0); | |||
2395 | return CanSROA; | |||
2396 | } | |||
2397 | ||||
2398 | private: | |||
2399 | // Make sure the other visit overloads are visible. | |||
2400 | using Base::visit; | |||
2401 | ||||
2402 | // Every instruction which can end up as a user must have a rewrite rule. | |||
2403 | bool visitInstruction(Instruction &I) { | |||
2404 | LLVM_DEBUG(dbgs() << " !!!! Cannot rewrite: " << I << "\n")do { } while (false); | |||
2405 | llvm_unreachable("No rewrite rule for this instruction!")__builtin_unreachable(); | |||
2406 | } | |||
2407 | ||||
2408 | Value *getNewAllocaSlicePtr(IRBuilderTy &IRB, Type *PointerTy) { | |||
2409 | // Note that the offset computation can use BeginOffset or NewBeginOffset | |||
2410 | // interchangeably for unsplit slices. | |||
2411 | assert(IsSplit || BeginOffset == NewBeginOffset)((void)0); | |||
2412 | uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset; | |||
2413 | ||||
2414 | #ifndef NDEBUG | |||
2415 | StringRef OldName = OldPtr->getName(); | |||
2416 | // Skip through the last '.sroa.' component of the name. | |||
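// For example (illustrative): an old pointer named "dst.sroa.3.16.copyload"
// is reduced to "copyload" so the rewritten pointer gets a short, readable
// name.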
2417 | size_t LastSROAPrefix = OldName.rfind(".sroa."); | |||
2418 | if (LastSROAPrefix != StringRef::npos) { | |||
2419 | OldName = OldName.substr(LastSROAPrefix + strlen(".sroa.")); | |||
2420 | // Look for an SROA slice index. | |||
2421 | size_t IndexEnd = OldName.find_first_not_of("0123456789"); | |||
2422 | if (IndexEnd != StringRef::npos && OldName[IndexEnd] == '.') { | |||
2423 | // Strip the index and look for the offset. | |||
2424 | OldName = OldName.substr(IndexEnd + 1); | |||
2425 | size_t OffsetEnd = OldName.find_first_not_of("0123456789"); | |||
2426 | if (OffsetEnd != StringRef::npos && OldName[OffsetEnd] == '.') | |||
2427 | // Strip the offset. | |||
2428 | OldName = OldName.substr(OffsetEnd + 1); | |||
2429 | } | |||
2430 | } | |||
2431 | // Strip any SROA suffixes as well. | |||
2432 | OldName = OldName.substr(0, OldName.find(".sroa_")); | |||
2433 | #endif | |||
2434 | ||||
2435 | return getAdjustedPtr(IRB, DL, &NewAI, | |||
2436 | APInt(DL.getIndexTypeSizeInBits(PointerTy), Offset), | |||
2437 | PointerTy, | |||
2438 | #ifndef NDEBUG | |||
2439 | Twine(OldName) + "." | |||
2440 | #else | |||
2441 | Twine() | |||
2442 | #endif | |||
2443 | ); | |||
2444 | } | |||
2445 | ||||
2446 | /// Compute a suitable alignment for accessing this slice of the *new* | |||
2447 | /// alloca. | |||
2448 | /// | |||
2449 | /// This is the new alloca's alignment reduced according to the slice's byte | |||
2450 | /// offset from the start of the new alloca. | |||
2451 | Align getSliceAlign() { | |||
2452 | return commonAlignment(NewAI.getAlign(), | |||
2453 | NewBeginOffset - NewAllocaBeginOffset); | |||
2454 | } | |||
2455 | ||||
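/// Map a byte offset within the new alloca onto a vector element index.
///
/// For example (illustrative): with a <4 x i32> VecTy, ElementSize is 4 bytes,
/// so an access that begins 8 bytes into the new alloca maps to element 2.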
2456 | unsigned getIndex(uint64_t Offset) { | |||
2457 | assert(VecTy && "Can only call getIndex when rewriting a vector")((void)0); | |||
2458 | uint64_t RelOffset = Offset - NewAllocaBeginOffset; | |||
2459 | assert(RelOffset / ElementSize < UINT32_MAX && "Index out of bounds")((void)0); | |||
2460 | uint32_t Index = RelOffset / ElementSize; | |||
2461 | assert(Index * ElementSize == RelOffset)((void)0); | |||
2462 | return Index; | |||
2463 | } | |||
2464 | ||||
2465 | void deleteIfTriviallyDead(Value *V) { | |||
2466 | Instruction *I = cast<Instruction>(V); | |||
2467 | if (isInstructionTriviallyDead(I)) | |||
2468 | Pass.DeadInsts.push_back(I); | |||
2469 | } | |||
2470 | ||||
2471 | Value *rewriteVectorizedLoadInst(LoadInst &LI) { | |||
2472 | unsigned BeginIndex = getIndex(NewBeginOffset); | |||
2473 | unsigned EndIndex = getIndex(NewEndOffset); | |||
2474 | assert(EndIndex > BeginIndex && "Empty vector!")((void)0); | |||
2475 | ||||
2476 | LoadInst *Load = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI, | |||
2477 | NewAI.getAlign(), "load"); | |||
2478 | ||||
2479 | Load->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access, | |||
2480 | LLVMContext::MD_access_group}); | |||
2481 | return extractVector(IRB, Load, BeginIndex, EndIndex, "vec"); | |||
2482 | } | |||
2483 | ||||
2484 | Value *rewriteIntegerLoad(LoadInst &LI) { | |||
2485 | assert(IntTy && "We cannot insert an integer to the alloca")((void)0); | |||
2486 | assert(!LI.isVolatile())((void)0); | |||
2487 | Value *V = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI, | |||
2488 | NewAI.getAlign(), "load"); | |||
2489 | V = convertValue(DL, IRB, V, IntTy); | |||
2490 | assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset")((void)0); | |||
2491 | uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset; | |||
2492 | if (Offset > 0 || NewEndOffset < NewAllocaEndOffset) { | |||
2493 | IntegerType *ExtractTy = Type::getIntNTy(LI.getContext(), SliceSize * 8); | |||
2494 | V = extractInteger(DL, IRB, V, ExtractTy, Offset, "extract"); | |||
2495 | } | |||
2496 | // It is possible that the extracted type is not the load type. This | |||
2497 | // happens if there is a load past the end of the alloca, and as | |||
2498 | // a consequence the slice is narrower but still a candidate for integer | |||
2499 | // lowering. To handle this case, we just zero extend the extracted | |||
2500 | // integer. | |||
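// For example (illustrative): an i32 load whose slice covers only 2 bytes is
// rewritten as an i16 extract from the alloca-wide integer followed by a zext
// back to i32.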
2501 | assert(cast<IntegerType>(LI.getType())->getBitWidth() >= SliceSize * 8 &&((void)0) | |||
2502 | "Can only handle an extract for an overly wide load")((void)0); | |||
2503 | if (cast<IntegerType>(LI.getType())->getBitWidth() > SliceSize * 8) | |||
2504 | V = IRB.CreateZExt(V, LI.getType()); | |||
2505 | return V; | |||
2506 | } | |||
2507 | ||||
2508 | bool visitLoadInst(LoadInst &LI) { | |||
2509 | LLVM_DEBUG(dbgs() << " original: " << LI << "\n")do { } while (false); | |||
2510 | Value *OldOp = LI.getOperand(0); | |||
2511 | assert(OldOp == OldPtr)((void)0); | |||
2512 | ||||
2513 | AAMDNodes AATags; | |||
2514 | LI.getAAMetadata(AATags); | |||
2515 | ||||
2516 | unsigned AS = LI.getPointerAddressSpace(); | |||
2517 | ||||
2518 | Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), SliceSize * 8) | |||
2519 | : LI.getType(); | |||
2520 | const bool IsLoadPastEnd = | |||
2521 | DL.getTypeStoreSize(TargetTy).getFixedSize() > SliceSize; | |||
2522 | bool IsPtrAdjusted = false; | |||
2523 | Value *V; | |||
2524 | if (VecTy) { | |||
2525 | V = rewriteVectorizedLoadInst(LI); | |||
2526 | } else if (IntTy && LI.getType()->isIntegerTy()) { | |||
2527 | V = rewriteIntegerLoad(LI); | |||
2528 | } else if (NewBeginOffset == NewAllocaBeginOffset && | |||
2529 | NewEndOffset == NewAllocaEndOffset && | |||
2530 | (canConvertValue(DL, NewAllocaTy, TargetTy) || | |||
2531 | (IsLoadPastEnd && NewAllocaTy->isIntegerTy() && | |||
2532 | TargetTy->isIntegerTy()))) { | |||
2533 | LoadInst *NewLI = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI, | |||
2534 | NewAI.getAlign(), LI.isVolatile(), | |||
2535 | LI.getName()); | |||
2536 | if (AATags) | |||
2537 | NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); | |||
2538 | if (LI.isVolatile()) | |||
2539 | NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); | |||
2540 | if (NewLI->isAtomic()) | |||
2541 | NewLI->setAlignment(LI.getAlign()); | |||
2542 | ||||
2543 | // Any !nonnull metadata or !range metadata on the old load is also valid | |||
2544 | // on the new load. This is true in some cases even when the loads | |||
2545 | // are different types, for example by mapping !nonnull metadata to | |||
2546 | // !range metadata by modeling the null pointer constant converted to the | |||
2547 | // integer type. | |||
2548 | // FIXME: Add support for range metadata here. Currently the utilities | |||
2549 | // for this don't propagate range metadata in trivial cases from one | |||
2550 | // integer load to another, don't handle non-addrspace-0 null pointers | |||
2551 | // correctly, and don't have any support for mapping ranges as the | |||
2552 | // integer type becomes wider or narrower. | |||
2553 | if (MDNode *N = LI.getMetadata(LLVMContext::MD_nonnull)) | |||
2554 | copyNonnullMetadata(LI, N, *NewLI); | |||
2555 | ||||
2556 | // Try to preserve nonnull metadata | |||
2557 | V = NewLI; | |||
2558 | ||||
2559 | // If this is an integer load past the end of the slice (which means the | |||
2560 | // bytes outside the slice are undef or this load is dead) just forcibly | |||
2561 | // fix the integer size with correct handling of endianness. | |||
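// For example (illustrative): on a big-endian target, an i32 load of an i16
// alloca is zero-extended and then shifted left by 16 bits so the defined
// bytes occupy the most significant half, matching the in-memory layout.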
2562 | if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy)) | |||
2563 | if (auto *TITy = dyn_cast<IntegerType>(TargetTy)) | |||
2564 | if (AITy->getBitWidth() < TITy->getBitWidth()) { | |||
2565 | V = IRB.CreateZExt(V, TITy, "load.ext"); | |||
2566 | if (DL.isBigEndian()) | |||
2567 | V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(), | |||
2568 | "endian_shift"); | |||
2569 | } | |||
2570 | } else { | |||
2571 | Type *LTy = TargetTy->getPointerTo(AS); | |||
2572 | LoadInst *NewLI = | |||
2573 | IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy), | |||
2574 | getSliceAlign(), LI.isVolatile(), LI.getName()); | |||
2575 | if (AATags) | |||
2576 | NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); | |||
2577 | if (LI.isVolatile()) | |||
2578 | NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); | |||
2579 | NewLI->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access, | |||
2580 | LLVMContext::MD_access_group}); | |||
2581 | ||||
2582 | V = NewLI; | |||
2583 | IsPtrAdjusted = true; | |||
2584 | } | |||
2585 | V = convertValue(DL, IRB, V, TargetTy); | |||
2586 | ||||
2587 | if (IsSplit) { | |||
2588 | assert(!LI.isVolatile())((void)0); | |||
2589 | assert(LI.getType()->isIntegerTy() &&((void)0) | |||
2590 | "Only integer type loads and stores are split")((void)0); | |||
2591 | assert(SliceSize < DL.getTypeStoreSize(LI.getType()).getFixedSize() &&((void)0) | |||
2592 | "Split load isn't smaller than original load")((void)0); | |||
2593 | assert(DL.typeSizeEqualsStoreSize(LI.getType()) &&((void)0) | |||
2594 | "Non-byte-multiple bit width")((void)0); | |||
2595 | // Move the insertion point just past the load so that we can refer to it. | |||
2596 | IRB.SetInsertPoint(&*std::next(BasicBlock::iterator(&LI))); | |||
2597 | // Create a placeholder value with the same type as LI to use as the | |||
2598 | // basis for the new value. This allows us to replace the uses of LI with | |||
2599 | // the computed value, and then replace the placeholder with LI, leaving | |||
2600 | // LI only used for this computation. | |||
2601 | Value *Placeholder = new LoadInst( | |||
2602 | LI.getType(), UndefValue::get(LI.getType()->getPointerTo(AS)), "", | |||
2603 | false, Align(1)); | |||
2604 | V = insertInteger(DL, IRB, Placeholder, V, NewBeginOffset - BeginOffset, | |||
2605 | "insert"); | |||
2606 | LI.replaceAllUsesWith(V); | |||
2607 | Placeholder->replaceAllUsesWith(&LI); | |||
2608 | Placeholder->deleteValue(); | |||
2609 | } else { | |||
2610 | LI.replaceAllUsesWith(V); | |||
2611 | } | |||
2612 | ||||
2613 | Pass.DeadInsts.push_back(&LI); | |||
2614 | deleteIfTriviallyDead(OldOp); | |||
2615 | LLVM_DEBUG(dbgs() << " to: " << *V << "\n")do { } while (false); | |||
2616 | return !LI.isVolatile() && !IsPtrAdjusted; | |||
2617 | } | |||
2618 | ||||
2619 | bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp, | |||
2620 | AAMDNodes AATags) { | |||
2621 | if (V->getType() != VecTy) { | |||
2622 | unsigned BeginIndex = getIndex(NewBeginOffset); | |||
2623 | unsigned EndIndex = getIndex(NewEndOffset); | |||
2624 | assert(EndIndex > BeginIndex && "Empty vector!")((void)0); | |||
2625 | unsigned NumElements = EndIndex - BeginIndex; | |||
2626 | assert(NumElements <= cast<FixedVectorType>(VecTy)->getNumElements() &&((void)0) | |||
2627 | "Too many elements!")((void)0); | |||
2628 | Type *SliceTy = (NumElements == 1) | |||
2629 | ? ElementTy | |||
2630 | : FixedVectorType::get(ElementTy, NumElements); | |||
2631 | if (V->getType() != SliceTy) | |||
2632 | V = convertValue(DL, IRB, V, SliceTy); | |||
2633 | ||||
2634 | // Mix in the existing elements. | |||
2635 | Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI, | |||
2636 | NewAI.getAlign(), "load"); | |||
2637 | V = insertVector(IRB, Old, V, BeginIndex, "vec"); | |||
2638 | } | |||
2639 | StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign()); | |||
2640 | Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access, | |||
2641 | LLVMContext::MD_access_group}); | |||
2642 | if (AATags) | |||
2643 | Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); | |||
2644 | Pass.DeadInsts.push_back(&SI); | |||
2645 | ||||
2646 | LLVM_DEBUG(dbgs() << " to: " << *Store << "\n")do { } while (false); | |||
2647 | return true; | |||
2648 | } | |||
2649 | ||||
2650 | bool rewriteIntegerStore(Value *V, StoreInst &SI, AAMDNodes AATags) { | |||
2651 | assert(IntTy && "We cannot extract an integer from the alloca")((void)0); | |||
2652 | assert(!SI.isVolatile())((void)0); | |||
2653 | if (DL.getTypeSizeInBits(V->getType()).getFixedSize() != | |||
2654 | IntTy->getBitWidth()) { | |||
2655 | Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI, | |||
2656 | NewAI.getAlign(), "oldload"); | |||
2657 | Old = convertValue(DL, IRB, Old, IntTy); | |||
2658 | assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset")((void)0); | |||
2659 | uint64_t Offset = BeginOffset - NewAllocaBeginOffset; | |||
2660 | V = insertInteger(DL, IRB, Old, SI.getValueOperand(), Offset, "insert"); | |||
2661 | } | |||
2662 | V = convertValue(DL, IRB, V, NewAllocaTy); | |||
2663 | StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign()); | |||
2664 | Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access, | |||
2665 | LLVMContext::MD_access_group}); | |||
2666 | if (AATags) | |||
2667 | Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); | |||
2668 | Pass.DeadInsts.push_back(&SI); | |||
2669 | LLVM_DEBUG(dbgs() << " to: " << *Store << "\n")do { } while (false); | |||
2670 | return true; | |||
2671 | } | |||
2672 | ||||
2673 | bool visitStoreInst(StoreInst &SI) { | |||
2674 | LLVM_DEBUG(dbgs() << " original: " << SI << "\n")do { } while (false); | |||
2675 | Value *OldOp = SI.getOperand(1); | |||
2676 | assert(OldOp == OldPtr)((void)0); | |||
2677 | ||||
2678 | AAMDNodes AATags; | |||
2679 | SI.getAAMetadata(AATags); | |||
2680 | ||||
2681 | Value *V = SI.getValueOperand(); | |||
2682 | ||||
2683 | // Strip all inbounds GEPs and pointer casts to try to dig out any root | |||
2684 | // alloca that should be re-examined after promoting this alloca. | |||
2685 | if (V->getType()->isPointerTy()) | |||
2686 | if (AllocaInst *AI = dyn_cast<AllocaInst>(V->stripInBoundsOffsets())) | |||
2687 | Pass.PostPromotionWorklist.insert(AI); | |||
2688 | ||||
2689 | if (SliceSize < DL.getTypeStoreSize(V->getType()).getFixedSize()) { | |||
2690 | assert(!SI.isVolatile())((void)0); | |||
2691 | assert(V->getType()->isIntegerTy() &&((void)0) | |||
2692 | "Only integer type loads and stores are split")((void)0); | |||
2693 | assert(DL.typeSizeEqualsStoreSize(V->getType()) &&((void)0) | |||
2694 | "Non-byte-multiple bit width")((void)0); | |||
2695 | IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), SliceSize * 8); | |||
2696 | V = extractInteger(DL, IRB, V, NarrowTy, NewBeginOffset - BeginOffset, | |||
2697 | "extract"); | |||
2698 | } | |||
2699 | ||||
2700 | if (VecTy) | |||
2701 | return rewriteVectorizedStoreInst(V, SI, OldOp, AATags); | |||
2702 | if (IntTy && V->getType()->isIntegerTy()) | |||
2703 | return rewriteIntegerStore(V, SI, AATags); | |||
2704 | ||||
2705 | const bool IsStorePastEnd = | |||
2706 | DL.getTypeStoreSize(V->getType()).getFixedSize() > SliceSize; | |||
2707 | StoreInst *NewSI; | |||
2708 | if (NewBeginOffset == NewAllocaBeginOffset && | |||
2709 | NewEndOffset == NewAllocaEndOffset && | |||
2710 | (canConvertValue(DL, V->getType(), NewAllocaTy) || | |||
2711 | (IsStorePastEnd && NewAllocaTy->isIntegerTy() && | |||
2712 | V->getType()->isIntegerTy()))) { | |||
2713 | // If this is an integer store past the end of slice (and thus the bytes | |||
2714 | // past that point are irrelevant or this is unreachable), truncate the | |||
2715 | // value prior to storing. | |||
2716 | if (auto *VITy = dyn_cast<IntegerType>(V->getType())) | |||
2717 | if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy)) | |||
2718 | if (VITy->getBitWidth() > AITy->getBitWidth()) { | |||
2719 | if (DL.isBigEndian()) | |||
2720 | V = IRB.CreateLShr(V, VITy->getBitWidth() - AITy->getBitWidth(), | |||
2721 | "endian_shift"); | |||
2722 | V = IRB.CreateTrunc(V, AITy, "load.trunc"); | |||
2723 | } | |||
2724 | ||||
2725 | V = convertValue(DL, IRB, V, NewAllocaTy); | |||
2726 | NewSI = | |||
2727 | IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign(), SI.isVolatile()); | |||
2728 | } else { | |||
2729 | unsigned AS = SI.getPointerAddressSpace(); | |||
2730 | Value *NewPtr = getNewAllocaSlicePtr(IRB, V->getType()->getPointerTo(AS)); | |||
2731 | NewSI = | |||
2732 | IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(), SI.isVolatile()); | |||
2733 | } | |||
2734 | NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access, | |||
2735 | LLVMContext::MD_access_group}); | |||
2736 | if (AATags) | |||
2737 | NewSI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); | |||
2738 | if (SI.isVolatile()) | |||
2739 | NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID()); | |||
2740 | if (NewSI->isAtomic()) | |||
2741 | NewSI->setAlignment(SI.getAlign()); | |||
2742 | Pass.DeadInsts.push_back(&SI); | |||
2743 | deleteIfTriviallyDead(OldOp); | |||
2744 | ||||
2745 | LLVM_DEBUG(dbgs() << " to: " << *NewSI << "\n")do { } while (false); | |||
2746 | return NewSI->getPointerOperand() == &NewAI && !SI.isVolatile(); | |||
2747 | } | |||
2748 | ||||
2749 | /// Compute an integer value from splatting an i8 across the given | |||
2750 | /// number of bytes. | |||
2751 | /// | |||
2752 | /// Note that this routine assumes an i8 is a byte. If that isn't true, don't | |||
2753 | /// call this routine. | |||
2754 | /// FIXME: Heed the advice above. | |||
2755 | /// | |||
2756 | /// \param V The i8 value to splat. | |||
2757 | /// \param Size The number of bytes in the output (assuming i8 is one byte) | |||
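/// For example (illustrative): splatting 0xAB across 4 bytes zero-extends the
/// byte to i32 and multiplies it by 0xFFFFFFFF / 0xFF = 0x01010101, giving
/// 0xABABABAB.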
2758 | Value *getIntegerSplat(Value *V, unsigned Size) { | |||
2759 | assert(Size > 0 && "Expected a positive number of bytes.")((void)0); | |||
2760 | IntegerType *VTy = cast<IntegerType>(V->getType()); | |||
2761 | assert(VTy->getBitWidth() == 8 && "Expected an i8 value for the byte")((void)0); | |||
2762 | if (Size == 1) | |||
2763 | return V; | |||
2764 | ||||
2765 | Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size * 8); | |||
2766 | V = IRB.CreateMul( | |||
2767 | IRB.CreateZExt(V, SplatIntTy, "zext"), | |||
2768 | ConstantExpr::getUDiv( | |||
2769 | Constant::getAllOnesValue(SplatIntTy), | |||
2770 | ConstantExpr::getZExt(Constant::getAllOnesValue(V->getType()), | |||
2771 | SplatIntTy)), | |||
2772 | "isplat"); | |||
2773 | return V; | |||
2774 | } | |||
2775 | ||||
2776 | /// Compute a vector splat for a given element value. | |||
2777 | Value *getVectorSplat(Value *V, unsigned NumElements) { | |||
2778 | V = IRB.CreateVectorSplat(NumElements, V, "vsplat"); | |||
2779 | LLVM_DEBUG(dbgs() << " splat: " << *V << "\n")do { } while (false); | |||
2780 | return V; | |||
2781 | } | |||
2782 | ||||
2783 | bool visitMemSetInst(MemSetInst &II) { | |||
2784 | LLVM_DEBUG(dbgs() << " original: " << II << "\n")do { } while (false); | |||
2785 | assert(II.getRawDest() == OldPtr)((void)0); | |||
2786 | ||||
2787 | AAMDNodes AATags; | |||
2788 | II.getAAMetadata(AATags); | |||
2789 | ||||
2790 | // If the memset has a variable size, it cannot be split, just adjust the | |||
2791 | // pointer to the new alloca. | |||
2792 | if (!isa<ConstantInt>(II.getLength())) { | |||
2793 | assert(!IsSplit)((void)0); | |||
2794 | assert(NewBeginOffset == BeginOffset)((void)0); | |||
2795 | II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType())); | |||
2796 | II.setDestAlignment(getSliceAlign()); | |||
2797 | ||||
2798 | deleteIfTriviallyDead(OldPtr); | |||
2799 | return false; | |||
2800 | } | |||
2801 | ||||
2802 | // Record this instruction for deletion. | |||
2803 | Pass.DeadInsts.push_back(&II); | |||
2804 | ||||
2805 | Type *AllocaTy = NewAI.getAllocatedType(); | |||
2806 | Type *ScalarTy = AllocaTy->getScalarType(); | |||
2807 | ||||
2808 | const bool CanContinue = [&]() { | |||
2809 | if (VecTy || IntTy) | |||
2810 | return true; | |||
2811 | if (BeginOffset > NewAllocaBeginOffset || | |||
2812 | EndOffset < NewAllocaEndOffset) | |||
2813 | return false; | |||
2814 | // Length must be in range for FixedVectorType. | |||
2815 | auto *C = cast<ConstantInt>(II.getLength()); | |||
2816 | const uint64_t Len = C->getLimitedValue(); | |||
2817 | if (Len > std::numeric_limits<unsigned>::max()) | |||
2818 | return false; | |||
2819 | auto *Int8Ty = IntegerType::getInt8Ty(NewAI.getContext()); | |||
2820 | auto *SrcTy = FixedVectorType::get(Int8Ty, Len); | |||
2821 | return canConvertValue(DL, SrcTy, AllocaTy) && | |||
2822 | DL.isLegalInteger(DL.getTypeSizeInBits(ScalarTy).getFixedSize()); | |||
2823 | }(); | |||
2824 | ||||
2825 | // If this doesn't map cleanly onto the alloca type, and that type isn't | |||
2826 | // a single value type, just emit a memset. | |||
2827 | if (!CanContinue) { | |||
2828 | Type *SizeTy = II.getLength()->getType(); | |||
2829 | Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset); | |||
2830 | CallInst *New = IRB.CreateMemSet( | |||
2831 | getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size, | |||
2832 | MaybeAlign(getSliceAlign()), II.isVolatile()); | |||
2833 | if (AATags) | |||
2834 | New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); | |||
2835 | LLVM_DEBUG(dbgs() << " to: " << *New << "\n")do { } while (false); | |||
2836 | return false; | |||
2837 | } | |||
2838 | ||||
2839 | // If we can represent this as a simple value, we have to build the actual | |||
2840 | // value to store, which requires expanding the byte present in memset to | |||
2841 | // a sensible representation for the alloca type. This is essentially | |||
2842 | // splatting the byte to a sufficiently wide integer, splatting it across | |||
2843 | // any desired vector width, and bitcasting to the final type. | |||
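// For example (illustrative): a memset of the byte 0x2A over an alloca of
// type <2 x i32> becomes, roughly, a store of
// <2 x i32> <i32 0x2A2A2A2A, i32 0x2A2A2A2A> to the new alloca.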
2844 | Value *V; | |||
2845 | ||||
2846 | if (VecTy) { | |||
2847 | // If this is a memset of a vectorized alloca, insert it. | |||
2848 | assert(ElementTy == ScalarTy)((void)0); | |||
2849 | ||||
2850 | unsigned BeginIndex = getIndex(NewBeginOffset); | |||
2851 | unsigned EndIndex = getIndex(NewEndOffset); | |||
2852 | assert(EndIndex > BeginIndex && "Empty vector!")((void)0); | |||
2853 | unsigned NumElements = EndIndex - BeginIndex; | |||
2854 | assert(NumElements <= cast<FixedVectorType>(VecTy)->getNumElements() &&((void)0) | |||
2855 | "Too many elements!")((void)0); | |||
2856 | ||||
2857 | Value *Splat = getIntegerSplat( | |||
2858 | II.getValue(), DL.getTypeSizeInBits(ElementTy).getFixedSize() / 8); | |||
2859 | Splat = convertValue(DL, IRB, Splat, ElementTy); | |||
2860 | if (NumElements > 1) | |||
2861 | Splat = getVectorSplat(Splat, NumElements); | |||
2862 | ||||
2863 | Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI, | |||
2864 | NewAI.getAlign(), "oldload"); | |||
2865 | V = insertVector(IRB, Old, Splat, BeginIndex, "vec"); | |||
2866 | } else if (IntTy) { | |||
2867 | // If this is a memset on an alloca where we can widen stores, insert the | |||
2868 | // set integer. | |||
2869 | assert(!II.isVolatile())((void)0); | |||
2870 | ||||
2871 | uint64_t Size = NewEndOffset - NewBeginOffset; | |||
2872 | V = getIntegerSplat(II.getValue(), Size); | |||
2873 | ||||
2874 | if (IntTy && (BeginOffset != NewAllocaBeginOffset || | |||
2875 | EndOffset != NewAllocaEndOffset)) { | |||
2876 | Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI, | |||
2877 | NewAI.getAlign(), "oldload"); | |||
2878 | Old = convertValue(DL, IRB, Old, IntTy); | |||
2879 | uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset; | |||
2880 | V = insertInteger(DL, IRB, Old, V, Offset, "insert"); | |||
2881 | } else { | |||
2882 | assert(V->getType() == IntTy &&((void)0) | |||
2883 | "Wrong type for an alloca wide integer!")((void)0); | |||
2884 | } | |||
2885 | V = convertValue(DL, IRB, V, AllocaTy); | |||
2886 | } else { | |||
2887 | // Established these invariants above. | |||
2888 | assert(NewBeginOffset == NewAllocaBeginOffset)((void)0); | |||
2889 | assert(NewEndOffset == NewAllocaEndOffset)((void)0); | |||
2890 | ||||
2891 | V = getIntegerSplat(II.getValue(), | |||
2892 | DL.getTypeSizeInBits(ScalarTy).getFixedSize() / 8); | |||
2893 | if (VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy)) | |||
2894 | V = getVectorSplat( | |||
2895 | V, cast<FixedVectorType>(AllocaVecTy)->getNumElements()); | |||
2896 | ||||
2897 | V = convertValue(DL, IRB, V, AllocaTy); | |||
2898 | } | |||
2899 | ||||
2900 | StoreInst *New = | |||
2901 | IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign(), II.isVolatile()); | |||
2902 | New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access, | |||
2903 | LLVMContext::MD_access_group}); | |||
2904 | if (AATags) | |||
2905 | New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); | |||
2906 | LLVM_DEBUG(dbgs() << " to: " << *New << "\n")do { } while (false); | |||
2907 | return !II.isVolatile(); | |||
2908 | } | |||
2909 | ||||
2910 | bool visitMemTransferInst(MemTransferInst &II) { | |||
2911 | // Rewriting of memory transfer instructions can be a bit tricky. We break | |||
2912 | // them into two categories: split intrinsics and unsplit intrinsics. | |||
2913 | ||||
2914 | LLVM_DEBUG(dbgs() << " original: " << II << "\n")do { } while (false); | |||
| ||||
2915 | ||||
2916 | AAMDNodes AATags; | |||
2917 | II.getAAMetadata(AATags); | |||
2918 | ||||
2919 | bool IsDest = &II.getRawDestUse() == OldUse; | |||
2920 | assert((IsDest && II.getRawDest() == OldPtr) ||((void)0) | |||
2921 | (!IsDest && II.getRawSource() == OldPtr))((void)0); | |||
2922 | ||||
2923 | MaybeAlign SliceAlign = getSliceAlign(); | |||
2924 | ||||
2925 | // For unsplit intrinsics, we simply modify the source and destination | |||
2926 | // pointers in place. This isn't just an optimization, it is a matter of | |||
2927 | // correctness. With unsplit intrinsics we may be dealing with transfers | |||
2928 | // within a single alloca before SROA ran, or with transfers that have | |||
2929 | // a variable length. We may also be dealing with memmove instead of | |||
2930 | // memcpy, and so simply updating the pointers is all that is necessary for | |||
2931 | // us to update both the source and dest of a single call. | |||
2932 | if (!IsSplittable) { | |||
2933 | Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType()); | |||
2934 | if (IsDest) { | |||
2935 | II.setDest(AdjustedPtr); | |||
2936 | II.setDestAlignment(SliceAlign); | |||
2937 | } else { | |||
2939 | II.setSource(AdjustedPtr); | |||
2940 | II.setSourceAlignment(SliceAlign); | |||
2941 | } | |||
2942 | ||||
2943 | LLVM_DEBUG(dbgs() << " to: " << II << "\n")do { } while (false); | |||
2944 | deleteIfTriviallyDead(OldPtr); | |||
2945 | return false; | |||
2946 | } | |||
2947 | // For split transfer intrinsics we have an incredibly useful assurance: | |||
2948 | // the source and destination do not reside within the same alloca, and at | |||
2949 | // least one of them does not escape. This means that we can replace | |||
2950 | // memmove with memcpy, and we don't need to worry about all manner of | |||
2951 | // downsides to splitting and transforming the operations. | |||
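// For example (illustrative): an 8-byte memcpy whose destination is a
// partition rewritten as a single i64 alloca becomes a plain load of the
// source followed by a store of that value directly into the new alloca.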
2952 | ||||
2953 | // If this doesn't map cleanly onto the alloca type, and that type isn't | |||
2954 | // a single value type, just emit a memcpy. | |||
2955 | bool EmitMemCpy = | |||
2956 | !VecTy && !IntTy && | |||
2957 | (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset || | |||
2958 | SliceSize != | |||
2959 | DL.getTypeStoreSize(NewAI.getAllocatedType()).getFixedSize() || | |||
2960 | !NewAI.getAllocatedType()->isSingleValueType()); | |||
2961 | ||||
2962 | // If we're just going to emit a memcpy, the alloca hasn't changed, and the | |||
2963 | // size hasn't been shrunk based on analysis of the viable range, this is | |||
2964 | // a no-op. | |||
2965 | if (EmitMemCpy && &OldAI == &NewAI) { | |||
2966 | // Ensure the start lines up. | |||
2967 | assert(NewBeginOffset == BeginOffset)((void)0); | |||
2968 | ||||
2969 | // Rewrite the size as needed. | |||
2970 | if (NewEndOffset != EndOffset) | |||
2971 | II.setLength(ConstantInt::get(II.getLength()->getType(), | |||
2972 | NewEndOffset - NewBeginOffset)); | |||
2973 | return false; | |||
2974 | } | |||
2975 | // Record this instruction for deletion. | |||
2976 | Pass.DeadInsts.push_back(&II); | |||
2977 | ||||
2978 | // Strip all inbounds GEPs and pointer casts to try to dig out any root | |||
2979 | // alloca that should be re-examined after rewriting this instruction. | |||
2980 | Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest(); | |||
2981 | if (AllocaInst *AI = | |||
2982 | dyn_cast<AllocaInst>(OtherPtr->stripInBoundsOffsets())) { | |||
2983 | assert(AI != &OldAI && AI != &NewAI &&((void)0) | |||
2984 | "Splittable transfers cannot reach the same alloca on both ends.")((void)0); | |||
2985 | Pass.Worklist.insert(AI); | |||
2986 | } | |||
2987 | ||||
2988 | Type *OtherPtrTy = OtherPtr->getType(); | |||
2989 | unsigned OtherAS = OtherPtrTy->getPointerAddressSpace(); | |||
2990 | ||||
2991 | // Compute the relative offset for the other pointer within the transfer. | |||
2992 | unsigned OffsetWidth = DL.getIndexSizeInBits(OtherAS); | |||
2993 | APInt OtherOffset(OffsetWidth, NewBeginOffset - BeginOffset); | |||
2994 | Align OtherAlign = | |||
2995 | (IsDest ? II.getSourceAlign() : II.getDestAlign()).valueOrOne(); | |||
2996 | OtherAlign = | |||
2997 | commonAlignment(OtherAlign, OtherOffset.zextOrTrunc(64).getZExtValue()); | |||
2998 | ||||
2999 | if (EmitMemCpy) { | |||
3000 | // Compute the other pointer, folding as much as possible to produce | |||
3001 | // a single, simple GEP in most cases. | |||
3002 | OtherPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy, | |||
3003 | OtherPtr->getName() + "."); | |||
3004 | ||||
3005 | Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType()); | |||
3006 | Type *SizeTy = II.getLength()->getType(); | |||
3007 | Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset); | |||
3008 | ||||
3009 | Value *DestPtr, *SrcPtr; | |||
3010 | MaybeAlign DestAlign, SrcAlign; | |||
3011 | // Note: IsDest is true iff we're copying into the new alloca slice | |||
3012 | if (IsDest) { | |||
3013 | DestPtr = OurPtr; | |||
3014 | DestAlign = SliceAlign; | |||
3015 | SrcPtr = OtherPtr; | |||
3016 | SrcAlign = OtherAlign; | |||
3017 | } else { | |||
3018 | DestPtr = OtherPtr; | |||
3019 | DestAlign = OtherAlign; | |||
3020 | SrcPtr = OurPtr; | |||
3021 | SrcAlign = SliceAlign; | |||
3022 | } | |||
3023 | CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign, | |||
3024 | Size, II.isVolatile()); | |||
3025 | if (AATags) | |||
3026 | New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); | |||
3027 | LLVM_DEBUG(dbgs() << " to: " << *New << "\n")do { } while (false); | |||
3028 | return false; | |||
3029 | } | |||
3030 | ||||
3031 | bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset && | |||
3032 | NewEndOffset == NewAllocaEndOffset; | |||
3033 | uint64_t Size = NewEndOffset - NewBeginOffset; | |||
3034 | unsigned BeginIndex = VecTy ? getIndex(NewBeginOffset) : 0; | |||
3035 | unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0; | |||
3036 | unsigned NumElements = EndIndex - BeginIndex; | |||
3037 | IntegerType *SubIntTy = | |||
3038 | IntTy ? Type::getIntNTy(IntTy->getContext(), Size * 8) : nullptr; | |||
3039 | ||||
3040 | // Reset the other pointer type to match the register type we're going to | |||
3041 | // use, but using the address space of the original other pointer. | |||
3042 | Type *OtherTy; | |||
3043 | if (VecTy && !IsWholeAlloca) { | |||
3044 | if (NumElements == 1) | |||
3045 | OtherTy = VecTy->getElementType(); | |||
3046 | else | |||
3047 | OtherTy = FixedVectorType::get(VecTy->getElementType(), NumElements); | |||
3048 | } else if (IntTy && !IsWholeAlloca) { | |||
3049 | OtherTy = SubIntTy; | |||
3050 | } else { | |||
3051 | OtherTy = NewAllocaTy; | |||
3052 | } | |||
3053 | OtherPtrTy = OtherTy->getPointerTo(OtherAS); | |||
3054 | ||||
3055 | Value *SrcPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy, | |||
3056 | OtherPtr->getName() + "."); | |||
3057 | MaybeAlign SrcAlign = OtherAlign; | |||
3058 | Value *DstPtr = &NewAI; | |||
3059 | MaybeAlign DstAlign = SliceAlign; | |||
3060 | if (!IsDest) { | |||
3061 | std::swap(SrcPtr, DstPtr); | |||
3062 | std::swap(SrcAlign, DstAlign); | |||
3063 | } | |||
3064 | ||||
3065 | Value *Src; | |||
3066 | if (VecTy && !IsWholeAlloca && !IsDest) { | |||
3067 | Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI, | |||
3068 | NewAI.getAlign(), "load"); | |||
3069 | Src = extractVector(IRB, Src, BeginIndex, EndIndex, "vec"); | |||
3070 | } else if (IntTy && !IsWholeAlloca && !IsDest) { | |||
3071 | Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI, | |||
3072 | NewAI.getAlign(), "load"); | |||
3073 | Src = convertValue(DL, IRB, Src, IntTy); | |||
3074 | uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset; | |||
3075 | Src = extractInteger(DL, IRB, Src, SubIntTy, Offset, "extract"); | |||
3076 | } else { | |||
3077 | LoadInst *Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign, | |||
3078 | II.isVolatile(), "copyload"); | |||
3079 | Load->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access, | |||
3080 | LLVMContext::MD_access_group}); | |||
3081 | if (AATags) | |||
3082 | Load->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); | |||
3083 | Src = Load; | |||
3084 | } | |||
3085 | ||||
3086 | if (VecTy && !IsWholeAlloca && IsDest) { | |||
3087 | Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI, | |||
3088 | NewAI.getAlign(), "oldload"); | |||
3089 | Src = insertVector(IRB, Old, Src, BeginIndex, "vec"); | |||
3090 | } else if (IntTy && !IsWholeAlloca && IsDest) { | |||
3091 | Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI, | |||
3092 | NewAI.getAlign(), "oldload"); | |||
3093 | Old = convertValue(DL, IRB, Old, IntTy); | |||
3094 | uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset; | |||
3095 | Src = insertInteger(DL, IRB, Old, Src, Offset, "insert"); | |||
3096 | Src = convertValue(DL, IRB, Src, NewAllocaTy); | |||
3097 | } | |||
3098 | ||||
3099 | StoreInst *Store = cast<StoreInst>( | |||
3100 | IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile())); | |||
3101 | Store->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access, | |||
3102 | LLVMContext::MD_access_group}); | |||
3103 | if (AATags) | |||
3104 | Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); | |||
3105 | LLVM_DEBUG(dbgs() << " to: " << *Store << "\n")do { } while (false); | |||
3106 | return !II.isVolatile(); | |||
3107 | } | |||
3108 | ||||
3109 | bool visitIntrinsicInst(IntrinsicInst &II) { | |||
3110 | assert((II.isLifetimeStartOrEnd() || II.isDroppable()) &&((void)0) | |||
3111 | "Unexpected intrinsic!")((void)0); | |||
3112 | LLVM_DEBUG(dbgs() << " original: " << II << "\n")do { } while (false); | |||
3113 | ||||
3114 | // Record this instruction for deletion. | |||
3115 | Pass.DeadInsts.push_back(&II); | |||
3116 | ||||
3117 | if (II.isDroppable()) { | |||
3118 | assert(II.getIntrinsicID() == Intrinsic::assume && "Expected assume")((void)0); | |||
3119 | // TODO For now we forget assumed information, this can be improved. | |||
3120 | OldPtr->dropDroppableUsesIn(II); | |||
3121 | return true; | |||
3122 | } | |||
3123 | ||||
3124 | assert(II.getArgOperand(1) == OldPtr)((void)0); | |||
3125 | // Lifetime intrinsics are only promotable if they cover the whole alloca. | |||
3126 | // Therefore, we drop lifetime intrinsics which don't cover the whole | |||
3127 | // alloca. | |||
3128 | // (In theory, intrinsics which partially cover an alloca could be | |||
3129 | // promoted, but PromoteMemToReg doesn't handle that case.) | |||
3130 | // FIXME: Check whether the alloca is promotable before dropping the | |||
3131 | // lifetime intrinsics? | |||
3132 | if (NewBeginOffset != NewAllocaBeginOffset || | |||
3133 | NewEndOffset != NewAllocaEndOffset) | |||
3134 | return true; | |||
3135 | ||||
3136 | ConstantInt *Size = | |||
3137 | ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()), | |||
3138 | NewEndOffset - NewBeginOffset); | |||
3139 | // Lifetime intrinsics always expect an i8* so directly get such a pointer | |||
3140 | // for the new alloca slice. | |||
3141 | Type *PointerTy = IRB.getInt8PtrTy(OldPtr->getType()->getPointerAddressSpace()); | |||
3142 | Value *Ptr = getNewAllocaSlicePtr(IRB, PointerTy); | |||
3143 | Value *New; | |||
3144 | if (II.getIntrinsicID() == Intrinsic::lifetime_start) | |||
3145 | New = IRB.CreateLifetimeStart(Ptr, Size); | |||
3146 | else | |||
3147 | New = IRB.CreateLifetimeEnd(Ptr, Size); | |||
3148 | ||||
3149 | (void)New; | |||
3150 | LLVM_DEBUG(dbgs() << " to: " << *New << "\n")do { } while (false); | |||
3151 | ||||
3152 | return true; | |||
3153 | } | |||
3154 | ||||
3155 | void fixLoadStoreAlign(Instruction &Root) { | |||
3156 | // This algorithm implements the same visitor loop as | |||
3157 | // hasUnsafePHIOrSelectUse, and fixes the alignment of each load | |||
3158 | // or store found. | |||
3159 | SmallPtrSet<Instruction *, 4> Visited; | |||
3160 | SmallVector<Instruction *, 4> Uses; | |||
3161 | Visited.insert(&Root); | |||
3162 | Uses.push_back(&Root); | |||
3163 | do { | |||
3164 | Instruction *I = Uses.pop_back_val(); | |||
3165 | ||||
3166 | if (LoadInst *LI = dyn_cast<LoadInst>(I)) { | |||
3167 | LI->setAlignment(std::min(LI->getAlign(), getSliceAlign())); | |||
3168 | continue; | |||
3169 | } | |||
3170 | if (StoreInst *SI = dyn_cast<StoreInst>(I)) { | |||
3171 | SI->setAlignment(std::min(SI->getAlign(), getSliceAlign())); | |||
3172 | continue; | |||
3173 | } | |||
3174 | ||||
3175 | assert(isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I) ||((void)0) | |||
3176 | isa<PHINode>(I) || isa<SelectInst>(I) ||((void)0) | |||
3177 | isa<GetElementPtrInst>(I))((void)0); | |||
3178 | for (User *U : I->users()) | |||
3179 | if (Visited.insert(cast<Instruction>(U)).second) | |||
3180 | Uses.push_back(cast<Instruction>(U)); | |||
3181 | } while (!Uses.empty()); | |||
3182 | } | |||
3183 | ||||
3184 | bool visitPHINode(PHINode &PN) { | |||
3185 | LLVM_DEBUG(dbgs() << " original: " << PN << "\n")do { } while (false); | |||
3186 | assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable")((void)0); | |||
3187 | assert(EndOffset <= NewAllocaEndOffset && "PHIs are unsplittable")((void)0); | |||
3188 | ||||
3189 | // We would like to compute a new pointer in only one place, but have it be | |||
3190 | // as local as possible to the PHI. To do that, we re-use the location of | |||
3191 | // the old pointer, which necessarily must be in the right position to | |||
3192 | // dominate the PHI. | |||
3193 | IRBuilderBase::InsertPointGuard Guard(IRB); | |||
3194 | if (isa<PHINode>(OldPtr)) | |||
3195 | IRB.SetInsertPoint(&*OldPtr->getParent()->getFirstInsertionPt()); | |||
3196 | else | |||
3197 | IRB.SetInsertPoint(OldPtr); | |||
3198 | IRB.SetCurrentDebugLocation(OldPtr->getDebugLoc()); | |||
3199 | ||||
3200 | Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType()); | |||
3201 | // Replace the operands which were using the old pointer. | |||
3202 | std::replace(PN.op_begin(), PN.op_end(), cast<Value>(OldPtr), NewPtr); | |||
3203 | ||||
3204 | LLVM_DEBUG(dbgs() << " to: " << PN << "\n")do { } while (false); | |||
3205 | deleteIfTriviallyDead(OldPtr); | |||
3206 | ||||
3207 | // Fix the alignment of any loads or stores using this PHI node. | |||
3208 | fixLoadStoreAlign(PN); | |||
3209 | ||||
3210 | // PHIs can't be promoted on their own, but often can be speculated. We | |||
3211 | // check the speculation outside of the rewriter so that we see the | |||
3212 | // fully-rewritten alloca. | |||
3213 | PHIUsers.insert(&PN); | |||
3214 | return true; | |||
3215 | } | |||
3216 | ||||
3217 | bool visitSelectInst(SelectInst &SI) { | |||
3218 | LLVM_DEBUG(dbgs() << " original: " << SI << "\n")do { } while (false); | |||
3219 | assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) &&((void)0) | |||
3220 | "Pointer isn't an operand!")((void)0); | |||
3221 | assert(BeginOffset >= NewAllocaBeginOffset && "Selects are unsplittable")((void)0); | |||
3222 | assert(EndOffset <= NewAllocaEndOffset && "Selects are unsplittable")((void)0); | |||
3223 | ||||
3224 | Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType()); | |||
3225 | // Replace the operands which were using the old pointer. | |||
3226 | if (SI.getOperand(1) == OldPtr) | |||
3227 | SI.setOperand(1, NewPtr); | |||
3228 | if (SI.getOperand(2) == OldPtr) | |||
3229 | SI.setOperand(2, NewPtr); | |||
3230 | ||||
3231 | LLVM_DEBUG(dbgs() << " to: " << SI << "\n")do { } while (false); | |||
3232 | deleteIfTriviallyDead(OldPtr); | |||
3233 | ||||
3234 | // Fix the alignment of any loads or stores using this select. | |||
3235 | fixLoadStoreAlign(SI); | |||
3236 | ||||
3237 | // Selects can't be promoted on their own, but often can be speculated. We | |||
3238 | // check the speculation outside of the rewriter so that we see the | |||
3239 | // fully-rewritten alloca. | |||
3240 | SelectUsers.insert(&SI); | |||
3241 | return true; | |||
3242 | } | |||
3243 | }; | |||
3244 | ||||
3245 | namespace { | |||
3246 | ||||
3247 | /// Visitor to rewrite aggregate loads and stores as scalar. | |||
3248 | /// | |||
3249 | /// This pass aggressively rewrites all aggregate loads and stores on | |||
3250 | /// a particular pointer (or any pointer derived from it which we can identify) | |||
3251 | /// with scalar loads and stores. | |||
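/// For example (illustrative): a load of a { i32, float } struct is rewritten
/// as two scalar loads through per-field GEPs, reassembled with insertvalue.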
3252 | class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> { | |||
3253 | // Befriend the base class so it can delegate to private visit methods. | |||
3254 | friend class InstVisitor<AggLoadStoreRewriter, bool>; | |||
3255 | ||||
3256 | /// Queue of pointer uses to analyze and potentially rewrite. | |||
3257 | SmallVector<Use *, 8> Queue; | |||
3258 | ||||
3259 | /// Set to prevent us from cycling with phi nodes and loops. | |||
3260 | SmallPtrSet<User *, 8> Visited; | |||
3261 | ||||
3262 | /// The current pointer use being rewritten. This is used to dig up the used | |||
3263 | /// value (as opposed to the user). | |||
3264 | Use *U = nullptr; | |||
3265 | ||||
3266 | /// Used to calculate offsets, and hence alignment, of subobjects. | |||
3267 | const DataLayout &DL; | |||
3268 | ||||
3269 | public: | |||
3270 | AggLoadStoreRewriter(const DataLayout &DL) : DL(DL) {} | |||
3271 | ||||
3272 | /// Rewrite loads and stores through a pointer and all pointers derived from | |||
3273 | /// it. | |||
3274 | bool rewrite(Instruction &I) { | |||
3275 | LLVM_DEBUG(dbgs() << " Rewriting FCA loads and stores...\n")do { } while (false); | |||
3276 | enqueueUsers(I); | |||
3277 | bool Changed = false; | |||
3278 | while (!Queue.empty()) { | |||
3279 | U = Queue.pop_back_val(); | |||
3280 | Changed |= visit(cast<Instruction>(U->getUser())); | |||
3281 | } | |||
3282 | return Changed; | |||
3283 | } | |||
3284 | ||||
3285 | private: | |||
3286 | /// Enqueue all the users of the given instruction for further processing. | |||
3287 | /// This uses a set to de-duplicate users. | |||
3288 | void enqueueUsers(Instruction &I) { | |||
3289 | for (Use &U : I.uses()) | |||
3290 | if (Visited.insert(U.getUser()).second) | |||
3291 | Queue.push_back(&U); | |||
3292 | } | |||
3293 | ||||
3294 | // Conservative default is to not rewrite anything. | |||
3295 | bool visitInstruction(Instruction &I) { return false; } | |||
3296 | ||||
3297 | /// Generic recursive split emission class. | |||
3298 | template <typename Derived> class OpSplitter { | |||
3299 | protected: | |||
3300 | /// The builder used to form new instructions. | |||
3301 | IRBuilderTy IRB; | |||
3302 | ||||
3303 | /// The indices to be used with insertvalue or extractvalue to select the | |||
3304 | /// appropriate value within the aggregate. | |||
3305 | SmallVector<unsigned, 4> Indices; | |||
3306 | ||||
3307 | /// The indices to a GEP instruction which will move Ptr to the correct slot | |||
3308 | /// within the aggregate. | |||
3309 | SmallVector<Value *, 4> GEPIndices; | |||
3310 | ||||
3311 | /// The base pointer of the original op, used as a base for GEPing the | |||
3312 | /// split operations. | |||
3313 | Value *Ptr; | |||
3314 | ||||
3315 | /// The base pointee type being GEPed into. | |||
3316 | Type *BaseTy; | |||
3317 | ||||
3318 | /// Known alignment of the base pointer. | |||
3319 | Align BaseAlign; | |||
3320 | ||||
3321 | /// Used to calculate the offset of each component so we can correctly | |||
3322 | /// deduce alignments. | |||
3323 | const DataLayout &DL; | |||
3324 | ||||
3325 | /// Initialize the splitter with an insertion point, Ptr and start with a | |||
3326 | /// single zero GEP index. | |||
3327 | OpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy, | |||
3328 | Align BaseAlign, const DataLayout &DL) | |||
3329 | : IRB(InsertionPoint), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr), | |||
3330 | BaseTy(BaseTy), BaseAlign(BaseAlign), DL(DL) {} | |||
3331 | ||||
3332 | public: | |||
3333 | /// Generic recursive split emission routine. | |||
3334 | /// | |||
3335 | /// This method recursively splits an aggregate op (load or store) into | |||
3336 | /// scalar or vector ops. It splits recursively until it hits a single value | |||
3337 | /// and emits that single value operation via the template argument. | |||
3338 | /// | |||
3339 | /// The logic of this routine relies on GEPs and insertvalue and | |||
3340 | /// extractvalue all operating with the same fundamental index list, merely | |||
3341 | /// formatted differently (GEPs need actual values). | |||
3342 | /// | |||
3343 | /// \param Ty The type being split recursively into smaller ops. | |||
3344 | /// \param Agg The aggregate value being built up or stored, depending on | |||
3345 | /// whether this is splitting a load or a store respectively. | |||
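/// For example (illustrative): splitting over [2 x { i32, i8 }] visits the
/// index paths {0,0}, {0,1}, {1,0} and {1,1}, emitting one leaf operation per
/// scalar field.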
3346 | void emitSplitOps(Type *Ty, Value *&Agg, const Twine &Name) { | |||
3347 | if (Ty->isSingleValueType()) { | |||
3348 | unsigned Offset = DL.getIndexedOffsetInType(BaseTy, GEPIndices); | |||
3349 | return static_cast<Derived *>(this)->emitFunc( | |||
3350 | Ty, Agg, commonAlignment(BaseAlign, Offset), Name); | |||
3351 | } | |||
3352 | ||||
3353 | if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { | |||
3354 | unsigned OldSize = Indices.size(); | |||
3355 | (void)OldSize; | |||
3356 | for (unsigned Idx = 0, Size = ATy->getNumElements(); Idx != Size; | |||
3357 | ++Idx) { | |||
3358 | assert(Indices.size() == OldSize && "Did not return to the old size")((void)0); | |||
3359 | Indices.push_back(Idx); | |||
3360 | GEPIndices.push_back(IRB.getInt32(Idx)); | |||
3361 | emitSplitOps(ATy->getElementType(), Agg, Name + "." + Twine(Idx)); | |||
3362 | GEPIndices.pop_back(); | |||
3363 | Indices.pop_back(); | |||
3364 | } | |||
3365 | return; | |||
3366 | } | |||
3367 | ||||
3368 | if (StructType *STy = dyn_cast<StructType>(Ty)) { | |||
3369 | unsigned OldSize = Indices.size(); | |||
3370 | (void)OldSize; | |||
3371 | for (unsigned Idx = 0, Size = STy->getNumElements(); Idx != Size; | |||
3372 | ++Idx) { | |||
3373 | assert(Indices.size() == OldSize && "Did not return to the old size")((void)0); | |||
3374 | Indices.push_back(Idx); | |||
3375 | GEPIndices.push_back(IRB.getInt32(Idx)); | |||
3376 | emitSplitOps(STy->getElementType(Idx), Agg, Name + "." + Twine(Idx)); | |||
3377 | GEPIndices.pop_back(); | |||
3378 | Indices.pop_back(); | |||
3379 | } | |||
3380 | return; | |||
3381 | } | |||
3382 | ||||
3383 | llvm_unreachable("Only arrays and structs are aggregate loadable types")__builtin_unreachable(); | |||
3384 | } | |||
3385 | }; | |||
3386 | ||||
3387 | struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> { | |||
3388 | AAMDNodes AATags; | |||
3389 | ||||
3390 | LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy, | |||
3391 | AAMDNodes AATags, Align BaseAlign, const DataLayout &DL) | |||
3392 | : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign, | |||
3393 | DL), | |||
3394 | AATags(AATags) {} | |||
3395 | ||||
3396 | /// Emit a leaf load of a single value. This is called at the leaves of the | |||
3397 | /// recursive emission to actually load values. | |||
3398 | void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) { | |||
3399 | assert(Ty->isSingleValueType())((void)0); | |||
3400 | // Load the single value and insert it using the indices. | |||
3401 | Value *GEP = | |||
3402 | IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep"); | |||
3403 | LoadInst *Load = | |||
3404 | IRB.CreateAlignedLoad(Ty, GEP, Alignment, Name + ".load"); | |||
3405 | ||||
3406 | APInt Offset( | |||
3407 | DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0); | |||
3408 | if (AATags && | |||
3409 | GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset)) | |||
3410 | Load->setAAMetadata(AATags.shift(Offset.getZExtValue())); | |||
3411 | ||||
3412 | Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert"); | |||
3413 | LLVM_DEBUG(dbgs() << " to: " << *Load << "\n")do { } while (false); | |||
3414 | } | |||
3415 | }; | |||
3416 | ||||
3417 | bool visitLoadInst(LoadInst &LI) { | |||
3418 | assert(LI.getPointerOperand() == *U)((void)0); | |||
3419 | if (!LI.isSimple() || LI.getType()->isSingleValueType()) | |||
3420 | return false; | |||
3421 | ||||
3422 | // We have an aggregate being loaded, split it apart. | |||
3423 | LLVM_DEBUG(dbgs() << " original: " << LI << "\n")do { } while (false); | |||
3424 | AAMDNodes AATags; | |||
3425 | LI.getAAMetadata(AATags); | |||
3426 | LoadOpSplitter Splitter(&LI, *U, LI.getType(), AATags, | |||
3427 | getAdjustedAlignment(&LI, 0), DL); | |||
3428 | Value *V = UndefValue::get(LI.getType()); | |||
3429 | Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca"); | |||
3430 | Visited.erase(&LI); | |||
3431 | LI.replaceAllUsesWith(V); | |||
3432 | LI.eraseFromParent(); | |||
3433 | return true; | |||
3434 | } | |||
3435 | ||||
3436 | struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> { | |||
3437 | StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy, | |||
3438 | AAMDNodes AATags, Align BaseAlign, const DataLayout &DL) | |||
3439 | : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign, | |||
3440 | DL), | |||
3441 | AATags(AATags) {} | |||
3442 | AAMDNodes AATags; | |||
3443 | /// Emit a leaf store of a single value. This is called at the leaves of the | |||
3444 | /// recursive emission to actually produce stores. | |||
3445 | void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) { | |||
3446 | assert(Ty->isSingleValueType())((void)0); | |||
3447 | // Extract the single value and store it using the indices. | |||
3448 | // | |||
3449 | // The gep and extractvalue values are factored out of the CreateStore | |||
3450 | // call to make the output independent of the argument evaluation order. | |||
3451 | Value *ExtractValue = | |||
3452 | IRB.CreateExtractValue(Agg, Indices, Name + ".extract"); | |||
3453 | Value *InBoundsGEP = | |||
3454 | IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep"); | |||
3455 | StoreInst *Store = | |||
3456 | IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment); | |||
3457 | ||||
3458 | APInt Offset( | |||
3459 | DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0); | |||
3460 | if (AATags && | |||
3461 | GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset)) | |||
3462 | Store->setAAMetadata(AATags.shift(Offset.getZExtValue())); | |||
3463 | ||||
3464 | LLVM_DEBUG(dbgs() << " to: " << *Store << "\n")do { } while (false); | |||
3465 | } | |||
3466 | }; | |||
3467 | ||||
3468 | bool visitStoreInst(StoreInst &SI) { | |||
3469 | if (!SI.isSimple() || SI.getPointerOperand() != *U) | |||
3470 | return false; | |||
3471 | Value *V = SI.getValueOperand(); | |||
3472 | if (V->getType()->isSingleValueType()) | |||
3473 | return false; | |||
3474 | ||||
3475 | // We have an aggregate being stored, split it apart. | |||
3476 | LLVM_DEBUG(dbgs() << " original: " << SI << "\n")do { } while (false); | |||
3477 | AAMDNodes AATags; | |||
3478 | SI.getAAMetadata(AATags); | |||
3479 | StoreOpSplitter Splitter(&SI, *U, V->getType(), AATags, | |||
3480 | getAdjustedAlignment(&SI, 0), DL); | |||
3481 | Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca"); | |||
3482 | Visited.erase(&SI); | |||
3483 | SI.eraseFromParent(); | |||
3484 | return true; | |||
3485 | } | |||
3486 | ||||
3487 | bool visitBitCastInst(BitCastInst &BC) { | |||
3488 | enqueueUsers(BC); | |||
3489 | return false; | |||
3490 | } | |||
3491 | ||||
3492 | bool visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) { | |||
3493 | enqueueUsers(ASC); | |||
3494 | return false; | |||
3495 | } | |||
3496 | ||||
3497 | // Fold gep (select cond, ptr1, ptr2) => select cond, gep(ptr1), gep(ptr2) | |||
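// For example (illustrative IR):
//   %p = select i1 %c, i32* %a, i32* %b
//   %g = getelementptr inbounds i32, i32* %p, i64 1
// becomes, roughly:
//   %a.sroa.gep = getelementptr inbounds i32, i32* %a, i64 1
//   %b.sroa.gep = getelementptr inbounds i32, i32* %b, i64 1
//   %g = select i1 %c, i32* %a.sroa.gep, i32* %b.sroa.gep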
3498 | bool foldGEPSelect(GetElementPtrInst &GEPI) { | |||
3499 | if (!GEPI.hasAllConstantIndices()) | |||
3500 | return false; | |||
3501 | ||||
3502 | SelectInst *Sel = cast<SelectInst>(GEPI.getPointerOperand()); | |||
3503 | ||||
3504 | LLVM_DEBUG(dbgs() << " Rewriting gep(select) -> select(gep):"do { } while (false) | |||
3505 | << "\n original: " << *Seldo { } while (false) | |||
3506 | << "\n " << GEPI)do { } while (false); | |||
3507 | ||||
3508 | IRBuilderTy Builder(&GEPI); | |||
3509 | SmallVector<Value *, 4> Index(GEPI.indices()); | |||
3510 | bool IsInBounds = GEPI.isInBounds(); | |||
3511 | ||||
3512 | Type *Ty = GEPI.getSourceElementType(); | |||
3513 | Value *True = Sel->getTrueValue(); | |||
3514 | Value *NTrue = | |||
3515 | IsInBounds | |||
3516 | ? Builder.CreateInBoundsGEP(Ty, True, Index, | |||
3517 | True->getName() + ".sroa.gep") | |||
3518 | : Builder.CreateGEP(Ty, True, Index, True->getName() + ".sroa.gep"); | |||
3519 | ||||
3520 | Value *False = Sel->getFalseValue(); | |||
3521 | ||||
3522 | Value *NFalse = | |||
3523 | IsInBounds | |||
3524 | ? Builder.CreateInBoundsGEP(Ty, False, Index, | |||
3525 | False->getName() + ".sroa.gep") | |||
3526 | : Builder.CreateGEP(Ty, False, Index, | |||
3527 | False->getName() + ".sroa.gep"); | |||
3528 | ||||
3529 | Value *NSel = Builder.CreateSelect(Sel->getCondition(), NTrue, NFalse, | |||
3530 | Sel->getName() + ".sroa.sel"); | |||
3531 | Visited.erase(&GEPI); | |||
3532 | GEPI.replaceAllUsesWith(NSel); | |||
3533 | GEPI.eraseFromParent(); | |||
3534 | Instruction *NSelI = cast<Instruction>(NSel); | |||
3535 | Visited.insert(NSelI); | |||
3536 | enqueueUsers(*NSelI); | |||
3537 | ||||
3538 | LLVM_DEBUG(dbgs() << "\n to: " << *NTruedo { } while (false) | |||
3539 | << "\n " << *NFalsedo { } while (false) | |||
3540 | << "\n " << *NSel << '\n')do { } while (false); | |||
3541 | ||||
3542 | return true; | |||
3543 | } | |||
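     | ||||
     | // For illustration (value names here are hypothetical), this rewrite turns | |||
     | //   %sel = select i1 %cond, i32* %a, i32* %b | |||
     | //   %gep = getelementptr inbounds i32, i32* %sel, i64 1 | |||
     | // into | |||
     | //   %a.sroa.gep   = getelementptr inbounds i32, i32* %a, i64 1 | |||
     | //   %b.sroa.gep   = getelementptr inbounds i32, i32* %b, i64 1 | |||
     | //   %sel.sroa.sel = select i1 %cond, i32* %a.sroa.gep, i32* %b.sroa.gep | |||
     | // so the slice analysis sees plain GEPs off each incoming pointer. | |||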
3544 | ||||
3545 | // Fold gep (phi ptr1, ptr2) => phi gep(ptr1), gep(ptr2) | |||
3546 | bool foldGEPPhi(GetElementPtrInst &GEPI) { | |||
3547 | if (!GEPI.hasAllConstantIndices()) | |||
3548 | return false; | |||
3549 | ||||
3550 | PHINode *PHI = cast<PHINode>(GEPI.getPointerOperand()); | |||
3551 | if (GEPI.getParent() != PHI->getParent() || | |||
3552 | llvm::any_of(PHI->incoming_values(), [](Value *In) | |||
3553 | { Instruction *I = dyn_cast<Instruction>(In); | |||
3554 | return !I || isa<GetElementPtrInst>(I) || isa<PHINode>(I) || | |||
3555 | succ_empty(I->getParent()) || | |||
3556 | !I->getParent()->isLegalToHoistInto(); | |||
3557 | })) | |||
3558 | return false; | |||
3559 | ||||
3560 | LLVM_DEBUG(dbgs() << " Rewriting gep(phi) -> phi(gep):"do { } while (false) | |||
3561 | << "\n original: " << *PHIdo { } while (false) | |||
3562 | << "\n " << GEPIdo { } while (false) | |||
3563 | << "\n to: ")do { } while (false); | |||
3564 | ||||
3565 | SmallVector<Value *, 4> Index(GEPI.indices()); | |||
3566 | bool IsInBounds = GEPI.isInBounds(); | |||
3567 | IRBuilderTy PHIBuilder(GEPI.getParent()->getFirstNonPHI()); | |||
3568 | PHINode *NewPN = PHIBuilder.CreatePHI(GEPI.getType(), | |||
3569 | PHI->getNumIncomingValues(), | |||
3570 | PHI->getName() + ".sroa.phi"); | |||
3571 | for (unsigned I = 0, E = PHI->getNumIncomingValues(); I != E; ++I) { | |||
3572 | BasicBlock *B = PHI->getIncomingBlock(I); | |||
3573 | Value *NewVal = nullptr; | |||
3574 | int Idx = NewPN->getBasicBlockIndex(B); | |||
3575 | if (Idx >= 0) { | |||
3576 | NewVal = NewPN->getIncomingValue(Idx); | |||
3577 | } else { | |||
3578 | Instruction *In = cast<Instruction>(PHI->getIncomingValue(I)); | |||
3579 | ||||
3580 | IRBuilderTy B(In->getParent(), std::next(In->getIterator())); | |||
3581 | Type *Ty = GEPI.getSourceElementType(); | |||
3582 | NewVal = IsInBounds | |||
3583 | ? B.CreateInBoundsGEP(Ty, In, Index, In->getName() + ".sroa.gep") | |||
3584 | : B.CreateGEP(Ty, In, Index, In->getName() + ".sroa.gep"); | |||
3585 | } | |||
3586 | NewPN->addIncoming(NewVal, B); | |||
3587 | } | |||
3588 | ||||
3589 | Visited.erase(&GEPI); | |||
3590 | GEPI.replaceAllUsesWith(NewPN); | |||
3591 | GEPI.eraseFromParent(); | |||
3592 | Visited.insert(NewPN); | |||
3593 | enqueueUsers(*NewPN); | |||
3594 | ||||
3595 | LLVM_DEBUG(for (Value *In : NewPN->incoming_values())do { } while (false) | |||
3596 | dbgs() << "\n " << *In;do { } while (false) | |||
3597 | dbgs() << "\n " << *NewPN << '\n')do { } while (false); | |||
3598 | ||||
3599 | return true; | |||
3600 | } | |||
3601 | ||||
3602 | bool visitGetElementPtrInst(GetElementPtrInst &GEPI) { | |||
3603 | if (isa<SelectInst>(GEPI.getPointerOperand()) && | |||
3604 | foldGEPSelect(GEPI)) | |||
3605 | return true; | |||
3606 | ||||
3607 | if (isa<PHINode>(GEPI.getPointerOperand()) && | |||
3608 | foldGEPPhi(GEPI)) | |||
3609 | return true; | |||
3610 | ||||
3611 | enqueueUsers(GEPI); | |||
3612 | return false; | |||
3613 | } | |||
3614 | ||||
3615 | bool visitPHINode(PHINode &PN) { | |||
3616 | enqueueUsers(PN); | |||
3617 | return false; | |||
3618 | } | |||
3619 | ||||
3620 | bool visitSelectInst(SelectInst &SI) { | |||
3621 | enqueueUsers(SI); | |||
3622 | return false; | |||
3623 | } | |||
3624 | }; | |||
3625 | ||||
3626 | } // end anonymous namespace | |||
3627 | ||||
3628 | /// Strip aggregate type wrapping. | |||
3629 | /// | |||
3630 | /// This removes no-op aggregate types wrapping an underlying type. It will | |||
3631 | /// strip as many layers of types as it can without changing either the type | |||
3632 | /// size or the allocated size. | |||
3633 | static Type *stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty) { | |||
3634 | if (Ty->isSingleValueType()) | |||
3635 | return Ty; | |||
3636 | ||||
3637 | uint64_t AllocSize = DL.getTypeAllocSize(Ty).getFixedSize(); | |||
3638 | uint64_t TypeSize = DL.getTypeSizeInBits(Ty).getFixedSize(); | |||
3639 | ||||
3640 | Type *InnerTy; | |||
3641 | if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) { | |||
3642 | InnerTy = ArrTy->getElementType(); | |||
3643 | } else if (StructType *STy = dyn_cast<StructType>(Ty)) { | |||
3644 | const StructLayout *SL = DL.getStructLayout(STy); | |||
3645 | unsigned Index = SL->getElementContainingOffset(0); | |||
3646 | InnerTy = STy->getElementType(Index); | |||
3647 | } else { | |||
3648 | return Ty; | |||
3649 | } | |||
3650 | ||||
3651 | if (AllocSize > DL.getTypeAllocSize(InnerTy).getFixedSize() || | |||
3652 | TypeSize > DL.getTypeSizeInBits(InnerTy).getFixedSize()) | |||
3653 | return Ty; | |||
3654 | ||||
3655 | return stripAggregateTypeWrapping(DL, InnerTy); | |||
3656 | } | |||
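     | ||||
     | // A few illustrative cases (assuming a typical data layout): | |||
     | //   { { float } } -> float       both sizes match at every layer | |||
     | //   [1 x i64]     -> i64         a single-element array adds nothing | |||
     | //   { i32, i32 }  -> unchanged   the first element is smaller than the | |||
     | //                                aggregate, so stripping stops | |||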
3657 | ||||
3658 | /// Try to find a partition of the aggregate type passed in for a given | |||
3659 | /// offset and size. | |||
3660 | /// | |||
3661 | /// This recurses through the aggregate type and tries to compute a subtype | |||
3662 | /// based on the offset and size. When the offset and size span a sub-section | |||
3663 | /// of an array, it will even compute a new array type for that sub-section, | |||
3664 | /// and the same for structs. | |||
3665 | /// | |||
3666 | /// Note that this routine is very strict and tries to find a partition of the | |||
3667 | /// type which produces the *exact* right offset and size. It is not forgiving | |||
3668 | /// when the size or offset causes either end of the type-based partition to be off. | |||
3669 | /// Also, this is a best-effort routine. It is reasonable to give up and not | |||
3670 | /// return a type if necessary. | |||
3671 | static Type *getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset, | |||
3672 | uint64_t Size) { | |||
3673 | if (Offset == 0 && DL.getTypeAllocSize(Ty).getFixedSize() == Size) | |||
3674 | return stripAggregateTypeWrapping(DL, Ty); | |||
3675 | if (Offset > DL.getTypeAllocSize(Ty).getFixedSize() || | |||
3676 | (DL.getTypeAllocSize(Ty).getFixedSize() - Offset) < Size) | |||
3677 | return nullptr; | |||
3678 | ||||
3679 | if (isa<ArrayType>(Ty) || isa<VectorType>(Ty)) { | |||
3680 | Type *ElementTy; | |||
3681 | uint64_t TyNumElements; | |||
3682 | if (auto *AT = dyn_cast<ArrayType>(Ty)) { | |||
3683 | ElementTy = AT->getElementType(); | |||
3684 | TyNumElements = AT->getNumElements(); | |||
3685 | } else { | |||
3686 | // FIXME: This isn't right for vectors with non-byte-sized or | |||
3687 | // non-power-of-two sized elements. | |||
3688 | auto *VT = cast<FixedVectorType>(Ty); | |||
3689 | ElementTy = VT->getElementType(); | |||
3690 | TyNumElements = VT->getNumElements(); | |||
3691 | } | |||
3692 | uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedSize(); | |||
3693 | uint64_t NumSkippedElements = Offset / ElementSize; | |||
3694 | if (NumSkippedElements >= TyNumElements) | |||
3695 | return nullptr; | |||
3696 | Offset -= NumSkippedElements * ElementSize; | |||
3697 | ||||
3698 | // First check if we need to recurse. | |||
3699 | if (Offset > 0 || Size < ElementSize) { | |||
3700 | // Bail if the partition ends in a different array element. | |||
3701 | if ((Offset + Size) > ElementSize) | |||
3702 | return nullptr; | |||
3703 | // Recurse through the element type trying to peel off offset bytes. | |||
3704 | return getTypePartition(DL, ElementTy, Offset, Size); | |||
3705 | } | |||
3706 | assert(Offset == 0)((void)0); | |||
3707 | ||||
3708 | if (Size == ElementSize) | |||
3709 | return stripAggregateTypeWrapping(DL, ElementTy); | |||
3710 | assert(Size > ElementSize)((void)0); | |||
3711 | uint64_t NumElements = Size / ElementSize; | |||
3712 | if (NumElements * ElementSize != Size) | |||
3713 | return nullptr; | |||
3714 | return ArrayType::get(ElementTy, NumElements); | |||
3715 | } | |||
3716 | ||||
3717 | StructType *STy = dyn_cast<StructType>(Ty); | |||
3718 | if (!STy) | |||
3719 | return nullptr; | |||
3720 | ||||
3721 | const StructLayout *SL = DL.getStructLayout(STy); | |||
3722 | if (Offset >= SL->getSizeInBytes()) | |||
3723 | return nullptr; | |||
3724 | uint64_t EndOffset = Offset + Size; | |||
3725 | if (EndOffset > SL->getSizeInBytes()) | |||
3726 | return nullptr; | |||
3727 | ||||
3728 | unsigned Index = SL->getElementContainingOffset(Offset); | |||
3729 | Offset -= SL->getElementOffset(Index); | |||
3730 | ||||
3731 | Type *ElementTy = STy->getElementType(Index); | |||
3732 | uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedSize(); | |||
3733 | if (Offset >= ElementSize) | |||
3734 | return nullptr; // The offset points into alignment padding. | |||
3735 | ||||
3736 | // See if any partition must be contained by the element. | |||
3737 | if (Offset > 0 || Size < ElementSize) { | |||
3738 | if ((Offset + Size) > ElementSize) | |||
3739 | return nullptr; | |||
3740 | return getTypePartition(DL, ElementTy, Offset, Size); | |||
3741 | } | |||
3742 | assert(Offset == 0)((void)0); | |||
3743 | ||||
3744 | if (Size == ElementSize) | |||
3745 | return stripAggregateTypeWrapping(DL, ElementTy); | |||
3746 | ||||
3747 | StructType::element_iterator EI = STy->element_begin() + Index, | |||
3748 | EE = STy->element_end(); | |||
3749 | if (EndOffset < SL->getSizeInBytes()) { | |||
3750 | unsigned EndIndex = SL->getElementContainingOffset(EndOffset); | |||
3751 | if (Index == EndIndex) | |||
3752 | return nullptr; // Within a single element and its padding. | |||
3753 | ||||
3754 | // Don't try to form "natural" types if the elements don't line up with the | |||
3755 | // expected size. | |||
3756 | // FIXME: We could potentially recurse down through the last element in the | |||
3757 | // sub-struct to find a natural end point. | |||
3758 | if (SL->getElementOffset(EndIndex) != EndOffset) | |||
3759 | return nullptr; | |||
3760 | ||||
3761 | assert(Index < EndIndex)((void)0); | |||
3762 | EE = STy->element_begin() + EndIndex; | |||
3763 | } | |||
3764 | ||||
3765 | // Try to build up a sub-structure. | |||
3766 | StructType *SubTy = | |||
3767 | StructType::get(STy->getContext(), makeArrayRef(EI, EE), STy->isPacked()); | |||
3768 | const StructLayout *SubSL = DL.getStructLayout(SubTy); | |||
3769 | if (Size != SubSL->getSizeInBytes()) | |||
3770 | return nullptr; // The sub-struct doesn't have quite the size needed. | |||
3771 | ||||
3772 | return SubTy; | |||
3773 | } | |||
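     | ||||
     | // A worked example (assuming a data layout where i64 is 8-byte aligned), | |||
     | // with %T = { i32, i32, i64 } whose alloc size is 16 bytes: | |||
     | //   getTypePartition(DL, %T, 8, 8) -> i64           (exactly the third element) | |||
     | //   getTypePartition(DL, %T, 0, 8) -> { i32, i32 }  (a leading sub-struct) | |||
     | //   getTypePartition(DL, %T, 4, 8) -> nullptr       (ends in the middle of | |||
     | //                                                    the i64 element) | |||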
3774 | ||||
3775 | /// Pre-split loads and stores to simplify rewriting. | |||
3776 | /// | |||
3777 | /// We want to break up the splittable load+store pairs as much as | |||
3778 | /// possible. This is important to do as a preprocessing step, as once we | |||
3779 | /// start rewriting the accesses to partitions of the alloca we lose the | |||
3780 | /// necessary information to correctly split apart paired loads and stores | |||
3781 | /// which both point into this alloca. The case to consider is something like | |||
3782 | /// the following: | |||
3783 | /// | |||
3784 | /// %a = alloca [12 x i8] | |||
3785 | /// %gep1 = getelementptr [12 x i8]* %a, i32 0, i32 0 | |||
3786 | /// %gep2 = getelementptr [12 x i8]* %a, i32 0, i32 4 | |||
3787 | /// %gep3 = getelementptr [12 x i8]* %a, i32 0, i32 8 | |||
3788 | /// %iptr1 = bitcast i8* %gep1 to i64* | |||
3789 | /// %iptr2 = bitcast i8* %gep2 to i64* | |||
3790 | /// %fptr1 = bitcast i8* %gep1 to float* | |||
3791 | /// %fptr2 = bitcast i8* %gep2 to float* | |||
3792 | /// %fptr3 = bitcast i8* %gep3 to float* | |||
3793 | /// store float 0.0, float* %fptr1 | |||
3794 | /// store float 1.0, float* %fptr2 | |||
3795 | /// %v = load i64* %iptr1 | |||
3796 | /// store i64 %v, i64* %iptr2 | |||
3797 | /// %f1 = load float* %fptr2 | |||
3798 | /// %f2 = load float* %fptr3 | |||
3799 | /// | |||
3800 | /// Here we want to form 3 partitions of the alloca, each 4 bytes large, and | |||
3801 | /// promote everything so we recover the 2 SSA values that should have been | |||
3802 | /// there all along. | |||
3803 | /// | |||
3804 | /// \returns true if any changes are made. | |||
3805 | bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { | |||
3806 | LLVM_DEBUG(dbgs() << "Pre-splitting loads and stores\n")do { } while (false); | |||
3807 | ||||
3808 | // Track the loads and stores which are candidates for pre-splitting here, in | |||
3809 | // the order they first appear during the partition scan. These give stable | |||
3810 | // iteration order and a basis for tracking which loads and stores we | |||
3811 | // actually split. | |||
3812 | SmallVector<LoadInst *, 4> Loads; | |||
3813 | SmallVector<StoreInst *, 4> Stores; | |||
3814 | ||||
3815 | // We need to accumulate the splits required of each load or store where we | |||
3816 | // can find them via a direct lookup. This is important to cross-check loads | |||
3817 | // and stores against each other. We also track the slice so that we can kill | |||
3818 | // all the slices that end up split. | |||
3819 | struct SplitOffsets { | |||
3820 | Slice *S; | |||
3821 | std::vector<uint64_t> Splits; | |||
3822 | }; | |||
3823 | SmallDenseMap<Instruction *, SplitOffsets, 8> SplitOffsetsMap; | |||
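     | ||||
     | // For instance, in the 12-byte example from the function comment above, the | |||
     | // i64 load of bytes [0, 8) straddles the 4-byte partitions and is recorded | |||
     | // with Splits == {4}: only interior split points are kept, since the | |||
     | // access's own size fixes the final end. | |||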
3824 | ||||
3825 | // Track loads out of this alloca which cannot, for any reason, be pre-split. | |||
3826 | // This is important as we also cannot pre-split stores of those loads! | |||
3827 | // FIXME: This is all pretty gross. It means that we can be more aggressive | |||
3828 | // in pre-splitting when the load feeding the store happens to come from | |||
3829 | // a separate alloca. Put another way, the effectiveness of SROA would be | |||
3830 | // decreased by a frontend which just concatenated all of its local allocas | |||
3831 | // into one big flat alloca. But defeating such patterns is exactly the job | |||
3832 | // SROA is tasked with! Sadly, to not have this discrepancy we would have | |||
3833 | // change store pre-splitting to actually force pre-splitting of the load | |||
3834 | // that feeds it *and all stores*. That makes pre-splitting much harder, but | |||
3835 | // maybe it would make it more principled? | |||
3836 | SmallPtrSet<LoadInst *, 8> UnsplittableLoads; | |||
3837 | ||||
3838 | LLVM_DEBUG(dbgs() << " Searching for candidate loads and stores\n")do { } while (false); | |||
3839 | for (auto &P : AS.partitions()) { | |||
3840 | for (Slice &S : P) { | |||
3841 | Instruction *I = cast<Instruction>(S.getUse()->getUser()); | |||
3842 | if (!S.isSplittable() || S.endOffset() <= P.endOffset()) { | |||
3843 | // If this is a load we have to track that it can't participate in any | |||
3844 | // pre-splitting. If this is a store of a load we have to track that | |||
3845 | // that load also can't participate in any pre-splitting. | |||
3846 | if (auto *LI = dyn_cast<LoadInst>(I)) | |||
3847 | UnsplittableLoads.insert(LI); | |||
3848 | else if (auto *SI = dyn_cast<StoreInst>(I)) | |||
3849 | if (auto *LI = dyn_cast<LoadInst>(SI->getValueOperand())) | |||
3850 | UnsplittableLoads.insert(LI); | |||
3851 | continue; | |||
3852 | } | |||
3853 | assert(P.endOffset() > S.beginOffset() &&((void)0) | |||
3854 | "Empty or backwards partition!")((void)0); | |||
3855 | ||||
3856 | // Determine if this is a pre-splittable slice. | |||
3857 | if (auto *LI = dyn_cast<LoadInst>(I)) { | |||
3858 | assert(!LI->isVolatile() && "Cannot split volatile loads!")((void)0); | |||
3859 | ||||
3860 | // The load must be used exclusively to store into other pointers for | |||
3861 | // us to be able to arbitrarily pre-split it. The stores must also be | |||
3862 | // simple to avoid changing semantics. | |||
3863 | auto IsLoadSimplyStored = [](LoadInst *LI) { | |||
3864 | for (User *LU : LI->users()) { | |||
3865 | auto *SI = dyn_cast<StoreInst>(LU); | |||
3866 | if (!SI || !SI->isSimple()) | |||
3867 | return false; | |||
3868 | } | |||
3869 | return true; | |||
3870 | }; | |||
3871 | if (!IsLoadSimplyStored(LI)) { | |||
3872 | UnsplittableLoads.insert(LI); | |||
3873 | continue; | |||
3874 | } | |||
3875 | ||||
3876 | Loads.push_back(LI); | |||
3877 | } else if (auto *SI = dyn_cast<StoreInst>(I)) { | |||
3878 | if (S.getUse() != &SI->getOperandUse(SI->getPointerOperandIndex())) | |||
3879 | // Skip stores *of* pointers. FIXME: This shouldn't even be possible! | |||
3880 | continue; | |||
3881 | auto *StoredLoad = dyn_cast<LoadInst>(SI->getValueOperand()); | |||
3882 | if (!StoredLoad || !StoredLoad->isSimple()) | |||
3883 | continue; | |||
3884 | assert(!SI->isVolatile() && "Cannot split volatile stores!")((void)0); | |||
3885 | ||||
3886 | Stores.push_back(SI); | |||
3887 | } else { | |||
3888 | // Other uses cannot be pre-split. | |||
3889 | continue; | |||
3890 | } | |||
3891 | ||||
3892 | // Record the initial split. | |||
3893 | LLVM_DEBUG(dbgs() << " Candidate: " << *I << "\n")do { } while (false); | |||
3894 | auto &Offsets = SplitOffsetsMap[I]; | |||
3895 | assert(Offsets.Splits.empty() &&((void)0) | |||
3896 | "Should not have splits the first time we see an instruction!")((void)0); | |||
3897 | Offsets.S = &S; | |||
3898 | Offsets.Splits.push_back(P.endOffset() - S.beginOffset()); | |||
3899 | } | |||
3900 | ||||
3901 | // Now scan the already split slices, and add a split for any of them which | |||
3902 | // we're going to pre-split. | |||
3903 | for (Slice *S : P.splitSliceTails()) { | |||
3904 | auto SplitOffsetsMapI = | |||
3905 | SplitOffsetsMap.find(cast<Instruction>(S->getUse()->getUser())); | |||
3906 | if (SplitOffsetsMapI == SplitOffsetsMap.end()) | |||
3907 | continue; | |||
3908 | auto &Offsets = SplitOffsetsMapI->second; | |||
3909 | ||||
3910 | assert(Offsets.S == S && "Found a mismatched slice!")((void)0); | |||
3911 | assert(!Offsets.Splits.empty() &&((void)0) | |||
3912 | "Cannot have an empty set of splits on the second partition!")((void)0); | |||
3913 | assert(Offsets.Splits.back() ==((void)0) | |||
3914 | P.beginOffset() - Offsets.S->beginOffset() &&((void)0) | |||
3915 | "Previous split does not end where this one begins!")((void)0); | |||
3916 | ||||
3917 | // Record each split. The last partition's end isn't needed as the size | |||
3918 | // of the slice dictates that. | |||
3919 | if (S->endOffset() > P.endOffset()) | |||
3920 | Offsets.Splits.push_back(P.endOffset() - Offsets.S->beginOffset()); | |||
3921 | } | |||
3922 | } | |||
3923 | ||||
3924 | // We may have split loads where some of their stores are split stores. For | |||
3925 | // such loads and stores, we can only pre-split them if their splits exactly | |||
3926 | // match relative to their starting offset. We have to verify this prior to | |||
3927 | // any rewriting. | |||
3928 | llvm::erase_if(Stores, [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) { | |||
3929 | // Lookup the load we are storing in our map of split | |||
3930 | // offsets. | |||
3931 | auto *LI = cast<LoadInst>(SI->getValueOperand()); | |||
3932 | // If it was completely unsplittable, then we're done, | |||
3933 | // and this store can't be pre-split. | |||
3934 | if (UnsplittableLoads.count(LI)) | |||
3935 | return true; | |||
3936 | ||||
3937 | auto LoadOffsetsI = SplitOffsetsMap.find(LI); | |||
3938 | if (LoadOffsetsI == SplitOffsetsMap.end()) | |||
3939 | return false; // Unrelated loads are definitely safe. | |||
3940 | auto &LoadOffsets = LoadOffsetsI->second; | |||
3941 | ||||
3942 | // Now lookup the store's offsets. | |||
3943 | auto &StoreOffsets = SplitOffsetsMap[SI]; | |||
3944 | ||||
3945 | // If the relative offsets of each split in the load and | |||
3946 | // store match exactly, then we can split them and we | |||
3947 | // don't need to remove them here. | |||
3948 | if (LoadOffsets.Splits == StoreOffsets.Splits) | |||
3949 | return false; | |||
3950 | ||||
3951 | LLVM_DEBUG(dbgs() << " Mismatched splits for load and store:\n"do { } while (false) | |||
3952 | << " " << *LI << "\n"do { } while (false) | |||
3953 | << " " << *SI << "\n")do { } while (false); | |||
3954 | ||||
3955 | // We've found a store and load that we need to split | |||
3956 | // with mismatched relative splits. Just give up on them | |||
3957 | // and remove both instructions from our list of | |||
3958 | // candidates. | |||
3959 | UnsplittableLoads.insert(LI); | |||
3960 | return true; | |||
3961 | }); | |||
3962 | // Now we have to go *back* through all the stores, because a later store may | |||
3963 | // have caused an earlier store's load to become unsplittable and if it is | |||
3964 | // unsplittable for the later store, then we can't rely on it being split in | |||
3965 | // the earlier store either. | |||
3966 | llvm::erase_if(Stores, [&UnsplittableLoads](StoreInst *SI) { | |||
3967 | auto *LI = cast<LoadInst>(SI->getValueOperand()); | |||
3968 | return UnsplittableLoads.count(LI); | |||
3969 | }); | |||
3970 | // Once we've established all the loads that can't be split for some reason, | |||
3971 | // filter any that made it into our list out. | |||
3972 | llvm::erase_if(Loads, [&UnsplittableLoads](LoadInst *LI) { | |||
3973 | return UnsplittableLoads.count(LI); | |||
3974 | }); | |||
3975 | ||||
3976 | // If no loads or stores are left, there is no pre-splitting to be done for | |||
3977 | // this alloca. | |||
3978 | if (Loads.empty() && Stores.empty()) | |||
3979 | return false; | |||
3980 | ||||
3981 | // From here on, we can't fail and will be building new accesses, so rig up | |||
3982 | // an IR builder. | |||
3983 | IRBuilderTy IRB(&AI); | |||
3984 | ||||
3985 | // Collect the new slices which we will merge into the alloca slices. | |||
3986 | SmallVector<Slice, 4> NewSlices; | |||
3987 | ||||
3988 | // Track any allocas we end up splitting loads and stores for so we iterate | |||
3989 | // on them. | |||
3990 | SmallPtrSet<AllocaInst *, 4> ResplitPromotableAllocas; | |||
3991 | ||||
3992 | // At this point, we have collected all of the loads and stores we can | |||
3993 | // pre-split, and the specific splits needed for them. We actually do the | |||
3994 | // splitting in a specific order so that we can handle the case where one of | |||
3995 | // the loads is the value operand to one of the stores. | |||
3996 | // | |||
3997 | // First, we rewrite all of the split loads, and just accumulate each split | |||
3998 | // load in a parallel structure. We also build the slices for them and append | |||
3999 | // them to the alloca slices. | |||
4000 | SmallDenseMap<LoadInst *, std::vector<LoadInst *>, 1> SplitLoadsMap; | |||
4001 | std::vector<LoadInst *> SplitLoads; | |||
4002 | const DataLayout &DL = AI.getModule()->getDataLayout(); | |||
4003 | for (LoadInst *LI : Loads) { | |||
4004 | SplitLoads.clear(); | |||
4005 | ||||
4006 | IntegerType *Ty = cast<IntegerType>(LI->getType()); | |||
4007 | assert(Ty->getBitWidth() % 8 == 0)((void)0); | |||
4008 | uint64_t LoadSize = Ty->getBitWidth() / 8; | |||
4009 | assert(LoadSize > 0 && "Cannot have a zero-sized integer load!")((void)0); | |||
4010 | ||||
4011 | auto &Offsets = SplitOffsetsMap[LI]; | |||
4012 | assert(LoadSize == Offsets.S->endOffset() - Offsets.S->beginOffset() &&((void)0) | |||
4013 | "Slice size should always match load size exactly!")((void)0); | |||
4014 | uint64_t BaseOffset = Offsets.S->beginOffset(); | |||
4015 | assert(BaseOffset + LoadSize > BaseOffset &&((void)0) | |||
4016 | "Cannot represent alloca access size using 64-bit integers!")((void)0); | |||
4017 | ||||
4018 | Instruction *BasePtr = cast<Instruction>(LI->getPointerOperand()); | |||
4019 | IRB.SetInsertPoint(LI); | |||
4020 | ||||
4021 | LLVM_DEBUG(dbgs() << " Splitting load: " << *LI << "\n")do { } while (false); | |||
4022 | ||||
4023 | uint64_t PartOffset = 0, PartSize = Offsets.Splits.front(); | |||
4024 | int Idx = 0, Size = Offsets.Splits.size(); | |||
4025 | for (;;) { | |||
4026 | auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8); | |||
4027 | auto AS = LI->getPointerAddressSpace(); | |||
4028 | auto *PartPtrTy = PartTy->getPointerTo(AS); | |||
4029 | LoadInst *PLoad = IRB.CreateAlignedLoad( | |||
4030 | PartTy, | |||
4031 | getAdjustedPtr(IRB, DL, BasePtr, | |||
4032 | APInt(DL.getIndexSizeInBits(AS), PartOffset), | |||
4033 | PartPtrTy, BasePtr->getName() + "."), | |||
4034 | getAdjustedAlignment(LI, PartOffset), | |||
4035 | /*IsVolatile*/ false, LI->getName()); | |||
4036 | PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access, | |||
4037 | LLVMContext::MD_access_group}); | |||
4038 | ||||
4039 | // Append this load onto the list of split loads so we can find it later | |||
4040 | // to rewrite the stores. | |||
4041 | SplitLoads.push_back(PLoad); | |||
4042 | ||||
4043 | // Now build a new slice for the alloca. | |||
4044 | NewSlices.push_back( | |||
4045 | Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize, | |||
4046 | &PLoad->getOperandUse(PLoad->getPointerOperandIndex()), | |||
4047 | /*IsSplittable*/ false)); | |||
4048 | LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()do { } while (false) | |||
4049 | << ", " << NewSlices.back().endOffset()do { } while (false) | |||
4050 | << "): " << *PLoad << "\n")do { } while (false); | |||
4051 | ||||
4052 | // See if we've handled all the splits. | |||
4053 | if (Idx >= Size) | |||
4054 | break; | |||
4055 | ||||
4056 | // Setup the next partition. | |||
4057 | PartOffset = Offsets.Splits[Idx]; | |||
4058 | ++Idx; | |||
4059 | PartSize = (Idx < Size ? Offsets.Splits[Idx] : LoadSize) - PartOffset; | |||
4060 | } | |||
4061 | ||||
4062 | // Now that we have the split loads, do the slow walk over all uses of the | |||
4063 | // load and rewrite them as split stores, or save the split loads to use | |||
4064 | // below if the store is going to be split there anyway. | |||
4065 | bool DeferredStores = false; | |||
4066 | for (User *LU : LI->users()) { | |||
4067 | StoreInst *SI = cast<StoreInst>(LU); | |||
4068 | if (!Stores.empty() && SplitOffsetsMap.count(SI)) { | |||
4069 | DeferredStores = true; | |||
4070 | LLVM_DEBUG(dbgs() << " Deferred splitting of store: " << *SIdo { } while (false) | |||
4071 | << "\n")do { } while (false); | |||
4072 | continue; | |||
4073 | } | |||
4074 | ||||
4075 | Value *StoreBasePtr = SI->getPointerOperand(); | |||
4076 | IRB.SetInsertPoint(SI); | |||
4077 | ||||
4078 | LLVM_DEBUG(dbgs() << " Splitting store of load: " << *SI << "\n")do { } while (false); | |||
4079 | ||||
4080 | for (int Idx = 0, Size = SplitLoads.size(); Idx < Size; ++Idx) { | |||
4081 | LoadInst *PLoad = SplitLoads[Idx]; | |||
4082 | uint64_t PartOffset = Idx == 0 ? 0 : Offsets.Splits[Idx - 1]; | |||
4083 | auto *PartPtrTy = | |||
4084 | PLoad->getType()->getPointerTo(SI->getPointerAddressSpace()); | |||
4085 | ||||
4086 | auto AS = SI->getPointerAddressSpace(); | |||
4087 | StoreInst *PStore = IRB.CreateAlignedStore( | |||
4088 | PLoad, | |||
4089 | getAdjustedPtr(IRB, DL, StoreBasePtr, | |||
4090 | APInt(DL.getIndexSizeInBits(AS), PartOffset), | |||
4091 | PartPtrTy, StoreBasePtr->getName() + "."), | |||
4092 | getAdjustedAlignment(SI, PartOffset), | |||
4093 | /*IsVolatile*/ false); | |||
4094 | PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access, | |||
4095 | LLVMContext::MD_access_group}); | |||
4096 | LLVM_DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n")do { } while (false); | |||
4097 | } | |||
4098 | ||||
4099 | // We want to immediately iterate on any allocas impacted by splitting | |||
4100 | // this store, and we have to track any promotable alloca (indicated by | |||
4101 | // a direct store) as needing to be resplit because it is no longer | |||
4102 | // promotable. | |||
4103 | if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(StoreBasePtr)) { | |||
4104 | ResplitPromotableAllocas.insert(OtherAI); | |||
4105 | Worklist.insert(OtherAI); | |||
4106 | } else if (AllocaInst *OtherAI = dyn_cast<AllocaInst>( | |||
4107 | StoreBasePtr->stripInBoundsOffsets())) { | |||
4108 | Worklist.insert(OtherAI); | |||
4109 | } | |||
4110 | ||||
4111 | // Mark the original store as dead. | |||
4112 | DeadInsts.push_back(SI); | |||
4113 | } | |||
4114 | ||||
4115 | // Save the split loads if there are deferred stores among the users. | |||
4116 | if (DeferredStores) | |||
4117 | SplitLoadsMap.insert(std::make_pair(LI, std::move(SplitLoads))); | |||
4118 | ||||
4119 | // Mark the original load as dead and kill the original slice. | |||
4120 | DeadInsts.push_back(LI); | |||
4121 | Offsets.S->kill(); | |||
4122 | } | |||
4123 | ||||
4124 | // Second, we rewrite all of the split stores. At this point, we know that | |||
4125 | // all loads from this alloca have been split already. For stores of such | |||
4126 | // loads, we can simply look up the pre-existing split loads. For stores of | |||
4127 | // other loads, we split those loads first and then write split stores of | |||
4128 | // them. | |||
4129 | for (StoreInst *SI : Stores) { | |||
4130 | auto *LI = cast<LoadInst>(SI->getValueOperand()); | |||
4131 | IntegerType *Ty = cast<IntegerType>(LI->getType()); | |||
4132 | assert(Ty->getBitWidth() % 8 == 0)((void)0); | |||
4133 | uint64_t StoreSize = Ty->getBitWidth() / 8; | |||
4134 | assert(StoreSize > 0 && "Cannot have a zero-sized integer store!")((void)0); | |||
4135 | ||||
4136 | auto &Offsets = SplitOffsetsMap[SI]; | |||
4137 | assert(StoreSize == Offsets.S->endOffset() - Offsets.S->beginOffset() &&((void)0) | |||
4138 | "Slice size should always match load size exactly!")((void)0); | |||
4139 | uint64_t BaseOffset = Offsets.S->beginOffset(); | |||
4140 | assert(BaseOffset + StoreSize > BaseOffset &&((void)0) | |||
4141 | "Cannot represent alloca access size using 64-bit integers!")((void)0); | |||
4142 | ||||
4143 | Value *LoadBasePtr = LI->getPointerOperand(); | |||
4144 | Instruction *StoreBasePtr = cast<Instruction>(SI->getPointerOperand()); | |||
4145 | ||||
4146 | LLVM_DEBUG(dbgs() << " Splitting store: " << *SI << "\n")do { } while (false); | |||
4147 | ||||
4148 | // Check whether we have an already split load. | |||
4149 | auto SplitLoadsMapI = SplitLoadsMap.find(LI); | |||
4150 | std::vector<LoadInst *> *SplitLoads = nullptr; | |||
4151 | if (SplitLoadsMapI != SplitLoadsMap.end()) { | |||
4152 | SplitLoads = &SplitLoadsMapI->second; | |||
4153 | assert(SplitLoads->size() == Offsets.Splits.size() + 1 &&((void)0) | |||
4154 | "Too few split loads for the number of splits in the store!")((void)0); | |||
4155 | } else { | |||
4156 | LLVM_DEBUG(dbgs() << " of load: " << *LI << "\n")do { } while (false); | |||
4157 | } | |||
4158 | ||||
4159 | uint64_t PartOffset = 0, PartSize = Offsets.Splits.front(); | |||
4160 | int Idx = 0, Size = Offsets.Splits.size(); | |||
4161 | for (;;) { | |||
4162 | auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8); | |||
4163 | auto *LoadPartPtrTy = PartTy->getPointerTo(LI->getPointerAddressSpace()); | |||
4164 | auto *StorePartPtrTy = PartTy->getPointerTo(SI->getPointerAddressSpace()); | |||
4165 | ||||
4166 | // Either lookup a split load or create one. | |||
4167 | LoadInst *PLoad; | |||
4168 | if (SplitLoads) { | |||
4169 | PLoad = (*SplitLoads)[Idx]; | |||
4170 | } else { | |||
4171 | IRB.SetInsertPoint(LI); | |||
4172 | auto AS = LI->getPointerAddressSpace(); | |||
4173 | PLoad = IRB.CreateAlignedLoad( | |||
4174 | PartTy, | |||
4175 | getAdjustedPtr(IRB, DL, LoadBasePtr, | |||
4176 | APInt(DL.getIndexSizeInBits(AS), PartOffset), | |||
4177 | LoadPartPtrTy, LoadBasePtr->getName() + "."), | |||
4178 | getAdjustedAlignment(LI, PartOffset), | |||
4179 | /*IsVolatile*/ false, LI->getName()); | |||
4180 | PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access, | |||
4181 | LLVMContext::MD_access_group}); | |||
4182 | } | |||
4183 | ||||
4184 | // And store this partition. | |||
4185 | IRB.SetInsertPoint(SI); | |||
4186 | auto AS = SI->getPointerAddressSpace(); | |||
4187 | StoreInst *PStore = IRB.CreateAlignedStore( | |||
4188 | PLoad, | |||
4189 | getAdjustedPtr(IRB, DL, StoreBasePtr, | |||
4190 | APInt(DL.getIndexSizeInBits(AS), PartOffset), | |||
4191 | StorePartPtrTy, StoreBasePtr->getName() + "."), | |||
4192 | getAdjustedAlignment(SI, PartOffset), | |||
4193 | /*IsVolatile*/ false); | |||
4194 | PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access, | |||
4195 | LLVMContext::MD_access_group}); | |||
4196 | ||||
4197 | // Now build a new slice for the alloca. | |||
4198 | NewSlices.push_back( | |||
4199 | Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize, | |||
4200 | &PStore->getOperandUse(PStore->getPointerOperandIndex()), | |||
4201 | /*IsSplittable*/ false)); | |||
4202 | LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()do { } while (false) | |||
4203 | << ", " << NewSlices.back().endOffset()do { } while (false) | |||
4204 | << "): " << *PStore << "\n")do { } while (false); | |||
4205 | if (!SplitLoads) { | |||
4206 | LLVM_DEBUG(dbgs() << " of split load: " << *PLoad << "\n")do { } while (false); | |||
4207 | } | |||
4208 | ||||
4209 | // See if we've finished all the splits. | |||
4210 | if (Idx >= Size) | |||
4211 | break; | |||
4212 | ||||
4213 | // Setup the next partition. | |||
4214 | PartOffset = Offsets.Splits[Idx]; | |||
4215 | ++Idx; | |||
4216 | PartSize = (Idx < Size ? Offsets.Splits[Idx] : StoreSize) - PartOffset; | |||
4217 | } | |||
4218 | ||||
4219 | // We want to immediately iterate on any allocas impacted by splitting | |||
4220 | // this load, which is only relevant if it isn't a load of this alloca and | |||
4221 | // thus we didn't already split the loads above. We also have to keep track | |||
4222 | // of any promotable allocas we split loads on as they can no longer be | |||
4223 | // promoted. | |||
4224 | if (!SplitLoads) { | |||
4225 | if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(LoadBasePtr)) { | |||
4226 | assert(OtherAI != &AI && "We can't re-split our own alloca!")((void)0); | |||
4227 | ResplitPromotableAllocas.insert(OtherAI); | |||
4228 | Worklist.insert(OtherAI); | |||
4229 | } else if (AllocaInst *OtherAI = dyn_cast<AllocaInst>( | |||
4230 | LoadBasePtr->stripInBoundsOffsets())) { | |||
4231 | assert(OtherAI != &AI && "We can't re-split our own alloca!")((void)0); | |||
4232 | Worklist.insert(OtherAI); | |||
4233 | } | |||
4234 | } | |||
4235 | ||||
4236 | // Mark the original store as dead now that we've split it up and kill its | |||
4237 | // slice. Note that we leave the original load in place unless this store | |||
4238 | // was its only use. It may in turn be split up if it is an alloca load | |||
4239 | // for some other alloca, but it may be a normal load. This may introduce | |||
4240 | // redundant loads, but where those can be merged the rest of the optimizer | |||
4241 | // should handle the merging, and this uncovers SSA splits, which is more | |||
4242 | // important. In practice, the original loads will almost always be fully | |||
4243 | // split and removed eventually, and the splits will be merged by any | |||
4244 | // trivial CSE, including instcombine. | |||
4245 | if (LI->hasOneUse()) { | |||
4246 | assert(*LI->user_begin() == SI && "Single use isn't this store!")((void)0); | |||
4247 | DeadInsts.push_back(LI); | |||
4248 | } | |||
4249 | DeadInsts.push_back(SI); | |||
4250 | Offsets.S->kill(); | |||
4251 | } | |||
4252 | ||||
4253 | // Remove the killed slices that have been pre-split. | |||
4254 | llvm::erase_if(AS, [](const Slice &S) { return S.isDead(); }); | |||
4255 | ||||
4256 | // Insert our new slices. This will sort and merge them into the sorted | |||
4257 | // sequence. | |||
4258 | AS.insert(NewSlices); | |||
4259 | ||||
4260 | LLVM_DEBUG(dbgs() << " Pre-split slices:\n")do { } while (false); | |||
4261 | #ifndef NDEBUG1 | |||
4262 | for (auto I = AS.begin(), E = AS.end(); I != E; ++I) | |||
4263 | LLVM_DEBUG(AS.print(dbgs(), I, " "))do { } while (false); | |||
4264 | #endif | |||
4265 | ||||
4266 | // Finally, don't try to promote any allocas that now require re-splitting. | |||
4267 | // They have already been added to the worklist above. | |||
4268 | llvm::erase_if(PromotableAllocas, [&](AllocaInst *AI) { | |||
4269 | return ResplitPromotableAllocas.count(AI); | |||
4270 | }); | |||
4271 | ||||
4272 | return true; | |||
4273 | } | |||
4274 | ||||
4275 | /// Rewrite an alloca partition's users. | |||
4276 | /// | |||
4277 | /// This routine drives both of the rewriting goals of the SROA pass. It tries | |||
4278 | /// to rewrite uses of an alloca partition to be conducive for SSA value | |||
4279 | /// promotion. If the partition needs a new, more refined alloca, this will | |||
4280 | /// build that new alloca, preserving as much type information as possible, and | |||
4281 | /// rewrite the uses of the old alloca to point at the new one and have the | |||
4282 | /// appropriate new offsets. It also evaluates how successful the rewrite was | |||
4283 | /// at enabling promotion and if it was successful queues the alloca to be | |||
4284 | /// promoted. | |||
4285 | AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, | |||
4286 | Partition &P) { | |||
4287 | // Try to compute a friendly type for this partition of the alloca. This | |||
4288 | // won't always succeed, in which case we fall back to a legal integer type | |||
4289 | // or an i8 array of an appropriate size. | |||
4290 | Type *SliceTy = nullptr; | |||
4291 | const DataLayout &DL = AI.getModule()->getDataLayout(); | |||
4292 | std::pair<Type *, IntegerType *> CommonUseTy = | |||
4293 | findCommonType(P.begin(), P.end(), P.endOffset()); | |||
4294 | // Do all uses operate on the same type? | |||
4295 | if (CommonUseTy.first) | |||
4296 | if (DL.getTypeAllocSize(CommonUseTy.first).getFixedSize() >= P.size()) | |||
4297 | SliceTy = CommonUseTy.first; | |||
4298 | // If not, can we find an appropriate subtype in the original allocated type? | |||
4299 | if (!SliceTy) | |||
4300 | if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(), | |||
4301 | P.beginOffset(), P.size())) | |||
4302 | SliceTy = TypePartitionTy; | |||
4303 | // If still not, can we use the largest bitwidth integer type used? | |||
4304 | if (!SliceTy && CommonUseTy.second) | |||
4305 | if (DL.getTypeAllocSize(CommonUseTy.second).getFixedSize() >= P.size()) | |||
4306 | SliceTy = CommonUseTy.second; | |||
4307 | if ((!SliceTy || (SliceTy->isArrayTy() && | |||
4308 | SliceTy->getArrayElementType()->isIntegerTy())) && | |||
4309 | DL.isLegalInteger(P.size() * 8)) | |||
4310 | SliceTy = Type::getIntNTy(*C, P.size() * 8); | |||
4311 | if (!SliceTy) | |||
4312 | SliceTy = ArrayType::get(Type::getInt8Ty(*C), P.size()); | |||
4313 | assert(DL.getTypeAllocSize(SliceTy).getFixedSize() >= P.size())((void)0); | |||
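     | ||||
     | // To sketch the cascade above: for a 4-byte partition whose uses all | |||
     | // operate on float, SliceTy becomes float; failing that, a sub-element of | |||
     | // the original alloca type exactly covering the partition is tried, then | |||
     | // the widest integer type among the uses, then a legal integer of the | |||
     | // partition's width, and finally [N x i8] as the catch-all. | |||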
4314 | ||||
4315 | bool IsIntegerPromotable = isIntegerWideningViable(P, SliceTy, DL); | |||
4316 | ||||
4317 | VectorType *VecTy = | |||
4318 | IsIntegerPromotable ? nullptr : isVectorPromotionViable(P, DL); | |||
4319 | if (VecTy) | |||
4320 | SliceTy = VecTy; | |||
4321 | ||||
4322 | // Check for the case where we're going to rewrite to a new alloca of the | |||
4323 | // exact same type as the original, and with the same access offsets. In that | |||
4324 | // case, re-use the existing alloca, but still run through the rewriter to | |||
4325 | // perform phi and select speculation. | |||
4326 | // P.beginOffset() can be non-zero even with the same type in a case with | |||
4327 | // out-of-bounds access (e.g. @PR35657 function in SROA/basictest.ll). | |||
4328 | AllocaInst *NewAI; | |||
4329 | if (SliceTy == AI.getAllocatedType() && P.beginOffset() == 0) { | |||
4330 | NewAI = &AI; | |||
4331 | // FIXME: We should be able to bail at this point with "nothing changed". | |||
4332 | // FIXME: We might want to defer PHI speculation until after here. | |||
4333 | // FIXME: return nullptr; | |||
4334 | } else { | |||
4335 | // Make sure the alignment is compatible with P.beginOffset(). | |||
4336 | const Align Alignment = commonAlignment(AI.getAlign(), P.beginOffset()); | |||
4337 | // If we will get at least this much alignment from the type alone, leave | |||
4338 | // the alloca's alignment unconstrained. | |||
4339 | const bool IsUnconstrained = Alignment <= DL.getABITypeAlign(SliceTy); | |||
4340 | NewAI = new AllocaInst( | |||
4341 | SliceTy, AI.getType()->getAddressSpace(), nullptr, | |||
4342 | IsUnconstrained ? DL.getPrefTypeAlign(SliceTy) : Alignment, | |||
4343 | AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()), &AI); | |||
4344 | // Copy the old AI debug location over to the new one. | |||
4345 | NewAI->setDebugLoc(AI.getDebugLoc()); | |||
4346 | ++NumNewAllocas; | |||
4347 | } | |||
4348 | ||||
4349 | LLVM_DEBUG(dbgs() << "Rewriting alloca partition "do { } while (false) | |||
4350 | << "[" << P.beginOffset() << "," << P.endOffset()do { } while (false) | |||
4351 | << ") to: " << *NewAI << "\n")do { } while (false); | |||
4352 | ||||
4353 | // Track the high watermark on the worklist as it is only relevant for | |||
4354 | // promoted allocas. We will reset it to this point if the alloca is not in | |||
4355 | // fact scheduled for promotion. | |||
4356 | unsigned PPWOldSize = PostPromotionWorklist.size(); | |||
4357 | unsigned NumUses = 0; | |||
4358 | SmallSetVector<PHINode *, 8> PHIUsers; | |||
4359 | SmallSetVector<SelectInst *, 8> SelectUsers; | |||
4360 | ||||
4361 | AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(), | |||
4362 | P.endOffset(), IsIntegerPromotable, VecTy, | |||
4363 | PHIUsers, SelectUsers); | |||
4364 | bool Promotable = true; | |||
4365 | for (Slice *S : P.splitSliceTails()) { | |||
4366 | Promotable &= Rewriter.visit(S); | |||
4367 | ++NumUses; | |||
4368 | } | |||
4369 | for (Slice &S : P) { | |||
4370 | Promotable &= Rewriter.visit(&S); | |||
4371 | ++NumUses; | |||
4372 | } | |||
4373 | ||||
4374 | NumAllocaPartitionUses += NumUses; | |||
4375 | MaxUsesPerAllocaPartition.updateMax(NumUses); | |||
4376 | ||||
4377 | // Now that we've processed all the slices in the new partition, check if any | |||
4378 | // PHIs or Selects would block promotion. | |||
4379 | for (PHINode *PHI : PHIUsers) | |||
4380 | if (!isSafePHIToSpeculate(*PHI)) { | |||
4381 | Promotable = false; | |||
4382 | PHIUsers.clear(); | |||
4383 | SelectUsers.clear(); | |||
4384 | break; | |||
4385 | } | |||
4386 | ||||
4387 | for (SelectInst *Sel : SelectUsers) | |||
4388 | if (!isSafeSelectToSpeculate(*Sel)) { | |||
4389 | Promotable = false; | |||
4390 | PHIUsers.clear(); | |||
4391 | SelectUsers.clear(); | |||
4392 | break; | |||
4393 | } | |||
4394 | ||||
4395 | if (Promotable) { | |||
4396 | for (Use *U : AS.getDeadUsesIfPromotable()) { | |||
4397 | auto *OldInst = dyn_cast<Instruction>(U->get()); | |||
4398 | Value::dropDroppableUse(*U); | |||
4399 | if (OldInst) | |||
4400 | if (isInstructionTriviallyDead(OldInst)) | |||
4401 | DeadInsts.push_back(OldInst); | |||
4402 | } | |||
4403 | if (PHIUsers.empty() && SelectUsers.empty()) { | |||
4404 | // Promote the alloca. | |||
4405 | PromotableAllocas.push_back(NewAI); | |||
4406 | } else { | |||
4407 | // If we have either PHIs or Selects to speculate, add them to those | |||
4408 | // worklists and re-queue the new alloca so that we promote it on the | |||
4409 | // next iteration. | |||
4410 | for (PHINode *PHIUser : PHIUsers) | |||
4411 | SpeculatablePHIs.insert(PHIUser); | |||
4412 | for (SelectInst *SelectUser : SelectUsers) | |||
4413 | SpeculatableSelects.insert(SelectUser); | |||
4414 | Worklist.insert(NewAI); | |||
4415 | } | |||
4416 | } else { | |||
4417 | // Drop any post-promotion work items if promotion didn't happen. | |||
4418 | while (PostPromotionWorklist.size() > PPWOldSize) | |||
4419 | PostPromotionWorklist.pop_back(); | |||
4420 | ||||
4421 | // We couldn't promote and we didn't create a new partition, so nothing | |||
4422 | // happened. | |||
4423 | if (NewAI == &AI) | |||
4424 | return nullptr; | |||
4425 | ||||
4426 | // If we can't promote the alloca, iterate on it to check for new | |||
4427 | // refinements exposed by splitting the current alloca. Don't iterate on an | |||
4428 | // alloca which didn't actually change and didn't get promoted. | |||
4429 | Worklist.insert(NewAI); | |||
4430 | } | |||
4431 | ||||
4432 | return NewAI; | |||
4433 | } | |||
4434 | ||||
4435 | /// Walks the slices of an alloca and form partitions based on them, | |||
4436 | /// rewriting each of their uses. | |||
4437 | bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) { | |||
4438 | if (AS.begin() == AS.end()) | |||
4439 | return false; | |||
4440 | ||||
4441 | unsigned NumPartitions = 0; | |||
4442 | bool Changed = false; | |||
4443 | const DataLayout &DL = AI.getModule()->getDataLayout(); | |||
4444 | ||||
4445 | // First try to pre-split loads and stores. | |||
4446 | Changed |= presplitLoadsAndStores(AI, AS); | |||
4447 | ||||
4448 | // Now that we have identified any pre-splitting opportunities, | |||
4449 | // mark loads and stores unsplittable except for the following case. | |||
4450 | // We leave a slice splittable if all other slices are disjoint or fully | |||
4451 | // included in the slice, such as whole-alloca loads and stores. | |||
4452 | // If we fail to split these during pre-splitting, we want to force them | |||
4453 | // to be rewritten into a partition. | |||
4454 | bool IsSorted = true; | |||
4455 | ||||
4456 | uint64_t AllocaSize = | |||
4457 | DL.getTypeAllocSize(AI.getAllocatedType()).getFixedSize(); | |||
4458 | const uint64_t MaxBitVectorSize = 1024; | |||
4459 | if (AllocaSize <= MaxBitVectorSize) { | |||
4460 | // If a byte boundary is included in any load or store, a slice starting or | |||
4461 | // ending at the boundary is not splittable. | |||
4462 | SmallBitVector SplittableOffset(AllocaSize + 1, true); | |||
4463 | for (Slice &S : AS) | |||
4464 | for (unsigned O = S.beginOffset() + 1; | |||
4465 | O < S.endOffset() && O < AllocaSize; O++) | |||
4466 | SplittableOffset.reset(O); | |||
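     | ||||
     | // For example, with an 8-byte alloca and a 4-byte store covering bytes | |||
     | // [2, 6), offsets 3, 4 and 5 are cleared here, so any other load or store | |||
     | // slice that begins or ends at one of those offsets is made unsplittable | |||
     | // by the loop below. | |||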
4467 | ||||
4468 | for (Slice &S : AS) { | |||
4469 | if (!S.isSplittable()) | |||
4470 | continue; | |||
4471 | ||||
4472 | if ((S.beginOffset() > AllocaSize || SplittableOffset[S.beginOffset()]) && | |||
4473 | (S.endOffset() > AllocaSize || SplittableOffset[S.endOffset()])) | |||
4474 | continue; | |||
4475 | ||||
4476 | if (isa<LoadInst>(S.getUse()->getUser()) || | |||
4477 | isa<StoreInst>(S.getUse()->getUser())) { | |||
4478 | S.makeUnsplittable(); | |||
4479 | IsSorted = false; | |||
4480 | } | |||
4481 | } | |||
4482 | } | |||
4483 | else { | |||
4484 | // For a large alloca we only allow whole-alloca splittable loads and | |||
4485 | // stores, to avoid creating too large a BitVector. | |||
4486 | for (Slice &S : AS) { | |||
4487 | if (!S.isSplittable()) | |||
4488 | continue; | |||
4489 | ||||
4490 | if (S.beginOffset() == 0 && S.endOffset() >= AllocaSize) | |||
4491 | continue; | |||
4492 | ||||
4493 | if (isa<LoadInst>(S.getUse()->getUser()) || | |||
4494 | isa<StoreInst>(S.getUse()->getUser())) { | |||
4495 | S.makeUnsplittable(); | |||
4496 | IsSorted = false; | |||
4497 | } | |||
4498 | } | |||
4499 | } | |||
4500 | ||||
4501 | if (!IsSorted) | |||
4502 | llvm::sort(AS); | |||
4503 | ||||
4504 | /// Describes the allocas introduced by rewritePartition in order to migrate | |||
4505 | /// the debug info. | |||
4506 | struct Fragment { | |||
4507 | AllocaInst *Alloca; | |||
4508 | uint64_t Offset; | |||
4509 | uint64_t Size; | |||
4510 | Fragment(AllocaInst *AI, uint64_t O, uint64_t S) | |||
4511 | : Alloca(AI), Offset(O), Size(S) {} | |||
4512 | }; | |||
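     | // Note that Offset and Size are expressed in bits; the push_back below | |||
     | // scales P.beginOffset() and P.size() by SizeOfByte accordingly. | |||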
4513 | SmallVector<Fragment, 4> Fragments; | |||
4514 | ||||
4515 | // Rewrite each partition. | |||
4516 | for (auto &P : AS.partitions()) { | |||
4517 | if (AllocaInst *NewAI = rewritePartition(AI, AS, P)) { | |||
4518 | Changed = true; | |||
4519 | if (NewAI != &AI) { | |||
4520 | uint64_t SizeOfByte = 8; | |||
4521 | uint64_t AllocaSize = | |||
4522 | DL.getTypeSizeInBits(NewAI->getAllocatedType()).getFixedSize(); | |||
4523 | // Don't include any padding. | |||
4524 | uint64_t Size = std::min(AllocaSize, P.size() * SizeOfByte); | |||
4525 | Fragments.push_back(Fragment(NewAI, P.beginOffset() * SizeOfByte, Size)); | |||
4526 | } | |||
4527 | } | |||
4528 | ++NumPartitions; | |||
4529 | } | |||
4530 | ||||
4531 | NumAllocaPartitions += NumPartitions; | |||
4532 | MaxPartitionsPerAlloca.updateMax(NumPartitions); | |||
4533 | ||||
4534 | // Migrate debug information from the old alloca to the new alloca(s) | |||
4535 | // and the individual partitions. | |||
4536 | TinyPtrVector<DbgVariableIntrinsic *> DbgDeclares = FindDbgAddrUses(&AI); | |||
4537 | for (DbgVariableIntrinsic *DbgDeclare : DbgDeclares) { | |||
4538 | auto *Expr = DbgDeclare->getExpression(); | |||
4539 | DIBuilder DIB(*AI.getModule(), /*AllowUnresolved*/ false); | |||
4540 | uint64_t AllocaSize = | |||
4541 | DL.getTypeSizeInBits(AI.getAllocatedType()).getFixedSize(); | |||
4542 | for (auto Fragment : Fragments) { | |||
4543 | // Create a fragment expression describing the new partition or reuse AI's | |||
4544 | // expression if there is only one partition. | |||
4545 | auto *FragmentExpr = Expr; | |||
4546 | if (Fragment.Size < AllocaSize || Expr->isFragment()) { | |||
4547 | // If this alloca is already a scalar replacement of a larger aggregate, | |||
4548 | // Fragment.Offset describes the offset inside the scalar. | |||
4549 | auto ExprFragment = Expr->getFragmentInfo(); | |||
4550 | uint64_t Offset = ExprFragment ? ExprFragment->OffsetInBits : 0; | |||
4551 | uint64_t Start = Offset + Fragment.Offset; | |||
4552 | uint64_t Size = Fragment.Size; | |||
4553 | if (ExprFragment) { | |||
4554 | uint64_t AbsEnd = | |||
4555 | ExprFragment->OffsetInBits + ExprFragment->SizeInBits; | |||
4556 | if (Start >= AbsEnd) | |||
4557 | // No need to describe a SROAed padding. | |||
4558 | continue; | |||
4559 | Size = std::min(Size, AbsEnd - Start); | |||
4560 | } | |||
4561 | // The new, smaller fragment is stenciled out from the old fragment. | |||
4562 | if (auto OrigFragment = FragmentExpr->getFragmentInfo()) { | |||
4563 | assert(Start >= OrigFragment->OffsetInBits &&((void)0) | |||
4564 | "new fragment is outside of original fragment")((void)0); | |||
4565 | Start -= OrigFragment->OffsetInBits; | |||
4566 | } | |||
4567 | ||||
4568 | // The alloca may be larger than the variable. | |||
4569 | auto VarSize = DbgDeclare->getVariable()->getSizeInBits(); | |||
4570 | if (VarSize) { | |||
4571 | if (Size > *VarSize) | |||
4572 | Size = *VarSize; | |||
4573 | if (Size == 0 || Start + Size > *VarSize) | |||
4574 | continue; | |||
4575 | } | |||
4576 | ||||
4577 | // Avoid creating a fragment expression that covers the entire variable. | |||
4578 | if (!VarSize || *VarSize != Size) { | |||
4579 | if (auto E = | |||
4580 | DIExpression::createFragmentExpression(Expr, Start, Size)) | |||
4581 | FragmentExpr = *E; | |||
4582 | else | |||
4583 | continue; | |||
4584 | } | |||
4585 | } | |||
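     | ||||
     | // As an illustration, a 16-byte variable whose alloca is split into two | |||
     | // 8-byte partitions ends up with two dbg.declares whose expressions carry | |||
     | // DW_OP_LLVM_fragment operands covering bits [0, 64) and [64, 128) | |||
     | // respectively. | |||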
4586 | ||||
4587 | // Remove any existing intrinsics on the new alloca describing | |||
4588 | // the variable fragment. | |||
4589 | for (DbgVariableIntrinsic *OldDII : FindDbgAddrUses(Fragment.Alloca)) { | |||
4590 | auto SameVariableFragment = [](const DbgVariableIntrinsic *LHS, | |||
4591 | const DbgVariableIntrinsic *RHS) { | |||
4592 | return LHS->getVariable() == RHS->getVariable() && | |||
4593 | LHS->getDebugLoc()->getInlinedAt() == | |||
4594 | RHS->getDebugLoc()->getInlinedAt(); | |||
4595 | }; | |||
4596 | if (SameVariableFragment(OldDII, DbgDeclare)) | |||
4597 | OldDII->eraseFromParent(); | |||
4598 | } | |||
4599 | ||||
4600 | DIB.insertDeclare(Fragment.Alloca, DbgDeclare->getVariable(), FragmentExpr, | |||
4601 | DbgDeclare->getDebugLoc(), &AI); | |||
4602 | } | |||
4603 | } | |||
4604 | return Changed; | |||
4605 | } | |||
4606 | ||||
4607 | /// Clobber a use with undef, deleting the used value if it becomes dead. | |||
4608 | void SROA::clobberUse(Use &U) { | |||
4609 | Value *OldV = U; | |||
4610 | // Replace the use with an undef value. | |||
4611 | U = UndefValue::get(OldV->getType()); | |||
4612 | ||||
4613 | // Check for this making an instruction dead. We have to garbage collect | |||
4614 | // all the dead instructions to ensure the uses of any alloca end up being | |||
4615 | // minimal. | |||
4616 | if (Instruction *OldI = dyn_cast<Instruction>(OldV)) | |||
4617 | if (isInstructionTriviallyDead(OldI)) { | |||
4618 | DeadInsts.push_back(OldI); | |||
4619 | } | |||
4620 | } | |||
4621 | ||||
4622 | /// Analyze an alloca for SROA. | |||
4623 | /// | |||
4624 | /// This analyzes the alloca to ensure we can reason about it, builds | |||
4625 | /// the slices of the alloca, and then hands it off to be split and | |||
4626 | /// rewritten as needed. | |||
4627 | bool SROA::runOnAlloca(AllocaInst &AI) { | |||
4628 | LLVM_DEBUG(dbgs() << "SROA alloca: " << AI << "\n")do { } while (false); | |||
4629 | ++NumAllocasAnalyzed; | |||
4630 | ||||
4631 | // Special case dead allocas, as they're trivial. | |||
4632 | if (AI.use_empty()) { | |||
4633 | AI.eraseFromParent(); | |||
4634 | return true; | |||
4635 | } | |||
4636 | const DataLayout &DL = AI.getModule()->getDataLayout(); | |||
4637 | ||||
4638 | // Skip alloca forms that this analysis can't handle. | |||
4639 | auto *AT = AI.getAllocatedType(); | |||
4640 | if (AI.isArrayAllocation() || !AT->isSized() || isa<ScalableVectorType>(AT) || | |||
4641 | DL.getTypeAllocSize(AT).getFixedSize() == 0) | |||
4642 | return false; | |||
4643 | ||||
4644 | bool Changed = false; | |||
4645 | ||||
4646 | // First, split any FCA loads and stores touching this alloca to promote | |||
4647 | // better splitting and promotion opportunities. | |||
4648 | AggLoadStoreRewriter AggRewriter(DL); | |||
4649 | Changed |= AggRewriter.rewrite(AI); | |||
4650 | ||||
4651 | // Build the slices using a recursive instruction-visiting builder. | |||
4652 | AllocaSlices AS(DL, AI); | |||
4653 | LLVM_DEBUG(AS.print(dbgs()))do { } while (false); | |||
4654 | if (AS.isEscaped()) | |||
4655 | return Changed; | |||
4656 | ||||
4657 | // Delete all the dead users of this alloca before splitting and rewriting it. | |||
4658 | for (Instruction *DeadUser : AS.getDeadUsers()) { | |||
4659 | // Free up everything used by this instruction. | |||
4660 | for (Use &DeadOp : DeadUser->operands()) | |||
4661 | clobberUse(DeadOp); | |||
4662 | ||||
4663 | // Now replace the uses of this instruction. | |||
4664 | DeadUser->replaceAllUsesWith(UndefValue::get(DeadUser->getType())); | |||
4665 | ||||
4666 | // And mark it for deletion. | |||
4667 | DeadInsts.push_back(DeadUser); | |||
4668 | Changed = true; | |||
4669 | } | |||
4670 | for (Use *DeadOp : AS.getDeadOperands()) { | |||
4671 | clobberUse(*DeadOp); | |||
4672 | Changed = true; | |||
4673 | } | |||
4674 | ||||
4675 | // No slices to split. Leave the dead alloca for a later pass to clean up. | |||
4676 | if (AS.begin() == AS.end()) | |||
4677 | return Changed; | |||
4678 | ||||
4679 | Changed |= splitAlloca(AI, AS); | |||
4680 | ||||
4681 | LLVM_DEBUG(dbgs() << " Speculating PHIs\n")do { } while (false); | |||
4682 | while (!SpeculatablePHIs.empty()) | |||
4683 | speculatePHINodeLoads(*SpeculatablePHIs.pop_back_val()); | |||
4684 | ||||
4685 | LLVM_DEBUG(dbgs() << " Speculating Selects\n")do { } while (false); | |||
4686 | while (!SpeculatableSelects.empty()) | |||
4687 | speculateSelectInstLoads(*SpeculatableSelects.pop_back_val()); | |||
4688 | ||||
4689 | return Changed; | |||
4690 | } | |||
4691 | ||||
4692 | /// Delete the dead instructions accumulated in this run. | |||
4693 | /// | |||
4694 | /// Recursively deletes the dead instructions we've accumulated. This is done | |||
4695 | /// at the very end to maximize locality of the recursive delete and to | |||
4696 | /// minimize the problems of invalidated instruction pointers as such pointers | |||
4697 | /// are used heavily in the intermediate stages of the algorithm. | |||
4698 | /// | |||
4699 | /// We also record the alloca instructions deleted here so that they aren't | |||
4700 | /// subsequently handed to mem2reg to promote. | |||
4701 | bool SROA::deleteDeadInstructions( | |||
4702 | SmallPtrSetImpl<AllocaInst *> &DeletedAllocas) { | |||
4703 | bool Changed = false; | |||
4704 | while (!DeadInsts.empty()) { | |||
4705 | Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()); | |||
4706 | if (!I) continue; | |||
4707 | LLVM_DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n"); | |||
4708 | ||||
4709 | // If the instruction is an alloca, find the possible dbg.declare connected | |||
4710 | // to it, and remove it too. We must do this before calling RAUW or we will | |||
4711 | // not be able to find it. | |||
4712 | if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) { | |||
4713 | DeletedAllocas.insert(AI); | |||
4714 | for (DbgVariableIntrinsic *OldDII : FindDbgAddrUses(AI)) | |||
4715 | OldDII->eraseFromParent(); | |||
4716 | } | |||
4717 | ||||
4718 | I->replaceAllUsesWith(UndefValue::get(I->getType())); | |||
4719 | ||||
4720 | for (Use &Operand : I->operands()) | |||
4721 | if (Instruction *U = dyn_cast<Instruction>(Operand)) { | |||
4722 | // Zero out the operand and see if it becomes trivially dead. | |||
4723 | Operand = nullptr; | |||
4724 | if (isInstructionTriviallyDead(U)) | |||
4725 | DeadInsts.push_back(U); | |||
4726 | } | |||
4727 | ||||
4728 | ++NumDeleted; | |||
4729 | I->eraseFromParent(); | |||
4730 | Changed = true; | |||
4731 | } | |||
4732 | return Changed; | |||
4733 | } | |||
4734 | ||||
4735 | /// Promote the allocas, using the best available technique. | |||
4736 | /// | |||
4737 | /// This attempts to promote whatever allocas have been identified as viable in | |||
4738 | /// the PromotableAllocas list. If that list is empty, there is nothing to do. | |||
4739 | /// This function returns whether any promotion occurred. | |||
4740 | bool SROA::promoteAllocas(Function &F) { | |||
4741 | if (PromotableAllocas.empty()) | |||
4742 | return false; | |||
4743 | ||||
4744 | NumPromoted += PromotableAllocas.size(); | |||
4745 | ||||
4746 | LLVM_DEBUG(dbgs() << "Promoting allocas with mem2reg...\n"); | |||
4747 | PromoteMemToReg(PromotableAllocas, *DT, AC); | |||
4748 | PromotableAllocas.clear(); | |||
4749 | return true; | |||
4750 | } | |||
4751 | ||||
4752 | PreservedAnalyses SROA::runImpl(Function &F, DominatorTree &RunDT, | |||
4753 | AssumptionCache &RunAC) { | |||
4754 | LLVM_DEBUG(dbgs() << "SROA function: " << F.getName() << "\n"); | |||
4755 | C = &F.getContext(); | |||
4756 | DT = &RunDT; | |||
4757 | AC = &RunAC; | |||
4758 | ||||
4759 | BasicBlock &EntryBB = F.getEntryBlock(); | |||
4760 | for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end()); | |||
4761 | I != E; ++I) { | |||
4762 | if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) { | |||
4763 | if (isa<ScalableVectorType>(AI->getAllocatedType())) { | |||
4764 | if (isAllocaPromotable(AI)) | |||
4765 | PromotableAllocas.push_back(AI); | |||
4766 | } else { | |||
4767 | Worklist.insert(AI); | |||
4768 | } | |||
4769 | } | |||
4770 | } | |||
4771 | ||||
4772 | bool Changed = false; | |||
4773 | // A set of deleted alloca instruction pointers which should be removed from | |||
4774 | // the list of promotable allocas. | |||
4775 | SmallPtrSet<AllocaInst *, 4> DeletedAllocas; | |||
4776 | ||||
4777 | do { | |||
4778 | while (!Worklist.empty()) { | |||
4779 | Changed |= runOnAlloca(*Worklist.pop_back_val()); | |||
4780 | Changed |= deleteDeadInstructions(DeletedAllocas); | |||
4781 | ||||
4782 | // Remove the deleted allocas from various lists so that we don't try to | |||
4783 | // continue processing them. | |||
4784 | if (!DeletedAllocas.empty()) { | |||
4785 | auto IsInSet = [&](AllocaInst *AI) { return DeletedAllocas.count(AI); }; | |||
4786 | Worklist.remove_if(IsInSet); | |||
4787 | PostPromotionWorklist.remove_if(IsInSet); | |||
4788 | llvm::erase_if(PromotableAllocas, IsInSet); | |||
4789 | DeletedAllocas.clear(); | |||
4790 | } | |||
4791 | } | |||
4792 | ||||
4793 | Changed |= promoteAllocas(F); | |||
4794 | ||||
4795 | Worklist = PostPromotionWorklist; | |||
4796 | PostPromotionWorklist.clear(); | |||
4797 | } while (!Worklist.empty()); | |||
4798 | ||||
4799 | if (!Changed) | |||
4800 | return PreservedAnalyses::all(); | |||
4801 | ||||
4802 | PreservedAnalyses PA; | |||
4803 | PA.preserveSet<CFGAnalyses>(); | |||
4804 | return PA; | |||
4805 | } | |||
4806 | ||||
4807 | PreservedAnalyses SROA::run(Function &F, FunctionAnalysisManager &AM) { | |||
4808 | return runImpl(F, AM.getResult<DominatorTreeAnalysis>(F), | |||
4809 | AM.getResult<AssumptionAnalysis>(F)); | |||
4810 | } | |||
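
The entry point above is what the new pass manager calls into. As a minimal sketch (not part of this file; everything except llvm::SROA and the standard PassBuilder APIs is illustrative), scheduling the pass by hand over a module could look like this:

#include "llvm/Passes/PassBuilder.h"
#include "llvm/Transforms/Scalar/SROA.h"
#include <utility>

// Run SROA over every function in M, with the required analyses
// (DominatorTreeAnalysis and AssumptionAnalysis among them) registered
// through PassBuilder.
void runSROAByHand(llvm::Module &M) {
  llvm::PassBuilder PB;
  llvm::LoopAnalysisManager LAM;
  llvm::FunctionAnalysisManager FAM;
  llvm::CGSCCAnalysisManager CGAM;
  llvm::ModuleAnalysisManager MAM;
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  llvm::FunctionPassManager FPM;
  FPM.addPass(llvm::SROA());                 // the pass defined in this file
  llvm::ModulePassManager MPM;
  MPM.addPass(llvm::createModuleToFunctionPassAdaptor(std::move(FPM)));
  MPM.run(M, MAM);
}
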
4811 | ||||
4812 | /// A legacy pass for the legacy pass manager that wraps the \c SROA pass. | |||
4813 | /// | |||
4814 | /// This is in the llvm namespace purely to allow it to be a friend of the \c | |||
4815 | /// SROA pass. | |||
4816 | class llvm::sroa::SROALegacyPass : public FunctionPass { | |||
4817 | /// The SROA implementation. | |||
4818 | SROA Impl; | |||
4819 | ||||
4820 | public: | |||
4821 | static char ID; | |||
4822 | ||||
4823 | SROALegacyPass() : FunctionPass(ID) { | |||
4824 | initializeSROALegacyPassPass(*PassRegistry::getPassRegistry()); | |||
4825 | } | |||
4826 | ||||
4827 | bool runOnFunction(Function &F) override { | |||
4828 | if (skipFunction(F)) | |||
4829 | return false; | |||
4830 | ||||
4831 | auto PA = Impl.runImpl( | |||
4832 | F, getAnalysis<DominatorTreeWrapperPass>().getDomTree(), | |||
4833 | getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F)); | |||
4834 | return !PA.areAllPreserved(); | |||
4835 | } | |||
4836 | ||||
4837 | void getAnalysisUsage(AnalysisUsage &AU) const override { | |||
4838 | AU.addRequired<AssumptionCacheTracker>(); | |||
4839 | AU.addRequired<DominatorTreeWrapperPass>(); | |||
4840 | AU.addPreserved<GlobalsAAWrapperPass>(); | |||
4841 | AU.setPreservesCFG(); | |||
4842 | } | |||
4843 | ||||
4844 | StringRef getPassName() const override { return "SROA"; } | |||
4845 | }; | |||
4846 | ||||
4847 | char SROALegacyPass::ID = 0; | |||
4848 | ||||
4849 | FunctionPass *llvm::createSROAPass() { return new SROALegacyPass(); } | |||
4850 | ||||
4851 | INITIALIZE_PASS_BEGIN(SROALegacyPass, "sroa", | |||
4852 | "Scalar Replacement Of Aggregates", false, false) | |||
4853 | INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) | |||
4854 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) | |||
4855 | INITIALIZE_PASS_END(SROALegacyPass, "sroa", "Scalar Replacement Of Aggregates", | |||
4856 | false, false)
1 | //===- llvm/Instructions.h - Instruction subclass definitions ---*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file exposes the class definitions of all of the subclasses of the |
10 | // Instruction class. This is meant to be an easy way to get access to all |
11 | // instruction subclasses. |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #ifndef LLVM_IR_INSTRUCTIONS_H |
16 | #define LLVM_IR_INSTRUCTIONS_H |
17 | |
18 | #include "llvm/ADT/ArrayRef.h" |
19 | #include "llvm/ADT/Bitfields.h" |
20 | #include "llvm/ADT/MapVector.h" |
21 | #include "llvm/ADT/None.h" |
22 | #include "llvm/ADT/STLExtras.h" |
23 | #include "llvm/ADT/SmallVector.h" |
24 | #include "llvm/ADT/StringRef.h" |
25 | #include "llvm/ADT/Twine.h" |
26 | #include "llvm/ADT/iterator.h" |
27 | #include "llvm/ADT/iterator_range.h" |
28 | #include "llvm/IR/Attributes.h" |
29 | #include "llvm/IR/BasicBlock.h" |
30 | #include "llvm/IR/CallingConv.h" |
31 | #include "llvm/IR/CFG.h" |
32 | #include "llvm/IR/Constant.h" |
33 | #include "llvm/IR/DerivedTypes.h" |
34 | #include "llvm/IR/Function.h" |
35 | #include "llvm/IR/InstrTypes.h" |
36 | #include "llvm/IR/Instruction.h" |
37 | #include "llvm/IR/OperandTraits.h" |
38 | #include "llvm/IR/Type.h" |
39 | #include "llvm/IR/Use.h" |
40 | #include "llvm/IR/User.h" |
41 | #include "llvm/IR/Value.h" |
42 | #include "llvm/Support/AtomicOrdering.h" |
43 | #include "llvm/Support/Casting.h" |
44 | #include "llvm/Support/ErrorHandling.h" |
45 | #include <cassert> |
46 | #include <cstddef> |
47 | #include <cstdint> |
48 | #include <iterator> |
49 | |
50 | namespace llvm { |
51 | |
52 | class APInt; |
53 | class ConstantInt; |
54 | class DataLayout; |
55 | class LLVMContext; |
56 | |
57 | //===----------------------------------------------------------------------===// |
58 | // AllocaInst Class |
59 | //===----------------------------------------------------------------------===// |
60 | |
61 | /// an instruction to allocate memory on the stack |
62 | class AllocaInst : public UnaryInstruction { |
63 | Type *AllocatedType; |
64 | |
65 | using AlignmentField = AlignmentBitfieldElementT<0>; |
66 | using UsedWithInAllocaField = BoolBitfieldElementT<AlignmentField::NextBit>; |
67 | using SwiftErrorField = BoolBitfieldElementT<UsedWithInAllocaField::NextBit>; |
68 | static_assert(Bitfield::areContiguous<AlignmentField, UsedWithInAllocaField, |
69 | SwiftErrorField>(), |
70 | "Bitfields must be contiguous"); |
71 | |
72 | protected: |
73 | // Note: Instruction needs to be a friend here to call cloneImpl. |
74 | friend class Instruction; |
75 | |
76 | AllocaInst *cloneImpl() const; |
77 | |
78 | public: |
79 | explicit AllocaInst(Type *Ty, unsigned AddrSpace, Value *ArraySize, |
80 | const Twine &Name, Instruction *InsertBefore); |
81 | AllocaInst(Type *Ty, unsigned AddrSpace, Value *ArraySize, |
82 | const Twine &Name, BasicBlock *InsertAtEnd); |
83 | |
84 | AllocaInst(Type *Ty, unsigned AddrSpace, const Twine &Name, |
85 | Instruction *InsertBefore); |
86 | AllocaInst(Type *Ty, unsigned AddrSpace, |
87 | const Twine &Name, BasicBlock *InsertAtEnd); |
88 | |
89 | AllocaInst(Type *Ty, unsigned AddrSpace, Value *ArraySize, Align Align, |
90 | const Twine &Name = "", Instruction *InsertBefore = nullptr); |
91 | AllocaInst(Type *Ty, unsigned AddrSpace, Value *ArraySize, Align Align, |
92 | const Twine &Name, BasicBlock *InsertAtEnd); |
93 | |
94 | /// Return true if there is an allocation size parameter to the allocation |
95 | /// instruction that is not 1. |
96 | bool isArrayAllocation() const; |
97 | |
98 | /// Get the number of elements allocated. For a simple allocation of a single |
99 | /// element, this will return a constant 1 value. |
100 | const Value *getArraySize() const { return getOperand(0); } |
101 | Value *getArraySize() { return getOperand(0); } |
102 | |
103 | /// Overload to return most specific pointer type. |
104 | PointerType *getType() const { |
105 | return cast<PointerType>(Instruction::getType()); |
106 | } |
107 | |
108 | /// Get allocation size in bits. Returns None if size can't be determined, |
109 | /// e.g. in case of a VLA. |
110 | Optional<TypeSize> getAllocationSizeInBits(const DataLayout &DL) const; |
111 | |
112 | /// Return the type that is being allocated by the instruction. |
113 | Type *getAllocatedType() const { return AllocatedType; } |
114 | /// for use only in special circumstances that need to generically |
115 | /// transform a whole instruction (eg: IR linking and vectorization). |
116 | void setAllocatedType(Type *Ty) { AllocatedType = Ty; } |
117 | |
118 | /// Return the alignment of the memory that is being allocated by the |
119 | /// instruction. |
120 | Align getAlign() const { |
121 | return Align(1ULL << getSubclassData<AlignmentField>()); |
122 | } |
123 | |
124 | void setAlignment(Align Align) { |
125 | setSubclassData<AlignmentField>(Log2(Align)); |
126 | } |
127 | |
128 | // FIXME: Remove this once the transition to Align is over.
129 | unsigned getAlignment() const { return getAlign().value(); } |
130 | |
131 | /// Return true if this alloca is in the entry block of the function and is a |
132 | /// constant size. If so, the code generator will fold it into the |
133 | /// prolog/epilog code, so it is basically free. |
134 | bool isStaticAlloca() const; |
135 | |
136 | /// Return true if this alloca is used as an inalloca argument to a call. Such |
137 | /// allocas are never considered static even if they are in the entry block. |
138 | bool isUsedWithInAlloca() const { |
139 | return getSubclassData<UsedWithInAllocaField>(); |
140 | } |
141 | |
142 | /// Specify whether this alloca is used to represent the arguments to a call. |
143 | void setUsedWithInAlloca(bool V) { |
144 | setSubclassData<UsedWithInAllocaField>(V); |
145 | } |
146 | |
147 | /// Return true if this alloca is used as a swifterror argument to a call. |
148 | bool isSwiftError() const { return getSubclassData<SwiftErrorField>(); } |
149 | /// Specify whether this alloca is used to represent a swifterror. |
150 | void setSwiftError(bool V) { setSubclassData<SwiftErrorField>(V); } |
151 | |
152 | // Methods for support type inquiry through isa, cast, and dyn_cast: |
153 | static bool classof(const Instruction *I) { |
154 | return (I->getOpcode() == Instruction::Alloca); |
155 | } |
156 | static bool classof(const Value *V) { |
157 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
158 | } |
159 | |
160 | private: |
161 | // Shadow Instruction::setInstructionSubclassData with a private forwarding |
162 | // method so that subclasses cannot accidentally use it. |
163 | template <typename Bitfield> |
164 | void setSubclassData(typename Bitfield::Type Value) { |
165 | Instruction::setSubclassData<Bitfield>(Value); |
166 | } |
167 | }; |
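
The alignment accessors above store log2(alignment) in the subclass-data bitfield and decode it with a shift, so only power-of-two alignments are representable. A short usage sketch (assuming `Builder` is an llvm::IRBuilder<> positioned in a function's entry block; the variable names are illustrative):

// Create an i64 stack slot, force 16-byte alignment, and query it back.
llvm::AllocaInst *Slot =
    Builder.CreateAlloca(Builder.getInt64Ty(), /*ArraySize=*/nullptr, "slot");
Slot->setAlignment(llvm::Align(16));     // stored as Log2(16) == 4 in the bitfield
llvm::Align A = Slot->getAlign();        // decoded as Align(1ULL << 4), i.e. 16
bool IsStatic = Slot->isStaticAlloca();  // true: constant size, in the entry block
(void)A; (void)IsStatic;
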
168 | |
169 | //===----------------------------------------------------------------------===// |
170 | // LoadInst Class |
171 | //===----------------------------------------------------------------------===// |
172 | |
173 | /// An instruction for reading from memory. This uses the SubclassData field in |
174 | /// Value to store whether or not the load is volatile. |
175 | class LoadInst : public UnaryInstruction { |
176 | using VolatileField = BoolBitfieldElementT<0>; |
177 | using AlignmentField = AlignmentBitfieldElementT<VolatileField::NextBit>; |
178 | using OrderingField = AtomicOrderingBitfieldElementT<AlignmentField::NextBit>; |
179 | static_assert( |
180 | Bitfield::areContiguous<VolatileField, AlignmentField, OrderingField>(), |
181 | "Bitfields must be contiguous"); |
182 | |
183 | void AssertOK(); |
184 | |
185 | protected: |
186 | // Note: Instruction needs to be a friend here to call cloneImpl. |
187 | friend class Instruction; |
188 | |
189 | LoadInst *cloneImpl() const; |
190 | |
191 | public: |
192 | LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr, |
193 | Instruction *InsertBefore); |
194 | LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr, BasicBlock *InsertAtEnd); |
195 | LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr, bool isVolatile, |
196 | Instruction *InsertBefore); |
197 | LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr, bool isVolatile, |
198 | BasicBlock *InsertAtEnd); |
199 | LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr, bool isVolatile, |
200 | Align Align, Instruction *InsertBefore = nullptr); |
201 | LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr, bool isVolatile, |
202 | Align Align, BasicBlock *InsertAtEnd); |
203 | LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr, bool isVolatile, |
204 | Align Align, AtomicOrdering Order, |
205 | SyncScope::ID SSID = SyncScope::System, |
206 | Instruction *InsertBefore = nullptr); |
207 | LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr, bool isVolatile, |
208 | Align Align, AtomicOrdering Order, SyncScope::ID SSID, |
209 | BasicBlock *InsertAtEnd); |
210 | |
211 | /// Return true if this is a load from a volatile memory location. |
212 | bool isVolatile() const { return getSubclassData<VolatileField>(); } |
213 | |
214 | /// Specify whether this is a volatile load or not. |
215 | void setVolatile(bool V) { setSubclassData<VolatileField>(V); } |
216 | |
217 | /// Return the alignment of the access that is being performed. |
218 | /// FIXME: Remove this function once transition to Align is over. |
219 | /// Use getAlign() instead. |
220 | unsigned getAlignment() const { return getAlign().value(); } |
221 | |
222 | /// Return the alignment of the access that is being performed. |
223 | Align getAlign() const { |
224 | return Align(1ULL << (getSubclassData<AlignmentField>())); |
225 | } |
226 | |
227 | void setAlignment(Align Align) { |
228 | setSubclassData<AlignmentField>(Log2(Align)); |
229 | } |
230 | |
231 | /// Returns the ordering constraint of this load instruction. |
232 | AtomicOrdering getOrdering() const { |
233 | return getSubclassData<OrderingField>(); |
234 | } |
235 | /// Sets the ordering constraint of this load instruction. May not be Release |
236 | /// or AcquireRelease. |
237 | void setOrdering(AtomicOrdering Ordering) { |
238 | setSubclassData<OrderingField>(Ordering); |
239 | } |
240 | |
241 | /// Returns the synchronization scope ID of this load instruction. |
242 | SyncScope::ID getSyncScopeID() const { |
243 | return SSID; |
244 | } |
245 | |
246 | /// Sets the synchronization scope ID of this load instruction. |
247 | void setSyncScopeID(SyncScope::ID SSID) { |
248 | this->SSID = SSID; |
249 | } |
250 | |
251 | /// Sets the ordering constraint and the synchronization scope ID of this load |
252 | /// instruction. |
253 | void setAtomic(AtomicOrdering Ordering, |
254 | SyncScope::ID SSID = SyncScope::System) { |
255 | setOrdering(Ordering); |
256 | setSyncScopeID(SSID); |
257 | } |
258 | |
259 | bool isSimple() const { return !isAtomic() && !isVolatile(); } |
260 | |
261 | bool isUnordered() const { |
262 | return (getOrdering() == AtomicOrdering::NotAtomic || |
263 | getOrdering() == AtomicOrdering::Unordered) && |
264 | !isVolatile(); |
265 | } |
266 | |
267 | Value *getPointerOperand() { return getOperand(0); } |
268 | const Value *getPointerOperand() const { return getOperand(0); } |
269 | static unsigned getPointerOperandIndex() { return 0U; } |
270 | Type *getPointerOperandType() const { return getPointerOperand()->getType(); } |
271 | |
272 | /// Returns the address space of the pointer operand. |
273 | unsigned getPointerAddressSpace() const { |
274 | return getPointerOperandType()->getPointerAddressSpace(); |
275 | } |
276 | |
277 | // Methods for support type inquiry through isa, cast, and dyn_cast: |
278 | static bool classof(const Instruction *I) { |
279 | return I->getOpcode() == Instruction::Load; |
280 | } |
281 | static bool classof(const Value *V) { |
282 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
283 | } |
284 | |
285 | private: |
286 | // Shadow Instruction::setInstructionSubclassData with a private forwarding |
287 | // method so that subclasses cannot accidentally use it. |
288 | template <typename Bitfield> |
289 | void setSubclassData(typename Bitfield::Type Value) { |
290 | Instruction::setSubclassData<Bitfield>(Value); |
291 | } |
292 | |
293 | /// The synchronization scope ID of this load instruction. Not quite enough |
294 | /// room in SubClassData for everything, so synchronization scope ID gets its |
295 | /// own field. |
296 | SyncScope::ID SSID; |
297 | }; |
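
A load's ordering may not be Release or AcquireRelease, and getAlign() decodes the same log2-encoded alignment field as AllocaInst. A short sketch (assuming `Builder` is an llvm::IRBuilder<> and `Ptr` is an i32 pointer; names are illustrative):

// A plain load, an explicitly aligned load, and an acquire atomic load.
llvm::Value *Plain =
    Builder.CreateLoad(Builder.getInt32Ty(), Ptr, "plain");
llvm::LoadInst *L =
    Builder.CreateAlignedLoad(Builder.getInt32Ty(), Ptr, llvm::Align(4), "aligned");
L->setAtomic(llvm::AtomicOrdering::Acquire);  // scope defaults to SyncScope::System
bool Simple = L->isSimple();                  // false once the load is atomic
(void)Plain; (void)Simple;
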
298 | |
299 | //===----------------------------------------------------------------------===// |
300 | // StoreInst Class |
301 | //===----------------------------------------------------------------------===// |
302 | |
303 | /// An instruction for storing to memory. |
304 | class StoreInst : public Instruction { |
305 | using VolatileField = BoolBitfieldElementT<0>; |
306 | using AlignmentField = AlignmentBitfieldElementT<VolatileField::NextBit>; |
307 | using OrderingField = AtomicOrderingBitfieldElementT<AlignmentField::NextBit>; |
308 | static_assert( |
309 | Bitfield::areContiguous<VolatileField, AlignmentField, OrderingField>(), |
310 | "Bitfields must be contiguous"); |
311 | |
312 | void AssertOK(); |
313 | |
314 | protected: |
315 | // Note: Instruction needs to be a friend here to call cloneImpl. |
316 | friend class Instruction; |
317 | |
318 | StoreInst *cloneImpl() const; |
319 | |
320 | public: |
321 | StoreInst(Value *Val, Value *Ptr, Instruction *InsertBefore); |
322 | StoreInst(Value *Val, Value *Ptr, BasicBlock *InsertAtEnd); |
323 | StoreInst(Value *Val, Value *Ptr, bool isVolatile, Instruction *InsertBefore); |
324 | StoreInst(Value *Val, Value *Ptr, bool isVolatile, BasicBlock *InsertAtEnd); |
325 | StoreInst(Value *Val, Value *Ptr, bool isVolatile, Align Align, |
326 | Instruction *InsertBefore = nullptr); |
327 | StoreInst(Value *Val, Value *Ptr, bool isVolatile, Align Align, |
328 | BasicBlock *InsertAtEnd); |
329 | StoreInst(Value *Val, Value *Ptr, bool isVolatile, Align Align, |
330 | AtomicOrdering Order, SyncScope::ID SSID = SyncScope::System, |
331 | Instruction *InsertBefore = nullptr); |
332 | StoreInst(Value *Val, Value *Ptr, bool isVolatile, Align Align, |
333 | AtomicOrdering Order, SyncScope::ID SSID, BasicBlock *InsertAtEnd); |
334 | |
335 | // allocate space for exactly two operands |
336 | void *operator new(size_t S) { return User::operator new(S, 2); } |
337 | void operator delete(void *Ptr) { User::operator delete(Ptr); } |
338 | |
339 | /// Return true if this is a store to a volatile memory location. |
340 | bool isVolatile() const { return getSubclassData<VolatileField>(); } |
341 | |
342 | /// Specify whether this is a volatile store or not. |
343 | void setVolatile(bool V) { setSubclassData<VolatileField>(V); } |
344 | |
345 | /// Transparently provide more efficient getOperand methods. |
346 | DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value)
347 | |
348 | /// Return the alignment of the access that is being performed |
349 | /// FIXME: Remove this function once transition to Align is over. |
350 | /// Use getAlign() instead. |
351 | unsigned getAlignment() const { return getAlign().value(); } |
352 | |
353 | Align getAlign() const { |
354 | return Align(1ULL << (getSubclassData<AlignmentField>())); |
355 | } |
356 | |
357 | void setAlignment(Align Align) { |
358 | setSubclassData<AlignmentField>(Log2(Align)); |
359 | } |
360 | |
361 | /// Returns the ordering constraint of this store instruction. |
362 | AtomicOrdering getOrdering() const { |
363 | return getSubclassData<OrderingField>(); |
364 | } |
365 | |
366 | /// Sets the ordering constraint of this store instruction. May not be |
367 | /// Acquire or AcquireRelease. |
368 | void setOrdering(AtomicOrdering Ordering) { |
369 | setSubclassData<OrderingField>(Ordering); |
370 | } |
371 | |
372 | /// Returns the synchronization scope ID of this store instruction. |
373 | SyncScope::ID getSyncScopeID() const { |
374 | return SSID; |
375 | } |
376 | |
377 | /// Sets the synchronization scope ID of this store instruction. |
378 | void setSyncScopeID(SyncScope::ID SSID) { |
379 | this->SSID = SSID; |
380 | } |
381 | |
382 | /// Sets the ordering constraint and the synchronization scope ID of this |
383 | /// store instruction. |
384 | void setAtomic(AtomicOrdering Ordering, |
385 | SyncScope::ID SSID = SyncScope::System) { |
386 | setOrdering(Ordering); |
387 | setSyncScopeID(SSID); |
388 | } |
389 | |
390 | bool isSimple() const { return !isAtomic() && !isVolatile(); } |
391 | |
392 | bool isUnordered() const { |
393 | return (getOrdering() == AtomicOrdering::NotAtomic || |
394 | getOrdering() == AtomicOrdering::Unordered) && |
395 | !isVolatile(); |
396 | } |
397 | |
398 | Value *getValueOperand() { return getOperand(0); } |
399 | const Value *getValueOperand() const { return getOperand(0); } |
400 | |
401 | Value *getPointerOperand() { return getOperand(1); } |
402 | const Value *getPointerOperand() const { return getOperand(1); } |
403 | static unsigned getPointerOperandIndex() { return 1U; } |
404 | Type *getPointerOperandType() const { return getPointerOperand()->getType(); } |
405 | |
406 | /// Returns the address space of the pointer operand. |
407 | unsigned getPointerAddressSpace() const { |
408 | return getPointerOperandType()->getPointerAddressSpace(); |
409 | } |
410 | |
411 | // Methods for support type inquiry through isa, cast, and dyn_cast: |
412 | static bool classof(const Instruction *I) { |
413 | return I->getOpcode() == Instruction::Store; |
414 | } |
415 | static bool classof(const Value *V) { |
416 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
417 | } |
418 | |
419 | private: |
420 | // Shadow Instruction::setInstructionSubclassData with a private forwarding |
421 | // method so that subclasses cannot accidentally use it. |
422 | template <typename Bitfield> |
423 | void setSubclassData(typename Bitfield::Type Value) { |
424 | Instruction::setSubclassData<Bitfield>(Value); |
425 | } |
426 | |
427 | /// The synchronization scope ID of this store instruction. Not quite enough |
428 | /// room in SubClassData for everything, so synchronization scope ID gets its |
429 | /// own field. |
430 | SyncScope::ID SSID; |
431 | }; |
432 | |
433 | template <> |
434 | struct OperandTraits<StoreInst> : public FixedNumOperandTraits<StoreInst, 2> { |
435 | }; |
436 | |
437 | DEFINE_TRANSPARENT_OPERAND_ACCESSORS(StoreInst, Value)
438 | |
439 | //===----------------------------------------------------------------------===// |
440 | // FenceInst Class |
441 | //===----------------------------------------------------------------------===// |
442 | |
443 | /// An instruction for ordering other memory operations. |
444 | class FenceInst : public Instruction { |
445 | using OrderingField = AtomicOrderingBitfieldElementT<0>; |
446 | |
447 | void Init(AtomicOrdering Ordering, SyncScope::ID SSID); |
448 | |
449 | protected: |
450 | // Note: Instruction needs to be a friend here to call cloneImpl. |
451 | friend class Instruction; |
452 | |
453 | FenceInst *cloneImpl() const; |
454 | |
455 | public: |
456 | // Ordering may only be Acquire, Release, AcquireRelease, or |
457 | // SequentiallyConsistent. |
458 | FenceInst(LLVMContext &C, AtomicOrdering Ordering, |
459 | SyncScope::ID SSID = SyncScope::System, |
460 | Instruction *InsertBefore = nullptr); |
461 | FenceInst(LLVMContext &C, AtomicOrdering Ordering, SyncScope::ID SSID, |
462 | BasicBlock *InsertAtEnd); |
463 | |
464 | // allocate space for exactly zero operands |
465 | void *operator new(size_t S) { return User::operator new(S, 0); } |
466 | void operator delete(void *Ptr) { User::operator delete(Ptr); } |
467 | |
468 | /// Returns the ordering constraint of this fence instruction. |
469 | AtomicOrdering getOrdering() const { |
470 | return getSubclassData<OrderingField>(); |
471 | } |
472 | |
473 | /// Sets the ordering constraint of this fence instruction. May only be |
474 | /// Acquire, Release, AcquireRelease, or SequentiallyConsistent. |
475 | void setOrdering(AtomicOrdering Ordering) { |
476 | setSubclassData<OrderingField>(Ordering); |
477 | } |
478 | |
479 | /// Returns the synchronization scope ID of this fence instruction. |
480 | SyncScope::ID getSyncScopeID() const { |
481 | return SSID; |
482 | } |
483 | |
484 | /// Sets the synchronization scope ID of this fence instruction. |
485 | void setSyncScopeID(SyncScope::ID SSID) { |
486 | this->SSID = SSID; |
487 | } |
488 | |
489 | // Methods for support type inquiry through isa, cast, and dyn_cast: |
490 | static bool classof(const Instruction *I) { |
491 | return I->getOpcode() == Instruction::Fence; |
492 | } |
493 | static bool classof(const Value *V) { |
494 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
495 | } |
496 | |
497 | private: |
498 | // Shadow Instruction::setInstructionSubclassData with a private forwarding |
499 | // method so that subclasses cannot accidentally use it. |
500 | template <typename Bitfield> |
501 | void setSubclassData(typename Bitfield::Type Value) { |
502 | Instruction::setSubclassData<Bitfield>(Value); |
503 | } |
504 | |
505 | /// The synchronization scope ID of this fence instruction. Not quite enough |
506 | /// room in SubClassData for everything, so synchronization scope ID gets its |
507 | /// own field. |
508 | SyncScope::ID SSID; |
509 | }; |
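
A fence carries only an ordering (which must be Acquire, Release, AcquireRelease, or SequentiallyConsistent) and a synchronization scope. Sketch, assuming `Builder` is an llvm::IRBuilder<>:

llvm::FenceInst *F =
    Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
F->setSyncScopeID(llvm::SyncScope::SingleThread);  // only orders the current thread
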
510 | |
511 | //===----------------------------------------------------------------------===// |
512 | // AtomicCmpXchgInst Class |
513 | //===----------------------------------------------------------------------===// |
514 | |
515 | /// An instruction that atomically checks whether a |
516 | /// specified value is in a memory location, and, if it is, stores a new value |
517 | /// there. The value returned by this instruction is a pair containing the |
518 | /// original value as first element, and an i1 indicating success (true) or |
519 | /// failure (false) as second element. |
520 | /// |
521 | class AtomicCmpXchgInst : public Instruction { |
522 | void Init(Value *Ptr, Value *Cmp, Value *NewVal, Align Align, |
523 | AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, |
524 | SyncScope::ID SSID); |
525 | |
526 | template <unsigned Offset> |
527 | using AtomicOrderingBitfieldElement = |
528 | typename Bitfield::Element<AtomicOrdering, Offset, 3, |
529 | AtomicOrdering::LAST>; |
530 | |
531 | protected: |
532 | // Note: Instruction needs to be a friend here to call cloneImpl. |
533 | friend class Instruction; |
534 | |
535 | AtomicCmpXchgInst *cloneImpl() const; |
536 | |
537 | public: |
538 | AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal, Align Alignment, |
539 | AtomicOrdering SuccessOrdering, |
540 | AtomicOrdering FailureOrdering, SyncScope::ID SSID, |
541 | Instruction *InsertBefore = nullptr); |
542 | AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal, Align Alignment, |
543 | AtomicOrdering SuccessOrdering, |
544 | AtomicOrdering FailureOrdering, SyncScope::ID SSID, |
545 | BasicBlock *InsertAtEnd); |
546 | |
547 | // allocate space for exactly three operands |
548 | void *operator new(size_t S) { return User::operator new(S, 3); } |
549 | void operator delete(void *Ptr) { User::operator delete(Ptr); } |
550 | |
551 | using VolatileField = BoolBitfieldElementT<0>; |
552 | using WeakField = BoolBitfieldElementT<VolatileField::NextBit>; |
553 | using SuccessOrderingField = |
554 | AtomicOrderingBitfieldElementT<WeakField::NextBit>; |
555 | using FailureOrderingField = |
556 | AtomicOrderingBitfieldElementT<SuccessOrderingField::NextBit>; |
557 | using AlignmentField = |
558 | AlignmentBitfieldElementT<FailureOrderingField::NextBit>; |
559 | static_assert( |
560 | Bitfield::areContiguous<VolatileField, WeakField, SuccessOrderingField, |
561 | FailureOrderingField, AlignmentField>(), |
562 | "Bitfields must be contiguous"); |
563 | |
564 | /// Return the alignment of the memory that is being allocated by the |
565 | /// instruction. |
566 | Align getAlign() const { |
567 | return Align(1ULL << getSubclassData<AlignmentField>()); |
568 | } |
569 | |
570 | void setAlignment(Align Align) { |
571 | setSubclassData<AlignmentField>(Log2(Align)); |
572 | } |
573 | |
574 | /// Return true if this is a cmpxchg from a volatile memory |
575 | /// location. |
576 | /// |
577 | bool isVolatile() const { return getSubclassData<VolatileField>(); } |
578 | |
579 | /// Specify whether this is a volatile cmpxchg. |
580 | /// |
581 | void setVolatile(bool V) { setSubclassData<VolatileField>(V); } |
582 | |
583 | /// Return true if this cmpxchg may spuriously fail. |
584 | bool isWeak() const { return getSubclassData<WeakField>(); } |
585 | |
586 | void setWeak(bool IsWeak) { setSubclassData<WeakField>(IsWeak); } |
587 | |
588 | /// Transparently provide more efficient getOperand methods. |
589 | DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value)
590 | |
591 | static bool isValidSuccessOrdering(AtomicOrdering Ordering) { |
592 | return Ordering != AtomicOrdering::NotAtomic && |
593 | Ordering != AtomicOrdering::Unordered; |
594 | } |
595 | |
596 | static bool isValidFailureOrdering(AtomicOrdering Ordering) { |
597 | return Ordering != AtomicOrdering::NotAtomic && |
598 | Ordering != AtomicOrdering::Unordered && |
599 | Ordering != AtomicOrdering::AcquireRelease && |
600 | Ordering != AtomicOrdering::Release; |
601 | } |
602 | |
603 | /// Returns the success ordering constraint of this cmpxchg instruction. |
604 | AtomicOrdering getSuccessOrdering() const { |
605 | return getSubclassData<SuccessOrderingField>(); |
606 | } |
607 | |
608 | /// Sets the success ordering constraint of this cmpxchg instruction. |
609 | void setSuccessOrdering(AtomicOrdering Ordering) { |
610 | assert(isValidSuccessOrdering(Ordering) &&
611 | "invalid CmpXchg success ordering");
612 | setSubclassData<SuccessOrderingField>(Ordering); |
613 | } |
614 | |
615 | /// Returns the failure ordering constraint of this cmpxchg instruction. |
616 | AtomicOrdering getFailureOrdering() const { |
617 | return getSubclassData<FailureOrderingField>(); |
618 | } |
619 | |
620 | /// Sets the failure ordering constraint of this cmpxchg instruction. |
621 | void setFailureOrdering(AtomicOrdering Ordering) { |
622 | assert(isValidFailureOrdering(Ordering) &&
623 | "invalid CmpXchg failure ordering");
624 | setSubclassData<FailureOrderingField>(Ordering); |
625 | } |
626 | |
627 | /// Returns a single ordering which is at least as strong as both the |
628 | /// success and failure orderings for this cmpxchg. |
629 | AtomicOrdering getMergedOrdering() const { |
630 | if (getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) |
631 | return AtomicOrdering::SequentiallyConsistent; |
632 | if (getFailureOrdering() == AtomicOrdering::Acquire) { |
633 | if (getSuccessOrdering() == AtomicOrdering::Monotonic) |
634 | return AtomicOrdering::Acquire; |
635 | if (getSuccessOrdering() == AtomicOrdering::Release) |
636 | return AtomicOrdering::AcquireRelease; |
637 | } |
638 | return getSuccessOrdering(); |
639 | } |
640 | |
641 | /// Returns the synchronization scope ID of this cmpxchg instruction. |
642 | SyncScope::ID getSyncScopeID() const { |
643 | return SSID; |
644 | } |
645 | |
646 | /// Sets the synchronization scope ID of this cmpxchg instruction. |
647 | void setSyncScopeID(SyncScope::ID SSID) { |
648 | this->SSID = SSID; |
649 | } |
650 | |
651 | Value *getPointerOperand() { return getOperand(0); } |
652 | const Value *getPointerOperand() const { return getOperand(0); } |
653 | static unsigned getPointerOperandIndex() { return 0U; } |
654 | |
655 | Value *getCompareOperand() { return getOperand(1); } |
656 | const Value *getCompareOperand() const { return getOperand(1); } |
657 | |
658 | Value *getNewValOperand() { return getOperand(2); } |
659 | const Value *getNewValOperand() const { return getOperand(2); } |
660 | |
661 | /// Returns the address space of the pointer operand. |
662 | unsigned getPointerAddressSpace() const { |
663 | return getPointerOperand()->getType()->getPointerAddressSpace(); |
664 | } |
665 | |
666 | /// Returns the strongest permitted ordering on failure, given the |
667 | /// desired ordering on success. |
668 | /// |
669 | /// If the comparison in a cmpxchg operation fails, there is no atomic store |
670 | /// so release semantics cannot be provided. So this function drops explicit |
671 | /// Release requests from the AtomicOrdering. A SequentiallyConsistent |
672 | /// operation would remain SequentiallyConsistent. |
673 | static AtomicOrdering |
674 | getStrongestFailureOrdering(AtomicOrdering SuccessOrdering) { |
675 | switch (SuccessOrdering) { |
676 | default: |
677 | llvm_unreachable("invalid cmpxchg success ordering");
678 | case AtomicOrdering::Release: |
679 | case AtomicOrdering::Monotonic: |
680 | return AtomicOrdering::Monotonic; |
681 | case AtomicOrdering::AcquireRelease: |
682 | case AtomicOrdering::Acquire: |
683 | return AtomicOrdering::Acquire; |
684 | case AtomicOrdering::SequentiallyConsistent: |
685 | return AtomicOrdering::SequentiallyConsistent; |
686 | } |
687 | } |
688 | |
689 | // Methods for support type inquiry through isa, cast, and dyn_cast: |
690 | static bool classof(const Instruction *I) { |
691 | return I->getOpcode() == Instruction::AtomicCmpXchg; |
692 | } |
693 | static bool classof(const Value *V) { |
694 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
695 | } |
696 | |
697 | private: |
698 | // Shadow Instruction::setInstructionSubclassData with a private forwarding |
699 | // method so that subclasses cannot accidentally use it. |
700 | template <typename Bitfield> |
701 | void setSubclassData(typename Bitfield::Type Value) { |
702 | Instruction::setSubclassData<Bitfield>(Value); |
703 | } |
704 | |
705 | /// The synchronization scope ID of this cmpxchg instruction. Not quite |
706 | /// enough room in SubClassData for everything, so synchronization scope ID |
707 | /// gets its own field. |
708 | SyncScope::ID SSID; |
709 | }; |
710 | |
711 | template <> |
712 | struct OperandTraits<AtomicCmpXchgInst> : |
713 | public FixedNumOperandTraits<AtomicCmpXchgInst, 3> { |
714 | }; |
715 | |
716 | DEFINE_TRANSPARENT_OPERAND_ACCESSORS(AtomicCmpXchgInst, Value)
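
A cmpxchg yields a {original value, i1 success} pair, so consumers usually extract both elements. Sketch, assuming `Builder` is an llvm::IRBuilder<>, `Ptr` is an i32 pointer, and `Expected`/`Desired` are i32 values (the IRBuilder overload with an explicit alignment argument is assumed to match the constructors in this header):

llvm::AtomicCmpXchgInst *CX = Builder.CreateAtomicCmpXchg(
    Ptr, Expected, Desired, llvm::MaybeAlign(4),
    llvm::AtomicOrdering::SequentiallyConsistent,  // success ordering
    llvm::AtomicOrdering::Acquire);                // failure ordering: Release not allowed
llvm::Value *Old = Builder.CreateExtractValue(CX, 0, "old");
llvm::Value *Ok  = Builder.CreateExtractValue(CX, 1, "ok");
(void)Old; (void)Ok;
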
717 | |
718 | //===----------------------------------------------------------------------===// |
719 | // AtomicRMWInst Class |
720 | //===----------------------------------------------------------------------===// |
721 | |
722 | /// an instruction that atomically reads a memory location, |
723 | /// combines it with another value, and then stores the result back. Returns |
724 | /// the old value. |
725 | /// |
726 | class AtomicRMWInst : public Instruction { |
727 | protected: |
728 | // Note: Instruction needs to be a friend here to call cloneImpl. |
729 | friend class Instruction; |
730 | |
731 | AtomicRMWInst *cloneImpl() const; |
732 | |
733 | public: |
734 | /// This enumeration lists the possible modifications atomicrmw can make. In |
735 | /// the descriptions, 'p' is the pointer to the instruction's memory location, |
736 | /// 'old' is the initial value of *p, and 'v' is the other value passed to the |
737 | /// instruction. These instructions always return 'old'. |
738 | enum BinOp : unsigned { |
739 | /// *p = v |
740 | Xchg, |
741 | /// *p = old + v |
742 | Add, |
743 | /// *p = old - v |
744 | Sub, |
745 | /// *p = old & v |
746 | And, |
747 | /// *p = ~(old & v) |
748 | Nand, |
749 | /// *p = old | v |
750 | Or, |
751 | /// *p = old ^ v |
752 | Xor, |
753 | /// *p = old >signed v ? old : v |
754 | Max, |
755 | /// *p = old <signed v ? old : v |
756 | Min, |
757 | /// *p = old >unsigned v ? old : v |
758 | UMax, |
759 | /// *p = old <unsigned v ? old : v |
760 | UMin, |
761 | |
762 | /// *p = old + v |
763 | FAdd, |
764 | |
765 | /// *p = old - v |
766 | FSub, |
767 | |
768 | FIRST_BINOP = Xchg, |
769 | LAST_BINOP = FSub, |
770 | BAD_BINOP |
771 | }; |
772 | |
773 | private: |
774 | template <unsigned Offset> |
775 | using AtomicOrderingBitfieldElement = |
776 | typename Bitfield::Element<AtomicOrdering, Offset, 3, |
777 | AtomicOrdering::LAST>; |
778 | |
779 | template <unsigned Offset> |
780 | using BinOpBitfieldElement = |
781 | typename Bitfield::Element<BinOp, Offset, 4, BinOp::LAST_BINOP>; |
782 | |
783 | public: |
784 | AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val, Align Alignment, |
785 | AtomicOrdering Ordering, SyncScope::ID SSID, |
786 | Instruction *InsertBefore = nullptr); |
787 | AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val, Align Alignment, |
788 | AtomicOrdering Ordering, SyncScope::ID SSID, |
789 | BasicBlock *InsertAtEnd); |
790 | |
791 | // allocate space for exactly two operands |
792 | void *operator new(size_t S) { return User::operator new(S, 2); } |
793 | void operator delete(void *Ptr) { User::operator delete(Ptr); } |
794 | |
795 | using VolatileField = BoolBitfieldElementT<0>; |
796 | using AtomicOrderingField = |
797 | AtomicOrderingBitfieldElementT<VolatileField::NextBit>; |
798 | using OperationField = BinOpBitfieldElement<AtomicOrderingField::NextBit>; |
799 | using AlignmentField = AlignmentBitfieldElementT<OperationField::NextBit>; |
800 | static_assert(Bitfield::areContiguous<VolatileField, AtomicOrderingField, |
801 | OperationField, AlignmentField>(), |
802 | "Bitfields must be contiguous"); |
803 | |
804 | BinOp getOperation() const { return getSubclassData<OperationField>(); } |
805 | |
806 | static StringRef getOperationName(BinOp Op); |
807 | |
808 | static bool isFPOperation(BinOp Op) { |
809 | switch (Op) { |
810 | case AtomicRMWInst::FAdd: |
811 | case AtomicRMWInst::FSub: |
812 | return true; |
813 | default: |
814 | return false; |
815 | } |
816 | } |
817 | |
818 | void setOperation(BinOp Operation) { |
819 | setSubclassData<OperationField>(Operation); |
820 | } |
821 | |
822 | /// Return the alignment of the memory that is being allocated by the |
823 | /// instruction. |
824 | Align getAlign() const { |
825 | return Align(1ULL << getSubclassData<AlignmentField>()); |
826 | } |
827 | |
828 | void setAlignment(Align Align) { |
829 | setSubclassData<AlignmentField>(Log2(Align)); |
830 | } |
831 | |
832 | /// Return true if this is a RMW on a volatile memory location. |
833 | /// |
834 | bool isVolatile() const { return getSubclassData<VolatileField>(); } |
835 | |
836 | /// Specify whether this is a volatile RMW or not. |
837 | /// |
838 | void setVolatile(bool V) { setSubclassData<VolatileField>(V); } |
839 | |
840 | /// Transparently provide more efficient getOperand methods. |
841 | DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value)
842 | |
843 | /// Returns the ordering constraint of this rmw instruction. |
844 | AtomicOrdering getOrdering() const { |
845 | return getSubclassData<AtomicOrderingField>(); |
846 | } |
847 | |
848 | /// Sets the ordering constraint of this rmw instruction. |
849 | void setOrdering(AtomicOrdering Ordering) { |
850 | assert(Ordering != AtomicOrdering::NotAtomic &&
851 | "atomicrmw instructions can only be atomic.");
852 | setSubclassData<AtomicOrderingField>(Ordering); |
853 | } |
854 | |
855 | /// Returns the synchronization scope ID of this rmw instruction. |
856 | SyncScope::ID getSyncScopeID() const { |
857 | return SSID; |
858 | } |
859 | |
860 | /// Sets the synchronization scope ID of this rmw instruction. |
861 | void setSyncScopeID(SyncScope::ID SSID) { |
862 | this->SSID = SSID; |
863 | } |
864 | |
865 | Value *getPointerOperand() { return getOperand(0); } |
866 | const Value *getPointerOperand() const { return getOperand(0); } |
867 | static unsigned getPointerOperandIndex() { return 0U; } |
868 | |
869 | Value *getValOperand() { return getOperand(1); } |
870 | const Value *getValOperand() const { return getOperand(1); } |
871 | |
872 | /// Returns the address space of the pointer operand. |
873 | unsigned getPointerAddressSpace() const { |
874 | return getPointerOperand()->getType()->getPointerAddressSpace(); |
875 | } |
876 | |
877 | bool isFloatingPointOperation() const { |
878 | return isFPOperation(getOperation()); |
879 | } |
880 | |
881 | // Methods for support type inquiry through isa, cast, and dyn_cast: |
882 | static bool classof(const Instruction *I) { |
883 | return I->getOpcode() == Instruction::AtomicRMW; |
884 | } |
885 | static bool classof(const Value *V) { |
886 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
887 | } |
888 | |
889 | private: |
890 | void Init(BinOp Operation, Value *Ptr, Value *Val, Align Align, |
891 | AtomicOrdering Ordering, SyncScope::ID SSID); |
892 | |
893 | // Shadow Instruction::setInstructionSubclassData with a private forwarding |
894 | // method so that subclasses cannot accidentally use it. |
895 | template <typename Bitfield> |
896 | void setSubclassData(typename Bitfield::Type Value) { |
897 | Instruction::setSubclassData<Bitfield>(Value); |
898 | } |
899 | |
900 | /// The synchronization scope ID of this rmw instruction. Not quite enough |
901 | /// room in SubClassData for everything, so synchronization scope ID gets its |
902 | /// own field. |
903 | SyncScope::ID SSID; |
904 | }; |
905 | |
906 | template <> |
907 | struct OperandTraits<AtomicRMWInst> |
908 | : public FixedNumOperandTraits<AtomicRMWInst,2> { |
909 | }; |
910 | |
911 | DEFINE_TRANSPARENT_OPERAND_ACCESSORS(AtomicRMWInst, Value)
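
Every atomicrmw returns the value the memory held before the update, so a fetch-and-add observes the old counter value. Sketch, assuming `Builder` is an llvm::IRBuilder<> and `Counter` is an i64 pointer (names illustrative; the alignment-taking IRBuilder overload is assumed to match this tree):

llvm::AtomicRMWInst *Old = Builder.CreateAtomicRMW(
    llvm::AtomicRMWInst::Add, Counter, Builder.getInt64(1), llvm::MaybeAlign(8),
    llvm::AtomicOrdering::Monotonic);   // result is the pre-increment value
(void)Old;
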
912 | |
913 | //===----------------------------------------------------------------------===// |
914 | // GetElementPtrInst Class |
915 | //===----------------------------------------------------------------------===// |
916 | |
917 | // checkGEPType - Simple wrapper function to give a better assertion failure |
918 | // message on bad indexes for a gep instruction. |
919 | // |
920 | inline Type *checkGEPType(Type *Ty) { |
921 | assert(Ty && "Invalid GetElementPtrInst indices for type!");
922 | return Ty; |
923 | } |
924 | |
925 | /// an instruction for type-safe pointer arithmetic to |
926 | /// access elements of arrays and structs |
927 | /// |
928 | class GetElementPtrInst : public Instruction { |
929 | Type *SourceElementType; |
930 | Type *ResultElementType; |
931 | |
932 | GetElementPtrInst(const GetElementPtrInst &GEPI); |
933 | |
934 | /// Constructors - Create a getelementptr instruction with a base pointer an |
935 | /// list of indices. The first ctor can optionally insert before an existing |
936 | /// instruction, the second appends the new instruction to the specified |
937 | /// BasicBlock. |
938 | inline GetElementPtrInst(Type *PointeeType, Value *Ptr, |
939 | ArrayRef<Value *> IdxList, unsigned Values, |
940 | const Twine &NameStr, Instruction *InsertBefore); |
941 | inline GetElementPtrInst(Type *PointeeType, Value *Ptr, |
942 | ArrayRef<Value *> IdxList, unsigned Values, |
943 | const Twine &NameStr, BasicBlock *InsertAtEnd); |
944 | |
945 | void init(Value *Ptr, ArrayRef<Value *> IdxList, const Twine &NameStr); |
946 | |
947 | protected: |
948 | // Note: Instruction needs to be a friend here to call cloneImpl. |
949 | friend class Instruction; |
950 | |
951 | GetElementPtrInst *cloneImpl() const; |
952 | |
953 | public: |
954 | static GetElementPtrInst *Create(Type *PointeeType, Value *Ptr, |
955 | ArrayRef<Value *> IdxList, |
956 | const Twine &NameStr = "", |
957 | Instruction *InsertBefore = nullptr) { |
958 | unsigned Values = 1 + unsigned(IdxList.size()); |
959 | assert(PointeeType && "Must specify element type");
960 | assert(cast<PointerType>(Ptr->getType()->getScalarType())
961 | ->isOpaqueOrPointeeTypeMatches(PointeeType));
962 | return new (Values) GetElementPtrInst(PointeeType, Ptr, IdxList, Values, |
963 | NameStr, InsertBefore); |
964 | } |
965 | |
966 | static GetElementPtrInst *Create(Type *PointeeType, Value *Ptr, |
967 | ArrayRef<Value *> IdxList, |
968 | const Twine &NameStr, |
969 | BasicBlock *InsertAtEnd) { |
970 | unsigned Values = 1 + unsigned(IdxList.size()); |
971 | assert(PointeeType && "Must specify element type");
972 | assert(cast<PointerType>(Ptr->getType()->getScalarType())
973 | ->isOpaqueOrPointeeTypeMatches(PointeeType));
974 | return new (Values) GetElementPtrInst(PointeeType, Ptr, IdxList, Values, |
975 | NameStr, InsertAtEnd); |
976 | } |
977 | |
978 | LLVM_ATTRIBUTE_DEPRECATED(static GetElementPtrInst *CreateInBounds(
979 | Value *Ptr, ArrayRef<Value *> IdxList, const Twine &NameStr = "",
980 | Instruction *InsertBefore = nullptr),
981 | "Use the version with explicit element type instead") {
982 | return CreateInBounds( |
983 | Ptr->getType()->getScalarType()->getPointerElementType(), Ptr, IdxList, |
984 | NameStr, InsertBefore); |
985 | } |
986 | |
987 | /// Create an "inbounds" getelementptr. See the documentation for the |
988 | /// "inbounds" flag in LangRef.html for details. |
989 | static GetElementPtrInst * |
990 | CreateInBounds(Type *PointeeType, Value *Ptr, ArrayRef<Value *> IdxList, |
991 | const Twine &NameStr = "", |
992 | Instruction *InsertBefore = nullptr) { |
993 | GetElementPtrInst *GEP = |
994 | Create(PointeeType, Ptr, IdxList, NameStr, InsertBefore); |
995 | GEP->setIsInBounds(true); |
996 | return GEP; |
997 | } |
998 | |
999 | LLVM_ATTRIBUTE_DEPRECATED(static GetElementPtrInst *CreateInBounds(
1000 | Value *Ptr, ArrayRef<Value *> IdxList, const Twine &NameStr,
1001 | BasicBlock *InsertAtEnd),
1002 | "Use the version with explicit element type instead") {
1003 | return CreateInBounds( |
1004 | Ptr->getType()->getScalarType()->getPointerElementType(), Ptr, IdxList, |
1005 | NameStr, InsertAtEnd); |
1006 | } |
1007 | |
1008 | static GetElementPtrInst *CreateInBounds(Type *PointeeType, Value *Ptr, |
1009 | ArrayRef<Value *> IdxList, |
1010 | const Twine &NameStr, |
1011 | BasicBlock *InsertAtEnd) { |
1012 | GetElementPtrInst *GEP = |
1013 | Create(PointeeType, Ptr, IdxList, NameStr, InsertAtEnd); |
1014 | GEP->setIsInBounds(true); |
1015 | return GEP; |
1016 | } |
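
The factory methods above take the source element type explicitly, and getIndexedType() performs the same index walk purely on types. Sketch, assuming `Builder` is an llvm::IRBuilder<>, `PairTy` is a literal struct type such as { i32, i64 }, and `P` points at such a struct (names illustrative):

// Index 0 steps over the pointer itself; index 1 selects the second field.
llvm::Value *FieldPtr = Builder.CreateInBoundsGEP(
    PairTy, P, {Builder.getInt32(0), Builder.getInt32(1)}, "field");
llvm::Type *FieldTy = llvm::GetElementPtrInst::getIndexedType(
    PairTy, {Builder.getInt32(0), Builder.getInt32(1)});  // i64 for this struct
(void)FieldPtr; (void)FieldTy;
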
1017 | |
1018 | /// Transparently provide more efficient getOperand methods. |
1019 | DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value)
1020 | |
1021 | Type *getSourceElementType() const { return SourceElementType; } |
1022 | |
1023 | void setSourceElementType(Type *Ty) { SourceElementType = Ty; } |
1024 | void setResultElementType(Type *Ty) { ResultElementType = Ty; } |
1025 | |
1026 | Type *getResultElementType() const { |
1027 | assert(cast<PointerType>(getType()->getScalarType())((void)0) |
1028 | ->isOpaqueOrPointeeTypeMatches(ResultElementType))((void)0); |
1029 | return ResultElementType; |
1030 | } |
1031 | |
1032 | /// Returns the address space of this instruction's pointer type. |
1033 | unsigned getAddressSpace() const { |
1034 | // Note that this is always the same as the pointer operand's address space |
1035 | // and that is cheaper to compute, so cheat here. |
1036 | return getPointerAddressSpace(); |
1037 | } |
1038 | |
1039 | /// Returns the result type of a getelementptr with the given source |
1040 | /// element type and indexes. |
1041 | /// |
1042 | /// Null is returned if the indices are invalid for the specified |
1043 | /// source element type. |
1044 | static Type *getIndexedType(Type *Ty, ArrayRef<Value *> IdxList); |
1045 | static Type *getIndexedType(Type *Ty, ArrayRef<Constant *> IdxList); |
1046 | static Type *getIndexedType(Type *Ty, ArrayRef<uint64_t> IdxList); |
1047 | |
1048 | /// Return the type of the element at the given index of an indexable |
1049 | /// type. This is equivalent to "getIndexedType(Agg, {Zero, Idx})". |
1050 | /// |
1051 | /// Returns null if the type can't be indexed, or the given index is not |
1052 | /// legal for the given type. |
1053 | static Type *getTypeAtIndex(Type *Ty, Value *Idx); |
1054 | static Type *getTypeAtIndex(Type *Ty, uint64_t Idx); |
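To make the indexing rules above concrete, here is a hedged sketch using a hypothetical two-field struct type; the context variable Ctx is assumed to be in scope.

// %pair = type { i32, [4 x float] }
StructType *Pair = StructType::get(
    Type::getInt32Ty(Ctx), ArrayType::get(Type::getFloatTy(Ctx), 4));
Type *T0 = GetElementPtrInst::getTypeAtIndex(Pair, (uint64_t)0); // i32
Type *T1 = GetElementPtrInst::getTypeAtIndex(Pair, (uint64_t)1); // [4 x float]
Type *T2 = GetElementPtrInst::getTypeAtIndex(Pair, (uint64_t)2); // nullptr: index out of range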
1055 | |
1056 | inline op_iterator idx_begin() { return op_begin()+1; } |
1057 | inline const_op_iterator idx_begin() const { return op_begin()+1; } |
1058 | inline op_iterator idx_end() { return op_end(); } |
1059 | inline const_op_iterator idx_end() const { return op_end(); } |
1060 | |
1061 | inline iterator_range<op_iterator> indices() { |
1062 | return make_range(idx_begin(), idx_end()); |
1063 | } |
1064 | |
1065 | inline iterator_range<const_op_iterator> indices() const { |
1066 | return make_range(idx_begin(), idx_end()); |
1067 | } |
1068 | |
1069 | Value *getPointerOperand() { |
1070 | return getOperand(0); |
1071 | } |
1072 | const Value *getPointerOperand() const { |
1073 | return getOperand(0); |
1074 | } |
1075 | static unsigned getPointerOperandIndex() { |
1076 | return 0U; // get index for modifying correct operand. |
1077 | } |
1078 | |
1079 | /// Method to return the pointer operand as a |
1080 | /// PointerType. |
1081 | Type *getPointerOperandType() const { |
1082 | return getPointerOperand()->getType(); |
1083 | } |
1084 | |
1085 | /// Returns the address space of the pointer operand. |
1086 | unsigned getPointerAddressSpace() const { |
1087 | return getPointerOperandType()->getPointerAddressSpace(); |
1088 | } |
1089 | |
1090 | /// Returns the pointer type returned by the GEP |
1091 | /// instruction, which may be a vector of pointers. |
1092 | static Type *getGEPReturnType(Type *ElTy, Value *Ptr, |
1093 | ArrayRef<Value *> IdxList) { |
1094 | PointerType *OrigPtrTy = cast<PointerType>(Ptr->getType()->getScalarType()); |
1095 | unsigned AddrSpace = OrigPtrTy->getAddressSpace(); |
1096 | Type *ResultElemTy = checkGEPType(getIndexedType(ElTy, IdxList)); |
1097 | Type *PtrTy = OrigPtrTy->isOpaque() |
1098 | ? PointerType::get(OrigPtrTy->getContext(), AddrSpace) |
1099 | : PointerType::get(ResultElemTy, AddrSpace); |
1100 | // Vector GEP |
1101 | if (auto *PtrVTy = dyn_cast<VectorType>(Ptr->getType())) { |
1102 | ElementCount EltCount = PtrVTy->getElementCount(); |
1103 | return VectorType::get(PtrTy, EltCount); |
1104 | } |
1105 | for (Value *Index : IdxList) |
1106 | if (auto *IndexVTy = dyn_cast<VectorType>(Index->getType())) { |
1107 | ElementCount EltCount = IndexVTy->getElementCount(); |
1108 | return VectorType::get(PtrTy, EltCount); |
1109 | } |
1110 | // Scalar GEP |
1111 | return PtrTy; |
1112 | } |
1113 | |
1114 | unsigned getNumIndices() const { // Note: always non-negative |
1115 | return getNumOperands() - 1; |
1116 | } |
1117 | |
1118 | bool hasIndices() const { |
1119 | return getNumOperands() > 1; |
1120 | } |
1121 | |
1122 | /// Return true if all of the indices of this GEP are |
1123 | /// zeros. If so, the result pointer and the first operand have the same |
1124 | /// value, just potentially different types. |
1125 | bool hasAllZeroIndices() const; |
1126 | |
1127 | /// Return true if all of the indices of this GEP are |
1128 | /// constant integers. If so, the result pointer and the first operand have |
1129 | /// a constant offset between them. |
1130 | bool hasAllConstantIndices() const; |
1131 | |
1132 | /// Set or clear the inbounds flag on this GEP instruction. |
1133 | /// See LangRef.html for the meaning of inbounds on a getelementptr. |
1134 | void setIsInBounds(bool b = true); |
1135 | |
1136 | /// Determine whether the GEP has the inbounds flag. |
1137 | bool isInBounds() const; |
1138 | |
1139 | /// Accumulate the constant address offset of this GEP if possible. |
1140 | /// |
1141 | /// This routine accepts an APInt into which it will accumulate the constant |
1142 | /// offset of this GEP if the GEP is in fact constant. If the GEP is not |
1143 | /// all-constant, it returns false and the value of the offset APInt is |
1144 | /// undefined (it is *not* preserved!). The APInt passed into this routine |
1145 | /// must be at least as wide as the IntPtr type for the address space of |
1146 | /// the base GEP pointer. |
1147 | bool accumulateConstantOffset(const DataLayout &DL, APInt &Offset) const; |
1148 | bool collectOffset(const DataLayout &DL, unsigned BitWidth, |
1149 | MapVector<Value *, APInt> &VariableOffsets, |
1150 | APInt &ConstantOffset) const; |
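A short sketch of how accumulateConstantOffset is usually driven; GEP and DL are assumed to be in scope, and the offset width follows the requirement stated above.

// Assuming: GetElementPtrInst *GEP and const DataLayout &DL are available.
unsigned IdxWidth = DL.getIndexSizeInBits(GEP->getPointerAddressSpace());
APInt Offset(IdxWidth, 0);
if (GEP->accumulateConstantOffset(DL, Offset)) {
  // All indices were constant; Offset holds the byte offset from the base pointer.
} else {
  // Offset is not meaningful here and must not be read.
}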
1151 | // Methods for support type inquiry through isa, cast, and dyn_cast: |
1152 | static bool classof(const Instruction *I) { |
1153 | return (I->getOpcode() == Instruction::GetElementPtr); |
1154 | } |
1155 | static bool classof(const Value *V) { |
1156 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
1157 | } |
1158 | }; |
1159 | |
1160 | template <> |
1161 | struct OperandTraits<GetElementPtrInst> : |
1162 | public VariadicOperandTraits<GetElementPtrInst, 1> { |
1163 | }; |
1164 | |
1165 | GetElementPtrInst::GetElementPtrInst(Type *PointeeType, Value *Ptr, |
1166 | ArrayRef<Value *> IdxList, unsigned Values, |
1167 | const Twine &NameStr, |
1168 | Instruction *InsertBefore) |
1169 | : Instruction(getGEPReturnType(PointeeType, Ptr, IdxList), GetElementPtr, |
1170 | OperandTraits<GetElementPtrInst>::op_end(this) - Values, |
1171 | Values, InsertBefore), |
1172 | SourceElementType(PointeeType), |
1173 | ResultElementType(getIndexedType(PointeeType, IdxList)) { |
1174 | assert(cast<PointerType>(getType()->getScalarType())((void)0) |
1175 | ->isOpaqueOrPointeeTypeMatches(ResultElementType))((void)0); |
1176 | init(Ptr, IdxList, NameStr); |
1177 | } |
1178 | |
1179 | GetElementPtrInst::GetElementPtrInst(Type *PointeeType, Value *Ptr, |
1180 | ArrayRef<Value *> IdxList, unsigned Values, |
1181 | const Twine &NameStr, |
1182 | BasicBlock *InsertAtEnd) |
1183 | : Instruction(getGEPReturnType(PointeeType, Ptr, IdxList), GetElementPtr, |
1184 | OperandTraits<GetElementPtrInst>::op_end(this) - Values, |
1185 | Values, InsertAtEnd), |
1186 | SourceElementType(PointeeType), |
1187 | ResultElementType(getIndexedType(PointeeType, IdxList)) { |
1188 | assert(cast<PointerType>(getType()->getScalarType())((void)0) |
1189 | ->isOpaqueOrPointeeTypeMatches(ResultElementType))((void)0); |
1190 | init(Ptr, IdxList, NameStr); |
1191 | } |
1192 | |
1193 | DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GetElementPtrInst, Value)GetElementPtrInst::op_iterator GetElementPtrInst::op_begin() { return OperandTraits<GetElementPtrInst>::op_begin(this ); } GetElementPtrInst::const_op_iterator GetElementPtrInst:: op_begin() const { return OperandTraits<GetElementPtrInst> ::op_begin(const_cast<GetElementPtrInst*>(this)); } GetElementPtrInst ::op_iterator GetElementPtrInst::op_end() { return OperandTraits <GetElementPtrInst>::op_end(this); } GetElementPtrInst:: const_op_iterator GetElementPtrInst::op_end() const { return OperandTraits <GetElementPtrInst>::op_end(const_cast<GetElementPtrInst *>(this)); } Value *GetElementPtrInst::getOperand(unsigned i_nocapture) const { ((void)0); return cast_or_null<Value >( OperandTraits<GetElementPtrInst>::op_begin(const_cast <GetElementPtrInst*>(this))[i_nocapture].get()); } void GetElementPtrInst::setOperand(unsigned i_nocapture, Value *Val_nocapture ) { ((void)0); OperandTraits<GetElementPtrInst>::op_begin (this)[i_nocapture] = Val_nocapture; } unsigned GetElementPtrInst ::getNumOperands() const { return OperandTraits<GetElementPtrInst >::operands(this); } template <int Idx_nocapture> Use &GetElementPtrInst::Op() { return this->OpFrom<Idx_nocapture >(this); } template <int Idx_nocapture> const Use & GetElementPtrInst::Op() const { return this->OpFrom<Idx_nocapture >(this); } |
1194 | |
1195 | //===----------------------------------------------------------------------===// |
1196 | // ICmpInst Class |
1197 | //===----------------------------------------------------------------------===// |
1198 | |
1199 | /// This instruction compares its operands according to the predicate given |
1200 | /// to the constructor. It only operates on integers or pointers. The operands |
1201 | /// must be identical types. |
1202 | /// Represent an integer comparison operator. |
1203 | class ICmpInst: public CmpInst { |
1204 | void AssertOK() { |
1205 | assert(isIntPredicate() &&((void)0) |
1206 | "Invalid ICmp predicate value")((void)0); |
1207 | assert(getOperand(0)->getType() == getOperand(1)->getType() &&((void)0) |
1208 | "Both operands to ICmp instruction are not of the same type!")((void)0); |
1209 | // Check that the operands are the right type |
1210 | assert((getOperand(0)->getType()->isIntOrIntVectorTy() ||((void)0) |
1211 | getOperand(0)->getType()->isPtrOrPtrVectorTy()) &&((void)0) |
1212 | "Invalid operand types for ICmp instruction")((void)0); |
1213 | } |
1214 | |
1215 | protected: |
1216 | // Note: Instruction needs to be a friend here to call cloneImpl. |
1217 | friend class Instruction; |
1218 | |
1219 | /// Clone an identical ICmpInst |
1220 | ICmpInst *cloneImpl() const; |
1221 | |
1222 | public: |
1223 | /// Constructor with insert-before-instruction semantics. |
1224 | ICmpInst( |
1225 | Instruction *InsertBefore, ///< Where to insert |
1226 | Predicate pred, ///< The predicate to use for the comparison |
1227 | Value *LHS, ///< The left-hand-side of the expression |
1228 | Value *RHS, ///< The right-hand-side of the expression |
1229 | const Twine &NameStr = "" ///< Name of the instruction |
1230 | ) : CmpInst(makeCmpResultType(LHS->getType()), |
1231 | Instruction::ICmp, pred, LHS, RHS, NameStr, |
1232 | InsertBefore) { |
1233 | #ifndef NDEBUG1 |
1234 | AssertOK(); |
1235 | #endif |
1236 | } |
1237 | |
1238 | /// Constructor with insert-at-end semantics. |
1239 | ICmpInst( |
1240 | BasicBlock &InsertAtEnd, ///< Block to insert into. |
1241 | Predicate pred, ///< The predicate to use for the comparison |
1242 | Value *LHS, ///< The left-hand-side of the expression |
1243 | Value *RHS, ///< The right-hand-side of the expression |
1244 | const Twine &NameStr = "" ///< Name of the instruction |
1245 | ) : CmpInst(makeCmpResultType(LHS->getType()), |
1246 | Instruction::ICmp, pred, LHS, RHS, NameStr, |
1247 | &InsertAtEnd) { |
1248 | #ifndef NDEBUG1 |
1249 | AssertOK(); |
1250 | #endif |
1251 | } |
1252 | |
1253 | /// Constructor with no-insertion semantics |
1254 | ICmpInst( |
1255 | Predicate pred, ///< The predicate to use for the comparison |
1256 | Value *LHS, ///< The left-hand-side of the expression |
1257 | Value *RHS, ///< The right-hand-side of the expression |
1258 | const Twine &NameStr = "" ///< Name of the instruction |
1259 | ) : CmpInst(makeCmpResultType(LHS->getType()), |
1260 | Instruction::ICmp, pred, LHS, RHS, NameStr) { |
1261 | #ifndef NDEBUG1 |
1262 | AssertOK(); |
1263 | #endif |
1264 | } |
1265 | |
1266 | /// For example, EQ->EQ, SLE->SLE, UGT->SGT, etc. |
1267 | /// @returns the predicate that would be the result if the operand were |
1268 | /// regarded as signed. |
1269 | /// Return the signed version of the predicate |
1270 | Predicate getSignedPredicate() const { |
1271 | return getSignedPredicate(getPredicate()); |
1272 | } |
1273 | |
1274 | /// This is a static version that you can use without an instruction. |
1275 | /// Return the signed version of the predicate. |
1276 | static Predicate getSignedPredicate(Predicate pred); |
1277 | |
1278 | /// For example, EQ->EQ, SLE->ULE, UGT->UGT, etc. |
1279 | /// @returns the predicate that would be the result if the operand were |
1280 | /// regarded as unsigned. |
1281 | /// Return the unsigned version of the predicate |
1282 | Predicate getUnsignedPredicate() const { |
1283 | return getUnsignedPredicate(getPredicate()); |
1284 | } |
1285 | |
1286 | /// This is a static version that you can use without an instruction. |
1287 | /// Return the unsigned version of the predicate. |
1288 | static Predicate getUnsignedPredicate(Predicate pred); |
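For illustration, the two mappings behave as follows on an assumed ICmpInst *ICI:

ICmpInst::Predicate P  = ICI->getPredicate();              // e.g. ICMP_ULT
ICmpInst::Predicate SP = ICmpInst::getSignedPredicate(P);  // ICMP_SLT
ICmpInst::Predicate UP =
    ICmpInst::getUnsignedPredicate(ICmpInst::ICMP_SGE);    // ICMP_UGE
// Equality predicates (EQ, NE) map to themselves in both directions.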
1289 | |
1290 | /// Return true if this predicate is either EQ or NE. This also |
1291 | /// tests for commutativity. |
1292 | static bool isEquality(Predicate P) { |
1293 | return P == ICMP_EQ || P == ICMP_NE; |
1294 | } |
1295 | |
1296 | /// Return true if this predicate is either EQ or NE. This also |
1297 | /// tests for commutativity. |
1298 | bool isEquality() const { |
1299 | return isEquality(getPredicate()); |
1300 | } |
1301 | |
1302 | /// @returns true if the predicate of this ICmpInst is commutative |
1303 | /// Determine if this relation is commutative. |
1304 | bool isCommutative() const { return isEquality(); } |
1305 | |
1306 | /// Return true if the predicate is relational (not EQ or NE). |
1307 | /// |
1308 | bool isRelational() const { |
1309 | return !isEquality(); |
1310 | } |
1311 | |
1312 | /// Return true if the predicate is relational (not EQ or NE). |
1313 | /// |
1314 | static bool isRelational(Predicate P) { |
1315 | return !isEquality(P); |
1316 | } |
1317 | |
1318 | /// Return true if the predicate is SGT or UGT. |
1319 | /// |
1320 | static bool isGT(Predicate P) { |
1321 | return P == ICMP_SGT || P == ICMP_UGT; |
1322 | } |
1323 | |
1324 | /// Return true if the predicate is SLT or ULT. |
1325 | /// |
1326 | static bool isLT(Predicate P) { |
1327 | return P == ICMP_SLT || P == ICMP_ULT; |
1328 | } |
1329 | |
1330 | /// Return true if the predicate is SGE or UGE. |
1331 | /// |
1332 | static bool isGE(Predicate P) { |
1333 | return P == ICMP_SGE || P == ICMP_UGE; |
1334 | } |
1335 | |
1336 | /// Return true if the predicate is SLE or ULE. |
1337 | /// |
1338 | static bool isLE(Predicate P) { |
1339 | return P == ICMP_SLE || P == ICMP_ULE; |
1340 | } |
1341 | |
1342 | /// Exchange the two operands to this instruction in such a way that it does |
1343 | /// not modify the semantics of the instruction. The predicate value may be |
1344 | /// changed to retain the same result if the predicate is order dependent |
1345 | /// (e.g. ult). |
1346 | /// Swap operands and adjust predicate. |
1347 | void swapOperands() { |
1348 | setPredicate(getSwappedPredicate()); |
1349 | Op<0>().swap(Op<1>()); |
1350 | } |
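For example, swapping the operands of an unsigned less-than comparison flips the predicate so the computed value is unchanged (an IR-level illustration):

// Before:             %c = icmp ult i32 %a, %b
// After swapOperands:  %c = icmp ugt i32 %b, %a   ; same result for all %a, %b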
1351 | |
1352 | // Methods for support type inquiry through isa, cast, and dyn_cast: |
1353 | static bool classof(const Instruction *I) { |
1354 | return I->getOpcode() == Instruction::ICmp; |
1355 | } |
1356 | static bool classof(const Value *V) { |
1357 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
1358 | } |
1359 | }; |
1360 | |
1361 | //===----------------------------------------------------------------------===// |
1362 | // FCmpInst Class |
1363 | //===----------------------------------------------------------------------===// |
1364 | |
1365 | /// This instruction compares its operands according to the predicate given |
1366 | /// to the constructor. It only operates on floating point values or packed |
1367 | /// vectors of floating point values. The operands must be identical types. |
1368 | /// Represents a floating point comparison operator. |
1369 | class FCmpInst: public CmpInst { |
1370 | void AssertOK() { |
1371 | assert(isFPPredicate() && "Invalid FCmp predicate value")((void)0); |
1372 | assert(getOperand(0)->getType() == getOperand(1)->getType() &&((void)0) |
1373 | "Both operands to FCmp instruction are not of the same type!")((void)0); |
1374 | // Check that the operands are the right type |
1375 | assert(getOperand(0)->getType()->isFPOrFPVectorTy() &&((void)0) |
1376 | "Invalid operand types for FCmp instruction")((void)0); |
1377 | } |
1378 | |
1379 | protected: |
1380 | // Note: Instruction needs to be a friend here to call cloneImpl. |
1381 | friend class Instruction; |
1382 | |
1383 | /// Clone an identical FCmpInst |
1384 | FCmpInst *cloneImpl() const; |
1385 | |
1386 | public: |
1387 | /// Constructor with insert-before-instruction semantics. |
1388 | FCmpInst( |
1389 | Instruction *InsertBefore, ///< Where to insert |
1390 | Predicate pred, ///< The predicate to use for the comparison |
1391 | Value *LHS, ///< The left-hand-side of the expression |
1392 | Value *RHS, ///< The right-hand-side of the expression |
1393 | const Twine &NameStr = "" ///< Name of the instruction |
1394 | ) : CmpInst(makeCmpResultType(LHS->getType()), |
1395 | Instruction::FCmp, pred, LHS, RHS, NameStr, |
1396 | InsertBefore) { |
1397 | AssertOK(); |
1398 | } |
1399 | |
1400 | /// Constructor with insert-at-end semantics. |
1401 | FCmpInst( |
1402 | BasicBlock &InsertAtEnd, ///< Block to insert into. |
1403 | Predicate pred, ///< The predicate to use for the comparison |
1404 | Value *LHS, ///< The left-hand-side of the expression |
1405 | Value *RHS, ///< The right-hand-side of the expression |
1406 | const Twine &NameStr = "" ///< Name of the instruction |
1407 | ) : CmpInst(makeCmpResultType(LHS->getType()), |
1408 | Instruction::FCmp, pred, LHS, RHS, NameStr, |
1409 | &InsertAtEnd) { |
1410 | AssertOK(); |
1411 | } |
1412 | |
1413 | /// Constructor with no-insertion semantics |
1414 | FCmpInst( |
1415 | Predicate Pred, ///< The predicate to use for the comparison |
1416 | Value *LHS, ///< The left-hand-side of the expression |
1417 | Value *RHS, ///< The right-hand-side of the expression |
1418 | const Twine &NameStr = "", ///< Name of the instruction |
1419 | Instruction *FlagsSource = nullptr |
1420 | ) : CmpInst(makeCmpResultType(LHS->getType()), Instruction::FCmp, Pred, LHS, |
1421 | RHS, NameStr, nullptr, FlagsSource) { |
1422 | AssertOK(); |
1423 | } |
1424 | |
1425 | /// @returns true if the predicate of this instruction is EQ or NE. |
1426 | /// Determine if this is an equality predicate. |
1427 | static bool isEquality(Predicate Pred) { |
1428 | return Pred == FCMP_OEQ || Pred == FCMP_ONE || Pred == FCMP_UEQ || |
1429 | Pred == FCMP_UNE; |
1430 | } |
1431 | |
1432 | /// @returns true if the predicate of this instruction is EQ or NE. |
1433 | /// Determine if this is an equality predicate. |
1434 | bool isEquality() const { return isEquality(getPredicate()); } |
1435 | |
1436 | /// @returns true if the predicate of this instruction is commutative. |
1437 | /// Determine if this is a commutative predicate. |
1438 | bool isCommutative() const { |
1439 | return isEquality() || |
1440 | getPredicate() == FCMP_FALSE || |
1441 | getPredicate() == FCMP_TRUE || |
1442 | getPredicate() == FCMP_ORD || |
1443 | getPredicate() == FCMP_UNO; |
1444 | } |
1445 | |
1446 | /// @returns true if the predicate is relational (not EQ or NE). |
1447 | /// Determine if this is a relational predicate.
1448 | bool isRelational() const { return !isEquality(); } |
1449 | |
1450 | /// Exchange the two operands to this instruction in such a way that it does |
1451 | /// not modify the semantics of the instruction. The predicate value may be |
1452 | /// changed to retain the same result if the predicate is order dependent |
1453 | /// (e.g. ult). |
1454 | /// Swap operands and adjust predicate. |
1455 | void swapOperands() { |
1456 | setPredicate(getSwappedPredicate()); |
1457 | Op<0>().swap(Op<1>()); |
1458 | } |
1459 | |
1460 | /// Methods for support type inquiry through isa, cast, and dyn_cast: |
1461 | static bool classof(const Instruction *I) { |
1462 | return I->getOpcode() == Instruction::FCmp; |
1463 | } |
1464 | static bool classof(const Value *V) { |
1465 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
1466 | } |
1467 | }; |
1468 | |
1469 | //===----------------------------------------------------------------------===// |
1470 | /// This class represents a function call, abstracting a target |
1471 | /// machine's calling convention. This class uses low bit of the SubClassData |
1472 | /// field to indicate whether or not this is a tail call. The rest of the bits |
1473 | /// hold the calling convention of the call. |
1474 | /// |
1475 | class CallInst : public CallBase { |
1476 | CallInst(const CallInst &CI); |
1477 | |
1478 | /// Construct a CallInst given a range of arguments. |
1479 | /// Construct a CallInst from a range of arguments |
1480 | inline CallInst(FunctionType *Ty, Value *Func, ArrayRef<Value *> Args, |
1481 | ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr, |
1482 | Instruction *InsertBefore); |
1483 | |
1484 | inline CallInst(FunctionType *Ty, Value *Func, ArrayRef<Value *> Args, |
1485 | const Twine &NameStr, Instruction *InsertBefore) |
1486 | : CallInst(Ty, Func, Args, None, NameStr, InsertBefore) {} |
1487 | |
1488 | /// Construct a CallInst given a range of arguments. |
1489 | /// Construct a CallInst from a range of arguments |
1490 | inline CallInst(FunctionType *Ty, Value *Func, ArrayRef<Value *> Args, |
1491 | ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr, |
1492 | BasicBlock *InsertAtEnd); |
1493 | |
1494 | explicit CallInst(FunctionType *Ty, Value *F, const Twine &NameStr, |
1495 | Instruction *InsertBefore); |
1496 | |
1497 | CallInst(FunctionType *ty, Value *F, const Twine &NameStr, |
1498 | BasicBlock *InsertAtEnd); |
1499 | |
1500 | void init(FunctionType *FTy, Value *Func, ArrayRef<Value *> Args, |
1501 | ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr); |
1502 | void init(FunctionType *FTy, Value *Func, const Twine &NameStr); |
1503 | |
1504 | /// Compute the number of operands to allocate. |
1505 | static int ComputeNumOperands(int NumArgs, int NumBundleInputs = 0) { |
1506 | // We need one operand for the called function, plus the input operand |
1507 | // counts provided. |
1508 | return 1 + NumArgs + NumBundleInputs; |
1509 | } |
1510 | |
1511 | protected: |
1512 | // Note: Instruction needs to be a friend here to call cloneImpl. |
1513 | friend class Instruction; |
1514 | |
1515 | CallInst *cloneImpl() const; |
1516 | |
1517 | public: |
1518 | static CallInst *Create(FunctionType *Ty, Value *F, const Twine &NameStr = "", |
1519 | Instruction *InsertBefore = nullptr) { |
1520 | return new (ComputeNumOperands(0)) CallInst(Ty, F, NameStr, InsertBefore); |
1521 | } |
1522 | |
1523 | static CallInst *Create(FunctionType *Ty, Value *Func, ArrayRef<Value *> Args, |
1524 | const Twine &NameStr, |
1525 | Instruction *InsertBefore = nullptr) { |
1526 | return new (ComputeNumOperands(Args.size())) |
1527 | CallInst(Ty, Func, Args, None, NameStr, InsertBefore); |
1528 | } |
1529 | |
1530 | static CallInst *Create(FunctionType *Ty, Value *Func, ArrayRef<Value *> Args, |
1531 | ArrayRef<OperandBundleDef> Bundles = None, |
1532 | const Twine &NameStr = "", |
1533 | Instruction *InsertBefore = nullptr) { |
1534 | const int NumOperands = |
1535 | ComputeNumOperands(Args.size(), CountBundleInputs(Bundles)); |
1536 | const unsigned DescriptorBytes = Bundles.size() * sizeof(BundleOpInfo); |
1537 | |
1538 | return new (NumOperands, DescriptorBytes) |
1539 | CallInst(Ty, Func, Args, Bundles, NameStr, InsertBefore); |
1540 | } |
1541 | |
1542 | static CallInst *Create(FunctionType *Ty, Value *F, const Twine &NameStr, |
1543 | BasicBlock *InsertAtEnd) { |
1544 | return new (ComputeNumOperands(0)) CallInst(Ty, F, NameStr, InsertAtEnd); |
1545 | } |
1546 | |
1547 | static CallInst *Create(FunctionType *Ty, Value *Func, ArrayRef<Value *> Args, |
1548 | const Twine &NameStr, BasicBlock *InsertAtEnd) { |
1549 | return new (ComputeNumOperands(Args.size())) |
1550 | CallInst(Ty, Func, Args, None, NameStr, InsertAtEnd); |
1551 | } |
1552 | |
1553 | static CallInst *Create(FunctionType *Ty, Value *Func, ArrayRef<Value *> Args, |
1554 | ArrayRef<OperandBundleDef> Bundles, |
1555 | const Twine &NameStr, BasicBlock *InsertAtEnd) { |
1556 | const int NumOperands = |
1557 | ComputeNumOperands(Args.size(), CountBundleInputs(Bundles)); |
1558 | const unsigned DescriptorBytes = Bundles.size() * sizeof(BundleOpInfo); |
1559 | |
1560 | return new (NumOperands, DescriptorBytes) |
1561 | CallInst(Ty, Func, Args, Bundles, NameStr, InsertAtEnd); |
1562 | } |
1563 | |
1564 | static CallInst *Create(FunctionCallee Func, const Twine &NameStr = "", |
1565 | Instruction *InsertBefore = nullptr) { |
1566 | return Create(Func.getFunctionType(), Func.getCallee(), NameStr, |
1567 | InsertBefore); |
1568 | } |
1569 | |
1570 | static CallInst *Create(FunctionCallee Func, ArrayRef<Value *> Args, |
1571 | ArrayRef<OperandBundleDef> Bundles = None, |
1572 | const Twine &NameStr = "", |
1573 | Instruction *InsertBefore = nullptr) { |
1574 | return Create(Func.getFunctionType(), Func.getCallee(), Args, Bundles, |
1575 | NameStr, InsertBefore); |
1576 | } |
1577 | |
1578 | static CallInst *Create(FunctionCallee Func, ArrayRef<Value *> Args, |
1579 | const Twine &NameStr, |
1580 | Instruction *InsertBefore = nullptr) { |
1581 | return Create(Func.getFunctionType(), Func.getCallee(), Args, NameStr, |
1582 | InsertBefore); |
1583 | } |
1584 | |
1585 | static CallInst *Create(FunctionCallee Func, const Twine &NameStr, |
1586 | BasicBlock *InsertAtEnd) { |
1587 | return Create(Func.getFunctionType(), Func.getCallee(), NameStr, |
1588 | InsertAtEnd); |
1589 | } |
1590 | |
1591 | static CallInst *Create(FunctionCallee Func, ArrayRef<Value *> Args, |
1592 | const Twine &NameStr, BasicBlock *InsertAtEnd) { |
1593 | return Create(Func.getFunctionType(), Func.getCallee(), Args, NameStr, |
1594 | InsertAtEnd); |
1595 | } |
1596 | |
1597 | static CallInst *Create(FunctionCallee Func, ArrayRef<Value *> Args, |
1598 | ArrayRef<OperandBundleDef> Bundles, |
1599 | const Twine &NameStr, BasicBlock *InsertAtEnd) { |
1600 | return Create(Func.getFunctionType(), Func.getCallee(), Args, Bundles, |
1601 | NameStr, InsertAtEnd); |
1602 | } |
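A minimal sketch of the FunctionCallee-based Create overloads above; the module, callee name, argument, and insertion point are assumptions made for illustration.

// Assuming: Module &M, Value *Arg (of type i32), and Instruction *InsertPt are in scope.
LLVMContext &Ctx = M.getContext();
FunctionCallee FC = M.getOrInsertFunction(
    "my_fn", FunctionType::get(Type::getInt32Ty(Ctx),
                               {Type::getInt32Ty(Ctx)}, /*isVarArg=*/false));
CallInst *CI = CallInst::Create(FC, {Arg}, "my_fn.res", InsertPt);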
1603 | |
1604 | /// Create a clone of \p CI with a different set of operand bundles and |
1605 | /// insert it before \p InsertPt. |
1606 | /// |
1607 | /// The returned call instruction is identical to \p CI in every way except that
1608 | /// the operand bundles for the new instruction are set to the operand bundles |
1609 | /// in \p Bundles. |
1610 | static CallInst *Create(CallInst *CI, ArrayRef<OperandBundleDef> Bundles, |
1611 | Instruction *InsertPt = nullptr); |
1612 | |
1613 | /// Generate the IR for a call to malloc: |
1614 | /// 1. Compute the malloc call's argument as the specified type's size, |
1615 | /// possibly multiplied by the array size if the array size is not |
1616 | /// constant 1. |
1617 | /// 2. Call malloc with that argument. |
1618 | /// 3. Bitcast the result of the malloc call to the specified type. |
1619 | static Instruction *CreateMalloc(Instruction *InsertBefore, Type *IntPtrTy, |
1620 | Type *AllocTy, Value *AllocSize, |
1621 | Value *ArraySize = nullptr, |
1622 | Function *MallocF = nullptr, |
1623 | const Twine &Name = ""); |
1624 | static Instruction *CreateMalloc(BasicBlock *InsertAtEnd, Type *IntPtrTy, |
1625 | Type *AllocTy, Value *AllocSize, |
1626 | Value *ArraySize = nullptr, |
1627 | Function *MallocF = nullptr, |
1628 | const Twine &Name = ""); |
1629 | static Instruction *CreateMalloc(Instruction *InsertBefore, Type *IntPtrTy, |
1630 | Type *AllocTy, Value *AllocSize, |
1631 | Value *ArraySize = nullptr, |
1632 | ArrayRef<OperandBundleDef> Bundles = None, |
1633 | Function *MallocF = nullptr, |
1634 | const Twine &Name = ""); |
1635 | static Instruction *CreateMalloc(BasicBlock *InsertAtEnd, Type *IntPtrTy, |
1636 | Type *AllocTy, Value *AllocSize, |
1637 | Value *ArraySize = nullptr, |
1638 | ArrayRef<OperandBundleDef> Bundles = None, |
1639 | Function *MallocF = nullptr, |
1640 | const Twine &Name = ""); |
1641 | /// Generate the IR for a call to the builtin free function. |
1642 | static Instruction *CreateFree(Value *Source, Instruction *InsertBefore); |
1643 | static Instruction *CreateFree(Value *Source, BasicBlock *InsertAtEnd); |
1644 | static Instruction *CreateFree(Value *Source, |
1645 | ArrayRef<OperandBundleDef> Bundles, |
1646 | Instruction *InsertBefore); |
1647 | static Instruction *CreateFree(Value *Source, |
1648 | ArrayRef<OperandBundleDef> Bundles, |
1649 | BasicBlock *InsertAtEnd); |
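A hedged sketch of the malloc/free helpers above, allocating and releasing a single i64; the insertion point and DataLayout are assumed to exist.

// Assuming: Instruction *InsertPt and const DataLayout &DL are in scope.
LLVMContext &Ctx = InsertPt->getContext();
Type *IntPtrTy = DL.getIntPtrType(Ctx);
Type *AllocTy = Type::getInt64Ty(Ctx);
Value *AllocSize =
    ConstantInt::get(IntPtrTy, DL.getTypeAllocSize(AllocTy).getFixedSize());
Instruction *Buf = CallInst::CreateMalloc(InsertPt, IntPtrTy, AllocTy,
                                          AllocSize, /*ArraySize=*/nullptr,
                                          /*MallocF=*/nullptr, "buf");
Instruction *Free = CallInst::CreateFree(Buf, InsertPt);
// Buf is the (possibly bitcast) result of the malloc call; Free calls free(Buf).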
1650 | |
1651 | // Note that 'musttail' implies 'tail'. |
1652 | enum TailCallKind : unsigned { |
1653 | TCK_None = 0, |
1654 | TCK_Tail = 1, |
1655 | TCK_MustTail = 2, |
1656 | TCK_NoTail = 3, |
1657 | TCK_LAST = TCK_NoTail |
1658 | }; |
1659 | |
1660 | using TailCallKindField = Bitfield::Element<TailCallKind, 0, 2, TCK_LAST>; |
1661 | static_assert( |
1662 | Bitfield::areContiguous<TailCallKindField, CallBase::CallingConvField>(), |
1663 | "Bitfields must be contiguous"); |
1664 | |
1665 | TailCallKind getTailCallKind() const { |
1666 | return getSubclassData<TailCallKindField>(); |
1667 | } |
1668 | |
1669 | bool isTailCall() const { |
1670 | TailCallKind Kind = getTailCallKind(); |
1671 | return Kind == TCK_Tail || Kind == TCK_MustTail; |
1672 | } |
1673 | |
1674 | bool isMustTailCall() const { return getTailCallKind() == TCK_MustTail; } |
1675 | |
1676 | bool isNoTailCall() const { return getTailCallKind() == TCK_NoTail; } |
1677 | |
1678 | void setTailCallKind(TailCallKind TCK) { |
1679 | setSubclassData<TailCallKindField>(TCK); |
1680 | } |
1681 | |
1682 | void setTailCall(bool IsTc = true) { |
1683 | setTailCallKind(IsTc ? TCK_Tail : TCK_None); |
1684 | } |
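Reading and updating the tail-call marking looks like this on an assumed CallInst *CI:

if (CI->getTailCallKind() == CallInst::TCK_None)
  CI->setTailCallKind(CallInst::TCK_Tail);   // mark as a plain 'tail' call
bool Tail = CI->isTailCall();                // true for TCK_Tail and TCK_MustTail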
1685 | |
1686 | /// Return true if the call can return twice |
1687 | bool canReturnTwice() const { return hasFnAttr(Attribute::ReturnsTwice); } |
1688 | void setCanReturnTwice() { |
1689 | addAttribute(AttributeList::FunctionIndex, Attribute::ReturnsTwice); |
1690 | } |
1691 | |
1692 | // Methods for support type inquiry through isa, cast, and dyn_cast: |
1693 | static bool classof(const Instruction *I) { |
1694 | return I->getOpcode() == Instruction::Call; |
1695 | } |
1696 | static bool classof(const Value *V) { |
1697 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
1698 | } |
1699 | |
1700 | /// Updates profile metadata by scaling it by \p S / \p T. |
1701 | void updateProfWeight(uint64_t S, uint64_t T); |
1702 | |
1703 | private: |
1704 | // Shadow Instruction::setInstructionSubclassData with a private forwarding |
1705 | // method so that subclasses cannot accidentally use it. |
1706 | template <typename Bitfield> |
1707 | void setSubclassData(typename Bitfield::Type Value) { |
1708 | Instruction::setSubclassData<Bitfield>(Value); |
1709 | } |
1710 | }; |
1711 | |
1712 | CallInst::CallInst(FunctionType *Ty, Value *Func, ArrayRef<Value *> Args, |
1713 | ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr, |
1714 | BasicBlock *InsertAtEnd) |
1715 | : CallBase(Ty->getReturnType(), Instruction::Call, |
1716 | OperandTraits<CallBase>::op_end(this) - |
1717 | (Args.size() + CountBundleInputs(Bundles) + 1), |
1718 | unsigned(Args.size() + CountBundleInputs(Bundles) + 1), |
1719 | InsertAtEnd) { |
1720 | init(Ty, Func, Args, Bundles, NameStr); |
1721 | } |
1722 | |
1723 | CallInst::CallInst(FunctionType *Ty, Value *Func, ArrayRef<Value *> Args, |
1724 | ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr, |
1725 | Instruction *InsertBefore) |
1726 | : CallBase(Ty->getReturnType(), Instruction::Call, |
1727 | OperandTraits<CallBase>::op_end(this) - |
1728 | (Args.size() + CountBundleInputs(Bundles) + 1), |
1729 | unsigned(Args.size() + CountBundleInputs(Bundles) + 1), |
1730 | InsertBefore) { |
1731 | init(Ty, Func, Args, Bundles, NameStr); |
1732 | } |
1733 | |
1734 | //===----------------------------------------------------------------------===// |
1735 | // SelectInst Class |
1736 | //===----------------------------------------------------------------------===// |
1737 | |
1738 | /// This class represents the LLVM 'select' instruction. |
1739 | /// |
1740 | class SelectInst : public Instruction { |
1741 | SelectInst(Value *C, Value *S1, Value *S2, const Twine &NameStr, |
1742 | Instruction *InsertBefore) |
1743 | : Instruction(S1->getType(), Instruction::Select, |
1744 | &Op<0>(), 3, InsertBefore) { |
1745 | init(C, S1, S2); |
1746 | setName(NameStr); |
1747 | } |
1748 | |
1749 | SelectInst(Value *C, Value *S1, Value *S2, const Twine &NameStr, |
1750 | BasicBlock *InsertAtEnd) |
1751 | : Instruction(S1->getType(), Instruction::Select, |
1752 | &Op<0>(), 3, InsertAtEnd) { |
1753 | init(C, S1, S2); |
1754 | setName(NameStr); |
1755 | } |
1756 | |
1757 | void init(Value *C, Value *S1, Value *S2) { |
1758 | assert(!areInvalidOperands(C, S1, S2) && "Invalid operands for select")((void)0); |
1759 | Op<0>() = C; |
1760 | Op<1>() = S1; |
1761 | Op<2>() = S2; |
1762 | } |
1763 | |
1764 | protected: |
1765 | // Note: Instruction needs to be a friend here to call cloneImpl. |
1766 | friend class Instruction; |
1767 | |
1768 | SelectInst *cloneImpl() const; |
1769 | |
1770 | public: |
1771 | static SelectInst *Create(Value *C, Value *S1, Value *S2, |
1772 | const Twine &NameStr = "", |
1773 | Instruction *InsertBefore = nullptr, |
1774 | Instruction *MDFrom = nullptr) { |
1775 | SelectInst *Sel = new(3) SelectInst(C, S1, S2, NameStr, InsertBefore); |
1776 | if (MDFrom) |
1777 | Sel->copyMetadata(*MDFrom); |
1778 | return Sel; |
1779 | } |
1780 | |
1781 | static SelectInst *Create(Value *C, Value *S1, Value *S2, |
1782 | const Twine &NameStr, |
1783 | BasicBlock *InsertAtEnd) { |
1784 | return new(3) SelectInst(C, S1, S2, NameStr, InsertAtEnd); |
1785 | } |
1786 | |
1787 | const Value *getCondition() const { return Op<0>(); } |
1788 | const Value *getTrueValue() const { return Op<1>(); } |
1789 | const Value *getFalseValue() const { return Op<2>(); } |
1790 | Value *getCondition() { return Op<0>(); } |
1791 | Value *getTrueValue() { return Op<1>(); } |
1792 | Value *getFalseValue() { return Op<2>(); } |
1793 | |
1794 | void setCondition(Value *V) { Op<0>() = V; } |
1795 | void setTrueValue(Value *V) { Op<1>() = V; } |
1796 | void setFalseValue(Value *V) { Op<2>() = V; } |
1797 | |
1798 | /// Swap the true and false values of the select instruction. |
1799 | /// This doesn't swap prof metadata. |
1800 | void swapValues() { Op<1>().swap(Op<2>()); } |
1801 | |
1802 | /// Return a string if the specified operands are invalid |
1803 | /// for a select operation, otherwise return null. |
1804 | static const char *areInvalidOperands(Value *Cond, Value *True, Value *False); |
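A minimal sketch of validating and creating a select; the condition, the two value operands, and the insertion point are assumed to exist, and the error handling is only one possible choice.

// Assuming: Value *Cond (i1), Value *A and *B of the same type, and
// Instruction *InsertPt are in scope.
if (const char *Err = SelectInst::areInvalidOperands(Cond, A, B))
  report_fatal_error(Err);                    // operands are not select-compatible
SelectInst *Sel = SelectInst::Create(Cond, A, B, "sel", InsertPt);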
1805 | |
1806 | /// Transparently provide more efficient getOperand methods. |
1807 | DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value)public: inline Value *getOperand(unsigned) const; inline void setOperand(unsigned, Value*); inline op_iterator op_begin(); inline const_op_iterator op_begin() const; inline op_iterator op_end(); inline const_op_iterator op_end() const; protected : template <int> inline Use &Op(); template <int > inline const Use &Op() const; public: inline unsigned getNumOperands() const; |
1808 | |
1809 | OtherOps getOpcode() const { |
1810 | return static_cast<OtherOps>(Instruction::getOpcode()); |
1811 | } |
1812 | |
1813 | // Methods for support type inquiry through isa, cast, and dyn_cast: |
1814 | static bool classof(const Instruction *I) { |
1815 | return I->getOpcode() == Instruction::Select; |
1816 | } |
1817 | static bool classof(const Value *V) { |
1818 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
1819 | } |
1820 | }; |
1821 | |
1822 | template <> |
1823 | struct OperandTraits<SelectInst> : public FixedNumOperandTraits<SelectInst, 3> { |
1824 | }; |
1825 | |
1826 | DEFINE_TRANSPARENT_OPERAND_ACCESSORS(SelectInst, Value)SelectInst::op_iterator SelectInst::op_begin() { return OperandTraits <SelectInst>::op_begin(this); } SelectInst::const_op_iterator SelectInst::op_begin() const { return OperandTraits<SelectInst >::op_begin(const_cast<SelectInst*>(this)); } SelectInst ::op_iterator SelectInst::op_end() { return OperandTraits< SelectInst>::op_end(this); } SelectInst::const_op_iterator SelectInst::op_end() const { return OperandTraits<SelectInst >::op_end(const_cast<SelectInst*>(this)); } Value *SelectInst ::getOperand(unsigned i_nocapture) const { ((void)0); return cast_or_null <Value>( OperandTraits<SelectInst>::op_begin(const_cast <SelectInst*>(this))[i_nocapture].get()); } void SelectInst ::setOperand(unsigned i_nocapture, Value *Val_nocapture) { (( void)0); OperandTraits<SelectInst>::op_begin(this)[i_nocapture ] = Val_nocapture; } unsigned SelectInst::getNumOperands() const { return OperandTraits<SelectInst>::operands(this); } template <int Idx_nocapture> Use &SelectInst::Op() { return this->OpFrom<Idx_nocapture>(this); } template <int Idx_nocapture> const Use &SelectInst::Op() const { return this->OpFrom<Idx_nocapture>(this); } |
1827 | |
1828 | //===----------------------------------------------------------------------===// |
1829 | // VAArgInst Class |
1830 | //===----------------------------------------------------------------------===// |
1831 | |
1832 | /// This class represents the va_arg llvm instruction, which returns |
1833 | /// an argument of the specified type given a va_list and increments that list |
1834 | /// |
1835 | class VAArgInst : public UnaryInstruction { |
1836 | protected: |
1837 | // Note: Instruction needs to be a friend here to call cloneImpl. |
1838 | friend class Instruction; |
1839 | |
1840 | VAArgInst *cloneImpl() const; |
1841 | |
1842 | public: |
1843 | VAArgInst(Value *List, Type *Ty, const Twine &NameStr = "", |
1844 | Instruction *InsertBefore = nullptr) |
1845 | : UnaryInstruction(Ty, VAArg, List, InsertBefore) { |
1846 | setName(NameStr); |
1847 | } |
1848 | |
1849 | VAArgInst(Value *List, Type *Ty, const Twine &NameStr, |
1850 | BasicBlock *InsertAtEnd) |
1851 | : UnaryInstruction(Ty, VAArg, List, InsertAtEnd) { |
1852 | setName(NameStr); |
1853 | } |
1854 | |
1855 | Value *getPointerOperand() { return getOperand(0); } |
1856 | const Value *getPointerOperand() const { return getOperand(0); } |
1857 | static unsigned getPointerOperandIndex() { return 0U; } |
1858 | |
1859 | // Methods for support type inquiry through isa, cast, and dyn_cast: |
1860 | static bool classof(const Instruction *I) { |
1861 | return I->getOpcode() == VAArg; |
1862 | } |
1863 | static bool classof(const Value *V) { |
1864 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
1865 | } |
1866 | }; |
1867 | |
1868 | //===----------------------------------------------------------------------===// |
1869 | // ExtractElementInst Class |
1870 | //===----------------------------------------------------------------------===// |
1871 | |
1872 | /// This instruction extracts a single (scalar) |
1873 | /// element from a VectorType value |
1874 | /// |
1875 | class ExtractElementInst : public Instruction { |
1876 | ExtractElementInst(Value *Vec, Value *Idx, const Twine &NameStr = "", |
1877 | Instruction *InsertBefore = nullptr); |
1878 | ExtractElementInst(Value *Vec, Value *Idx, const Twine &NameStr, |
1879 | BasicBlock *InsertAtEnd); |
1880 | |
1881 | protected: |
1882 | // Note: Instruction needs to be a friend here to call cloneImpl. |
1883 | friend class Instruction; |
1884 | |
1885 | ExtractElementInst *cloneImpl() const; |
1886 | |
1887 | public: |
1888 | static ExtractElementInst *Create(Value *Vec, Value *Idx, |
1889 | const Twine &NameStr = "", |
1890 | Instruction *InsertBefore = nullptr) { |
1891 | return new(2) ExtractElementInst(Vec, Idx, NameStr, InsertBefore); |
1892 | } |
1893 | |
1894 | static ExtractElementInst *Create(Value *Vec, Value *Idx, |
1895 | const Twine &NameStr, |
1896 | BasicBlock *InsertAtEnd) { |
1897 | return new(2) ExtractElementInst(Vec, Idx, NameStr, InsertAtEnd); |
1898 | } |
1899 | |
1900 | /// Return true if an extractelement instruction can be |
1901 | /// formed with the specified operands. |
1902 | static bool isValidOperands(const Value *Vec, const Value *Idx); |
1903 | |
1904 | Value *getVectorOperand() { return Op<0>(); } |
1905 | Value *getIndexOperand() { return Op<1>(); } |
1906 | const Value *getVectorOperand() const { return Op<0>(); } |
1907 | const Value *getIndexOperand() const { return Op<1>(); } |
1908 | |
1909 | VectorType *getVectorOperandType() const { |
1910 | return cast<VectorType>(getVectorOperand()->getType()); |
1911 | } |
1912 | |
1913 | /// Transparently provide more efficient getOperand methods. |
1914 | DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value)public: inline Value *getOperand(unsigned) const; inline void setOperand(unsigned, Value*); inline op_iterator op_begin(); inline const_op_iterator op_begin() const; inline op_iterator op_end(); inline const_op_iterator op_end() const; protected : template <int> inline Use &Op(); template <int > inline const Use &Op() const; public: inline unsigned getNumOperands() const; |
1915 | |
1916 | // Methods for support type inquiry through isa, cast, and dyn_cast: |
1917 | static bool classof(const Instruction *I) { |
1918 | return I->getOpcode() == Instruction::ExtractElement; |
1919 | } |
1920 | static bool classof(const Value *V) { |
1921 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
1922 | } |
1923 | }; |
1924 | |
1925 | template <> |
1926 | struct OperandTraits<ExtractElementInst> : |
1927 | public FixedNumOperandTraits<ExtractElementInst, 2> { |
1928 | }; |
1929 | |
1930 | DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ExtractElementInst, Value)ExtractElementInst::op_iterator ExtractElementInst::op_begin( ) { return OperandTraits<ExtractElementInst>::op_begin( this); } ExtractElementInst::const_op_iterator ExtractElementInst ::op_begin() const { return OperandTraits<ExtractElementInst >::op_begin(const_cast<ExtractElementInst*>(this)); } ExtractElementInst::op_iterator ExtractElementInst::op_end() { return OperandTraits<ExtractElementInst>::op_end(this ); } ExtractElementInst::const_op_iterator ExtractElementInst ::op_end() const { return OperandTraits<ExtractElementInst >::op_end(const_cast<ExtractElementInst*>(this)); } Value *ExtractElementInst::getOperand(unsigned i_nocapture) const { ((void)0); return cast_or_null<Value>( OperandTraits< ExtractElementInst>::op_begin(const_cast<ExtractElementInst *>(this))[i_nocapture].get()); } void ExtractElementInst:: setOperand(unsigned i_nocapture, Value *Val_nocapture) { ((void )0); OperandTraits<ExtractElementInst>::op_begin(this)[ i_nocapture] = Val_nocapture; } unsigned ExtractElementInst:: getNumOperands() const { return OperandTraits<ExtractElementInst >::operands(this); } template <int Idx_nocapture> Use &ExtractElementInst::Op() { return this->OpFrom<Idx_nocapture >(this); } template <int Idx_nocapture> const Use & ExtractElementInst::Op() const { return this->OpFrom<Idx_nocapture >(this); } |
1931 | |
1932 | //===----------------------------------------------------------------------===// |
1933 | // InsertElementInst Class |
1934 | //===----------------------------------------------------------------------===// |
1935 | |
1936 | /// This instruction inserts a single (scalar) |
1937 | /// element into a VectorType value |
1938 | /// |
1939 | class InsertElementInst : public Instruction { |
1940 | InsertElementInst(Value *Vec, Value *NewElt, Value *Idx, |
1941 | const Twine &NameStr = "", |
1942 | Instruction *InsertBefore = nullptr); |
1943 | InsertElementInst(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr, |
1944 | BasicBlock *InsertAtEnd); |
1945 | |
1946 | protected: |
1947 | // Note: Instruction needs to be a friend here to call cloneImpl. |
1948 | friend class Instruction; |
1949 | |
1950 | InsertElementInst *cloneImpl() const; |
1951 | |
1952 | public: |
1953 | static InsertElementInst *Create(Value *Vec, Value *NewElt, Value *Idx, |
1954 | const Twine &NameStr = "", |
1955 | Instruction *InsertBefore = nullptr) { |
1956 | return new(3) InsertElementInst(Vec, NewElt, Idx, NameStr, InsertBefore); |
1957 | } |
1958 | |
1959 | static InsertElementInst *Create(Value *Vec, Value *NewElt, Value *Idx, |
1960 | const Twine &NameStr, |
1961 | BasicBlock *InsertAtEnd) { |
1962 | return new(3) InsertElementInst(Vec, NewElt, Idx, NameStr, InsertAtEnd); |
1963 | } |
1964 | |
1965 | /// Return true if an insertelement instruction can be |
1966 | /// formed with the specified operands. |
1967 | static bool isValidOperands(const Value *Vec, const Value *NewElt, |
1968 | const Value *Idx); |
1969 | |
1970 | /// Overload to return most specific vector type. |
1971 | /// |
1972 | VectorType *getType() const { |
1973 | return cast<VectorType>(Instruction::getType()); |
1974 | } |
1975 | |
1976 | /// Transparently provide more efficient getOperand methods. |
1977 | DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value)public: inline Value *getOperand(unsigned) const; inline void setOperand(unsigned, Value*); inline op_iterator op_begin(); inline const_op_iterator op_begin() const; inline op_iterator op_end(); inline const_op_iterator op_end() const; protected : template <int> inline Use &Op(); template <int > inline const Use &Op() const; public: inline unsigned getNumOperands() const; |
1978 | |
1979 | // Methods for support type inquiry through isa, cast, and dyn_cast: |
1980 | static bool classof(const Instruction *I) { |
1981 | return I->getOpcode() == Instruction::InsertElement; |
1982 | } |
1983 | static bool classof(const Value *V) { |
1984 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
1985 | } |
1986 | }; |
1987 | |
1988 | template <> |
1989 | struct OperandTraits<InsertElementInst> : |
1990 | public FixedNumOperandTraits<InsertElementInst, 3> { |
1991 | }; |
1992 | |
1993 | DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertElementInst, Value)InsertElementInst::op_iterator InsertElementInst::op_begin() { return OperandTraits<InsertElementInst>::op_begin(this ); } InsertElementInst::const_op_iterator InsertElementInst:: op_begin() const { return OperandTraits<InsertElementInst> ::op_begin(const_cast<InsertElementInst*>(this)); } InsertElementInst ::op_iterator InsertElementInst::op_end() { return OperandTraits <InsertElementInst>::op_end(this); } InsertElementInst:: const_op_iterator InsertElementInst::op_end() const { return OperandTraits <InsertElementInst>::op_end(const_cast<InsertElementInst *>(this)); } Value *InsertElementInst::getOperand(unsigned i_nocapture) const { ((void)0); return cast_or_null<Value >( OperandTraits<InsertElementInst>::op_begin(const_cast <InsertElementInst*>(this))[i_nocapture].get()); } void InsertElementInst::setOperand(unsigned i_nocapture, Value *Val_nocapture ) { ((void)0); OperandTraits<InsertElementInst>::op_begin (this)[i_nocapture] = Val_nocapture; } unsigned InsertElementInst ::getNumOperands() const { return OperandTraits<InsertElementInst >::operands(this); } template <int Idx_nocapture> Use &InsertElementInst::Op() { return this->OpFrom<Idx_nocapture >(this); } template <int Idx_nocapture> const Use & InsertElementInst::Op() const { return this->OpFrom<Idx_nocapture >(this); } |
1994 | |
1995 | //===----------------------------------------------------------------------===// |
1996 | // ShuffleVectorInst Class |
1997 | //===----------------------------------------------------------------------===// |
1998 | |
1999 | constexpr int UndefMaskElem = -1; |
2000 | |
2001 | /// This instruction constructs a fixed permutation of two |
2002 | /// input vectors. |
2003 | /// |
2004 | /// For each element of the result vector, the shuffle mask selects an element |
2005 | /// from one of the input vectors to copy to the result. Non-negative elements |
2006 | /// in the mask represent an index into the concatenated pair of input vectors. |
2007 | /// UndefMaskElem (-1) specifies that the result element is undefined. |
2008 | /// |
2009 | /// For scalable vectors, all the elements of the mask must be 0 or -1. This |
2010 | /// requirement may be relaxed in the future. |
2011 | class ShuffleVectorInst : public Instruction { |
2012 | SmallVector<int, 4> ShuffleMask; |
2013 | Constant *ShuffleMaskForBitcode; |
2014 | |
2015 | protected: |
2016 | // Note: Instruction needs to be a friend here to call cloneImpl. |
2017 | friend class Instruction; |
2018 | |
2019 | ShuffleVectorInst *cloneImpl() const; |
2020 | |
2021 | public: |
2022 | ShuffleVectorInst(Value *V1, Value *V2, Value *Mask, |
2023 | const Twine &NameStr = "", |
2024 | Instruction *InsertBefore = nullptr);
2025 | ShuffleVectorInst(Value *V1, Value *V2, Value *Mask, |
2026 | const Twine &NameStr, BasicBlock *InsertAtEnd); |
2027 | ShuffleVectorInst(Value *V1, Value *V2, ArrayRef<int> Mask, |
2028 | const Twine &NameStr = "", |
2029 | Instruction *InsertBefore = nullptr);
2030 | ShuffleVectorInst(Value *V1, Value *V2, ArrayRef<int> Mask, |
2031 | const Twine &NameStr, BasicBlock *InsertAtEnd); |
2032 | |
2033 | void *operator new(size_t S) { return User::operator new(S, 2); } |
2034 | void operator delete(void *Ptr) { return User::operator delete(Ptr); } |
2035 | |
2036 | /// Swap the operands and adjust the mask to preserve the semantics |
2037 | /// of the instruction. |
2038 | void commute(); |
2039 | |
2040 | /// Return true if a shufflevector instruction can be |
2041 | /// formed with the specified operands. |
2042 | static bool isValidOperands(const Value *V1, const Value *V2, |
2043 | const Value *Mask); |
2044 | static bool isValidOperands(const Value *V1, const Value *V2, |
2045 | ArrayRef<int> Mask); |
2046 | |
2047 | /// Overload to return most specific vector type. |
2048 | /// |
2049 | VectorType *getType() const { |
2050 | return cast<VectorType>(Instruction::getType()); |
2051 | } |
2052 | |
2053 | /// Transparently provide more efficient getOperand methods. |
2054 | DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value)public: inline Value *getOperand(unsigned) const; inline void setOperand(unsigned, Value*); inline op_iterator op_begin(); inline const_op_iterator op_begin() const; inline op_iterator op_end(); inline const_op_iterator op_end() const; protected : template <int> inline Use &Op(); template <int > inline const Use &Op() const; public: inline unsigned getNumOperands() const; |
2055 | |
2056 | /// Return the shuffle mask value of this instruction for the given element |
2057 | /// index. Return UndefMaskElem if the element is undef. |
2058 | int getMaskValue(unsigned Elt) const { return ShuffleMask[Elt]; } |
2059 | |
2060 | /// Convert the input shuffle mask operand to a vector of integers. Undefined |
2061 | /// elements of the mask are returned as UndefMaskElem. |
2062 | static void getShuffleMask(const Constant *Mask, |
2063 | SmallVectorImpl<int> &Result); |
2064 | |
2065 | /// Return the mask for this instruction as a vector of integers. Undefined |
2066 | /// elements of the mask are returned as UndefMaskElem. |
2067 | void getShuffleMask(SmallVectorImpl<int> &Result) const { |
2068 | Result.assign(ShuffleMask.begin(), ShuffleMask.end()); |
2069 | } |
2070 | |
2071 | /// Return the mask for this instruction, for use in bitcode. |
2072 | /// |
2073 | /// TODO: This is temporary until we decide a new bitcode encoding for |
2074 | /// shufflevector. |
2075 | Constant *getShuffleMaskForBitcode() const { return ShuffleMaskForBitcode; } |
2076 | |
2077 | static Constant *convertShuffleMaskForBitcode(ArrayRef<int> Mask, |
2078 | Type *ResultTy); |
2079 | |
2080 | void setShuffleMask(ArrayRef<int> Mask); |
2081 | |
2082 | ArrayRef<int> getShuffleMask() const { return ShuffleMask; } |
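To make the mask semantics concrete, an illustrative IR fragment (not part of this header):

// Given %A = <a0,a1,a2,a3> and %B = <b0,b1,b2,b3>:
//   %r = shufflevector <4 x i32> %A, <4 x i32> %B,
//                      <4 x i32> <i32 0, i32 5, i32 undef, i32 7>
// produces <a0, b1, undef, b3>: mask values 0-3 select from %A, 4-7 from %B,
// and getMaskValue(2) reports UndefMaskElem (-1) for the undef lane.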
2083 | |
2084 | /// Return true if this shuffle returns a vector with a different number of |
2085 | /// elements than its source vectors. |
2086 | /// Examples: shufflevector <4 x n> A, <4 x n> B, <1,2,3> |
2087 | /// shufflevector <4 x n> A, <4 x n> B, <1,2,3,4,5> |
2088 | bool changesLength() const { |
2089 | unsigned NumSourceElts = cast<VectorType>(Op<0>()->getType()) |
2090 | ->getElementCount() |
2091 | .getKnownMinValue(); |
2092 | unsigned NumMaskElts = ShuffleMask.size(); |
2093 | return NumSourceElts != NumMaskElts; |
2094 | } |
2095 | |
2096 | /// Return true if this shuffle returns a vector with a greater number of |
2097 | /// elements than its source vectors. |
2098 | /// Example: shufflevector <2 x n> A, <2 x n> B, <1,2,3> |
2099 | bool increasesLength() const { |
2100 | unsigned NumSourceElts = cast<VectorType>(Op<0>()->getType()) |
2101 | ->getElementCount() |
2102 | .getKnownMinValue(); |
2103 | unsigned NumMaskElts = ShuffleMask.size(); |
2104 | return NumSourceElts < NumMaskElts; |
2105 | } |
2106 | |
2107 | /// Return true if this shuffle mask chooses elements from exactly one source |
2108 | /// vector. |
2109 | /// Example: <7,5,undef,7> |
2110 | /// This assumes that vector operands are the same length as the mask. |
2111 | static bool isSingleSourceMask(ArrayRef<int> Mask); |
2112 | static bool isSingleSourceMask(const Constant *Mask) { |
2113 | assert(Mask->getType()->isVectorTy() && "Shuffle needs vector constant.")((void)0); |
2114 | SmallVector<int, 16> MaskAsInts; |
2115 | getShuffleMask(Mask, MaskAsInts); |
2116 | return isSingleSourceMask(MaskAsInts); |
2117 | } |
2118 | |
2119 | /// Return true if this shuffle chooses elements from exactly one source |
2120 | /// vector without changing the length of that vector. |
2121 | /// Example: shufflevector <4 x n> A, <4 x n> B, <3,0,undef,3> |
2122 | /// TODO: Optionally allow length-changing shuffles. |
2123 | bool isSingleSource() const { |
2124 | return !changesLength() && isSingleSourceMask(ShuffleMask); |
2125 | } |
2126 | |
2127 | /// Return true if this shuffle mask chooses elements from exactly one source |
2128 | /// vector without lane crossings. A shuffle using this mask is not |
2129 | /// necessarily a no-op because it may change the number of elements from its |
2130 | /// input vectors or it may provide demanded bits knowledge via undef lanes. |
2131 | /// Example: <undef,undef,2,3> |
2132 | static bool isIdentityMask(ArrayRef<int> Mask); |
2133 | static bool isIdentityMask(const Constant *Mask) { |
2134 | assert(Mask->getType()->isVectorTy() && "Shuffle needs vector constant.")((void)0); |
2135 | SmallVector<int, 16> MaskAsInts; |
2136 | getShuffleMask(Mask, MaskAsInts); |
2137 | return isIdentityMask(MaskAsInts); |
2138 | } |
2139 | |
2140 | /// Return true if this shuffle chooses elements from exactly one source |
2141 | /// vector without lane crossings and does not change the number of elements |
2142 | /// from its input vectors. |
2143 | /// Example: shufflevector <4 x n> A, <4 x n> B, <4,undef,6,undef> |
2144 | bool isIdentity() const { |
2145 | return !changesLength() && isIdentityMask(ShuffleMask); |
2146 | } |
2147 | |
2148 | /// Return true if this shuffle lengthens exactly one source vector with |
2149 | /// undefs in the high elements. |
2150 | bool isIdentityWithPadding() const; |
2151 | |
2152 | /// Return true if this shuffle extracts the first N elements of exactly one |
2153 | /// source vector. |
2154 | bool isIdentityWithExtract() const; |
2155 | |
2156 | /// Return true if this shuffle concatenates its 2 source vectors. This |
2157 | /// returns false if either input is undefined. In that case, the shuffle is |
2158 | /// better classified as an identity with padding operation.
2159 | bool isConcat() const; |
2160 | |
2161 | /// Return true if this shuffle mask chooses elements from its source vectors |
2162 | /// without lane crossings. A shuffle using this mask would be |
2163 | /// equivalent to a vector select with a constant condition operand. |
2164 | /// Example: <4,1,6,undef> |
2165 | /// This returns false if the mask does not choose from both input vectors. |
2166 | /// In that case, the shuffle is better classified as an identity shuffle. |
2167 | /// This assumes that vector operands are the same length as the mask |
2168 | /// (a length-changing shuffle can never be equivalent to a vector select). |
2169 | static bool isSelectMask(ArrayRef<int> Mask); |
2170 | static bool isSelectMask(const Constant *Mask) { |
2171 | assert(Mask->getType()->isVectorTy() && "Shuffle needs vector constant.")((void)0); |
2172 | SmallVector<int, 16> MaskAsInts; |
2173 | getShuffleMask(Mask, MaskAsInts); |
2174 | return isSelectMask(MaskAsInts); |
2175 | } |
2176 | |
2177 | /// Return true if this shuffle chooses elements from its source vectors |
2178 | /// without lane crossings and all operands have the same number of elements. |
2179 | /// In other words, this shuffle is equivalent to a vector select with a |
2180 | /// constant condition operand. |
2181 | /// Example: shufflevector <4 x n> A, <4 x n> B, <undef,1,6,3> |
2182 | /// This returns false if the mask does not choose from both input vectors. |
2183 | /// In that case, the shuffle is better classified as an identity shuffle. |
2184 | /// TODO: Optionally allow length-changing shuffles. |
2185 | bool isSelect() const { |
2186 | return !changesLength() && isSelectMask(ShuffleMask); |
2187 | } |
2188 | |
2189 | /// Return true if this shuffle mask swaps the order of elements from exactly |
2190 | /// one source vector. |
2191 | /// Example: <7,6,undef,4> |
2192 | /// This assumes that vector operands are the same length as the mask. |
2193 | static bool isReverseMask(ArrayRef<int> Mask); |
2194 | static bool isReverseMask(const Constant *Mask) { |
2195 | assert(Mask->getType()->isVectorTy() && "Shuffle needs vector constant.");
2196 | SmallVector<int, 16> MaskAsInts; |
2197 | getShuffleMask(Mask, MaskAsInts); |
2198 | return isReverseMask(MaskAsInts); |
2199 | } |
2200 | |
2201 | /// Return true if this shuffle swaps the order of elements from exactly |
2202 | /// one source vector. |
2203 | /// Example: shufflevector <4 x n> A, <4 x n> B, <3,undef,1,undef> |
2204 | /// TODO: Optionally allow length-changing shuffles. |
2205 | bool isReverse() const { |
2206 | return !changesLength() && isReverseMask(ShuffleMask); |
2207 | } |
2208 | |
2209 | /// Return true if this shuffle mask chooses all elements with the same value |
2210 | /// as the first element of exactly one source vector. |
2211 | /// Example: <4,undef,undef,4> |
2212 | /// This assumes that vector operands are the same length as the mask. |
2213 | static bool isZeroEltSplatMask(ArrayRef<int> Mask); |
2214 | static bool isZeroEltSplatMask(const Constant *Mask) { |
2215 | assert(Mask->getType()->isVectorTy() && "Shuffle needs vector constant.");
2216 | SmallVector<int, 16> MaskAsInts; |
2217 | getShuffleMask(Mask, MaskAsInts); |
2218 | return isZeroEltSplatMask(MaskAsInts); |
2219 | } |
2220 | |
2221 | /// Return true if all elements of this shuffle are the same value as the |
2222 | /// first element of exactly one source vector without changing the length |
2223 | /// of that vector. |
2224 | /// Example: shufflevector <4 x n> A, <4 x n> B, <undef,0,undef,0> |
2225 | /// TODO: Optionally allow length-changing shuffles. |
2226 | /// TODO: Optionally allow splats from other elements. |
2227 | bool isZeroEltSplat() const { |
2228 | return !changesLength() && isZeroEltSplatMask(ShuffleMask); |
2229 | } |
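/// (Editor's sketch, not part of the original header.) Hypothetical mask
/// values showing what qualifies as a zero-element splat: every defined lane
/// must read element 0 of a single source operand (index 0, or index 4 when
/// the operands have 4 elements).
/// \code
///   bool Splat0 = ShuffleVectorInst::isZeroEltSplatMask({4, -1, -1, 4});  // true
///   bool Splat1 = ShuffleVectorInst::isZeroEltSplatMask({1, 1, 1, 1});    // false: splats element 1
/// \endcode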
2230 | |
2231 | /// Return true if this shuffle mask is a transpose mask. |
2232 | /// Transpose vector masks transpose a 2xn matrix. They read corresponding |
2233 | /// even- or odd-numbered vector elements from two n-dimensional source |
2234 | /// vectors and write each result into consecutive elements of an |
2235 | /// n-dimensional destination vector. Two shuffles are necessary to complete |
2236 | /// the transpose, one for the even elements and another for the odd elements. |
2237 | /// This description closely follows how the TRN1 and TRN2 AArch64 |
2238 | /// instructions operate. |
2239 | /// |
2240 | /// For example, a simple 2x2 matrix can be transposed with: |
2241 | /// |
2242 | /// ; Original matrix |
2243 | /// m0 = < a, b > |
2244 | /// m1 = < c, d > |
2245 | /// |
2246 | /// ; Transposed matrix |
2247 | /// t0 = < a, c > = shufflevector m0, m1, < 0, 2 > |
2248 | /// t1 = < b, d > = shufflevector m0, m1, < 1, 3 > |
2249 | /// |
2250 | /// For matrices having greater than n columns, the resulting nx2 transposed |
2251 | /// matrix is stored in two result vectors such that one vector contains |
2252 | /// interleaved elements from all the even-numbered rows and the other vector |
2253 | /// contains interleaved elements from all the odd-numbered rows. For example, |
2254 | /// a 2x4 matrix can be transposed with: |
2255 | /// |
2256 | /// ; Original matrix |
2257 | /// m0 = < a, b, c, d > |
2258 | /// m1 = < e, f, g, h > |
2259 | /// |
2260 | /// ; Transposed matrix |
2261 | /// t0 = < a, e, c, g > = shufflevector m0, m1 < 0, 4, 2, 6 > |
2262 | /// t1 = < b, f, d, h > = shufflevector m0, m1 < 1, 5, 3, 7 > |
2263 | static bool isTransposeMask(ArrayRef<int> Mask); |
2264 | static bool isTransposeMask(const Constant *Mask) { |
2265 | assert(Mask->getType()->isVectorTy() && "Shuffle needs vector constant.");
2266 | SmallVector<int, 16> MaskAsInts; |
2267 | getShuffleMask(Mask, MaskAsInts); |
2268 | return isTransposeMask(MaskAsInts); |
2269 | } |
2270 | |
2271 | /// Return true if this shuffle transposes the elements of its inputs without |
2272 | /// changing the length of the vectors. This operation may also be known as a |
2273 | /// merge or interleave. See the description for isTransposeMask() for the |
2274 | /// exact specification. |
2275 | /// Example: shufflevector <4 x n> A, <4 x n> B, <0,4,2,6> |
2276 | bool isTranspose() const { |
2277 | return !changesLength() && isTransposeMask(ShuffleMask); |
2278 | } |
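/// (Editor's sketch, not part of the original header.) The two masks from the
/// 2x4 example above, expressed in integer form; both classify as transpose
/// masks.
/// \code
///   bool Even = ShuffleVectorInst::isTransposeMask({0, 4, 2, 6});  // true (t0)
///   bool Odd  = ShuffleVectorInst::isTransposeMask({1, 5, 3, 7});  // true (t1)
/// \endcode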
2279 | |
2280 | /// Return true if this shuffle mask is an extract subvector mask. |
2281 | /// A valid extract subvector mask returns a smaller vector from a single |
2282 | /// source operand. The base extraction index is returned as well. |
2283 | static bool isExtractSubvectorMask(ArrayRef<int> Mask, int NumSrcElts, |
2284 | int &Index); |
2285 | static bool isExtractSubvectorMask(const Constant *Mask, int NumSrcElts, |
2286 | int &Index) { |
2287 | assert(Mask->getType()->isVectorTy() && "Shuffle needs vector constant.");
2288 | // Not possible to express a shuffle mask for a scalable vector for this |
2289 | // case. |
2290 | if (isa<ScalableVectorType>(Mask->getType())) |
2291 | return false; |
2292 | SmallVector<int, 16> MaskAsInts; |
2293 | getShuffleMask(Mask, MaskAsInts); |
2294 | return isExtractSubvectorMask(MaskAsInts, NumSrcElts, Index); |
2295 | } |
2296 | |
2297 | /// Return true if this shuffle mask is an extract subvector mask. |
2298 | bool isExtractSubvectorMask(int &Index) const { |
2299 | // Not possible to express a shuffle mask for a scalable vector for this |
2300 | // case. |
2301 | if (isa<ScalableVectorType>(getType())) |
2302 | return false; |
2303 | |
2304 | int NumSrcElts = |
2305 | cast<FixedVectorType>(Op<0>()->getType())->getNumElements(); |
2306 | return isExtractSubvectorMask(ShuffleMask, NumSrcElts, Index); |
2307 | } |
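/// (Editor's sketch, not part of the original header.) Hypothetical values
/// showing how the base extraction index is reported for a mask that takes a
/// contiguous slice of one 4-element source.
/// \code
///   int Index;
///   // <2,3> extracts the 2-element subvector starting at element 2.
///   bool IsExtract =
///       ShuffleVectorInst::isExtractSubvectorMask({2, 3}, /*NumSrcElts=*/4, Index);
///   // IsExtract == true, Index == 2
/// \endcode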
2308 | |
2309 | /// Change values in a shuffle permute mask assuming the two vector operands |
2310 | /// of length InVecNumElts have swapped position. |
2311 | static void commuteShuffleMask(MutableArrayRef<int> Mask, |
2312 | unsigned InVecNumElts) { |
2313 | for (int &Idx : Mask) { |
2314 | if (Idx == -1) |
2315 | continue; |
2316 | Idx = Idx < (int)InVecNumElts ? Idx + InVecNumElts : Idx - InVecNumElts; |
2317 | assert(Idx >= 0 && Idx < (int)InVecNumElts * 2 &&
2318 |        "shufflevector mask index out of range");
2319 | } |
2320 | } |
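/// (Editor's sketch, not part of the original header.) Worked example of
/// commuteShuffleMask with hypothetical values: indices below InVecNumElts
/// move into the second operand's range and vice versa; -1 (undef) is left
/// untouched.
/// \code
///   SmallVector<int, 4> Mask = {0, 5, -1, 3};
///   ShuffleVectorInst::commuteShuffleMask(Mask, /*InVecNumElts=*/4);
///   // Mask is now {4, 1, -1, 7}
/// \endcode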
2321 | |
2322 | // Methods for support type inquiry through isa, cast, and dyn_cast: |
2323 | static bool classof(const Instruction *I) { |
2324 | return I->getOpcode() == Instruction::ShuffleVector; |
2325 | } |
2326 | static bool classof(const Value *V) { |
2327 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
2328 | } |
2329 | }; |
2330 | |
2331 | template <> |
2332 | struct OperandTraits<ShuffleVectorInst> |
2333 | : public FixedNumOperandTraits<ShuffleVectorInst, 2> {}; |
2334 | |
2335 | DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ShuffleVectorInst, Value)
2336 | |
2337 | //===----------------------------------------------------------------------===// |
2338 | // ExtractValueInst Class |
2339 | //===----------------------------------------------------------------------===// |
2340 | |
2341 | /// This instruction extracts a struct member or array |
2342 | /// element value from an aggregate value. |
2343 | /// |
2344 | class ExtractValueInst : public UnaryInstruction { |
2345 | SmallVector<unsigned, 4> Indices; |
2346 | |
2347 | ExtractValueInst(const ExtractValueInst &EVI); |
2348 | |
2349 | /// Constructors - Create an extractvalue instruction with a base aggregate
2350 | /// value and a list of indices. The first ctor can optionally insert before |
2351 | /// an existing instruction, the second appends the new instruction to the |
2352 | /// specified BasicBlock. |
2353 | inline ExtractValueInst(Value *Agg, |
2354 | ArrayRef<unsigned> Idxs, |
2355 | const Twine &NameStr, |
2356 | Instruction *InsertBefore); |
2357 | inline ExtractValueInst(Value *Agg, |
2358 | ArrayRef<unsigned> Idxs, |
2359 | const Twine &NameStr, BasicBlock *InsertAtEnd); |
2360 | |
2361 | void init(ArrayRef<unsigned> Idxs, const Twine &NameStr); |
2362 | |
2363 | protected: |
2364 | // Note: Instruction needs to be a friend here to call cloneImpl. |
2365 | friend class Instruction; |
2366 | |
2367 | ExtractValueInst *cloneImpl() const; |
2368 | |
2369 | public: |
2370 | static ExtractValueInst *Create(Value *Agg, |
2371 | ArrayRef<unsigned> Idxs, |
2372 | const Twine &NameStr = "", |
2373 | Instruction *InsertBefore = nullptr) { |
2374 | return new |
2375 | ExtractValueInst(Agg, Idxs, NameStr, InsertBefore); |
2376 | } |
2377 | |
2378 | static ExtractValueInst *Create(Value *Agg, |
2379 | ArrayRef<unsigned> Idxs, |
2380 | const Twine &NameStr, |
2381 | BasicBlock *InsertAtEnd) { |
2382 | return new ExtractValueInst(Agg, Idxs, NameStr, InsertAtEnd); |
2383 | } |
2384 | |
2385 | /// Returns the type of the element that would be extracted |
2386 | /// with an extractvalue instruction with the specified parameters. |
2387 | /// |
2388 | /// Null is returned if the indices are invalid for the specified type. |
2389 | static Type *getIndexedType(Type *Agg, ArrayRef<unsigned> Idxs); |
2390 | |
2391 | using idx_iterator = const unsigned*; |
2392 | |
2393 | inline idx_iterator idx_begin() const { return Indices.begin(); } |
2394 | inline idx_iterator idx_end() const { return Indices.end(); } |
2395 | inline iterator_range<idx_iterator> indices() const { |
2396 | return make_range(idx_begin(), idx_end()); |
2397 | } |
2398 | |
2399 | Value *getAggregateOperand() { |
2400 | return getOperand(0); |
2401 | } |
2402 | const Value *getAggregateOperand() const { |
2403 | return getOperand(0); |
2404 | } |
2405 | static unsigned getAggregateOperandIndex() { |
2406 | return 0U; // get index for modifying correct operand |
2407 | } |
2408 | |
2409 | ArrayRef<unsigned> getIndices() const { |
2410 | return Indices; |
2411 | } |
2412 | |
2413 | unsigned getNumIndices() const { |
2414 | return (unsigned)Indices.size(); |
2415 | } |
2416 | |
2417 | bool hasIndices() const { |
2418 | return true; |
2419 | } |
2420 | |
2421 | // Methods for support type inquiry through isa, cast, and dyn_cast: |
2422 | static bool classof(const Instruction *I) { |
2423 | return I->getOpcode() == Instruction::ExtractValue; |
2424 | } |
2425 | static bool classof(const Value *V) { |
2426 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
2427 | } |
2428 | }; |
2429 | |
2430 | ExtractValueInst::ExtractValueInst(Value *Agg, |
2431 | ArrayRef<unsigned> Idxs, |
2432 | const Twine &NameStr, |
2433 | Instruction *InsertBefore) |
2434 | : UnaryInstruction(checkGEPType(getIndexedType(Agg->getType(), Idxs)), |
2435 | ExtractValue, Agg, InsertBefore) { |
2436 | init(Idxs, NameStr); |
2437 | } |
2438 | |
2439 | ExtractValueInst::ExtractValueInst(Value *Agg, |
2440 | ArrayRef<unsigned> Idxs, |
2441 | const Twine &NameStr, |
2442 | BasicBlock *InsertAtEnd) |
2443 | : UnaryInstruction(checkGEPType(getIndexedType(Agg->getType(), Idxs)), |
2444 | ExtractValue, Agg, InsertAtEnd) { |
2445 | init(Idxs, NameStr); |
2446 | } |
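/// (Editor's sketch, not part of the original header.) Typical use, assuming a
/// hypothetical aggregate value Agg of type {i32, [2 x float]} and an
/// insertion point InsertPt:
/// \code
///   // Extract the first float of the inner array, i.e. Agg[1][0].
///   Value *Elt = ExtractValueInst::Create(Agg, {1, 0}, "agg.elt", InsertPt);
///   // The result type can be queried without creating the instruction:
///   Type *EltTy = ExtractValueInst::getIndexedType(Agg->getType(), {1, 0});
/// \endcode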
2447 | |
2448 | //===----------------------------------------------------------------------===// |
2449 | // InsertValueInst Class |
2450 | //===----------------------------------------------------------------------===// |
2451 | |
2452 | /// This instruction inserts a struct field or array element
2453 | /// value into an aggregate value. |
2454 | /// |
2455 | class InsertValueInst : public Instruction { |
2456 | SmallVector<unsigned, 4> Indices; |
2457 | |
2458 | InsertValueInst(const InsertValueInst &IVI); |
2459 | |
2460 | /// Constructors - Create an insertvalue instruction with a base aggregate
2461 | /// value, a value to insert, and a list of indices. The first ctor can |
2462 | /// optionally insert before an existing instruction, the second appends |
2463 | /// the new instruction to the specified BasicBlock. |
2464 | inline InsertValueInst(Value *Agg, Value *Val, |
2465 | ArrayRef<unsigned> Idxs, |
2466 | const Twine &NameStr, |
2467 | Instruction *InsertBefore); |
2468 | inline InsertValueInst(Value *Agg, Value *Val, |
2469 | ArrayRef<unsigned> Idxs, |
2470 | const Twine &NameStr, BasicBlock *InsertAtEnd); |
2471 | |
2472 | /// Constructors - These two constructors are convenience methods because one |
2473 | /// and two index insertvalue instructions are so common. |
2474 | InsertValueInst(Value *Agg, Value *Val, unsigned Idx, |
2475 | const Twine &NameStr = "", |
2476 | Instruction *InsertBefore = nullptr); |
2477 | InsertValueInst(Value *Agg, Value *Val, unsigned Idx, const Twine &NameStr, |
2478 | BasicBlock *InsertAtEnd); |
2479 | |
2480 | void init(Value *Agg, Value *Val, ArrayRef<unsigned> Idxs, |
2481 | const Twine &NameStr); |
2482 | |
2483 | protected: |
2484 | // Note: Instruction needs to be a friend here to call cloneImpl. |
2485 | friend class Instruction; |
2486 | |
2487 | InsertValueInst *cloneImpl() const; |
2488 | |
2489 | public: |
2490 | // allocate space for exactly two operands |
2491 | void *operator new(size_t S) { return User::operator new(S, 2); } |
2492 | void operator delete(void *Ptr) { User::operator delete(Ptr); } |
2493 | |
2494 | static InsertValueInst *Create(Value *Agg, Value *Val, |
2495 | ArrayRef<unsigned> Idxs, |
2496 | const Twine &NameStr = "", |
2497 | Instruction *InsertBefore = nullptr) { |
2498 | return new InsertValueInst(Agg, Val, Idxs, NameStr, InsertBefore); |
2499 | } |
2500 | |
2501 | static InsertValueInst *Create(Value *Agg, Value *Val, |
2502 | ArrayRef<unsigned> Idxs, |
2503 | const Twine &NameStr, |
2504 | BasicBlock *InsertAtEnd) { |
2505 | return new InsertValueInst(Agg, Val, Idxs, NameStr, InsertAtEnd); |
2506 | } |
2507 | |
2508 | /// Transparently provide more efficient getOperand methods. |
2509 | DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value)
2510 | |
2511 | using idx_iterator = const unsigned*; |
2512 | |
2513 | inline idx_iterator idx_begin() const { return Indices.begin(); } |
2514 | inline idx_iterator idx_end() const { return Indices.end(); } |
2515 | inline iterator_range<idx_iterator> indices() const { |
2516 | return make_range(idx_begin(), idx_end()); |
2517 | } |
2518 | |
2519 | Value *getAggregateOperand() { |
2520 | return getOperand(0); |
2521 | } |
2522 | const Value *getAggregateOperand() const { |
2523 | return getOperand(0); |
2524 | } |
2525 | static unsigned getAggregateOperandIndex() { |
2526 | return 0U; // get index for modifying correct operand |
2527 | } |
2528 | |
2529 | Value *getInsertedValueOperand() { |
2530 | return getOperand(1); |
2531 | } |
2532 | const Value *getInsertedValueOperand() const { |
2533 | return getOperand(1); |
2534 | } |
2535 | static unsigned getInsertedValueOperandIndex() { |
2536 | return 1U; // get index for modifying correct operand |
2537 | } |
2538 | |
2539 | ArrayRef<unsigned> getIndices() const { |
2540 | return Indices; |
2541 | } |
2542 | |
2543 | unsigned getNumIndices() const { |
2544 | return (unsigned)Indices.size(); |
2545 | } |
2546 | |
2547 | bool hasIndices() const { |
2548 | return true; |
2549 | } |
2550 | |
2551 | // Methods for support type inquiry through isa, cast, and dyn_cast: |
2552 | static bool classof(const Instruction *I) { |
2553 | return I->getOpcode() == Instruction::InsertValue; |
2554 | } |
2555 | static bool classof(const Value *V) { |
2556 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
2557 | } |
2558 | }; |
2559 | |
2560 | template <> |
2561 | struct OperandTraits<InsertValueInst> : |
2562 | public FixedNumOperandTraits<InsertValueInst, 2> { |
2563 | }; |
2564 | |
2565 | InsertValueInst::InsertValueInst(Value *Agg, |
2566 | Value *Val, |
2567 | ArrayRef<unsigned> Idxs, |
2568 | const Twine &NameStr, |
2569 | Instruction *InsertBefore) |
2570 | : Instruction(Agg->getType(), InsertValue, |
2571 | OperandTraits<InsertValueInst>::op_begin(this), |
2572 | 2, InsertBefore) { |
2573 | init(Agg, Val, Idxs, NameStr); |
2574 | } |
2575 | |
2576 | InsertValueInst::InsertValueInst(Value *Agg, |
2577 | Value *Val, |
2578 | ArrayRef<unsigned> Idxs, |
2579 | const Twine &NameStr, |
2580 | BasicBlock *InsertAtEnd) |
2581 | : Instruction(Agg->getType(), InsertValue, |
2582 | OperandTraits<InsertValueInst>::op_begin(this), |
2583 | 2, InsertAtEnd) { |
2584 | init(Agg, Val, Idxs, NameStr); |
2585 | } |
2586 | |
2587 | DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertValueInst, Value)
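/// (Editor's sketch, not part of the original header.) Mirror of the
/// extractvalue example: inserting a hypothetical value Val into field 1 of
/// aggregate Agg before instruction InsertPt. Note that insertvalue returns
/// the whole updated aggregate, not the inserted element.
/// \code
///   Value *NewAgg = InsertValueInst::Create(Agg, Val, {1}, "agg.upd", InsertPt);
/// \endcode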
2588 | |
2589 | //===----------------------------------------------------------------------===// |
2590 | // PHINode Class |
2591 | //===----------------------------------------------------------------------===// |
2592 | |
2593 | // PHINode - The PHINode class is used to represent the magical mystical PHI |
2594 | // node, that can not exist in nature, but can be synthesized in a computer |
2595 | // scientist's overactive imagination. |
2596 | // |
2597 | class PHINode : public Instruction { |
2598 | /// The number of operands actually allocated. NumOperands is |
2599 | /// the number actually in use. |
2600 | unsigned ReservedSpace; |
2601 | |
2602 | PHINode(const PHINode &PN); |
2603 | |
2604 | explicit PHINode(Type *Ty, unsigned NumReservedValues, |
2605 | const Twine &NameStr = "", |
2606 | Instruction *InsertBefore = nullptr) |
2607 | : Instruction(Ty, Instruction::PHI, nullptr, 0, InsertBefore), |
2608 | ReservedSpace(NumReservedValues) { |
2609 | assert(!Ty->isTokenTy() && "PHI nodes cannot have token type!");
2610 | setName(NameStr); |
2611 | allocHungoffUses(ReservedSpace); |
2612 | } |
2613 | |
2614 | PHINode(Type *Ty, unsigned NumReservedValues, const Twine &NameStr, |
2615 | BasicBlock *InsertAtEnd) |
2616 | : Instruction(Ty, Instruction::PHI, nullptr, 0, InsertAtEnd), |
2617 | ReservedSpace(NumReservedValues) { |
2618 | assert(!Ty->isTokenTy() && "PHI nodes cannot have token type!");
2619 | setName(NameStr); |
2620 | allocHungoffUses(ReservedSpace); |
2621 | } |
2622 | |
2623 | protected: |
2624 | // Note: Instruction needs to be a friend here to call cloneImpl. |
2625 | friend class Instruction; |
2626 | |
2627 | PHINode *cloneImpl() const; |
2628 | |
2629 | // allocHungoffUses - this is more complicated than the generic |
2630 | // User::allocHungoffUses, because we have to allocate Uses for the incoming |
2631 | // values and pointers to the incoming blocks, all in one allocation. |
2632 | void allocHungoffUses(unsigned N) { |
2633 | User::allocHungoffUses(N, /* IsPhi */ true); |
2634 | } |
2635 | |
2636 | public: |
2637 | /// Constructors - NumReservedValues is a hint for the number of incoming |
2638 | /// edges that this phi node will have (use 0 if you really have no idea). |
2639 | static PHINode *Create(Type *Ty, unsigned NumReservedValues, |
2640 | const Twine &NameStr = "", |
2641 | Instruction *InsertBefore = nullptr) { |
2642 | return new PHINode(Ty, NumReservedValues, NameStr, InsertBefore); |
2643 | } |
2644 | |
2645 | static PHINode *Create(Type *Ty, unsigned NumReservedValues, |
2646 | const Twine &NameStr, BasicBlock *InsertAtEnd) { |
2647 | return new PHINode(Ty, NumReservedValues, NameStr, InsertAtEnd); |
2648 | } |
2649 | |
2650 | /// Provide fast operand accessors |
2651 | DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value)
2652 | |
2653 | // Block iterator interface. This provides access to the list of incoming |
2654 | // basic blocks, which parallels the list of incoming values. |
2655 | |
2656 | using block_iterator = BasicBlock **; |
2657 | using const_block_iterator = BasicBlock * const *; |
2658 | |
2659 | block_iterator block_begin() { |
2660 | return reinterpret_cast<block_iterator>(op_begin() + ReservedSpace); |
2661 | } |
2662 | |
2663 | const_block_iterator block_begin() const { |
2664 | return reinterpret_cast<const_block_iterator>(op_begin() + ReservedSpace); |
2665 | } |
2666 | |
2667 | block_iterator block_end() { |
2668 | return block_begin() + getNumOperands(); |
2669 | } |
2670 | |
2671 | const_block_iterator block_end() const { |
2672 | return block_begin() + getNumOperands(); |
2673 | } |
2674 | |
2675 | iterator_range<block_iterator> blocks() { |
2676 | return make_range(block_begin(), block_end()); |
2677 | } |
2678 | |
2679 | iterator_range<const_block_iterator> blocks() const { |
2680 | return make_range(block_begin(), block_end()); |
2681 | } |
2682 | |
2683 | op_range incoming_values() { return operands(); } |
2684 | |
2685 | const_op_range incoming_values() const { return operands(); } |
2686 | |
2687 | /// Return the number of incoming edges |
2688 | /// |
2689 | unsigned getNumIncomingValues() const { return getNumOperands(); } |
2690 | |
2691 | /// Return incoming value number x |
2692 | /// |
2693 | Value *getIncomingValue(unsigned i) const { |
2694 | return getOperand(i); |
2695 | } |
2696 | void setIncomingValue(unsigned i, Value *V) { |
2697 | assert(V && "PHI node got a null value!");
2698 | assert(getType() == V->getType() &&
2699 |        "All operands to PHI node must be the same type as the PHI node!");
2700 | setOperand(i, V); |
2701 | } |
2702 | |
2703 | static unsigned getOperandNumForIncomingValue(unsigned i) { |
2704 | return i; |
2705 | } |
2706 | |
2707 | static unsigned getIncomingValueNumForOperand(unsigned i) { |
2708 | return i; |
2709 | } |
2710 | |
2711 | /// Return incoming basic block number @p i. |
2712 | /// |
2713 | BasicBlock *getIncomingBlock(unsigned i) const { |
2714 | return block_begin()[i]; |
2715 | } |
2716 | |
2717 | /// Return incoming basic block corresponding |
2718 | /// to an operand of the PHI. |
2719 | /// |
2720 | BasicBlock *getIncomingBlock(const Use &U) const { |
2721 | assert(this == U.getUser() && "Iterator doesn't point to PHI's Uses?");
2722 | return getIncomingBlock(unsigned(&U - op_begin())); |
2723 | } |
2724 | |
2725 | /// Return incoming basic block corresponding |
2726 | /// to value use iterator. |
2727 | /// |
2728 | BasicBlock *getIncomingBlock(Value::const_user_iterator I) const { |
2729 | return getIncomingBlock(I.getUse()); |
2730 | } |
2731 | |
2732 | void setIncomingBlock(unsigned i, BasicBlock *BB) { |
2733 | assert(BB && "PHI node got a null basic block!");
2734 | block_begin()[i] = BB; |
2735 | } |
2736 | |
2737 | /// Replace every incoming basic block \p Old to basic block \p New. |
2738 | void replaceIncomingBlockWith(const BasicBlock *Old, BasicBlock *New) { |
2739 | assert(New && Old && "PHI node got a null basic block!");
2740 | for (unsigned Op = 0, NumOps = getNumOperands(); Op != NumOps; ++Op) |
2741 | if (getIncomingBlock(Op) == Old) |
2742 | setIncomingBlock(Op, New); |
2743 | } |
2744 | |
2745 | /// Add an incoming value to the end of the PHI list |
2746 | /// |
2747 | void addIncoming(Value *V, BasicBlock *BB) { |
2748 | if (getNumOperands() == ReservedSpace) |
2749 | growOperands(); // Get more space! |
2750 | // Initialize some new operands. |
2751 | setNumHungOffUseOperands(getNumOperands() + 1); |
2752 | setIncomingValue(getNumOperands() - 1, V); |
2753 | setIncomingBlock(getNumOperands() - 1, BB); |
2754 | } |
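/// (Editor's sketch, not part of the original header.) Building a two-input
/// PHI, assuming hypothetical i32 values ValA/ValB produced in blocks
/// BlockA/BlockB and an insertion point InsertPt at the top of the merge
/// block:
/// \code
///   PHINode *PN = PHINode::Create(ValA->getType(), /*NumReservedValues=*/2,
///                                 "merge", InsertPt);
///   PN->addIncoming(ValA, BlockA);
///   PN->addIncoming(ValB, BlockB);
///   Value *FromA = PN->getIncomingValueForBlock(BlockA);  // ValA
/// \endcode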
2755 | |
2756 | /// Remove an incoming value. This is useful if a |
2757 | /// predecessor basic block is deleted. The value removed is returned. |
2758 | /// |
2759 | /// If the last incoming value for a PHI node is removed (and DeletePHIIfEmpty |
2760 | /// is true), the PHI node is destroyed and any uses of it are replaced with |
2761 | /// dummy values. The only time there should be zero incoming values to a PHI |
2762 | /// node is when the block is dead, so this strategy is sound. |
2763 | /// |
2764 | Value *removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty = true); |
2765 | |
2766 | Value *removeIncomingValue(const BasicBlock *BB, bool DeletePHIIfEmpty=true) { |
2767 | int Idx = getBasicBlockIndex(BB); |
2768 | assert(Idx >= 0 && "Invalid basic block argument to remove!");
2769 | return removeIncomingValue(Idx, DeletePHIIfEmpty); |
2770 | } |
2771 | |
2772 | /// Return the first index of the specified basic |
2773 | /// block in the value list for this PHI. Returns -1 if no instance. |
2774 | /// |
2775 | int getBasicBlockIndex(const BasicBlock *BB) const { |
2776 | for (unsigned i = 0, e = getNumOperands(); i != e; ++i) |
2777 | if (block_begin()[i] == BB) |
2778 | return i; |
2779 | return -1; |
2780 | } |
2781 | |
2782 | Value *getIncomingValueForBlock(const BasicBlock *BB) const { |
2783 | int Idx = getBasicBlockIndex(BB); |
2784 | assert(Idx >= 0 && "Invalid basic block argument!");
2785 | return getIncomingValue(Idx); |
2786 | } |
2787 | |
2788 | /// Set every incoming value for block \p BB to \p V.
2789 | void setIncomingValueForBlock(const BasicBlock *BB, Value *V) { |
2790 | assert(BB && "PHI node got a null basic block!");
2791 | bool Found = false; |
2792 | for (unsigned Op = 0, NumOps = getNumOperands(); Op != NumOps; ++Op) |
2793 | if (getIncomingBlock(Op) == BB) { |
2794 | Found = true; |
2795 | setIncomingValue(Op, V); |
2796 | } |
2797 | (void)Found; |
2798 | assert(Found && "Invalid basic block argument to set!");
2799 | } |
2800 | |
2801 | /// If the specified PHI node always merges together the |
2802 | /// same value, return the value, otherwise return null. |
2803 | Value *hasConstantValue() const; |
2804 | |
2805 | /// Whether the specified PHI node always merges |
2806 | /// together the same value, assuming undefs are equal to a unique |
2807 | /// non-undef value. |
2808 | bool hasConstantOrUndefValue() const; |
2809 | |
2810 | /// Return true if this PHI node is complete, i.e., every predecessor of the
2811 | /// parent block has an incoming value in this PHI; otherwise return false.
2812 | bool isComplete() const { |
2813 | return llvm::all_of(predecessors(getParent()), |
2814 | [this](const BasicBlock *Pred) { |
2815 | return getBasicBlockIndex(Pred) >= 0; |
2816 | }); |
2817 | } |
2818 | |
2819 | /// Methods for support type inquiry through isa, cast, and dyn_cast: |
2820 | static bool classof(const Instruction *I) { |
2821 | return I->getOpcode() == Instruction::PHI; |
2822 | } |
2823 | static bool classof(const Value *V) { |
2824 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
2825 | } |
2826 | |
2827 | private: |
2828 | void growOperands(); |
2829 | }; |
2830 | |
2831 | template <> |
2832 | struct OperandTraits<PHINode> : public HungoffOperandTraits<2> { |
2833 | }; |
2834 | |
2835 | DEFINE_TRANSPARENT_OPERAND_ACCESSORS(PHINode, Value)
2836 | |
2837 | //===----------------------------------------------------------------------===// |
2838 | // LandingPadInst Class |
2839 | //===----------------------------------------------------------------------===// |
2840 | |
2841 | //===--------------------------------------------------------------------------- |
2842 | /// The landingpad instruction holds all of the information |
2843 | /// necessary to generate correct exception handling. The landingpad instruction |
2844 | /// cannot be moved from the top of a landing pad block, which itself is |
2845 | /// accessible only from the 'unwind' edge of an invoke. This uses the |
2846 | /// SubclassData field in Value to store whether or not the landingpad is a |
2847 | /// cleanup. |
2848 | /// |
2849 | class LandingPadInst : public Instruction { |
2850 | using CleanupField = BoolBitfieldElementT<0>; |
2851 | |
2852 | /// The number of operands actually allocated. NumOperands is |
2853 | /// the number actually in use. |
2854 | unsigned ReservedSpace; |
2855 | |
2856 | LandingPadInst(const LandingPadInst &LP); |
2857 | |
2858 | public: |
2859 | enum ClauseType { Catch, Filter }; |
2860 | |
2861 | private: |
2862 | explicit LandingPadInst(Type *RetTy, unsigned NumReservedValues, |
2863 | const Twine &NameStr, Instruction *InsertBefore); |
2864 | explicit LandingPadInst(Type *RetTy, unsigned NumReservedValues, |
2865 | const Twine &NameStr, BasicBlock *InsertAtEnd); |
2866 | |
2867 | // Allocate space for exactly zero operands. |
2868 | void *operator new(size_t S) { return User::operator new(S); } |
2869 | |
2870 | void growOperands(unsigned Size); |
2871 | void init(unsigned NumReservedValues, const Twine &NameStr); |
2872 | |
2873 | protected: |
2874 | // Note: Instruction needs to be a friend here to call cloneImpl. |
2875 | friend class Instruction; |
2876 | |
2877 | LandingPadInst *cloneImpl() const; |
2878 | |
2879 | public: |
2880 | void operator delete(void *Ptr) { User::operator delete(Ptr); } |
2881 | |
2882 | /// Constructors - NumReservedClauses is a hint for the number of incoming |
2883 | /// clauses that this landingpad will have (use 0 if you really have no idea). |
2884 | static LandingPadInst *Create(Type *RetTy, unsigned NumReservedClauses, |
2885 | const Twine &NameStr = "", |
2886 | Instruction *InsertBefore = nullptr); |
2887 | static LandingPadInst *Create(Type *RetTy, unsigned NumReservedClauses, |
2888 | const Twine &NameStr, BasicBlock *InsertAtEnd); |
2889 | |
2890 | /// Provide fast operand accessors |
2891 | DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value)
2892 | |
2893 | /// Return 'true' if this landingpad instruction is a |
2894 | /// cleanup. I.e., it should be run when unwinding even if its landing pad |
2895 | /// doesn't catch the exception. |
2896 | bool isCleanup() const { return getSubclassData<CleanupField>(); } |
2897 | |
2898 | /// Indicate that this landingpad instruction is a cleanup. |
2899 | void setCleanup(bool V) { setSubclassData<CleanupField>(V); } |
2900 | |
2901 | /// Add a catch or filter clause to the landing pad. |
2902 | void addClause(Constant *ClauseVal); |
2903 | |
2904 | /// Get the value of the clause at index Idx. Use isCatch/isFilter to |
2905 | /// determine what type of clause this is. |
2906 | Constant *getClause(unsigned Idx) const { |
2907 | return cast<Constant>(getOperandList()[Idx]); |
2908 | } |
2909 | |
2910 | /// Return 'true' if the clause at index Idx is a catch clause.
2911 | bool isCatch(unsigned Idx) const { |
2912 | return !isa<ArrayType>(getOperandList()[Idx]->getType()); |
2913 | } |
2914 | |
2915 | /// Return 'true' if the clause at index Idx is a filter clause.
2916 | bool isFilter(unsigned Idx) const { |
2917 | return isa<ArrayType>(getOperandList()[Idx]->getType()); |
2918 | } |
2919 | |
2920 | /// Get the number of clauses for this landing pad. |
2921 | unsigned getNumClauses() const { return getNumOperands(); } |
2922 | |
2923 | /// Grow the size of the operand list to accommodate the new |
2924 | /// number of clauses. |
2925 | void reserveClauses(unsigned Size) { growOperands(Size); } |
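/// (Editor's sketch, not part of the original header.) Assuming the enclosing
/// function already has a personality function, and that ExnTy, TypeInfo (a
/// Constant*, e.g. a typeinfo global) and InsertPt are hypothetical, a cleanup
/// landingpad with one catch clause could be built as:
/// \code
///   LandingPadInst *LP =
///       LandingPadInst::Create(ExnTy, /*NumReservedClauses=*/1, "lpad", InsertPt);
///   LP->setCleanup(true);
///   LP->addClause(TypeInfo);
///   bool IsCatch = LP->isCatch(0);  // true: the clause type is not an array type
/// \endcode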
2926 | |
2927 | // Methods for support type inquiry through isa, cast, and dyn_cast: |
2928 | static bool classof(const Instruction *I) { |
2929 | return I->getOpcode() == Instruction::LandingPad; |
2930 | } |
2931 | static bool classof(const Value *V) { |
2932 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
2933 | } |
2934 | }; |
2935 | |
2936 | template <> |
2937 | struct OperandTraits<LandingPadInst> : public HungoffOperandTraits<1> { |
2938 | }; |
2939 | |
2940 | DEFINE_TRANSPARENT_OPERAND_ACCESSORS(LandingPadInst, Value)
2941 | |
2942 | //===----------------------------------------------------------------------===// |
2943 | // ReturnInst Class |
2944 | //===----------------------------------------------------------------------===// |
2945 | |
2946 | //===--------------------------------------------------------------------------- |
2947 | /// Return a value (possibly void), from a function. Execution |
2948 | /// does not continue in this function any longer. |
2949 | /// |
2950 | class ReturnInst : public Instruction { |
2951 | ReturnInst(const ReturnInst &RI); |
2952 | |
2953 | private: |
2954 | // ReturnInst constructors: |
2955 | // ReturnInst() - 'ret void' instruction |
2956 | // ReturnInst( null) - 'ret void' instruction |
2957 | // ReturnInst(Value* X) - 'ret X' instruction |
2958 | // ReturnInst( null, Inst *I) - 'ret void' instruction, insert before I |
2959 | // ReturnInst(Value* X, Inst *I) - 'ret X' instruction, insert before I |
2960 | // ReturnInst( null, BB *B) - 'ret void' instruction, insert @ end of B |
2961 | // ReturnInst(Value* X, BB *B) - 'ret X' instruction, insert @ end of B |
2962 | // |
2963 | // NOTE: If the Value* passed is of type void then the constructor behaves as |
2964 | // if it was passed NULL. |
2965 | explicit ReturnInst(LLVMContext &C, Value *retVal = nullptr, |
2966 | Instruction *InsertBefore = nullptr); |
2967 | ReturnInst(LLVMContext &C, Value *retVal, BasicBlock *InsertAtEnd); |
2968 | explicit ReturnInst(LLVMContext &C, BasicBlock *InsertAtEnd); |
2969 | |
2970 | protected: |
2971 | // Note: Instruction needs to be a friend here to call cloneImpl. |
2972 | friend class Instruction; |
2973 | |
2974 | ReturnInst *cloneImpl() const; |
2975 | |
2976 | public: |
2977 | static ReturnInst* Create(LLVMContext &C, Value *retVal = nullptr, |
2978 | Instruction *InsertBefore = nullptr) { |
2979 | return new(!!retVal) ReturnInst(C, retVal, InsertBefore); |
2980 | } |
2981 | |
2982 | static ReturnInst* Create(LLVMContext &C, Value *retVal, |
2983 | BasicBlock *InsertAtEnd) { |
2984 | return new(!!retVal) ReturnInst(C, retVal, InsertAtEnd); |
2985 | } |
2986 | |
2987 | static ReturnInst* Create(LLVMContext &C, BasicBlock *InsertAtEnd) { |
2988 | return new(0) ReturnInst(C, InsertAtEnd); |
2989 | } |
2990 | |
2991 | /// Provide fast operand accessors |
2992 | DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value)
2993 | |
2994 | /// Convenience accessor. Returns null if there is no return value. |
2995 | Value *getReturnValue() const { |
2996 | return getNumOperands() != 0 ? getOperand(0) : nullptr; |
2997 | } |
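/// (Editor's sketch, not part of the original header.) Terminating a
/// hypothetical block BB, given an LLVMContext Ctx and an optional return
/// value RetVal (each block takes only one of the two forms):
/// \code
///   ReturnInst::Create(Ctx, RetVal, BB);  // 'ret <ty> RetVal' appended to BB
///   ReturnInst::Create(Ctx, BB);          // 'ret void' appended to BB
/// \endcode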
2998 | |
2999 | unsigned getNumSuccessors() const { return 0; } |
3000 | |
3001 | // Methods for support type inquiry through isa, cast, and dyn_cast: |
3002 | static bool classof(const Instruction *I) { |
3003 | return (I->getOpcode() == Instruction::Ret); |
3004 | } |
3005 | static bool classof(const Value *V) { |
3006 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
3007 | } |
3008 | |
3009 | private: |
3010 | BasicBlock *getSuccessor(unsigned idx) const { |
3011 | llvm_unreachable("ReturnInst has no successors!");
3012 | } |
3013 | |
3014 | void setSuccessor(unsigned idx, BasicBlock *B) { |
3015 | llvm_unreachable("ReturnInst has no successors!");
3016 | } |
3017 | }; |
3018 | |
3019 | template <> |
3020 | struct OperandTraits<ReturnInst> : public VariadicOperandTraits<ReturnInst> { |
3021 | }; |
3022 | |
3023 | DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ReturnInst, Value)
3024 | |
3025 | //===----------------------------------------------------------------------===// |
3026 | // BranchInst Class |
3027 | //===----------------------------------------------------------------------===// |
3028 | |
3029 | //===--------------------------------------------------------------------------- |
3030 | /// Conditional or Unconditional Branch instruction. |
3031 | /// |
3032 | class BranchInst : public Instruction { |
3033 | /// Ops list - Branches are strange. The operands are ordered: |
3034 | /// [Cond, FalseDest,] TrueDest. This makes some accessors faster because |
3035 | /// they don't have to check for cond/uncond branchness. These are mostly |
3036 | /// accessed relative from op_end(). |
3037 | BranchInst(const BranchInst &BI); |
3038 | // BranchInst constructors (where {B, T, F} are blocks, and C is a condition): |
3039 | // BranchInst(BB *B) - 'br B' |
3040 | // BranchInst(BB* T, BB *F, Value *C) - 'br C, T, F' |
3041 | // BranchInst(BB* B, Inst *I) - 'br B' insert before I |
3042 | // BranchInst(BB* T, BB *F, Value *C, Inst *I) - 'br C, T, F', insert before I |
3043 | // BranchInst(BB* B, BB *I) - 'br B' insert at end |
3044 | // BranchInst(BB* T, BB *F, Value *C, BB *I) - 'br C, T, F', insert at end |
3045 | explicit BranchInst(BasicBlock *IfTrue, Instruction *InsertBefore = nullptr); |
3046 | BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond, |
3047 | Instruction *InsertBefore = nullptr); |
3048 | BranchInst(BasicBlock *IfTrue, BasicBlock *InsertAtEnd); |
3049 | BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond, |
3050 | BasicBlock *InsertAtEnd); |
3051 | |
3052 | void AssertOK(); |
3053 | |
3054 | protected: |
3055 | // Note: Instruction needs to be a friend here to call cloneImpl. |
3056 | friend class Instruction; |
3057 | |
3058 | BranchInst *cloneImpl() const; |
3059 | |
3060 | public: |
3061 | /// Iterator type that casts an operand to a basic block. |
3062 | /// |
3063 | /// This only makes sense because the successors are stored as adjacent |
3064 | /// operands for branch instructions. |
3065 | struct succ_op_iterator |
3066 | : iterator_adaptor_base<succ_op_iterator, value_op_iterator, |
3067 | std::random_access_iterator_tag, BasicBlock *, |
3068 | ptrdiff_t, BasicBlock *, BasicBlock *> { |
3069 | explicit succ_op_iterator(value_op_iterator I) : iterator_adaptor_base(I) {} |
3070 | |
3071 | BasicBlock *operator*() const { return cast<BasicBlock>(*I); } |
3072 | BasicBlock *operator->() const { return operator*(); } |
3073 | }; |
3074 | |
3075 | /// The const version of `succ_op_iterator`. |
3076 | struct const_succ_op_iterator |
3077 | : iterator_adaptor_base<const_succ_op_iterator, const_value_op_iterator, |
3078 | std::random_access_iterator_tag, |
3079 | const BasicBlock *, ptrdiff_t, const BasicBlock *, |
3080 | const BasicBlock *> { |
3081 | explicit const_succ_op_iterator(const_value_op_iterator I) |
3082 | : iterator_adaptor_base(I) {} |
3083 | |
3084 | const BasicBlock *operator*() const { return cast<BasicBlock>(*I); } |
3085 | const BasicBlock *operator->() const { return operator*(); } |
3086 | }; |
3087 | |
3088 | static BranchInst *Create(BasicBlock *IfTrue, |
3089 | Instruction *InsertBefore = nullptr) { |
3090 | return new(1) BranchInst(IfTrue, InsertBefore); |
3091 | } |
3092 | |
3093 | static BranchInst *Create(BasicBlock *IfTrue, BasicBlock *IfFalse, |
3094 | Value *Cond, Instruction *InsertBefore = nullptr) { |
3095 | return new(3) BranchInst(IfTrue, IfFalse, Cond, InsertBefore); |
3096 | } |
3097 | |
3098 | static BranchInst *Create(BasicBlock *IfTrue, BasicBlock *InsertAtEnd) { |
3099 | return new(1) BranchInst(IfTrue, InsertAtEnd); |
3100 | } |
3101 | |
3102 | static BranchInst *Create(BasicBlock *IfTrue, BasicBlock *IfFalse, |
3103 | Value *Cond, BasicBlock *InsertAtEnd) { |
3104 | return new(3) BranchInst(IfTrue, IfFalse, Cond, InsertAtEnd); |
3105 | } |
3106 | |
3107 | /// Transparently provide more efficient getOperand methods. |
3108 | DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value)
3109 | |
3110 | bool isUnconditional() const { return getNumOperands() == 1; } |
3111 | bool isConditional() const { return getNumOperands() == 3; } |
3112 | |
3113 | Value *getCondition() const { |
3114 | assert(isConditional() && "Cannot get condition of an uncond branch!");
3115 | return Op<-3>(); |
3116 | } |
3117 | |
3118 | void setCondition(Value *V) { |
3119 | assert(isConditional() && "Cannot set condition of unconditional branch!");
3120 | Op<-3>() = V; |
3121 | } |
3122 | |
3123 | unsigned getNumSuccessors() const { return 1+isConditional(); } |
3124 | |
3125 | BasicBlock *getSuccessor(unsigned i) const { |
3126 | assert(i < getNumSuccessors() && "Successor # out of range for Branch!");
3127 | return cast_or_null<BasicBlock>((&Op<-1>() - i)->get()); |
3128 | } |
3129 | |
3130 | void setSuccessor(unsigned idx, BasicBlock *NewSucc) { |
3131 | assert(idx < getNumSuccessors() && "Successor # out of range for Branch!");
3132 | *(&Op<-1>() - idx) = NewSucc; |
3133 | } |
3134 | |
3135 | /// Swap the successors of this branch instruction. |
3136 | /// |
3137 | /// Swaps the successors of the branch instruction. This also swaps any |
3138 | /// branch weight metadata associated with the instruction so that it |
3139 | /// continues to map correctly to each operand. |
3140 | void swapSuccessors(); |
3141 | |
3142 | iterator_range<succ_op_iterator> successors() { |
3143 | return make_range( |
3144 | succ_op_iterator(std::next(value_op_begin(), isConditional() ? 1 : 0)), |
3145 | succ_op_iterator(value_op_end())); |
3146 | } |
3147 | |
3148 | iterator_range<const_succ_op_iterator> successors() const { |
3149 | return make_range(const_succ_op_iterator( |
3150 | std::next(value_op_begin(), isConditional() ? 1 : 0)), |
3151 | const_succ_op_iterator(value_op_end())); |
3152 | } |
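/// (Editor's sketch, not part of the original header.) Assuming hypothetical
/// blocks ThenBB/ElseBB, an i1 condition Cond, and a current block CurBB:
/// \code
///   BranchInst *Br = BranchInst::Create(ThenBB, ElseBB, Cond, CurBB);
///   // Successor 0 is the "true" destination, successor 1 the "false" one.
///   BasicBlock *Taken    = Br->getSuccessor(0);  // ThenBB
///   BasicBlock *NotTaken = Br->getSuccessor(1);  // ElseBB
///   Br->swapSuccessors();                        // also swaps branch weights
/// \endcode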
3153 | |
3154 | // Methods for support type inquiry through isa, cast, and dyn_cast: |
3155 | static bool classof(const Instruction *I) { |
3156 | return (I->getOpcode() == Instruction::Br); |
3157 | } |
3158 | static bool classof(const Value *V) { |
3159 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
3160 | } |
3161 | }; |
3162 | |
3163 | template <> |
3164 | struct OperandTraits<BranchInst> : public VariadicOperandTraits<BranchInst, 1> { |
3165 | }; |
3166 | |
3167 | DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BranchInst, Value)
3168 | |
3169 | //===----------------------------------------------------------------------===// |
3170 | // SwitchInst Class |
3171 | //===----------------------------------------------------------------------===// |
3172 | |
3173 | //===--------------------------------------------------------------------------- |
3174 | /// Multiway switch |
3175 | /// |
3176 | class SwitchInst : public Instruction { |
3177 | unsigned ReservedSpace; |
3178 | |
3179 | // Operand[0] = Value to switch on |
3180 | // Operand[1] = Default basic block destination |
3181 | // Operand[2n ] = Value to match |
3182 | // Operand[2n+1] = BasicBlock to go to on match |
3183 | SwitchInst(const SwitchInst &SI); |
3184 | |
3185 | /// Create a new switch instruction, specifying a value to switch on and a |
3186 | /// default destination. The number of additional cases can be specified here |
3187 | /// to make memory allocation more efficient. This constructor can also |
3188 | /// auto-insert before another instruction. |
3189 | SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases, |
3190 | Instruction *InsertBefore); |
3191 | |
3192 | /// Create a new switch instruction, specifying a value to switch on and a |
3193 | /// default destination. The number of additional cases can be specified here |
3194 | /// to make memory allocation more efficient. This constructor also |
3195 | /// auto-inserts at the end of the specified BasicBlock. |
3196 | SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases, |
3197 | BasicBlock *InsertAtEnd); |
3198 | |
3199 | // allocate space for exactly zero operands |
3200 | void *operator new(size_t S) { return User::operator new(S); } |
3201 | |
3202 | void init(Value *Value, BasicBlock *Default, unsigned NumReserved); |
3203 | void growOperands(); |
3204 | |
3205 | protected: |
3206 | // Note: Instruction needs to be a friend here to call cloneImpl. |
3207 | friend class Instruction; |
3208 | |
3209 | SwitchInst *cloneImpl() const; |
3210 | |
3211 | public: |
3212 | void operator delete(void *Ptr) { User::operator delete(Ptr); } |
3213 | |
3214 | // -2 |
3215 | static const unsigned DefaultPseudoIndex = static_cast<unsigned>(~0L-1); |
3216 | |
3217 | template <typename CaseHandleT> class CaseIteratorImpl; |
3218 | |
3219 | /// A handle to a particular switch case. It exposes a convenient interface |
3220 | /// to both the case value and the successor block. |
3221 | /// |
3222 | /// We define this as a template and instantiate it to form both a const and |
3223 | /// non-const handle. |
3224 | template <typename SwitchInstT, typename ConstantIntT, typename BasicBlockT> |
3225 | class CaseHandleImpl { |
3226 | // Directly befriend both const and non-const iterators. |
3227 | friend class SwitchInst::CaseIteratorImpl< |
3228 | CaseHandleImpl<SwitchInstT, ConstantIntT, BasicBlockT>>; |
3229 | |
3230 | protected: |
3231 | // Expose the switch type we're parameterized with to the iterator. |
3232 | using SwitchInstType = SwitchInstT; |
3233 | |
3234 | SwitchInstT *SI; |
3235 | ptrdiff_t Index; |
3236 | |
3237 | CaseHandleImpl() = default; |
3238 | CaseHandleImpl(SwitchInstT *SI, ptrdiff_t Index) : SI(SI), Index(Index) {} |
3239 | |
3240 | public: |
3241 | /// Resolves case value for current case. |
3242 | ConstantIntT *getCaseValue() const { |
3243 | assert((unsigned)Index < SI->getNumCases() &&
3244 |        "Index out the number of cases.");
3245 | return reinterpret_cast<ConstantIntT *>(SI->getOperand(2 + Index * 2)); |
3246 | } |
3247 | |
3248 | /// Resolves successor for current case. |
3249 | BasicBlockT *getCaseSuccessor() const { |
3250 | assert(((unsigned)Index < SI->getNumCases() ||
3251 |         (unsigned)Index == DefaultPseudoIndex) &&
3252 |        "Index out the number of cases.");
3253 | return SI->getSuccessor(getSuccessorIndex()); |
3254 | } |
3255 | |
3256 | /// Returns number of current case. |
3257 | unsigned getCaseIndex() const { return Index; } |
3258 | |
3259 | /// Returns successor index for current case successor. |
3260 | unsigned getSuccessorIndex() const { |
3261 | assert(((unsigned)Index == DefaultPseudoIndex ||
3262 |         (unsigned)Index < SI->getNumCases()) &&
3263 |        "Index out the number of cases.");
3264 | return (unsigned)Index != DefaultPseudoIndex ? Index + 1 : 0; |
3265 | } |
3266 | |
3267 | bool operator==(const CaseHandleImpl &RHS) const { |
3268 | assert(SI == RHS.SI && "Incompatible operators.");
3269 | return Index == RHS.Index; |
3270 | } |
3271 | }; |
3272 | |
3273 | using ConstCaseHandle = |
3274 | CaseHandleImpl<const SwitchInst, const ConstantInt, const BasicBlock>; |
3275 | |
3276 | class CaseHandle |
3277 | : public CaseHandleImpl<SwitchInst, ConstantInt, BasicBlock> { |
3278 | friend class SwitchInst::CaseIteratorImpl<CaseHandle>; |
3279 | |
3280 | public: |
3281 | CaseHandle(SwitchInst *SI, ptrdiff_t Index) : CaseHandleImpl(SI, Index) {} |
3282 | |
3283 | /// Sets the new value for current case. |
3284 | void setValue(ConstantInt *V) { |
3285 | assert((unsigned)Index < SI->getNumCases() &&
3286 |        "Index out the number of cases.");
3287 | SI->setOperand(2 + Index*2, reinterpret_cast<Value*>(V)); |
3288 | } |
3289 | |
3290 | /// Sets the new successor for current case. |
3291 | void setSuccessor(BasicBlock *S) { |
3292 | SI->setSuccessor(getSuccessorIndex(), S); |
3293 | } |
3294 | }; |
3295 | |
3296 | template <typename CaseHandleT> |
3297 | class CaseIteratorImpl |
3298 | : public iterator_facade_base<CaseIteratorImpl<CaseHandleT>, |
3299 | std::random_access_iterator_tag, |
3300 | CaseHandleT> { |
3301 | using SwitchInstT = typename CaseHandleT::SwitchInstType; |
3302 | |
3303 | CaseHandleT Case; |
3304 | |
3305 | public: |
3306 | /// Default constructed iterator is in an invalid state until assigned to |
3307 | /// a case for a particular switch. |
3308 | CaseIteratorImpl() = default; |
3309 | |
3310 | /// Initializes case iterator for given SwitchInst and for given |
3311 | /// case number. |
3312 | CaseIteratorImpl(SwitchInstT *SI, unsigned CaseNum) : Case(SI, CaseNum) {} |
3313 | |
3314 | /// Initializes case iterator for given SwitchInst and for given |
3315 | /// successor index. |
3316 | static CaseIteratorImpl fromSuccessorIndex(SwitchInstT *SI, |
3317 | unsigned SuccessorIndex) { |
3318 | assert(SuccessorIndex < SI->getNumSuccessors() &&
3319 |        "Successor index # out of range!");
3320 | return SuccessorIndex != 0 ? CaseIteratorImpl(SI, SuccessorIndex - 1) |
3321 | : CaseIteratorImpl(SI, DefaultPseudoIndex); |
3322 | } |
3323 | |
3324 | /// Support converting to the const variant. This will be a no-op for const |
3325 | /// variant. |
3326 | operator CaseIteratorImpl<ConstCaseHandle>() const { |
3327 | return CaseIteratorImpl<ConstCaseHandle>(Case.SI, Case.Index); |
3328 | } |
3329 | |
3330 | CaseIteratorImpl &operator+=(ptrdiff_t N) { |
3331 | // Check index correctness after addition. |
3332 | // Note: Index == getNumCases() means end(). |
3333 | assert(Case.Index + N >= 0 &&
3334 |        (unsigned)(Case.Index + N) <= Case.SI->getNumCases() &&
3335 |        "Case.Index out the number of cases.");
3336 | Case.Index += N; |
3337 | return *this; |
3338 | } |
3339 | CaseIteratorImpl &operator-=(ptrdiff_t N) { |
3340 | // Check index correctness after subtraction. |
3341 | // Note: Case.Index == getNumCases() means end(). |
3342 | assert(Case.Index - N >= 0 &&
3343 |        (unsigned)(Case.Index - N) <= Case.SI->getNumCases() &&
3344 |        "Case.Index out the number of cases.");
3345 | Case.Index -= N; |
3346 | return *this; |
3347 | } |
3348 | ptrdiff_t operator-(const CaseIteratorImpl &RHS) const { |
3349 | assert(Case.SI == RHS.Case.SI && "Incompatible operators.");
3350 | return Case.Index - RHS.Case.Index; |
3351 | } |
3352 | bool operator==(const CaseIteratorImpl &RHS) const { |
3353 | return Case == RHS.Case; |
3354 | } |
3355 | bool operator<(const CaseIteratorImpl &RHS) const { |
3356 | assert(Case.SI == RHS.Case.SI && "Incompatible operators.");
3357 | return Case.Index < RHS.Case.Index; |
3358 | } |
3359 | CaseHandleT &operator*() { return Case; } |
3360 | const CaseHandleT &operator*() const { return Case; } |
3361 | }; |
3362 | |
3363 | using CaseIt = CaseIteratorImpl<CaseHandle>; |
3364 | using ConstCaseIt = CaseIteratorImpl<ConstCaseHandle>; |
3365 | |
3366 | static SwitchInst *Create(Value *Value, BasicBlock *Default, |
3367 | unsigned NumCases, |
3368 | Instruction *InsertBefore = nullptr) { |
3369 | return new SwitchInst(Value, Default, NumCases, InsertBefore); |
3370 | } |
3371 | |
3372 | static SwitchInst *Create(Value *Value, BasicBlock *Default, |
3373 | unsigned NumCases, BasicBlock *InsertAtEnd) { |
3374 | return new SwitchInst(Value, Default, NumCases, InsertAtEnd); |
3375 | } |
3376 | |
3377 | /// Provide fast operand accessors |
3378 | DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value)
3379 | |
3380 | // Accessor Methods for Switch stmt |
3381 | Value *getCondition() const { return getOperand(0); } |
3382 | void setCondition(Value *V) { setOperand(0, V); } |
3383 | |
3384 | BasicBlock *getDefaultDest() const { |
3385 | return cast<BasicBlock>(getOperand(1)); |
3386 | } |
3387 | |
3388 | void setDefaultDest(BasicBlock *DefaultCase) { |
3389 | setOperand(1, reinterpret_cast<Value*>(DefaultCase)); |
3390 | } |
3391 | |
3392 | /// Return the number of 'cases' in this switch instruction, excluding the |
3393 | /// default case. |
3394 | unsigned getNumCases() const { |
3395 | return getNumOperands()/2 - 1; |
3396 | } |
3397 | |
3398 | /// Returns a read/write iterator that points to the first case in the |
3399 | /// SwitchInst. |
3400 | CaseIt case_begin() { |
3401 | return CaseIt(this, 0); |
3402 | } |
3403 | |
3404 | /// Returns a read-only iterator that points to the first case in the |
3405 | /// SwitchInst. |
3406 | ConstCaseIt case_begin() const { |
3407 | return ConstCaseIt(this, 0); |
3408 | } |
3409 | |
3410 | /// Returns a read/write iterator that points one past the last in the |
3411 | /// SwitchInst. |
3412 | CaseIt case_end() { |
3413 | return CaseIt(this, getNumCases()); |
3414 | } |
3415 | |
3416 | /// Returns a read-only iterator that points one past the last in the |
3417 | /// SwitchInst. |
3418 | ConstCaseIt case_end() const { |
3419 | return ConstCaseIt(this, getNumCases()); |
3420 | } |
3421 | |
3422 | /// Iteration adapter for range-for loops. |
3423 | iterator_range<CaseIt> cases() { |
3424 | return make_range(case_begin(), case_end()); |
3425 | } |
3426 | |
3427 | /// Constant iteration adapter for range-for loops. |
3428 | iterator_range<ConstCaseIt> cases() const { |
3429 | return make_range(case_begin(), case_end()); |
3430 | } |
3431 | |
3432 | /// Returns an iterator that points to the default case.
3433 | /// Note: this iterator only allows the successor to be resolved; attempting
3434 | /// to resolve the case value triggers an assertion.
3435 | /// Also note that incrementing or decrementing it triggers an assertion and
3436 | /// leaves the iterator invalid.
3437 | CaseIt case_default() { |
3438 | return CaseIt(this, DefaultPseudoIndex); |
3439 | } |
3440 | ConstCaseIt case_default() const { |
3441 | return ConstCaseIt(this, DefaultPseudoIndex); |
3442 | } |
3443 | |
3444 | /// Search all of the case values for the specified constant. If it is |
3445 | /// explicitly handled, return the case iterator of it, otherwise return |
3446 | /// default case iterator to indicate that it is handled by the default |
3447 | /// handler. |
3448 | CaseIt findCaseValue(const ConstantInt *C) { |
3449 | CaseIt I = llvm::find_if( |
3450 | cases(), [C](CaseHandle &Case) { return Case.getCaseValue() == C; }); |
3451 | if (I != case_end()) |
3452 | return I; |
3453 | |
3454 | return case_default(); |
3455 | } |
3456 | ConstCaseIt findCaseValue(const ConstantInt *C) const { |
3457 | ConstCaseIt I = llvm::find_if(cases(), [C](ConstCaseHandle &Case) { |
3458 | return Case.getCaseValue() == C; |
3459 | }); |
3460 | if (I != case_end()) |
3461 | return I; |
3462 | |
3463 | return case_default(); |
3464 | } |
3465 | |
3466 | /// Finds the unique case value for a given successor. Returns null if the |
3467 | /// successor is not found, not unique, or is the default case. |
3468 | ConstantInt *findCaseDest(BasicBlock *BB) { |
3469 | if (BB == getDefaultDest()) |
3470 | return nullptr; |
3471 | |
3472 | ConstantInt *CI = nullptr; |
3473 | for (auto Case : cases()) { |
3474 | if (Case.getCaseSuccessor() != BB) |
3475 | continue; |
3476 | |
3477 | if (CI) |
3478 | return nullptr; // Multiple cases lead to BB. |
3479 | |
3480 | CI = Case.getCaseValue(); |
3481 | } |
3482 | |
3483 | return CI; |
3484 | } |
3485 | |
3486 | /// Add an entry to the switch instruction. |
3487 | /// Note: |
3488 | /// This action invalidates case_end(). The old case_end() iterator will
3489 | /// point to the added case.
3490 | void addCase(ConstantInt *OnVal, BasicBlock *Dest); |
3491 | |
3492 | /// This method removes the specified case and its successor from the switch |
3493 | /// instruction. Note that this operation may reorder the remaining cases at |
3494 | /// index idx and above. |
3495 | /// Note: |
3496 | /// This action invalidates iterators for all cases following the one removed, |
3497 | /// including the case_end() iterator. It returns an iterator for the next |
3498 | /// case. |
3499 | CaseIt removeCase(CaseIt I); |
3500 | |
3501 | unsigned getNumSuccessors() const { return getNumOperands()/2; } |
3502 | BasicBlock *getSuccessor(unsigned idx) const { |
3503 | assert(idx < getNumSuccessors() && "Successor idx out of range for switch!");
3504 | return cast<BasicBlock>(getOperand(idx*2+1)); |
3505 | } |
3506 | void setSuccessor(unsigned idx, BasicBlock *NewSucc) { |
3507 | assert(idx < getNumSuccessors() && "Successor # out of range for switch!");
3508 | setOperand(idx * 2 + 1, NewSucc); |
3509 | } |
3510 | |
3511 | // Methods for support type inquiry through isa, cast, and dyn_cast: |
3512 | static bool classof(const Instruction *I) { |
3513 | return I->getOpcode() == Instruction::Switch; |
3514 | } |
3515 | static bool classof(const Value *V) { |
3516 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
3517 | } |
3518 | }; |
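// --- Illustrative usage sketch, not part of the original header. ---
// A minimal example of driving the SwitchInst API declared above: create a
// switch with room for two cases, register the cases, then look one up.
// The function name and its parameters are hypothetical; it assumes `Cond`
// is an i32 value and the destination blocks already exist.
static void exampleBuildSwitch(LLVMContext &Ctx, Value *Cond,
                               BasicBlock *Default, BasicBlock *OnZero,
                               BasicBlock *OnOne, Instruction *InsertBefore) {
  SwitchInst *SI =
      SwitchInst::Create(Cond, Default, /*NumCases=*/2, InsertBefore);
  IntegerType *I32 = Type::getInt32Ty(Ctx);
  SI->addCase(ConstantInt::get(I32, 0), OnZero);
  SI->addCase(ConstantInt::get(I32, 1), OnOne);

  // Range-based iteration visits the explicit cases only (not the default).
  for (const auto &Case : SI->cases())
    (void)Case.getCaseSuccessor();

  // findCaseValue falls back to case_default() for unhandled constants, so
  // the returned handle may only be asked for its successor.
  SwitchInst::CaseIt It = SI->findCaseValue(ConstantInt::get(I32, 7));
  BasicBlock *Target = It->getCaseSuccessor();
  (void)Target;
}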
3519 | |
3520 | /// A wrapper class to simplify modification of SwitchInst cases along with |
3521 | /// their prof branch_weights metadata. |
3522 | class SwitchInstProfUpdateWrapper { |
3523 | SwitchInst &SI; |
3524 | Optional<SmallVector<uint32_t, 8> > Weights = None; |
3525 | bool Changed = false; |
3526 | |
3527 | protected: |
3528 | static MDNode *getProfBranchWeightsMD(const SwitchInst &SI); |
3529 | |
3530 | MDNode *buildProfBranchWeightsMD(); |
3531 | |
3532 | void init(); |
3533 | |
3534 | public: |
3535 | using CaseWeightOpt = Optional<uint32_t>; |
3536 | SwitchInst *operator->() { return &SI; } |
3537 | SwitchInst &operator*() { return SI; } |
3538 | operator SwitchInst *() { return &SI; } |
3539 | |
3540 | SwitchInstProfUpdateWrapper(SwitchInst &SI) : SI(SI) { init(); } |
3541 | |
3542 | ~SwitchInstProfUpdateWrapper() { |
3543 | if (Changed) |
3544 | SI.setMetadata(LLVMContext::MD_prof, buildProfBranchWeightsMD()); |
3545 | } |
3546 | |
3547 | /// Delegate the call to the underlying SwitchInst::removeCase() and remove |
3548 | /// correspondent branch weight. |
3549 | SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I); |
3550 | |
3551 | /// Delegate the call to the underlying SwitchInst::addCase() and set the |
3552 | /// specified branch weight for the added case. |
3553 | void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W); |
3554 | |
3555 | /// Delegate the call to the underlying SwitchInst::eraseFromParent() and mark |
3556 | /// this object to not touch the underlying SwitchInst in destructor. |
3557 | SymbolTableList<Instruction>::iterator eraseFromParent(); |
3558 | |
3559 | void setSuccessorWeight(unsigned idx, CaseWeightOpt W); |
3560 | CaseWeightOpt getSuccessorWeight(unsigned idx); |
3561 | |
3562 | static CaseWeightOpt getSuccessorWeight(const SwitchInst &SI, unsigned idx); |
3563 | }; |
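// --- Illustrative usage sketch, not part of the original header. ---
// The wrapper forwards to the underlying SwitchInst through operator-> and
// keeps the prof branch_weights metadata in sync. This hypothetical helper
// removes every case that targets `Dead`, relying on removeCase() returning
// an iterator to the following case.
static void exampleDropCasesTo(SwitchInst &SI, BasicBlock *Dead) {
  SwitchInstProfUpdateWrapper SIW(SI);
  for (auto It = SIW->case_begin(); It != SIW->case_end();) {
    if (It->getCaseSuccessor() == Dead)
      It = SIW.removeCase(It); // also drops the matching branch weight
    else
      ++It;
  }
  // On destruction, SIW rewrites the !prof metadata if anything changed.
}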
3564 | |
3565 | template <> |
3566 | struct OperandTraits<SwitchInst> : public HungoffOperandTraits<2> { |
3567 | }; |
3568 | |
3569 | DEFINE_TRANSPARENT_OPERAND_ACCESSORS(SwitchInst, Value)
3570 | |
3571 | //===----------------------------------------------------------------------===// |
3572 | // IndirectBrInst Class |
3573 | //===----------------------------------------------------------------------===// |
3574 | |
3575 | //===--------------------------------------------------------------------------- |
3576 | /// Indirect Branch Instruction. |
3577 | /// |
3578 | class IndirectBrInst : public Instruction { |
3579 | unsigned ReservedSpace; |
3580 | |
3581 | // Operand[0] = Address to jump to |
3582 | // Operand[n+1] = n-th destination |
3583 | IndirectBrInst(const IndirectBrInst &IBI); |
3584 | |
3585 | /// Create a new indirectbr instruction, specifying an |
3586 | /// Address to jump to. The number of expected destinations can be specified |
3587 | /// here to make memory allocation more efficient. This constructor can also |
3588 | /// autoinsert before another instruction. |
3589 | IndirectBrInst(Value *Address, unsigned NumDests, Instruction *InsertBefore); |
3590 | |
3591 | /// Create a new indirectbr instruction, specifying an |
3592 | /// Address to jump to. The number of expected destinations can be specified |
3593 | /// here to make memory allocation more efficient. This constructor also |
3594 | /// autoinserts at the end of the specified BasicBlock. |
3595 | IndirectBrInst(Value *Address, unsigned NumDests, BasicBlock *InsertAtEnd); |
3596 | |
3597 | // allocate space for exactly zero operands |
3598 | void *operator new(size_t S) { return User::operator new(S); } |
3599 | |
3600 | void init(Value *Address, unsigned NumDests); |
3601 | void growOperands(); |
3602 | |
3603 | protected: |
3604 | // Note: Instruction needs to be a friend here to call cloneImpl. |
3605 | friend class Instruction; |
3606 | |
3607 | IndirectBrInst *cloneImpl() const; |
3608 | |
3609 | public: |
3610 | void operator delete(void *Ptr) { User::operator delete(Ptr); } |
3611 | |
3612 | /// Iterator type that casts an operand to a basic block. |
3613 | /// |
3614 | /// This only makes sense because the successors are stored as adjacent |
3615 | /// operands for indirectbr instructions. |
3616 | struct succ_op_iterator |
3617 | : iterator_adaptor_base<succ_op_iterator, value_op_iterator, |
3618 | std::random_access_iterator_tag, BasicBlock *, |
3619 | ptrdiff_t, BasicBlock *, BasicBlock *> { |
3620 | explicit succ_op_iterator(value_op_iterator I) : iterator_adaptor_base(I) {} |
3621 | |
3622 | BasicBlock *operator*() const { return cast<BasicBlock>(*I); } |
3623 | BasicBlock *operator->() const { return operator*(); } |
3624 | }; |
3625 | |
3626 | /// The const version of `succ_op_iterator`. |
3627 | struct const_succ_op_iterator |
3628 | : iterator_adaptor_base<const_succ_op_iterator, const_value_op_iterator, |
3629 | std::random_access_iterator_tag, |
3630 | const BasicBlock *, ptrdiff_t, const BasicBlock *, |
3631 | const BasicBlock *> { |
3632 | explicit const_succ_op_iterator(const_value_op_iterator I) |
3633 | : iterator_adaptor_base(I) {} |
3634 | |
3635 | const BasicBlock *operator*() const { return cast<BasicBlock>(*I); } |
3636 | const BasicBlock *operator->() const { return operator*(); } |
3637 | }; |
3638 | |
3639 | static IndirectBrInst *Create(Value *Address, unsigned NumDests, |
3640 | Instruction *InsertBefore = nullptr) { |
3641 | return new IndirectBrInst(Address, NumDests, InsertBefore); |
3642 | } |
3643 | |
3644 | static IndirectBrInst *Create(Value *Address, unsigned NumDests, |
3645 | BasicBlock *InsertAtEnd) { |
3646 | return new IndirectBrInst(Address, NumDests, InsertAtEnd); |
3647 | } |
3648 | |
3649 | /// Provide fast operand accessors. |
3650 | DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value)
3651 | |
3652 | // Accessor Methods for IndirectBrInst instruction. |
3653 | Value *getAddress() { return getOperand(0); } |
3654 | const Value *getAddress() const { return getOperand(0); } |
3655 | void setAddress(Value *V) { setOperand(0, V); } |
3656 | |
3657 | /// Return the number of possible destinations in this
3658 | /// indirectbr instruction.
3659 | unsigned getNumDestinations() const { return getNumOperands()-1; } |
3660 | |
3661 | /// Return the specified destination. |
3662 | BasicBlock *getDestination(unsigned i) { return getSuccessor(i); } |
3663 | const BasicBlock *getDestination(unsigned i) const { return getSuccessor(i); } |
3664 | |
3665 | /// Add a destination. |
3666 | /// |
3667 | void addDestination(BasicBlock *Dest); |
3668 | |
3669 | /// This method removes the specified successor from the |
3670 | /// indirectbr instruction. |
3671 | void removeDestination(unsigned i); |
3672 | |
3673 | unsigned getNumSuccessors() const { return getNumOperands()-1; } |
3674 | BasicBlock *getSuccessor(unsigned i) const { |
3675 | return cast<BasicBlock>(getOperand(i+1)); |
3676 | } |
3677 | void setSuccessor(unsigned i, BasicBlock *NewSucc) { |
3678 | setOperand(i + 1, NewSucc); |
3679 | } |
3680 | |
3681 | iterator_range<succ_op_iterator> successors() { |
3682 | return make_range(succ_op_iterator(std::next(value_op_begin())), |
3683 | succ_op_iterator(value_op_end())); |
3684 | } |
3685 | |
3686 | iterator_range<const_succ_op_iterator> successors() const { |
3687 | return make_range(const_succ_op_iterator(std::next(value_op_begin())), |
3688 | const_succ_op_iterator(value_op_end())); |
3689 | } |
3690 | |
3691 | // Methods for support type inquiry through isa, cast, and dyn_cast: |
3692 | static bool classof(const Instruction *I) { |
3693 | return I->getOpcode() == Instruction::IndirectBr; |
3694 | } |
3695 | static bool classof(const Value *V) { |
3696 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
3697 | } |
3698 | }; |
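// --- Illustrative usage sketch, not part of the original header. ---
// Hypothetical helper showing the IndirectBrInst API: `Address` is expected
// to be a blockaddress-derived pointer and `Targets` the candidate blocks.
static void exampleBuildIndirectBr(Value *Address,
                                   ArrayRef<BasicBlock *> Targets,
                                   BasicBlock *InsertAtEnd) {
  IndirectBrInst *IBI =
      IndirectBrInst::Create(Address, Targets.size(), InsertAtEnd);
  for (BasicBlock *BB : Targets)
    IBI->addDestination(BB);

  // successors() skips operand 0 (the address) and yields only the blocks.
  for (BasicBlock *Succ : IBI->successors())
    (void)Succ;
}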
3699 | |
3700 | template <> |
3701 | struct OperandTraits<IndirectBrInst> : public HungoffOperandTraits<1> { |
3702 | }; |
3703 | |
3704 | DEFINE_TRANSPARENT_OPERAND_ACCESSORS(IndirectBrInst, Value)
3705 | |
3706 | //===----------------------------------------------------------------------===// |
3707 | // InvokeInst Class |
3708 | //===----------------------------------------------------------------------===// |
3709 | |
3710 | /// Invoke instruction. The SubclassData field is used to hold the |
3711 | /// calling convention of the call. |
3712 | /// |
3713 | class InvokeInst : public CallBase { |
3714 | /// The number of operands for this call beyond the called function, |
3715 | /// arguments, and operand bundles. |
3716 | static constexpr int NumExtraOperands = 2; |
3717 | |
3718 | /// The index from the end of the operand array to the normal destination. |
3719 | static constexpr int NormalDestOpEndIdx = -3; |
3720 | |
3721 | /// The index from the end of the operand array to the unwind destination. |
3722 | static constexpr int UnwindDestOpEndIdx = -2; |
3723 | |
3724 | InvokeInst(const InvokeInst &BI); |
3725 | |
3726 | /// Construct an InvokeInst given a range of arguments. |
3727 | /// |
3728 | /// Construct an InvokeInst from a range of arguments |
3729 | inline InvokeInst(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, |
3730 | BasicBlock *IfException, ArrayRef<Value *> Args, |
3731 | ArrayRef<OperandBundleDef> Bundles, int NumOperands, |
3732 | const Twine &NameStr, Instruction *InsertBefore); |
3733 | |
3734 | inline InvokeInst(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, |
3735 | BasicBlock *IfException, ArrayRef<Value *> Args, |
3736 | ArrayRef<OperandBundleDef> Bundles, int NumOperands, |
3737 | const Twine &NameStr, BasicBlock *InsertAtEnd); |
3738 | |
3739 | void init(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, |
3740 | BasicBlock *IfException, ArrayRef<Value *> Args, |
3741 | ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr); |
3742 | |
3743 | /// Compute the number of operands to allocate. |
3744 | static int ComputeNumOperands(int NumArgs, int NumBundleInputs = 0) { |
3745 | // We need one operand for the called function, plus our extra operands and |
3746 | // the input operand counts provided. |
3747 | return 1 + NumExtraOperands + NumArgs + NumBundleInputs; |
3748 | } |
3749 | |
3750 | protected: |
3751 | // Note: Instruction needs to be a friend here to call cloneImpl. |
3752 | friend class Instruction; |
3753 | |
3754 | InvokeInst *cloneImpl() const; |
3755 | |
3756 | public: |
3757 | static InvokeInst *Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, |
3758 | BasicBlock *IfException, ArrayRef<Value *> Args, |
3759 | const Twine &NameStr, |
3760 | Instruction *InsertBefore = nullptr) { |
3761 | int NumOperands = ComputeNumOperands(Args.size()); |
3762 | return new (NumOperands) |
3763 | InvokeInst(Ty, Func, IfNormal, IfException, Args, None, NumOperands, |
3764 | NameStr, InsertBefore); |
3765 | } |
3766 | |
3767 | static InvokeInst *Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, |
3768 | BasicBlock *IfException, ArrayRef<Value *> Args, |
3769 | ArrayRef<OperandBundleDef> Bundles = None, |
3770 | const Twine &NameStr = "", |
3771 | Instruction *InsertBefore = nullptr) { |
3772 | int NumOperands = |
3773 | ComputeNumOperands(Args.size(), CountBundleInputs(Bundles)); |
3774 | unsigned DescriptorBytes = Bundles.size() * sizeof(BundleOpInfo); |
3775 | |
3776 | return new (NumOperands, DescriptorBytes) |
3777 | InvokeInst(Ty, Func, IfNormal, IfException, Args, Bundles, NumOperands, |
3778 | NameStr, InsertBefore); |
3779 | } |
3780 | |
3781 | static InvokeInst *Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, |
3782 | BasicBlock *IfException, ArrayRef<Value *> Args, |
3783 | const Twine &NameStr, BasicBlock *InsertAtEnd) { |
3784 | int NumOperands = ComputeNumOperands(Args.size()); |
3785 | return new (NumOperands) |
3786 | InvokeInst(Ty, Func, IfNormal, IfException, Args, None, NumOperands, |
3787 | NameStr, InsertAtEnd); |
3788 | } |
3789 | |
3790 | static InvokeInst *Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, |
3791 | BasicBlock *IfException, ArrayRef<Value *> Args, |
3792 | ArrayRef<OperandBundleDef> Bundles, |
3793 | const Twine &NameStr, BasicBlock *InsertAtEnd) { |
3794 | int NumOperands = |
3795 | ComputeNumOperands(Args.size(), CountBundleInputs(Bundles)); |
3796 | unsigned DescriptorBytes = Bundles.size() * sizeof(BundleOpInfo); |
3797 | |
3798 | return new (NumOperands, DescriptorBytes) |
3799 | InvokeInst(Ty, Func, IfNormal, IfException, Args, Bundles, NumOperands, |
3800 | NameStr, InsertAtEnd); |
3801 | } |
3802 | |
3803 | static InvokeInst *Create(FunctionCallee Func, BasicBlock *IfNormal, |
3804 | BasicBlock *IfException, ArrayRef<Value *> Args, |
3805 | const Twine &NameStr, |
3806 | Instruction *InsertBefore = nullptr) { |
3807 | return Create(Func.getFunctionType(), Func.getCallee(), IfNormal, |
3808 | IfException, Args, None, NameStr, InsertBefore); |
3809 | } |
3810 | |
3811 | static InvokeInst *Create(FunctionCallee Func, BasicBlock *IfNormal, |
3812 | BasicBlock *IfException, ArrayRef<Value *> Args, |
3813 | ArrayRef<OperandBundleDef> Bundles = None, |
3814 | const Twine &NameStr = "", |
3815 | Instruction *InsertBefore = nullptr) { |
3816 | return Create(Func.getFunctionType(), Func.getCallee(), IfNormal, |
3817 | IfException, Args, Bundles, NameStr, InsertBefore); |
3818 | } |
3819 | |
3820 | static InvokeInst *Create(FunctionCallee Func, BasicBlock *IfNormal, |
3821 | BasicBlock *IfException, ArrayRef<Value *> Args, |
3822 | const Twine &NameStr, BasicBlock *InsertAtEnd) { |
3823 | return Create(Func.getFunctionType(), Func.getCallee(), IfNormal, |
3824 | IfException, Args, NameStr, InsertAtEnd); |
3825 | } |
3826 | |
3827 | static InvokeInst *Create(FunctionCallee Func, BasicBlock *IfNormal, |
3828 | BasicBlock *IfException, ArrayRef<Value *> Args, |
3829 | ArrayRef<OperandBundleDef> Bundles, |
3830 | const Twine &NameStr, BasicBlock *InsertAtEnd) { |
3831 | return Create(Func.getFunctionType(), Func.getCallee(), IfNormal, |
3832 | IfException, Args, Bundles, NameStr, InsertAtEnd); |
3833 | } |
3834 | |
3835 | /// Create a clone of \p II with a different set of operand bundles and |
3836 | /// insert it before \p InsertPt. |
3837 | /// |
3838 | /// The returned invoke instruction is identical to \p II in every way except |
3839 | /// that the operand bundles for the new instruction are set to the operand |
3840 | /// bundles in \p Bundles. |
3841 | static InvokeInst *Create(InvokeInst *II, ArrayRef<OperandBundleDef> Bundles, |
3842 | Instruction *InsertPt = nullptr); |
3843 | |
3844 | // get*Dest - Return the destination basic blocks... |
3845 | BasicBlock *getNormalDest() const { |
3846 | return cast<BasicBlock>(Op<NormalDestOpEndIdx>()); |
3847 | } |
3848 | BasicBlock *getUnwindDest() const { |
3849 | return cast<BasicBlock>(Op<UnwindDestOpEndIdx>()); |
3850 | } |
3851 | void setNormalDest(BasicBlock *B) { |
3852 | Op<NormalDestOpEndIdx>() = reinterpret_cast<Value *>(B); |
3853 | } |
3854 | void setUnwindDest(BasicBlock *B) { |
3855 | Op<UnwindDestOpEndIdx>() = reinterpret_cast<Value *>(B); |
3856 | } |
3857 | |
3858 | /// Get the landingpad instruction from the landing pad |
3859 | /// block (the unwind destination). |
3860 | LandingPadInst *getLandingPadInst() const; |
3861 | |
3862 | BasicBlock *getSuccessor(unsigned i) const { |
3863 | assert(i < 2 && "Successor # out of range for invoke!");
3864 | return i == 0 ? getNormalDest() : getUnwindDest(); |
3865 | } |
3866 | |
3867 | void setSuccessor(unsigned i, BasicBlock *NewSucc) { |
3868 | assert(i < 2 && "Successor # out of range for invoke!");
3869 | if (i == 0) |
3870 | setNormalDest(NewSucc); |
3871 | else |
3872 | setUnwindDest(NewSucc); |
3873 | } |
3874 | |
3875 | unsigned getNumSuccessors() const { return 2; } |
3876 | |
3877 | // Methods for support type inquiry through isa, cast, and dyn_cast: |
3878 | static bool classof(const Instruction *I) { |
3879 | return (I->getOpcode() == Instruction::Invoke); |
3880 | } |
3881 | static bool classof(const Value *V) { |
3882 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
3883 | } |
3884 | |
3885 | private: |
3886 | // Shadow Instruction::setInstructionSubclassData with a private forwarding |
3887 | // method so that subclasses cannot accidentally use it. |
3888 | template <typename Bitfield> |
3889 | void setSubclassData(typename Bitfield::Type Value) { |
3890 | Instruction::setSubclassData<Bitfield>(Value); |
3891 | } |
3892 | }; |
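// --- Illustrative usage sketch, not part of the original header. ---
// Hypothetical helper: emit an invoke of `Callee`, branching to `Normal` on
// ordinary return and to `Unwind` (a landing-pad block) on exception. The
// empty name keeps the sketch valid even for void-returning callees.
static InvokeInst *exampleEmitInvoke(FunctionCallee Callee,
                                     ArrayRef<Value *> Args,
                                     BasicBlock *Normal, BasicBlock *Unwind,
                                     BasicBlock *InsertAtEnd) {
  InvokeInst *II =
      InvokeInst::Create(Callee, Normal, Unwind, Args, "", InsertAtEnd);
  // Successor 0 is the normal destination, successor 1 the unwind one.
  assert(II->getNormalDest() == Normal && II->getUnwindDest() == Unwind);
  return II;
}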
3893 | |
3894 | InvokeInst::InvokeInst(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, |
3895 | BasicBlock *IfException, ArrayRef<Value *> Args, |
3896 | ArrayRef<OperandBundleDef> Bundles, int NumOperands, |
3897 | const Twine &NameStr, Instruction *InsertBefore) |
3898 | : CallBase(Ty->getReturnType(), Instruction::Invoke, |
3899 | OperandTraits<CallBase>::op_end(this) - NumOperands, NumOperands, |
3900 | InsertBefore) { |
3901 | init(Ty, Func, IfNormal, IfException, Args, Bundles, NameStr); |
3902 | } |
3903 | |
3904 | InvokeInst::InvokeInst(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, |
3905 | BasicBlock *IfException, ArrayRef<Value *> Args, |
3906 | ArrayRef<OperandBundleDef> Bundles, int NumOperands, |
3907 | const Twine &NameStr, BasicBlock *InsertAtEnd) |
3908 | : CallBase(Ty->getReturnType(), Instruction::Invoke, |
3909 | OperandTraits<CallBase>::op_end(this) - NumOperands, NumOperands, |
3910 | InsertAtEnd) { |
3911 | init(Ty, Func, IfNormal, IfException, Args, Bundles, NameStr); |
3912 | } |
3913 | |
3914 | //===----------------------------------------------------------------------===// |
3915 | // CallBrInst Class |
3916 | //===----------------------------------------------------------------------===// |
3917 | |
3918 | /// CallBr instruction, tracking function calls that may not return control but |
3919 | /// instead transfer it to a third location. The SubclassData field is used to |
3920 | /// hold the calling convention of the call. |
3921 | /// |
3922 | class CallBrInst : public CallBase { |
3923 | |
3924 | unsigned NumIndirectDests; |
3925 | |
3926 | CallBrInst(const CallBrInst &BI); |
3927 | |
3928 | /// Construct a CallBrInst given a range of arguments. |
3929 | /// |
3930 | /// Construct a CallBrInst from a range of arguments |
3931 | inline CallBrInst(FunctionType *Ty, Value *Func, BasicBlock *DefaultDest, |
3932 | ArrayRef<BasicBlock *> IndirectDests, |
3933 | ArrayRef<Value *> Args, |
3934 | ArrayRef<OperandBundleDef> Bundles, int NumOperands, |
3935 | const Twine &NameStr, Instruction *InsertBefore); |
3936 | |
3937 | inline CallBrInst(FunctionType *Ty, Value *Func, BasicBlock *DefaultDest, |
3938 | ArrayRef<BasicBlock *> IndirectDests, |
3939 | ArrayRef<Value *> Args, |
3940 | ArrayRef<OperandBundleDef> Bundles, int NumOperands, |
3941 | const Twine &NameStr, BasicBlock *InsertAtEnd); |
3942 | |
3943 | void init(FunctionType *FTy, Value *Func, BasicBlock *DefaultDest, |
3944 | ArrayRef<BasicBlock *> IndirectDests, ArrayRef<Value *> Args, |
3945 | ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr); |
3946 | |
3947 | /// If the indirect destinations change, scan and update the argument list.
3948 | void updateArgBlockAddresses(unsigned i, BasicBlock *B); |
3949 | |
3950 | /// Compute the number of operands to allocate. |
3951 | static int ComputeNumOperands(int NumArgs, int NumIndirectDests, |
3952 | int NumBundleInputs = 0) { |
3953 | // We need one operand for the called function, plus our extra operands and |
3954 | // the input operand counts provided. |
3955 | return 2 + NumIndirectDests + NumArgs + NumBundleInputs; |
3956 | } |
3957 | |
3958 | protected: |
3959 | // Note: Instruction needs to be a friend here to call cloneImpl. |
3960 | friend class Instruction; |
3961 | |
3962 | CallBrInst *cloneImpl() const; |
3963 | |
3964 | public: |
3965 | static CallBrInst *Create(FunctionType *Ty, Value *Func, |
3966 | BasicBlock *DefaultDest, |
3967 | ArrayRef<BasicBlock *> IndirectDests, |
3968 | ArrayRef<Value *> Args, const Twine &NameStr, |
3969 | Instruction *InsertBefore = nullptr) { |
3970 | int NumOperands = ComputeNumOperands(Args.size(), IndirectDests.size()); |
3971 | return new (NumOperands) |
3972 | CallBrInst(Ty, Func, DefaultDest, IndirectDests, Args, None, |
3973 | NumOperands, NameStr, InsertBefore); |
3974 | } |
3975 | |
3976 | static CallBrInst *Create(FunctionType *Ty, Value *Func, |
3977 | BasicBlock *DefaultDest, |
3978 | ArrayRef<BasicBlock *> IndirectDests, |
3979 | ArrayRef<Value *> Args, |
3980 | ArrayRef<OperandBundleDef> Bundles = None, |
3981 | const Twine &NameStr = "", |
3982 | Instruction *InsertBefore = nullptr) { |
3983 | int NumOperands = ComputeNumOperands(Args.size(), IndirectDests.size(), |
3984 | CountBundleInputs(Bundles)); |
3985 | unsigned DescriptorBytes = Bundles.size() * sizeof(BundleOpInfo); |
3986 | |
3987 | return new (NumOperands, DescriptorBytes) |
3988 | CallBrInst(Ty, Func, DefaultDest, IndirectDests, Args, Bundles, |
3989 | NumOperands, NameStr, InsertBefore); |
3990 | } |
3991 | |
3992 | static CallBrInst *Create(FunctionType *Ty, Value *Func, |
3993 | BasicBlock *DefaultDest, |
3994 | ArrayRef<BasicBlock *> IndirectDests, |
3995 | ArrayRef<Value *> Args, const Twine &NameStr, |
3996 | BasicBlock *InsertAtEnd) { |
3997 | int NumOperands = ComputeNumOperands(Args.size(), IndirectDests.size()); |
3998 | return new (NumOperands) |
3999 | CallBrInst(Ty, Func, DefaultDest, IndirectDests, Args, None, |
4000 | NumOperands, NameStr, InsertAtEnd); |
4001 | } |
4002 | |
4003 | static CallBrInst *Create(FunctionType *Ty, Value *Func, |
4004 | BasicBlock *DefaultDest, |
4005 | ArrayRef<BasicBlock *> IndirectDests, |
4006 | ArrayRef<Value *> Args, |
4007 | ArrayRef<OperandBundleDef> Bundles, |
4008 | const Twine &NameStr, BasicBlock *InsertAtEnd) { |
4009 | int NumOperands = ComputeNumOperands(Args.size(), IndirectDests.size(), |
4010 | CountBundleInputs(Bundles)); |
4011 | unsigned DescriptorBytes = Bundles.size() * sizeof(BundleOpInfo); |
4012 | |
4013 | return new (NumOperands, DescriptorBytes) |
4014 | CallBrInst(Ty, Func, DefaultDest, IndirectDests, Args, Bundles, |
4015 | NumOperands, NameStr, InsertAtEnd); |
4016 | } |
4017 | |
4018 | static CallBrInst *Create(FunctionCallee Func, BasicBlock *DefaultDest, |
4019 | ArrayRef<BasicBlock *> IndirectDests, |
4020 | ArrayRef<Value *> Args, const Twine &NameStr, |
4021 | Instruction *InsertBefore = nullptr) { |
4022 | return Create(Func.getFunctionType(), Func.getCallee(), DefaultDest, |
4023 | IndirectDests, Args, NameStr, InsertBefore); |
4024 | } |
4025 | |
4026 | static CallBrInst *Create(FunctionCallee Func, BasicBlock *DefaultDest, |
4027 | ArrayRef<BasicBlock *> IndirectDests, |
4028 | ArrayRef<Value *> Args, |
4029 | ArrayRef<OperandBundleDef> Bundles = None, |
4030 | const Twine &NameStr = "", |
4031 | Instruction *InsertBefore = nullptr) { |
4032 | return Create(Func.getFunctionType(), Func.getCallee(), DefaultDest, |
4033 | IndirectDests, Args, Bundles, NameStr, InsertBefore); |
4034 | } |
4035 | |
4036 | static CallBrInst *Create(FunctionCallee Func, BasicBlock *DefaultDest, |
4037 | ArrayRef<BasicBlock *> IndirectDests, |
4038 | ArrayRef<Value *> Args, const Twine &NameStr, |
4039 | BasicBlock *InsertAtEnd) { |
4040 | return Create(Func.getFunctionType(), Func.getCallee(), DefaultDest, |
4041 | IndirectDests, Args, NameStr, InsertAtEnd); |
4042 | } |
4043 | |
4044 | static CallBrInst *Create(FunctionCallee Func, |
4045 | BasicBlock *DefaultDest, |
4046 | ArrayRef<BasicBlock *> IndirectDests, |
4047 | ArrayRef<Value *> Args, |
4048 | ArrayRef<OperandBundleDef> Bundles, |
4049 | const Twine &NameStr, BasicBlock *InsertAtEnd) { |
4050 | return Create(Func.getFunctionType(), Func.getCallee(), DefaultDest, |
4051 | IndirectDests, Args, Bundles, NameStr, InsertAtEnd); |
4052 | } |
4053 | |
4054 | /// Create a clone of \p CBI with a different set of operand bundles and |
4055 | /// insert it before \p InsertPt. |
4056 | /// |
4057 | /// The returned callbr instruction is identical to \p CBI in every way |
4058 | /// except that the operand bundles for the new instruction are set to the |
4059 | /// operand bundles in \p Bundles. |
4060 | static CallBrInst *Create(CallBrInst *CBI, |
4061 | ArrayRef<OperandBundleDef> Bundles, |
4062 | Instruction *InsertPt = nullptr); |
4063 | |
4064 | /// Return the number of callbr indirect dest labels. |
4065 | /// |
4066 | unsigned getNumIndirectDests() const { return NumIndirectDests; } |
4067 | |
4068 | /// getIndirectDestLabel - Return the i-th indirect dest label. |
4069 | /// |
4070 | Value *getIndirectDestLabel(unsigned i) const { |
4071 | assert(i < getNumIndirectDests() && "Out of bounds!");
4072 | return getOperand(i + getNumArgOperands() + getNumTotalBundleOperands() + |
4073 | 1); |
4074 | } |
4075 | |
4076 | Value *getIndirectDestLabelUse(unsigned i) const { |
4077 | assert(i < getNumIndirectDests() && "Out of bounds!");
4078 | return getOperandUse(i + getNumArgOperands() + getNumTotalBundleOperands() + |
4079 | 1); |
4080 | } |
4081 | |
4082 | // Return the destination basic blocks... |
4083 | BasicBlock *getDefaultDest() const { |
4084 | return cast<BasicBlock>(*(&Op<-1>() - getNumIndirectDests() - 1)); |
4085 | } |
4086 | BasicBlock *getIndirectDest(unsigned i) const { |
4087 | return cast_or_null<BasicBlock>(*(&Op<-1>() - getNumIndirectDests() + i)); |
4088 | } |
4089 | SmallVector<BasicBlock *, 16> getIndirectDests() const { |
4090 | SmallVector<BasicBlock *, 16> IndirectDests; |
4091 | for (unsigned i = 0, e = getNumIndirectDests(); i < e; ++i) |
4092 | IndirectDests.push_back(getIndirectDest(i)); |
4093 | return IndirectDests; |
4094 | } |
4095 | void setDefaultDest(BasicBlock *B) { |
4096 | *(&Op<-1>() - getNumIndirectDests() - 1) = reinterpret_cast<Value *>(B); |
4097 | } |
4098 | void setIndirectDest(unsigned i, BasicBlock *B) { |
4099 | updateArgBlockAddresses(i, B); |
4100 | *(&Op<-1>() - getNumIndirectDests() + i) = reinterpret_cast<Value *>(B); |
4101 | } |
4102 | |
4103 | BasicBlock *getSuccessor(unsigned i) const { |
4104 | assert(i < getNumSuccessors() + 1 &&
4105 |        "Successor # out of range for callbr!");
4106 | return i == 0 ? getDefaultDest() : getIndirectDest(i - 1); |
4107 | } |
4108 | |
4109 | void setSuccessor(unsigned i, BasicBlock *NewSucc) { |
4110 | assert(i < getNumIndirectDests() + 1 &&
4111 |        "Successor # out of range for callbr!");
4112 | return i == 0 ? setDefaultDest(NewSucc) : setIndirectDest(i - 1, NewSucc); |
4113 | } |
4114 | |
4115 | unsigned getNumSuccessors() const { return getNumIndirectDests() + 1; } |
4116 | |
4117 | // Methods for support type inquiry through isa, cast, and dyn_cast: |
4118 | static bool classof(const Instruction *I) { |
4119 | return (I->getOpcode() == Instruction::CallBr); |
4120 | } |
4121 | static bool classof(const Value *V) { |
4122 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
4123 | } |
4124 | |
4125 | private: |
4126 | // Shadow Instruction::setInstructionSubclassData with a private forwarding |
4127 | // method so that subclasses cannot accidentally use it. |
4128 | template <typename Bitfield> |
4129 | void setSubclassData(typename Bitfield::Type Value) { |
4130 | Instruction::setSubclassData<Bitfield>(Value); |
4131 | } |
4132 | }; |
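// --- Illustrative usage sketch, not part of the original header. ---
// Hypothetical helper for the CallBrInst API (used e.g. for 'asm goto'):
// `Fallthrough` is the default destination and `Labels` the indirect ones.
static CallBrInst *exampleEmitCallBr(FunctionCallee Callee,
                                     ArrayRef<Value *> Args,
                                     BasicBlock *Fallthrough,
                                     ArrayRef<BasicBlock *> Labels,
                                     BasicBlock *InsertAtEnd) {
  CallBrInst *CBI =
      CallBrInst::Create(Callee, Fallthrough, Labels, Args, "", InsertAtEnd);
  // Successor 0 is the default destination; 1..N are the indirect ones.
  assert(CBI->getNumSuccessors() == Labels.size() + 1);
  return CBI;
}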
4133 | |
4134 | CallBrInst::CallBrInst(FunctionType *Ty, Value *Func, BasicBlock *DefaultDest, |
4135 | ArrayRef<BasicBlock *> IndirectDests, |
4136 | ArrayRef<Value *> Args, |
4137 | ArrayRef<OperandBundleDef> Bundles, int NumOperands, |
4138 | const Twine &NameStr, Instruction *InsertBefore) |
4139 | : CallBase(Ty->getReturnType(), Instruction::CallBr, |
4140 | OperandTraits<CallBase>::op_end(this) - NumOperands, NumOperands, |
4141 | InsertBefore) { |
4142 | init(Ty, Func, DefaultDest, IndirectDests, Args, Bundles, NameStr); |
4143 | } |
4144 | |
4145 | CallBrInst::CallBrInst(FunctionType *Ty, Value *Func, BasicBlock *DefaultDest, |
4146 | ArrayRef<BasicBlock *> IndirectDests, |
4147 | ArrayRef<Value *> Args, |
4148 | ArrayRef<OperandBundleDef> Bundles, int NumOperands, |
4149 | const Twine &NameStr, BasicBlock *InsertAtEnd) |
4150 | : CallBase(Ty->getReturnType(), Instruction::CallBr, |
4151 | OperandTraits<CallBase>::op_end(this) - NumOperands, NumOperands, |
4152 | InsertAtEnd) { |
4153 | init(Ty, Func, DefaultDest, IndirectDests, Args, Bundles, NameStr); |
4154 | } |
4155 | |
4156 | //===----------------------------------------------------------------------===// |
4157 | // ResumeInst Class |
4158 | //===----------------------------------------------------------------------===// |
4159 | |
4160 | //===--------------------------------------------------------------------------- |
4161 | /// Resume the propagation of an exception. |
4162 | /// |
4163 | class ResumeInst : public Instruction { |
4164 | ResumeInst(const ResumeInst &RI); |
4165 | |
4166 | explicit ResumeInst(Value *Exn, Instruction *InsertBefore=nullptr); |
4167 | ResumeInst(Value *Exn, BasicBlock *InsertAtEnd); |
4168 | |
4169 | protected: |
4170 | // Note: Instruction needs to be a friend here to call cloneImpl. |
4171 | friend class Instruction; |
4172 | |
4173 | ResumeInst *cloneImpl() const; |
4174 | |
4175 | public: |
4176 | static ResumeInst *Create(Value *Exn, Instruction *InsertBefore = nullptr) { |
4177 | return new(1) ResumeInst(Exn, InsertBefore); |
4178 | } |
4179 | |
4180 | static ResumeInst *Create(Value *Exn, BasicBlock *InsertAtEnd) { |
4181 | return new(1) ResumeInst(Exn, InsertAtEnd); |
4182 | } |
4183 | |
4184 | /// Provide fast operand accessors |
4185 | DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value)
4186 | |
4187 | /// Convenience accessor. |
4188 | Value *getValue() const { return Op<0>(); } |
4189 | |
4190 | unsigned getNumSuccessors() const { return 0; } |
4191 | |
4192 | // Methods for support type inquiry through isa, cast, and dyn_cast: |
4193 | static bool classof(const Instruction *I) { |
4194 | return I->getOpcode() == Instruction::Resume; |
4195 | } |
4196 | static bool classof(const Value *V) { |
4197 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
4198 | } |
4199 | |
4200 | private: |
4201 | BasicBlock *getSuccessor(unsigned idx) const { |
4202 | llvm_unreachable("ResumeInst has no successors!");
4203 | } |
4204 | |
4205 | void setSuccessor(unsigned idx, BasicBlock *NewSucc) { |
4206 | llvm_unreachable("ResumeInst has no successors!");
4207 | } |
4208 | }; |
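// --- Illustrative usage sketch, not part of the original header. ---
// Hypothetical helper: re-raise the in-flight exception described by the
// aggregate produced by a landingpad instruction.
static ResumeInst *exampleEmitResume(Value *LandingPadValue,
                                     BasicBlock *InsertAtEnd) {
  // The single operand is the exception value; a resume has no successors.
  return ResumeInst::Create(LandingPadValue, InsertAtEnd);
}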
4209 | |
4210 | template <> |
4211 | struct OperandTraits<ResumeInst> : |
4212 | public FixedNumOperandTraits<ResumeInst, 1> { |
4213 | }; |
4214 | |
4215 | DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ResumeInst, Value)
4216 | |
4217 | //===----------------------------------------------------------------------===// |
4218 | // CatchSwitchInst Class |
4219 | //===----------------------------------------------------------------------===// |
4220 | class CatchSwitchInst : public Instruction { |
4221 | using UnwindDestField = BoolBitfieldElementT<0>; |
4222 | |
4223 | /// The number of operands actually allocated. NumOperands is |
4224 | /// the number actually in use. |
4225 | unsigned ReservedSpace; |
4226 | |
4227 | // Operand[0] = Outer scope |
4228 | // Operand[1] = Unwind block destination |
4229 | // Operand[n] = BasicBlock to go to on match |
4230 | CatchSwitchInst(const CatchSwitchInst &CSI); |
4231 | |
4232 | /// Create a new switch instruction, specifying a |
4233 | /// default destination. The number of additional handlers can be specified |
4234 | /// here to make memory allocation more efficient. |
4235 | /// This constructor can also autoinsert before another instruction. |
4236 | CatchSwitchInst(Value *ParentPad, BasicBlock *UnwindDest, |
4237 | unsigned NumHandlers, const Twine &NameStr, |
4238 | Instruction *InsertBefore); |
4239 | |
4240 | /// Create a new switch instruction, specifying a |
4241 | /// default destination. The number of additional handlers can be specified |
4242 | /// here to make memory allocation more efficient. |
4243 | /// This constructor also autoinserts at the end of the specified BasicBlock. |
4244 | CatchSwitchInst(Value *ParentPad, BasicBlock *UnwindDest, |
4245 | unsigned NumHandlers, const Twine &NameStr, |
4246 | BasicBlock *InsertAtEnd); |
4247 | |
4248 | // allocate space for exactly zero operands |
4249 | void *operator new(size_t S) { return User::operator new(S); } |
4250 | |
4251 | void init(Value *ParentPad, BasicBlock *UnwindDest, unsigned NumReserved); |
4252 | void growOperands(unsigned Size); |
4253 | |
4254 | protected: |
4255 | // Note: Instruction needs to be a friend here to call cloneImpl. |
4256 | friend class Instruction; |
4257 | |
4258 | CatchSwitchInst *cloneImpl() const; |
4259 | |
4260 | public: |
4261 | void operator delete(void *Ptr) { return User::operator delete(Ptr); } |
4262 | |
4263 | static CatchSwitchInst *Create(Value *ParentPad, BasicBlock *UnwindDest, |
4264 | unsigned NumHandlers, |
4265 | const Twine &NameStr = "", |
4266 | Instruction *InsertBefore = nullptr) { |
4267 | return new CatchSwitchInst(ParentPad, UnwindDest, NumHandlers, NameStr, |
4268 | InsertBefore); |
4269 | } |
4270 | |
4271 | static CatchSwitchInst *Create(Value *ParentPad, BasicBlock *UnwindDest, |
4272 | unsigned NumHandlers, const Twine &NameStr, |
4273 | BasicBlock *InsertAtEnd) { |
4274 | return new CatchSwitchInst(ParentPad, UnwindDest, NumHandlers, NameStr, |
4275 | InsertAtEnd); |
4276 | } |
4277 | |
4278 | /// Provide fast operand accessors |
4279 | DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value)
4280 | |
4281 | // Accessor Methods for CatchSwitch stmt |
4282 | Value *getParentPad() const { return getOperand(0); } |
4283 | void setParentPad(Value *ParentPad) { setOperand(0, ParentPad); } |
4284 | |
4285 | // Accessor Methods for CatchSwitch stmt |
4286 | bool hasUnwindDest() const { return getSubclassData<UnwindDestField>(); } |
4287 | bool unwindsToCaller() const { return !hasUnwindDest(); } |
4288 | BasicBlock *getUnwindDest() const { |
4289 | if (hasUnwindDest()) |
4290 | return cast<BasicBlock>(getOperand(1)); |
4291 | return nullptr; |
4292 | } |
4293 | void setUnwindDest(BasicBlock *UnwindDest) { |
4294 | assert(UnwindDest);
4295 | assert(hasUnwindDest());
4296 | setOperand(1, UnwindDest); |
4297 | } |
4298 | |
4299 | /// Return the number of 'handlers' in this catchswitch
4300 | /// instruction, excluding the default handler.
4301 | unsigned getNumHandlers() const { |
4302 | if (hasUnwindDest()) |
4303 | return getNumOperands() - 2; |
4304 | return getNumOperands() - 1; |
4305 | } |
4306 | |
4307 | private: |
4308 | static BasicBlock *handler_helper(Value *V) { return cast<BasicBlock>(V); } |
4309 | static const BasicBlock *handler_helper(const Value *V) { |
4310 | return cast<BasicBlock>(V); |
4311 | } |
4312 | |
4313 | public: |
4314 | using DerefFnTy = BasicBlock *(*)(Value *); |
4315 | using handler_iterator = mapped_iterator<op_iterator, DerefFnTy>; |
4316 | using handler_range = iterator_range<handler_iterator>; |
4317 | using ConstDerefFnTy = const BasicBlock *(*)(const Value *); |
4318 | using const_handler_iterator = |
4319 | mapped_iterator<const_op_iterator, ConstDerefFnTy>; |
4320 | using const_handler_range = iterator_range<const_handler_iterator>; |
4321 | |
4322 | /// Returns an iterator that points to the first handler in CatchSwitchInst. |
4323 | handler_iterator handler_begin() { |
4324 | op_iterator It = op_begin() + 1; |
4325 | if (hasUnwindDest()) |
4326 | ++It; |
4327 | return handler_iterator(It, DerefFnTy(handler_helper)); |
4328 | } |
4329 | |
4330 | /// Returns an iterator that points to the first handler in the |
4331 | /// CatchSwitchInst. |
4332 | const_handler_iterator handler_begin() const { |
4333 | const_op_iterator It = op_begin() + 1; |
4334 | if (hasUnwindDest()) |
4335 | ++It; |
4336 | return const_handler_iterator(It, ConstDerefFnTy(handler_helper)); |
4337 | } |
4338 | |
4339 | /// Returns a read-only iterator that points one past the last |
4340 | /// handler in the CatchSwitchInst. |
4341 | handler_iterator handler_end() { |
4342 | return handler_iterator(op_end(), DerefFnTy(handler_helper)); |
4343 | } |
4344 | |
4345 | /// Returns an iterator that points one past the last handler in the |
4346 | /// CatchSwitchInst. |
4347 | const_handler_iterator handler_end() const { |
4348 | return const_handler_iterator(op_end(), ConstDerefFnTy(handler_helper)); |
4349 | } |
4350 | |
4351 | /// iteration adapter for range-for loops. |
4352 | handler_range handlers() { |
4353 | return make_range(handler_begin(), handler_end()); |
4354 | } |
4355 | |
4356 | /// iteration adapter for range-for loops. |
4357 | const_handler_range handlers() const { |
4358 | return make_range(handler_begin(), handler_end()); |
4359 | } |
4360 | |
4361 | /// Add an entry to the switch instruction... |
4362 | /// Note: |
4363 | /// This action invalidates handler_end(). The old handler_end() iterator
4364 | /// will point to the added handler.
4365 | void addHandler(BasicBlock *Dest); |
4366 | |
4367 | void removeHandler(handler_iterator HI); |
4368 | |
4369 | unsigned getNumSuccessors() const { return getNumOperands() - 1; } |
4370 | BasicBlock *getSuccessor(unsigned Idx) const { |
4371 | assert(Idx < getNumSuccessors() &&
4372 |        "Successor # out of range for catchswitch!");
4373 | return cast<BasicBlock>(getOperand(Idx + 1)); |
4374 | } |
4375 | void setSuccessor(unsigned Idx, BasicBlock *NewSucc) { |
4376 | assert(Idx < getNumSuccessors() &&
4377 |        "Successor # out of range for catchswitch!");
4378 | setOperand(Idx + 1, NewSucc); |
4379 | } |
4380 | |
4381 | // Methods for support type inquiry through isa, cast, and dyn_cast: |
4382 | static bool classof(const Instruction *I) { |
4383 | return I->getOpcode() == Instruction::CatchSwitch; |
4384 | } |
4385 | static bool classof(const Value *V) { |
4386 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
4387 | } |
4388 | }; |
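// --- Illustrative usage sketch, not part of the original header. ---
// Hypothetical helper: build a catchswitch that unwinds to `UnwindDest`
// (may be null to unwind to the caller) and dispatches to `Handlers`.
static CatchSwitchInst *exampleEmitCatchSwitch(Value *ParentPad,
                                               BasicBlock *UnwindDest,
                                               ArrayRef<BasicBlock *> Handlers,
                                               BasicBlock *InsertAtEnd) {
  CatchSwitchInst *CSI = CatchSwitchInst::Create(
      ParentPad, UnwindDest, Handlers.size(), "catchswitch", InsertAtEnd);
  for (BasicBlock *H : Handlers)
    CSI->addHandler(H);

  // handlers() skips the parent pad and, if present, the unwind destination.
  for (BasicBlock *H : CSI->handlers())
    (void)H;
  return CSI;
}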
4389 | |
4390 | template <> |
4391 | struct OperandTraits<CatchSwitchInst> : public HungoffOperandTraits<2> {}; |
4392 | |
4393 | DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CatchSwitchInst, Value)
4394 | |
4395 | //===----------------------------------------------------------------------===// |
4396 | // CleanupPadInst Class |
4397 | //===----------------------------------------------------------------------===// |
4398 | class CleanupPadInst : public FuncletPadInst { |
4399 | private: |
4400 | explicit CleanupPadInst(Value *ParentPad, ArrayRef<Value *> Args, |
4401 | unsigned Values, const Twine &NameStr, |
4402 | Instruction *InsertBefore) |
4403 | : FuncletPadInst(Instruction::CleanupPad, ParentPad, Args, Values, |
4404 | NameStr, InsertBefore) {} |
4405 | explicit CleanupPadInst(Value *ParentPad, ArrayRef<Value *> Args, |
4406 | unsigned Values, const Twine &NameStr, |
4407 | BasicBlock *InsertAtEnd) |
4408 | : FuncletPadInst(Instruction::CleanupPad, ParentPad, Args, Values, |
4409 | NameStr, InsertAtEnd) {} |
4410 | |
4411 | public: |
4412 | static CleanupPadInst *Create(Value *ParentPad, ArrayRef<Value *> Args = None, |
4413 | const Twine &NameStr = "", |
4414 | Instruction *InsertBefore = nullptr) { |
4415 | unsigned Values = 1 + Args.size(); |
4416 | return new (Values) |
4417 | CleanupPadInst(ParentPad, Args, Values, NameStr, InsertBefore); |
4418 | } |
4419 | |
4420 | static CleanupPadInst *Create(Value *ParentPad, ArrayRef<Value *> Args, |
4421 | const Twine &NameStr, BasicBlock *InsertAtEnd) { |
4422 | unsigned Values = 1 + Args.size(); |
4423 | return new (Values) |
4424 | CleanupPadInst(ParentPad, Args, Values, NameStr, InsertAtEnd); |
4425 | } |
4426 | |
4427 | /// Methods for support type inquiry through isa, cast, and dyn_cast: |
4428 | static bool classof(const Instruction *I) { |
4429 | return I->getOpcode() == Instruction::CleanupPad; |
4430 | } |
4431 | static bool classof(const Value *V) { |
4432 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
4433 | } |
4434 | }; |
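// --- Illustrative usage sketch, not part of the original header. ---
// Hypothetical helper: open a cleanup funclet. `ParentPad` is either the
// enclosing funclet pad or a 'none' token for a top-level cleanup.
static CleanupPadInst *exampleEmitCleanupPad(Value *ParentPad,
                                             BasicBlock *InsertAtEnd) {
  return CleanupPadInst::Create(ParentPad, /*Args=*/None, "cleanuppad",
                                InsertAtEnd);
}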
4435 | |
4436 | //===----------------------------------------------------------------------===// |
4437 | // CatchPadInst Class |
4438 | //===----------------------------------------------------------------------===// |
4439 | class CatchPadInst : public FuncletPadInst { |
4440 | private: |
4441 | explicit CatchPadInst(Value *CatchSwitch, ArrayRef<Value *> Args, |
4442 | unsigned Values, const Twine &NameStr, |
4443 | Instruction *InsertBefore) |
4444 | : FuncletPadInst(Instruction::CatchPad, CatchSwitch, Args, Values, |
4445 | NameStr, InsertBefore) {} |
4446 | explicit CatchPadInst(Value *CatchSwitch, ArrayRef<Value *> Args, |
4447 | unsigned Values, const Twine &NameStr, |
4448 | BasicBlock *InsertAtEnd) |
4449 | : FuncletPadInst(Instruction::CatchPad, CatchSwitch, Args, Values, |
4450 | NameStr, InsertAtEnd) {} |
4451 | |
4452 | public: |
4453 | static CatchPadInst *Create(Value *CatchSwitch, ArrayRef<Value *> Args, |
4454 | const Twine &NameStr = "", |
4455 | Instruction *InsertBefore = nullptr) { |
4456 | unsigned Values = 1 + Args.size(); |
4457 | return new (Values) |
4458 | CatchPadInst(CatchSwitch, Args, Values, NameStr, InsertBefore); |
4459 | } |
4460 | |
4461 | static CatchPadInst *Create(Value *CatchSwitch, ArrayRef<Value *> Args, |
4462 | const Twine &NameStr, BasicBlock *InsertAtEnd) { |
4463 | unsigned Values = 1 + Args.size(); |
4464 | return new (Values) |
4465 | CatchPadInst(CatchSwitch, Args, Values, NameStr, InsertAtEnd); |
4466 | } |
4467 | |
4468 | /// Convenience accessors |
4469 | CatchSwitchInst *getCatchSwitch() const { |
4470 | return cast<CatchSwitchInst>(Op<-1>()); |
4471 | } |
4472 | void setCatchSwitch(Value *CatchSwitch) { |
4473 | assert(CatchSwitch);
4474 | Op<-1>() = CatchSwitch; |
4475 | } |
4476 | |
4477 | /// Methods for support type inquiry through isa, cast, and dyn_cast: |
4478 | static bool classof(const Instruction *I) { |
4479 | return I->getOpcode() == Instruction::CatchPad; |
4480 | } |
4481 | static bool classof(const Value *V) { |
4482 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
4483 | } |
4484 | }; |
4485 | |
4486 | //===----------------------------------------------------------------------===// |
4487 | // CatchReturnInst Class |
4488 | //===----------------------------------------------------------------------===// |
4489 | |
4490 | class CatchReturnInst : public Instruction { |
4491 | CatchReturnInst(const CatchReturnInst &RI); |
4492 | CatchReturnInst(Value *CatchPad, BasicBlock *BB, Instruction *InsertBefore); |
4493 | CatchReturnInst(Value *CatchPad, BasicBlock *BB, BasicBlock *InsertAtEnd); |
4494 | |
4495 | void init(Value *CatchPad, BasicBlock *BB); |
4496 | |
4497 | protected: |
4498 | // Note: Instruction needs to be a friend here to call cloneImpl. |
4499 | friend class Instruction; |
4500 | |
4501 | CatchReturnInst *cloneImpl() const; |
4502 | |
4503 | public: |
4504 | static CatchReturnInst *Create(Value *CatchPad, BasicBlock *BB, |
4505 | Instruction *InsertBefore = nullptr) { |
4506 | assert(CatchPad)((void)0); |
4507 | assert(BB)((void)0); |
4508 | return new (2) CatchReturnInst(CatchPad, BB, InsertBefore); |
4509 | } |
4510 | |
4511 | static CatchReturnInst *Create(Value *CatchPad, BasicBlock *BB, |
4512 | BasicBlock *InsertAtEnd) { |
4513 | assert(CatchPad)((void)0); |
4514 | assert(BB)((void)0); |
4515 | return new (2) CatchReturnInst(CatchPad, BB, InsertAtEnd); |
4516 | } |
4517 | |
4518 | /// Provide fast operand accessors |
4519 | DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value)public: inline Value *getOperand(unsigned) const; inline void setOperand(unsigned, Value*); inline op_iterator op_begin(); inline const_op_iterator op_begin() const; inline op_iterator op_end(); inline const_op_iterator op_end() const; protected : template <int> inline Use &Op(); template <int > inline const Use &Op() const; public: inline unsigned getNumOperands() const; |
4520 | |
4521 | /// Convenience accessors. |
4522 | CatchPadInst *getCatchPad() const { return cast<CatchPadInst>(Op<0>()); } |
4523 | void setCatchPad(CatchPadInst *CatchPad) { |
4524 | assert(CatchPad)((void)0); |
4525 | Op<0>() = CatchPad; |
4526 | } |
4527 | |
4528 | BasicBlock *getSuccessor() const { return cast<BasicBlock>(Op<1>()); } |
4529 | void setSuccessor(BasicBlock *NewSucc) { |
4530 | assert(NewSucc)((void)0); |
4531 | Op<1>() = NewSucc; |
4532 | } |
4533 | unsigned getNumSuccessors() const { return 1; } |
4534 | |
4535 | /// Get the parentPad of this catchret's catchpad's catchswitch. |
4536 | /// The successor block is implicitly a member of this funclet. |
4537 | Value *getCatchSwitchParentPad() const { |
4538 | return getCatchPad()->getCatchSwitch()->getParentPad(); |
4539 | } |
4540 | |
4541 | // Methods for support type inquiry through isa, cast, and dyn_cast: |
4542 | static bool classof(const Instruction *I) { |
4543 | return (I->getOpcode() == Instruction::CatchRet); |
4544 | } |
4545 | static bool classof(const Value *V) { |
4546 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
4547 | } |
4548 | |
4549 | private: |
4550 | BasicBlock *getSuccessor(unsigned Idx) const { |
4551 | assert(Idx < getNumSuccessors() && "Successor # out of range for catchret!")((void)0); |
4552 | return getSuccessor(); |
4553 | } |
4554 | |
4555 | void setSuccessor(unsigned Idx, BasicBlock *B) { |
4556 | assert(Idx < getNumSuccessors() && "Successor # out of range for catchret!")((void)0); |
4557 | setSuccessor(B); |
4558 | } |
4559 | }; |
4560 | |
4561 | template <> |
4562 | struct OperandTraits<CatchReturnInst> |
4563 | : public FixedNumOperandTraits<CatchReturnInst, 2> {}; |
4564 | |
4565 | DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CatchReturnInst, Value)CatchReturnInst::op_iterator CatchReturnInst::op_begin() { return OperandTraits<CatchReturnInst>::op_begin(this); } CatchReturnInst ::const_op_iterator CatchReturnInst::op_begin() const { return OperandTraits<CatchReturnInst>::op_begin(const_cast< CatchReturnInst*>(this)); } CatchReturnInst::op_iterator CatchReturnInst ::op_end() { return OperandTraits<CatchReturnInst>::op_end (this); } CatchReturnInst::const_op_iterator CatchReturnInst:: op_end() const { return OperandTraits<CatchReturnInst>:: op_end(const_cast<CatchReturnInst*>(this)); } Value *CatchReturnInst ::getOperand(unsigned i_nocapture) const { ((void)0); return cast_or_null <Value>( OperandTraits<CatchReturnInst>::op_begin (const_cast<CatchReturnInst*>(this))[i_nocapture].get() ); } void CatchReturnInst::setOperand(unsigned i_nocapture, Value *Val_nocapture) { ((void)0); OperandTraits<CatchReturnInst >::op_begin(this)[i_nocapture] = Val_nocapture; } unsigned CatchReturnInst::getNumOperands() const { return OperandTraits <CatchReturnInst>::operands(this); } template <int Idx_nocapture > Use &CatchReturnInst::Op() { return this->OpFrom< Idx_nocapture>(this); } template <int Idx_nocapture> const Use &CatchReturnInst::Op() const { return this-> OpFrom<Idx_nocapture>(this); } |
4566 | |
4567 | //===----------------------------------------------------------------------===// |
4568 | // CleanupReturnInst Class |
4569 | //===----------------------------------------------------------------------===// |
4570 | |
4571 | class CleanupReturnInst : public Instruction { |
4572 | using UnwindDestField = BoolBitfieldElementT<0>; |
4573 | |
4574 | private: |
4575 | CleanupReturnInst(const CleanupReturnInst &RI); |
4576 | CleanupReturnInst(Value *CleanupPad, BasicBlock *UnwindBB, unsigned Values, |
4577 | Instruction *InsertBefore = nullptr); |
4578 | CleanupReturnInst(Value *CleanupPad, BasicBlock *UnwindBB, unsigned Values, |
4579 | BasicBlock *InsertAtEnd); |
4580 | |
4581 | void init(Value *CleanupPad, BasicBlock *UnwindBB); |
4582 | |
4583 | protected: |
4584 | // Note: Instruction needs to be a friend here to call cloneImpl. |
4585 | friend class Instruction; |
4586 | |
4587 | CleanupReturnInst *cloneImpl() const; |
4588 | |
4589 | public: |
4590 | static CleanupReturnInst *Create(Value *CleanupPad, |
4591 | BasicBlock *UnwindBB = nullptr, |
4592 | Instruction *InsertBefore = nullptr) { |
4593 | assert(CleanupPad)((void)0); |
4594 | unsigned Values = 1; |
4595 | if (UnwindBB) |
4596 | ++Values; |
4597 | return new (Values) |
4598 | CleanupReturnInst(CleanupPad, UnwindBB, Values, InsertBefore); |
4599 | } |
4600 | |
4601 | static CleanupReturnInst *Create(Value *CleanupPad, BasicBlock *UnwindBB, |
4602 | BasicBlock *InsertAtEnd) { |
4603 | assert(CleanupPad)((void)0); |
4604 | unsigned Values = 1; |
4605 | if (UnwindBB) |
4606 | ++Values; |
4607 | return new (Values) |
4608 | CleanupReturnInst(CleanupPad, UnwindBB, Values, InsertAtEnd); |
4609 | } |
4610 | |
4611 | /// Provide fast operand accessors |
4612 | DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value)public: inline Value *getOperand(unsigned) const; inline void setOperand(unsigned, Value*); inline op_iterator op_begin(); inline const_op_iterator op_begin() const; inline op_iterator op_end(); inline const_op_iterator op_end() const; protected : template <int> inline Use &Op(); template <int > inline const Use &Op() const; public: inline unsigned getNumOperands() const; |
4613 | |
4614 | bool hasUnwindDest() const { return getSubclassData<UnwindDestField>(); } |
4615 | bool unwindsToCaller() const { return !hasUnwindDest(); } |
4616 | |
4617 | /// Convenience accessor. |
4618 | CleanupPadInst *getCleanupPad() const { |
4619 | return cast<CleanupPadInst>(Op<0>()); |
4620 | } |
4621 | void setCleanupPad(CleanupPadInst *CleanupPad) { |
4622 | assert(CleanupPad)((void)0); |
4623 | Op<0>() = CleanupPad; |
4624 | } |
4625 | |
4626 | unsigned getNumSuccessors() const { return hasUnwindDest() ? 1 : 0; } |
4627 | |
4628 | BasicBlock *getUnwindDest() const { |
4629 | return hasUnwindDest() ? cast<BasicBlock>(Op<1>()) : nullptr; |
4630 | } |
4631 | void setUnwindDest(BasicBlock *NewDest) { |
4632 | assert(NewDest)((void)0); |
4633 | assert(hasUnwindDest())((void)0); |
4634 | Op<1>() = NewDest; |
4635 | } |
4636 | |
4637 | // Methods for support type inquiry through isa, cast, and dyn_cast: |
4638 | static bool classof(const Instruction *I) { |
4639 | return (I->getOpcode() == Instruction::CleanupRet); |
4640 | } |
4641 | static bool classof(const Value *V) { |
4642 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
4643 | } |
4644 | |
4645 | private: |
4646 | BasicBlock *getSuccessor(unsigned Idx) const { |
4647 | assert(Idx == 0)((void)0); |
4648 | return getUnwindDest(); |
4649 | } |
4650 | |
4651 | void setSuccessor(unsigned Idx, BasicBlock *B) { |
4652 | assert(Idx == 0)((void)0); |
4653 | setUnwindDest(B); |
4654 | } |
4655 | |
4656 | // Shadow Instruction::setInstructionSubclassData with a private forwarding |
4657 | // method so that subclasses cannot accidentally use it. |
4658 | template <typename Bitfield> |
4659 | void setSubclassData(typename Bitfield::Type Value) { |
4660 | Instruction::setSubclassData<Bitfield>(Value); |
4661 | } |
4662 | }; |
4663 | |
4664 | template <> |
4665 | struct OperandTraits<CleanupReturnInst> |
4666 | : public VariadicOperandTraits<CleanupReturnInst, /*MINARITY=*/1> {}; |
4667 | |
4668 | DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CleanupReturnInst, Value)CleanupReturnInst::op_iterator CleanupReturnInst::op_begin() { return OperandTraits<CleanupReturnInst>::op_begin(this ); } CleanupReturnInst::const_op_iterator CleanupReturnInst:: op_begin() const { return OperandTraits<CleanupReturnInst> ::op_begin(const_cast<CleanupReturnInst*>(this)); } CleanupReturnInst ::op_iterator CleanupReturnInst::op_end() { return OperandTraits <CleanupReturnInst>::op_end(this); } CleanupReturnInst:: const_op_iterator CleanupReturnInst::op_end() const { return OperandTraits <CleanupReturnInst>::op_end(const_cast<CleanupReturnInst *>(this)); } Value *CleanupReturnInst::getOperand(unsigned i_nocapture) const { ((void)0); return cast_or_null<Value >( OperandTraits<CleanupReturnInst>::op_begin(const_cast <CleanupReturnInst*>(this))[i_nocapture].get()); } void CleanupReturnInst::setOperand(unsigned i_nocapture, Value *Val_nocapture ) { ((void)0); OperandTraits<CleanupReturnInst>::op_begin (this)[i_nocapture] = Val_nocapture; } unsigned CleanupReturnInst ::getNumOperands() const { return OperandTraits<CleanupReturnInst >::operands(this); } template <int Idx_nocapture> Use &CleanupReturnInst::Op() { return this->OpFrom<Idx_nocapture >(this); } template <int Idx_nocapture> const Use & CleanupReturnInst::Op() const { return this->OpFrom<Idx_nocapture >(this); } |
4669 | |
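As a rough usage sketch (not taken from this header), the Create factories above can be paired to build a minimal cleanup funclet by hand. The helper name emitTrivialCleanup and the block label are made up, and the prerequisites for verifier-clean exception-handling IR (a personality function on the enclosing function and unwind edges into the new block) are left out.

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

// Append a block holding a cleanuppad paired with a cleanupret that unwinds
// to the caller.
static BasicBlock *emitTrivialCleanup(Function &F) {
  LLVMContext &Ctx = F.getContext();
  BasicBlock *Cleanup = BasicBlock::Create(Ctx, "cleanup", &F);
  // ConstantTokenNone as the parent pad nests the cleanup directly under the
  // function rather than under another funclet.
  auto *Pad = CleanupPadInst::Create(ConstantTokenNone::get(Ctx),
                                     /*Args=*/{}, "cp", Cleanup);
  // A null unwind block selects the "unwind to caller" form, so
  // hasUnwindDest() is false and unwindsToCaller() is true.
  CleanupReturnInst::Create(Pad, /*UnwindBB=*/nullptr, Cleanup);
  return Cleanup;
}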
4670 | //===----------------------------------------------------------------------===// |
4671 | // UnreachableInst Class |
4672 | //===----------------------------------------------------------------------===// |
4673 | |
4674 | //===--------------------------------------------------------------------------- |
4675 | /// This instruction has undefined behavior. In particular, the |
4676 | /// presence of this instruction indicates some higher level knowledge that the |
4677 | /// end of the block cannot be reached. |
4678 | /// |
4679 | class UnreachableInst : public Instruction { |
4680 | protected: |
4681 | // Note: Instruction needs to be a friend here to call cloneImpl. |
4682 | friend class Instruction; |
4683 | |
4684 | UnreachableInst *cloneImpl() const; |
4685 | |
4686 | public: |
4687 | explicit UnreachableInst(LLVMContext &C, Instruction *InsertBefore = nullptr); |
4688 | explicit UnreachableInst(LLVMContext &C, BasicBlock *InsertAtEnd); |
4689 | |
4690 | // allocate space for exactly zero operands |
4691 | void *operator new(size_t S) { return User::operator new(S, 0); } |
4692 | void operator delete(void *Ptr) { User::operator delete(Ptr); } |
4693 | |
4694 | unsigned getNumSuccessors() const { return 0; } |
4695 | |
4696 | // Methods for support type inquiry through isa, cast, and dyn_cast: |
4697 | static bool classof(const Instruction *I) { |
4698 | return I->getOpcode() == Instruction::Unreachable; |
4699 | } |
4700 | static bool classof(const Value *V) { |
4701 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
4702 | } |
4703 | |
4704 | private: |
4705 | BasicBlock *getSuccessor(unsigned idx) const { |
4706 | llvm_unreachable("UnreachableInst has no successors!")__builtin_unreachable(); |
4707 | } |
4708 | |
4709 | void setSuccessor(unsigned idx, BasicBlock *B) { |
4710 | llvm_unreachable("UnreachableInst has no successors!")__builtin_unreachable(); |
4711 | } |
4712 | }; |
4713 | |
4714 | //===----------------------------------------------------------------------===// |
4715 | // TruncInst Class |
4716 | //===----------------------------------------------------------------------===// |
4717 | |
4718 | /// This class represents a truncation of integer types. |
4719 | class TruncInst : public CastInst { |
4720 | protected: |
4721 | // Note: Instruction needs to be a friend here to call cloneImpl. |
4722 | friend class Instruction; |
4723 | |
4724 | /// Clone an identical TruncInst |
4725 | TruncInst *cloneImpl() const; |
4726 | |
4727 | public: |
4728 | /// Constructor with insert-before-instruction semantics |
4729 | TruncInst( |
4730 | Value *S, ///< The value to be truncated |
4731 | Type *Ty, ///< The (smaller) type to truncate to |
4732 | const Twine &NameStr = "", ///< A name for the new instruction |
4733 | Instruction *InsertBefore = nullptr ///< Where to insert the new instruction |
4734 | ); |
4735 | |
4736 | /// Constructor with insert-at-end-of-block semantics |
4737 | TruncInst( |
4738 | Value *S, ///< The value to be truncated |
4739 | Type *Ty, ///< The (smaller) type to truncate to |
4740 | const Twine &NameStr, ///< A name for the new instruction |
4741 | BasicBlock *InsertAtEnd ///< The block to insert the instruction into |
4742 | ); |
4743 | |
4744 | /// Methods for support type inquiry through isa, cast, and dyn_cast: |
4745 | static bool classof(const Instruction *I) { |
4746 | return I->getOpcode() == Trunc; |
4747 | } |
4748 | static bool classof(const Value *V) { |
4749 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
4750 | } |
4751 | }; |
4752 | |
4753 | //===----------------------------------------------------------------------===// |
4754 | // ZExtInst Class |
4755 | //===----------------------------------------------------------------------===// |
4756 | |
4757 | /// This class represents zero extension of integer types. |
4758 | class ZExtInst : public CastInst { |
4759 | protected: |
4760 | // Note: Instruction needs to be a friend here to call cloneImpl. |
4761 | friend class Instruction; |
4762 | |
4763 | /// Clone an identical ZExtInst |
4764 | ZExtInst *cloneImpl() const; |
4765 | |
4766 | public: |
4767 | /// Constructor with insert-before-instruction semantics |
4768 | ZExtInst( |
4769 | Value *S, ///< The value to be zero extended |
4770 | Type *Ty, ///< The type to zero extend to |
4771 | const Twine &NameStr = "", ///< A name for the new instruction |
4772 | Instruction *InsertBefore = nullptr ///< Where to insert the new instruction |
4773 | ); |
4774 | |
4775 | /// Constructor with insert-at-end semantics. |
4776 | ZExtInst( |
4777 | Value *S, ///< The value to be zero extended |
4778 | Type *Ty, ///< The type to zero extend to |
4779 | const Twine &NameStr, ///< A name for the new instruction |
4780 | BasicBlock *InsertAtEnd ///< The block to insert the instruction into |
4781 | ); |
4782 | |
4783 | /// Methods for support type inquiry through isa, cast, and dyn_cast: |
4784 | static bool classof(const Instruction *I) { |
4785 | return I->getOpcode() == ZExt; |
4786 | } |
4787 | static bool classof(const Value *V) { |
4788 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
4789 | } |
4790 | }; |
4791 | |
4792 | //===----------------------------------------------------------------------===// |
4793 | // SExtInst Class |
4794 | //===----------------------------------------------------------------------===// |
4795 | |
4796 | /// This class represents a sign extension of integer types. |
4797 | class SExtInst : public CastInst { |
4798 | protected: |
4799 | // Note: Instruction needs to be a friend here to call cloneImpl. |
4800 | friend class Instruction; |
4801 | |
4802 | /// Clone an identical SExtInst |
4803 | SExtInst *cloneImpl() const; |
4804 | |
4805 | public: |
4806 | /// Constructor with insert-before-instruction semantics |
4807 | SExtInst( |
4808 | Value *S, ///< The value to be sign extended |
4809 | Type *Ty, ///< The type to sign extend to |
4810 | const Twine &NameStr = "", ///< A name for the new instruction |
4811 | Instruction *InsertBefore = nullptr ///< Where to insert the new instruction |
4812 | ); |
4813 | |
4814 | /// Constructor with insert-at-end-of-block semantics |
4815 | SExtInst( |
4816 | Value *S, ///< The value to be sign extended |
4817 | Type *Ty, ///< The type to sign extend to |
4818 | const Twine &NameStr, ///< A name for the new instruction |
4819 | BasicBlock *InsertAtEnd ///< The block to insert the instruction into |
4820 | ); |
4821 | |
4822 | /// Methods for support type inquiry through isa, cast, and dyn_cast: |
4823 | static bool classof(const Instruction *I) { |
4824 | return I->getOpcode() == SExt; |
4825 | } |
4826 | static bool classof(const Value *V) { |
4827 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
4828 | } |
4829 | }; |
4830 | |
4831 | //===----------------------------------------------------------------------===// |
4832 | // FPTruncInst Class |
4833 | //===----------------------------------------------------------------------===// |
4834 | |
4835 | /// This class represents a truncation of floating point types. |
4836 | class FPTruncInst : public CastInst { |
4837 | protected: |
4838 | // Note: Instruction needs to be a friend here to call cloneImpl. |
4839 | friend class Instruction; |
4840 | |
4841 | /// Clone an identical FPTruncInst |
4842 | FPTruncInst *cloneImpl() const; |
4843 | |
4844 | public: |
4845 | /// Constructor with insert-before-instruction semantics |
4846 | FPTruncInst( |
4847 | Value *S, ///< The value to be truncated |
4848 | Type *Ty, ///< The type to truncate to |
4849 | const Twine &NameStr = "", ///< A name for the new instruction |
4850 | Instruction *InsertBefore = nullptr ///< Where to insert the new instruction |
4851 | ); |
4852 | |
4853 | /// Constructor with insert-at-end-of-block semantics |
4854 | FPTruncInst( |
4855 | Value *S, ///< The value to be truncated |
4856 | Type *Ty, ///< The type to truncate to |
4857 | const Twine &NameStr, ///< A name for the new instruction |
4858 | BasicBlock *InsertAtEnd ///< The block to insert the instruction into |
4859 | ); |
4860 | |
4861 | /// Methods for support type inquiry through isa, cast, and dyn_cast: |
4862 | static bool classof(const Instruction *I) { |
4863 | return I->getOpcode() == FPTrunc; |
4864 | } |
4865 | static bool classof(const Value *V) { |
4866 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
4867 | } |
4868 | }; |
4869 | |
4870 | //===----------------------------------------------------------------------===// |
4871 | // FPExtInst Class |
4872 | //===----------------------------------------------------------------------===// |
4873 | |
4874 | /// This class represents an extension of floating point types. |
4875 | class FPExtInst : public CastInst { |
4876 | protected: |
4877 | // Note: Instruction needs to be a friend here to call cloneImpl. |
4878 | friend class Instruction; |
4879 | |
4880 | /// Clone an identical FPExtInst |
4881 | FPExtInst *cloneImpl() const; |
4882 | |
4883 | public: |
4884 | /// Constructor with insert-before-instruction semantics |
4885 | FPExtInst( |
4886 | Value *S, ///< The value to be extended |
4887 | Type *Ty, ///< The type to extend to |
4888 | const Twine &NameStr = "", ///< A name for the new instruction |
4889 | Instruction *InsertBefore = nullptr ///< Where to insert the new instruction |
4890 | ); |
4891 | |
4892 | /// Constructor with insert-at-end-of-block semantics |
4893 | FPExtInst( |
4894 | Value *S, ///< The value to be extended |
4895 | Type *Ty, ///< The type to extend to |
4896 | const Twine &NameStr, ///< A name for the new instruction |
4897 | BasicBlock *InsertAtEnd ///< The block to insert the instruction into |
4898 | ); |
4899 | |
4900 | /// Methods for support type inquiry through isa, cast, and dyn_cast: |
4901 | static bool classof(const Instruction *I) { |
4902 | return I->getOpcode() == FPExt; |
4903 | } |
4904 | static bool classof(const Value *V) { |
4905 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
4906 | } |
4907 | }; |
4908 | |
4909 | //===----------------------------------------------------------------------===// |
4910 | // UIToFPInst Class |
4911 | //===----------------------------------------------------------------------===// |
4912 | |
4913 | /// This class represents a cast from unsigned integer to floating point. |
4914 | class UIToFPInst : public CastInst { |
4915 | protected: |
4916 | // Note: Instruction needs to be a friend here to call cloneImpl. |
4917 | friend class Instruction; |
4918 | |
4919 | /// Clone an identical UIToFPInst |
4920 | UIToFPInst *cloneImpl() const; |
4921 | |
4922 | public: |
4923 | /// Constructor with insert-before-instruction semantics |
4924 | UIToFPInst( |
4925 | Value *S, ///< The value to be converted |
4926 | Type *Ty, ///< The type to convert to |
4927 | const Twine &NameStr = "", ///< A name for the new instruction |
4928 | Instruction *InsertBefore = nullptr ///< Where to insert the new instruction |
4929 | ); |
4930 | |
4931 | /// Constructor with insert-at-end-of-block semantics |
4932 | UIToFPInst( |
4933 | Value *S, ///< The value to be converted |
4934 | Type *Ty, ///< The type to convert to |
4935 | const Twine &NameStr, ///< A name for the new instruction |
4936 | BasicBlock *InsertAtEnd ///< The block to insert the instruction into |
4937 | ); |
4938 | |
4939 | /// Methods for support type inquiry through isa, cast, and dyn_cast: |
4940 | static bool classof(const Instruction *I) { |
4941 | return I->getOpcode() == UIToFP; |
4942 | } |
4943 | static bool classof(const Value *V) { |
4944 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
4945 | } |
4946 | }; |
4947 | |
4948 | //===----------------------------------------------------------------------===// |
4949 | // SIToFPInst Class |
4950 | //===----------------------------------------------------------------------===// |
4951 | |
4952 | /// This class represents a cast from signed integer to floating point. |
4953 | class SIToFPInst : public CastInst { |
4954 | protected: |
4955 | // Note: Instruction needs to be a friend here to call cloneImpl. |
4956 | friend class Instruction; |
4957 | |
4958 | /// Clone an identical SIToFPInst |
4959 | SIToFPInst *cloneImpl() const; |
4960 | |
4961 | public: |
4962 | /// Constructor with insert-before-instruction semantics |
4963 | SIToFPInst( |
4964 | Value *S, ///< The value to be converted |
4965 | Type *Ty, ///< The type to convert to |
4966 | const Twine &NameStr = "", ///< A name for the new instruction |
4967 | Instruction *InsertBefore = nullptr ///< Where to insert the new instruction |
4968 | ); |
4969 | |
4970 | /// Constructor with insert-at-end-of-block semantics |
4971 | SIToFPInst( |
4972 | Value *S, ///< The value to be converted |
4973 | Type *Ty, ///< The type to convert to |
4974 | const Twine &NameStr, ///< A name for the new instruction |
4975 | BasicBlock *InsertAtEnd ///< The block to insert the instruction into |
4976 | ); |
4977 | |
4978 | /// Methods for support type inquiry through isa, cast, and dyn_cast: |
4979 | static bool classof(const Instruction *I) { |
4980 | return I->getOpcode() == SIToFP; |
4981 | } |
4982 | static bool classof(const Value *V) { |
4983 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
4984 | } |
4985 | }; |
4986 | |
4987 | //===----------------------------------------------------------------------===// |
4988 | // FPToUIInst Class |
4989 | //===----------------------------------------------------------------------===// |
4990 | |
4991 | /// This class represents a cast from floating point to unsigned integer. |
4992 | class FPToUIInst : public CastInst { |
4993 | protected: |
4994 | // Note: Instruction needs to be a friend here to call cloneImpl. |
4995 | friend class Instruction; |
4996 | |
4997 | /// Clone an identical FPToUIInst |
4998 | FPToUIInst *cloneImpl() const; |
4999 | |
5000 | public: |
5001 | /// Constructor with insert-before-instruction semantics |
5002 | FPToUIInst( |
5003 | Value *S, ///< The value to be converted |
5004 | Type *Ty, ///< The type to convert to |
5005 | const Twine &NameStr = "", ///< A name for the new instruction |
5006 | Instruction *InsertBefore = nullptr ///< Where to insert the new instruction |
5007 | ); |
5008 | |
5009 | /// Constructor with insert-at-end-of-block semantics |
5010 | FPToUIInst( |
5011 | Value *S, ///< The value to be converted |
5012 | Type *Ty, ///< The type to convert to |
5013 | const Twine &NameStr, ///< A name for the new instruction |
5014 | BasicBlock *InsertAtEnd ///< The block to insert the instruction into |
5015 | ); |
5016 | |
5017 | /// Methods for support type inquiry through isa, cast, and dyn_cast: |
5018 | static bool classof(const Instruction *I) { |
5019 | return I->getOpcode() == FPToUI; |
5020 | } |
5021 | static bool classof(const Value *V) { |
5022 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
5023 | } |
5024 | }; |
5025 | |
5026 | //===----------------------------------------------------------------------===// |
5027 | // FPToSIInst Class |
5028 | //===----------------------------------------------------------------------===// |
5029 | |
5030 | /// This class represents a cast from floating point to signed integer. |
5031 | class FPToSIInst : public CastInst { |
5032 | protected: |
5033 | // Note: Instruction needs to be a friend here to call cloneImpl. |
5034 | friend class Instruction; |
5035 | |
5036 | /// Clone an identical FPToSIInst |
5037 | FPToSIInst *cloneImpl() const; |
5038 | |
5039 | public: |
5040 | /// Constructor with insert-before-instruction semantics |
5041 | FPToSIInst( |
5042 | Value *S, ///< The value to be converted |
5043 | Type *Ty, ///< The type to convert to |
5044 | const Twine &NameStr = "", ///< A name for the new instruction |
5045 | Instruction *InsertBefore = nullptr ///< Where to insert the new instruction |
5046 | ); |
5047 | |
5048 | /// Constructor with insert-at-end-of-block semantics |
5049 | FPToSIInst( |
5050 | Value *S, ///< The value to be converted |
5051 | Type *Ty, ///< The type to convert to |
5052 | const Twine &NameStr, ///< A name for the new instruction |
5053 | BasicBlock *InsertAtEnd ///< The block to insert the instruction into |
5054 | ); |
5055 | |
5056 | /// Methods for support type inquiry through isa, cast, and dyn_cast: |
5057 | static bool classof(const Instruction *I) { |
5058 | return I->getOpcode() == FPToSI; |
5059 | } |
5060 | static bool classof(const Value *V) { |
5061 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
5062 | } |
5063 | }; |
5064 | |
5065 | //===----------------------------------------------------------------------===// |
5066 | // IntToPtrInst Class |
5067 | //===----------------------------------------------------------------------===// |
5068 | |
5069 | /// This class represents a cast from an integer to a pointer. |
5070 | class IntToPtrInst : public CastInst { |
5071 | public: |
5072 | // Note: Instruction needs to be a friend here to call cloneImpl. |
5073 | friend class Instruction; |
5074 | |
5075 | /// Constructor with insert-before-instruction semantics |
5076 | IntToPtrInst( |
5077 | Value *S, ///< The value to be converted |
5078 | Type *Ty, ///< The type to convert to |
5079 | const Twine &NameStr = "", ///< A name for the new instruction |
5080 | Instruction *InsertBefore = nullptr ///< Where to insert the new instruction |
5081 | ); |
5082 | |
5083 | /// Constructor with insert-at-end-of-block semantics |
5084 | IntToPtrInst( |
5085 | Value *S, ///< The value to be converted |
5086 | Type *Ty, ///< The type to convert to |
5087 | const Twine &NameStr, ///< A name for the new instruction |
5088 | BasicBlock *InsertAtEnd ///< The block to insert the instruction into |
5089 | ); |
5090 | |
5091 | /// Clone an identical IntToPtrInst. |
5092 | IntToPtrInst *cloneImpl() const; |
5093 | |
5094 | /// Returns the address space of this instruction's pointer type. |
5095 | unsigned getAddressSpace() const { |
5096 | return getType()->getPointerAddressSpace(); |
5097 | } |
5098 | |
5099 | // Methods for support type inquiry through isa, cast, and dyn_cast: |
5100 | static bool classof(const Instruction *I) { |
5101 | return I->getOpcode() == IntToPtr; |
5102 | } |
5103 | static bool classof(const Value *V) { |
5104 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
5105 | } |
5106 | }; |
5107 | |
5108 | //===----------------------------------------------------------------------===// |
5109 | // PtrToIntInst Class |
5110 | //===----------------------------------------------------------------------===// |
5111 | |
5112 | /// This class represents a cast from a pointer to an integer. |
5113 | class PtrToIntInst : public CastInst { |
5114 | protected: |
5115 | // Note: Instruction needs to be a friend here to call cloneImpl. |
5116 | friend class Instruction; |
5117 | |
5118 | /// Clone an identical PtrToIntInst. |
5119 | PtrToIntInst *cloneImpl() const; |
5120 | |
5121 | public: |
5122 | /// Constructor with insert-before-instruction semantics |
5123 | PtrToIntInst( |
5124 | Value *S, ///< The value to be converted |
5125 | Type *Ty, ///< The type to convert to |
5126 | const Twine &NameStr = "", ///< A name for the new instruction |
5127 | Instruction *InsertBefore = nullptr ///< Where to insert the new instruction |
5128 | ); |
5129 | |
5130 | /// Constructor with insert-at-end-of-block semantics |
5131 | PtrToIntInst( |
5132 | Value *S, ///< The value to be converted |
5133 | Type *Ty, ///< The type to convert to |
5134 | const Twine &NameStr, ///< A name for the new instruction |
5135 | BasicBlock *InsertAtEnd ///< The block to insert the instruction into |
5136 | ); |
5137 | |
5138 | /// Gets the pointer operand. |
5139 | Value *getPointerOperand() { return getOperand(0); } |
5140 | /// Gets the pointer operand. |
5141 | const Value *getPointerOperand() const { return getOperand(0); } |
5142 | /// Gets the operand index of the pointer operand. |
5143 | static unsigned getPointerOperandIndex() { return 0U; } |
5144 | |
5145 | /// Returns the address space of the pointer operand. |
5146 | unsigned getPointerAddressSpace() const { |
5147 | return getPointerOperand()->getType()->getPointerAddressSpace(); |
5148 | } |
5149 | |
5150 | // Methods for support type inquiry through isa, cast, and dyn_cast: |
5151 | static bool classof(const Instruction *I) { |
5152 | return I->getOpcode() == PtrToInt; |
5153 | } |
5154 | static bool classof(const Value *V) { |
5155 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
5156 | } |
5157 | }; |
5158 | |
5159 | //===----------------------------------------------------------------------===// |
5160 | // BitCastInst Class |
5161 | //===----------------------------------------------------------------------===// |
5162 | |
5163 | /// This class represents a no-op cast from one type to another. |
5164 | class BitCastInst : public CastInst { |
5165 | protected: |
5166 | // Note: Instruction needs to be a friend here to call cloneImpl. |
5167 | friend class Instruction; |
5168 | |
5169 | /// Clone an identical BitCastInst. |
5170 | BitCastInst *cloneImpl() const; |
5171 | |
5172 | public: |
5173 | /// Constructor with insert-before-instruction semantics |
5174 | BitCastInst( |
5175 | Value *S, ///< The value to be cast |
5176 | Type *Ty, ///< The type to cast to |
5177 | const Twine &NameStr = "", ///< A name for the new instruction |
5178 | Instruction *InsertBefore = nullptr ///< Where to insert the new instruction |
5179 | ); |
5180 | |
5181 | /// Constructor with insert-at-end-of-block semantics |
5182 | BitCastInst( |
5183 | Value *S, ///< The value to be cast |
5184 | Type *Ty, ///< The type to cast to |
5185 | const Twine &NameStr, ///< A name for the new instruction |
5186 | BasicBlock *InsertAtEnd ///< The block to insert the instruction into |
5187 | ); |
5188 | |
5189 | // Methods for support type inquiry through isa, cast, and dyn_cast: |
5190 | static bool classof(const Instruction *I) { |
5191 | return I->getOpcode() == BitCast; |
5192 | } |
5193 | static bool classof(const Value *V) { |
5194 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
5195 | } |
5196 | }; |
5197 | |
5198 | //===----------------------------------------------------------------------===// |
5199 | // AddrSpaceCastInst Class |
5200 | //===----------------------------------------------------------------------===// |
5201 | |
5202 | /// This class represents a conversion between pointers from one address space |
5203 | /// to another. |
5204 | class AddrSpaceCastInst : public CastInst { |
5205 | protected: |
5206 | // Note: Instruction needs to be a friend here to call cloneImpl. |
5207 | friend class Instruction; |
5208 | |
5209 | /// Clone an identical AddrSpaceCastInst. |
5210 | AddrSpaceCastInst *cloneImpl() const; |
5211 | |
5212 | public: |
5213 | /// Constructor with insert-before-instruction semantics |
5214 | AddrSpaceCastInst( |
5215 | Value *S, ///< The value to be cast |
5216 | Type *Ty, ///< The type to cast to |
5217 | const Twine &NameStr = "", ///< A name for the new instruction |
5218 | Instruction *InsertBefore = nullptr ///< Where to insert the new instruction |
5219 | ); |
5220 | |
5221 | /// Constructor with insert-at-end-of-block semantics |
5222 | AddrSpaceCastInst( |
5223 | Value *S, ///< The value to be cast |
5224 | Type *Ty, ///< The type to cast to |
5225 | const Twine &NameStr, ///< A name for the new instruction |
5226 | BasicBlock *InsertAtEnd ///< The block to insert the instruction into |
5227 | ); |
5228 | |
5229 | // Methods for support type inquiry through isa, cast, and dyn_cast: |
5230 | static bool classof(const Instruction *I) { |
5231 | return I->getOpcode() == AddrSpaceCast; |
5232 | } |
5233 | static bool classof(const Value *V) { |
5234 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
5235 | } |
5236 | |
5237 | /// Gets the pointer operand. |
5238 | Value *getPointerOperand() { |
5239 | return getOperand(0); |
5240 | } |
5241 | |
5242 | /// Gets the pointer operand. |
5243 | const Value *getPointerOperand() const { |
5244 | return getOperand(0); |
5245 | } |
5246 | |
5247 | /// Gets the operand index of the pointer operand. |
5248 | static unsigned getPointerOperandIndex() { |
5249 | return 0U; |
5250 | } |
5251 | |
5252 | /// Returns the address space of the pointer operand. |
5253 | unsigned getSrcAddressSpace() const { |
5254 | return getPointerOperand()->getType()->getPointerAddressSpace(); |
5255 | } |
5256 | |
5257 | /// Returns the address space of the result. |
5258 | unsigned getDestAddressSpace() const { |
5259 | return getType()->getPointerAddressSpace(); |
5260 | } |
5261 | }; |
5262 | |
5263 | /// A helper function that returns the pointer operand of a load or store |
5264 | /// instruction. Returns nullptr if not load or store. |
5265 | inline const Value *getLoadStorePointerOperand(const Value *V) { |
5266 | if (auto *Load = dyn_cast<LoadInst>(V)) |
5267 | return Load->getPointerOperand(); |
5268 | if (auto *Store = dyn_cast<StoreInst>(V)) |
5269 | return Store->getPointerOperand(); |
5270 | return nullptr; |
5271 | } |
5272 | inline Value *getLoadStorePointerOperand(Value *V) { |
5273 | return const_cast<Value *>( |
5274 | getLoadStorePointerOperand(static_cast<const Value *>(V))); |
5275 | } |
5276 | |
5277 | /// A helper function that returns the pointer operand of a load, store |
5278 | /// or GEP instruction. Returns nullptr if not load, store, or GEP. |
5279 | inline const Value *getPointerOperand(const Value *V) { |
5280 | if (auto *Ptr = getLoadStorePointerOperand(V)) |
5281 | return Ptr; |
5282 | if (auto *Gep = dyn_cast<GetElementPtrInst>(V)) |
5283 | return Gep->getPointerOperand(); |
5284 | return nullptr; |
5285 | } |
5286 | inline Value *getPointerOperand(Value *V) { |
5287 | return const_cast<Value *>(getPointerOperand(static_cast<const Value *>(V))); |
5288 | } |
5289 | |
5290 | /// A helper function that returns the alignment of a load or store instruction. |
5291 | inline Align getLoadStoreAlignment(Value *I) { |
5292 | assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&((void)0) |
5293 | "Expected Load or Store instruction")((void)0); |
5294 | if (auto *LI = dyn_cast<LoadInst>(I)) |
5295 | return LI->getAlign(); |
5296 | return cast<StoreInst>(I)->getAlign(); |
5297 | } |
5298 | |
5299 | /// A helper function that returns the address space of the pointer operand of |
5300 | /// a load or store instruction. |
5301 | inline unsigned getLoadStoreAddressSpace(Value *I) { |
5302 | assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&((void)0) |
5303 | "Expected Load or Store instruction")((void)0); |
5304 | if (auto *LI = dyn_cast<LoadInst>(I)) |
5305 | return LI->getPointerAddressSpace(); |
5306 | return cast<StoreInst>(I)->getPointerAddressSpace(); |
5307 | } |
5308 | |
5309 | /// A helper function that returns the type of a load or store instruction. |
5310 | inline Type *getLoadStoreType(Value *I) { |
5311 | assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&((void)0) |
5312 | "Expected Load or Store instruction")((void)0); |
5313 | if (auto *LI = dyn_cast<LoadInst>(I)) |
5314 | return LI->getType(); |
5315 | return cast<StoreInst>(I)->getValueOperand()->getType(); |
5316 | } |
5317 | |
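A minimal sketch of how the four load/store helpers defined just above are typically combined; the function name describeMemAccess is made up, and the instruction I is assumed to come from elsewhere.

#include "llvm/IR/Instructions.h"
#include "llvm/Support/Alignment.h"

using namespace llvm;

static void describeMemAccess(Instruction *I) {
  // The helpers assert on anything other than a load or a store, so filter
  // with the classof-based isa<> checks first.
  if (!isa<LoadInst>(I) && !isa<StoreInst>(I))
    return;
  Value *Ptr = getLoadStorePointerOperand(I);   // address being accessed
  Align Alignment = getLoadStoreAlignment(I);   // never zero, at least Align(1)
  unsigned AS = getLoadStoreAddressSpace(I);    // address space of the pointer
  Type *AccessTy = getLoadStoreType(I);         // value type loaded or stored
  (void)Ptr; (void)Alignment; (void)AS; (void)AccessTy;
}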
5318 | //===----------------------------------------------------------------------===// |
5319 | // FreezeInst Class |
5320 | //===----------------------------------------------------------------------===// |
5321 | |
5322 | /// This class represents a freeze function that returns a random concrete |
5323 | /// value if its operand is either a poison value or an undef value. |
5324 | class FreezeInst : public UnaryInstruction { |
5325 | protected: |
5326 | // Note: Instruction needs to be a friend here to call cloneImpl. |
5327 | friend class Instruction; |
5328 | |
5329 | /// Clone an identical FreezeInst |
5330 | FreezeInst *cloneImpl() const; |
5331 | |
5332 | public: |
5333 | explicit FreezeInst(Value *S, |
5334 | const Twine &NameStr = "", |
5335 | Instruction *InsertBefore = nullptr); |
5336 | FreezeInst(Value *S, const Twine &NameStr, BasicBlock *InsertAtEnd); |
5337 | |
5338 | // Methods for support type inquiry through isa, cast, and dyn_cast: |
5339 | static inline bool classof(const Instruction *I) { |
5340 | return I->getOpcode() == Freeze; |
5341 | } |
5342 | static inline bool classof(const Value *V) { |
5343 | return isa<Instruction>(V) && classof(cast<Instruction>(V)); |
5344 | } |
5345 | }; |
5346 | |
5347 | } // end namespace llvm |
5348 | |
5349 | #endif // LLVM_IR_INSTRUCTIONS_H |
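A minimal sketch of the pattern FreezeInst exists for: pinning a possibly poison or undef value to some concrete value before it is branched on. The helper name freezeIfNeeded and the ".fr" name suffix are illustrative only.

#include "llvm/IR/Instructions.h"

using namespace llvm;

// Return a frozen version of V, inserted before InsertBefore, unless V is
// already the result of a freeze.
static Value *freezeIfNeeded(Value *V, Instruction *InsertBefore) {
  if (isa<FreezeInst>(V))
    return V;
  // After the freeze, branching on the value is no longer immediate undefined
  // behavior even if V was poison or undef.
  return new FreezeInst(V, V->getName() + ".fr", InsertBefore);
}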
1 | //===-- llvm/Support/Alignment.h - Useful alignment functions ---*- C++ -*-===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | // This file contains types to represent alignments. | |||
10 | // They are instrumented to guarantee some invariants are preserved and prevent | |||
11 | // invalid manipulations. | |||
12 | // | |||
13 | // - Align represents an alignment in bytes; it is always set and always a valid | |||
14 | // power of two. Its minimum value is 1, which means no alignment requirements. | |||
15 | // | |||
16 | // - MaybeAlign is an optional type; it may be undefined or set. When it's set, | |||
17 | // you can get the underlying Align type by using the getValue() method. | |||
18 | // | |||
19 | //===----------------------------------------------------------------------===// | |||
20 | ||||
21 | #ifndef LLVM_SUPPORT_ALIGNMENT_H_ | |||
22 | #define LLVM_SUPPORT_ALIGNMENT_H_ | |||
23 | ||||
24 | #include "llvm/ADT/Optional.h" | |||
25 | #include "llvm/Support/MathExtras.h" | |||
26 | #include <cassert> | |||
27 | #ifndef NDEBUG1 | |||
28 | #include <string> | |||
29 | #endif // NDEBUG | |||
30 | ||||
31 | namespace llvm { | |||
32 | ||||
33 | #define ALIGN_CHECK_ISPOSITIVE(decl) \ | |||
34 | assert(decl > 0 && (#decl " should be defined"))((void)0) | |||
35 | ||||
36 | /// This struct is a compact representation of a valid (non-zero power of two) | |||
37 | /// alignment. | |||
38 | /// It is suitable for use as static global constants. | |||
39 | struct Align { | |||
40 | private: | |||
41 | uint8_t ShiftValue = 0; /// The log2 of the required alignment. | |||
42 | /// ShiftValue is less than 64 by construction. | |||
43 | ||||
44 | friend struct MaybeAlign; | |||
45 | friend unsigned Log2(Align); | |||
46 | friend bool operator==(Align Lhs, Align Rhs); | |||
47 | friend bool operator!=(Align Lhs, Align Rhs); | |||
48 | friend bool operator<=(Align Lhs, Align Rhs); | |||
49 | friend bool operator>=(Align Lhs, Align Rhs); | |||
50 | friend bool operator<(Align Lhs, Align Rhs); | |||
51 | friend bool operator>(Align Lhs, Align Rhs); | |||
52 | friend unsigned encode(struct MaybeAlign A); | |||
53 | friend struct MaybeAlign decodeMaybeAlign(unsigned Value); | |||
54 | ||||
55 | /// A trivial type to allow construction of constexpr Align. | |||
56 | /// This is currently needed to work around a bug in GCC 5.3 which prevents | |||
57 | /// definition of constexpr assign operators. | |||
58 | /// https://stackoverflow.com/questions/46756288/explicitly-defaulted-function-cannot-be-declared-as-constexpr-because-the-implic | |||
59 | /// FIXME: Remove this, make all assign operators constexpr and introduce user | |||
60 | /// defined literals when we don't have to support GCC 5.3 anymore. | |||
61 | /// https://llvm.org/docs/GettingStarted.html#getting-a-modern-host-c-toolchain | |||
62 | struct LogValue { | |||
63 | uint8_t Log; | |||
64 | }; | |||
65 | ||||
66 | public: | |||
67 | /// Default is byte-aligned. | |||
68 | constexpr Align() = default; | |||
69 | /// Do not perform checks in case of copy/move construct/assign, because the | |||
70 | /// checks have been performed when building `Other`. | |||
71 | constexpr Align(const Align &Other) = default; | |||
72 | constexpr Align(Align &&Other) = default; | |||
73 | Align &operator=(const Align &Other) = default; | |||
74 | Align &operator=(Align &&Other) = default; | |||
75 | ||||
76 | explicit Align(uint64_t Value) { | |||
77 | assert(Value > 0 && "Value must not be 0")((void)0); | |||
78 | assert(llvm::isPowerOf2_64(Value) && "Alignment is not a power of 2")((void)0); | |||
79 | ShiftValue = Log2_64(Value); | |||
80 | assert(ShiftValue < 64 && "Broken invariant")((void)0); | |||
81 | } | |||
82 | ||||
83 | /// This is a hole in the type system and should not be abused. | |||
84 | /// Needed to interact with C for instance. | |||
85 | uint64_t value() const { return uint64_t(1) << ShiftValue; } | |||
86 | ||||
87 | /// Allow constructions of constexpr Align. | |||
88 | template <size_t kValue> constexpr static LogValue Constant() { | |||
89 | return LogValue{static_cast<uint8_t>(CTLog2<kValue>())}; | |||
90 | } | |||
91 | ||||
92 | /// Allow constructions of constexpr Align from types. | |||
93 | /// Compile time equivalent to Align(alignof(T)). | |||
94 | template <typename T> constexpr static LogValue Of() { | |||
95 | return Constant<std::alignment_of<T>::value>(); | |||
96 | } | |||
97 | ||||
98 | /// Constexpr constructor from LogValue type. | |||
99 | constexpr Align(LogValue CA) : ShiftValue(CA.Log) {} | |||
100 | }; | |||
101 | ||||
102 | /// Treats the value 0 as a 1, so Align is always at least 1. | |||
103 | inline Align assumeAligned(uint64_t Value) { | |||
104 | return Value ? Align(Value) : Align(); | |||
105 | } | |||
106 | ||||
107 | /// This struct is a compact representation of a valid (power of two) or | |||
108 | /// undefined (0) alignment. | |||
109 | struct MaybeAlign : public llvm::Optional<Align> { | |||
110 | private: | |||
111 | using UP = llvm::Optional<Align>; | |||
112 | ||||
113 | public: | |||
114 | /// Default is undefined. | |||
115 | MaybeAlign() = default; | |||
116 | /// Do not perform checks in case of copy/move construct/assign, because the | |||
117 | /// checks have been performed when building `Other`. | |||
118 | MaybeAlign(const MaybeAlign &Other) = default; | |||
119 | MaybeAlign &operator=(const MaybeAlign &Other) = default; | |||
120 | MaybeAlign(MaybeAlign &&Other) = default; | |||
121 | MaybeAlign &operator=(MaybeAlign &&Other) = default; | |||
122 | ||||
123 | /// Use llvm::Optional<Align> constructor. | |||
124 | using UP::UP; | |||
125 | ||||
126 | explicit MaybeAlign(uint64_t Value) { | |||
127 | assert((Value == 0 || llvm::isPowerOf2_64(Value)) &&((void)0) | |||
128 | "Alignment is neither 0 nor a power of 2")((void)0); | |||
129 | if (Value) | |||
130 | emplace(Value); | |||
131 | } | |||
132 | ||||
133 | /// For convenience, returns a valid alignment or 1 if undefined. | |||
134 | Align valueOrOne() const { return hasValue() ? getValue() : Align(); } | |||
135 | }; | |||
136 | ||||
137 | /// Checks that SizeInBytes is a multiple of the alignment. | |||
138 | inline bool isAligned(Align Lhs, uint64_t SizeInBytes) { | |||
139 | return SizeInBytes % Lhs.value() == 0; | |||
140 | } | |||
141 | ||||
142 | /// Checks that Addr is a multiple of the alignment. | |||
143 | inline bool isAddrAligned(Align Lhs, const void *Addr) { | |||
144 | return isAligned(Lhs, reinterpret_cast<uintptr_t>(Addr)); | |||
145 | } | |||
146 | ||||
147 | /// Returns a multiple of A needed to store `Size` bytes. | |||
148 | inline uint64_t alignTo(uint64_t Size, Align A) { | |||
149 | const uint64_t Value = A.value(); | |||
150 | // The following line is equivalent to `(Size + Value - 1) / Value * Value`. | |||
151 | ||||
152 | // The division followed by a multiplication can be thought of as a right | |||
153 | // shift followed by a left shift which zeros out the extra bits produced in | |||
154 | // the bump; `~(Value - 1)` is a mask where all those bits being zeroed out | |||
155 | // are just zero. | |||
156 | ||||
157 | // Most compilers can generate this code but the pattern may be missed when | |||
158 | // multiple functions get inlined. | |||
159 | return (Size + Value - 1) & ~(Value - 1U); | |||
160 | } | |||
161 | ||||
162 | /// If non-zero \p Skew is specified, the return value will be a minimal integer | |||
163 | /// that is greater than or equal to \p Size and equal to \p A * N + \p Skew for | |||
164 | /// some integer N. If \p Skew is larger than \p A, its value is adjusted to '\p | |||
165 | /// Skew mod \p A'. | |||
166 | /// | |||
167 | /// Examples: | |||
168 | /// \code | |||
169 | /// alignTo(5, Align(8), 7) = 7 | |||
170 | /// alignTo(17, Align(8), 1) = 17 | |||
171 | /// alignTo(~0LL, Align(8), 3) = 3 | |||
172 | /// \endcode | |||
173 | inline uint64_t alignTo(uint64_t Size, Align A, uint64_t Skew) { | |||
174 | const uint64_t Value = A.value(); | |||
175 | Skew %= Value; | |||
176 | return ((Size + Value - 1 - Skew) & ~(Value - 1U)) + Skew; | |||
177 | } | |||
178 | ||||
179 | /// Returns a multiple of A needed to store `Size` bytes. | |||
180 | /// Returns `Size` if current alignment is undefined. | |||
181 | inline uint64_t alignTo(uint64_t Size, MaybeAlign A) { | |||
182 | return A ? alignTo(Size, A.getValue()) : Size; | |||
183 | } | |||
184 | ||||
185 | /// Aligns `Addr` to `Alignment` bytes, rounding up. | |||
186 | inline uintptr_t alignAddr(const void *Addr, Align Alignment) { | |||
187 | uintptr_t ArithAddr = reinterpret_cast<uintptr_t>(Addr); | |||
188 | assert(static_cast<uintptr_t>(ArithAddr + Alignment.value() - 1) >=((void)0) | |||
189 | ArithAddr &&((void)0) | |||
190 | "Overflow")((void)0); | |||
191 | return alignTo(ArithAddr, Alignment); | |||
192 | } | |||
193 | ||||
194 | /// Returns the offset to the next integer (mod 2**64) that is greater than | |||
195 | /// or equal to \p Value and is a multiple of \p Alignment. | |||
196 | inline uint64_t offsetToAlignment(uint64_t Value, Align Alignment) { | |||
197 | return alignTo(Value, Alignment) - Value; | |||
198 | } | |||
199 | ||||
200 | /// Returns the necessary adjustment for aligning `Addr` to `Alignment` | |||
201 | /// bytes, rounding up. | |||
202 | inline uint64_t offsetToAlignedAddr(const void *Addr, Align Alignment) { | |||
203 | return offsetToAlignment(reinterpret_cast<uintptr_t>(Addr), Alignment); | |||
204 | } | |||
205 | ||||
206 | /// Returns the log2 of the alignment. | |||
207 | inline unsigned Log2(Align A) { return A.ShiftValue; } | |||
208 | ||||
209 | /// Returns the alignment that satisfies both alignments. | |||
210 | /// Same semantic as MinAlign. | |||
211 | inline Align commonAlignment(Align A, Align B) { return std::min(A, B); } | |||
212 | ||||
213 | /// Returns the alignment that satisfies both alignments. | |||
214 | /// Same semantic as MinAlign. | |||
215 | inline Align commonAlignment(Align A, uint64_t Offset) { | |||
216 | return Align(MinAlign(A.value(), Offset)); | |||
217 | } | |||
218 | ||||
219 | /// Returns the alignment that satisfies both alignments. | |||
220 | /// Same semantic as MinAlign. | |||
221 | inline MaybeAlign commonAlignment(MaybeAlign A, MaybeAlign B) { | |||
222 | return A && B ? commonAlignment(*A, *B) : A ? A : B; | |||
223 | } | |||
224 | ||||
225 | /// Returns the alignment that satisfies both alignments. | |||
226 | /// Same semantic as MinAlign. | |||
227 | inline MaybeAlign commonAlignment(MaybeAlign A, uint64_t Offset) { | |||
228 | return MaybeAlign(MinAlign((*A).value(), Offset)); | |||
229 | } | |||
230 | ||||
231 | /// Returns a representation of the alignment that encodes undefined as 0. | |||
232 | inline unsigned encode(MaybeAlign A) { return A ? A->ShiftValue + 1 : 0; } | |||
233 | ||||
234 | /// Dual operation of the encode function above. | |||
235 | inline MaybeAlign decodeMaybeAlign(unsigned Value) { | |||
236 | if (Value == 0) | |||
237 | return MaybeAlign(); | |||
238 | Align Out; | |||
239 | Out.ShiftValue = Value - 1; | |||
240 | return Out; | |||
241 | } | |||
242 | ||||
243 | /// Returns a representation of the alignment, the encoded value is positive by | |||
244 | /// definition. | |||
245 | inline unsigned encode(Align A) { return encode(MaybeAlign(A)); } | |||
246 | ||||
247 | /// Comparisons between Align and scalars. Rhs must be positive. | |||
248 | inline bool operator==(Align Lhs, uint64_t Rhs) { | |||
249 | ALIGN_CHECK_ISPOSITIVE(Rhs); | |||
250 | return Lhs.value() == Rhs; | |||
251 | } | |||
252 | inline bool operator!=(Align Lhs, uint64_t Rhs) { | |||
253 | ALIGN_CHECK_ISPOSITIVE(Rhs); | |||
254 | return Lhs.value() != Rhs; | |||
255 | } | |||
256 | inline bool operator<=(Align Lhs, uint64_t Rhs) { | |||
257 | ALIGN_CHECK_ISPOSITIVE(Rhs); | |||
258 | return Lhs.value() <= Rhs; | |||
259 | } | |||
260 | inline bool operator>=(Align Lhs, uint64_t Rhs) { | |||
261 | ALIGN_CHECK_ISPOSITIVE(Rhs); | |||
262 | return Lhs.value() >= Rhs; | |||
263 | } | |||
264 | inline bool operator<(Align Lhs, uint64_t Rhs) { | |||
265 | ALIGN_CHECK_ISPOSITIVE(Rhs); | |||
266 | return Lhs.value() < Rhs; | |||
267 | } | |||
268 | inline bool operator>(Align Lhs, uint64_t Rhs) { | |||
269 | ALIGN_CHECK_ISPOSITIVE(Rhs); | |||
270 | return Lhs.value() > Rhs; | |||
271 | } | |||
272 | ||||
273 | /// Comparisons between MaybeAlign and scalars. | |||
274 | inline bool operator==(MaybeAlign Lhs, uint64_t Rhs) { | |||
275 | return Lhs ? (*Lhs).value() == Rhs : Rhs == 0; | |||
276 | } | |||
277 | inline bool operator!=(MaybeAlign Lhs, uint64_t Rhs) { | |||
278 | return Lhs ? (*Lhs).value() != Rhs : Rhs != 0; | |||
279 | } | |||
280 | ||||
281 | /// Comparisons operators between Align. | |||
282 | inline bool operator==(Align Lhs, Align Rhs) { | |||
283 | return Lhs.ShiftValue == Rhs.ShiftValue; | |||
284 | } | |||
285 | inline bool operator!=(Align Lhs, Align Rhs) { | |||
286 | return Lhs.ShiftValue != Rhs.ShiftValue; | |||
287 | } | |||
288 | inline bool operator<=(Align Lhs, Align Rhs) { | |||
289 | return Lhs.ShiftValue <= Rhs.ShiftValue; | |||
290 | } | |||
291 | inline bool operator>=(Align Lhs, Align Rhs) { | |||
292 | return Lhs.ShiftValue >= Rhs.ShiftValue; | |||
293 | } | |||
294 | inline bool operator<(Align Lhs, Align Rhs) { | |||
295 | return Lhs.ShiftValue < Rhs.ShiftValue; | |||
296 | } | |||
297 | inline bool operator>(Align Lhs, Align Rhs) { | |||
298 | return Lhs.ShiftValue > Rhs.ShiftValue; | |||
299 | } | |||
300 | ||||
301 | // Don't allow relational comparisons with MaybeAlign. | |||
302 | bool operator<=(Align Lhs, MaybeAlign Rhs) = delete; | |||
303 | bool operator>=(Align Lhs, MaybeAlign Rhs) = delete; | |||
304 | bool operator<(Align Lhs, MaybeAlign Rhs) = delete; | |||
305 | bool operator>(Align Lhs, MaybeAlign Rhs) = delete; | |||
306 | ||||
307 | bool operator<=(MaybeAlign Lhs, Align Rhs) = delete; | |||
308 | bool operator>=(MaybeAlign Lhs, Align Rhs) = delete; | |||
309 | bool operator<(MaybeAlign Lhs, Align Rhs) = delete; | |||
310 | bool operator>(MaybeAlign Lhs, Align Rhs) = delete; | |||
311 | ||||
312 | bool operator<=(MaybeAlign Lhs, MaybeAlign Rhs) = delete; | |||
313 | bool operator>=(MaybeAlign Lhs, MaybeAlign Rhs) = delete; | |||
314 | bool operator<(MaybeAlign Lhs, MaybeAlign Rhs) = delete; | |||
315 | bool operator>(MaybeAlign Lhs, MaybeAlign Rhs) = delete; | |||
316 | ||||
317 | inline Align operator*(Align Lhs, uint64_t Rhs) { | |||
318 | assert(Rhs > 0 && "Rhs must be positive")((void)0); | |||
319 | return Align(Lhs.value() * Rhs); | |||
320 | } | |||
321 | ||||
322 | inline MaybeAlign operator*(MaybeAlign Lhs, uint64_t Rhs) { | |||
323 | assert(Rhs > 0 && "Rhs must be positive")((void)0); | |||
324 | return Lhs ? Lhs.getValue() * Rhs : MaybeAlign(); | |||
325 | } | |||
326 | ||||
327 | inline Align operator/(Align Lhs, uint64_t Divisor) { | |||
328 | assert(llvm::isPowerOf2_64(Divisor) &&((void)0) | |||
329 | "Divisor must be positive and a power of 2")((void)0); | |||
330 | assert(Lhs != 1 && "Can't halve byte alignment")((void)0); | |||
331 | return Align(Lhs.value() / Divisor); | |||
332 | } | |||
333 | ||||
334 | inline MaybeAlign operator/(MaybeAlign Lhs, uint64_t Divisor) { | |||
335 | assert(llvm::isPowerOf2_64(Divisor) &&((void)0) | |||
336 | "Divisor must be positive and a power of 2")((void)0); | |||
337 | return Lhs ? Lhs.getValue() / Divisor : MaybeAlign(); | |||
338 | } | |||
339 | ||||
340 | inline Align max(MaybeAlign Lhs, Align Rhs) { | |||
341 | return Lhs && *Lhs > Rhs ? *Lhs : Rhs; | |||
342 | } | |||
343 | ||||
344 | inline Align max(Align Lhs, MaybeAlign Rhs) { | |||
345 | return Rhs && *Rhs > Lhs ? *Rhs : Lhs; | |||
346 | } | |||
347 | ||||
348 | #ifndef NDEBUG1 | |||
349 | // For usage in LLVM_DEBUG macros. | |||
350 | inline std::string DebugStr(const Align &A) { | |||
351 | return std::to_string(A.value()); | |||
352 | } | |||
353 | // For usage in LLVM_DEBUG macros. | |||
354 | inline std::string DebugStr(const MaybeAlign &MA) { | |||
355 | if (MA) | |||
356 | return std::to_string(MA->value()); | |||
357 | return "None"; | |||
358 | } | |||
359 | #endif // NDEBUG | |||
360 | ||||
361 | #undef ALIGN_CHECK_ISPOSITIVE | |||
362 | ||||
363 | } // namespace llvm | |||
364 | ||||
365 | #endif // LLVM_SUPPORT_ALIGNMENT_H_ |
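A minimal sketch exercising the Align and MaybeAlign API above, valid under the invariant the constructor asserts (ShiftValue below 64, so the shift in value() is defined); the function name demo and the concrete numbers are made up.

#include "llvm/Support/Alignment.h"
#include <cassert>

using namespace llvm;

static void demo() {
  Align A(16);                          // must be a non-zero power of two
  assert(Log2(A) == 4);                 // only the log2 is stored
  assert(A.value() == 16);              // uint64_t(1) << 4
  assert(alignTo(13, A) == 16);         // round a size up to the alignment
  assert(alignTo(5, Align(8), 7) == 7); // skewed form, as in the doc comment
  MaybeAlign MA;                        // "undefined" alignment
  assert(MA.valueOrOne() == Align(1));  // collapses to byte alignment
  assert(encode(MA) == 0);              // undefined encodes as 0
}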
1 | //===-- llvm/Support/MathExtras.h - Useful math functions -------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains some functions that are useful for math stuff. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #ifndef LLVM_SUPPORT_MATHEXTRAS_H |
14 | #define LLVM_SUPPORT_MATHEXTRAS_H |
15 | |
16 | #include "llvm/Support/Compiler.h" |
17 | #include <cassert> |
18 | #include <climits> |
19 | #include <cmath> |
20 | #include <cstdint> |
21 | #include <cstring> |
22 | #include <limits> |
23 | #include <type_traits> |
24 | |
25 | #ifdef __ANDROID_NDK__ |
26 | #include <android/api-level.h> |
27 | #endif |
28 | |
29 | #ifdef _MSC_VER |
30 | // Declare these intrinsics manually rather than including intrin.h. It's very |
31 | // expensive, and MathExtras.h is popular. |
32 | // #include <intrin.h> |
33 | extern "C" { |
34 | unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask); |
35 | unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask); |
36 | unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask); |
37 | unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask); |
38 | } |
39 | #endif |
40 | |
41 | namespace llvm { |
42 | |
43 | /// The behavior an operation has on an input of 0. |
44 | enum ZeroBehavior { |
45 | /// The returned value is undefined. |
46 | ZB_Undefined, |
47 | /// The returned value is numeric_limits<T>::max() |
48 | ZB_Max, |
49 | /// The returned value is numeric_limits<T>::digits |
50 | ZB_Width |
51 | }; |
52 | |
53 | /// Mathematical constants. |
54 | namespace numbers { |
55 | // TODO: Track C++20 std::numbers. |
56 | // TODO: Favor using the hexadecimal FP constants (requires C++17). |
57 | constexpr double e = 2.7182818284590452354, // (0x1.5bf0a8b145749P+1) https://oeis.org/A001113 |
58 | egamma = .57721566490153286061, // (0x1.2788cfc6fb619P-1) https://oeis.org/A001620 |
59 | ln2 = .69314718055994530942, // (0x1.62e42fefa39efP-1) https://oeis.org/A002162 |
60 | ln10 = 2.3025850929940456840, // (0x1.26bb1bbb55516P+1) https://oeis.org/A002392 |
61 | log2e = 1.4426950408889634074, // (0x1.71547652b82feP+0) |
62 | log10e = .43429448190325182765, // (0x1.bcb7b1526e50eP-2) |
63 | pi = 3.1415926535897932385, // (0x1.921fb54442d18P+1) https://oeis.org/A000796 |
64 | inv_pi = .31830988618379067154, // (0x1.45f306bc9c883P-2) https://oeis.org/A049541 |
65 | sqrtpi = 1.7724538509055160273, // (0x1.c5bf891b4ef6bP+0) https://oeis.org/A002161 |
66 | inv_sqrtpi = .56418958354775628695, // (0x1.20dd750429b6dP-1) https://oeis.org/A087197 |
67 | sqrt2 = 1.4142135623730950488, // (0x1.6a09e667f3bcdP+0) https://oeis.org/A002193 |
68 | inv_sqrt2 = .70710678118654752440, // (0x1.6a09e667f3bcdP-1) |
69 | sqrt3 = 1.7320508075688772935, // (0x1.bb67ae8584caaP+0) https://oeis.org/A002194 |
70 | inv_sqrt3 = .57735026918962576451, // (0x1.279a74590331cP-1) |
71 | phi = 1.6180339887498948482; // (0x1.9e3779b97f4a8P+0) https://oeis.org/A001622 |
72 | constexpr float ef = 2.71828183F, // (0x1.5bf0a8P+1) https://oeis.org/A001113 |
73 | egammaf = .577215665F, // (0x1.2788d0P-1) https://oeis.org/A001620 |
74 | ln2f = .693147181F, // (0x1.62e430P-1) https://oeis.org/A002162 |
75 | ln10f = 2.30258509F, // (0x1.26bb1cP+1) https://oeis.org/A002392 |
76 | log2ef = 1.44269504F, // (0x1.715476P+0) |
77 | log10ef = .434294482F, // (0x1.bcb7b2P-2) |
78 | pif = 3.14159265F, // (0x1.921fb6P+1) https://oeis.org/A000796 |
79 | inv_pif = .318309886F, // (0x1.45f306P-2) https://oeis.org/A049541 |
80 | sqrtpif = 1.77245385F, // (0x1.c5bf8aP+0) https://oeis.org/A002161 |
81 | inv_sqrtpif = .564189584F, // (0x1.20dd76P-1) https://oeis.org/A087197 |
82 | sqrt2f = 1.41421356F, // (0x1.6a09e6P+0) https://oeis.org/A002193 |
83 | inv_sqrt2f = .707106781F, // (0x1.6a09e6P-1) |
84 | sqrt3f = 1.73205081F, // (0x1.bb67aeP+0) https://oeis.org/A002194 |
85 | inv_sqrt3f = .577350269F, // (0x1.279a74P-1) |
86 | phif = 1.61803399F; // (0x1.9e377aP+0) https://oeis.org/A001622 |
87 | } // namespace numbers |
88 | |
89 | namespace detail { |
90 | template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter { |
91 | static unsigned count(T Val, ZeroBehavior) { |
92 | if (!Val) |
93 | return std::numeric_limits<T>::digits; |
94 | if (Val & 0x1) |
95 | return 0; |
96 | |
97 | // Bisection method. |
98 | unsigned ZeroBits = 0; |
99 | T Shift = std::numeric_limits<T>::digits >> 1; |
100 | T Mask = std::numeric_limits<T>::max() >> Shift; |
101 | while (Shift) { |
102 | if ((Val & Mask) == 0) { |
103 | Val >>= Shift; |
104 | ZeroBits |= Shift; |
105 | } |
106 | Shift >>= 1; |
107 | Mask >>= Shift; |
108 | } |
109 | return ZeroBits; |
110 | } |
111 | }; |
112 | |
113 | #if defined(__GNUC__) || defined(_MSC_VER) |
114 | template <typename T> struct TrailingZerosCounter<T, 4> { |
115 | static unsigned count(T Val, ZeroBehavior ZB) { |
116 | if (ZB != ZB_Undefined && Val == 0) |
117 | return 32; |
118 | |
119 | #if __has_builtin(__builtin_ctz) || defined(__GNUC__) |
120 | return __builtin_ctz(Val); |
121 | #elif defined(_MSC_VER) |
122 | unsigned long Index; |
123 | _BitScanForward(&Index, Val); |
124 | return Index; |
125 | #endif |
126 | } |
127 | }; |
128 | |
129 | #if !defined(_MSC_VER) || defined(_M_X64) |
130 | template <typename T> struct TrailingZerosCounter<T, 8> { |
131 | static unsigned count(T Val, ZeroBehavior ZB) { |
132 | if (ZB != ZB_Undefined && Val == 0) |
133 | return 64; |
134 | |
135 | #if __has_builtin(__builtin_ctzll) || defined(__GNUC__) |
136 | return __builtin_ctzll(Val); |
137 | #elif defined(_MSC_VER) |
138 | unsigned long Index; |
139 | _BitScanForward64(&Index, Val); |
140 | return Index; |
141 | #endif |
142 | } |
143 | }; |
144 | #endif |
145 | #endif |
146 | } // namespace detail |
147 | |
148 | /// Count number of 0's from the least significant bit to the most |
149 | /// stopping at the first 1. |
150 | /// |
151 | /// Only unsigned integral types are allowed. |
152 | /// |
153 | /// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are |
154 | /// valid arguments. |
155 | template <typename T> |
156 | unsigned countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) { |
157 | static_assert(std::numeric_limits<T>::is_integer && |
158 | !std::numeric_limits<T>::is_signed, |
159 | "Only unsigned integral types are allowed."); |
160 | return llvm::detail::TrailingZerosCounter<T, sizeof(T)>::count(Val, ZB); |
161 | } |
162 | |
163 | namespace detail { |
164 | template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter { |
165 | static unsigned count(T Val, ZeroBehavior) { |
166 | if (!Val) |
167 | return std::numeric_limits<T>::digits; |
168 | |
169 | // Bisection method. |
170 | unsigned ZeroBits = 0; |
171 | for (T Shift = std::numeric_limits<T>::digits >> 1; Shift; Shift >>= 1) { |
172 | T Tmp = Val >> Shift; |
173 | if (Tmp) |
174 | Val = Tmp; |
175 | else |
176 | ZeroBits |= Shift; |
177 | } |
178 | return ZeroBits; |
179 | } |
180 | }; |
181 | |
182 | #if defined(__GNUC__) || defined(_MSC_VER) |
183 | template <typename T> struct LeadingZerosCounter<T, 4> { |
184 | static unsigned count(T Val, ZeroBehavior ZB) { |
185 | if (ZB != ZB_Undefined && Val == 0) |
186 | return 32; |
187 | |
188 | #if __has_builtin(__builtin_clz) || defined(__GNUC__) |
189 | return __builtin_clz(Val); |
190 | #elif defined(_MSC_VER) |
191 | unsigned long Index; |
192 | _BitScanReverse(&Index, Val); |
193 | return Index ^ 31; |
194 | #endif |
195 | } |
196 | }; |
197 | |
198 | #if !defined(_MSC_VER) || defined(_M_X64) |
199 | template <typename T> struct LeadingZerosCounter<T, 8> { |
200 | static unsigned count(T Val, ZeroBehavior ZB) { |
201 | if (ZB != ZB_Undefined && Val == 0) |
202 | return 64; |
203 | |
204 | #if __has_builtin(__builtin_clzll) || defined(__GNUC__) |
205 | return __builtin_clzll(Val); |
206 | #elif defined(_MSC_VER) |
207 | unsigned long Index; |
208 | _BitScanReverse64(&Index, Val); |
209 | return Index ^ 63; |
210 | #endif |
211 | } |
212 | }; |
213 | #endif |
214 | #endif |
215 | } // namespace detail |
216 | |
217 | /// Count number of 0's from the most significant bit to the least |
218 | /// stopping at the first 1. |
219 | /// |
220 | /// Only unsigned integral types are allowed. |
221 | /// |
222 | /// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are |
223 | /// valid arguments. |
224 | template <typename T> |
225 | unsigned countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) { |
226 | static_assert(std::numeric_limits<T>::is_integer && |
227 | !std::numeric_limits<T>::is_signed, |
228 | "Only unsigned integral types are allowed."); |
229 | return llvm::detail::LeadingZerosCounter<T, sizeof(T)>::count(Val, ZB); |
230 | } |
231 | |
232 | /// Get the index of the first set bit starting from the least |
233 | /// significant bit. |
234 | /// |
235 | /// Only unsigned integral types are allowed. |
236 | /// |
237 | /// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are |
238 | /// valid arguments. |
239 | template <typename T> T findFirstSet(T Val, ZeroBehavior ZB = ZB_Max) { |
240 | if (ZB == ZB_Max && Val == 0) |
241 | return std::numeric_limits<T>::max(); |
242 | |
243 | return countTrailingZeros(Val, ZB_Undefined); |
244 | } |
245 | |
246 | /// Create a bitmask with the N right-most bits set to 1, and all other |
247 | /// bits set to 0. Only unsigned types are allowed. |
248 | template <typename T> T maskTrailingOnes(unsigned N) { |
249 | static_assert(std::is_unsigned<T>::value, "Invalid type!"); |
250 | const unsigned Bits = CHAR_BIT * sizeof(T); |
251 | assert(N <= Bits && "Invalid bit index"); |
252 | return N == 0 ? 0 : (T(-1) >> (Bits - N)); |
253 | } |
254 | |
255 | /// Create a bitmask with the N left-most bits set to 1, and all other |
256 | /// bits set to 0. Only unsigned types are allowed. |
257 | template <typename T> T maskLeadingOnes(unsigned N) { |
258 | return ~maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N); |
259 | } |
260 | |
261 | /// Create a bitmask with the N right-most bits set to 0, and all other |
262 | /// bits set to 1. Only unsigned types are allowed. |
263 | template <typename T> T maskTrailingZeros(unsigned N) { |
264 | return maskLeadingOnes<T>(CHAR_BIT * sizeof(T) - N); |
265 | } |
266 | |
267 | /// Create a bitmask with the N left-most bits set to 0, and all other |
268 | /// bits set to 1. Only unsigned types are allowed. |
269 | template <typename T> T maskLeadingZeros(unsigned N) { |
270 | return maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N); |
271 | } |
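// [Editor's illustrative sketch -- not part of the upstream MathExtras.h. A
//  hypothetical helper showing what the four mask builders above produce for
//  a small width.]
inline void maskHelpersExample() {
  assert(maskTrailingOnes<uint8_t>(3) == 0x07);  // low 3 bits set
  assert(maskLeadingOnes<uint8_t>(3) == 0xE0);   // high 3 bits set
  assert(maskTrailingZeros<uint8_t>(3) == 0xF8); // low 3 bits clear
  assert(maskLeadingZeros<uint8_t>(3) == 0x1F);  // high 3 bits clear
}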
272 | |
273 | /// Get the index of the last set bit starting from the least |
274 | /// significant bit. |
275 | /// |
276 | /// Only unsigned integral types are allowed. |
277 | /// |
278 | /// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are |
279 | /// valid arguments. |
280 | template <typename T> T findLastSet(T Val, ZeroBehavior ZB = ZB_Max) { |
281 | if (ZB == ZB_Max && Val == 0) |
282 | return std::numeric_limits<T>::max(); |
283 | |
284 | // Use ^ instead of - because both gcc and llvm can remove the associated ^ |
285 | // in the __builtin_clz intrinsic on x86. |
286 | return countLeadingZeros(Val, ZB_Undefined) ^ |
287 | (std::numeric_limits<T>::digits - 1); |
288 | } |
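// [Editor's illustrative sketch -- not part of the upstream header. A
//  hypothetical helper relating the counting and searching routines above.]
inline void bitSearchExample() {
  uint32_t V = 0x00000010; // only bit 4 is set
  assert(countTrailingZeros(V) == 4);
  assert(countLeadingZeros(V) == 27);
  assert(findFirstSet(V) == 4 && findLastSet(V) == 4);
}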
289 | |
290 | /// Macro compressed bit reversal table for 256 bits. |
291 | /// |
292 | /// http://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable |
293 | static const unsigned char BitReverseTable256[256] = { |
294 | #define R2(n) n, n + 2 * 64, n + 1 * 64, n + 3 * 64 |
295 | #define R4(n) R2(n), R2(n + 2 * 16), R2(n + 1 * 16), R2(n + 3 * 16) |
296 | #define R6(n) R4(n), R4(n + 2 * 4), R4(n + 1 * 4), R4(n + 3 * 4) |
297 | R6(0), R6(2), R6(1), R6(3) |
298 | #undef R2 |
299 | #undef R4 |
300 | #undef R6 |
301 | }; |
302 | |
303 | /// Reverse the bits in \p Val. |
304 | template <typename T> |
305 | T reverseBits(T Val) { |
306 | unsigned char in[sizeof(Val)]; |
307 | unsigned char out[sizeof(Val)]; |
308 | std::memcpy(in, &Val, sizeof(Val)); |
309 | for (unsigned i = 0; i < sizeof(Val); ++i) |
310 | out[(sizeof(Val) - i) - 1] = BitReverseTable256[in[i]]; |
311 | std::memcpy(&Val, out, sizeof(Val)); |
312 | return Val; |
313 | } |
314 | |
315 | #if __has_builtin(__builtin_bitreverse8) |
316 | template<> |
317 | inline uint8_t reverseBits<uint8_t>(uint8_t Val) { |
318 | return __builtin_bitreverse8(Val); |
319 | } |
320 | #endif |
321 | |
322 | #if __has_builtin(__builtin_bitreverse16) |
323 | template<> |
324 | inline uint16_t reverseBits<uint16_t>(uint16_t Val) { |
325 | return __builtin_bitreverse16(Val); |
326 | } |
327 | #endif |
328 | |
329 | #if __has_builtin(__builtin_bitreverse32) |
330 | template<> |
331 | inline uint32_t reverseBits<uint32_t>(uint32_t Val) { |
332 | return __builtin_bitreverse32(Val); |
333 | } |
334 | #endif |
335 | |
336 | #if __has_builtin(__builtin_bitreverse64) |
337 | template<> |
338 | inline uint64_t reverseBits<uint64_t>(uint64_t Val) { |
339 | return __builtin_bitreverse64(Val); |
340 | } |
341 | #endif |
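// [Editor's illustrative sketch -- not part of the upstream header. The
//  function name is hypothetical.]
inline void reverseBitsExample() {
  assert(reverseBits<uint8_t>(0x01) == 0x80);      // bit 0 moves to bit 7
  assert(reverseBits<uint16_t>(0x0001) == 0x8000); // bit 0 moves to bit 15
}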
342 | |
343 | // NOTE: The following support functions use the _32/_64 extensions instead of |
344 | // type overloading so that signed and unsigned integers can be used without |
345 | // ambiguity. |
346 | |
347 | /// Return the high 32 bits of a 64 bit value. |
348 | constexpr inline uint32_t Hi_32(uint64_t Value) { |
349 | return static_cast<uint32_t>(Value >> 32); |
350 | } |
351 | |
352 | /// Return the low 32 bits of a 64 bit value. |
353 | constexpr inline uint32_t Lo_32(uint64_t Value) { |
354 | return static_cast<uint32_t>(Value); |
355 | } |
356 | |
357 | /// Make a 64-bit integer from a high / low pair of 32-bit integers. |
358 | constexpr inline uint64_t Make_64(uint32_t High, uint32_t Low) { |
359 | return ((uint64_t)High << 32) | (uint64_t)Low; |
360 | } |
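// [Editor's illustrative sketch -- not part of the upstream header. These
//  helpers are constexpr, so the split/join round-trip can be checked at
//  compile time.]
static_assert(Make_64(0xDEADBEEFu, 0x12345678u) == 0xDEADBEEF12345678ull,
              "Make_64 places High in the upper 32 bits");
static_assert(Hi_32(0xDEADBEEF12345678ull) == 0xDEADBEEFu &&
                  Lo_32(0xDEADBEEF12345678ull) == 0x12345678u,
              "Hi_32/Lo_32 invert Make_64");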
361 | |
362 | /// Checks if an integer fits into the given bit width. |
363 | template <unsigned N> constexpr inline bool isInt(int64_t x) { |
364 | return N >= 64 || (-(INT64_C(1)<<(N-1)) <= x && x < (INT64_C(1)<<(N-1))); |
365 | } |
366 | // Template specializations to get better code for common cases. |
367 | template <> constexpr inline bool isInt<8>(int64_t x) { |
368 | return static_cast<int8_t>(x) == x; |
369 | } |
370 | template <> constexpr inline bool isInt<16>(int64_t x) { |
371 | return static_cast<int16_t>(x) == x; |
372 | } |
373 | template <> constexpr inline bool isInt<32>(int64_t x) { |
374 | return static_cast<int32_t>(x) == x; |
375 | } |
376 | |
377 | /// Checks if a signed integer is an N bit number shifted left by S. |
378 | template <unsigned N, unsigned S> |
379 | constexpr inline bool isShiftedInt(int64_t x) { |
380 | static_assert( |
381 | N > 0, "isShiftedInt<0> doesn't make sense (refers to a 0-bit number)."); |
382 | static_assert(N + S <= 64, "isShiftedInt<N, S> with N + S > 64 is too wide."); |
383 | return isInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0); |
384 | } |
385 | |
386 | /// Checks if an unsigned integer fits into the given bit width. |
387 | /// |
388 | /// This is written as two functions rather than as simply |
389 | /// |
390 | /// return N >= 64 || X < (UINT64_C(1) << N); |
391 | /// |
392 | /// to keep MSVC from (incorrectly) warning on isUInt<64> that we're shifting |
393 | /// left too many places. |
394 | template <unsigned N> |
395 | constexpr inline std::enable_if_t<(N < 64), bool> isUInt(uint64_t X) { |
396 | static_assert(N > 0, "isUInt<0> doesn't make sense"); |
397 | return X < (UINT64_C(1) << (N)); |
398 | } |
399 | template <unsigned N> |
400 | constexpr inline std::enable_if_t<N >= 64, bool> isUInt(uint64_t) { |
401 | return true; |
402 | } |
403 | |
404 | // Template specializations to get better code for common cases. |
405 | template <> constexpr inline bool isUInt<8>(uint64_t x) { |
406 | return static_cast<uint8_t>(x) == x; |
407 | } |
408 | template <> constexpr inline bool isUInt<16>(uint64_t x) { |
409 | return static_cast<uint16_t>(x) == x; |
410 | } |
411 | template <> constexpr inline bool isUInt<32>(uint64_t x) { |
412 | return static_cast<uint32_t>(x) == x; |
413 | } |
414 | |
415 | /// Checks if an unsigned integer is an N bit number shifted left by S. |
416 | template <unsigned N, unsigned S> |
417 | constexpr inline bool isShiftedUInt(uint64_t x) { |
418 | static_assert( |
419 | N > 0, "isShiftedUInt<0> doesn't make sense (refers to a 0-bit number)"); |
420 | static_assert(N + S <= 64, |
421 | "isShiftedUInt<N, S> with N + S > 64 is too wide."); |
422 | // Per the two static_asserts above, S must be strictly less than 64. So |
423 | // 1 << S is not undefined behavior. |
424 | return isUInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0); |
425 | } |
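// [Editor's illustrative sketch -- not part of the upstream header. Since the
//  predicates above are constexpr, a few boundary cases can be spelled out as
//  compile-time checks.]
static_assert(isInt<8>(127) && !isInt<8>(128), "int8_t range is [-128, 127]");
static_assert(isUInt<8>(255) && !isUInt<8>(256), "uint8_t range is [0, 255]");
static_assert(isShiftedInt<8, 2>(-512) && !isShiftedInt<8, 2>(3),
              "an 8-bit value shifted left by 2 is a multiple of 4 in [-512, 508]");
static_assert(isShiftedUInt<8, 2>(1020) && !isShiftedUInt<8, 2>(1021),
              "an 8-bit value shifted left by 2 is a multiple of 4 in [0, 1020]");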
426 | |
427 | /// Gets the maximum value for a N-bit unsigned integer. |
428 | inline uint64_t maxUIntN(uint64_t N) { |
429 | assert(N > 0 && N <= 64 && "integer width out of range"); |
430 | |
431 | // uint64_t(1) << 64 is undefined behavior, so we can't do |
432 | // (uint64_t(1) << N) - 1 |
433 | // without checking first that N != 64. But this works and doesn't have a |
434 | // branch. |
435 | return UINT64_MAX >> (64 - N); |
436 | } |
437 | |
438 | /// Gets the minimum value for a N-bit signed integer. |
439 | inline int64_t minIntN(int64_t N) { |
440 | assert(N > 0 && N <= 64 && "integer width out of range"); |
441 | |
442 | return UINT64_C(1) + ~(UINT64_C(1) << (N - 1)); |
443 | } |
444 | |
445 | /// Gets the maximum value for a N-bit signed integer. |
446 | inline int64_t maxIntN(int64_t N) { |
447 | assert(N > 0 && N <= 64 && "integer width out of range"); |
448 | |
449 | // This relies on two's complement wraparound when N == 64, so we convert to |
450 | // int64_t only at the very end to avoid UB. |
451 | return (UINT64_C(1) << (N - 1)) - 1; |
452 | } |
453 | |
454 | /// Checks if an unsigned integer fits into the given (dynamic) bit width. |
455 | inline bool isUIntN(unsigned N, uint64_t x) { |
456 | return N >= 64 || x <= maxUIntN(N); |
457 | } |
458 | |
459 | /// Checks if an signed integer fits into the given (dynamic) bit width. |
460 | inline bool isIntN(unsigned N, int64_t x) { |
461 | return N >= 64 || (minIntN(N) <= x && x <= maxIntN(N)); |
462 | } |
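// [Editor's illustrative sketch -- not part of the upstream header. A
//  hypothetical helper exercising the dynamic-width variants above.]
inline void dynamicWidthExample() {
  assert(maxUIntN(8) == 255 && minIntN(8) == -128 && maxIntN(8) == 127);
  assert(isUIntN(8, 255) && !isUIntN(8, 256));
  assert(isIntN(8, -128) && !isIntN(8, 128));
}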
463 | |
464 | /// Return true if the argument is a non-empty sequence of ones starting at the |
465 | /// least significant bit with the remainder zero (32 bit version). |
466 | /// Ex. isMask_32(0x0000FFFFU) == true. |
467 | constexpr inline bool isMask_32(uint32_t Value) { |
468 | return Value && ((Value + 1) & Value) == 0; |
469 | } |
470 | |
471 | /// Return true if the argument is a non-empty sequence of ones starting at the |
472 | /// least significant bit with the remainder zero (64 bit version). |
473 | constexpr inline bool isMask_64(uint64_t Value) { |
474 | return Value && ((Value + 1) & Value) == 0; |
475 | } |
476 | |
477 | /// Return true if the argument contains a non-empty sequence of ones with the |
478 | /// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true. |
479 | constexpr inline bool isShiftedMask_32(uint32_t Value) { |
480 | return Value && isMask_32((Value - 1) | Value); |
481 | } |
482 | |
483 | /// Return true if the argument contains a non-empty sequence of ones with the |
484 | /// remainder zero (64 bit version.) |
485 | constexpr inline bool isShiftedMask_64(uint64_t Value) { |
486 | return Value && isMask_64((Value - 1) | Value); |
487 | } |
488 | |
489 | /// Return true if the argument is a power of two > 0. |
490 | /// Ex. isPowerOf2_32(0x00100000U) == true (32 bit edition.) |
491 | constexpr inline bool isPowerOf2_32(uint32_t Value) { |
492 | return Value && !(Value & (Value - 1)); |
493 | } |
494 | |
495 | /// Return true if the argument is a power of two > 0 (64 bit edition.) |
496 | constexpr inline bool isPowerOf2_64(uint64_t Value) { |
497 | return Value && !(Value & (Value - 1)); |
498 | } |
499 | |
500 | /// Count the number of ones from the most significant bit to the first |
501 | /// zero bit. |
502 | /// |
503 | /// Ex. countLeadingOnes(0xFF0FFF00) == 8. |
504 | /// Only unsigned integral types are allowed. |
505 | /// |
506 | /// \param ZB the behavior on an input of all ones. Only ZB_Width and |
507 | /// ZB_Undefined are valid arguments. |
508 | template <typename T> |
509 | unsigned countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) { |
510 | static_assert(std::numeric_limits<T>::is_integer && |
511 | !std::numeric_limits<T>::is_signed, |
512 | "Only unsigned integral types are allowed."); |
513 | return countLeadingZeros<T>(~Value, ZB); |
514 | } |
515 | |
516 | /// Count the number of ones from the least significant bit to the first |
517 | /// zero bit. |
518 | /// |
519 | /// Ex. countTrailingOnes(0x00FF00FF) == 8. |
520 | /// Only unsigned integral types are allowed. |
521 | /// |
522 | /// \param ZB the behavior on an input of all ones. Only ZB_Width and |
523 | /// ZB_Undefined are valid arguments. |
524 | template <typename T> |
525 | unsigned countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) { |
526 | static_assert(std::numeric_limits<T>::is_integer && |
527 | !std::numeric_limits<T>::is_signed, |
528 | "Only unsigned integral types are allowed."); |
529 | return countTrailingZeros<T>(~Value, ZB); |
530 | } |
531 | |
532 | namespace detail { |
533 | template <typename T, std::size_t SizeOfT> struct PopulationCounter { |
534 | static unsigned count(T Value) { |
535 | // Generic version, forward to 32 bits. |
536 | static_assert(SizeOfT <= 4, "Not implemented!"); |
537 | #if defined(__GNUC__) |
538 | return __builtin_popcount(Value); |
539 | #else |
540 | uint32_t v = Value; |
541 | v = v - ((v >> 1) & 0x55555555); |
542 | v = (v & 0x33333333) + ((v >> 2) & 0x33333333); |
543 | return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24; |
544 | #endif |
545 | } |
546 | }; |
547 | |
548 | template <typename T> struct PopulationCounter<T, 8> { |
549 | static unsigned count(T Value) { |
550 | #if defined(__GNUC__) |
551 | return __builtin_popcountll(Value); |
552 | #else |
553 | uint64_t v = Value; |
554 | v = v - ((v >> 1) & 0x5555555555555555ULL); |
555 | v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL); |
556 | v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL; |
557 | return unsigned((uint64_t)(v * 0x0101010101010101ULL) >> 56); |
558 | #endif |
559 | } |
560 | }; |
561 | } // namespace detail |
562 | |
563 | /// Count the number of set bits in a value. |
564 | /// Ex. countPopulation(0xF000F000) = 8 |
565 | /// Returns 0 if the word is zero. |
566 | template <typename T> |
567 | inline unsigned countPopulation(T Value) { |
568 | static_assert(std::numeric_limits<T>::is_integer && |
569 | !std::numeric_limits<T>::is_signed, |
570 | "Only unsigned integral types are allowed."); |
571 | return detail::PopulationCounter<T, sizeof(T)>::count(Value); |
572 | } |
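// [Editor's illustrative sketch -- not part of the upstream header. The
//  expected values restate the examples given in the doc comments above.]
inline void bitCountExample() {
  assert(countLeadingOnes(0xFF0FFF00u) == 8);
  assert(countTrailingOnes(0x00FF00FFu) == 8);
  assert(countPopulation(0xF000F000u) == 8);
}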
573 | |
574 | /// Compile time Log2. |
575 | /// Valid only for positive powers of two. |
576 | template <size_t kValue> constexpr inline size_t CTLog2() { |
577 | static_assert(kValue > 0 && llvm::isPowerOf2_64(kValue), |
578 | "Value is not a valid power of 2"); |
579 | return 1 + CTLog2<kValue / 2>(); |
580 | } |
581 | |
582 | template <> constexpr inline size_t CTLog2<1>() { return 0; } |
583 | |
584 | /// Return the log base 2 of the specified value. |
585 | inline double Log2(double Value) { |
586 | #if defined(__ANDROID_API__) && __ANDROID_API__ < 18 |
587 | return __builtin_log(Value) / __builtin_log(2.0); |
588 | #else |
589 | return log2(Value); |
590 | #endif |
591 | } |
592 | |
593 | /// Return the floor log base 2 of the specified value, -1 if the value is zero. |
594 | /// (32 bit edition.) |
595 | /// Ex. Log2_32(32) == 5, Log2_32(1) == 0, Log2_32(0) == -1, Log2_32(6) == 2 |
596 | inline unsigned Log2_32(uint32_t Value) { |
597 | return 31 - countLeadingZeros(Value); |
598 | } |
599 | |
600 | /// Return the floor log base 2 of the specified value, -1 if the value is zero. |
601 | /// (64 bit edition.) |
602 | inline unsigned Log2_64(uint64_t Value) { |
603 | return 63 - countLeadingZeros(Value); |
604 | } |
605 | |
606 | /// Return the ceil log base 2 of the specified value, 32 if the value is zero. |
607 | /// (32 bit edition). |
608 | /// Ex. Log2_32_Ceil(32) == 5, Log2_32_Ceil(1) == 0, Log2_32_Ceil(6) == 3 |
609 | inline unsigned Log2_32_Ceil(uint32_t Value) { |
610 | return 32 - countLeadingZeros(Value - 1); |
611 | } |
612 | |
613 | /// Return the ceil log base 2 of the specified value, 64 if the value is zero. |
614 | /// (64 bit edition.) |
615 | inline unsigned Log2_64_Ceil(uint64_t Value) { |
616 | return 64 - countLeadingZeros(Value - 1); |
617 | } |
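// [Editor's illustrative sketch -- not part of the upstream header. Floor vs.
//  ceiling log2, restating the doc-comment examples above.]
inline void log2Example() {
  assert(Log2_32(32) == 5 && Log2_32(6) == 2);           // floor
  assert(Log2_32_Ceil(32) == 5 && Log2_32_Ceil(6) == 3); // ceiling
  assert(Log2_64(1) == 0);
}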
618 | |
619 | /// Return the greatest common divisor of the values using Euclid's algorithm. |
620 | template <typename T> |
621 | inline T greatestCommonDivisor(T A, T B) { |
622 | while (B) { |
623 | T Tmp = B; |
624 | B = A % B; |
625 | A = Tmp; |
626 | } |
627 | return A; |
628 | } |
629 | |
630 | inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) { |
631 | return greatestCommonDivisor<uint64_t>(A, B); |
632 | } |
633 | |
634 | /// This function takes a 64-bit integer and returns the bit equivalent double. |
635 | inline double BitsToDouble(uint64_t Bits) { |
636 | double D; |
637 | static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes"); |
638 | memcpy(&D, &Bits, sizeof(Bits)); |
639 | return D; |
640 | } |
641 | |
642 | /// This function takes a 32-bit integer and returns the bit equivalent float. |
643 | inline float BitsToFloat(uint32_t Bits) { |
644 | float F; |
645 | static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes"); |
646 | memcpy(&F, &Bits, sizeof(Bits)); |
647 | return F; |
648 | } |
649 | |
650 | /// This function takes a double and returns the bit equivalent 64-bit integer. |
651 | /// Note that copying doubles around changes the bits of NaNs on some hosts, |
652 | /// notably x86, so this routine cannot be used if these bits are needed. |
653 | inline uint64_t DoubleToBits(double Double) { |
654 | uint64_t Bits; |
655 | static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes"); |
656 | memcpy(&Bits, &Double, sizeof(Double)); |
657 | return Bits; |
658 | } |
659 | |
660 | /// This function takes a float and returns the bit equivalent 32-bit integer. |
661 | /// Note that copying floats around changes the bits of NaNs on some hosts, |
662 | /// notably x86, so this routine cannot be used if these bits are needed. |
663 | inline uint32_t FloatToBits(float Float) { |
664 | uint32_t Bits; |
665 | static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes"); |
666 | memcpy(&Bits, &Float, sizeof(Float)); |
667 | return Bits; |
668 | } |
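// [Editor's illustrative sketch -- not part of the upstream header. The
//  round-trip below uses the IEEE-754 single-precision encoding of 1.0f.]
inline void bitCastExample() {
  assert(FloatToBits(1.0f) == 0x3F800000u);
  assert(BitsToFloat(0x3F800000u) == 1.0f);
}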
669 | |
670 | /// A and B are either alignments or offsets. Return the minimum alignment that |
671 | /// may be assumed after adding the two together. |
672 | constexpr inline uint64_t MinAlign(uint64_t A, uint64_t B) { |
673 | // The largest power of 2 that divides both A and B. |
674 | // |
675 | // Replace "-Value" by "1+~Value" in the following commented code to avoid |
676 | // MSVC warning C4146 |
677 | // return (A | B) & -(A | B); |
678 | return (A | B) & (1 + ~(A | B)); |
679 | } |
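// [Editor's illustrative sketch -- not part of the upstream header. MinAlign
//  yields the largest power of two dividing both arguments, which is why it
//  is a safe alignment to assume after adding them.]
static_assert(MinAlign(8, 12) == 4, "largest power of 2 dividing both 8 and 12");
static_assert(MinAlign(16, 16) == 16, "equal alignments are preserved");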
680 | |
681 | /// Returns the next power of two (in 64-bits) that is strictly greater than A. |
682 | /// Returns zero on overflow. |
683 | inline uint64_t NextPowerOf2(uint64_t A) { |
684 | A |= (A >> 1); |
685 | A |= (A >> 2); |
686 | A |= (A >> 4); |
687 | A |= (A >> 8); |
688 | A |= (A >> 16); |
689 | A |= (A >> 32); |
690 | return A + 1; |
691 | } |
692 | |
693 | /// Returns the power of two which is less than or equal to the given value. |
694 | /// Essentially, it is a floor operation across the domain of powers of two. |
695 | inline uint64_t PowerOf2Floor(uint64_t A) { |
696 | if (!A) return 0; |
697 | return 1ull << (63 - countLeadingZeros(A, ZB_Undefined)); |
698 | } |
699 | |
700 | /// Returns the power of two which is greater than or equal to the given value. |
701 | /// Essentially, it is a ceil operation across the domain of powers of two. |
702 | inline uint64_t PowerOf2Ceil(uint64_t A) { |
703 | if (!A) |
704 | return 0; |
705 | return NextPowerOf2(A - 1); |
706 | } |
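// [Editor's illustrative sketch -- not part of the upstream header. A
//  hypothetical helper contrasting the three power-of-two roundings above.]
inline void powerOf2Example() {
  assert(NextPowerOf2(16) == 32); // strictly greater than the input
  assert(PowerOf2Floor(20) == 16 && PowerOf2Ceil(20) == 32);
}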
707 | |
708 | /// Returns the next integer (mod 2**64) that is greater than or equal to |
709 | /// \p Value and is a multiple of \p Align. \p Align must be non-zero. |
710 | /// |
711 | /// If non-zero \p Skew is specified, the return value will be a minimal |
712 | /// integer that is greater than or equal to \p Value and equal to |
713 | /// \p Align * N + \p Skew for some integer N. If \p Skew is larger than |
714 | /// \p Align, its value is adjusted to '\p Skew mod \p Align'. |
715 | /// |
716 | /// Examples: |
717 | /// \code |
718 | /// alignTo(5, 8) = 8 |
719 | /// alignTo(17, 8) = 24 |
720 | /// alignTo(~0LL, 8) = 0 |
721 | /// alignTo(321, 255) = 510 |
722 | /// |
723 | /// alignTo(5, 8, 7) = 7 |
724 | /// alignTo(17, 8, 1) = 17 |
725 | /// alignTo(~0LL, 8, 3) = 3 |
726 | /// alignTo(321, 255, 42) = 552 |
727 | /// \endcode |
728 | inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew = 0) { |
729 | assert(Align != 0u && "Align can't be 0."); |
730 | Skew %= Align; |
731 | return (Value + Align - 1 - Skew) / Align * Align + Skew; |
732 | } |
733 | |
734 | /// Returns the next integer (mod 2**64) that is greater than or equal to |
735 | /// \p Value and is a multiple of \c Align. \c Align must be non-zero. |
736 | template <uint64_t Align> constexpr inline uint64_t alignTo(uint64_t Value) { |
737 | static_assert(Align != 0u, "Align must be non-zero"); |
738 | return (Value + Align - 1) / Align * Align; |
739 | } |
740 | |
741 | /// Returns the integer ceil(Numerator / Denominator). |
742 | inline uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator) { |
743 | return alignTo(Numerator, Denominator) / Denominator; |
744 | } |
745 | |
746 | /// Returns the integer nearest(Numerator / Denominator). |
747 | inline uint64_t divideNearest(uint64_t Numerator, uint64_t Denominator) { |
748 | return (Numerator + (Denominator / 2)) / Denominator; |
749 | } |
750 | |
751 | /// Returns the largest uint64_t less than or equal to \p Value and is |
752 | /// \p Skew mod \p Align. \p Align must be non-zero |
753 | inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) { |
754 | assert(Align != 0u && "Align can't be 0."); |
755 | Skew %= Align; |
756 | return (Value - Skew) / Align * Align + Skew; |
757 | } |
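// [Editor's illustrative sketch -- not part of the upstream header. Rounding
//  up, rounding down, and the division helpers built on top of alignTo; the
//  function name is hypothetical.]
inline void alignToExample() {
  assert(alignTo(17, 8) == 24 && alignDown(17, 8) == 16);
  assert(alignTo(17, 8, /*Skew=*/1) == 17);   // 17 is already 1 (mod 8)
  assert(alignDown(17, 8, /*Skew=*/3) == 11); // largest value <= 17 that is 3 (mod 8)
  assert(divideCeil(17, 8) == 3 && divideNearest(17, 8) == 2);
}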
758 | |
759 | /// Sign-extend the number in the bottom B bits of X to a 32-bit integer. |
760 | /// Requires 0 < B <= 32. |
761 | template <unsigned B> constexpr inline int32_t SignExtend32(uint32_t X) { |
762 | static_assert(B > 0, "Bit width can't be 0."); |
763 | static_assert(B <= 32, "Bit width out of range."); |
764 | return int32_t(X << (32 - B)) >> (32 - B); |
765 | } |
766 | |
767 | /// Sign-extend the number in the bottom B bits of X to a 32-bit integer. |
768 | /// Requires 0 < B <= 32. |
769 | inline int32_t SignExtend32(uint32_t X, unsigned B) { |
770 | assert(B > 0 && "Bit width can't be 0."); |
771 | assert(B <= 32 && "Bit width out of range."); |
772 | return int32_t(X << (32 - B)) >> (32 - B); |
773 | } |
774 | |
775 | /// Sign-extend the number in the bottom B bits of X to a 64-bit integer. |
776 | /// Requires 0 < B <= 64. |
777 | template <unsigned B> constexpr inline int64_t SignExtend64(uint64_t x) { |
778 | static_assert(B > 0, "Bit width can't be 0."); |
779 | static_assert(B <= 64, "Bit width out of range."); |
780 | return int64_t(x << (64 - B)) >> (64 - B); |
781 | } |
782 | |
783 | /// Sign-extend the number in the bottom B bits of X to a 64-bit integer. |
784 | /// Requires 0 < B <= 64. |
785 | inline int64_t SignExtend64(uint64_t X, unsigned B) { |
786 | assert(B > 0 && "Bit width can't be 0."); |
787 | assert(B <= 64 && "Bit width out of range."); |
788 | return int64_t(X << (64 - B)) >> (64 - B); |
789 | } |
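// [Editor's illustrative sketch -- not part of the upstream header. The
//  expected values assume the two's-complement, arithmetic-right-shift hosts
//  LLVM supports.]
static_assert(SignExtend32<4>(0xF) == -1, "0b1111 sign-extends to -1");
static_assert(SignExtend64<8>(0x80) == -128, "0x80 sign-extends to -128");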
790 | |
791 | /// Subtract two unsigned integers, X and Y, of type T and return the absolute |
792 | /// value of the result. |
793 | template <typename T> |
794 | std::enable_if_t<std::is_unsigned<T>::value, T> AbsoluteDifference(T X, T Y) { |
795 | return X > Y ? (X - Y) : (Y - X); |
796 | } |
797 | |
798 | /// Add two unsigned integers, X and Y, of type T. Clamp the result to the |
799 | /// maximum representable value of T on overflow. ResultOverflowed indicates if |
800 | /// the result is larger than the maximum representable value of type T. |
801 | template <typename T> |
802 | std::enable_if_t<std::is_unsigned<T>::value, T> |
803 | SaturatingAdd(T X, T Y, bool *ResultOverflowed = nullptr) { |
804 | bool Dummy; |
805 | bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy; |
806 | // Hacker's Delight, p. 29 |
807 | T Z = X + Y; |
808 | Overflowed = (Z < X || Z < Y); |
809 | if (Overflowed) |
810 | return std::numeric_limits<T>::max(); |
811 | else |
812 | return Z; |
813 | } |
814 | |
815 | /// Multiply two unsigned integers, X and Y, of type T. Clamp the result to the |
816 | /// maximum representable value of T on overflow. ResultOverflowed indicates if |
817 | /// the result is larger than the maximum representable value of type T. |
818 | template <typename T> |
819 | std::enable_if_t<std::is_unsigned<T>::value, T> |
820 | SaturatingMultiply(T X, T Y, bool *ResultOverflowed = nullptr) { |
821 | bool Dummy; |
822 | bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy; |
823 | |
824 | // Hacker's Delight, p. 30 has a different algorithm, but we don't use that |
825 | // because it fails for uint16_t (where multiplication can have undefined |
826 | // behavior due to promotion to int), and requires a division in addition |
827 | // to the multiplication. |
828 | |
829 | Overflowed = false; |
830 | |
831 | // Log2(Z) would be either Log2Z or Log2Z + 1. |
832 | // Special case: if X or Y is 0, Log2_64 gives -1, and Log2Z |
833 | // will necessarily be less than Log2Max as desired. |
834 | int Log2Z = Log2_64(X) + Log2_64(Y); |
835 | const T Max = std::numeric_limits<T>::max(); |
836 | int Log2Max = Log2_64(Max); |
837 | if (Log2Z < Log2Max) { |
838 | return X * Y; |
839 | } |
840 | if (Log2Z > Log2Max) { |
841 | Overflowed = true; |
842 | return Max; |
843 | } |
844 | |
845 | // We're going to use the top bit, and maybe overflow one |
846 | // bit past it. Multiply all but the bottom bit then add |
847 | // that on at the end. |
848 | T Z = (X >> 1) * Y; |
849 | if (Z & ~(Max >> 1)) { |
850 | Overflowed = true; |
851 | return Max; |
852 | } |
853 | Z <<= 1; |
854 | if (X & 1) |
855 | return SaturatingAdd(Z, Y, ResultOverflowed); |
856 | |
857 | return Z; |
858 | } |
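// [Editor's illustrative sketch -- not part of the upstream header. A
//  hypothetical helper showing the clamping behaviour on overflow.]
inline void saturatingExample() {
  bool Overflowed = false;
  assert(SaturatingAdd<uint8_t>(200, 100, &Overflowed) == 255 && Overflowed);
  assert(SaturatingMultiply<uint16_t>(300, 300, &Overflowed) == 65535 && Overflowed);
  assert(SaturatingAdd<uint8_t>(1, 2, &Overflowed) == 3 && !Overflowed);
}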
859 | |
860 | /// Multiply two unsigned integers, X and Y, and add the unsigned integer, A to |
861 | /// the product. Clamp the result to the maximum representable value of T on |
862 | /// overflow. ResultOverflowed indicates if the result is larger than the |
863 | /// maximum representable value of type T. |
864 | template <typename T> |
865 | std::enable_if_t<std::is_unsigned<T>::value, T> |
866 | SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed = nullptr) { |
867 | bool Dummy; |
868 | bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy; |
869 | |
870 | T Product = SaturatingMultiply(X, Y, &Overflowed); |
871 | if (Overflowed) |
872 | return Product; |
873 | |
874 | return SaturatingAdd(A, Product, &Overflowed); |
875 | } |
876 | |
877 | /// Use this rather than HUGE_VALF; the latter causes warnings on MSVC. |
878 | extern const float huge_valf; |
879 | |
880 | |
881 | /// Add two signed integers, computing the two's complement truncated result, |
882 | /// returning true if overflow occurred. |
883 | template <typename T> |
884 | std::enable_if_t<std::is_signed<T>::value, T> AddOverflow(T X, T Y, T &Result) { |
885 | #if __has_builtin(__builtin_add_overflow) |
886 | return __builtin_add_overflow(X, Y, &Result); |
887 | #else |
888 | // Perform the unsigned addition. |
889 | using U = std::make_unsigned_t<T>; |
890 | const U UX = static_cast<U>(X); |
891 | const U UY = static_cast<U>(Y); |
892 | const U UResult = UX + UY; |
893 | |
894 | // Convert to signed. |
895 | Result = static_cast<T>(UResult); |
896 | |
897 | // Adding two positive numbers should result in a positive number. |
898 | if (X > 0 && Y > 0) |
899 | return Result <= 0; |
900 | // Adding two negatives should result in a negative number. |
901 | if (X < 0 && Y < 0) |
902 | return Result >= 0; |
903 | return false; |
904 | #endif |
905 | } |
906 | |
907 | /// Subtract two signed integers, computing the two's complement truncated |
908 | /// result, returning true if an overflow occurred. |
909 | template <typename T> |
910 | std::enable_if_t<std::is_signed<T>::value, T> SubOverflow(T X, T Y, T &Result) { |
911 | #if __has_builtin(__builtin_sub_overflow) |
912 | return __builtin_sub_overflow(X, Y, &Result); |
913 | #else |
914 | // Perform the unsigned addition. |
915 | using U = std::make_unsigned_t<T>; |
916 | const U UX = static_cast<U>(X); |
917 | const U UY = static_cast<U>(Y); |
918 | const U UResult = UX - UY; |
919 | |
920 | // Convert to signed. |
921 | Result = static_cast<T>(UResult); |
922 | |
923 | // Subtracting a positive number from a negative results in a negative number. |
924 | if (X <= 0 && Y > 0) |
925 | return Result >= 0; |
926 | // Subtracting a negative number from a positive results in a positive number. |
927 | if (X >= 0 && Y < 0) |
928 | return Result <= 0; |
929 | return false; |
930 | #endif |
931 | } |
932 | |
933 | /// Multiply two signed integers, computing the two's complement truncated |
934 | /// result, returning true if an overflow occurred. |
935 | template <typename T> |
936 | std::enable_if_t<std::is_signed<T>::value, T> MulOverflow(T X, T Y, T &Result) { |
937 | // Perform the unsigned multiplication on absolute values. |
938 | using U = std::make_unsigned_t<T>; |
939 | const U UX = X < 0 ? (0 - static_cast<U>(X)) : static_cast<U>(X); |
940 | const U UY = Y < 0 ? (0 - static_cast<U>(Y)) : static_cast<U>(Y); |
941 | const U UResult = UX * UY; |
942 | |
943 | // Convert to signed. |
944 | const bool IsNegative = (X < 0) ^ (Y < 0); |
945 | Result = IsNegative ? (0 - UResult) : UResult; |
946 | |
947 | // If any of the args was 0, result is 0 and no overflow occurs. |
948 | if (UX == 0 || UY == 0) |
949 | return false; |
950 | |
951 | // UX and UY are in [1, 2^n], where n is the number of digits. |
952 | // Check how the max allowed absolute value (2^n for negative, 2^(n-1) for |
953 | // positive) divided by an argument compares to the other. |
954 | if (IsNegative) |
955 | return UX > (static_cast<U>(std::numeric_limits<T>::max()) + U(1)) / UY; |
956 | else |
957 | return UX > (static_cast<U>(std::numeric_limits<T>::max())) / UY; |
958 | } |
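// [Editor's illustrative sketch -- not part of the upstream header. The
//  checked-arithmetic helpers above report overflow through a truthy return
//  value while still writing the truncated result; the name is hypothetical.]
inline void overflowCheckExample() {
  int32_t R;
  assert(AddOverflow<int32_t>(std::numeric_limits<int32_t>::max(), 1, R)); // wraps, reports overflow
  assert(!SubOverflow<int32_t>(1, 1, R) && R == 0);                        // no overflow
  assert(MulOverflow<int32_t>(1 << 20, 1 << 20, R));                       // 2^40 does not fit
}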
959 | |
960 | } // End llvm namespace |
961 | |
962 | #endif |