File: | src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/CodeGen/AtomicExpandPass.cpp |
Warning: | line 1742, column 8 1st function call argument is an uninitialized value |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===// | ||||
2 | // | ||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||
6 | // | ||||
7 | //===----------------------------------------------------------------------===// | ||||
8 | // | ||||
9 | // This file contains a pass (at IR level) to replace atomic instructions with | ||||
10 | // __atomic_* library calls, or target specific instruction which implement the | ||||
11 | // same semantics in a way which better fits the target backend. This can | ||||
12 | // include the use of (intrinsic-based) load-linked/store-conditional loops, | ||||
13 | // AtomicCmpXchg, or type coercions. | ||||
14 | // | ||||
15 | //===----------------------------------------------------------------------===// | ||||
16 | |||||
17 | #include "llvm/ADT/ArrayRef.h" | ||||
18 | #include "llvm/ADT/STLExtras.h" | ||||
19 | #include "llvm/ADT/SmallVector.h" | ||||
20 | #include "llvm/CodeGen/AtomicExpandUtils.h" | ||||
21 | #include "llvm/CodeGen/RuntimeLibcalls.h" | ||||
22 | #include "llvm/CodeGen/TargetLowering.h" | ||||
23 | #include "llvm/CodeGen/TargetPassConfig.h" | ||||
24 | #include "llvm/CodeGen/TargetSubtargetInfo.h" | ||||
25 | #include "llvm/CodeGen/ValueTypes.h" | ||||
26 | #include "llvm/IR/Attributes.h" | ||||
27 | #include "llvm/IR/BasicBlock.h" | ||||
28 | #include "llvm/IR/Constant.h" | ||||
29 | #include "llvm/IR/Constants.h" | ||||
30 | #include "llvm/IR/DataLayout.h" | ||||
31 | #include "llvm/IR/DerivedTypes.h" | ||||
32 | #include "llvm/IR/Function.h" | ||||
33 | #include "llvm/IR/IRBuilder.h" | ||||
34 | #include "llvm/IR/InstIterator.h" | ||||
35 | #include "llvm/IR/Instruction.h" | ||||
36 | #include "llvm/IR/Instructions.h" | ||||
37 | #include "llvm/IR/Module.h" | ||||
38 | #include "llvm/IR/Type.h" | ||||
39 | #include "llvm/IR/User.h" | ||||
40 | #include "llvm/IR/Value.h" | ||||
41 | #include "llvm/InitializePasses.h" | ||||
42 | #include "llvm/Pass.h" | ||||
43 | #include "llvm/Support/AtomicOrdering.h" | ||||
44 | #include "llvm/Support/Casting.h" | ||||
45 | #include "llvm/Support/Debug.h" | ||||
46 | #include "llvm/Support/ErrorHandling.h" | ||||
47 | #include "llvm/Support/raw_ostream.h" | ||||
48 | #include "llvm/Target/TargetMachine.h" | ||||
49 | #include <cassert> | ||||
50 | #include <cstdint> | ||||
51 | #include <iterator> | ||||
52 | |||||
53 | using namespace llvm; | ||||
54 | |||||
55 | #define DEBUG_TYPE"atomic-expand" "atomic-expand" | ||||
56 | |||||
namespace {

// IR-level pass that rewrites atomic instructions (load, store, atomicrmw,
// cmpxchg) into forms the target can lower: __atomic_* libcalls, LL/SC or
// cmpxchg loops, explicit fences, or target masked intrinsics.
class AtomicExpand: public FunctionPass {
  // Lowering info for the current function's subtarget; set in runOnFunction.
  const TargetLowering *TLI = nullptr;

public:
  static char ID; // Pass identification, replacement for typeid

  AtomicExpand() : FunctionPass(ID) {
    initializeAtomicExpandPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override;

private:
  // Wrap I with target-emitted leading/trailing fences for Order.
  bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
  // Return the iN type with the same store width as T.
  IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
  // FP->integer bitcast idiom conversions (migration aids for backends).
  LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
  bool tryExpandAtomicLoad(LoadInst *LI);
  bool expandAtomicLoadToLL(LoadInst *LI);
  bool expandAtomicLoadToCmpXchg(LoadInst *LI);
  StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
  bool expandAtomicStore(StoreInst *SI);
  bool tryExpandAtomicRMW(AtomicRMWInst *AI);
  AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
  // Emit a load-linked/store-conditional loop around PerformOp.
  Value *
  insertRMWLLSCLoop(IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
                    Align AddrAlign, AtomicOrdering MemOpOrder,
                    function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
  void expandAtomicOpToLLSC(
      Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign,
      AtomicOrdering MemOpOrder,
      function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
  // Sub-word (e.g. i8/i16) atomics emulated on a wider aligned word.
  void expandPartwordAtomicRMW(
      AtomicRMWInst *I,
      TargetLoweringBase::AtomicExpansionKind ExpansionKind);
  AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
  bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
  void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
  void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);

  AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
  // Emit a cmpxchg retry loop around PerformOp (static: also used by the
  // free function expandAtomicRMWToCmpXchg declared as a friend below).
  static Value *insertRMWCmpXchgLoop(
      IRBuilder<> &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
      AtomicOrdering MemOpOrder, SyncScope::ID SSID,
      function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
      CreateCmpXchgInstFun CreateCmpXchg);
  bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);

  bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
  // atomicrmw ops that never change memory (e.g. `or x, 0`) become loads.
  bool isIdempotentRMW(AtomicRMWInst *RMWI);
  bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);

  // __atomic_* libcall emission for unsupported sizes/alignments.
  bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
                               Value *PointerOperand, Value *ValueOperand,
                               Value *CASExpected, AtomicOrdering Ordering,
                               AtomicOrdering Ordering2,
                               ArrayRef<RTLIB::Libcall> Libcalls);
  void expandAtomicLoadToLibcall(LoadInst *LI);
  void expandAtomicStoreToLibcall(StoreInst *LI);
  void expandAtomicRMWToLibcall(AtomicRMWInst *I);
  void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);

  friend bool
  llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
                                 CreateCmpXchgInstFun CreateCmpXchg);
};

} // end anonymous namespace
126 | |||||
// Pass identity: the *address* of ID is what uniquely identifies the pass.
char AtomicExpand::ID = 0;

// Exported handle so TargetPassConfig can reference this pass by ID.
char &llvm::AtomicExpandID = AtomicExpand::ID;
130 | |||||
131 | INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions",static void *initializeAtomicExpandPassOnce(PassRegistry & Registry) { PassInfo *PI = new PassInfo( "Expand Atomic instructions" , "atomic-expand", &AtomicExpand::ID, PassInfo::NormalCtor_t (callDefaultCtor<AtomicExpand>), false, false); Registry .registerPass(*PI, true); return PI; } static llvm::once_flag InitializeAtomicExpandPassFlag; void llvm::initializeAtomicExpandPass (PassRegistry &Registry) { llvm::call_once(InitializeAtomicExpandPassFlag , initializeAtomicExpandPassOnce, std::ref(Registry)); } | ||||
132 | false, false)static void *initializeAtomicExpandPassOnce(PassRegistry & Registry) { PassInfo *PI = new PassInfo( "Expand Atomic instructions" , "atomic-expand", &AtomicExpand::ID, PassInfo::NormalCtor_t (callDefaultCtor<AtomicExpand>), false, false); Registry .registerPass(*PI, true); return PI; } static llvm::once_flag InitializeAtomicExpandPassFlag; void llvm::initializeAtomicExpandPass (PassRegistry &Registry) { llvm::call_once(InitializeAtomicExpandPassFlag , initializeAtomicExpandPassOnce, std::ref(Registry)); } | ||||
133 | |||||
134 | FunctionPass *llvm::createAtomicExpandPass() { return new AtomicExpand(); } | ||||
135 | |||||
136 | // Helper functions to retrieve the size of atomic instructions. | ||||
137 | static unsigned getAtomicOpSize(LoadInst *LI) { | ||||
138 | const DataLayout &DL = LI->getModule()->getDataLayout(); | ||||
139 | return DL.getTypeStoreSize(LI->getType()); | ||||
140 | } | ||||
141 | |||||
142 | static unsigned getAtomicOpSize(StoreInst *SI) { | ||||
143 | const DataLayout &DL = SI->getModule()->getDataLayout(); | ||||
144 | return DL.getTypeStoreSize(SI->getValueOperand()->getType()); | ||||
145 | } | ||||
146 | |||||
147 | static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) { | ||||
148 | const DataLayout &DL = RMWI->getModule()->getDataLayout(); | ||||
149 | return DL.getTypeStoreSize(RMWI->getValOperand()->getType()); | ||||
150 | } | ||||
151 | |||||
152 | static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) { | ||||
153 | const DataLayout &DL = CASI->getModule()->getDataLayout(); | ||||
154 | return DL.getTypeStoreSize(CASI->getCompareOperand()->getType()); | ||||
155 | } | ||||
156 | |||||
157 | // Determine if a particular atomic operation has a supported size, | ||||
158 | // and is of appropriate alignment, to be passed through for target | ||||
159 | // lowering. (Versus turning into a __atomic libcall) | ||||
160 | template <typename Inst> | ||||
161 | static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) { | ||||
162 | unsigned Size = getAtomicOpSize(I); | ||||
163 | Align Alignment = I->getAlign(); | ||||
164 | return Alignment >= Size && | ||||
165 | Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8; | ||||
166 | } | ||||
167 | |||||
168 | bool AtomicExpand::runOnFunction(Function &F) { | ||||
169 | auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); | ||||
170 | if (!TPC) | ||||
| |||||
171 | return false; | ||||
172 | |||||
173 | auto &TM = TPC->getTM<TargetMachine>(); | ||||
174 | if (!TM.getSubtargetImpl(F)->enableAtomicExpand()) | ||||
175 | return false; | ||||
176 | TLI = TM.getSubtargetImpl(F)->getTargetLowering(); | ||||
177 | |||||
178 | SmallVector<Instruction *, 1> AtomicInsts; | ||||
179 | |||||
180 | // Changing control-flow while iterating through it is a bad idea, so gather a | ||||
181 | // list of all atomic instructions before we start. | ||||
182 | for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) { | ||||
183 | Instruction *I = &*II; | ||||
184 | if (I->isAtomic() && !isa<FenceInst>(I)) | ||||
185 | AtomicInsts.push_back(I); | ||||
186 | } | ||||
187 | |||||
188 | bool MadeChange = false; | ||||
189 | for (auto I : AtomicInsts) { | ||||
190 | auto LI = dyn_cast<LoadInst>(I); | ||||
191 | auto SI = dyn_cast<StoreInst>(I); | ||||
192 | auto RMWI = dyn_cast<AtomicRMWInst>(I); | ||||
193 | auto CASI = dyn_cast<AtomicCmpXchgInst>(I); | ||||
194 | assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction")((void)0); | ||||
195 | |||||
196 | // If the Size/Alignment is not supported, replace with a libcall. | ||||
197 | if (LI
| ||||
198 | if (!atomicSizeSupported(TLI, LI)) { | ||||
199 | expandAtomicLoadToLibcall(LI); | ||||
200 | MadeChange = true; | ||||
201 | continue; | ||||
202 | } | ||||
203 | } else if (SI
| ||||
204 | if (!atomicSizeSupported(TLI, SI)) { | ||||
205 | expandAtomicStoreToLibcall(SI); | ||||
206 | MadeChange = true; | ||||
207 | continue; | ||||
208 | } | ||||
209 | } else if (RMWI
| ||||
210 | if (!atomicSizeSupported(TLI, RMWI)) { | ||||
211 | expandAtomicRMWToLibcall(RMWI); | ||||
212 | MadeChange = true; | ||||
213 | continue; | ||||
214 | } | ||||
215 | } else if (CASI) { | ||||
216 | if (!atomicSizeSupported(TLI, CASI)) { | ||||
217 | expandAtomicCASToLibcall(CASI); | ||||
218 | MadeChange = true; | ||||
219 | continue; | ||||
220 | } | ||||
221 | } | ||||
222 | |||||
223 | if (TLI->shouldInsertFencesForAtomic(I)) { | ||||
224 | auto FenceOrdering = AtomicOrdering::Monotonic; | ||||
225 | if (LI && isAcquireOrStronger(LI->getOrdering())) { | ||||
226 | FenceOrdering = LI->getOrdering(); | ||||
227 | LI->setOrdering(AtomicOrdering::Monotonic); | ||||
228 | } else if (SI && isReleaseOrStronger(SI->getOrdering())) { | ||||
229 | FenceOrdering = SI->getOrdering(); | ||||
230 | SI->setOrdering(AtomicOrdering::Monotonic); | ||||
231 | } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) || | ||||
232 | isAcquireOrStronger(RMWI->getOrdering()))) { | ||||
233 | FenceOrdering = RMWI->getOrdering(); | ||||
234 | RMWI->setOrdering(AtomicOrdering::Monotonic); | ||||
235 | } else if (CASI && | ||||
236 | TLI->shouldExpandAtomicCmpXchgInIR(CASI) == | ||||
237 | TargetLoweringBase::AtomicExpansionKind::None && | ||||
238 | (isReleaseOrStronger(CASI->getSuccessOrdering()) || | ||||
239 | isAcquireOrStronger(CASI->getSuccessOrdering()) || | ||||
240 | isAcquireOrStronger(CASI->getFailureOrdering()))) { | ||||
241 | // If a compare and swap is lowered to LL/SC, we can do smarter fence | ||||
242 | // insertion, with a stronger one on the success path than on the | ||||
243 | // failure path. As a result, fence insertion is directly done by | ||||
244 | // expandAtomicCmpXchg in that case. | ||||
245 | FenceOrdering = CASI->getMergedOrdering(); | ||||
246 | CASI->setSuccessOrdering(AtomicOrdering::Monotonic); | ||||
247 | CASI->setFailureOrdering(AtomicOrdering::Monotonic); | ||||
248 | } | ||||
249 | |||||
250 | if (FenceOrdering != AtomicOrdering::Monotonic) { | ||||
251 | MadeChange |= bracketInstWithFences(I, FenceOrdering); | ||||
252 | } | ||||
253 | } | ||||
254 | |||||
255 | if (LI) { | ||||
256 | if (LI->getType()->isFloatingPointTy()) { | ||||
257 | // TODO: add a TLI hook to control this so that each target can | ||||
258 | // convert to lowering the original type one at a time. | ||||
259 | LI = convertAtomicLoadToIntegerType(LI); | ||||
260 | assert(LI->getType()->isIntegerTy() && "invariant broken")((void)0); | ||||
261 | MadeChange = true; | ||||
262 | } | ||||
263 | |||||
264 | MadeChange |= tryExpandAtomicLoad(LI); | ||||
265 | } else if (SI) { | ||||
266 | if (SI->getValueOperand()->getType()->isFloatingPointTy()) { | ||||
267 | // TODO: add a TLI hook to control this so that each target can | ||||
268 | // convert to lowering the original type one at a time. | ||||
269 | SI = convertAtomicStoreToIntegerType(SI); | ||||
270 | assert(SI->getValueOperand()->getType()->isIntegerTy() &&((void)0) | ||||
271 | "invariant broken")((void)0); | ||||
272 | MadeChange = true; | ||||
273 | } | ||||
274 | |||||
275 | if (TLI->shouldExpandAtomicStoreInIR(SI)) | ||||
276 | MadeChange |= expandAtomicStore(SI); | ||||
277 | } else if (RMWI) { | ||||
278 | // There are two different ways of expanding RMW instructions: | ||||
279 | // - into a load if it is idempotent | ||||
280 | // - into a Cmpxchg/LL-SC loop otherwise | ||||
281 | // we try them in that order. | ||||
282 | |||||
283 | if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) { | ||||
284 | MadeChange = true; | ||||
285 | } else { | ||||
286 | AtomicRMWInst::BinOp Op = RMWI->getOperation(); | ||||
287 | if (Op == AtomicRMWInst::Xchg && | ||||
288 | RMWI->getValOperand()->getType()->isFloatingPointTy()) { | ||||
289 | // TODO: add a TLI hook to control this so that each target can | ||||
290 | // convert to lowering the original type one at a time. | ||||
291 | RMWI = convertAtomicXchgToIntegerType(RMWI); | ||||
292 | assert(RMWI->getValOperand()->getType()->isIntegerTy() &&((void)0) | ||||
293 | "invariant broken")((void)0); | ||||
294 | MadeChange = true; | ||||
295 | } | ||||
296 | unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; | ||||
297 | unsigned ValueSize = getAtomicOpSize(RMWI); | ||||
298 | if (ValueSize < MinCASSize && | ||||
299 | (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor || | ||||
300 | Op == AtomicRMWInst::And)) { | ||||
301 | RMWI = widenPartwordAtomicRMW(RMWI); | ||||
302 | MadeChange = true; | ||||
303 | } | ||||
304 | |||||
305 | MadeChange |= tryExpandAtomicRMW(RMWI); | ||||
306 | } | ||||
307 | } else if (CASI) { | ||||
308 | // TODO: when we're ready to make the change at the IR level, we can | ||||
309 | // extend convertCmpXchgToInteger for floating point too. | ||||
310 | assert(!CASI->getCompareOperand()->getType()->isFloatingPointTy() &&((void)0) | ||||
311 | "unimplemented - floating point not legal at IR level")((void)0); | ||||
312 | if (CASI->getCompareOperand()->getType()->isPointerTy() ) { | ||||
313 | // TODO: add a TLI hook to control this so that each target can | ||||
314 | // convert to lowering the original type one at a time. | ||||
315 | CASI = convertCmpXchgToIntegerType(CASI); | ||||
316 | assert(CASI->getCompareOperand()->getType()->isIntegerTy() &&((void)0) | ||||
317 | "invariant broken")((void)0); | ||||
318 | MadeChange = true; | ||||
319 | } | ||||
320 | |||||
321 | MadeChange |= tryExpandAtomicCmpXchg(CASI); | ||||
322 | } | ||||
323 | } | ||||
324 | return MadeChange; | ||||
325 | } | ||||
326 | |||||
327 | bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) { | ||||
328 | IRBuilder<> Builder(I); | ||||
329 | |||||
330 | auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order); | ||||
331 | |||||
332 | auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order); | ||||
333 | // We have a guard here because not every atomic operation generates a | ||||
334 | // trailing fence. | ||||
335 | if (TrailingFence) | ||||
336 | TrailingFence->moveAfter(I); | ||||
337 | |||||
338 | return (LeadingFence || TrailingFence); | ||||
339 | } | ||||
340 | |||||
341 | /// Get the iX type with the same bitwidth as T. | ||||
342 | IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T, | ||||
343 | const DataLayout &DL) { | ||||
344 | EVT VT = TLI->getMemValueType(DL, T); | ||||
345 | unsigned BitWidth = VT.getStoreSizeInBits(); | ||||
346 | assert(BitWidth == VT.getSizeInBits() && "must be a power of two")((void)0); | ||||
347 | return IntegerType::get(T->getContext(), BitWidth); | ||||
348 | } | ||||
349 | |||||
350 | /// Convert an atomic load of a non-integral type to an integer load of the | ||||
351 | /// equivalent bitwidth. See the function comment on | ||||
352 | /// convertAtomicStoreToIntegerType for background. | ||||
353 | LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) { | ||||
354 | auto *M = LI->getModule(); | ||||
355 | Type *NewTy = getCorrespondingIntegerType(LI->getType(), | ||||
356 | M->getDataLayout()); | ||||
357 | |||||
358 | IRBuilder<> Builder(LI); | ||||
359 | |||||
360 | Value *Addr = LI->getPointerOperand(); | ||||
361 | Type *PT = PointerType::get(NewTy, | ||||
362 | Addr->getType()->getPointerAddressSpace()); | ||||
363 | Value *NewAddr = Builder.CreateBitCast(Addr, PT); | ||||
364 | |||||
365 | auto *NewLI = Builder.CreateLoad(NewTy, NewAddr); | ||||
366 | NewLI->setAlignment(LI->getAlign()); | ||||
367 | NewLI->setVolatile(LI->isVolatile()); | ||||
368 | NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID()); | ||||
369 | LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n")do { } while (false); | ||||
370 | |||||
371 | Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType()); | ||||
372 | LI->replaceAllUsesWith(NewVal); | ||||
373 | LI->eraseFromParent(); | ||||
374 | return NewLI; | ||||
375 | } | ||||
376 | |||||
377 | AtomicRMWInst * | ||||
378 | AtomicExpand::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) { | ||||
379 | auto *M = RMWI->getModule(); | ||||
380 | Type *NewTy = | ||||
381 | getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout()); | ||||
382 | |||||
383 | IRBuilder<> Builder(RMWI); | ||||
384 | |||||
385 | Value *Addr = RMWI->getPointerOperand(); | ||||
386 | Value *Val = RMWI->getValOperand(); | ||||
387 | Type *PT = PointerType::get(NewTy, RMWI->getPointerAddressSpace()); | ||||
388 | Value *NewAddr = Builder.CreateBitCast(Addr, PT); | ||||
389 | Value *NewVal = Builder.CreateBitCast(Val, NewTy); | ||||
390 | |||||
391 | auto *NewRMWI = | ||||
392 | Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, NewAddr, NewVal, | ||||
393 | RMWI->getAlign(), RMWI->getOrdering()); | ||||
394 | NewRMWI->setVolatile(RMWI->isVolatile()); | ||||
395 | LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n")do { } while (false); | ||||
396 | |||||
397 | Value *NewRVal = Builder.CreateBitCast(NewRMWI, RMWI->getType()); | ||||
398 | RMWI->replaceAllUsesWith(NewRVal); | ||||
399 | RMWI->eraseFromParent(); | ||||
400 | return NewRMWI; | ||||
401 | } | ||||
402 | |||||
403 | bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) { | ||||
404 | switch (TLI->shouldExpandAtomicLoadInIR(LI)) { | ||||
405 | case TargetLoweringBase::AtomicExpansionKind::None: | ||||
406 | return false; | ||||
407 | case TargetLoweringBase::AtomicExpansionKind::LLSC: | ||||
408 | expandAtomicOpToLLSC( | ||||
409 | LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(), | ||||
410 | LI->getOrdering(), | ||||
411 | [](IRBuilder<> &Builder, Value *Loaded) { return Loaded; }); | ||||
412 | return true; | ||||
413 | case TargetLoweringBase::AtomicExpansionKind::LLOnly: | ||||
414 | return expandAtomicLoadToLL(LI); | ||||
415 | case TargetLoweringBase::AtomicExpansionKind::CmpXChg: | ||||
416 | return expandAtomicLoadToCmpXchg(LI); | ||||
417 | default: | ||||
418 | llvm_unreachable("Unhandled case in tryExpandAtomicLoad")__builtin_unreachable(); | ||||
419 | } | ||||
420 | } | ||||
421 | |||||
422 | bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) { | ||||
423 | IRBuilder<> Builder(LI); | ||||
424 | |||||
425 | // On some architectures, load-linked instructions are atomic for larger | ||||
426 | // sizes than normal loads. For example, the only 64-bit load guaranteed | ||||
427 | // to be single-copy atomic by ARM is an ldrexd (A3.5.3). | ||||
428 | Value *Val = TLI->emitLoadLinked(Builder, LI->getType(), | ||||
429 | LI->getPointerOperand(), LI->getOrdering()); | ||||
430 | TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder); | ||||
431 | |||||
432 | LI->replaceAllUsesWith(Val); | ||||
433 | LI->eraseFromParent(); | ||||
434 | |||||
435 | return true; | ||||
436 | } | ||||
437 | |||||
438 | bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) { | ||||
439 | IRBuilder<> Builder(LI); | ||||
440 | AtomicOrdering Order = LI->getOrdering(); | ||||
441 | if (Order == AtomicOrdering::Unordered) | ||||
442 | Order = AtomicOrdering::Monotonic; | ||||
443 | |||||
444 | Value *Addr = LI->getPointerOperand(); | ||||
445 | Type *Ty = LI->getType(); | ||||
446 | Constant *DummyVal = Constant::getNullValue(Ty); | ||||
447 | |||||
448 | Value *Pair = Builder.CreateAtomicCmpXchg( | ||||
449 | Addr, DummyVal, DummyVal, LI->getAlign(), Order, | ||||
450 | AtomicCmpXchgInst::getStrongestFailureOrdering(Order)); | ||||
451 | Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded"); | ||||
452 | |||||
453 | LI->replaceAllUsesWith(Loaded); | ||||
454 | LI->eraseFromParent(); | ||||
455 | |||||
456 | return true; | ||||
457 | } | ||||
458 | |||||
459 | /// Convert an atomic store of a non-integral type to an integer store of the | ||||
460 | /// equivalent bitwidth. We used to not support floating point or vector | ||||
461 | /// atomics in the IR at all. The backends learned to deal with the bitcast | ||||
462 | /// idiom because that was the only way of expressing the notion of a atomic | ||||
463 | /// float or vector store. The long term plan is to teach each backend to | ||||
464 | /// instruction select from the original atomic store, but as a migration | ||||
465 | /// mechanism, we convert back to the old format which the backends understand. | ||||
466 | /// Each backend will need individual work to recognize the new format. | ||||
467 | StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) { | ||||
468 | IRBuilder<> Builder(SI); | ||||
469 | auto *M = SI->getModule(); | ||||
470 | Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(), | ||||
471 | M->getDataLayout()); | ||||
472 | Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy); | ||||
473 | |||||
474 | Value *Addr = SI->getPointerOperand(); | ||||
475 | Type *PT = PointerType::get(NewTy, | ||||
476 | Addr->getType()->getPointerAddressSpace()); | ||||
477 | Value *NewAddr = Builder.CreateBitCast(Addr, PT); | ||||
478 | |||||
479 | StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr); | ||||
480 | NewSI->setAlignment(SI->getAlign()); | ||||
481 | NewSI->setVolatile(SI->isVolatile()); | ||||
482 | NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID()); | ||||
483 | LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n")do { } while (false); | ||||
484 | SI->eraseFromParent(); | ||||
485 | return NewSI; | ||||
486 | } | ||||
487 | |||||
488 | bool AtomicExpand::expandAtomicStore(StoreInst *SI) { | ||||
489 | // This function is only called on atomic stores that are too large to be | ||||
490 | // atomic if implemented as a native store. So we replace them by an | ||||
491 | // atomic swap, that can be implemented for example as a ldrex/strex on ARM | ||||
492 | // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes. | ||||
493 | // It is the responsibility of the target to only signal expansion via | ||||
494 | // shouldExpandAtomicRMW in cases where this is required and possible. | ||||
495 | IRBuilder<> Builder(SI); | ||||
496 | AtomicRMWInst *AI = Builder.CreateAtomicRMW( | ||||
497 | AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(), | ||||
498 | SI->getAlign(), SI->getOrdering()); | ||||
499 | SI->eraseFromParent(); | ||||
500 | |||||
501 | // Now we have an appropriate swap instruction, lower it as usual. | ||||
502 | return tryExpandAtomicRMW(AI); | ||||
503 | } | ||||
504 | |||||
505 | static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr, | ||||
506 | Value *Loaded, Value *NewVal, Align AddrAlign, | ||||
507 | AtomicOrdering MemOpOrder, SyncScope::ID SSID, | ||||
508 | Value *&Success, Value *&NewLoaded) { | ||||
509 | Type *OrigTy = NewVal->getType(); | ||||
510 | |||||
511 | // This code can go away when cmpxchg supports FP types. | ||||
512 | bool NeedBitcast = OrigTy->isFloatingPointTy(); | ||||
513 | if (NeedBitcast) { | ||||
514 | IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits()); | ||||
515 | unsigned AS = Addr->getType()->getPointerAddressSpace(); | ||||
516 | Addr = Builder.CreateBitCast(Addr, IntTy->getPointerTo(AS)); | ||||
517 | NewVal = Builder.CreateBitCast(NewVal, IntTy); | ||||
518 | Loaded = Builder.CreateBitCast(Loaded, IntTy); | ||||
519 | } | ||||
520 | |||||
521 | Value *Pair = Builder.CreateAtomicCmpXchg( | ||||
522 | Addr, Loaded, NewVal, AddrAlign, MemOpOrder, | ||||
523 | AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID); | ||||
524 | Success = Builder.CreateExtractValue(Pair, 1, "success"); | ||||
525 | NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded"); | ||||
526 | |||||
527 | if (NeedBitcast) | ||||
528 | NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy); | ||||
529 | } | ||||
530 | |||||
531 | /// Emit IR to implement the given atomicrmw operation on values in registers, | ||||
532 | /// returning the new value. | ||||
533 | static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder, | ||||
534 | Value *Loaded, Value *Inc) { | ||||
535 | Value *NewVal; | ||||
536 | switch (Op) { | ||||
537 | case AtomicRMWInst::Xchg: | ||||
538 | return Inc; | ||||
539 | case AtomicRMWInst::Add: | ||||
540 | return Builder.CreateAdd(Loaded, Inc, "new"); | ||||
541 | case AtomicRMWInst::Sub: | ||||
542 | return Builder.CreateSub(Loaded, Inc, "new"); | ||||
543 | case AtomicRMWInst::And: | ||||
544 | return Builder.CreateAnd(Loaded, Inc, "new"); | ||||
545 | case AtomicRMWInst::Nand: | ||||
546 | return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new"); | ||||
547 | case AtomicRMWInst::Or: | ||||
548 | return Builder.CreateOr(Loaded, Inc, "new"); | ||||
549 | case AtomicRMWInst::Xor: | ||||
550 | return Builder.CreateXor(Loaded, Inc, "new"); | ||||
551 | case AtomicRMWInst::Max: | ||||
552 | NewVal = Builder.CreateICmpSGT(Loaded, Inc); | ||||
553 | return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); | ||||
554 | case AtomicRMWInst::Min: | ||||
555 | NewVal = Builder.CreateICmpSLE(Loaded, Inc); | ||||
556 | return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); | ||||
557 | case AtomicRMWInst::UMax: | ||||
558 | NewVal = Builder.CreateICmpUGT(Loaded, Inc); | ||||
559 | return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); | ||||
560 | case AtomicRMWInst::UMin: | ||||
561 | NewVal = Builder.CreateICmpULE(Loaded, Inc); | ||||
562 | return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); | ||||
563 | case AtomicRMWInst::FAdd: | ||||
564 | return Builder.CreateFAdd(Loaded, Inc, "new"); | ||||
565 | case AtomicRMWInst::FSub: | ||||
566 | return Builder.CreateFSub(Loaded, Inc, "new"); | ||||
567 | default: | ||||
568 | llvm_unreachable("Unknown atomic op")__builtin_unreachable(); | ||||
569 | } | ||||
570 | } | ||||
571 | |||||
572 | bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { | ||||
573 | switch (TLI->shouldExpandAtomicRMWInIR(AI)) { | ||||
574 | case TargetLoweringBase::AtomicExpansionKind::None: | ||||
575 | return false; | ||||
576 | case TargetLoweringBase::AtomicExpansionKind::LLSC: { | ||||
577 | unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; | ||||
578 | unsigned ValueSize = getAtomicOpSize(AI); | ||||
579 | if (ValueSize < MinCASSize) { | ||||
580 | expandPartwordAtomicRMW(AI, | ||||
581 | TargetLoweringBase::AtomicExpansionKind::LLSC); | ||||
582 | } else { | ||||
583 | auto PerformOp = [&](IRBuilder<> &Builder, Value *Loaded) { | ||||
584 | return performAtomicOp(AI->getOperation(), Builder, Loaded, | ||||
585 | AI->getValOperand()); | ||||
586 | }; | ||||
587 | expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(), | ||||
588 | AI->getAlign(), AI->getOrdering(), PerformOp); | ||||
589 | } | ||||
590 | return true; | ||||
591 | } | ||||
592 | case TargetLoweringBase::AtomicExpansionKind::CmpXChg: { | ||||
593 | unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; | ||||
594 | unsigned ValueSize = getAtomicOpSize(AI); | ||||
595 | if (ValueSize < MinCASSize) { | ||||
596 | // TODO: Handle atomicrmw fadd/fsub | ||||
597 | if (AI->getType()->isFloatingPointTy()) | ||||
598 | return false; | ||||
599 | |||||
600 | expandPartwordAtomicRMW(AI, | ||||
601 | TargetLoweringBase::AtomicExpansionKind::CmpXChg); | ||||
602 | } else { | ||||
603 | expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun); | ||||
604 | } | ||||
605 | return true; | ||||
606 | } | ||||
607 | case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: { | ||||
608 | expandAtomicRMWToMaskedIntrinsic(AI); | ||||
609 | return true; | ||||
610 | } | ||||
611 | default: | ||||
612 | llvm_unreachable("Unhandled case in tryExpandAtomicRMW")__builtin_unreachable(); | ||||
613 | } | ||||
614 | } | ||||
615 | |||||
616 | namespace { | ||||
617 | |||||
618 | struct PartwordMaskValues { | ||||
619 | // These three fields are guaranteed to be set by createMaskInstrs. | ||||
620 | Type *WordType = nullptr; | ||||
621 | Type *ValueType = nullptr; | ||||
622 | Value *AlignedAddr = nullptr; | ||||
623 | Align AlignedAddrAlignment; | ||||
624 | // The remaining fields can be null. | ||||
625 | Value *ShiftAmt = nullptr; | ||||
626 | Value *Mask = nullptr; | ||||
627 | Value *Inv_Mask = nullptr; | ||||
628 | }; | ||||
629 | |||||
630 | LLVM_ATTRIBUTE_UNUSED__attribute__((__unused__)) | ||||
631 | raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) { | ||||
632 | auto PrintObj = [&O](auto *V) { | ||||
633 | if (V) | ||||
634 | O << *V; | ||||
635 | else | ||||
636 | O << "nullptr"; | ||||
637 | O << '\n'; | ||||
638 | }; | ||||
639 | O << "PartwordMaskValues {\n"; | ||||
640 | O << " WordType: "; | ||||
641 | PrintObj(PMV.WordType); | ||||
642 | O << " ValueType: "; | ||||
643 | PrintObj(PMV.ValueType); | ||||
644 | O << " AlignedAddr: "; | ||||
645 | PrintObj(PMV.AlignedAddr); | ||||
646 | O << " AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n'; | ||||
647 | O << " ShiftAmt: "; | ||||
648 | PrintObj(PMV.ShiftAmt); | ||||
649 | O << " Mask: "; | ||||
650 | PrintObj(PMV.Mask); | ||||
651 | O << " Inv_Mask: "; | ||||
652 | PrintObj(PMV.Inv_Mask); | ||||
653 | O << "}\n"; | ||||
654 | return O; | ||||
655 | } | ||||
656 | |||||
657 | } // end anonymous namespace | ||||
658 | |||||
659 | /// This is a helper function which builds instructions to provide | ||||
660 | /// values necessary for partword atomic operations. It takes an | ||||
661 | /// incoming address, Addr, and ValueType, and constructs the address, | ||||
662 | /// shift-amounts and masks needed to work with a larger value of size | ||||
663 | /// WordSize. | ||||
664 | /// | ||||
665 | /// AlignedAddr: Addr rounded down to a multiple of WordSize | ||||
666 | /// | ||||
/// ShiftAmt: Number of bits to right-shift a WordSize value loaded
///           from AlignedAddr for it to have the same value as if
///           ValueType was loaded from Addr.
///
/// Mask: Value to mask with the value loaded from AlignedAddr to
///       include only the part that would've been loaded from Addr.
673 | /// | ||||
674 | /// Inv_Mask: The inverse of Mask. | ||||
675 | static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I, | ||||
676 | Type *ValueType, Value *Addr, | ||||
677 | Align AddrAlign, | ||||
678 | unsigned MinWordSize) { | ||||
679 | PartwordMaskValues PMV; | ||||
680 | |||||
681 | Module *M = I->getModule(); | ||||
682 | LLVMContext &Ctx = M->getContext(); | ||||
683 | const DataLayout &DL = M->getDataLayout(); | ||||
684 | unsigned ValueSize = DL.getTypeStoreSize(ValueType); | ||||
685 | |||||
686 | PMV.ValueType = ValueType; | ||||
687 | PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8) | ||||
688 | : ValueType; | ||||
689 | if (PMV.ValueType == PMV.WordType) { | ||||
690 | PMV.AlignedAddr = Addr; | ||||
691 | PMV.AlignedAddrAlignment = AddrAlign; | ||||
692 | PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0); | ||||
693 | PMV.Mask = ConstantInt::get(PMV.ValueType, ~0); | ||||
694 | return PMV; | ||||
695 | } | ||||
696 | |||||
697 | assert(ValueSize < MinWordSize)((void)0); | ||||
698 | |||||
699 | Type *WordPtrType = | ||||
700 | PMV.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace()); | ||||
701 | |||||
702 | // TODO: we could skip some of this if AddrAlign >= MinWordSize. | ||||
703 | Value *AddrInt = Builder.CreatePtrToInt(Addr, DL.getIntPtrType(Ctx)); | ||||
704 | PMV.AlignedAddr = Builder.CreateIntToPtr( | ||||
705 | Builder.CreateAnd(AddrInt, ~(uint64_t)(MinWordSize - 1)), WordPtrType, | ||||
706 | "AlignedAddr"); | ||||
707 | PMV.AlignedAddrAlignment = Align(MinWordSize); | ||||
708 | |||||
709 | Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB"); | ||||
710 | if (DL.isLittleEndian()) { | ||||
711 | // turn bytes into bits | ||||
712 | PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3); | ||||
713 | } else { | ||||
714 | // turn bytes into bits, and count from the other side. | ||||
715 | PMV.ShiftAmt = Builder.CreateShl( | ||||
716 | Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3); | ||||
717 | } | ||||
718 | |||||
719 | PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt"); | ||||
720 | PMV.Mask = Builder.CreateShl( | ||||
721 | ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt, | ||||
722 | "Mask"); | ||||
723 | PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask"); | ||||
724 | return PMV; | ||||
725 | } | ||||
726 | |||||
727 | static Value *extractMaskedValue(IRBuilder<> &Builder, Value *WideWord, | ||||
728 | const PartwordMaskValues &PMV) { | ||||
729 | assert(WideWord->getType() == PMV.WordType && "Widened type mismatch")((void)0); | ||||
730 | if (PMV.WordType == PMV.ValueType) | ||||
731 | return WideWord; | ||||
732 | |||||
733 | Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted"); | ||||
734 | Value *Trunc = Builder.CreateTrunc(Shift, PMV.ValueType, "extracted"); | ||||
735 | return Trunc; | ||||
736 | } | ||||
737 | |||||
738 | static Value *insertMaskedValue(IRBuilder<> &Builder, Value *WideWord, | ||||
739 | Value *Updated, const PartwordMaskValues &PMV) { | ||||
740 | assert(WideWord->getType() == PMV.WordType && "Widened type mismatch")((void)0); | ||||
741 | assert(Updated->getType() == PMV.ValueType && "Value type mismatch")((void)0); | ||||
742 | if (PMV.WordType == PMV.ValueType) | ||||
743 | return Updated; | ||||
744 | |||||
745 | Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended"); | ||||
746 | Value *Shift = | ||||
747 | Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true); | ||||
748 | Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked"); | ||||
749 | Value *Or = Builder.CreateOr(And, Shift, "inserted"); | ||||
750 | return Or; | ||||
751 | } | ||||
752 | |||||
753 | /// Emit IR to implement a masked version of a given atomicrmw | ||||
754 | /// operation. (That is, only the bits under the Mask should be | ||||
755 | /// affected by the operation) | ||||
756 | static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op, | ||||
757 | IRBuilder<> &Builder, Value *Loaded, | ||||
758 | Value *Shifted_Inc, Value *Inc, | ||||
759 | const PartwordMaskValues &PMV) { | ||||
760 | // TODO: update to use | ||||
761 | // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order | ||||
762 | // to merge bits from two values without requiring PMV.Inv_Mask. | ||||
763 | switch (Op) { | ||||
764 | case AtomicRMWInst::Xchg: { | ||||
765 | Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask); | ||||
766 | Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc); | ||||
767 | return FinalVal; | ||||
768 | } | ||||
769 | case AtomicRMWInst::Or: | ||||
770 | case AtomicRMWInst::Xor: | ||||
771 | case AtomicRMWInst::And: | ||||
772 | llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW")__builtin_unreachable(); | ||||
773 | case AtomicRMWInst::Add: | ||||
774 | case AtomicRMWInst::Sub: | ||||
775 | case AtomicRMWInst::Nand: { | ||||
776 | // The other arithmetic ops need to be masked into place. | ||||
777 | Value *NewVal = performAtomicOp(Op, Builder, Loaded, Shifted_Inc); | ||||
778 | Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask); | ||||
779 | Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask); | ||||
780 | Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked); | ||||
781 | return FinalVal; | ||||
782 | } | ||||
783 | case AtomicRMWInst::Max: | ||||
784 | case AtomicRMWInst::Min: | ||||
785 | case AtomicRMWInst::UMax: | ||||
786 | case AtomicRMWInst::UMin: { | ||||
787 | // Finally, comparison ops will operate on the full value, so | ||||
788 | // truncate down to the original size, and expand out again after | ||||
789 | // doing the operation. | ||||
790 | Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV); | ||||
791 | Value *NewVal = performAtomicOp(Op, Builder, Loaded_Extract, Inc); | ||||
792 | Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV); | ||||
793 | return FinalVal; | ||||
794 | } | ||||
795 | default: | ||||
796 | llvm_unreachable("Unknown atomic op")__builtin_unreachable(); | ||||
797 | } | ||||
798 | } | ||||
799 | |||||
/// Expand a sub-word atomicrmw operation into an appropriate
/// word-sized operation.
///
/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
/// way as a typical atomicrmw expansion. The only difference here is
/// that the operation inside of the loop may operate upon only a
/// part of the value.
void AtomicExpand::expandPartwordAtomicRMW(
    AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
  // Preserve the original ordering and sync scope on the word-sized loop.
  AtomicOrdering MemOpOrder = AI->getOrdering();
  SyncScope::ID SSID = AI->getSyncScopeID();

  IRBuilder<> Builder(AI);

  // Compute the word-aligned address plus the shift/mask values needed to
  // address the sub-word value inside it.
  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  // Zero-extend the value operand and move it into its position in the word.
  Value *ValOperand_Shifted =
      Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
                        PMV.ShiftAmt, "ValOperand_Shifted");

  // Loop body callback: apply the masked operation to the loaded word.
  auto PerformPartwordOp = [&](IRBuilder<> &Builder, Value *Loaded) {
    return performMaskedAtomicOp(AI->getOperation(), Builder, Loaded,
                                 ValOperand_Shifted, AI->getValOperand(), PMV);
  };

  // Both helpers assign OldResult on their respective paths.
  Value *OldResult;
  if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
    OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr,
                                     PMV.AlignedAddrAlignment, MemOpOrder,
                                     SSID, PerformPartwordOp,
                                     createCmpXchgInstFun);
  } else {
    assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
    OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
                                  PMV.AlignedAddrAlignment, MemOpOrder,
                                  PerformPartwordOp);
  }

  // The loop yields the whole word; extract the sub-word result before
  // replacing the original instruction with it.
  Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
}
844 | |||||
845 | // Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width. | ||||
846 | AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) { | ||||
847 | IRBuilder<> Builder(AI); | ||||
848 | AtomicRMWInst::BinOp Op = AI->getOperation(); | ||||
849 | |||||
850 | assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||((void)0) | ||||
851 | Op == AtomicRMWInst::And) &&((void)0) | ||||
852 | "Unable to widen operation")((void)0); | ||||
853 | |||||
854 | PartwordMaskValues PMV = | ||||
855 | createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(), | ||||
856 | AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8); | ||||
857 | |||||
858 | Value *ValOperand_Shifted = | ||||
859 | Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType), | ||||
860 | PMV.ShiftAmt, "ValOperand_Shifted"); | ||||
861 | |||||
862 | Value *NewOperand; | ||||
863 | |||||
864 | if (Op == AtomicRMWInst::And) | ||||
865 | NewOperand = | ||||
866 | Builder.CreateOr(PMV.Inv_Mask, ValOperand_Shifted, "AndOperand"); | ||||
867 | else | ||||
868 | NewOperand = ValOperand_Shifted; | ||||
869 | |||||
870 | AtomicRMWInst *NewAI = | ||||
871 | Builder.CreateAtomicRMW(Op, PMV.AlignedAddr, NewOperand, | ||||
872 | PMV.AlignedAddrAlignment, AI->getOrdering()); | ||||
873 | |||||
874 | Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV); | ||||
875 | AI->replaceAllUsesWith(FinalOldResult); | ||||
876 | AI->eraseFromParent(); | ||||
877 | return NewAI; | ||||
878 | } | ||||
879 | |||||
/// Expand a sub-word cmpxchg into a word-sized cmpxchg, adding a retry
/// loop (for strong cmpxchg) so concurrent changes to the other bytes of
/// the word do not cause spurious failures. Always returns true.
bool AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
  // The basic idea here is that we're expanding a cmpxchg of a
  // smaller memory size up to a word-sized cmpxchg. To do this, we
  // need to add a retry-loop for strong cmpxchg, so that
  // modifications to other parts of the word don't cause a spurious
  // failure.

  // This generates code like the following:
  //     [[Setup mask values PMV.*]]
  //     %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
  //     %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
  //     %InitLoaded = load i32* %addr
  //     %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
  //     br partword.cmpxchg.loop
  // partword.cmpxchg.loop:
  //     %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
  //        [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
  //     %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
  //     %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
  //     %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
  //        i32 %FullWord_NewVal success_ordering failure_ordering
  //     %OldVal = extractvalue { i32, i1 } %NewCI, 0
  //     %Success = extractvalue { i32, i1 } %NewCI, 1
  //     br i1 %Success, label %partword.cmpxchg.end,
  //        label %partword.cmpxchg.failure
  // partword.cmpxchg.failure:
  //     %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
  //     %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
  //     br i1 %ShouldContinue, label %partword.cmpxchg.loop,
  //        label %partword.cmpxchg.end
  // partword.cmpxchg.end:
  //     %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
  //     %FinalOldVal = trunc i32 %tmp1 to i8
  //     %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
  //     %Res = insertvalue { i8, i1 } %tmp2, i1 %Success, 1

  Value *Addr = CI->getPointerOperand();
  Value *Cmp = CI->getCompareOperand();
  Value *NewVal = CI->getNewValOperand();

  BasicBlock *BB = CI->getParent();
  Function *F = BB->getParent();
  IRBuilder<> Builder(CI);
  LLVMContext &Ctx = Builder.getContext();

  // Carve out the CFG skeleton: everything after CI moves to EndBB, with
  // the loop and failure blocks inserted in between.
  BasicBlock *EndBB =
      BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
  auto FailureBB =
      BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
  auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);

  // The split call above "helpfully" added a branch at the end of BB
  // (to the wrong place).
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
                       CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  // Shift the incoming values over, into the right location in the word.
  Value *NewVal_Shifted =
      Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
  Value *Cmp_Shifted =
      Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);

  // Load the entire current word, and mask into place the expected and new
  // values
  LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
  InitLoaded->setVolatile(CI->isVolatile());
  Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
  Builder.CreateBr(LoopBB);

  // partword.cmpxchg.loop:
  Builder.SetInsertPoint(LoopBB);
  PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
  Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);

  // Mask/Or the expected and new values into place in the loaded word.
  Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
  Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
  AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
      PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
      CI->getSuccessOrdering(), CI->getFailureOrdering(), CI->getSyncScopeID());
  NewCI->setVolatile(CI->isVolatile());
  // When we're building a strong cmpxchg, we need a loop, so you
  // might think we could use a weak cmpxchg inside. But, using strong
  // allows the below comparison for ShouldContinue, and we're
  // expecting the underlying cmpxchg to be a machine instruction,
  // which is strong anyways.
  NewCI->setWeak(CI->isWeak());

  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
  Value *Success = Builder.CreateExtractValue(NewCI, 1);

  // A weak cmpxchg may fail spuriously, so it gets no retry loop: both
  // outcomes fall through to the end block.
  if (CI->isWeak())
    Builder.CreateBr(EndBB);
  else
    Builder.CreateCondBr(Success, EndBB, FailureBB);

  // partword.cmpxchg.failure:
  Builder.SetInsertPoint(FailureBB);
  // Upon failure, verify that the masked-out part of the loaded value
  // has been modified. If it didn't, abort the cmpxchg, since the
  // masked-in part must've.
  Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
  Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
  Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);

  // Add the second value to the phi from above
  Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);

  // partword.cmpxchg.end:
  Builder.SetInsertPoint(CI);

  // Rebuild the { value, success } aggregate the original cmpxchg
  // returned, with the old value narrowed back to the original type.
  Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
  Value *Res = UndefValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
  Res = Builder.CreateInsertValue(Res, Success, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
  return true;
}
1004 | |||||
1005 | void AtomicExpand::expandAtomicOpToLLSC( | ||||
1006 | Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign, | ||||
1007 | AtomicOrdering MemOpOrder, | ||||
1008 | function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) { | ||||
1009 | IRBuilder<> Builder(I); | ||||
1010 | Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign, | ||||
1011 | MemOpOrder, PerformOp); | ||||
1012 | |||||
1013 | I->replaceAllUsesWith(Loaded); | ||||
1014 | I->eraseFromParent(); | ||||
1015 | } | ||||
1016 | |||||
1017 | void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) { | ||||
1018 | IRBuilder<> Builder(AI); | ||||
1019 | |||||
1020 | PartwordMaskValues PMV = | ||||
1021 | createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(), | ||||
1022 | AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8); | ||||
1023 | |||||
1024 | // The value operand must be sign-extended for signed min/max so that the | ||||
1025 | // target's signed comparison instructions can be used. Otherwise, just | ||||
1026 | // zero-ext. | ||||
1027 | Instruction::CastOps CastOp = Instruction::ZExt; | ||||
1028 | AtomicRMWInst::BinOp RMWOp = AI->getOperation(); | ||||
1029 | if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min) | ||||
1030 | CastOp = Instruction::SExt; | ||||
1031 | |||||
1032 | Value *ValOperand_Shifted = Builder.CreateShl( | ||||
1033 | Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType), | ||||
1034 | PMV.ShiftAmt, "ValOperand_Shifted"); | ||||
1035 | Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic( | ||||
1036 | Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt, | ||||
1037 | AI->getOrdering()); | ||||
1038 | Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV); | ||||
1039 | AI->replaceAllUsesWith(FinalOldResult); | ||||
1040 | AI->eraseFromParent(); | ||||
1041 | } | ||||
1042 | |||||
1043 | void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) { | ||||
1044 | IRBuilder<> Builder(CI); | ||||
1045 | |||||
1046 | PartwordMaskValues PMV = createMaskInstrs( | ||||
1047 | Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(), | ||||
1048 | CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8); | ||||
1049 | |||||
1050 | Value *CmpVal_Shifted = Builder.CreateShl( | ||||
1051 | Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt, | ||||
1052 | "CmpVal_Shifted"); | ||||
1053 | Value *NewVal_Shifted = Builder.CreateShl( | ||||
1054 | Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt, | ||||
1055 | "NewVal_Shifted"); | ||||
1056 | Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic( | ||||
1057 | Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask, | ||||
1058 | CI->getMergedOrdering()); | ||||
1059 | Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV); | ||||
1060 | Value *Res = UndefValue::get(CI->getType()); | ||||
1061 | Res = Builder.CreateInsertValue(Res, FinalOldVal, 0); | ||||
1062 | Value *Success = Builder.CreateICmpEQ( | ||||
1063 | CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success"); | ||||
1064 | Res = Builder.CreateInsertValue(Res, Success, 1); | ||||
1065 | |||||
1066 | CI->replaceAllUsesWith(Res); | ||||
1067 | CI->eraseFromParent(); | ||||
1068 | } | ||||
1069 | |||||
/// Build a load-linked/store-conditional loop that applies PerformOp to the
/// value at Addr. Returns the value loaded before the operation; leaves the
/// Builder's insertion point at the start of the loop's exit block.
Value *AtomicExpand::insertRMWLLSCLoop(
    IRBuilder<> &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder,
    function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
  LLVMContext &Ctx = Builder.getContext();
  BasicBlock *BB = Builder.GetInsertBlock();
  Function *F = BB->getParent();

  assert(AddrAlign >=
             F->getParent()->getDataLayout().getTypeStoreSize(ResultTy) &&
         "Expected at least natural alignment at this point.");

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  // atomicrmw.start:
  //     %loaded = @load.linked(%addr)
  //     %new = some_op iN %loaded, %incr
  //     %stored = @store_conditional(%new, %addr)
  //     %try_again = icmp i32 ne %stored, 0
  //     br i1 %try_again, label %loop, label %atomicrmw.end
  // atomicrmw.end:
  //     [...]
  BasicBlock *ExitBB =
      BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place).
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder);

  Value *NewVal = PerformOp(Builder, Loaded);

  Value *StoreSuccess =
      TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
  // A nonzero store-conditional result means the store failed; loop back
  // and try the whole sequence again.
  Value *TryAgain = Builder.CreateICmpNE(
      StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
  Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  return Loaded;
}
1119 | |||||
/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
/// the equivalent bitwidth. We used to not support pointer cmpxchg in the
/// IR. As a migration step, we convert back to what used to be the standard
/// way to represent a pointer cmpxchg so that we can update backends one by
/// one.
AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
  auto *M = CI->getModule();
  // Pick the integer type of equivalent bitwidth for the operand type.
  Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
                                            M->getDataLayout());

  IRBuilder<> Builder(CI);

  // Reinterpret the memory as the integer type, keeping the address space.
  Value *Addr = CI->getPointerOperand();
  Type *PT = PointerType::get(NewTy,
                              Addr->getType()->getPointerAddressSpace());
  Value *NewAddr = Builder.CreateBitCast(Addr, PT);

  // Cast the pointer-valued operands to integers of the same width.
  Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
  Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);

  // Recreate the cmpxchg with all of the original's attributes preserved.
  auto *NewCI = Builder.CreateAtomicCmpXchg(
      NewAddr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(),
      CI->getFailureOrdering(), CI->getSyncScopeID());
  NewCI->setVolatile(CI->isVolatile());
  NewCI->setWeak(CI->isWeak());
  LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");

  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
  Value *Succ = Builder.CreateExtractValue(NewCI, 1);

  // Convert the loaded integer back to the original (pointer) type before
  // rebuilding the { value, success } result aggregate.
  OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());

  Value *Res = UndefValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, OldVal, 0);
  Res = Builder.CreateInsertValue(Res, Succ, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
  return NewCI;
}
1160 | |||||
1161 | bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { | ||||
1162 | AtomicOrdering SuccessOrder = CI->getSuccessOrdering(); | ||||
1163 | AtomicOrdering FailureOrder = CI->getFailureOrdering(); | ||||
1164 | Value *Addr = CI->getPointerOperand(); | ||||
1165 | BasicBlock *BB = CI->getParent(); | ||||
1166 | Function *F = BB->getParent(); | ||||
1167 | LLVMContext &Ctx = F->getContext(); | ||||
1168 | // If shouldInsertFencesForAtomic() returns true, then the target does not | ||||
1169 | // want to deal with memory orders, and emitLeading/TrailingFence should take | ||||
1170 | // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we | ||||
1171 | // should preserve the ordering. | ||||
1172 | bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI); | ||||
1173 | AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic | ||||
1174 | ? AtomicOrdering::Monotonic | ||||
1175 | : CI->getMergedOrdering(); | ||||
1176 | |||||
1177 | // In implementations which use a barrier to achieve release semantics, we can | ||||
1178 | // delay emitting this barrier until we know a store is actually going to be | ||||
1179 | // attempted. The cost of this delay is that we need 2 copies of the block | ||||
1180 | // emitting the load-linked, affecting code size. | ||||
1181 | // | ||||
1182 | // Ideally, this logic would be unconditional except for the minsize check | ||||
1183 | // since in other cases the extra blocks naturally collapse down to the | ||||
1184 | // minimal loop. Unfortunately, this puts too much stress on later | ||||
1185 | // optimisations so we avoid emitting the extra logic in those cases too. | ||||
1186 | bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic && | ||||
1187 | SuccessOrder != AtomicOrdering::Monotonic && | ||||
1188 | SuccessOrder != AtomicOrdering::Acquire && | ||||
1189 | !F->hasMinSize(); | ||||
1190 | |||||
1191 | // There's no overhead for sinking the release barrier in a weak cmpxchg, so | ||||
1192 | // do it even on minsize. | ||||
1193 | bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak(); | ||||
1194 | |||||
1195 | // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord | ||||
1196 | // | ||||
1197 | // The full expansion we produce is: | ||||
1198 | // [...] | ||||
1199 | // %aligned.addr = ... | ||||
1200 | // cmpxchg.start: | ||||
1201 | // %unreleasedload = @load.linked(%aligned.addr) | ||||
1202 | // %unreleasedload.extract = extract value from %unreleasedload | ||||
1203 | // %should_store = icmp eq %unreleasedload.extract, %desired | ||||
1204 | // br i1 %should_store, label %cmpxchg.releasingstore, | ||||
1205 | // label %cmpxchg.nostore | ||||
1206 | // cmpxchg.releasingstore: | ||||
1207 | // fence? | ||||
1208 | // br label cmpxchg.trystore | ||||
1209 | // cmpxchg.trystore: | ||||
1210 | // %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore], | ||||
1211 | // [%releasedload, %cmpxchg.releasedload] | ||||
1212 | // %updated.new = insert %new into %loaded.trystore | ||||
1213 | // %stored = @store_conditional(%updated.new, %aligned.addr) | ||||
1214 | // %success = icmp eq i32 %stored, 0 | ||||
1215 | // br i1 %success, label %cmpxchg.success, | ||||
1216 | // label %cmpxchg.releasedload/%cmpxchg.failure | ||||
1217 | // cmpxchg.releasedload: | ||||
1218 | // %releasedload = @load.linked(%aligned.addr) | ||||
1219 | // %releasedload.extract = extract value from %releasedload | ||||
1220 | // %should_store = icmp eq %releasedload.extract, %desired | ||||
1221 | // br i1 %should_store, label %cmpxchg.trystore, | ||||
1222 | // label %cmpxchg.failure | ||||
1223 | // cmpxchg.success: | ||||
1224 | // fence? | ||||
1225 | // br label %cmpxchg.end | ||||
1226 | // cmpxchg.nostore: | ||||
1227 | // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start], | ||||
1228 | // [%releasedload, | ||||
1229 | // %cmpxchg.releasedload/%cmpxchg.trystore] | ||||
1230 | // @load_linked_fail_balance()? | ||||
1231 | // br label %cmpxchg.failure | ||||
1232 | // cmpxchg.failure: | ||||
1233 | // fence? | ||||
1234 | // br label %cmpxchg.end | ||||
1235 | // cmpxchg.end: | ||||
1236 | // %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure], | ||||
1237 | // [%loaded.trystore, %cmpxchg.trystore] | ||||
1238 | // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure] | ||||
1239 | // %loaded = extract value from %loaded.exit | ||||
1240 | // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0 | ||||
1241 | // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1 | ||||
1242 | // [...] | ||||
1243 | BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end"); | ||||
1244 | auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB); | ||||
1245 | auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB); | ||||
1246 | auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB); | ||||
1247 | auto ReleasedLoadBB = | ||||
1248 | BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB); | ||||
1249 | auto TryStoreBB = | ||||
1250 | BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB); | ||||
1251 | auto ReleasingStoreBB = | ||||
1252 | BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB); | ||||
1253 | auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB); | ||||
1254 | |||||
1255 | // This grabs the DebugLoc from CI | ||||
1256 | IRBuilder<> Builder(CI); | ||||
1257 | |||||
1258 | // The split call above "helpfully" added a branch at the end of BB (to the | ||||
1259 | // wrong place), but we might want a fence too. It's easiest to just remove | ||||
1260 | // the branch entirely. | ||||
1261 | std::prev(BB->end())->eraseFromParent(); | ||||
1262 | Builder.SetInsertPoint(BB); | ||||
1263 | if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier) | ||||
1264 | TLI->emitLeadingFence(Builder, CI, SuccessOrder); | ||||
1265 | |||||
1266 | PartwordMaskValues PMV = | ||||
1267 | createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr, | ||||
1268 | CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8); | ||||
1269 | Builder.CreateBr(StartBB); | ||||
1270 | |||||
1271 | // Start the main loop block now that we've taken care of the preliminaries. | ||||
1272 | Builder.SetInsertPoint(StartBB); | ||||
1273 | Value *UnreleasedLoad = | ||||
1274 | TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder); | ||||
1275 | Value *UnreleasedLoadExtract = | ||||
1276 | extractMaskedValue(Builder, UnreleasedLoad, PMV); | ||||
1277 | Value *ShouldStore = Builder.CreateICmpEQ( | ||||
1278 | UnreleasedLoadExtract, CI->getCompareOperand(), "should_store"); | ||||
1279 | |||||
1280 | // If the cmpxchg doesn't actually need any ordering when it fails, we can | ||||
1281 | // jump straight past that fence instruction (if it exists). | ||||
1282 | Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB); | ||||
1283 | |||||
1284 | Builder.SetInsertPoint(ReleasingStoreBB); | ||||
1285 | if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier) | ||||
1286 | TLI->emitLeadingFence(Builder, CI, SuccessOrder); | ||||
1287 | Builder.CreateBr(TryStoreBB); | ||||
1288 | |||||
1289 | Builder.SetInsertPoint(TryStoreBB); | ||||
1290 | PHINode *LoadedTryStore = | ||||
1291 | Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore"); | ||||
1292 | LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB); | ||||
1293 | Value *NewValueInsert = | ||||
1294 | insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV); | ||||
1295 | Value *StoreSuccess = | ||||
1296 | TLI->emitStoreConditional(Builder, NewValueInsert, PMV.AlignedAddr, | ||||
1297 | MemOpOrder); | ||||
1298 | StoreSuccess = Builder.CreateICmpEQ( | ||||
1299 | StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success"); | ||||
1300 | BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB; | ||||
1301 | Builder.CreateCondBr(StoreSuccess, SuccessBB, | ||||
1302 | CI->isWeak() ? FailureBB : RetryBB); | ||||
1303 | |||||
1304 | Builder.SetInsertPoint(ReleasedLoadBB); | ||||
1305 | Value *SecondLoad; | ||||
1306 | if (HasReleasedLoadBB) { | ||||
1307 | SecondLoad = | ||||
1308 | TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder); | ||||
1309 | Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV); | ||||
1310 | ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract, | ||||
1311 | CI->getCompareOperand(), "should_store"); | ||||
1312 | |||||
1313 | // If the cmpxchg doesn't actually need any ordering when it fails, we can | ||||
1314 | // jump straight past that fence instruction (if it exists). | ||||
1315 | Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB); | ||||
1316 | // Update PHI node in TryStoreBB. | ||||
1317 | LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB); | ||||
1318 | } else | ||||
1319 | Builder.CreateUnreachable(); | ||||
1320 | |||||
1321 | // Make sure later instructions don't get reordered with a fence if | ||||
1322 | // necessary. | ||||
1323 | Builder.SetInsertPoint(SuccessBB); | ||||
1324 | if (ShouldInsertFencesForAtomic) | ||||
1325 | TLI->emitTrailingFence(Builder, CI, SuccessOrder); | ||||
1326 | Builder.CreateBr(ExitBB); | ||||
1327 | |||||
1328 | Builder.SetInsertPoint(NoStoreBB); | ||||
1329 | PHINode *LoadedNoStore = | ||||
1330 | Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore"); | ||||
1331 | LoadedNoStore->addIncoming(UnreleasedLoad, StartBB); | ||||
1332 | if (HasReleasedLoadBB) | ||||
1333 | LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB); | ||||
1334 | |||||
1335 | // In the failing case, where we don't execute the store-conditional, the | ||||
1336 | // target might want to balance out the load-linked with a dedicated | ||||
1337 | // instruction (e.g., on ARM, clearing the exclusive monitor). | ||||
1338 | TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder); | ||||
1339 | Builder.CreateBr(FailureBB); | ||||
1340 | |||||
1341 | Builder.SetInsertPoint(FailureBB); | ||||
1342 | PHINode *LoadedFailure = | ||||
1343 | Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure"); | ||||
1344 | LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB); | ||||
1345 | if (CI->isWeak()) | ||||
1346 | LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB); | ||||
1347 | if (ShouldInsertFencesForAtomic) | ||||
1348 | TLI->emitTrailingFence(Builder, CI, FailureOrder); | ||||
1349 | Builder.CreateBr(ExitBB); | ||||
1350 | |||||
1351 | // Finally, we have control-flow based knowledge of whether the cmpxchg | ||||
1352 | // succeeded or not. We expose this to later passes by converting any | ||||
1353 | // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate | ||||
1354 | // PHI. | ||||
1355 | Builder.SetInsertPoint(ExitBB, ExitBB->begin()); | ||||
1356 | PHINode *LoadedExit = | ||||
1357 | Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit"); | ||||
1358 | LoadedExit->addIncoming(LoadedTryStore, SuccessBB); | ||||
1359 | LoadedExit->addIncoming(LoadedFailure, FailureBB); | ||||
1360 | PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success"); | ||||
1361 | Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB); | ||||
1362 | Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB); | ||||
1363 | |||||
1364 | // This is the "exit value" from the cmpxchg expansion. It may be of | ||||
1365 | // a type wider than the one in the cmpxchg instruction. | ||||
1366 | Value *LoadedFull = LoadedExit; | ||||
1367 | |||||
1368 | Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator())); | ||||
1369 | Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV); | ||||
1370 | |||||
1371 | // Look for any users of the cmpxchg that are just comparing the loaded value | ||||
1372 | // against the desired one, and replace them with the CFG-derived version. | ||||
1373 | SmallVector<ExtractValueInst *, 2> PrunedInsts; | ||||
1374 | for (auto User : CI->users()) { | ||||
1375 | ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User); | ||||
1376 | if (!EV) | ||||
1377 | continue; | ||||
1378 | |||||
1379 | assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&((void)0) | ||||
1380 | "weird extraction from { iN, i1 }")((void)0); | ||||
1381 | |||||
1382 | if (EV->getIndices()[0] == 0) | ||||
1383 | EV->replaceAllUsesWith(Loaded); | ||||
1384 | else | ||||
1385 | EV->replaceAllUsesWith(Success); | ||||
1386 | |||||
1387 | PrunedInsts.push_back(EV); | ||||
1388 | } | ||||
1389 | |||||
1390 | // We can remove the instructions now we're no longer iterating through them. | ||||
1391 | for (auto EV : PrunedInsts) | ||||
1392 | EV->eraseFromParent(); | ||||
1393 | |||||
1394 | if (!CI->use_empty()) { | ||||
1395 | // Some use of the full struct return that we don't understand has happened, | ||||
1396 | // so we've got to reconstruct it properly. | ||||
1397 | Value *Res; | ||||
1398 | Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0); | ||||
1399 | Res = Builder.CreateInsertValue(Res, Success, 1); | ||||
1400 | |||||
1401 | CI->replaceAllUsesWith(Res); | ||||
1402 | } | ||||
1403 | |||||
1404 | CI->eraseFromParent(); | ||||
1405 | return true; | ||||
1406 | } | ||||
1407 | |||||
1408 | bool AtomicExpand::isIdempotentRMW(AtomicRMWInst* RMWI) { | ||||
1409 | auto C = dyn_cast<ConstantInt>(RMWI->getValOperand()); | ||||
1410 | if(!C) | ||||
1411 | return false; | ||||
1412 | |||||
1413 | AtomicRMWInst::BinOp Op = RMWI->getOperation(); | ||||
1414 | switch(Op) { | ||||
1415 | case AtomicRMWInst::Add: | ||||
1416 | case AtomicRMWInst::Sub: | ||||
1417 | case AtomicRMWInst::Or: | ||||
1418 | case AtomicRMWInst::Xor: | ||||
1419 | return C->isZero(); | ||||
1420 | case AtomicRMWInst::And: | ||||
1421 | return C->isMinusOne(); | ||||
1422 | // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/... | ||||
1423 | default: | ||||
1424 | return false; | ||||
1425 | } | ||||
1426 | } | ||||
1427 | |||||
1428 | bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) { | ||||
1429 | if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) { | ||||
1430 | tryExpandAtomicLoad(ResultingLoad); | ||||
1431 | return true; | ||||
1432 | } | ||||
1433 | return false; | ||||
1434 | } | ||||
1435 | |||||
// Emit a cmpxchg retry loop at the current insertion point: load the current
// value, apply PerformOp to it, and attempt to install the result with the
// CAS produced by CreateCmpXchg, retrying until the CAS succeeds. Returns the
// value that was in memory immediately before the winning exchange (the
// "old value" an atomicrmw yields).
Value *AtomicExpand::insertRMWCmpXchgLoop(
    IRBuilder<> &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder, SyncScope::ID SSID,
    function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
    CreateCmpXchgInstFun CreateCmpXchg) {
  LLVMContext &Ctx = Builder.getContext();
  BasicBlock *BB = Builder.GetInsertBlock();
  Function *F = BB->getParent();

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  //     %init_loaded = load atomic iN* %addr
  //     br label %loop
  // loop:
  //     %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
  //     %new = some_op iN %loaded, %incr
  //     %pair = cmpxchg iN* %addr, iN %loaded, iN %new
  //     %new_loaded = extractvalue { iN, i1 } %pair, 0
  //     %success = extractvalue { iN, i1 } %pair, 1
  //     br i1 %success, label %atomicrmw.end, label %loop
  // atomicrmw.end:
  //     [...]
  BasicBlock *ExitBB =
      BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we want a load. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  // Seed the loop with a plain (non-atomic) load of the current value.
  LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  // %loaded: initial load on first iteration, CAS result on retries.
  PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
  Loaded->addIncoming(InitLoaded, BB);

  // Compute the value to store from the currently observed value.
  Value *NewVal = PerformOp(Builder, Loaded);

  Value *NewLoaded = nullptr;
  Value *Success = nullptr;

  // Unordered is mapped to Monotonic here; cmpxchg has no unordered flavor.
  CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
                MemOpOrder == AtomicOrdering::Unordered
                    ? AtomicOrdering::Monotonic
                    : MemOpOrder,
                SSID, Success, NewLoaded);
  // The callback must report back both outputs.
  assert(Success && NewLoaded);

  Loaded->addIncoming(NewLoaded, LoopBB);

  Builder.CreateCondBr(Success, ExitBB, LoopBB);

  // Leave the builder positioned at the start of the exit block for callers.
  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  return NewLoaded;
}
1496 | |||||
1497 | bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) { | ||||
1498 | unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; | ||||
1499 | unsigned ValueSize = getAtomicOpSize(CI); | ||||
1500 | |||||
1501 | switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) { | ||||
1502 | default: | ||||
1503 | llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg")__builtin_unreachable(); | ||||
1504 | case TargetLoweringBase::AtomicExpansionKind::None: | ||||
1505 | if (ValueSize < MinCASSize) | ||||
1506 | return expandPartwordCmpXchg(CI); | ||||
1507 | return false; | ||||
1508 | case TargetLoweringBase::AtomicExpansionKind::LLSC: { | ||||
1509 | return expandAtomicCmpXchg(CI); | ||||
1510 | } | ||||
1511 | case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: | ||||
1512 | expandAtomicCmpXchgToMaskedIntrinsic(CI); | ||||
1513 | return true; | ||||
1514 | } | ||||
1515 | } | ||||
1516 | |||||
1517 | // Note: This function is exposed externally by AtomicExpandUtils.h | ||||
1518 | bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, | ||||
1519 | CreateCmpXchgInstFun CreateCmpXchg) { | ||||
1520 | IRBuilder<> Builder(AI); | ||||
1521 | Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop( | ||||
1522 | Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(), | ||||
1523 | AI->getOrdering(), AI->getSyncScopeID(), | ||||
1524 | [&](IRBuilder<> &Builder, Value *Loaded) { | ||||
1525 | return performAtomicOp(AI->getOperation(), Builder, Loaded, | ||||
1526 | AI->getValOperand()); | ||||
1527 | }, | ||||
1528 | CreateCmpXchg); | ||||
1529 | |||||
1530 | AI->replaceAllUsesWith(Loaded); | ||||
1531 | AI->eraseFromParent(); | ||||
1532 | return true; | ||||
1533 | } | ||||
1534 | |||||
1535 | // In order to use one of the sized library calls such as | ||||
1536 | // __atomic_fetch_add_4, the alignment must be sufficient, the size | ||||
1537 | // must be one of the potentially-specialized sizes, and the value | ||||
1538 | // type must actually exist in C on the target (otherwise, the | ||||
1539 | // function wouldn't actually be defined.) | ||||
1540 | static bool canUseSizedAtomicCall(unsigned Size, Align Alignment, | ||||
1541 | const DataLayout &DL) { | ||||
1542 | // TODO: "LargestSize" is an approximation for "largest type that | ||||
1543 | // you can express in C". It seems to be the case that int128 is | ||||
1544 | // supported on all 64-bit platforms, otherwise only up to 64-bit | ||||
1545 | // integers are supported. If we get this wrong, then we'll try to | ||||
1546 | // call a sized libcall that doesn't actually exist. There should | ||||
1547 | // really be some more reliable way in LLVM of determining integer | ||||
1548 | // sizes which are valid in the target's C ABI... | ||||
1549 | unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8; | ||||
1550 | return Alignment >= Size && | ||||
1551 | (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) && | ||||
1552 | Size <= LargestSize; | ||||
1553 | } | ||||
1554 | |||||
1555 | void AtomicExpand::expandAtomicLoadToLibcall(LoadInst *I) { | ||||
1556 | static const RTLIB::Libcall Libcalls[6] = { | ||||
1557 | RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2, | ||||
1558 | RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16}; | ||||
1559 | unsigned Size = getAtomicOpSize(I); | ||||
1560 | |||||
1561 | bool expanded = expandAtomicOpToLibcall( | ||||
1562 | I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr, | ||||
1563 | I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls); | ||||
1564 | if (!expanded) | ||||
1565 | report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Load"); | ||||
1566 | } | ||||
1567 | |||||
1568 | void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) { | ||||
1569 | static const RTLIB::Libcall Libcalls[6] = { | ||||
1570 | RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2, | ||||
1571 | RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16}; | ||||
1572 | unsigned Size = getAtomicOpSize(I); | ||||
1573 | |||||
1574 | bool expanded = expandAtomicOpToLibcall( | ||||
1575 | I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(), | ||||
1576 | nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls); | ||||
1577 | if (!expanded) | ||||
1578 | report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Store"); | ||||
1579 | } | ||||
1580 | |||||
1581 | void AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) { | ||||
1582 | static const RTLIB::Libcall Libcalls[6] = { | ||||
1583 | RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1, | ||||
1584 | RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4, | ||||
1585 | RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16}; | ||||
1586 | unsigned Size = getAtomicOpSize(I); | ||||
1587 | |||||
1588 | bool expanded = expandAtomicOpToLibcall( | ||||
1589 | I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(), | ||||
1590 | I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(), | ||||
1591 | Libcalls); | ||||
1592 | if (!expanded) | ||||
1593 | report_fatal_error("expandAtomicOpToLibcall shouldn't fail for CAS"); | ||||
1594 | } | ||||
1595 | |||||
// Map an atomicrmw binary operation to its family of __atomic_* libcalls.
// In each table, index [0] is the generic (size_t-parameterized) variant —
// or UNKNOWN_LIBCALL when no generic form exists for that operation — and
// indices [1..5] are the 1/2/4/8/16-byte specializations. Returns an empty
// ArrayRef for operations with no libcalls at all (min/max/FP), which makes
// the caller fall back to a CAS loop.
static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
  static const RTLIB::Libcall LibcallsXchg[6] = {
      RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
      RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
      RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
  static const RTLIB::Libcall LibcallsAdd[6] = {
      RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
      RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
      RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
  static const RTLIB::Libcall LibcallsSub[6] = {
      RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
      RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
      RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
  static const RTLIB::Libcall LibcallsAnd[6] = {
      RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
      RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
      RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
  static const RTLIB::Libcall LibcallsOr[6] = {
      RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
      RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
      RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
  static const RTLIB::Libcall LibcallsXor[6] = {
      RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
      RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
      RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
  static const RTLIB::Libcall LibcallsNand[6] = {
      RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
      RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
      RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};

  switch (Op) {
  case AtomicRMWInst::BAD_BINOP:
    llvm_unreachable("Should not have BAD_BINOP.");
  case AtomicRMWInst::Xchg:
    return makeArrayRef(LibcallsXchg);
  case AtomicRMWInst::Add:
    return makeArrayRef(LibcallsAdd);
  case AtomicRMWInst::Sub:
    return makeArrayRef(LibcallsSub);
  case AtomicRMWInst::And:
    return makeArrayRef(LibcallsAnd);
  case AtomicRMWInst::Or:
    return makeArrayRef(LibcallsOr);
  case AtomicRMWInst::Xor:
    return makeArrayRef(LibcallsXor);
  case AtomicRMWInst::Nand:
    return makeArrayRef(LibcallsNand);
  case AtomicRMWInst::Max:
  case AtomicRMWInst::Min:
  case AtomicRMWInst::UMax:
  case AtomicRMWInst::UMin:
  case AtomicRMWInst::FAdd:
  case AtomicRMWInst::FSub:
    // No atomic libcalls are available for max/min/umax/umin.
    return {};
  }
  llvm_unreachable("Unexpected AtomicRMW operation.");
}
1654 | |||||
// Lower an atomicrmw to an __atomic_* runtime call. If no direct libcall
// exists for this operation/size combination, fall back to a cmpxchg loop
// whose CAS is itself lowered to __atomic_compare_exchange*.
void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
  ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());

  unsigned Size = getAtomicOpSize(I);

  bool Success = false;
  if (!Libcalls.empty())
    Success = expandAtomicOpToLibcall(
        I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
        nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);

  // The expansion failed: either there were no libcalls at all for
  // the operation (min/max), or there were only size-specialized
  // libcalls (add/sub/etc) and we needed a generic. So, expand to a
  // CAS libcall, via a CAS loop, instead.
  if (!Success) {
    expandAtomicRMWToCmpXchg(
        I, [this](IRBuilder<> &Builder, Value *Addr, Value *Loaded,
                  Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
                  SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) {
          // Create the CAS instruction normally...
          AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
              Addr, Loaded, NewVal, Alignment, MemOpOrder,
              AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder),
              SSID);
          Success = Builder.CreateExtractValue(Pair, 1, "success");
          NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");

          // ...and then expand the CAS into a libcall.
          expandAtomicCASToLibcall(Pair);
        });
  }
}
1687 | |||||
1688 | // A helper routine for the above expandAtomic*ToLibcall functions. | ||||
1689 | // | ||||
1690 | // 'Libcalls' contains an array of enum values for the particular | ||||
1691 | // ATOMIC libcalls to be emitted. All of the other arguments besides | ||||
1692 | // 'I' are extracted from the Instruction subclass by the | ||||
1693 | // caller. Depending on the particular call, some will be null. | ||||
1694 | bool AtomicExpand::expandAtomicOpToLibcall( | ||||
1695 | Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand, | ||||
1696 | Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering, | ||||
1697 | AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) { | ||||
1698 | assert(Libcalls.size() == 6)((void)0); | ||||
1699 | |||||
1700 | LLVMContext &Ctx = I->getContext(); | ||||
1701 | Module *M = I->getModule(); | ||||
1702 | const DataLayout &DL = M->getDataLayout(); | ||||
1703 | IRBuilder<> Builder(I); | ||||
1704 | IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front()); | ||||
1705 | |||||
1706 | bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL); | ||||
1707 | Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8); | ||||
1708 | |||||
1709 | const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy); | ||||
1710 | |||||
1711 | // TODO: the "order" argument type is "int", not int32. So | ||||
1712 | // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints. | ||||
1713 | ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size); | ||||
1714 | assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO")((void)0); | ||||
1715 | Constant *OrderingVal = | ||||
1716 | ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering)); | ||||
1717 | Constant *Ordering2Val = nullptr; | ||||
1718 | if (CASExpected
| ||||
1719 | assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO")((void)0); | ||||
1720 | Ordering2Val = | ||||
1721 | ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2)); | ||||
1722 | } | ||||
1723 | bool HasResult = I->getType() != Type::getVoidTy(Ctx); | ||||
1724 | |||||
1725 | RTLIB::Libcall RTLibType; | ||||
1726 | if (UseSizedLibcall) { | ||||
1727 | switch (Size) { | ||||
1728 | case 1: RTLibType = Libcalls[1]; break; | ||||
1729 | case 2: RTLibType = Libcalls[2]; break; | ||||
1730 | case 4: RTLibType = Libcalls[3]; break; | ||||
1731 | case 8: RTLibType = Libcalls[4]; break; | ||||
1732 | case 16: RTLibType = Libcalls[5]; break; | ||||
1733 | } | ||||
1734 | } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) { | ||||
1735 | RTLibType = Libcalls[0]; | ||||
1736 | } else { | ||||
1737 | // Can't use sized function, and there's no generic for this | ||||
1738 | // operation, so give up. | ||||
1739 | return false; | ||||
1740 | } | ||||
1741 | |||||
1742 | if (!TLI->getLibcallName(RTLibType)) { | ||||
| |||||
1743 | // This target does not implement the requested atomic libcall so give up. | ||||
1744 | return false; | ||||
1745 | } | ||||
1746 | |||||
1747 | // Build up the function call. There's two kinds. First, the sized | ||||
1748 | // variants. These calls are going to be one of the following (with | ||||
1749 | // N=1,2,4,8,16): | ||||
1750 | // iN __atomic_load_N(iN *ptr, int ordering) | ||||
1751 | // void __atomic_store_N(iN *ptr, iN val, int ordering) | ||||
1752 | // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering) | ||||
1753 | // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired, | ||||
1754 | // int success_order, int failure_order) | ||||
1755 | // | ||||
1756 | // Note that these functions can be used for non-integer atomic | ||||
1757 | // operations, the values just need to be bitcast to integers on the | ||||
1758 | // way in and out. | ||||
1759 | // | ||||
1760 | // And, then, the generic variants. They look like the following: | ||||
1761 | // void __atomic_load(size_t size, void *ptr, void *ret, int ordering) | ||||
1762 | // void __atomic_store(size_t size, void *ptr, void *val, int ordering) | ||||
1763 | // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret, | ||||
1764 | // int ordering) | ||||
1765 | // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected, | ||||
1766 | // void *desired, int success_order, | ||||
1767 | // int failure_order) | ||||
1768 | // | ||||
1769 | // The different signatures are built up depending on the | ||||
1770 | // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult' | ||||
1771 | // variables. | ||||
1772 | |||||
1773 | AllocaInst *AllocaCASExpected = nullptr; | ||||
1774 | Value *AllocaCASExpected_i8 = nullptr; | ||||
1775 | AllocaInst *AllocaValue = nullptr; | ||||
1776 | Value *AllocaValue_i8 = nullptr; | ||||
1777 | AllocaInst *AllocaResult = nullptr; | ||||
1778 | Value *AllocaResult_i8 = nullptr; | ||||
1779 | |||||
1780 | Type *ResultTy; | ||||
1781 | SmallVector<Value *, 6> Args; | ||||
1782 | AttributeList Attr; | ||||
1783 | |||||
1784 | // 'size' argument. | ||||
1785 | if (!UseSizedLibcall) { | ||||
1786 | // Note, getIntPtrType is assumed equivalent to size_t. | ||||
1787 | Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size)); | ||||
1788 | } | ||||
1789 | |||||
1790 | // 'ptr' argument. | ||||
1791 | // note: This assumes all address spaces share a common libfunc | ||||
1792 | // implementation and that addresses are convertable. For systems without | ||||
1793 | // that property, we'd need to extend this mechanism to support AS-specific | ||||
1794 | // families of atomic intrinsics. | ||||
1795 | auto PtrTypeAS = PointerOperand->getType()->getPointerAddressSpace(); | ||||
1796 | Value *PtrVal = Builder.CreateBitCast(PointerOperand, | ||||
1797 | Type::getInt8PtrTy(Ctx, PtrTypeAS)); | ||||
1798 | PtrVal = Builder.CreateAddrSpaceCast(PtrVal, Type::getInt8PtrTy(Ctx)); | ||||
1799 | Args.push_back(PtrVal); | ||||
1800 | |||||
1801 | // 'expected' argument, if present. | ||||
1802 | if (CASExpected) { | ||||
1803 | AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType()); | ||||
1804 | AllocaCASExpected->setAlignment(AllocaAlignment); | ||||
1805 | unsigned AllocaAS = AllocaCASExpected->getType()->getPointerAddressSpace(); | ||||
1806 | |||||
1807 | AllocaCASExpected_i8 = | ||||
1808 | Builder.CreateBitCast(AllocaCASExpected, | ||||
1809 | Type::getInt8PtrTy(Ctx, AllocaAS)); | ||||
1810 | Builder.CreateLifetimeStart(AllocaCASExpected_i8, SizeVal64); | ||||
1811 | Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment); | ||||
1812 | Args.push_back(AllocaCASExpected_i8); | ||||
1813 | } | ||||
1814 | |||||
1815 | // 'val' argument ('desired' for cas), if present. | ||||
1816 | if (ValueOperand) { | ||||
1817 | if (UseSizedLibcall) { | ||||
1818 | Value *IntValue = | ||||
1819 | Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy); | ||||
1820 | Args.push_back(IntValue); | ||||
1821 | } else { | ||||
1822 | AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType()); | ||||
1823 | AllocaValue->setAlignment(AllocaAlignment); | ||||
1824 | AllocaValue_i8 = | ||||
1825 | Builder.CreateBitCast(AllocaValue, Type::getInt8PtrTy(Ctx)); | ||||
1826 | Builder.CreateLifetimeStart(AllocaValue_i8, SizeVal64); | ||||
1827 | Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment); | ||||
1828 | Args.push_back(AllocaValue_i8); | ||||
1829 | } | ||||
1830 | } | ||||
1831 | |||||
1832 | // 'ret' argument. | ||||
1833 | if (!CASExpected && HasResult && !UseSizedLibcall) { | ||||
1834 | AllocaResult = AllocaBuilder.CreateAlloca(I->getType()); | ||||
1835 | AllocaResult->setAlignment(AllocaAlignment); | ||||
1836 | unsigned AllocaAS = AllocaResult->getType()->getPointerAddressSpace(); | ||||
1837 | AllocaResult_i8 = | ||||
1838 | Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx, AllocaAS)); | ||||
1839 | Builder.CreateLifetimeStart(AllocaResult_i8, SizeVal64); | ||||
1840 | Args.push_back(AllocaResult_i8); | ||||
1841 | } | ||||
1842 | |||||
1843 | // 'ordering' ('success_order' for cas) argument. | ||||
1844 | Args.push_back(OrderingVal); | ||||
1845 | |||||
1846 | // 'failure_order' argument, if present. | ||||
1847 | if (Ordering2Val) | ||||
1848 | Args.push_back(Ordering2Val); | ||||
1849 | |||||
1850 | // Now, the return type. | ||||
1851 | if (CASExpected) { | ||||
1852 | ResultTy = Type::getInt1Ty(Ctx); | ||||
1853 | Attr = Attr.addAttribute(Ctx, AttributeList::ReturnIndex, Attribute::ZExt); | ||||
1854 | } else if (HasResult && UseSizedLibcall) | ||||
1855 | ResultTy = SizedIntTy; | ||||
1856 | else | ||||
1857 | ResultTy = Type::getVoidTy(Ctx); | ||||
1858 | |||||
1859 | // Done with setting up arguments and return types, create the call: | ||||
1860 | SmallVector<Type *, 6> ArgTys; | ||||
1861 | for (Value *Arg : Args) | ||||
1862 | ArgTys.push_back(Arg->getType()); | ||||
1863 | FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false); | ||||
1864 | FunctionCallee LibcallFn = | ||||
1865 | M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr); | ||||
1866 | CallInst *Call = Builder.CreateCall(LibcallFn, Args); | ||||
1867 | Call->setAttributes(Attr); | ||||
1868 | Value *Result = Call; | ||||
1869 | |||||
1870 | // And then, extract the results... | ||||
1871 | if (ValueOperand && !UseSizedLibcall) | ||||
1872 | Builder.CreateLifetimeEnd(AllocaValue_i8, SizeVal64); | ||||
1873 | |||||
1874 | if (CASExpected) { | ||||
1875 | // The final result from the CAS is {load of 'expected' alloca, bool result | ||||
1876 | // from call} | ||||
1877 | Type *FinalResultTy = I->getType(); | ||||
1878 | Value *V = UndefValue::get(FinalResultTy); | ||||
1879 | Value *ExpectedOut = Builder.CreateAlignedLoad( | ||||
1880 | CASExpected->getType(), AllocaCASExpected, AllocaAlignment); | ||||
1881 | Builder.CreateLifetimeEnd(AllocaCASExpected_i8, SizeVal64); | ||||
1882 | V = Builder.CreateInsertValue(V, ExpectedOut, 0); | ||||
1883 | V = Builder.CreateInsertValue(V, Result, 1); | ||||
1884 | I->replaceAllUsesWith(V); | ||||
1885 | } else if (HasResult) { | ||||
1886 | Value *V; | ||||
1887 | if (UseSizedLibcall) | ||||
1888 | V = Builder.CreateBitOrPointerCast(Result, I->getType()); | ||||
1889 | else { | ||||
1890 | V = Builder.CreateAlignedLoad(I->getType(), AllocaResult, | ||||
1891 | AllocaAlignment); | ||||
1892 | Builder.CreateLifetimeEnd(AllocaResult_i8, SizeVal64); | ||||
1893 | } | ||||
1894 | I->replaceAllUsesWith(V); | ||||
1895 | } | ||||
1896 | I->eraseFromParent(); | ||||
1897 | return true; | ||||
1898 | } |
1 | //===- llvm/ADT/STLExtras.h - Useful STL related functions ------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains some templates that are useful if you are working with the |
10 | // STL at all. |
11 | // |
12 | // No library is required when using these functions. |
13 | // |
14 | //===----------------------------------------------------------------------===// |
15 | |
16 | #ifndef LLVM_ADT_STLEXTRAS_H |
17 | #define LLVM_ADT_STLEXTRAS_H |
18 | |
19 | #include "llvm/ADT/Optional.h" |
20 | #include "llvm/ADT/STLForwardCompat.h" |
21 | #include "llvm/ADT/iterator.h" |
22 | #include "llvm/ADT/iterator_range.h" |
23 | #include "llvm/Config/abi-breaking.h" |
24 | #include "llvm/Support/ErrorHandling.h" |
25 | #include <algorithm> |
26 | #include <cassert> |
27 | #include <cstddef> |
28 | #include <cstdint> |
29 | #include <cstdlib> |
30 | #include <functional> |
31 | #include <initializer_list> |
32 | #include <iterator> |
33 | #include <limits> |
34 | #include <memory> |
35 | #include <tuple> |
36 | #include <type_traits> |
37 | #include <utility> |
38 | |
39 | #ifdef EXPENSIVE_CHECKS |
40 | #include <random> // for std::mt19937 |
41 | #endif |
42 | |
43 | namespace llvm { |
44 | |
// Only used by compiler if both template types are the same. Useful when
// using SFINAE to test for the existence of member functions.
template <typename T, T> struct SameType;

namespace detail {

/// The iterator type obtained by calling std::begin on an lvalue of RangeT.
template <typename RangeT>
using IterOfRange = decltype(std::begin(std::declval<RangeT &>()));

/// The element type of RangeT: the type produced by dereferencing its begin
/// iterator, with any reference qualifier stripped.
template <typename RangeT>
using ValueOfRange = typename std::remove_reference<decltype(
    *std::begin(std::declval<RangeT &>()))>::type;

} // end namespace detail
59 | |
60 | //===----------------------------------------------------------------------===// |
61 | // Extra additions to <type_traits> |
62 | //===----------------------------------------------------------------------===// |
63 | |
/// Metafunction computing `const T *` for a type \p T.
template <typename T> struct make_const_ptr {
  using type = std::add_pointer_t<std::add_const_t<T>>;
};
68 | |
/// Metafunction computing `const T &` for a type \p T.
template <typename T> struct make_const_ref {
  using type = std::add_lvalue_reference_t<std::add_const_t<T>>;
};
73 | |
namespace detail {
/// Local equivalent of C++17 std::void_t: maps any set of types to void.
template <typename...> using void_t = void;
/// Primary template, selected when Op<Args...> is ill-formed.
template <class, template <class...> class Op, class... Args> struct detector {
  using value_t = std::false_type;
};
/// Partial specialization, selected (via void_t) when Op<Args...> is
/// well-formed.
template <template <class...> class Op, class... Args>
struct detector<void_t<Op<Args...>>, Op, Args...> {
  using value_t = std::true_type;
};
} // end namespace detail

/// Detects if a given trait holds for some set of arguments 'Args'.
/// For example, the given trait could be used to detect if a given type
/// has a copy assignment operator:
///   template<class T>
///   using has_copy_assign_t = decltype(std::declval<T&>()
///                                                  = std::declval<const T&>());
///   bool fooHasCopyAssign = is_detected<has_copy_assign_t, FooClass>::value;
template <template <class...> class Op, class... Args>
using is_detected = typename detail::detector<void, Op, Args...>::value_t;

namespace detail {
/// Well-formed iff a Callable lvalue can be called with Args...; used below
/// through is_detected.
template <typename Callable, typename... Args>
using is_invocable =
    decltype(std::declval<Callable &>()(std::declval<Args>()...));
} // namespace detail

/// Check if a Callable type can be invoked with the given set of arg types.
template <typename Callable, typename... Args>
using is_invocable = is_detected<detail::is_invocable, Callable, Args...>;
104 | |
/// This class provides various trait information about a callable object.
///   * To access the number of arguments: Traits::num_args
///   * To access the type of an argument: Traits::arg_t<Index>
///   * To access the type of the result:  Traits::result_t
///
/// The primary template handles class types (isClass defaults to true for
/// them) by forwarding to the traits of their call operator.
template <typename T, bool isClass = std::is_class<T>::value>
struct function_traits : public function_traits<decltype(&T::operator())> {};

/// Overload for class function types.
template <typename ClassType, typename ReturnType, typename... Args>
struct function_traits<ReturnType (ClassType::*)(Args...) const, false> {
  /// The number of arguments to this function.
  enum { num_args = sizeof...(Args) };

  /// The result type of this function.
  using result_t = ReturnType;

  /// The type of an argument to this function.
  template <size_t Index>
  using arg_t = typename std::tuple_element<Index, std::tuple<Args...>>::type;
};
/// Overload for class function types (non-const member function pointers
/// delegate to the const overload above).
template <typename ClassType, typename ReturnType, typename... Args>
struct function_traits<ReturnType (ClassType::*)(Args...), false>
    : function_traits<ReturnType (ClassType::*)(Args...) const> {};
/// Overload for non-class function types.
template <typename ReturnType, typename... Args>
struct function_traits<ReturnType (*)(Args...), false> {
  /// The number of arguments to this function.
  enum { num_args = sizeof...(Args) };

  /// The result type of this function.
  using result_t = ReturnType;

  /// The type of an argument to this function.
  template <size_t i>
  using arg_t = typename std::tuple_element<i, std::tuple<Args...>>::type;
};
/// Overload for non-class function type references.
template <typename ReturnType, typename... Args>
struct function_traits<ReturnType (&)(Args...), false>
    : public function_traits<ReturnType (*)(Args...)> {};
146 | |
147 | //===----------------------------------------------------------------------===// |
148 | // Extra additions to <functional> |
149 | //===----------------------------------------------------------------------===// |
150 | |
/// Function object returning its argument unchanged.
template <class Ty> struct identity {
  using argument_type = Ty;

  // Mutable overload: pass the reference straight through.
  Ty &operator()(Ty &Val) const { return Val; }
  // Const overload: same, preserving constness.
  const Ty &operator()(const Ty &Val) const { return Val; }
};
161 | |
/// An efficient, type-erasing, non-owning reference to a callable. This is
/// intended for use as the type of a function parameter that is not used
/// after the function in question returns.
///
/// This class does not own the callable, so it is not in general safe to store
/// a function_ref.
template<typename Fn> class function_ref;

template<typename Ret, typename ...Params>
class function_ref<Ret(Params...)> {
  // Trampoline that casts the opaque state back to the concrete callable and
  // invokes it. Null means "no callable".
  Ret (*callback)(intptr_t callable, Params ...params) = nullptr;
  // Opaque pointer to the referenced callable. Initialized to 0 so that a
  // default- or nullptr-constructed function_ref never carries an
  // indeterminate value; without this, copying or passing a null function_ref
  // reads uninitialized memory (the "1st function call argument is an
  // uninitialized value" reported by the static analyzer).
  intptr_t callable = 0;

  template<typename Callable>
  static Ret callback_fn(intptr_t callable, Params ...params) {
    return (*reinterpret_cast<Callable*>(callable))(
        std::forward<Params>(params)...);
  }

public:
  function_ref() = default;
  function_ref(std::nullptr_t) {}

  template <typename Callable>
  function_ref(
      Callable &&callable,
      // This is not the copy-constructor.
      std::enable_if_t<
          !std::is_same<std::remove_cv_t<std::remove_reference_t<Callable>>,
                        function_ref>::value> * = nullptr,
      // Functor must be callable and return a suitable type.
      std::enable_if_t<std::is_void<Ret>::value ||
                       std::is_convertible<decltype(std::declval<Callable>()(
                                               std::declval<Params>()...)),
                                           Ret>::value> * = nullptr)
      : callback(callback_fn<typename std::remove_reference<Callable>::type>),
        callable(reinterpret_cast<intptr_t>(&callable)) {}

  /// Invoke the referenced callable. Must not be called on a null
  /// function_ref.
  Ret operator()(Params ...params) const {
    return callback(callable, std::forward<Params>(params)...);
  }

  /// True iff this references some callable.
  explicit operator bool() const { return callback; }
};
205 | |
206 | //===----------------------------------------------------------------------===// |
207 | // Extra additions to <iterator> |
208 | //===----------------------------------------------------------------------===// |
209 | |
namespace adl_detail {

using std::begin;

/// Calls begin() found by ADL, with std::begin visible as a fallback.
template <typename ContainerTy>
decltype(auto) adl_begin(ContainerTy &&container) {
  return begin(std::forward<ContainerTy>(container));
}

using std::end;

/// Calls end() found by ADL, with std::end visible as a fallback.
template <typename ContainerTy>
decltype(auto) adl_end(ContainerTy &&container) {
  return end(std::forward<ContainerTy>(container));
}

using std::swap;

/// Calls swap() found by ADL, with std::swap visible as a fallback.
template <typename T>
void adl_swap(T &&lhs, T &&rhs) noexcept(noexcept(swap(std::declval<T>(),
                                                       std::declval<T>()))) {
  swap(std::forward<T>(lhs), std::forward<T>(rhs));
}

} // end namespace adl_detail
235 | |
/// Returns the result of begin() on \p container, found via ADL (with
/// std::begin as a fallback).
template <typename ContainerTy>
decltype(auto) adl_begin(ContainerTy &&container) {
  return adl_detail::adl_begin(std::forward<ContainerTy>(container));
}

/// Returns the result of end() on \p container, found via ADL (with std::end
/// as a fallback).
template <typename ContainerTy>
decltype(auto) adl_end(ContainerTy &&container) {
  return adl_detail::adl_end(std::forward<ContainerTy>(container));
}

/// Swaps \p lhs and \p rhs using the swap() found via ADL (with std::swap as
/// a fallback).
template <typename T>
void adl_swap(T &&lhs, T &&rhs) noexcept(
    noexcept(adl_detail::adl_swap(std::declval<T>(), std::declval<T>()))) {
  adl_detail::adl_swap(std::forward<T>(lhs), std::forward<T>(rhs));
}

/// Test whether \p RangeOrContainer is empty. Similar to C++17 std::empty.
template <typename T>
constexpr bool empty(const T &RangeOrContainer) {
  return adl_begin(RangeOrContainer) == adl_end(RangeOrContainer);
}
257 | |
/// Returns true if the given container only contains a single element.
template <typename ContainerTy> bool hasSingleElement(ContainerTy &&C) {
  auto It = std::begin(C);
  auto End = std::end(C);
  // Non-empty, and advancing one step reaches the end.
  if (It == End)
    return false;
  ++It;
  return It == End;
}
263 | |
/// Return a range covering \p RangeOrContainer with the first N elements
/// excluded.
template <typename T> auto drop_begin(T &&RangeOrContainer, size_t N = 1) {
  auto NewBegin = std::next(adl_begin(RangeOrContainer), N);
  return make_range(NewBegin, adl_end(RangeOrContainer));
}
270 | |
// mapped_iterator - This is a simple iterator adapter that causes a function to
// be applied whenever operator* is invoked on the iterator.

template <typename ItTy, typename FuncTy,
          typename FuncReturnTy =
              decltype(std::declval<FuncTy>()(*std::declval<ItTy>()))>
class mapped_iterator
    : public iterator_adaptor_base<
          mapped_iterator<ItTy, FuncTy>, ItTy,
          typename std::iterator_traits<ItTy>::iterator_category,
          typename std::remove_reference<FuncReturnTy>::type> {
public:
  mapped_iterator(ItTy U, FuncTy F)
      : mapped_iterator::iterator_adaptor_base(std::move(U)), F(std::move(F)) {}

  /// Access the wrapped (unmapped) iterator.
  ItTy getCurrent() { return this->I; }

  /// Dereference applies F to the underlying element.
  FuncReturnTy operator*() const { return F(*this->I); }

private:
  FuncTy F;
};

// map_iterator - Provide a convenient way to create mapped_iterators, just like
// make_pair is useful for creating pairs...
template <class ItTy, class FuncTy>
inline mapped_iterator<ItTy, FuncTy> map_iterator(ItTy I, FuncTy F) {
  return mapped_iterator<ItTy, FuncTy>(std::move(I), std::move(F));
}

/// Returns a range whose elements are the result of applying F to each element
/// of \p C.
template <class ContainerTy, class FuncTy>
auto map_range(ContainerTy &&C, FuncTy F) {
  return make_range(map_iterator(C.begin(), F), map_iterator(C.end(), F));
}
305 | |
/// Helper to determine if type T has a member called rbegin().
template <typename Ty> class has_rbegin_impl {
  using yes = char[1];
  using no = char[2];

  // Overload selected when I->rbegin() is well-formed.
  template <typename Inner>
  static yes& test(Inner *I, decltype(I->rbegin()) * = nullptr);

  // Fallback overload when rbegin() is unavailable.
  template <typename>
  static no& test(...);

public:
  static const bool value = sizeof(test<Ty>(nullptr)) == sizeof(yes);
};

/// Metafunction to determine if T& or T has a member called rbegin().
template <typename Ty>
struct has_rbegin : has_rbegin_impl<typename std::remove_reference<Ty>::type> {
};

// Returns an iterator_range over the given container which iterates in reverse.
// Note that the container must have rbegin()/rend() methods for this to work.
template <typename ContainerTy>
auto reverse(ContainerTy &&C,
             std::enable_if_t<has_rbegin<ContainerTy>::value> * = nullptr) {
  return make_range(C.rbegin(), C.rend());
}
333 | |
// Returns a std::reverse_iterator wrapped around the given iterator.
template <typename IteratorTy>
std::reverse_iterator<IteratorTy> make_reverse_iterator(IteratorTy It) {
  // Delegate to the C++14 standard factory, which produces the same type.
  return std::make_reverse_iterator(It);
}
339 | |
// Returns an iterator_range over the given container which iterates in reverse.
// Note that the container must have begin()/end() methods which return
// bidirectional iterators for this to work.
// This overload is chosen when the container lacks rbegin()/rend(); the
// reversal is synthesized from std::reverse_iterator.
template <typename ContainerTy>
auto reverse(ContainerTy &&C,
             std::enable_if_t<!has_rbegin<ContainerTy>::value> * = nullptr) {
  return make_range(llvm::make_reverse_iterator(std::end(C)),
                    llvm::make_reverse_iterator(std::begin(C)));
}
349 | |
/// An iterator adaptor that filters the elements of given inner iterators.
///
/// The predicate parameter should be a callable object that accepts the wrapped
/// iterator's reference type and returns a bool. When incrementing or
/// decrementing the iterator, it will call the predicate on each element and
/// skip any where it returns false.
///
/// \code
///   int A[] = { 1, 2, 3, 4 };
///   auto R = make_filter_range(A, [](int N) { return N % 2 == 1; });
///   // R contains { 1, 3 }.
/// \endcode
///
/// Note: filter_iterator_base implements support for forward iteration.
/// filter_iterator_impl exists to provide support for bidirectional iteration,
/// conditional on whether the wrapped iterator supports it.
template <typename WrappedIteratorT, typename PredicateT, typename IterTag>
class filter_iterator_base
    : public iterator_adaptor_base<
          filter_iterator_base<WrappedIteratorT, PredicateT, IterTag>,
          WrappedIteratorT,
          typename std::common_type<
              IterTag, typename std::iterator_traits<
                           WrappedIteratorT>::iterator_category>::type> {
  using BaseT = iterator_adaptor_base<
      filter_iterator_base<WrappedIteratorT, PredicateT, IterTag>,
      WrappedIteratorT,
      typename std::common_type<
          IterTag, typename std::iterator_traits<
                       WrappedIteratorT>::iterator_category>::type>;

protected:
  WrappedIteratorT End;
  PredicateT Pred;

  // Advance this->I until it refers to an element satisfying Pred, or to End.
  void findNextValid() {
    while (this->I != End && !Pred(*this->I))
      BaseT::operator++();
  }

  // Construct the iterator. The begin iterator needs to know where the end
  // is, so that it can properly stop when it gets there. The end iterator only
  // needs the predicate to support bidirectional iteration.
  filter_iterator_base(WrappedIteratorT Begin, WrappedIteratorT End,
                       PredicateT Pred)
      : BaseT(Begin), End(End), Pred(Pred) {
    findNextValid();
  }

public:
  using BaseT::operator++;

  filter_iterator_base &operator++() {
    BaseT::operator++();
    findNextValid();
    return *this;
  }
};
408 | |
/// Specialization of filter_iterator_base for forward iteration only.
template <typename WrappedIteratorT, typename PredicateT,
          typename IterTag = std::forward_iterator_tag>
class filter_iterator_impl
    : public filter_iterator_base<WrappedIteratorT, PredicateT, IterTag> {
  using BaseT = filter_iterator_base<WrappedIteratorT, PredicateT, IterTag>;

public:
  filter_iterator_impl(WrappedIteratorT Begin, WrappedIteratorT End,
                       PredicateT Pred)
      : BaseT(Begin, End, Pred) {}
};

/// Specialization of filter_iterator_base for bidirectional iteration.
template <typename WrappedIteratorT, typename PredicateT>
class filter_iterator_impl<WrappedIteratorT, PredicateT,
                           std::bidirectional_iterator_tag>
    : public filter_iterator_base<WrappedIteratorT, PredicateT,
                                  std::bidirectional_iterator_tag> {
  using BaseT = filter_iterator_base<WrappedIteratorT, PredicateT,
                                     std::bidirectional_iterator_tag>;
  // Step this->I back until it refers to an element satisfying Pred.
  void findPrevValid() {
    while (!this->Pred(*this->I))
      BaseT::operator--();
  }

public:
  using BaseT::operator--;

  filter_iterator_impl(WrappedIteratorT Begin, WrappedIteratorT End,
                       PredicateT Pred)
      : BaseT(Begin, End, Pred) {}

  filter_iterator_impl &operator--() {
    BaseT::operator--();
    findPrevValid();
    return *this;
  }
};
448 | |
namespace detail {

/// Default: only forward iteration is available.
template <bool is_bidirectional> struct fwd_or_bidi_tag_impl {
  using type = std::forward_iterator_tag;
};

/// Specialization for iterators that support bidirectional iteration.
template <> struct fwd_or_bidi_tag_impl<true> {
  using type = std::bidirectional_iterator_tag;
};

/// Helper which sets its type member to forward_iterator_tag if the category
/// of \p IterT does not derive from bidirectional_iterator_tag, and to
/// bidirectional_iterator_tag otherwise.
template <typename IterT> struct fwd_or_bidi_tag {
  using type = typename fwd_or_bidi_tag_impl<std::is_base_of<
      std::bidirectional_iterator_tag,
      typename std::iterator_traits<IterT>::iterator_category>::value>::type;
};

} // namespace detail

/// Defines filter_iterator to a suitable specialization of
/// filter_iterator_impl, based on the underlying iterator's category.
template <typename WrappedIteratorT, typename PredicateT>
using filter_iterator = filter_iterator_impl<
    WrappedIteratorT, PredicateT,
    typename detail::fwd_or_bidi_tag<WrappedIteratorT>::type>;
476 | |
/// Convenience function that takes a range of elements and a predicate,
/// and return a new filter_iterator range.
///
/// FIXME: Currently if RangeT && is a rvalue reference to a temporary, the
/// lifetime of that temporary is not kept by the returned range object, and the
/// temporary is going to be dropped on the floor after the make_iterator_range
/// full expression that contains this function call.
template <typename RangeT, typename PredicateT>
iterator_range<filter_iterator<detail::IterOfRange<RangeT>, PredicateT>>
make_filter_range(RangeT &&Range, PredicateT Pred) {
  using FilterIteratorT =
      filter_iterator<detail::IterOfRange<RangeT>, PredicateT>;
  // The begin iterator carries both begin and end (so it can stop); the end
  // iterator is positioned at end.
  return make_range(
      FilterIteratorT(std::begin(std::forward<RangeT>(Range)),
                      std::end(std::forward<RangeT>(Range)), Pred),
      FilterIteratorT(std::end(std::forward<RangeT>(Range)),
                      std::end(std::forward<RangeT>(Range)), Pred));
}
495 | |
/// A pseudo-iterator adaptor that is designed to implement "early increment"
/// style loops.
///
/// This is *not a normal iterator* and should almost never be used directly. It
/// is intended primarily to be used with range based for loops and some range
/// algorithms.
///
/// The iterator isn't quite an `OutputIterator` or an `InputIterator` but
/// somewhere between them. The constraints of these iterators are:
///
/// - On construction or after being incremented, it is comparable and
///   dereferencable. It is *not* incrementable.
/// - After being dereferenced, it is neither comparable nor dereferencable, it
///   is only incrementable.
///
/// This means you can only dereference the iterator once, and you can only
/// increment it once between dereferences.
///
/// NOTE(review): the token `LLVM_ENABLE_ABI_BREAKING_CHECKS0` and the
/// `((void)0)` suffixes below look like artifacts of a preprocessed/annotated
/// rendering of this header (the asserts expanded away); confirm against the
/// upstream llvm/ADT/STLExtras.h before editing these regions.
template <typename WrappedIteratorT>
class early_inc_iterator_impl
    : public iterator_adaptor_base<early_inc_iterator_impl<WrappedIteratorT>,
                                   WrappedIteratorT, std::input_iterator_tag> {
  using BaseT =
      iterator_adaptor_base<early_inc_iterator_impl<WrappedIteratorT>,
                            WrappedIteratorT, std::input_iterator_tag>;

  using PointerT = typename std::iterator_traits<WrappedIteratorT>::pointer;

protected:
#if LLVM_ENABLE_ABI_BREAKING_CHECKS0
  bool IsEarlyIncremented = false;
#endif

public:
  early_inc_iterator_impl(WrappedIteratorT I) : BaseT(I) {}

  using BaseT::operator*;
  // Dereference returns the element and advances the wrapped iterator
  // immediately ("early increment").
  decltype(*std::declval<WrappedIteratorT>()) operator*() {
#if LLVM_ENABLE_ABI_BREAKING_CHECKS0
    assert(!IsEarlyIncremented && "Cannot dereference twice!")((void)0);
    IsEarlyIncremented = true;
#endif
    return *(this->I)++;
  }

  using BaseT::operator++;
  // Increment is a no-op on the wrapped iterator; the advance already
  // happened in operator*.
  early_inc_iterator_impl &operator++() {
#if LLVM_ENABLE_ABI_BREAKING_CHECKS0
    assert(IsEarlyIncremented && "Cannot increment before dereferencing!")((void)0);
    IsEarlyIncremented = false;
#endif
    return *this;
  }

  friend bool operator==(const early_inc_iterator_impl &LHS,
                         const early_inc_iterator_impl &RHS) {
#if LLVM_ENABLE_ABI_BREAKING_CHECKS0
    assert(!LHS.IsEarlyIncremented && "Cannot compare after dereferencing!")((void)0);
#endif
    return (const BaseT &)LHS == (const BaseT &)RHS;
  }
};
557 | |
/// Make a range that does early increment to allow mutation of the underlying
/// range without disrupting iteration.
///
/// The underlying iterator will be incremented immediately after it is
/// dereferenced, allowing deletion of the current node or insertion of nodes to
/// not disrupt iteration provided they do not invalidate the *next* iterator --
/// the current iterator can be invalidated.
///
/// This requires a very exact pattern of use that is only really suitable to
/// range based for loops and other range algorithms that explicitly guarantee
/// to dereference exactly once each element, and to increment exactly once each
/// element.
template <typename RangeT>
iterator_range<early_inc_iterator_impl<detail::IterOfRange<RangeT>>>
make_early_inc_range(RangeT &&Range) {
  using EarlyIncIteratorT =
      early_inc_iterator_impl<detail::IterOfRange<RangeT>>;
  return make_range(EarlyIncIteratorT(std::begin(std::forward<RangeT>(Range))),
                    EarlyIncIteratorT(std::end(std::forward<RangeT>(Range))));
}

// forward declarations required by zip_shortest/zip_first/zip_longest
template <typename R, typename UnaryPredicate>
bool all_of(R &&range, UnaryPredicate P);
template <typename R, typename UnaryPredicate>
bool any_of(R &&range, UnaryPredicate P);
584 | |
namespace detail {

using std::declval;

// We have to alias this since inlining the actual type at the usage site
// in the parameter list of iterator_facade_base<> below ICEs MSVC 2017.
template<typename... Iters> struct ZipTupleType {
  using type = std::tuple<decltype(*declval<Iters>())...>;
};

/// Iterator facade shared by the zip iterators: a tuple-of-references value
/// type, with the category clamped to at most bidirectional.
template <typename ZipType, typename... Iters>
using zip_traits = iterator_facade_base<
    ZipType, typename std::common_type<std::bidirectional_iterator_tag,
                                       typename std::iterator_traits<
                                           Iters>::iterator_category...>::type,
    // ^ TODO: Implement random access methods.
    typename ZipTupleType<Iters...>::type,
    typename std::iterator_traits<typename std::tuple_element<
        0, std::tuple<Iters...>>::type>::difference_type,
    // ^ FIXME: This follows boost::make_zip_iterator's assumption that all
    // inner iterators have the same difference_type. It would fail if, for
    // instance, the second field's difference_type were non-numeric while the
    // first is.
    typename ZipTupleType<Iters...>::type *,
    typename ZipTupleType<Iters...>::type>;
610 | |
/// CRTP base shared by zip_first/zip_shortest: holds the tuple of inner
/// iterators and implements dereference/increment/decrement element-wise.
template <typename ZipType, typename... Iters>
struct zip_common : public zip_traits<ZipType, Iters...> {
  using Base = zip_traits<ZipType, Iters...>;
  using value_type = typename Base::value_type;

  std::tuple<Iters...> iterators;

protected:
  // Dereference every inner iterator, producing a tuple of references.
  template <size_t... Ns> value_type deref(std::index_sequence<Ns...>) const {
    return value_type(*std::get<Ns>(iterators)...);
  }

  // Advance every inner iterator by one.
  template <size_t... Ns>
  decltype(iterators) tup_inc(std::index_sequence<Ns...>) const {
    return std::tuple<Iters...>(std::next(std::get<Ns>(iterators))...);
  }

  // Step every inner iterator back by one.
  template <size_t... Ns>
  decltype(iterators) tup_dec(std::index_sequence<Ns...>) const {
    return std::tuple<Iters...>(std::prev(std::get<Ns>(iterators))...);
  }

public:
  zip_common(Iters &&... ts) : iterators(std::forward<Iters>(ts)...) {}

  value_type operator*() { return deref(std::index_sequence_for<Iters...>{}); }

  const value_type operator*() const {
    return deref(std::index_sequence_for<Iters...>{});
  }

  ZipType &operator++() {
    iterators = tup_inc(std::index_sequence_for<Iters...>{});
    // CRTP cast back to the most-derived zip iterator type.
    return *reinterpret_cast<ZipType *>(this);
  }

  ZipType &operator--() {
    static_assert(Base::IsBidirectional,
                  "All inner iterators must be at least bidirectional.");
    iterators = tup_dec(std::index_sequence_for<Iters...>{});
    return *reinterpret_cast<ZipType *>(this);
  }
};
654 | |
/// Zip iterator whose equality compares only the first inner iterator;
/// assumes the first range is the shortest.
template <typename... Iters>
struct zip_first : public zip_common<zip_first<Iters...>, Iters...> {
  using Base = zip_common<zip_first<Iters...>, Iters...>;

  bool operator==(const zip_first<Iters...> &other) const {
    return std::get<0>(this->iterators) == std::get<0>(other.iterators);
  }

  zip_first(Iters &&... ts) : Base(std::forward<Iters>(ts)...) {}
};

/// Zip iterator that stops as soon as any inner iterator reaches its end.
template <typename... Iters>
class zip_shortest : public zip_common<zip_shortest<Iters...>, Iters...> {
  // True while every inner iterator still differs from the corresponding one
  // in `other` (i.e. none has hit its end).
  template <size_t... Ns>
  bool test(const zip_shortest<Iters...> &other,
            std::index_sequence<Ns...>) const {
    return all_of(std::initializer_list<bool>{std::get<Ns>(this->iterators) !=
                                              std::get<Ns>(other.iterators)...},
                  identity<bool>{});
  }

public:
  using Base = zip_common<zip_shortest<Iters...>, Iters...>;

  zip_shortest(Iters &&... ts) : Base(std::forward<Iters>(ts)...) {}

  bool operator==(const zip_shortest<Iters...> &other) const {
    return !test(other, std::index_sequence_for<Iters...>{});
  }
};
685 | |
/// Range returned by zip()/zip_first(): stores the zipped ranges and exposes
/// begin()/end() built from the given zip iterator template.
template <template <typename...> class ItType, typename... Args> class zippy {
public:
  using iterator = ItType<decltype(std::begin(std::declval<Args>()))...>;
  using iterator_category = typename iterator::iterator_category;
  using value_type = typename iterator::value_type;
  using difference_type = typename iterator::difference_type;
  using pointer = typename iterator::pointer;
  using reference = typename iterator::reference;

private:
  std::tuple<Args...> ts;

  template <size_t... Ns>
  iterator begin_impl(std::index_sequence<Ns...>) const {
    return iterator(std::begin(std::get<Ns>(ts))...);
  }
  template <size_t... Ns> iterator end_impl(std::index_sequence<Ns...>) const {
    return iterator(std::end(std::get<Ns>(ts))...);
  }

public:
  zippy(Args &&... ts_) : ts(std::forward<Args>(ts_)...) {}

  iterator begin() const {
    return begin_impl(std::index_sequence_for<Args...>{});
  }
  iterator end() const { return end_impl(std::index_sequence_for<Args...>{}); }
};

} // end namespace detail
714 | |
715 | } // end namespace detail |
716 | |
/// zip iterator for two or more iteratable types.
/// Iteration stops when the shortest range is exhausted.
template <typename T, typename U, typename... Args>
detail::zippy<detail::zip_shortest, T, U, Args...> zip(T &&t, U &&u,
                                                       Args &&... args) {
  return detail::zippy<detail::zip_shortest, T, U, Args...>(
      std::forward<T>(t), std::forward<U>(u), std::forward<Args>(args)...);
}

/// zip iterator that, for the sake of efficiency, assumes the first iteratee to
/// be the shortest.
template <typename T, typename U, typename... Args>
detail::zippy<detail::zip_first, T, U, Args...> zip_first(T &&t, U &&u,
                                                          Args &&... args) {
  return detail::zippy<detail::zip_first, T, U, Args...>(
      std::forward<T>(t), std::forward<U>(u), std::forward<Args>(args)...);
}
733 | |
734 | namespace detail { |
/// Return the successor of \p I, or \p End if \p I already equals \p End.
template <typename Iter>
Iter next_or_end(const Iter &I, const Iter &End) {
  return I == End ? End : std::next(I);
}
741 | |
/// Dereference \p I, or return None if \p I equals \p End.
template <typename Iter>
auto deref_or_none(const Iter &I, const Iter &End) -> llvm::Optional<
    std::remove_const_t<std::remove_reference_t<decltype(*I)>>> {
  if (I == End)
    return None;
  return *I;
}

/// Optional-wrapped element type of \p Iter, used by zip_longest.
template <typename Iter> struct ZipLongestItemType {
  using type =
      llvm::Optional<typename std::remove_const<typename std::remove_reference<
          decltype(*std::declval<Iter>())>::type>::type>;
};

/// Tuple of Optional-wrapped element types for a pack of iterators.
template <typename... Iters> struct ZipLongestTupleType {
  using type = std::tuple<typename ZipLongestItemType<Iters>::type...>;
};
759 | |
/// Zip iterator that runs until *all* inner ranges are exhausted; exhausted
/// positions yield None in the result tuple.
template <typename... Iters>
class zip_longest_iterator
    : public iterator_facade_base<
          zip_longest_iterator<Iters...>,
          typename std::common_type<
              std::forward_iterator_tag,
              typename std::iterator_traits<Iters>::iterator_category...>::type,
          typename ZipLongestTupleType<Iters...>::type,
          typename std::iterator_traits<typename std::tuple_element<
              0, std::tuple<Iters...>>::type>::difference_type,
          typename ZipLongestTupleType<Iters...>::type *,
          typename ZipLongestTupleType<Iters...>::type> {
public:
  using value_type = typename ZipLongestTupleType<Iters...>::type;

private:
  std::tuple<Iters...> iterators;
  std::tuple<Iters...> end_iterators;

  // True while at least one inner iterator differs from the corresponding
  // iterator in `other` (i.e. some range still has elements).
  template <size_t... Ns>
  bool test(const zip_longest_iterator<Iters...> &other,
            std::index_sequence<Ns...>) const {
    return llvm::any_of(
        std::initializer_list<bool>{std::get<Ns>(this->iterators) !=
                                    std::get<Ns>(other.iterators)...},
        identity<bool>{});
  }

  // Dereference each inner iterator into an Optional (None once exhausted).
  template <size_t... Ns> value_type deref(std::index_sequence<Ns...>) const {
    return value_type(
        deref_or_none(std::get<Ns>(iterators), std::get<Ns>(end_iterators))...);
  }

  // Advance each inner iterator unless it is already at its end.
  template <size_t... Ns>
  decltype(iterators) tup_inc(std::index_sequence<Ns...>) const {
    return std::tuple<Iters...>(
        next_or_end(std::get<Ns>(iterators), std::get<Ns>(end_iterators))...);
  }

public:
  zip_longest_iterator(std::pair<Iters &&, Iters &&>... ts)
      : iterators(std::forward<Iters>(ts.first)...),
        end_iterators(std::forward<Iters>(ts.second)...) {}

  value_type operator*() { return deref(std::index_sequence_for<Iters...>{}); }

  value_type operator*() const {
    return deref(std::index_sequence_for<Iters...>{});
  }

  zip_longest_iterator<Iters...> &operator++() {
    iterators = tup_inc(std::index_sequence_for<Iters...>{});
    return *this;
  }

  bool operator==(const zip_longest_iterator<Iters...> &other) const {
    return !test(other, std::index_sequence_for<Iters...>{});
  }
};
819 | |
/// Range adaptor for zip_longest: stores the zipped ranges (by value for
/// temporaries, by reference otherwise, per the deduced Args) and hands out
/// zip_longest_iterators over all of them.
template <typename... Args> class zip_longest_range {
public:
  using iterator =
      zip_longest_iterator<decltype(adl_begin(std::declval<Args>()))...>;
  using iterator_category = typename iterator::iterator_category;
  using value_type = typename iterator::value_type;
  using difference_type = typename iterator::difference_type;
  using pointer = typename iterator::pointer;
  using reference = typename iterator::reference;

private:
  // The stored ranges being zipped.
  std::tuple<Args...> ts;

  /// Begin iterator: pairs (begin, end) of every stored range.
  template <size_t... Ns>
  iterator begin_impl(std::index_sequence<Ns...>) const {
    return iterator(std::make_pair(adl_begin(std::get<Ns>(ts)),
                                   adl_end(std::get<Ns>(ts)))...);
  }

  /// End iterator: every component holds (end, end).
  template <size_t... Ns> iterator end_impl(std::index_sequence<Ns...>) const {
    return iterator(std::make_pair(adl_end(std::get<Ns>(ts)),
                                   adl_end(std::get<Ns>(ts)))...);
  }

public:
  zip_longest_range(Args &&... ts_) : ts(std::forward<Args>(ts_)...) {}

  iterator begin() const {
    return begin_impl(std::index_sequence_for<Args...>{});
  }
  iterator end() const { return end_impl(std::index_sequence_for<Args...>{}); }
};
852 | } // namespace detail |
853 | |
854 | /// Iterate over two or more iterators at the same time. Iteration continues |
855 | /// until all iterators reach the end. The llvm::Optional only contains a value |
856 | /// if the iterator has not reached the end. |
857 | template <typename T, typename U, typename... Args> |
858 | detail::zip_longest_range<T, U, Args...> zip_longest(T &&t, U &&u, |
859 | Args &&... args) { |
860 | return detail::zip_longest_range<T, U, Args...>( |
861 | std::forward<T>(t), std::forward<U>(u), std::forward<Args>(args)...); |
862 | } |
863 | |
864 | /// Iterator wrapper that concatenates sequences together. |
865 | /// |
866 | /// This can concatenate different iterators, even with different types, into |
867 | /// a single iterator provided the value types of all the concatenated |
868 | /// iterators expose `reference` and `pointer` types that can be converted to |
869 | /// `ValueT &` and `ValueT *` respectively. It doesn't support more |
870 | /// interesting/customized pointer or reference types. |
871 | /// |
872 | /// Currently this only supports forward or higher iterator categories as |
873 | /// inputs and always exposes a forward iterator interface. |
template <typename ValueT, typename... IterTs>
class concat_iterator
    : public iterator_facade_base<concat_iterator<ValueT, IterTs...>,
                                  std::forward_iterator_tag, ValueT> {
  using BaseT = typename concat_iterator::iterator_facade_base;

  /// We store both the current and end iterators for each concatenated
  /// sequence in a tuple of pairs.
  ///
  /// Note that something like iterator_range seems nice at first here, but the
  /// range properties are of little benefit and end up getting in the way
  /// because we need to do mutation on the current iterators.
  std::tuple<IterTs...> Begins;
  std::tuple<IterTs...> Ends;

  /// Attempts to increment a specific iterator.
  ///
  /// Returns true if it was able to increment the iterator. Returns false if
  /// the iterator is already at the end iterator.
  template <size_t Index> bool incrementHelper() {
    auto &Begin = std::get<Index>(Begins);
    auto &End = std::get<Index>(Ends);
    if (Begin == End)
      return false;

    ++Begin;
    return true;
  }

  /// Increments the first non-end iterator.
  ///
  /// It is an error to call this with all iterators at the end.
  template <size_t... Ns> void increment(std::index_sequence<Ns...>) {
    // Build a sequence of functions to increment each iterator if possible.
    // (An array of pointers-to-member lets us visit the tuple slots in order
    // at runtime.)
    bool (concat_iterator::*IncrementHelperFns[])() = {
        &concat_iterator::incrementHelper<Ns>...};

    // Loop over them, and stop as soon as we succeed at incrementing one.
    for (auto &IncrementHelperFn : IncrementHelperFns)
      if ((this->*IncrementHelperFn)())
        return;

    llvm_unreachable("Attempted to increment an end concat iterator!")__builtin_unreachable();
  }

  /// Returns null if the specified iterator is at the end. Otherwise,
  /// dereferences the iterator and returns the address of the resulting
  /// reference.
  template <size_t Index> ValueT *getHelper() const {
    auto &Begin = std::get<Index>(Begins);
    auto &End = std::get<Index>(Ends);
    if (Begin == End)
      return nullptr;

    return &*Begin;
  }

  /// Finds the first non-end iterator, dereferences, and returns the resulting
  /// reference.
  ///
  /// It is an error to call this with all iterators at the end.
  template <size_t... Ns> ValueT &get(std::index_sequence<Ns...>) const {
    // Build a sequence of functions to get from iterator if possible.
    ValueT *(concat_iterator::*GetHelperFns[])() const = {
        &concat_iterator::getHelper<Ns>...};

    // Loop over them, and return the first result we find.
    for (auto &GetHelperFn : GetHelperFns)
      if (ValueT *P = (this->*GetHelperFn)())
        return *P;

    llvm_unreachable("Attempted to get a pointer from an end concat iterator!")__builtin_unreachable();
  }

public:
  /// Constructs an iterator from a sequence of ranges.
  ///
  /// We need the full range to know how to switch between each of the
  /// iterators.
  template <typename... RangeTs>
  explicit concat_iterator(RangeTs &&... Ranges)
      : Begins(std::begin(Ranges)...), Ends(std::end(Ranges)...) {}

  using BaseT::operator++;

  concat_iterator &operator++() {
    increment(std::index_sequence_for<IterTs...>());
    return *this;
  }

  ValueT &operator*() const {
    return get(std::index_sequence_for<IterTs...>());
  }

  // Element-wise tuple comparison of all current and end iterators.
  bool operator==(const concat_iterator &RHS) const {
    return Begins == RHS.Begins && Ends == RHS.Ends;
  }
};
972 | |
973 | namespace detail { |
974 | |
975 | /// Helper to store a sequence of ranges being concatenated and access them. |
976 | /// |
977 | /// This is designed to facilitate providing actual storage when temporaries |
978 | /// are passed into the constructor such that we can use it as part of range |
979 | /// based for loops. |
template <typename ValueT, typename... RangeTs> class concat_range {
public:
  using iterator =
      concat_iterator<ValueT,
                      decltype(std::begin(std::declval<RangeTs &>()))...>;

private:
  // The concatenated ranges; temporaries passed to concat() are stored here
  // so range-based for loops keep them alive.
  std::tuple<RangeTs...> Ranges;

  /// Begin iterator sees every stored range in full.
  template <size_t... Ns> iterator begin_impl(std::index_sequence<Ns...>) {
    return iterator(std::get<Ns>(Ranges)...);
  }
  /// End iterator: each component is the empty (end, end) range.
  template <size_t... Ns> iterator end_impl(std::index_sequence<Ns...>) {
    return iterator(make_range(std::end(std::get<Ns>(Ranges)),
                               std::end(std::get<Ns>(Ranges)))...);
  }

public:
  concat_range(RangeTs &&... Ranges)
      : Ranges(std::forward<RangeTs>(Ranges)...) {}

  iterator begin() { return begin_impl(std::index_sequence_for<RangeTs...>{}); }
  iterator end() { return end_impl(std::index_sequence_for<RangeTs...>{}); }
};
1004 | |
1005 | } // end namespace detail |
1006 | |
1007 | /// Concatenated range across two or more ranges. |
1008 | /// |
1009 | /// The desired value type must be explicitly specified. |
1010 | template <typename ValueT, typename... RangeTs> |
1011 | detail::concat_range<ValueT, RangeTs...> concat(RangeTs &&... Ranges) { |
1012 | static_assert(sizeof...(RangeTs) > 1, |
1013 | "Need more than one range to concatenate!"); |
1014 | return detail::concat_range<ValueT, RangeTs...>( |
1015 | std::forward<RangeTs>(Ranges)...); |
1016 | } |
1017 | |
1018 | /// A utility class used to implement an iterator that contains some base object |
1019 | /// and an index. The iterator moves the index but keeps the base constant. |
template <typename DerivedT, typename BaseT, typename T,
          typename PointerT = T *, typename ReferenceT = T &>
class indexed_accessor_iterator
    : public llvm::iterator_facade_base<DerivedT,
                                        std::random_access_iterator_tag, T,
                                        std::ptrdiff_t, PointerT, ReferenceT> {
public:
  /// Distance between iterators; only meaningful for iterators over the same
  /// base object.
  ptrdiff_t operator-(const indexed_accessor_iterator &rhs) const {
    assert(base == rhs.base && "incompatible iterators")((void)0);
    return index - rhs.index;
  }
  bool operator==(const indexed_accessor_iterator &rhs) const {
    return base == rhs.base && index == rhs.index;
  }
  bool operator<(const indexed_accessor_iterator &rhs) const {
    assert(base == rhs.base && "incompatible iterators")((void)0);
    return index < rhs.index;
  }

  // Random-access movement adjusts only the index; the base never changes.
  DerivedT &operator+=(ptrdiff_t offset) {
    this->index += offset;
    return static_cast<DerivedT &>(*this);
  }
  DerivedT &operator-=(ptrdiff_t offset) {
    this->index -= offset;
    return static_cast<DerivedT &>(*this);
  }

  /// Returns the current index of the iterator.
  ptrdiff_t getIndex() const { return index; }

  /// Returns the current base of the iterator.
  const BaseT &getBase() const { return base; }

protected:
  indexed_accessor_iterator(BaseT base, ptrdiff_t index)
      : base(base), index(index) {}
  BaseT base;
  ptrdiff_t index;
};
1060 | |
1061 | namespace detail { |
1062 | /// The class represents the base of a range of indexed_accessor_iterators. It |
1063 | /// provides support for many different range functionalities, e.g. |
1064 | /// drop_front/slice/etc.. Derived range classes must implement the following |
1065 | /// static methods: |
1066 | /// * ReferenceT dereference_iterator(const BaseT &base, ptrdiff_t index) |
1067 | /// - Dereference an iterator pointing to the base object at the given |
1068 | /// index. |
1069 | /// * BaseT offset_base(const BaseT &base, ptrdiff_t index) |
1070 | /// - Return a new base that is offset from the provide base by 'index' |
1071 | /// elements. |
template <typename DerivedT, typename BaseT, typename T,
          typename PointerT = T *, typename ReferenceT = T &>
class indexed_accessor_range_base {
public:
  using RangeBaseT =
      indexed_accessor_range_base<DerivedT, BaseT, T, PointerT, ReferenceT>;

  /// An iterator element of this range.
  class iterator : public indexed_accessor_iterator<iterator, BaseT, T,
                                                    PointerT, ReferenceT> {
  public:
    // Index into this iterator, invoking a static method on the derived type.
    ReferenceT operator*() const {
      return DerivedT::dereference_iterator(this->getBase(), this->getIndex());
    }

  private:
    iterator(BaseT owner, ptrdiff_t curIndex)
        : indexed_accessor_iterator<iterator, BaseT, T, PointerT, ReferenceT>(
              owner, curIndex) {}

    /// Allow access to the constructor.
    friend indexed_accessor_range_base<DerivedT, BaseT, T, PointerT,
                                       ReferenceT>;
  };

  // Construct from an iterator pair; the stored base is re-offset so indices
  // inside this range start at zero.
  indexed_accessor_range_base(iterator begin, iterator end)
      : base(offset_base(begin.getBase(), begin.getIndex())),
        count(end.getIndex() - begin.getIndex()) {}
  indexed_accessor_range_base(const iterator_range<iterator> &range)
      : indexed_accessor_range_base(range.begin(), range.end()) {}
  indexed_accessor_range_base(BaseT base, ptrdiff_t count)
      : base(base), count(count) {}

  iterator begin() const { return iterator(base, 0); }
  iterator end() const { return iterator(base, count); }
  ReferenceT operator[](size_t Index) const {
    assert(Index < size() && "invalid index for value range")((void)0);
    return DerivedT::dereference_iterator(base, static_cast<ptrdiff_t>(Index));
  }
  ReferenceT front() const {
    assert(!empty() && "expected non-empty range")((void)0);
    return (*this)[0];
  }
  ReferenceT back() const {
    assert(!empty() && "expected non-empty range")((void)0);
    return (*this)[size() - 1];
  }

  /// Compare this range with another.
  template <typename OtherT> bool operator==(const OtherT &other) const {
    return size() ==
               static_cast<size_t>(std::distance(other.begin(), other.end())) &&
           std::equal(begin(), end(), other.begin());
  }
  template <typename OtherT> bool operator!=(const OtherT &other) const {
    return !(*this == other);
  }

  /// Return the size of this range.
  size_t size() const { return count; }

  /// Return if the range is empty.
  bool empty() const { return size() == 0; }

  /// Drop the first N elements, and keep M elements.
  DerivedT slice(size_t n, size_t m) const {
    assert(n + m <= size() && "invalid size specifiers")((void)0);
    return DerivedT(offset_base(base, n), m);
  }

  /// Drop the first n elements.
  DerivedT drop_front(size_t n = 1) const {
    assert(size() >= n && "Dropping more elements than exist")((void)0);
    return slice(n, size() - n);
  }
  /// Drop the last n elements.
  DerivedT drop_back(size_t n = 1) const {
    assert(size() >= n && "Dropping more elements than exist")((void)0);
    return DerivedT(base, size() - n);
  }

  /// Take the first n elements; the whole range if n >= size().
  DerivedT take_front(size_t n = 1) const {
    return n < size() ? drop_back(size() - n)
                      : static_cast<const DerivedT &>(*this);
  }

  /// Take the last n elements; the whole range if n >= size().
  DerivedT take_back(size_t n = 1) const {
    return n < size() ? drop_front(size() - n)
                      : static_cast<const DerivedT &>(*this);
  }

  /// Allow conversion to any type accepting an iterator_range.
  template <typename RangeT, typename = std::enable_if_t<std::is_constructible<
                                 RangeT, iterator_range<iterator>>::value>>
  operator RangeT() const {
    return RangeT(iterator_range<iterator>(*this));
  }

  /// Returns the base of this range.
  const BaseT &getBase() const { return base; }

private:
  /// Offset the given base by the given amount.
  // Avoids calling the derived hook for the common n == 0 case.
  static BaseT offset_base(const BaseT &base, size_t n) {
    return n == 0 ? base : DerivedT::offset_base(base, n);
  }

protected:
  indexed_accessor_range_base(const indexed_accessor_range_base &) = default;
  indexed_accessor_range_base(indexed_accessor_range_base &&) = default;
  indexed_accessor_range_base &
  operator=(const indexed_accessor_range_base &) = default;

  /// The base that owns the provided range of values.
  BaseT base;
  /// The size from the owning range.
  ptrdiff_t count;
};
1193 | } // end namespace detail |
1194 | |
1195 | /// This class provides an implementation of a range of |
1196 | /// indexed_accessor_iterators where the base is not indexable. Ranges with |
1197 | /// bases that are offsetable should derive from indexed_accessor_range_base |
1198 | /// instead. Derived range classes are expected to implement the following |
1199 | /// static method: |
1200 | /// * ReferenceT dereference(const BaseT &base, ptrdiff_t index) |
1201 | /// - Dereference an iterator pointing to a parent base at the given index. |
template <typename DerivedT, typename BaseT, typename T,
          typename PointerT = T *, typename ReferenceT = T &>
class indexed_accessor_range
    : public detail::indexed_accessor_range_base<
          DerivedT, std::pair<BaseT, ptrdiff_t>, T, PointerT, ReferenceT> {
public:
  indexed_accessor_range(BaseT base, ptrdiff_t startIndex, ptrdiff_t count)
      : detail::indexed_accessor_range_base<
            DerivedT, std::pair<BaseT, ptrdiff_t>, T, PointerT, ReferenceT>(
            std::make_pair(base, startIndex), count) {}
  using detail::indexed_accessor_range_base<
      DerivedT, std::pair<BaseT, ptrdiff_t>, T, PointerT,
      ReferenceT>::indexed_accessor_range_base;

  /// Returns the current base of the range.
  const BaseT &getBase() const { return this->base.first; }

  /// Returns the current start index of the range.
  ptrdiff_t getStartIndex() const { return this->base.second; }

  /// See `detail::indexed_accessor_range_base` for details.
  static std::pair<BaseT, ptrdiff_t>
  offset_base(const std::pair<BaseT, ptrdiff_t> &base, ptrdiff_t index) {
    // We encode the internal base as a pair of the derived base and a start
    // index into the derived base.
    return std::make_pair(base.first, base.second + index);
  }
  /// See `detail::indexed_accessor_range_base` for details.
  static ReferenceT
  dereference_iterator(const std::pair<BaseT, ptrdiff_t> &base,
                       ptrdiff_t index) {
    // Fold the stored start index into the index handed to the derived class.
    return DerivedT::dereference(base.first, base.second + index);
  }
};
1236 | |
1237 | /// Given a container of pairs, return a range over the first elements. |
1238 | template <typename ContainerTy> auto make_first_range(ContainerTy &&c) { |
1239 | return llvm::map_range( |
1240 | std::forward<ContainerTy>(c), |
1241 | [](decltype((*std::begin(c))) elt) -> decltype((elt.first)) { |
1242 | return elt.first; |
1243 | }); |
1244 | } |
1245 | |
1246 | /// Given a container of pairs, return a range over the second elements. |
1247 | template <typename ContainerTy> auto make_second_range(ContainerTy &&c) { |
1248 | return llvm::map_range( |
1249 | std::forward<ContainerTy>(c), |
1250 | [](decltype((*std::begin(c))) elt) -> decltype((elt.second)) { |
1251 | return elt.second; |
1252 | }); |
1253 | } |
1254 | |
1255 | //===----------------------------------------------------------------------===// |
1256 | // Extra additions to <utility> |
1257 | //===----------------------------------------------------------------------===// |
1258 | |
/// Comparator that orders pair-like values solely by their first components
/// (using the component's operator<).
struct less_first {
  template <typename T> bool operator()(const T &lhs, const T &rhs) const {
    return std::tie(lhs.first) < std::tie(rhs.first);
  }
};
1266 | |
/// Comparator that orders pair-like values solely by their second components
/// (using the component's operator<).
struct less_second {
  template <typename T> bool operator()(const T &lhs, const T &rhs) const {
    return std::tie(lhs.second) < std::tie(rhs.second);
  }
};
1274 | |
/// \brief Function object that forwards the first components of two pair-like
/// arguments to the wrapped binary functor \c func.
template <typename FuncTy> struct on_first {
  FuncTy func;

  template <typename T>
  decltype(auto) operator()(const T &lhs, const T &rhs) const {
    return func(lhs.first, rhs.first);
  }
};
1286 | |
/// Utility type to build an inheritance chain that makes it easy to rank
/// overload candidates: rank<N> derives from rank<N - 1> down to rank<0>, so
/// an argument of type rank<N> converts to any lower rank and overload
/// resolution prefers the most-derived (highest-N) viable overload.
template <int N> struct rank : rank<N - 1> {};
template <> struct rank<0> {};
1291 | |
/// traits class for checking whether type T is one of any of the given
/// types in the variadic list; equivalent to
/// (std::is_same<T, Ts>::value || ...).
template <typename T, typename... Ts>
using is_one_of = disjunction<std::is_same<T, Ts>...>;

/// traits class for checking whether type T is a base class for all
/// the given types in the variadic list; equivalent to
/// (std::is_base_of<T, Ts>::value && ...).
template <typename T, typename... Ts>
using are_base_of = conjunction<std::is_base_of<T, Ts>...>;
1301 | |
namespace detail {
/// Primary template; only the specializations below are instantiated.
template <typename... Ts> struct Visitor;

/// Recursive case: inherit from the (decayed) head callable and from the
/// visitor over the tail, then import every operator() into one overload set.
template <typename HeadT, typename... TailTs>
struct Visitor<HeadT, TailTs...> : remove_cvref_t<HeadT>, Visitor<TailTs...> {
  explicit constexpr Visitor(HeadT &&Head, TailTs &&...Tail)
      : remove_cvref_t<HeadT>(std::forward<HeadT>(Head)),
        Visitor<TailTs...>(std::forward<TailTs>(Tail)...) {}
  using remove_cvref_t<HeadT>::operator();
  using Visitor<TailTs...>::operator();
};

/// Base case: a single callable.
template <typename HeadT> struct Visitor<HeadT> : remove_cvref_t<HeadT> {
  explicit constexpr Visitor(HeadT &&Head)
      : remove_cvref_t<HeadT>(std::forward<HeadT>(Head)) {}
  using remove_cvref_t<HeadT>::operator();
};
} // namespace detail
1320 | |
1321 | /// Returns an opaquely-typed Callable object whose operator() overload set is |
1322 | /// the sum of the operator() overload sets of each CallableT in CallableTs. |
1323 | /// |
1324 | /// The type of the returned object derives from each CallableT in CallableTs. |
1325 | /// The returned object is constructed by invoking the appropriate copy or move |
1326 | /// constructor of each CallableT, as selected by overload resolution on the |
1327 | /// corresponding argument to makeVisitor. |
1328 | /// |
1329 | /// Example: |
1330 | /// |
1331 | /// \code |
1332 | /// auto visitor = makeVisitor([](auto) { return "unhandled type"; }, |
1333 | /// [](int i) { return "int"; }, |
1334 | /// [](std::string s) { return "str"; }); |
1335 | /// auto a = visitor(42); // `a` is now "int". |
1336 | /// auto b = visitor("foo"); // `b` is now "str". |
1337 | /// auto c = visitor(3.14f); // `c` is now "unhandled type". |
1338 | /// \endcode |
1339 | /// |
1340 | /// Example of making a visitor with a lambda which captures a move-only type: |
1341 | /// |
1342 | /// \code |
1343 | /// std::unique_ptr<FooHandler> FH = /* ... */; |
1344 | /// auto visitor = makeVisitor( |
1345 | /// [FH{std::move(FH)}](Foo F) { return FH->handle(F); }, |
1346 | /// [](int i) { return i; }, |
1347 | /// [](std::string s) { return atoi(s); }); |
1348 | /// \endcode |
1349 | template <typename... CallableTs> |
1350 | constexpr decltype(auto) makeVisitor(CallableTs &&...Callables) { |
1351 | return detail::Visitor<CallableTs...>(std::forward<CallableTs>(Callables)...); |
1352 | } |
1353 | |
1354 | //===----------------------------------------------------------------------===// |
1355 | // Extra additions for arrays |
1356 | //===----------------------------------------------------------------------===// |
1357 | |
// LLVM carries its own shuffle so that, for a given RNG, the resulting
// permutation is identical across standard-library implementations.
template <class Iterator, class RNG>
void shuffle(Iterator first, Iterator last, RNG &&g) {
  // A std::uniform_int_distribution would be cleaner, but its output is
  // stdlib-dependent, which would defeat the purpose of this copy.
  using difference_type =
      typename std::iterator_traits<Iterator>::difference_type;
  for (auto size = last - first; size > 1; ++first, (void)--size) {
    const difference_type offset = g() % size;
    // Avoid self-assignment due to incorrect assertions in libstdc++
    // containers (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85828).
    if (offset != difference_type(0))
      std::iter_swap(first, first + offset);
  }
}
1374 | |
/// Find the length of a C array. The count is deduced from the array's type,
/// so this is usable in constant expressions; the argument itself is never
/// read.
template <class T, std::size_t N>
constexpr inline size_t array_lengthof(T (&)[N]) {
  return N;
}
1380 | |
/// Adapt std::less<T> to the three-way contract qsort expects: negative when
/// *P1 orders before *P2, positive when after, zero when equivalent.
template <typename T>
inline int array_pod_sort_comparator(const void *P1, const void *P2) {
  const T &LHS = *reinterpret_cast<const T *>(P1);
  const T &RHS = *reinterpret_cast<const T *>(P2);
  if (std::less<T>()(LHS, RHS))
    return -1;
  if (std::less<T>()(RHS, LHS))
    return 1;
  return 0;
}
1392 | |
1393 | /// get_array_pod_sort_comparator - This is an internal helper function used to |
1394 | /// get type deduction of T right. |
1395 | template<typename T> |
1396 | inline int (*get_array_pod_sort_comparator(const T &)) |
1397 | (const void*, const void*) { |
1398 | return array_pod_sort_comparator<T>; |
1399 | } |
1400 | |
1401 | #ifdef EXPENSIVE_CHECKS |
namespace detail {

/// Per-process random seed for the EXPENSIVE_CHECKS pre-sort shuffle.
/// Computed once via a thread-safe local static so every sort in the process
/// shuffles with the same entropy, keeping failures reproducible within a run.
inline unsigned presortShuffleEntropy() {
  static unsigned Result(std::random_device{}());
  return Result;
}

/// Randomly permute [Start, End) before sorting, to help expose code that
/// depends on the incoming order of equivalent elements (see PR35135).
template <class IteratorTy>
inline void presortShuffle(IteratorTy Start, IteratorTy End) {
  std::mt19937 Generator(presortShuffleEntropy());
  llvm::shuffle(Start, End, Generator);
}

} // end namespace detail
1416 | #endif |
1417 | |
/// array_pod_sort - This sorts an array with the specified start and end
/// extent. This is just like std::sort, except that it calls qsort instead of
/// using an inlined template. qsort is slightly slower than std::sort, but
/// most sorts are not performance critical in LLVM and std::sort has to be
/// template instantiated for each type, leading to significant measured code
/// bloat. This function should generally be used instead of std::sort where
/// possible.
///
/// This function assumes that you have simple POD-like types that can be
/// compared with std::less and can be moved with memcpy. If this isn't true,
/// you should use std::sort.
///
/// NOTE: If qsort_r were portable, we could allow a custom comparator and
/// default to std::less.
template<class IteratorTy>
inline void array_pod_sort(IteratorTy Start, IteratorTy End) {
  // Don't inefficiently call qsort with one element or trigger undefined
  // behavior with an empty sequence.
  auto NElts = End - Start;
  if (NElts <= 1) return;
#ifdef EXPENSIVE_CHECKS
  detail::presortShuffle<IteratorTy>(Start, End);
#endif
  // &*Start requires a dereferenceable iterator, which the NElts guard above
  // guarantees.
  qsort(&*Start, NElts, sizeof(*Start), get_array_pod_sort_comparator(*Start));
}
1443 | |
/// Overload of array_pod_sort taking a caller-supplied three-way comparator
/// over the element type (qsort semantics: <0, 0, >0).
template <class IteratorTy>
inline void array_pod_sort(
    IteratorTy Start, IteratorTy End,
    int (*Compare)(
        const typename std::iterator_traits<IteratorTy>::value_type *,
        const typename std::iterator_traits<IteratorTy>::value_type *)) {
  // Don't inefficiently call qsort with one element or trigger undefined
  // behavior with an empty sequence.
  auto NElts = End - Start;
  if (NElts <= 1) return;
#ifdef EXPENSIVE_CHECKS
  detail::presortShuffle<IteratorTy>(Start, End);
#endif
  // Adapt the typed comparator to qsort's void* signature.
  qsort(&*Start, NElts, sizeof(*Start),
        reinterpret_cast<int (*)(const void *, const void *)>(Compare));
}
1460 | |
namespace detail {
// We can use qsort (via array_pod_sort) if the iterator type is a pointer
// and the underlying value is trivially copyable.
template <typename T>
using sort_trivially_copyable = conjunction<
    std::is_pointer<T>,
    std::is_trivially_copyable<typename std::iterator_traits<T>::value_type>>;
} // namespace detail
1469 | |
// Provide wrappers to std::sort which shuffle the elements before sorting
// to help uncover non-deterministic behavior (PR35135). The shuffle only
// happens in EXPENSIVE_CHECKS builds; otherwise this is plain std::sort.
template <typename IteratorTy,
          std::enable_if_t<!detail::sort_trivially_copyable<IteratorTy>::value,
                           int> = 0>
inline void sort(IteratorTy Start, IteratorTy End) {
#ifdef EXPENSIVE_CHECKS
  detail::presortShuffle<IteratorTy>(Start, End);
#endif
  std::sort(Start, End);
}
1481 | |
// Forward trivially copyable types to array_pod_sort. This avoids a large
// amount of code bloat for a minor performance hit. (array_pod_sort performs
// the same EXPENSIVE_CHECKS pre-shuffle itself.)
template <typename IteratorTy,
          std::enable_if_t<detail::sort_trivially_copyable<IteratorTy>::value,
                           int> = 0>
inline void sort(IteratorTy Start, IteratorTy End) {
  array_pod_sort(Start, End);
}
1490 | |
/// Range form of llvm::sort; obtains the iterators via adl_begin/adl_end.
template <typename Container> inline void sort(Container &&C) {
  llvm::sort(adl_begin(C), adl_end(C));
}
1494 | |
/// llvm::sort with an explicit comparator; pre-shuffles in EXPENSIVE_CHECKS
/// builds (PR35135) before delegating to std::sort.
template <typename IteratorTy, typename Compare>
inline void sort(IteratorTy Start, IteratorTy End, Compare Comp) {
#ifdef EXPENSIVE_CHECKS
  detail::presortShuffle<IteratorTy>(Start, End);
#endif
  std::sort(Start, End, Comp);
}
1502 | |
/// Range form of llvm::sort with an explicit comparator.
template <typename Container, typename Compare>
inline void sort(Container &&C, Compare Comp) {
  llvm::sort(adl_begin(C), adl_end(C), Comp);
}
1507 | |
1508 | //===----------------------------------------------------------------------===// |
1509 | // Extra additions to <algorithm> |
1510 | //===----------------------------------------------------------------------===// |
1511 | |
/// Get the size of a range. This is a wrapper function around std::distance
/// which is only enabled when the operation is O(1): the second (defaulted)
/// pointer parameter SFINAEs this overload away unless the range's iterators
/// are random access.
template <typename R>
auto size(R &&Range,
          std::enable_if_t<
              std::is_base_of<std::random_access_iterator_tag,
                              typename std::iterator_traits<decltype(
                                  Range.begin())>::iterator_category>::value,
              void> * = nullptr) {
  return std::distance(Range.begin(), Range.end());
}
1523 | |
/// Range-based version of std::for_each: applies \p F to every element of
/// \p Range and returns the functor, mirroring std::for_each.
template <typename R, typename UnaryFunction>
UnaryFunction for_each(R &&Range, UnaryFunction F) {
  auto B = adl_begin(Range);
  auto E = adl_end(Range);
  return std::for_each(B, E, F);
}
1530 | |
1531 | /// Provide wrappers to std::all_of which take ranges instead of having to pass |
1532 | /// begin/end explicitly. |
1533 | template <typename R, typename UnaryPredicate> |
1534 | bool all_of(R &&Range, UnaryPredicate P) { |
1535 | return std::all_of(adl_begin(Range), adl_end(Range), P); |
1536 | } |
1537 | |
1538 | /// Provide wrappers to std::any_of which take ranges instead of having to pass |
1539 | /// begin/end explicitly. |
1540 | template <typename R, typename UnaryPredicate> |
1541 | bool any_of(R &&Range, UnaryPredicate P) { |
1542 | return std::any_of(adl_begin(Range), adl_end(Range), P); |
1543 | } |
1544 | |
1545 | /// Provide wrappers to std::none_of which take ranges instead of having to pass |
1546 | /// begin/end explicitly. |
1547 | template <typename R, typename UnaryPredicate> |
1548 | bool none_of(R &&Range, UnaryPredicate P) { |
1549 | return std::none_of(adl_begin(Range), adl_end(Range), P); |
1550 | } |
1551 | |
/// Provide wrappers to std::find which take ranges instead of having to pass
/// begin/end explicitly. Returns an iterator to the first element equal to
/// \p Val, or the end iterator if there is none.
template <typename R, typename T> auto find(R &&Range, const T &Val) {
  return std::find(adl_begin(Range), adl_end(Range), Val);
}

/// Provide wrappers to std::find_if which take ranges instead of having to pass
/// begin/end explicitly.
template <typename R, typename UnaryPredicate>
auto find_if(R &&Range, UnaryPredicate P) {
  return std::find_if(adl_begin(Range), adl_end(Range), P);
}

/// Range-based version of std::find_if_not: iterator to the first element for
/// which \p P is false, or the end iterator if \p P holds everywhere.
template <typename R, typename UnaryPredicate>
auto find_if_not(R &&Range, UnaryPredicate P) {
  return std::find_if_not(adl_begin(Range), adl_end(Range), P);
}
1569 | |
/// Provide wrappers to std::remove_if which take ranges instead of having to
/// pass begin/end explicitly. Like std::remove_if, this only moves the kept
/// elements to the front and returns the new logical end; it does not erase
/// anything from the container (see erase_if for that).
template <typename R, typename UnaryPredicate>
auto remove_if(R &&Range, UnaryPredicate P) {
  return std::remove_if(adl_begin(Range), adl_end(Range), P);
}

/// Provide wrappers to std::copy_if which take ranges instead of having to
/// pass begin/end explicitly.
template <typename R, typename OutputIt, typename UnaryPredicate>
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P) {
  return std::copy_if(adl_begin(Range), adl_end(Range), Out, P);
}

/// Range-based version of std::copy: copy every element of \p Range through
/// the output iterator \p Out, returning the iterator past the last element
/// written.
template <typename R, typename OutputIt>
OutputIt copy(R &&Range, OutputIt Out) {
  return std::copy(adl_begin(Range), adl_end(Range), Out);
}
1588 | |
/// Provide wrappers to std::move which take ranges instead of having to
/// pass begin/end explicitly. This is the three-argument std::move algorithm
/// (move elements of the range into \p Out), not the single-argument cast.
template <typename R, typename OutputIt>
OutputIt move(R &&Range, OutputIt Out) {
  return std::move(adl_begin(Range), adl_end(Range), Out);
}

/// Wrapper function around std::find to detect if an element exists
/// in a container. Linear scan: O(n) in the length of \p Range.
template <typename R, typename E>
bool is_contained(R &&Range, const E &Element) {
  return std::find(adl_begin(Range), adl_end(Range), Element) != adl_end(Range);
}
1602 | |
/// Wrapper function around std::is_sorted to check if elements in a range \p R
/// are sorted with respect to a comparator \p C.
template <typename R, typename Compare> bool is_sorted(R &&Range, Compare C) {
  return std::is_sorted(adl_begin(Range), adl_end(Range), C);
}

/// Wrapper function around std::is_sorted to check if elements in a range \p R
/// are sorted in non-descending order (uses operator<).
template <typename R> bool is_sorted(R &&Range) {
  return std::is_sorted(adl_begin(Range), adl_end(Range));
}

/// Wrapper function around std::count to count the number of times an element
/// \p Element occurs in the given range \p Range. Returns the iterator
/// difference type of the range.
template <typename R, typename E> auto count(R &&Range, const E &Element) {
  return std::count(adl_begin(Range), adl_end(Range), Element);
}

/// Wrapper function around std::count_if to count the number of times an
/// element satisfying a given predicate occurs in a range.
template <typename R, typename UnaryPredicate>
auto count_if(R &&Range, UnaryPredicate P) {
  return std::count_if(adl_begin(Range), adl_end(Range), P);
}
1627 | |
/// Wrapper function around std::transform to apply a function to a range and
/// store the result elsewhere. Returns the output iterator one past the last
/// element written through \p d_first.
template <typename R, typename OutputIt, typename UnaryFunction>
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F) {
  return std::transform(adl_begin(Range), adl_end(Range), d_first, F);
}

/// Provide wrappers to std::partition which take ranges instead of having to
/// pass begin/end explicitly. Returns an iterator to the start of the second
/// group (the elements for which \p P returned false).
template <typename R, typename UnaryPredicate>
auto partition(R &&Range, UnaryPredicate P) {
  return std::partition(adl_begin(Range), adl_end(Range), P);
}
1641 | |
/// Provide wrappers to std::lower_bound which take ranges instead of having to
/// pass begin/end explicitly. As with std::lower_bound, the range must already
/// be partitioned/sorted with respect to the comparison being used.
template <typename R, typename T> auto lower_bound(R &&Range, T &&Value) {
  return std::lower_bound(adl_begin(Range), adl_end(Range),
                          std::forward<T>(Value));
}

/// Overload of lower_bound taking an explicit comparator \p C.
template <typename R, typename T, typename Compare>
auto lower_bound(R &&Range, T &&Value, Compare C) {
  return std::lower_bound(adl_begin(Range), adl_end(Range),
                          std::forward<T>(Value), C);
}

/// Provide wrappers to std::upper_bound which take ranges instead of having to
/// pass begin/end explicitly. As with std::upper_bound, the range must already
/// be partitioned/sorted with respect to the comparison being used.
template <typename R, typename T> auto upper_bound(R &&Range, T &&Value) {
  return std::upper_bound(adl_begin(Range), adl_end(Range),
                          std::forward<T>(Value));
}

/// Overload of upper_bound taking an explicit comparator \p C.
template <typename R, typename T, typename Compare>
auto upper_bound(R &&Range, T &&Value, Compare C) {
  return std::upper_bound(adl_begin(Range), adl_end(Range),
                          std::forward<T>(Value), C);
}
1667 | |
/// Range-based version of std::stable_sort: sort in non-descending order while
/// preserving the relative order of equivalent elements.
template <typename R>
void stable_sort(R &&Range) {
  std::stable_sort(adl_begin(Range), adl_end(Range));
}

/// Range-based version of std::stable_sort with an explicit comparator \p C,
/// preserving the relative order of equivalent elements.
template <typename R, typename Compare>
void stable_sort(R &&Range, Compare C) {
  std::stable_sort(adl_begin(Range), adl_end(Range), C);
}
1677 | |
/// Binary search for the first iterator in a range where a predicate is false.
/// Requires that C is always true below some limit, and always false above it.
template <typename R, typename Predicate,
          typename Val = decltype(*adl_begin(std::declval<R>()))>
auto partition_point(R &&Range, Predicate P) {
  return std::partition_point(adl_begin(Range), adl_end(Range), P);
}

/// Range-based version of std::unique: compacts consecutive duplicates (as
/// identified by predicate \p P) and returns the new logical end. Nothing is
/// erased from the underlying container.
template<typename Range, typename Predicate>
auto unique(Range &&R, Predicate P) {
  return std::unique(adl_begin(R), adl_end(R), P);
}
1690 | |
/// Wrapper function around std::equal to detect if pair-wise elements between
/// two ranges are the same. Uses the four-argument std::equal, so ranges of
/// different lengths compare unequal.
template <typename L, typename R> bool equal(L &&LRange, R &&RRange) {
  return std::equal(adl_begin(LRange), adl_end(LRange), adl_begin(RRange),
                    adl_end(RRange));
}
1697 | |
/// Wrapper function around std::equal to detect if all elements
/// in a container are the same.
///
/// Returns false for an empty range, true for a single-element range, and
/// otherwise true iff every element equals the first. Implemented with a
/// single traversal using std::next, so it only requires forward iterators
/// (the previous form needed random access via size() and begin() + 1).
template <typename R>
bool is_splat(R &&Range) {
  auto Begin = adl_begin(Range);
  auto End = adl_end(Range);
  if (Begin == End)
    return false; // An empty range has no splat value.
  // For a single element the compared range is empty and std::equal is true.
  return std::equal(std::next(Begin), End, Begin);
}
1706 | |
/// Provide a container algorithm similar to C++ Library Fundamentals v2's
/// `erase_if` which is equivalent to:
///
///   C.erase(remove_if(C, pred), C.end());
///
/// This version works for any container with an erase method call accepting
/// two iterators.
template <typename Container, typename UnaryPredicate>
void erase_if(Container &C, UnaryPredicate P) {
  // Uses the range-based llvm::remove_if wrapper defined above.
  C.erase(remove_if(C, P), C.end());
}
1718 | |
/// Wrapper function to remove a value from a container:
///
///   C.erase(remove(C.begin(), C.end(), V), C.end());
///
/// All elements comparing equal to \p V are removed; the rest keep their
/// relative order.
template <typename Container, typename ValueType>
void erase_value(Container &C, ValueType V) {
  // Classic erase-remove idiom, split into two named steps.
  auto NewEnd = std::remove(C.begin(), C.end(), V);
  C.erase(NewEnd, C.end());
}
1726 | |
/// Wrapper function to append a range to a container.
///
///   C.insert(C.end(), R.begin(), R.end());
template <typename Container, typename Range>
inline void append_range(Container &C, Range &&R) {
  // Insert the whole source range at the container's current end.
  auto InsertPos = C.end();
  C.insert(InsertPos, R.begin(), R.end());
}
1734 | |
/// Given a sequence container Cont, replace the range [ContIt, ContEnd) with
/// the range [ValIt, ValEnd) (which is not from the same container).
///
/// Overwrites elements in place while both ranges have items left; then
/// either erases the leftover tail of the old range or inserts the leftover
/// replacement values. Both iterator pairs must be valid on entry.
template <typename Container, typename RandomAccessIterator>
void replace(Container &Cont, typename Container::iterator ContIt,
             typename Container::iterator ContEnd, RandomAccessIterator ValIt,
             RandomAccessIterator ValEnd) {
  for (;;) {
    if (ValIt == ValEnd) {
      // Out of replacement values: drop whatever is left of the old range.
      Cont.erase(ContIt, ContEnd);
      return;
    }
    if (ContIt == ContEnd) {
      // Old range exhausted: splice in the remaining replacement values.
      Cont.insert(ContIt, ValIt, ValEnd);
      return;
    }
    // Overwrite one old element with one replacement element.
    *ContIt = *ValIt;
    ++ContIt;
    ++ValIt;
  }
}

/// Given a sequence container Cont, replace the range [ContIt, ContEnd) with
/// the range R (defaults to an initializer_list of the container's value
/// type, so a braced list can be passed directly).
template <typename Container, typename Range = std::initializer_list<
                                  typename Container::value_type>>
void replace(Container &Cont, typename Container::iterator ContIt,
             typename Container::iterator ContEnd, Range R) {
  replace(Cont, ContIt, ContEnd, R.begin(), R.end());
}
1761 | |
/// An STL-style algorithm similar to std::for_each that applies a second
/// functor between every pair of elements.
///
/// This provides the control flow logic to, for example, print a
/// comma-separated list:
/// \code
///   interleave(names.begin(), names.end(),
///              [&](StringRef name) { os << name; },
///              [&] { os << ", "; });
/// \endcode
///
/// The enable_if rejects arguments convertible to StringRef so that the
/// separator-string overloads below are chosen for those calls instead.
template <typename ForwardIterator, typename UnaryFunctor,
          typename NullaryFunctor,
          typename = typename std::enable_if<
              !std::is_constructible<StringRef, UnaryFunctor>::value &&
              !std::is_constructible<StringRef, NullaryFunctor>::value>::type>
inline void interleave(ForwardIterator begin, ForwardIterator end,
                       UnaryFunctor each_fn, NullaryFunctor between_fn) {
  if (begin == end)
    return;
  // Emit the first element with no separator, then alternate
  // separator/element for the rest.
  each_fn(*begin);
  ++begin;
  for (; begin != end; ++begin) {
    between_fn();
    each_fn(*begin);
  }
}
1788 | |
/// Range overload of interleave; the enable_if mirrors the iterator form so
/// StringRef-convertible arguments fall through to the separator overloads.
template <typename Container, typename UnaryFunctor, typename NullaryFunctor,
          typename = typename std::enable_if<
              !std::is_constructible<StringRef, UnaryFunctor>::value &&
              !std::is_constructible<StringRef, NullaryFunctor>::value>::type>
inline void interleave(const Container &c, UnaryFunctor each_fn,
                       NullaryFunctor between_fn) {
  interleave(c.begin(), c.end(), each_fn, between_fn);
}

/// Overload of interleave for the common case of string separator.
template <typename Container, typename UnaryFunctor, typename StreamT,
          typename T = detail::ValueOfRange<Container>>
inline void interleave(const Container &c, StreamT &os, UnaryFunctor each_fn,
                       const StringRef &separator) {
  interleave(c.begin(), c.end(), each_fn, [&] { os << separator; });
}

/// As above, streaming each element directly with operator<<.
template <typename Container, typename StreamT,
          typename T = detail::ValueOfRange<Container>>
inline void interleave(const Container &c, StreamT &os,
                       const StringRef &separator) {
  interleave(
      c, os, [&](const T &a) { os << a; }, separator);
}

/// Apply \p each_fn to every element of \p c, emitting ", " between elements.
template <typename Container, typename UnaryFunctor, typename StreamT,
          typename T = detail::ValueOfRange<Container>>
inline void interleaveComma(const Container &c, StreamT &os,
                            UnaryFunctor each_fn) {
  interleave(c, os, each_fn, ", ");
}

/// Stream every element of \p c to \p os, separated by ", ".
template <typename Container, typename StreamT,
          typename T = detail::ValueOfRange<Container>>
inline void interleaveComma(const Container &c, StreamT &os) {
  interleaveComma(c, os, [&](const T &a) { os << a; });
}
1824 | |
1825 | //===----------------------------------------------------------------------===// |
1826 | // Extra additions to <memory> |
1827 | //===----------------------------------------------------------------------===// |
1828 | |
/// Deleter usable with std::unique_ptr for memory that must be released with
/// ::free() (i.e. obtained from malloc/calloc/realloc or a C API).
struct FreeDeleter {
  void operator()(void *Ptr) { ::free(Ptr); }
};
1834 | |
/// Hash functor for std::pair, usable as the Hash parameter of unordered
/// containers. Combines the two element hashes as first * 31 + second.
template <typename First, typename Second>
struct pair_hash {
  size_t operator()(const std::pair<First, Second> &P) const {
    size_t FirstHash = std::hash<First>()(P.first);
    size_t SecondHash = std::hash<Second>()(P.second);
    return FirstHash * 31 + SecondHash;
  }
};
1841 | |
/// Binary functor that adapts to any other binary functor after dereferencing
/// operands. Useful for comparing containers of pointers with a value-based
/// comparator. Both operands must be non-null (dereferenceable and boolean
/// testable); this is asserted before dereferencing.
template <typename T> struct deref {
  T func; // The wrapped functor; invoked on the pointees.

  // Could be further improved to cope with non-derivable functors and
  // non-binary functors (should be a variadic template member function
  // operator()).
  template <typename A, typename B> auto operator()(A &lhs, B &rhs) const {
    assert(lhs);
    assert(rhs);
    return func(*lhs, *rhs);
  }
};
1856 | |
1857 | namespace detail { |
1858 | |
1859 | template <typename R> class enumerator_iter; |
1860 | |
/// The (index, value) entry yielded by enumerate()'s iterator: index() is the
/// 0-based position, value() dereferences the wrapped range iterator.
template <typename R> struct result_pair {
  using value_reference =
      typename std::iterator_traits<IterOfRange<R>>::reference;

  friend class enumerator_iter<R>;

  result_pair() = default;
  result_pair(std::size_t Index, IterOfRange<R> Iter)
      : Index(Index), Iter(Iter) {}

  result_pair(const result_pair<R> &Other)
      : Index(Other.Index), Iter(Other.Iter) {}
  result_pair &operator=(const result_pair &Other) {
    Index = Other.Index;
    Iter = Other.Iter;
    return *this;
  }

  std::size_t index() const { return Index; }
  const value_reference value() const { return *Iter; }
  value_reference value() { return *Iter; }

private:
  // size_t's max value tags a default-constructed or end-sentinel entry; see
  // enumerator_iter's end constructor and its operator++ assertion.
  std::size_t Index = std::numeric_limits<std::size_t>::max();
  IterOfRange<R> Iter;
};
1887 | |
/// Forward iterator over a range that yields result_pair<R> (index, value)
/// entries; implementation detail of enumerate().
template <typename R>
class enumerator_iter
    : public iterator_facade_base<
          enumerator_iter<R>, std::forward_iterator_tag, result_pair<R>,
          typename std::iterator_traits<IterOfRange<R>>::difference_type,
          typename std::iterator_traits<IterOfRange<R>>::pointer,
          typename std::iterator_traits<IterOfRange<R>>::reference> {
  using result_type = result_pair<R>;

public:
  // Constructs the end sentinel: the stored index is size_t's max value.
  explicit enumerator_iter(IterOfRange<R> EndIter)
      : Result(std::numeric_limits<size_t>::max(), EndIter) {}

  enumerator_iter(std::size_t Index, IterOfRange<R> Iter)
      : Result(Index, Iter) {}

  result_type &operator*() { return Result; }
  const result_type &operator*() const { return Result; }

  // Pre-increment; must not be called on the end sentinel (asserted below).
  enumerator_iter &operator++() {
    assert(Result.Index != std::numeric_limits<size_t>::max());
    ++Result.Iter;
    ++Result.Index;
    return *this;
  }

  bool operator==(const enumerator_iter &RHS) const {
    // Don't compare indices here, only iterators.  It's possible for an end
    // iterator to have different indices depending on whether it was created
    // by calling std::end() versus incrementing a valid iterator.
    return Result.Iter == RHS.Result.Iter;
  }

  enumerator_iter(const enumerator_iter &Other) : Result(Other.Result) {}
  enumerator_iter &operator=(const enumerator_iter &Other) {
    Result = Other.Result;
    return *this;
  }

private:
  result_type Result;
};
1930 | |
/// Range adaptor returned by enumerate(). Because enumerate() forwards its
/// argument as R, TheRange is a copy/moved-in value for rvalue inputs and a
/// reference for lvalue inputs (reference collapsing on R).
template <typename R> class enumerator {
public:
  explicit enumerator(R &&Range) : TheRange(std::forward<R>(Range)) {}

  enumerator_iter<R> begin() {
    return enumerator_iter<R>(0, std::begin(TheRange));
  }

  enumerator_iter<R> end() {
    return enumerator_iter<R>(std::end(TheRange));
  }

private:
  R TheRange;
};
1946 | |
1947 | } // end namespace detail |
1948 | |
/// Given an input range, returns a new range whose values are pairs (A, B)
/// such that A is the 0-based index of the item in the sequence, and B is
/// the value from the original sequence. Example:
///
/// std::vector<char> Items = {'A', 'B', 'C', 'D'};
/// for (auto X : enumerate(Items)) {
///   printf("Item %zu - %c\n", X.index(), X.value());
/// }
///
/// Output:
///   Item 0 - A
///   Item 1 - B
///   Item 2 - C
///   Item 3 - D
///
template <typename R> detail::enumerator<R> enumerate(R &&TheRange) {
  return detail::enumerator<R>(std::forward<R>(TheRange));
}
1967 | |
namespace detail {

/// Helper for apply_tuple: expands the index pack so the call becomes
/// f(get<0>(t), get<1>(t), ..., get<n-1>(t)).
template <typename F, typename Tuple, std::size_t... I>
decltype(auto) apply_tuple_impl(F &&f, Tuple &&t, std::index_sequence<I...>) {
  return std::forward<F>(f)(std::get<I>(std::forward<Tuple>(t))...);
}

} // end namespace detail
1976 | |
/// Given an input tuple (a1, a2, ..., an), pass the arguments of the
/// tuple variadically to f as if by calling f(a1, a2, ..., an) and
/// return the result. (Equivalent in purpose to C++17's std::apply.)
template <typename F, typename Tuple>
decltype(auto) apply_tuple(F &&f, Tuple &&t) {
  // One index per tuple element, after stripping references/cv from Tuple.
  using Indices = std::make_index_sequence<
      std::tuple_size<typename std::decay<Tuple>::type>::value>;

  return detail::apply_tuple_impl(std::forward<F>(f), std::forward<Tuple>(t),
                                  Indices{});
}
1988 | |
/// Return true if the sequence [Begin, End) has exactly N items. Runs in O(N)
/// time. Not meant for use with random-access iterators.
/// Can optionally take a predicate to filter lazily some items.
template <typename IterTy,
          typename Pred = bool (*)(const decltype(*std::declval<IterTy>()) &)>
bool hasNItems(
    IterTy &&Begin, IterTy &&End, unsigned N,
    Pred &&ShouldBeCounted =
        [](const decltype(*std::declval<IterTy>()) &) { return true; },
    std::enable_if_t<
        !std::is_base_of<std::random_access_iterator_tag,
                         typename std::iterator_traits<std::remove_reference_t<
                             decltype(Begin)>>::iterator_category>::value,
        void> * = nullptr) {
  // Phase 1: consume exactly N counted items; fail if the sequence ends first.
  while (N) {
    if (Begin == End)
      return false; // Too few.
    N -= ShouldBeCounted(*Begin);
    ++Begin;
  }
  // Phase 2: any further counted item means there are more than N.
  while (Begin != End) {
    if (ShouldBeCounted(*Begin))
      return false; // Too many.
    ++Begin;
  }
  return true;
}
2013 | |
/// Return true if the sequence [Begin, End) has N or more items. Runs in O(N)
/// time. Not meant for use with random-access iterators.
/// Can optionally take a predicate to lazily filter some items.
template <typename IterTy,
          typename Pred = bool (*)(const decltype(*std::declval<IterTy>()) &)>
bool hasNItemsOrMore(
    IterTy &&Begin, IterTy &&End, unsigned N,
    Pred &&ShouldBeCounted =
        [](const decltype(*std::declval<IterTy>()) &) { return true; },
    std::enable_if_t<
        !std::is_base_of<std::random_access_iterator_tag,
                         typename std::iterator_traits<std::remove_reference_t<
                             decltype(Begin)>>::iterator_category>::value,
        void> * = nullptr) {
  // Walk forward until N counted items have been seen; stop early at End.
  while (N) {
    if (Begin == End)
      return false; // Too few.
    N -= ShouldBeCounted(*Begin);
    ++Begin;
  }
  return true;
}
2035 | |
/// Returns true if the sequence [Begin, End) has N or fewer items. Can
/// optionally take a predicate to lazily filter some items.
template <typename IterTy,
          typename Pred = bool (*)(const decltype(*std::declval<IterTy>()) &)>
bool hasNItemsOrLess(
    IterTy &&Begin, IterTy &&End, unsigned N,
    Pred &&ShouldBeCounted = [](const decltype(*std::declval<IterTy>()) &) {
      return true;
    }) {
  // N + 1 below must not wrap around.
  assert(N != std::numeric_limits<unsigned>::max());
  return !hasNItemsOrMore(Begin, End, N + 1, ShouldBeCounted);
}
2048 | |
/// Returns true if the given container has exactly N items. Forwards to the
/// iterator-based hasNItems; O(N), safe for non-random-access containers.
template <typename ContainerTy> bool hasNItems(ContainerTy &&C, unsigned N) {
  return hasNItems(std::begin(C), std::end(C), N);
}

/// Returns true if the given container has N or more items.
template <typename ContainerTy>
bool hasNItemsOrMore(ContainerTy &&C, unsigned N) {
  return hasNItemsOrMore(std::begin(C), std::end(C), N);
}

/// Returns true if the given container has N or fewer items.
template <typename ContainerTy>
bool hasNItemsOrLess(ContainerTy &&C, unsigned N) {
  return hasNItemsOrLess(std::begin(C), std::end(C), N);
}
2065 | |
/// Returns a raw pointer that represents the same address as the argument.
///
/// This implementation can be removed once we move to C++20 where it's defined
/// as std::to_address().
///
/// The std::pointer_traits<>::to_address(p) variations of these overloads has
/// not been implemented.
template <class Ptr> auto to_address(const Ptr &P) {
  // Fancy pointer (e.g. a smart pointer): defer to its operator->.
  return P.operator->();
}
template <class T> constexpr T *to_address(T *P) {
  // A raw pointer is already an address.
  return P;
}
2075 | |
2076 | } // end namespace llvm |
2077 | |
2078 | #endif // LLVM_ADT_STLEXTRAS_H |