File: | src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Support/Alignment.h |
Warning: | line 85, column 47 The result of the left shift is undefined due to shifting by '255', which is greater or equal to the width of type 'uint64_t' |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | // This file contains a pass (at IR level) to replace atomic instructions with | |||
10 | // __atomic_* library calls, or target specific instruction which implement the | |||
11 | // same semantics in a way which better fits the target backend. This can | |||
12 | // include the use of (intrinsic-based) load-linked/store-conditional loops, | |||
13 | // AtomicCmpXchg, or type coercions. | |||
14 | // | |||
15 | //===----------------------------------------------------------------------===// | |||
16 | ||||
17 | #include "llvm/ADT/ArrayRef.h" | |||
18 | #include "llvm/ADT/STLExtras.h" | |||
19 | #include "llvm/ADT/SmallVector.h" | |||
20 | #include "llvm/CodeGen/AtomicExpandUtils.h" | |||
21 | #include "llvm/CodeGen/RuntimeLibcalls.h" | |||
22 | #include "llvm/CodeGen/TargetLowering.h" | |||
23 | #include "llvm/CodeGen/TargetPassConfig.h" | |||
24 | #include "llvm/CodeGen/TargetSubtargetInfo.h" | |||
25 | #include "llvm/CodeGen/ValueTypes.h" | |||
26 | #include "llvm/IR/Attributes.h" | |||
27 | #include "llvm/IR/BasicBlock.h" | |||
28 | #include "llvm/IR/Constant.h" | |||
29 | #include "llvm/IR/Constants.h" | |||
30 | #include "llvm/IR/DataLayout.h" | |||
31 | #include "llvm/IR/DerivedTypes.h" | |||
32 | #include "llvm/IR/Function.h" | |||
33 | #include "llvm/IR/IRBuilder.h" | |||
34 | #include "llvm/IR/InstIterator.h" | |||
35 | #include "llvm/IR/Instruction.h" | |||
36 | #include "llvm/IR/Instructions.h" | |||
37 | #include "llvm/IR/Module.h" | |||
38 | #include "llvm/IR/Type.h" | |||
39 | #include "llvm/IR/User.h" | |||
40 | #include "llvm/IR/Value.h" | |||
41 | #include "llvm/InitializePasses.h" | |||
42 | #include "llvm/Pass.h" | |||
43 | #include "llvm/Support/AtomicOrdering.h" | |||
44 | #include "llvm/Support/Casting.h" | |||
45 | #include "llvm/Support/Debug.h" | |||
46 | #include "llvm/Support/ErrorHandling.h" | |||
47 | #include "llvm/Support/raw_ostream.h" | |||
48 | #include "llvm/Target/TargetMachine.h" | |||
49 | #include <cassert> | |||
50 | #include <cstdint> | |||
51 | #include <iterator> | |||
52 | ||||
53 | using namespace llvm; | |||
54 | ||||
55 | #define DEBUG_TYPE"atomic-expand" "atomic-expand" | |||
56 | ||||
57 | namespace { | |||
58 | ||||
59 | class AtomicExpand: public FunctionPass { | |||
60 | const TargetLowering *TLI = nullptr; | |||
61 | ||||
62 | public: | |||
63 | static char ID; // Pass identification, replacement for typeid | |||
64 | ||||
65 | AtomicExpand() : FunctionPass(ID) { | |||
66 | initializeAtomicExpandPass(*PassRegistry::getPassRegistry()); | |||
67 | } | |||
68 | ||||
69 | bool runOnFunction(Function &F) override; | |||
70 | ||||
71 | private: | |||
72 | bool bracketInstWithFences(Instruction *I, AtomicOrdering Order); | |||
73 | IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL); | |||
74 | LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI); | |||
75 | bool tryExpandAtomicLoad(LoadInst *LI); | |||
76 | bool expandAtomicLoadToLL(LoadInst *LI); | |||
77 | bool expandAtomicLoadToCmpXchg(LoadInst *LI); | |||
78 | StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI); | |||
79 | bool expandAtomicStore(StoreInst *SI); | |||
80 | bool tryExpandAtomicRMW(AtomicRMWInst *AI); | |||
81 | AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI); | |||
82 | Value * | |||
83 | insertRMWLLSCLoop(IRBuilder<> &Builder, Type *ResultTy, Value *Addr, | |||
84 | Align AddrAlign, AtomicOrdering MemOpOrder, | |||
85 | function_ref<Value *(IRBuilder<> &, Value *)> PerformOp); | |||
86 | void expandAtomicOpToLLSC( | |||
87 | Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign, | |||
88 | AtomicOrdering MemOpOrder, | |||
89 | function_ref<Value *(IRBuilder<> &, Value *)> PerformOp); | |||
90 | void expandPartwordAtomicRMW( | |||
91 | AtomicRMWInst *I, | |||
92 | TargetLoweringBase::AtomicExpansionKind ExpansionKind); | |||
93 | AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI); | |||
94 | bool expandPartwordCmpXchg(AtomicCmpXchgInst *I); | |||
95 | void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI); | |||
96 | void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI); | |||
97 | ||||
98 | AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI); | |||
99 | static Value *insertRMWCmpXchgLoop( | |||
100 | IRBuilder<> &Builder, Type *ResultType, Value *Addr, Align AddrAlign, | |||
101 | AtomicOrdering MemOpOrder, SyncScope::ID SSID, | |||
102 | function_ref<Value *(IRBuilder<> &, Value *)> PerformOp, | |||
103 | CreateCmpXchgInstFun CreateCmpXchg); | |||
104 | bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI); | |||
105 | ||||
106 | bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI); | |||
107 | bool isIdempotentRMW(AtomicRMWInst *RMWI); | |||
108 | bool simplifyIdempotentRMW(AtomicRMWInst *RMWI); | |||
109 | ||||
110 | bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment, | |||
111 | Value *PointerOperand, Value *ValueOperand, | |||
112 | Value *CASExpected, AtomicOrdering Ordering, | |||
113 | AtomicOrdering Ordering2, | |||
114 | ArrayRef<RTLIB::Libcall> Libcalls); | |||
115 | void expandAtomicLoadToLibcall(LoadInst *LI); | |||
116 | void expandAtomicStoreToLibcall(StoreInst *LI); | |||
117 | void expandAtomicRMWToLibcall(AtomicRMWInst *I); | |||
118 | void expandAtomicCASToLibcall(AtomicCmpXchgInst *I); | |||
119 | ||||
120 | friend bool | |||
121 | llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, | |||
122 | CreateCmpXchgInstFun CreateCmpXchg); | |||
123 | }; | |||
124 | ||||
125 | } // end anonymous namespace | |||
126 | ||||
127 | char AtomicExpand::ID = 0; | |||
128 | ||||
129 | char &llvm::AtomicExpandID = AtomicExpand::ID; | |||
130 | ||||
131 | INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions",static void *initializeAtomicExpandPassOnce(PassRegistry & Registry) { PassInfo *PI = new PassInfo( "Expand Atomic instructions" , "atomic-expand", &AtomicExpand::ID, PassInfo::NormalCtor_t (callDefaultCtor<AtomicExpand>), false, false); Registry .registerPass(*PI, true); return PI; } static llvm::once_flag InitializeAtomicExpandPassFlag; void llvm::initializeAtomicExpandPass (PassRegistry &Registry) { llvm::call_once(InitializeAtomicExpandPassFlag , initializeAtomicExpandPassOnce, std::ref(Registry)); } | |||
132 | false, false)static void *initializeAtomicExpandPassOnce(PassRegistry & Registry) { PassInfo *PI = new PassInfo( "Expand Atomic instructions" , "atomic-expand", &AtomicExpand::ID, PassInfo::NormalCtor_t (callDefaultCtor<AtomicExpand>), false, false); Registry .registerPass(*PI, true); return PI; } static llvm::once_flag InitializeAtomicExpandPassFlag; void llvm::initializeAtomicExpandPass (PassRegistry &Registry) { llvm::call_once(InitializeAtomicExpandPassFlag , initializeAtomicExpandPassOnce, std::ref(Registry)); } | |||
133 | ||||
134 | FunctionPass *llvm::createAtomicExpandPass() { return new AtomicExpand(); } | |||
135 | ||||
136 | // Helper functions to retrieve the size of atomic instructions. | |||
137 | static unsigned getAtomicOpSize(LoadInst *LI) { | |||
138 | const DataLayout &DL = LI->getModule()->getDataLayout(); | |||
139 | return DL.getTypeStoreSize(LI->getType()); | |||
140 | } | |||
141 | ||||
142 | static unsigned getAtomicOpSize(StoreInst *SI) { | |||
143 | const DataLayout &DL = SI->getModule()->getDataLayout(); | |||
144 | return DL.getTypeStoreSize(SI->getValueOperand()->getType()); | |||
145 | } | |||
146 | ||||
147 | static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) { | |||
148 | const DataLayout &DL = RMWI->getModule()->getDataLayout(); | |||
149 | return DL.getTypeStoreSize(RMWI->getValOperand()->getType()); | |||
150 | } | |||
151 | ||||
152 | static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) { | |||
153 | const DataLayout &DL = CASI->getModule()->getDataLayout(); | |||
154 | return DL.getTypeStoreSize(CASI->getCompareOperand()->getType()); | |||
155 | } | |||
156 | ||||
157 | // Determine if a particular atomic operation has a supported size, | |||
158 | // and is of appropriate alignment, to be passed through for target | |||
159 | // lowering. (Versus turning into a __atomic libcall) | |||
160 | template <typename Inst> | |||
161 | static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) { | |||
162 | unsigned Size = getAtomicOpSize(I); | |||
163 | Align Alignment = I->getAlign(); | |||
164 | return Alignment >= Size && | |||
| ||||
165 | Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8; | |||
166 | } | |||
167 | ||||
168 | bool AtomicExpand::runOnFunction(Function &F) { | |||
169 | auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); | |||
170 | if (!TPC) | |||
171 | return false; | |||
172 | ||||
173 | auto &TM = TPC->getTM<TargetMachine>(); | |||
174 | if (!TM.getSubtargetImpl(F)->enableAtomicExpand()) | |||
175 | return false; | |||
176 | TLI = TM.getSubtargetImpl(F)->getTargetLowering(); | |||
177 | ||||
178 | SmallVector<Instruction *, 1> AtomicInsts; | |||
179 | ||||
180 | // Changing control-flow while iterating through it is a bad idea, so gather a | |||
181 | // list of all atomic instructions before we start. | |||
182 | for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) { | |||
183 | Instruction *I = &*II; | |||
184 | if (I->isAtomic() && !isa<FenceInst>(I)) | |||
185 | AtomicInsts.push_back(I); | |||
186 | } | |||
187 | ||||
188 | bool MadeChange = false; | |||
189 | for (auto I : AtomicInsts) { | |||
190 | auto LI = dyn_cast<LoadInst>(I); | |||
191 | auto SI = dyn_cast<StoreInst>(I); | |||
192 | auto RMWI = dyn_cast<AtomicRMWInst>(I); | |||
193 | auto CASI = dyn_cast<AtomicCmpXchgInst>(I); | |||
194 | assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction")((void)0); | |||
195 | ||||
196 | // If the Size/Alignment is not supported, replace with a libcall. | |||
197 | if (LI) { | |||
198 | if (!atomicSizeSupported(TLI, LI)) { | |||
199 | expandAtomicLoadToLibcall(LI); | |||
200 | MadeChange = true; | |||
201 | continue; | |||
202 | } | |||
203 | } else if (SI) { | |||
204 | if (!atomicSizeSupported(TLI, SI)) { | |||
205 | expandAtomicStoreToLibcall(SI); | |||
206 | MadeChange = true; | |||
207 | continue; | |||
208 | } | |||
209 | } else if (RMWI) { | |||
210 | if (!atomicSizeSupported(TLI, RMWI)) { | |||
211 | expandAtomicRMWToLibcall(RMWI); | |||
212 | MadeChange = true; | |||
213 | continue; | |||
214 | } | |||
215 | } else if (CASI) { | |||
216 | if (!atomicSizeSupported(TLI, CASI)) { | |||
217 | expandAtomicCASToLibcall(CASI); | |||
218 | MadeChange = true; | |||
219 | continue; | |||
220 | } | |||
221 | } | |||
222 | ||||
223 | if (TLI->shouldInsertFencesForAtomic(I)) { | |||
224 | auto FenceOrdering = AtomicOrdering::Monotonic; | |||
225 | if (LI && isAcquireOrStronger(LI->getOrdering())) { | |||
226 | FenceOrdering = LI->getOrdering(); | |||
227 | LI->setOrdering(AtomicOrdering::Monotonic); | |||
228 | } else if (SI && isReleaseOrStronger(SI->getOrdering())) { | |||
229 | FenceOrdering = SI->getOrdering(); | |||
230 | SI->setOrdering(AtomicOrdering::Monotonic); | |||
231 | } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) || | |||
232 | isAcquireOrStronger(RMWI->getOrdering()))) { | |||
233 | FenceOrdering = RMWI->getOrdering(); | |||
234 | RMWI->setOrdering(AtomicOrdering::Monotonic); | |||
235 | } else if (CASI && | |||
236 | TLI->shouldExpandAtomicCmpXchgInIR(CASI) == | |||
237 | TargetLoweringBase::AtomicExpansionKind::None && | |||
238 | (isReleaseOrStronger(CASI->getSuccessOrdering()) || | |||
239 | isAcquireOrStronger(CASI->getSuccessOrdering()) || | |||
240 | isAcquireOrStronger(CASI->getFailureOrdering()))) { | |||
241 | // If a compare and swap is lowered to LL/SC, we can do smarter fence | |||
242 | // insertion, with a stronger one on the success path than on the | |||
243 | // failure path. As a result, fence insertion is directly done by | |||
244 | // expandAtomicCmpXchg in that case. | |||
245 | FenceOrdering = CASI->getMergedOrdering(); | |||
246 | CASI->setSuccessOrdering(AtomicOrdering::Monotonic); | |||
247 | CASI->setFailureOrdering(AtomicOrdering::Monotonic); | |||
248 | } | |||
249 | ||||
250 | if (FenceOrdering != AtomicOrdering::Monotonic) { | |||
251 | MadeChange |= bracketInstWithFences(I, FenceOrdering); | |||
252 | } | |||
253 | } | |||
254 | ||||
255 | if (LI) { | |||
256 | if (LI->getType()->isFloatingPointTy()) { | |||
257 | // TODO: add a TLI hook to control this so that each target can | |||
258 | // convert to lowering the original type one at a time. | |||
259 | LI = convertAtomicLoadToIntegerType(LI); | |||
260 | assert(LI->getType()->isIntegerTy() && "invariant broken")((void)0); | |||
261 | MadeChange = true; | |||
262 | } | |||
263 | ||||
264 | MadeChange |= tryExpandAtomicLoad(LI); | |||
265 | } else if (SI) { | |||
266 | if (SI->getValueOperand()->getType()->isFloatingPointTy()) { | |||
267 | // TODO: add a TLI hook to control this so that each target can | |||
268 | // convert to lowering the original type one at a time. | |||
269 | SI = convertAtomicStoreToIntegerType(SI); | |||
270 | assert(SI->getValueOperand()->getType()->isIntegerTy() &&((void)0) | |||
271 | "invariant broken")((void)0); | |||
272 | MadeChange = true; | |||
273 | } | |||
274 | ||||
275 | if (TLI->shouldExpandAtomicStoreInIR(SI)) | |||
276 | MadeChange |= expandAtomicStore(SI); | |||
277 | } else if (RMWI) { | |||
278 | // There are two different ways of expanding RMW instructions: | |||
279 | // - into a load if it is idempotent | |||
280 | // - into a Cmpxchg/LL-SC loop otherwise | |||
281 | // we try them in that order. | |||
282 | ||||
283 | if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) { | |||
284 | MadeChange = true; | |||
285 | } else { | |||
286 | AtomicRMWInst::BinOp Op = RMWI->getOperation(); | |||
287 | if (Op == AtomicRMWInst::Xchg && | |||
288 | RMWI->getValOperand()->getType()->isFloatingPointTy()) { | |||
289 | // TODO: add a TLI hook to control this so that each target can | |||
290 | // convert to lowering the original type one at a time. | |||
291 | RMWI = convertAtomicXchgToIntegerType(RMWI); | |||
292 | assert(RMWI->getValOperand()->getType()->isIntegerTy() &&((void)0) | |||
293 | "invariant broken")((void)0); | |||
294 | MadeChange = true; | |||
295 | } | |||
296 | unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; | |||
297 | unsigned ValueSize = getAtomicOpSize(RMWI); | |||
298 | if (ValueSize < MinCASSize && | |||
299 | (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor || | |||
300 | Op == AtomicRMWInst::And)) { | |||
301 | RMWI = widenPartwordAtomicRMW(RMWI); | |||
302 | MadeChange = true; | |||
303 | } | |||
304 | ||||
305 | MadeChange |= tryExpandAtomicRMW(RMWI); | |||
306 | } | |||
307 | } else if (CASI) { | |||
308 | // TODO: when we're ready to make the change at the IR level, we can | |||
309 | // extend convertCmpXchgToInteger for floating point too. | |||
310 | assert(!CASI->getCompareOperand()->getType()->isFloatingPointTy() &&((void)0) | |||
311 | "unimplemented - floating point not legal at IR level")((void)0); | |||
312 | if (CASI->getCompareOperand()->getType()->isPointerTy() ) { | |||
313 | // TODO: add a TLI hook to control this so that each target can | |||
314 | // convert to lowering the original type one at a time. | |||
315 | CASI = convertCmpXchgToIntegerType(CASI); | |||
316 | assert(CASI->getCompareOperand()->getType()->isIntegerTy() &&((void)0) | |||
317 | "invariant broken")((void)0); | |||
318 | MadeChange = true; | |||
319 | } | |||
320 | ||||
321 | MadeChange |= tryExpandAtomicCmpXchg(CASI); | |||
322 | } | |||
323 | } | |||
324 | return MadeChange; | |||
325 | } | |||
326 | ||||
327 | bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) { | |||
328 | IRBuilder<> Builder(I); | |||
329 | ||||
330 | auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order); | |||
331 | ||||
332 | auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order); | |||
333 | // We have a guard here because not every atomic operation generates a | |||
334 | // trailing fence. | |||
335 | if (TrailingFence) | |||
336 | TrailingFence->moveAfter(I); | |||
337 | ||||
338 | return (LeadingFence || TrailingFence); | |||
339 | } | |||
340 | ||||
341 | /// Get the iX type with the same bitwidth as T. | |||
342 | IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T, | |||
343 | const DataLayout &DL) { | |||
344 | EVT VT = TLI->getMemValueType(DL, T); | |||
345 | unsigned BitWidth = VT.getStoreSizeInBits(); | |||
346 | assert(BitWidth == VT.getSizeInBits() && "must be a power of two")((void)0); | |||
347 | return IntegerType::get(T->getContext(), BitWidth); | |||
348 | } | |||
349 | ||||
350 | /// Convert an atomic load of a non-integral type to an integer load of the | |||
351 | /// equivalent bitwidth. See the function comment on | |||
352 | /// convertAtomicStoreToIntegerType for background. | |||
353 | LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) { | |||
354 | auto *M = LI->getModule(); | |||
355 | Type *NewTy = getCorrespondingIntegerType(LI->getType(), | |||
356 | M->getDataLayout()); | |||
357 | ||||
358 | IRBuilder<> Builder(LI); | |||
359 | ||||
360 | Value *Addr = LI->getPointerOperand(); | |||
361 | Type *PT = PointerType::get(NewTy, | |||
362 | Addr->getType()->getPointerAddressSpace()); | |||
363 | Value *NewAddr = Builder.CreateBitCast(Addr, PT); | |||
364 | ||||
365 | auto *NewLI = Builder.CreateLoad(NewTy, NewAddr); | |||
366 | NewLI->setAlignment(LI->getAlign()); | |||
367 | NewLI->setVolatile(LI->isVolatile()); | |||
368 | NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID()); | |||
369 | LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n")do { } while (false); | |||
370 | ||||
371 | Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType()); | |||
372 | LI->replaceAllUsesWith(NewVal); | |||
373 | LI->eraseFromParent(); | |||
374 | return NewLI; | |||
375 | } | |||
376 | ||||
377 | AtomicRMWInst * | |||
378 | AtomicExpand::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) { | |||
379 | auto *M = RMWI->getModule(); | |||
380 | Type *NewTy = | |||
381 | getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout()); | |||
382 | ||||
383 | IRBuilder<> Builder(RMWI); | |||
384 | ||||
385 | Value *Addr = RMWI->getPointerOperand(); | |||
386 | Value *Val = RMWI->getValOperand(); | |||
387 | Type *PT = PointerType::get(NewTy, RMWI->getPointerAddressSpace()); | |||
388 | Value *NewAddr = Builder.CreateBitCast(Addr, PT); | |||
389 | Value *NewVal = Builder.CreateBitCast(Val, NewTy); | |||
390 | ||||
391 | auto *NewRMWI = | |||
392 | Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, NewAddr, NewVal, | |||
393 | RMWI->getAlign(), RMWI->getOrdering()); | |||
394 | NewRMWI->setVolatile(RMWI->isVolatile()); | |||
395 | LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n")do { } while (false); | |||
396 | ||||
397 | Value *NewRVal = Builder.CreateBitCast(NewRMWI, RMWI->getType()); | |||
398 | RMWI->replaceAllUsesWith(NewRVal); | |||
399 | RMWI->eraseFromParent(); | |||
400 | return NewRMWI; | |||
401 | } | |||
402 | ||||
403 | bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) { | |||
404 | switch (TLI->shouldExpandAtomicLoadInIR(LI)) { | |||
405 | case TargetLoweringBase::AtomicExpansionKind::None: | |||
406 | return false; | |||
407 | case TargetLoweringBase::AtomicExpansionKind::LLSC: | |||
408 | expandAtomicOpToLLSC( | |||
409 | LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(), | |||
410 | LI->getOrdering(), | |||
411 | [](IRBuilder<> &Builder, Value *Loaded) { return Loaded; }); | |||
412 | return true; | |||
413 | case TargetLoweringBase::AtomicExpansionKind::LLOnly: | |||
414 | return expandAtomicLoadToLL(LI); | |||
415 | case TargetLoweringBase::AtomicExpansionKind::CmpXChg: | |||
416 | return expandAtomicLoadToCmpXchg(LI); | |||
417 | default: | |||
418 | llvm_unreachable("Unhandled case in tryExpandAtomicLoad")__builtin_unreachable(); | |||
419 | } | |||
420 | } | |||
421 | ||||
422 | bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) { | |||
423 | IRBuilder<> Builder(LI); | |||
424 | ||||
425 | // On some architectures, load-linked instructions are atomic for larger | |||
426 | // sizes than normal loads. For example, the only 64-bit load guaranteed | |||
427 | // to be single-copy atomic by ARM is an ldrexd (A3.5.3). | |||
428 | Value *Val = TLI->emitLoadLinked(Builder, LI->getType(), | |||
429 | LI->getPointerOperand(), LI->getOrdering()); | |||
430 | TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder); | |||
431 | ||||
432 | LI->replaceAllUsesWith(Val); | |||
433 | LI->eraseFromParent(); | |||
434 | ||||
435 | return true; | |||
436 | } | |||
437 | ||||
438 | bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) { | |||
439 | IRBuilder<> Builder(LI); | |||
440 | AtomicOrdering Order = LI->getOrdering(); | |||
441 | if (Order == AtomicOrdering::Unordered) | |||
442 | Order = AtomicOrdering::Monotonic; | |||
443 | ||||
444 | Value *Addr = LI->getPointerOperand(); | |||
445 | Type *Ty = LI->getType(); | |||
446 | Constant *DummyVal = Constant::getNullValue(Ty); | |||
447 | ||||
448 | Value *Pair = Builder.CreateAtomicCmpXchg( | |||
449 | Addr, DummyVal, DummyVal, LI->getAlign(), Order, | |||
450 | AtomicCmpXchgInst::getStrongestFailureOrdering(Order)); | |||
451 | Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded"); | |||
452 | ||||
453 | LI->replaceAllUsesWith(Loaded); | |||
454 | LI->eraseFromParent(); | |||
455 | ||||
456 | return true; | |||
457 | } | |||
458 | ||||
459 | /// Convert an atomic store of a non-integral type to an integer store of the | |||
460 | /// equivalent bitwidth. We used to not support floating point or vector | |||
461 | /// atomics in the IR at all. The backends learned to deal with the bitcast | |||
462 | /// idiom because that was the only way of expressing the notion of a atomic | |||
463 | /// float or vector store. The long term plan is to teach each backend to | |||
464 | /// instruction select from the original atomic store, but as a migration | |||
465 | /// mechanism, we convert back to the old format which the backends understand. | |||
466 | /// Each backend will need individual work to recognize the new format. | |||
467 | StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) { | |||
468 | IRBuilder<> Builder(SI); | |||
469 | auto *M = SI->getModule(); | |||
470 | Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(), | |||
471 | M->getDataLayout()); | |||
472 | Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy); | |||
473 | ||||
474 | Value *Addr = SI->getPointerOperand(); | |||
475 | Type *PT = PointerType::get(NewTy, | |||
476 | Addr->getType()->getPointerAddressSpace()); | |||
477 | Value *NewAddr = Builder.CreateBitCast(Addr, PT); | |||
478 | ||||
479 | StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr); | |||
480 | NewSI->setAlignment(SI->getAlign()); | |||
481 | NewSI->setVolatile(SI->isVolatile()); | |||
482 | NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID()); | |||
483 | LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n")do { } while (false); | |||
484 | SI->eraseFromParent(); | |||
485 | return NewSI; | |||
486 | } | |||
487 | ||||
488 | bool AtomicExpand::expandAtomicStore(StoreInst *SI) { | |||
489 | // This function is only called on atomic stores that are too large to be | |||
490 | // atomic if implemented as a native store. So we replace them by an | |||
491 | // atomic swap, that can be implemented for example as a ldrex/strex on ARM | |||
492 | // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes. | |||
493 | // It is the responsibility of the target to only signal expansion via | |||
494 | // shouldExpandAtomicRMW in cases where this is required and possible. | |||
495 | IRBuilder<> Builder(SI); | |||
496 | AtomicRMWInst *AI = Builder.CreateAtomicRMW( | |||
497 | AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(), | |||
498 | SI->getAlign(), SI->getOrdering()); | |||
499 | SI->eraseFromParent(); | |||
500 | ||||
501 | // Now we have an appropriate swap instruction, lower it as usual. | |||
502 | return tryExpandAtomicRMW(AI); | |||
503 | } | |||
504 | ||||
505 | static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr, | |||
506 | Value *Loaded, Value *NewVal, Align AddrAlign, | |||
507 | AtomicOrdering MemOpOrder, SyncScope::ID SSID, | |||
508 | Value *&Success, Value *&NewLoaded) { | |||
509 | Type *OrigTy = NewVal->getType(); | |||
510 | ||||
511 | // This code can go away when cmpxchg supports FP types. | |||
512 | bool NeedBitcast = OrigTy->isFloatingPointTy(); | |||
513 | if (NeedBitcast) { | |||
514 | IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits()); | |||
515 | unsigned AS = Addr->getType()->getPointerAddressSpace(); | |||
516 | Addr = Builder.CreateBitCast(Addr, IntTy->getPointerTo(AS)); | |||
517 | NewVal = Builder.CreateBitCast(NewVal, IntTy); | |||
518 | Loaded = Builder.CreateBitCast(Loaded, IntTy); | |||
519 | } | |||
520 | ||||
521 | Value *Pair = Builder.CreateAtomicCmpXchg( | |||
522 | Addr, Loaded, NewVal, AddrAlign, MemOpOrder, | |||
523 | AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID); | |||
524 | Success = Builder.CreateExtractValue(Pair, 1, "success"); | |||
525 | NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded"); | |||
526 | ||||
527 | if (NeedBitcast) | |||
528 | NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy); | |||
529 | } | |||
530 | ||||
531 | /// Emit IR to implement the given atomicrmw operation on values in registers, | |||
532 | /// returning the new value. | |||
533 | static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder, | |||
534 | Value *Loaded, Value *Inc) { | |||
535 | Value *NewVal; | |||
536 | switch (Op) { | |||
537 | case AtomicRMWInst::Xchg: | |||
538 | return Inc; | |||
539 | case AtomicRMWInst::Add: | |||
540 | return Builder.CreateAdd(Loaded, Inc, "new"); | |||
541 | case AtomicRMWInst::Sub: | |||
542 | return Builder.CreateSub(Loaded, Inc, "new"); | |||
543 | case AtomicRMWInst::And: | |||
544 | return Builder.CreateAnd(Loaded, Inc, "new"); | |||
545 | case AtomicRMWInst::Nand: | |||
546 | return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new"); | |||
547 | case AtomicRMWInst::Or: | |||
548 | return Builder.CreateOr(Loaded, Inc, "new"); | |||
549 | case AtomicRMWInst::Xor: | |||
550 | return Builder.CreateXor(Loaded, Inc, "new"); | |||
551 | case AtomicRMWInst::Max: | |||
552 | NewVal = Builder.CreateICmpSGT(Loaded, Inc); | |||
553 | return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); | |||
554 | case AtomicRMWInst::Min: | |||
555 | NewVal = Builder.CreateICmpSLE(Loaded, Inc); | |||
556 | return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); | |||
557 | case AtomicRMWInst::UMax: | |||
558 | NewVal = Builder.CreateICmpUGT(Loaded, Inc); | |||
559 | return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); | |||
560 | case AtomicRMWInst::UMin: | |||
561 | NewVal = Builder.CreateICmpULE(Loaded, Inc); | |||
562 | return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); | |||
563 | case AtomicRMWInst::FAdd: | |||
564 | return Builder.CreateFAdd(Loaded, Inc, "new"); | |||
565 | case AtomicRMWInst::FSub: | |||
566 | return Builder.CreateFSub(Loaded, Inc, "new"); | |||
567 | default: | |||
568 | llvm_unreachable("Unknown atomic op")__builtin_unreachable(); | |||
569 | } | |||
570 | } | |||
571 | ||||
572 | bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { | |||
573 | switch (TLI->shouldExpandAtomicRMWInIR(AI)) { | |||
574 | case TargetLoweringBase::AtomicExpansionKind::None: | |||
575 | return false; | |||
576 | case TargetLoweringBase::AtomicExpansionKind::LLSC: { | |||
577 | unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; | |||
578 | unsigned ValueSize = getAtomicOpSize(AI); | |||
579 | if (ValueSize < MinCASSize) { | |||
580 | expandPartwordAtomicRMW(AI, | |||
581 | TargetLoweringBase::AtomicExpansionKind::LLSC); | |||
582 | } else { | |||
583 | auto PerformOp = [&](IRBuilder<> &Builder, Value *Loaded) { | |||
584 | return performAtomicOp(AI->getOperation(), Builder, Loaded, | |||
585 | AI->getValOperand()); | |||
586 | }; | |||
587 | expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(), | |||
588 | AI->getAlign(), AI->getOrdering(), PerformOp); | |||
589 | } | |||
590 | return true; | |||
591 | } | |||
592 | case TargetLoweringBase::AtomicExpansionKind::CmpXChg: { | |||
593 | unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; | |||
594 | unsigned ValueSize = getAtomicOpSize(AI); | |||
595 | if (ValueSize < MinCASSize) { | |||
596 | // TODO: Handle atomicrmw fadd/fsub | |||
597 | if (AI->getType()->isFloatingPointTy()) | |||
598 | return false; | |||
599 | ||||
600 | expandPartwordAtomicRMW(AI, | |||
601 | TargetLoweringBase::AtomicExpansionKind::CmpXChg); | |||
602 | } else { | |||
603 | expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun); | |||
604 | } | |||
605 | return true; | |||
606 | } | |||
607 | case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: { | |||
608 | expandAtomicRMWToMaskedIntrinsic(AI); | |||
609 | return true; | |||
610 | } | |||
611 | default: | |||
612 | llvm_unreachable("Unhandled case in tryExpandAtomicRMW")__builtin_unreachable(); | |||
613 | } | |||
614 | } | |||
615 | ||||
616 | namespace { | |||
617 | ||||
618 | struct PartwordMaskValues { | |||
619 | // These three fields are guaranteed to be set by createMaskInstrs. | |||
620 | Type *WordType = nullptr; | |||
621 | Type *ValueType = nullptr; | |||
622 | Value *AlignedAddr = nullptr; | |||
623 | Align AlignedAddrAlignment; | |||
624 | // The remaining fields can be null. | |||
625 | Value *ShiftAmt = nullptr; | |||
626 | Value *Mask = nullptr; | |||
627 | Value *Inv_Mask = nullptr; | |||
628 | }; | |||
629 | ||||
630 | LLVM_ATTRIBUTE_UNUSED__attribute__((__unused__)) | |||
631 | raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) { | |||
632 | auto PrintObj = [&O](auto *V) { | |||
633 | if (V) | |||
634 | O << *V; | |||
635 | else | |||
636 | O << "nullptr"; | |||
637 | O << '\n'; | |||
638 | }; | |||
639 | O << "PartwordMaskValues {\n"; | |||
640 | O << " WordType: "; | |||
641 | PrintObj(PMV.WordType); | |||
642 | O << " ValueType: "; | |||
643 | PrintObj(PMV.ValueType); | |||
644 | O << " AlignedAddr: "; | |||
645 | PrintObj(PMV.AlignedAddr); | |||
646 | O << " AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n'; | |||
647 | O << " ShiftAmt: "; | |||
648 | PrintObj(PMV.ShiftAmt); | |||
649 | O << " Mask: "; | |||
650 | PrintObj(PMV.Mask); | |||
651 | O << " Inv_Mask: "; | |||
652 | PrintObj(PMV.Inv_Mask); | |||
653 | O << "}\n"; | |||
654 | return O; | |||
655 | } | |||
656 | ||||
657 | } // end anonymous namespace | |||
658 | ||||
659 | /// This is a helper function which builds instructions to provide | |||
660 | /// values necessary for partword atomic operations. It takes an | |||
661 | /// incoming address, Addr, and ValueType, and constructs the address, | |||
662 | /// shift-amounts and masks needed to work with a larger value of size | |||
663 | /// WordSize. | |||
664 | /// | |||
665 | /// AlignedAddr: Addr rounded down to a multiple of WordSize | |||
666 | /// | |||
667 | /// ShiftAmt: Number of bits to right-shift a WordSize value loaded | |||
668 | /// from AlignAddr for it to have the same value as if | |||
669 | /// ValueType was loaded from Addr. | |||
670 | /// | |||
671 | /// Mask: Value to mask with the value loaded from AlignAddr to | |||
672 | /// include only the part that would've been loaded from Addr. | |||
673 | /// | |||
674 | /// Inv_Mask: The inverse of Mask. | |||
675 | static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I, | |||
676 | Type *ValueType, Value *Addr, | |||
677 | Align AddrAlign, | |||
678 | unsigned MinWordSize) { | |||
679 | PartwordMaskValues PMV; | |||
680 | ||||
681 | Module *M = I->getModule(); | |||
682 | LLVMContext &Ctx = M->getContext(); | |||
683 | const DataLayout &DL = M->getDataLayout(); | |||
684 | unsigned ValueSize = DL.getTypeStoreSize(ValueType); | |||
685 | ||||
686 | PMV.ValueType = ValueType; | |||
687 | PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8) | |||
688 | : ValueType; | |||
689 | if (PMV.ValueType == PMV.WordType) { | |||
690 | PMV.AlignedAddr = Addr; | |||
691 | PMV.AlignedAddrAlignment = AddrAlign; | |||
692 | PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0); | |||
693 | PMV.Mask = ConstantInt::get(PMV.ValueType, ~0); | |||
694 | return PMV; | |||
695 | } | |||
696 | ||||
697 | assert(ValueSize < MinWordSize)((void)0); | |||
698 | ||||
699 | Type *WordPtrType = | |||
700 | PMV.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace()); | |||
701 | ||||
702 | // TODO: we could skip some of this if AddrAlign >= MinWordSize. | |||
703 | Value *AddrInt = Builder.CreatePtrToInt(Addr, DL.getIntPtrType(Ctx)); | |||
704 | PMV.AlignedAddr = Builder.CreateIntToPtr( | |||
705 | Builder.CreateAnd(AddrInt, ~(uint64_t)(MinWordSize - 1)), WordPtrType, | |||
706 | "AlignedAddr"); | |||
707 | PMV.AlignedAddrAlignment = Align(MinWordSize); | |||
708 | ||||
709 | Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB"); | |||
710 | if (DL.isLittleEndian()) { | |||
711 | // turn bytes into bits | |||
712 | PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3); | |||
713 | } else { | |||
714 | // turn bytes into bits, and count from the other side. | |||
715 | PMV.ShiftAmt = Builder.CreateShl( | |||
716 | Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3); | |||
717 | } | |||
718 | ||||
719 | PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt"); | |||
720 | PMV.Mask = Builder.CreateShl( | |||
721 | ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt, | |||
722 | "Mask"); | |||
723 | PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask"); | |||
724 | return PMV; | |||
725 | } | |||
726 | ||||
727 | static Value *extractMaskedValue(IRBuilder<> &Builder, Value *WideWord, | |||
728 | const PartwordMaskValues &PMV) { | |||
729 | assert(WideWord->getType() == PMV.WordType && "Widened type mismatch")((void)0); | |||
730 | if (PMV.WordType == PMV.ValueType) | |||
731 | return WideWord; | |||
732 | ||||
733 | Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted"); | |||
734 | Value *Trunc = Builder.CreateTrunc(Shift, PMV.ValueType, "extracted"); | |||
735 | return Trunc; | |||
736 | } | |||
737 | ||||
738 | static Value *insertMaskedValue(IRBuilder<> &Builder, Value *WideWord, | |||
739 | Value *Updated, const PartwordMaskValues &PMV) { | |||
740 | assert(WideWord->getType() == PMV.WordType && "Widened type mismatch")((void)0); | |||
741 | assert(Updated->getType() == PMV.ValueType && "Value type mismatch")((void)0); | |||
742 | if (PMV.WordType == PMV.ValueType) | |||
743 | return Updated; | |||
744 | ||||
745 | Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended"); | |||
746 | Value *Shift = | |||
747 | Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true); | |||
748 | Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked"); | |||
749 | Value *Or = Builder.CreateOr(And, Shift, "inserted"); | |||
750 | return Or; | |||
751 | } | |||
752 | ||||
/// Emit IR to implement a masked version of a given atomicrmw
/// operation. (That is, only the bits under the Mask should be
/// affected by the operation)
///
/// \param Op          the atomicrmw operation to apply to the masked lane.
/// \param Loaded      the current full-word value loaded from AlignedAddr.
/// \param Shifted_Inc the operand, already zero-extended and shifted into
///                    the lane's position within the word.
/// \param Inc         the original narrow operand (used by min/max, which
///                    compare at the original width).
/// \returns the new full-word value to store back.
static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
                                    IRBuilder<> &Builder, Value *Loaded,
                                    Value *Shifted_Inc, Value *Inc,
                                    const PartwordMaskValues &PMV) {
  // TODO: update to use
  // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
  // to merge bits from two values without requiring PMV.Inv_Mask.
  switch (Op) {
  case AtomicRMWInst::Xchg: {
    // Keep the bits outside the mask and drop in the pre-shifted new value.
    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
    return FinalVal;
  }
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
  case AtomicRMWInst::And:
    llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::Nand: {
    // The other arithmetic ops need to be masked into place: compute on the
    // whole word, then keep only the lane's bits of the result.
    Value *NewVal = performAtomicOp(Op, Builder, Loaded, Shifted_Inc);
    Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
    return FinalVal;
  }
  case AtomicRMWInst::Max:
  case AtomicRMWInst::Min:
  case AtomicRMWInst::UMax:
  case AtomicRMWInst::UMin: {
    // Finally, comparison ops will operate on the full value, so
    // truncate down to the original size, and expand out again after
    // doing the operation.
    Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
    Value *NewVal = performAtomicOp(Op, Builder, Loaded_Extract, Inc);
    Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
    return FinalVal;
  }
  default:
    llvm_unreachable("Unknown atomic op");
  }
}
799 | ||||
/// Expand a sub-word atomicrmw operation into an appropriate
/// word-sized operation.
///
/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
/// way as a typical atomicrmw expansion. The only difference here is
/// that the operation inside of the loop may operate upon only a
/// part of the value.
void AtomicExpand::expandPartwordAtomicRMW(
    AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
  AtomicOrdering MemOpOrder = AI->getOrdering();
  SyncScope::ID SSID = AI->getSyncScopeID();

  IRBuilder<> Builder(AI);

  // Build the aligned word address, shift amount, and masks covering the
  // sub-word lane. Word size is the target's minimum cmpxchg width in bytes.
  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  // Move the operand into the lane's position within the word.
  Value *ValOperand_Shifted =
      Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
                        PMV.ShiftAmt, "ValOperand_Shifted");

  // Loop body: apply the masked version of the operation to the loaded word.
  auto PerformPartwordOp = [&](IRBuilder<> &Builder, Value *Loaded) {
    return performMaskedAtomicOp(AI->getOperation(), Builder, Loaded,
                                 ValOperand_Shifted, AI->getValOperand(), PMV);
  };

  Value *OldResult;
  if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
    OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr,
                                     PMV.AlignedAddrAlignment, MemOpOrder,
                                     SSID, PerformPartwordOp,
                                     createCmpXchgInstFun);
  } else {
    assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
    OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
                                  PMV.AlignedAddrAlignment, MemOpOrder,
                                  PerformPartwordOp);
  }

  // The loop yields the whole previous word; extract the lane the original
  // atomicrmw's users expect, then replace and erase the instruction.
  Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
}
844 | ||||
// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
//
// No retry loop is needed for these: Or/Xor on the full word leave bits
// untouched wherever the (zero-extended) operand is 0, and And is given
// 1-bits outside the lane by or-ing in Inv_Mask below.
AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
  IRBuilder<> Builder(AI);
  AtomicRMWInst::BinOp Op = AI->getOperation();

  assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
          Op == AtomicRMWInst::And) &&
         "Unable to widen operation");

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  // Shift the zero-extended operand into the lane's position; all other
  // bits of the shifted operand are 0.
  Value *ValOperand_Shifted =
      Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
                        PMV.ShiftAmt, "ValOperand_Shifted");

  Value *NewOperand;

  // For And, force the bits outside the lane to 1 so they are preserved.
  if (Op == AtomicRMWInst::And)
    NewOperand =
        Builder.CreateOr(PMV.Inv_Mask, ValOperand_Shifted, "AndOperand");
  else
    NewOperand = ValOperand_Shifted;

  AtomicRMWInst *NewAI =
      Builder.CreateAtomicRMW(Op, PMV.AlignedAddr, NewOperand,
                              PMV.AlignedAddrAlignment, AI->getOrdering());

  // Extract the lane of the old word for the original instruction's users.
  Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
  return NewAI;
}
879 | ||||
// Expand a sub-word cmpxchg into a word-sized cmpxchg plus, for the strong
// case, a retry loop. Always returns true (the instruction is expanded and
// erased unconditionally).
bool AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
  // The basic idea here is that we're expanding a cmpxchg of a
  // smaller memory size up to a word-sized cmpxchg. To do this, we
  // need to add a retry-loop for strong cmpxchg, so that
  // modifications to other parts of the word don't cause a spurious
  // failure.

  // This generates code like the following:
  //     [[Setup mask values PMV.*]]
  //     %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
  //     %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
  //     %InitLoaded = load i32* %addr
  //     %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
  //     br partword.cmpxchg.loop
  // partword.cmpxchg.loop:
  //     %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
  //        [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
  //     %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
  //     %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
  //     %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
  //        i32 %FullWord_NewVal success_ordering failure_ordering
  //     %OldVal = extractvalue { i32, i1 } %NewCI, 0
  //     %Success = extractvalue { i32, i1 } %NewCI, 1
  //     br i1 %Success, label %partword.cmpxchg.end,
  //        label %partword.cmpxchg.failure
  // partword.cmpxchg.failure:
  //     %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
  //     %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
  //     br i1 %ShouldContinue, label %partword.cmpxchg.loop,
  //        label %partword.cmpxchg.end
  // partword.cmpxchg.end:
  //     %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
  //     %FinalOldVal = trunc i32 %tmp1 to i8
  //     %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
  //     %Res = insertvalue { i8, i1 } %25, i1 %Success, 1

  Value *Addr = CI->getPointerOperand();
  Value *Cmp = CI->getCompareOperand();
  Value *NewVal = CI->getNewValOperand();

  BasicBlock *BB = CI->getParent();
  Function *F = BB->getParent();
  IRBuilder<> Builder(CI);
  LLVMContext &Ctx = Builder.getContext();

  BasicBlock *EndBB =
      BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
  auto FailureBB =
      BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
  auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);

  // The split call above "helpfully" added a branch at the end of BB
  // (to the wrong place).
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
                       CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  // Shift the incoming values over, into the right location in the word.
  Value *NewVal_Shifted =
      Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
  Value *Cmp_Shifted =
      Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);

  // Load the entire current word, and mask into place the expected and new
  // values
  LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
  InitLoaded->setVolatile(CI->isVolatile());
  Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
  Builder.CreateBr(LoopBB);

  // partword.cmpxchg.loop:
  Builder.SetInsertPoint(LoopBB);
  PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
  Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);

  // Mask/Or the expected and new values into place in the loaded word.
  Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
  Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
  AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
      PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
      CI->getSuccessOrdering(), CI->getFailureOrdering(), CI->getSyncScopeID());
  NewCI->setVolatile(CI->isVolatile());
  // When we're building a strong cmpxchg, we need a loop, so you
  // might think we could use a weak cmpxchg inside. But, using strong
  // allows the below comparison for ShouldContinue, and we're
  // expecting the underlying cmpxchg to be a machine instruction,
  // which is strong anyways.
  NewCI->setWeak(CI->isWeak());

  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
  Value *Success = Builder.CreateExtractValue(NewCI, 1);

  // A weak cmpxchg may fail spuriously, so it gets no retry loop.
  if (CI->isWeak())
    Builder.CreateBr(EndBB);
  else
    Builder.CreateCondBr(Success, EndBB, FailureBB);

  // partword.cmpxchg.failure:
  Builder.SetInsertPoint(FailureBB);
  // Upon failure, verify that the masked-out part of the loaded value
  // has been modified. If it didn't, abort the cmpxchg, since the
  // masked-in part must've.
  Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
  Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
  Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);

  // Add the second value to the phi from above
  Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);

  // partword.cmpxchg.end:
  Builder.SetInsertPoint(CI);

  // Extract the sub-word old value and rebuild the { value, success }
  // aggregate the original cmpxchg's users expect.
  Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
  Value *Res = UndefValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
  Res = Builder.CreateInsertValue(Res, Success, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
  return true;
}
1004 | ||||
1005 | void AtomicExpand::expandAtomicOpToLLSC( | |||
1006 | Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign, | |||
1007 | AtomicOrdering MemOpOrder, | |||
1008 | function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) { | |||
1009 | IRBuilder<> Builder(I); | |||
1010 | Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign, | |||
1011 | MemOpOrder, PerformOp); | |||
1012 | ||||
1013 | I->replaceAllUsesWith(Loaded); | |||
1014 | I->eraseFromParent(); | |||
1015 | } | |||
1016 | ||||
// Lower a partword atomicrmw to the target's masked-atomicrmw intrinsic
// (via TLI->emitMaskedAtomicRMWIntrinsic), handing it the word-aligned
// address plus the shift/mask values produced by createMaskInstrs.
void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
  IRBuilder<> Builder(AI);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  // The value operand must be sign-extended for signed min/max so that the
  // target's signed comparison instructions can be used. Otherwise, just
  // zero-ext.
  Instruction::CastOps CastOp = Instruction::ZExt;
  AtomicRMWInst::BinOp RMWOp = AI->getOperation();
  if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
    CastOp = Instruction::SExt;

  // Position the (extended) operand in the sub-word lane of the word.
  Value *ValOperand_Shifted = Builder.CreateShl(
      Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
      PMV.ShiftAmt, "ValOperand_Shifted");
  Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
      Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
      AI->getOrdering());
  // The intrinsic yields the whole word; extract the lane for AI's users.
  Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
}
1042 | ||||
// Lower a partword cmpxchg to the target's masked-cmpxchg intrinsic. The
// success flag is recomputed here by comparing the masked returned word
// against the shifted compare operand.
void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) {
  IRBuilder<> Builder(CI);

  PartwordMaskValues PMV = createMaskInstrs(
      Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
      CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  // Shift both operands into the sub-word lane within the word.
  Value *CmpVal_Shifted = Builder.CreateShl(
      Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
      "CmpVal_Shifted");
  Value *NewVal_Shifted = Builder.CreateShl(
      Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
      "NewVal_Shifted");
  Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
      Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
      CI->getMergedOrdering());
  // Rebuild the { value, success } aggregate the original cmpxchg produced.
  Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
  Value *Res = UndefValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
  Value *Success = Builder.CreateICmpEQ(
      CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
  Res = Builder.CreateInsertValue(Res, Success, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
}
1069 | ||||
/// Emit a load-linked/store-conditional retry loop at the current insert
/// point: load-linked from Addr, apply PerformOp to the loaded value, and
/// store-conditional the result, branching back on failure.
/// \returns the loaded (pre-operation) value.
Value *AtomicExpand::insertRMWLLSCLoop(
    IRBuilder<> &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder,
    function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
  LLVMContext &Ctx = Builder.getContext();
  BasicBlock *BB = Builder.GetInsertBlock();
  Function *F = BB->getParent();

  assert(AddrAlign >=
             F->getParent()->getDataLayout().getTypeStoreSize(ResultTy) &&
         "Expected at least natural alignment at this point.");

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  // atomicrmw.start:
  //     %loaded = @load.linked(%addr)
  //     %new = some_op iN %loaded, %incr
  //     %stored = @store_conditional(%new, %addr)
  //     %try_again = icmp i32 ne %stored, 0
  //     br i1 %try_again, label %loop, label %atomicrmw.end
  // atomicrmw.end:
  //     [...]
  BasicBlock *ExitBB =
      BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place).
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder);

  Value *NewVal = PerformOp(Builder, Loaded);

  // Retry while the store-conditional reports failure (a nonzero result).
  Value *StoreSuccess =
      TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
  Value *TryAgain = Builder.CreateICmpNE(
      StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
  Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  return Loaded;
}
1119 | ||||
/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
/// the equivalent bitwidth. We used to not support pointer cmpxchg in the
/// IR. As a migration step, we convert back to what use to be the standard
/// way to represent a pointer cmpxchg so that we can update backends one by
/// one.
/// \returns the replacement integer-typed cmpxchg instruction.
AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
  auto *M = CI->getModule();
  Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
                                            M->getDataLayout());

  IRBuilder<> Builder(CI);

  // Reinterpret the address as pointing at the integer type and convert
  // both operands to integers; the memory access itself is unchanged.
  Value *Addr = CI->getPointerOperand();
  Type *PT = PointerType::get(NewTy,
                              Addr->getType()->getPointerAddressSpace());
  Value *NewAddr = Builder.CreateBitCast(Addr, PT);

  Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
  Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);

  // Carry every attribute of the original cmpxchg over to the new one.
  auto *NewCI = Builder.CreateAtomicCmpXchg(
      NewAddr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(),
      CI->getFailureOrdering(), CI->getSyncScopeID());
  NewCI->setVolatile(CI->isVolatile());
  NewCI->setWeak(CI->isWeak());
  LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");

  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
  Value *Succ = Builder.CreateExtractValue(NewCI, 1);

  // Convert the returned old value back to the original (pointer) type
  // before rebuilding the { value, success } aggregate for the old users.
  OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());

  Value *Res = UndefValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, OldVal, 0);
  Res = Builder.CreateInsertValue(Res, Succ, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
  return NewCI;
}
1160 | ||||
1161 | bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { | |||
1162 | AtomicOrdering SuccessOrder = CI->getSuccessOrdering(); | |||
1163 | AtomicOrdering FailureOrder = CI->getFailureOrdering(); | |||
1164 | Value *Addr = CI->getPointerOperand(); | |||
1165 | BasicBlock *BB = CI->getParent(); | |||
1166 | Function *F = BB->getParent(); | |||
1167 | LLVMContext &Ctx = F->getContext(); | |||
1168 | // If shouldInsertFencesForAtomic() returns true, then the target does not | |||
1169 | // want to deal with memory orders, and emitLeading/TrailingFence should take | |||
1170 | // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we | |||
1171 | // should preserve the ordering. | |||
1172 | bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI); | |||
1173 | AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic | |||
1174 | ? AtomicOrdering::Monotonic | |||
1175 | : CI->getMergedOrdering(); | |||
1176 | ||||
1177 | // In implementations which use a barrier to achieve release semantics, we can | |||
1178 | // delay emitting this barrier until we know a store is actually going to be | |||
1179 | // attempted. The cost of this delay is that we need 2 copies of the block | |||
1180 | // emitting the load-linked, affecting code size. | |||
1181 | // | |||
1182 | // Ideally, this logic would be unconditional except for the minsize check | |||
1183 | // since in other cases the extra blocks naturally collapse down to the | |||
1184 | // minimal loop. Unfortunately, this puts too much stress on later | |||
1185 | // optimisations so we avoid emitting the extra logic in those cases too. | |||
1186 | bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic && | |||
1187 | SuccessOrder != AtomicOrdering::Monotonic && | |||
1188 | SuccessOrder != AtomicOrdering::Acquire && | |||
1189 | !F->hasMinSize(); | |||
1190 | ||||
1191 | // There's no overhead for sinking the release barrier in a weak cmpxchg, so | |||
1192 | // do it even on minsize. | |||
1193 | bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak(); | |||
1194 | ||||
1195 | // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord | |||
1196 | // | |||
1197 | // The full expansion we produce is: | |||
1198 | // [...] | |||
1199 | // %aligned.addr = ... | |||
1200 | // cmpxchg.start: | |||
1201 | // %unreleasedload = @load.linked(%aligned.addr) | |||
1202 | // %unreleasedload.extract = extract value from %unreleasedload | |||
1203 | // %should_store = icmp eq %unreleasedload.extract, %desired | |||
1204 | // br i1 %should_store, label %cmpxchg.releasingstore, | |||
1205 | // label %cmpxchg.nostore | |||
1206 | // cmpxchg.releasingstore: | |||
1207 | // fence? | |||
1208 | // br label cmpxchg.trystore | |||
1209 | // cmpxchg.trystore: | |||
1210 | // %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore], | |||
1211 | // [%releasedload, %cmpxchg.releasedload] | |||
1212 | // %updated.new = insert %new into %loaded.trystore | |||
1213 | // %stored = @store_conditional(%updated.new, %aligned.addr) | |||
1214 | // %success = icmp eq i32 %stored, 0 | |||
1215 | // br i1 %success, label %cmpxchg.success, | |||
1216 | // label %cmpxchg.releasedload/%cmpxchg.failure | |||
1217 | // cmpxchg.releasedload: | |||
1218 | // %releasedload = @load.linked(%aligned.addr) | |||
1219 | // %releasedload.extract = extract value from %releasedload | |||
1220 | // %should_store = icmp eq %releasedload.extract, %desired | |||
1221 | // br i1 %should_store, label %cmpxchg.trystore, | |||
1222 | // label %cmpxchg.failure | |||
1223 | // cmpxchg.success: | |||
1224 | // fence? | |||
1225 | // br label %cmpxchg.end | |||
1226 | // cmpxchg.nostore: | |||
1227 | // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start], | |||
1228 | // [%releasedload, | |||
1229 | // %cmpxchg.releasedload/%cmpxchg.trystore] | |||
1230 | // @load_linked_fail_balance()? | |||
1231 | // br label %cmpxchg.failure | |||
1232 | // cmpxchg.failure: | |||
1233 | // fence? | |||
1234 | // br label %cmpxchg.end | |||
1235 | // cmpxchg.end: | |||
1236 | // %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure], | |||
1237 | // [%loaded.trystore, %cmpxchg.trystore] | |||
1238 | // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure] | |||
1239 | // %loaded = extract value from %loaded.exit | |||
1240 | // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0 | |||
1241 | // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1 | |||
1242 | // [...] | |||
1243 | BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end"); | |||
1244 | auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB); | |||
1245 | auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB); | |||
1246 | auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB); | |||
1247 | auto ReleasedLoadBB = | |||
1248 | BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB); | |||
1249 | auto TryStoreBB = | |||
1250 | BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB); | |||
1251 | auto ReleasingStoreBB = | |||
1252 | BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB); | |||
1253 | auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB); | |||
1254 | ||||
1255 | // This grabs the DebugLoc from CI | |||
1256 | IRBuilder<> Builder(CI); | |||
1257 | ||||
1258 | // The split call above "helpfully" added a branch at the end of BB (to the | |||
1259 | // wrong place), but we might want a fence too. It's easiest to just remove | |||
1260 | // the branch entirely. | |||
1261 | std::prev(BB->end())->eraseFromParent(); | |||
1262 | Builder.SetInsertPoint(BB); | |||
1263 | if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier) | |||
1264 | TLI->emitLeadingFence(Builder, CI, SuccessOrder); | |||
1265 | ||||
1266 | PartwordMaskValues PMV = | |||
1267 | createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr, | |||
1268 | CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8); | |||
1269 | Builder.CreateBr(StartBB); | |||
1270 | ||||
1271 | // Start the main loop block now that we've taken care of the preliminaries. | |||
1272 | Builder.SetInsertPoint(StartBB); | |||
1273 | Value *UnreleasedLoad = | |||
1274 | TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder); | |||
1275 | Value *UnreleasedLoadExtract = | |||
1276 | extractMaskedValue(Builder, UnreleasedLoad, PMV); | |||
1277 | Value *ShouldStore = Builder.CreateICmpEQ( | |||
1278 | UnreleasedLoadExtract, CI->getCompareOperand(), "should_store"); | |||
1279 | ||||
1280 | // If the cmpxchg doesn't actually need any ordering when it fails, we can | |||
1281 | // jump straight past that fence instruction (if it exists). | |||
1282 | Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB); | |||
1283 | ||||
1284 | Builder.SetInsertPoint(ReleasingStoreBB); | |||
1285 | if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier) | |||
1286 | TLI->emitLeadingFence(Builder, CI, SuccessOrder); | |||
1287 | Builder.CreateBr(TryStoreBB); | |||
1288 | ||||
1289 | Builder.SetInsertPoint(TryStoreBB); | |||
1290 | PHINode *LoadedTryStore = | |||
1291 | Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore"); | |||
1292 | LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB); | |||
1293 | Value *NewValueInsert = | |||
1294 | insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV); | |||
1295 | Value *StoreSuccess = | |||
1296 | TLI->emitStoreConditional(Builder, NewValueInsert, PMV.AlignedAddr, | |||
1297 | MemOpOrder); | |||
1298 | StoreSuccess = Builder.CreateICmpEQ( | |||
1299 | StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success"); | |||
1300 | BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB; | |||
1301 | Builder.CreateCondBr(StoreSuccess, SuccessBB, | |||
1302 | CI->isWeak() ? FailureBB : RetryBB); | |||
1303 | ||||
1304 | Builder.SetInsertPoint(ReleasedLoadBB); | |||
1305 | Value *SecondLoad; | |||
1306 | if (HasReleasedLoadBB) { | |||
1307 | SecondLoad = | |||
1308 | TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder); | |||
1309 | Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV); | |||
1310 | ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract, | |||
1311 | CI->getCompareOperand(), "should_store"); | |||
1312 | ||||
1313 | // If the cmpxchg doesn't actually need any ordering when it fails, we can | |||
1314 | // jump straight past that fence instruction (if it exists). | |||
1315 | Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB); | |||
1316 | // Update PHI node in TryStoreBB. | |||
1317 | LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB); | |||
1318 | } else | |||
1319 | Builder.CreateUnreachable(); | |||
1320 | ||||
1321 | // Make sure later instructions don't get reordered with a fence if | |||
1322 | // necessary. | |||
1323 | Builder.SetInsertPoint(SuccessBB); | |||
1324 | if (ShouldInsertFencesForAtomic) | |||
1325 | TLI->emitTrailingFence(Builder, CI, SuccessOrder); | |||
1326 | Builder.CreateBr(ExitBB); | |||
1327 | ||||
1328 | Builder.SetInsertPoint(NoStoreBB); | |||
1329 | PHINode *LoadedNoStore = | |||
1330 | Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore"); | |||
1331 | LoadedNoStore->addIncoming(UnreleasedLoad, StartBB); | |||
1332 | if (HasReleasedLoadBB) | |||
1333 | LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB); | |||
1334 | ||||
1335 | // In the failing case, where we don't execute the store-conditional, the | |||
1336 | // target might want to balance out the load-linked with a dedicated | |||
1337 | // instruction (e.g., on ARM, clearing the exclusive monitor). | |||
1338 | TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder); | |||
1339 | Builder.CreateBr(FailureBB); | |||
1340 | ||||
1341 | Builder.SetInsertPoint(FailureBB); | |||
1342 | PHINode *LoadedFailure = | |||
1343 | Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure"); | |||
1344 | LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB); | |||
1345 | if (CI->isWeak()) | |||
1346 | LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB); | |||
1347 | if (ShouldInsertFencesForAtomic) | |||
1348 | TLI->emitTrailingFence(Builder, CI, FailureOrder); | |||
1349 | Builder.CreateBr(ExitBB); | |||
1350 | ||||
1351 | // Finally, we have control-flow based knowledge of whether the cmpxchg | |||
1352 | // succeeded or not. We expose this to later passes by converting any | |||
1353 | // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate | |||
1354 | // PHI. | |||
1355 | Builder.SetInsertPoint(ExitBB, ExitBB->begin()); | |||
1356 | PHINode *LoadedExit = | |||
1357 | Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit"); | |||
1358 | LoadedExit->addIncoming(LoadedTryStore, SuccessBB); | |||
1359 | LoadedExit->addIncoming(LoadedFailure, FailureBB); | |||
1360 | PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success"); | |||
1361 | Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB); | |||
1362 | Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB); | |||
1363 | ||||
1364 | // This is the "exit value" from the cmpxchg expansion. It may be of | |||
1365 | // a type wider than the one in the cmpxchg instruction. | |||
1366 | Value *LoadedFull = LoadedExit; | |||
1367 | ||||
1368 | Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator())); | |||
1369 | Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV); | |||
1370 | ||||
1371 | // Look for any users of the cmpxchg that are just comparing the loaded value | |||
1372 | // against the desired one, and replace them with the CFG-derived version. | |||
1373 | SmallVector<ExtractValueInst *, 2> PrunedInsts; | |||
1374 | for (auto User : CI->users()) { | |||
1375 | ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User); | |||
1376 | if (!EV) | |||
1377 | continue; | |||
1378 | ||||
1379 | assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&((void)0) | |||
1380 | "weird extraction from { iN, i1 }")((void)0); | |||
1381 | ||||
1382 | if (EV->getIndices()[0] == 0) | |||
1383 | EV->replaceAllUsesWith(Loaded); | |||
1384 | else | |||
1385 | EV->replaceAllUsesWith(Success); | |||
1386 | ||||
1387 | PrunedInsts.push_back(EV); | |||
1388 | } | |||
1389 | ||||
1390 | // We can remove the instructions now we're no longer iterating through them. | |||
1391 | for (auto EV : PrunedInsts) | |||
1392 | EV->eraseFromParent(); | |||
1393 | ||||
1394 | if (!CI->use_empty()) { | |||
1395 | // Some use of the full struct return that we don't understand has happened, | |||
1396 | // so we've got to reconstruct it properly. | |||
1397 | Value *Res; | |||
1398 | Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0); | |||
1399 | Res = Builder.CreateInsertValue(Res, Success, 1); | |||
1400 | ||||
1401 | CI->replaceAllUsesWith(Res); | |||
1402 | } | |||
1403 | ||||
1404 | CI->eraseFromParent(); | |||
1405 | return true; | |||
1406 | } | |||
1407 | ||||
1408 | bool AtomicExpand::isIdempotentRMW(AtomicRMWInst* RMWI) { | |||
1409 | auto C = dyn_cast<ConstantInt>(RMWI->getValOperand()); | |||
1410 | if(!C) | |||
1411 | return false; | |||
1412 | ||||
1413 | AtomicRMWInst::BinOp Op = RMWI->getOperation(); | |||
1414 | switch(Op) { | |||
1415 | case AtomicRMWInst::Add: | |||
1416 | case AtomicRMWInst::Sub: | |||
1417 | case AtomicRMWInst::Or: | |||
1418 | case AtomicRMWInst::Xor: | |||
1419 | return C->isZero(); | |||
1420 | case AtomicRMWInst::And: | |||
1421 | return C->isMinusOne(); | |||
1422 | // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/... | |||
1423 | default: | |||
1424 | return false; | |||
1425 | } | |||
1426 | } | |||
1427 | ||||
1428 | bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) { | |||
1429 | if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) { | |||
1430 | tryExpandAtomicLoad(ResultingLoad); | |||
1431 | return true; | |||
1432 | } | |||
1433 | return false; | |||
1434 | } | |||
1435 | ||||
/// Emit an IR cmpxchg loop implementing an atomic read-modify-write of
/// \p ResultTy at \p Addr: load the current value, apply \p PerformOp to it,
/// and attempt the store via \p CreateCmpXchg, retrying until the cmpxchg
/// succeeds. Returns the value observed by the successful iteration (the
/// "old" value of the atomicrmw).
Value *AtomicExpand::insertRMWCmpXchgLoop(
    IRBuilder<> &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder, SyncScope::ID SSID,
    function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
    CreateCmpXchgInstFun CreateCmpXchg) {
  LLVMContext &Ctx = Builder.getContext();
  BasicBlock *BB = Builder.GetInsertBlock();
  Function *F = BB->getParent();

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  //     %init_loaded = load atomic iN* %addr
  //     br label %loop
  // loop:
  //     %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
  //     %new = some_op iN %loaded, %incr
  //     %pair = cmpxchg iN* %addr, iN %loaded, iN %new
  //     %new_loaded = extractvalue { iN, i1 } %pair, 0
  //     %success = extractvalue { iN, i1 } %pair, 1
  //     br i1 %success, label %atomicrmw.end, label %loop
  // atomicrmw.end:
  //     [...]
  BasicBlock *ExitBB =
      BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we want a load. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
  Loaded->addIncoming(InitLoaded, BB);

  Value *NewVal = PerformOp(Builder, Loaded);

  Value *NewLoaded = nullptr;
  Value *Success = nullptr;

  // cmpxchg does not accept Unordered; weaken it to Monotonic, the weakest
  // ordering cmpxchg supports, which is still at least as strong.
  CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
                MemOpOrder == AtomicOrdering::Unordered
                    ? AtomicOrdering::Monotonic
                    : MemOpOrder,
                SSID, Success, NewLoaded);
  assert(Success && NewLoaded);

  // Close the loop: a failed cmpxchg feeds the freshly observed value back
  // into the PHI and retries.
  Loaded->addIncoming(NewLoaded, LoopBB);

  Builder.CreateCondBr(Success, ExitBB, LoopBB);

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  return NewLoaded;
}
1496 | ||||
1497 | bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) { | |||
1498 | unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; | |||
1499 | unsigned ValueSize = getAtomicOpSize(CI); | |||
1500 | ||||
1501 | switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) { | |||
1502 | default: | |||
1503 | llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg")__builtin_unreachable(); | |||
1504 | case TargetLoweringBase::AtomicExpansionKind::None: | |||
1505 | if (ValueSize < MinCASSize) | |||
1506 | return expandPartwordCmpXchg(CI); | |||
1507 | return false; | |||
1508 | case TargetLoweringBase::AtomicExpansionKind::LLSC: { | |||
1509 | return expandAtomicCmpXchg(CI); | |||
1510 | } | |||
1511 | case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: | |||
1512 | expandAtomicCmpXchgToMaskedIntrinsic(CI); | |||
1513 | return true; | |||
1514 | } | |||
1515 | } | |||
1516 | ||||
1517 | // Note: This function is exposed externally by AtomicExpandUtils.h | |||
1518 | bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, | |||
1519 | CreateCmpXchgInstFun CreateCmpXchg) { | |||
1520 | IRBuilder<> Builder(AI); | |||
1521 | Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop( | |||
1522 | Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(), | |||
1523 | AI->getOrdering(), AI->getSyncScopeID(), | |||
1524 | [&](IRBuilder<> &Builder, Value *Loaded) { | |||
1525 | return performAtomicOp(AI->getOperation(), Builder, Loaded, | |||
1526 | AI->getValOperand()); | |||
1527 | }, | |||
1528 | CreateCmpXchg); | |||
1529 | ||||
1530 | AI->replaceAllUsesWith(Loaded); | |||
1531 | AI->eraseFromParent(); | |||
1532 | return true; | |||
1533 | } | |||
1534 | ||||
1535 | // In order to use one of the sized library calls such as | |||
1536 | // __atomic_fetch_add_4, the alignment must be sufficient, the size | |||
1537 | // must be one of the potentially-specialized sizes, and the value | |||
1538 | // type must actually exist in C on the target (otherwise, the | |||
1539 | // function wouldn't actually be defined.) | |||
1540 | static bool canUseSizedAtomicCall(unsigned Size, Align Alignment, | |||
1541 | const DataLayout &DL) { | |||
1542 | // TODO: "LargestSize" is an approximation for "largest type that | |||
1543 | // you can express in C". It seems to be the case that int128 is | |||
1544 | // supported on all 64-bit platforms, otherwise only up to 64-bit | |||
1545 | // integers are supported. If we get this wrong, then we'll try to | |||
1546 | // call a sized libcall that doesn't actually exist. There should | |||
1547 | // really be some more reliable way in LLVM of determining integer | |||
1548 | // sizes which are valid in the target's C ABI... | |||
1549 | unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8; | |||
1550 | return Alignment >= Size && | |||
1551 | (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) && | |||
1552 | Size <= LargestSize; | |||
1553 | } | |||
1554 | ||||
1555 | void AtomicExpand::expandAtomicLoadToLibcall(LoadInst *I) { | |||
1556 | static const RTLIB::Libcall Libcalls[6] = { | |||
1557 | RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2, | |||
1558 | RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16}; | |||
1559 | unsigned Size = getAtomicOpSize(I); | |||
1560 | ||||
1561 | bool expanded = expandAtomicOpToLibcall( | |||
1562 | I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr, | |||
1563 | I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls); | |||
1564 | if (!expanded) | |||
1565 | report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Load"); | |||
1566 | } | |||
1567 | ||||
1568 | void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) { | |||
1569 | static const RTLIB::Libcall Libcalls[6] = { | |||
1570 | RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2, | |||
1571 | RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16}; | |||
1572 | unsigned Size = getAtomicOpSize(I); | |||
1573 | ||||
1574 | bool expanded = expandAtomicOpToLibcall( | |||
1575 | I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(), | |||
1576 | nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls); | |||
1577 | if (!expanded) | |||
1578 | report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Store"); | |||
1579 | } | |||
1580 | ||||
1581 | void AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) { | |||
1582 | static const RTLIB::Libcall Libcalls[6] = { | |||
1583 | RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1, | |||
1584 | RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4, | |||
1585 | RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16}; | |||
1586 | unsigned Size = getAtomicOpSize(I); | |||
1587 | ||||
1588 | bool expanded = expandAtomicOpToLibcall( | |||
1589 | I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(), | |||
1590 | I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(), | |||
1591 | Libcalls); | |||
1592 | if (!expanded) | |||
1593 | report_fatal_error("expandAtomicOpToLibcall shouldn't fail for CAS"); | |||
1594 | } | |||
1595 | ||||
// Map an atomicrmw binary operation to its family of __atomic_* runtime
// library calls. Index 0 is the generic (any-size) variant, indices 1-5 are
// the 1/2/4/8/16-byte specializations; returns an empty array when no
// libcalls exist for the operation. Only Xchg has a generic variant, so the
// fetch-op families put UNKNOWN_LIBCALL in slot 0 and the caller must fall
// back to a CAS loop for unsupported sizes.
static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
  static const RTLIB::Libcall LibcallsXchg[6] = {
      RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
      RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
      RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
  static const RTLIB::Libcall LibcallsAdd[6] = {
      RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
      RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
      RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
  static const RTLIB::Libcall LibcallsSub[6] = {
      RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
      RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
      RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
  static const RTLIB::Libcall LibcallsAnd[6] = {
      RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
      RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
      RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
  static const RTLIB::Libcall LibcallsOr[6] = {
      RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
      RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
      RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
  static const RTLIB::Libcall LibcallsXor[6] = {
      RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
      RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
      RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
  static const RTLIB::Libcall LibcallsNand[6] = {
      RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
      RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
      RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};

  switch (Op) {
  case AtomicRMWInst::BAD_BINOP:
    llvm_unreachable("Should not have BAD_BINOP.");
  case AtomicRMWInst::Xchg:
    return makeArrayRef(LibcallsXchg);
  case AtomicRMWInst::Add:
    return makeArrayRef(LibcallsAdd);
  case AtomicRMWInst::Sub:
    return makeArrayRef(LibcallsSub);
  case AtomicRMWInst::And:
    return makeArrayRef(LibcallsAnd);
  case AtomicRMWInst::Or:
    return makeArrayRef(LibcallsOr);
  case AtomicRMWInst::Xor:
    return makeArrayRef(LibcallsXor);
  case AtomicRMWInst::Nand:
    return makeArrayRef(LibcallsNand);
  case AtomicRMWInst::Max:
  case AtomicRMWInst::Min:
  case AtomicRMWInst::UMax:
  case AtomicRMWInst::UMin:
  case AtomicRMWInst::FAdd:
  case AtomicRMWInst::FSub:
    // No atomic libcalls are available for max/min/umax/umin or the
    // floating-point RMW operations; callers expand these via a CAS loop.
    return {};
  }
  llvm_unreachable("Unexpected AtomicRMW operation.");
}
1654 | ||||
// Expand an atomicrmw to a __atomic_fetch_* (or __atomic_exchange) libcall
// when one exists for the operation and size; otherwise fall back to a CAS
// loop whose compare-exchange is itself lowered to
// __atomic_compare_exchange[_N].
void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
  ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());

  unsigned Size = getAtomicOpSize(I);

  bool Success = false;
  if (!Libcalls.empty())
    Success = expandAtomicOpToLibcall(
        I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
        nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);

  // The expansion failed: either there were no libcalls at all for
  // the operation (min/max), or there were only size-specialized
  // libcalls (add/sub/etc) and we needed a generic. So, expand to a
  // CAS libcall, via a CAS loop, instead.
  if (!Success) {
    expandAtomicRMWToCmpXchg(
        I, [this](IRBuilder<> &Builder, Value *Addr, Value *Loaded,
                  Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
                  SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) {
          // Create the CAS instruction normally...
          AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
              Addr, Loaded, NewVal, Alignment, MemOpOrder,
              AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
          Success = Builder.CreateExtractValue(Pair, 1, "success");
          NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");

          // ...and then expand the CAS into a libcall.
          expandAtomicCASToLibcall(Pair);
        });
  }
}
1687 | ||||
1688 | // A helper routine for the above expandAtomic*ToLibcall functions. | |||
1689 | // | |||
1690 | // 'Libcalls' contains an array of enum values for the particular | |||
1691 | // ATOMIC libcalls to be emitted. All of the other arguments besides | |||
1692 | // 'I' are extracted from the Instruction subclass by the | |||
1693 | // caller. Depending on the particular call, some will be null. | |||
1694 | bool AtomicExpand::expandAtomicOpToLibcall( | |||
1695 | Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand, | |||
1696 | Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering, | |||
1697 | AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) { | |||
1698 | assert(Libcalls.size() == 6)((void)0); | |||
1699 | ||||
1700 | LLVMContext &Ctx = I->getContext(); | |||
1701 | Module *M = I->getModule(); | |||
1702 | const DataLayout &DL = M->getDataLayout(); | |||
1703 | IRBuilder<> Builder(I); | |||
1704 | IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front()); | |||
1705 | ||||
1706 | bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL); | |||
1707 | Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8); | |||
1708 | ||||
1709 | const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy); | |||
1710 | ||||
1711 | // TODO: the "order" argument type is "int", not int32. So | |||
1712 | // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints. | |||
1713 | ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size); | |||
1714 | assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO")((void)0); | |||
1715 | Constant *OrderingVal = | |||
1716 | ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering)); | |||
1717 | Constant *Ordering2Val = nullptr; | |||
1718 | if (CASExpected) { | |||
1719 | assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO")((void)0); | |||
1720 | Ordering2Val = | |||
1721 | ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2)); | |||
1722 | } | |||
1723 | bool HasResult = I->getType() != Type::getVoidTy(Ctx); | |||
1724 | ||||
1725 | RTLIB::Libcall RTLibType; | |||
1726 | if (UseSizedLibcall) { | |||
1727 | switch (Size) { | |||
1728 | case 1: RTLibType = Libcalls[1]; break; | |||
1729 | case 2: RTLibType = Libcalls[2]; break; | |||
1730 | case 4: RTLibType = Libcalls[3]; break; | |||
1731 | case 8: RTLibType = Libcalls[4]; break; | |||
1732 | case 16: RTLibType = Libcalls[5]; break; | |||
1733 | } | |||
1734 | } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) { | |||
1735 | RTLibType = Libcalls[0]; | |||
1736 | } else { | |||
1737 | // Can't use sized function, and there's no generic for this | |||
1738 | // operation, so give up. | |||
1739 | return false; | |||
1740 | } | |||
1741 | ||||
1742 | if (!TLI->getLibcallName(RTLibType)) { | |||
1743 | // This target does not implement the requested atomic libcall so give up. | |||
1744 | return false; | |||
1745 | } | |||
1746 | ||||
1747 | // Build up the function call. There's two kinds. First, the sized | |||
1748 | // variants. These calls are going to be one of the following (with | |||
1749 | // N=1,2,4,8,16): | |||
1750 | // iN __atomic_load_N(iN *ptr, int ordering) | |||
1751 | // void __atomic_store_N(iN *ptr, iN val, int ordering) | |||
1752 | // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering) | |||
1753 | // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired, | |||
1754 | // int success_order, int failure_order) | |||
1755 | // | |||
1756 | // Note that these functions can be used for non-integer atomic | |||
1757 | // operations, the values just need to be bitcast to integers on the | |||
1758 | // way in and out. | |||
1759 | // | |||
1760 | // And, then, the generic variants. They look like the following: | |||
1761 | // void __atomic_load(size_t size, void *ptr, void *ret, int ordering) | |||
1762 | // void __atomic_store(size_t size, void *ptr, void *val, int ordering) | |||
1763 | // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret, | |||
1764 | // int ordering) | |||
1765 | // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected, | |||
1766 | // void *desired, int success_order, | |||
1767 | // int failure_order) | |||
1768 | // | |||
1769 | // The different signatures are built up depending on the | |||
1770 | // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult' | |||
1771 | // variables. | |||
1772 | ||||
1773 | AllocaInst *AllocaCASExpected = nullptr; | |||
1774 | Value *AllocaCASExpected_i8 = nullptr; | |||
1775 | AllocaInst *AllocaValue = nullptr; | |||
1776 | Value *AllocaValue_i8 = nullptr; | |||
1777 | AllocaInst *AllocaResult = nullptr; | |||
1778 | Value *AllocaResult_i8 = nullptr; | |||
1779 | ||||
1780 | Type *ResultTy; | |||
1781 | SmallVector<Value *, 6> Args; | |||
1782 | AttributeList Attr; | |||
1783 | ||||
1784 | // 'size' argument. | |||
1785 | if (!UseSizedLibcall) { | |||
1786 | // Note, getIntPtrType is assumed equivalent to size_t. | |||
1787 | Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size)); | |||
1788 | } | |||
1789 | ||||
1790 | // 'ptr' argument. | |||
1791 | // note: This assumes all address spaces share a common libfunc | |||
1792 | // implementation and that addresses are convertable. For systems without | |||
1793 | // that property, we'd need to extend this mechanism to support AS-specific | |||
1794 | // families of atomic intrinsics. | |||
1795 | auto PtrTypeAS = PointerOperand->getType()->getPointerAddressSpace(); | |||
1796 | Value *PtrVal = Builder.CreateBitCast(PointerOperand, | |||
1797 | Type::getInt8PtrTy(Ctx, PtrTypeAS)); | |||
1798 | PtrVal = Builder.CreateAddrSpaceCast(PtrVal, Type::getInt8PtrTy(Ctx)); | |||
1799 | Args.push_back(PtrVal); | |||
1800 | ||||
1801 | // 'expected' argument, if present. | |||
1802 | if (CASExpected) { | |||
1803 | AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType()); | |||
1804 | AllocaCASExpected->setAlignment(AllocaAlignment); | |||
1805 | unsigned AllocaAS = AllocaCASExpected->getType()->getPointerAddressSpace(); | |||
1806 | ||||
1807 | AllocaCASExpected_i8 = | |||
1808 | Builder.CreateBitCast(AllocaCASExpected, | |||
1809 | Type::getInt8PtrTy(Ctx, AllocaAS)); | |||
1810 | Builder.CreateLifetimeStart(AllocaCASExpected_i8, SizeVal64); | |||
1811 | Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment); | |||
1812 | Args.push_back(AllocaCASExpected_i8); | |||
1813 | } | |||
1814 | ||||
1815 | // 'val' argument ('desired' for cas), if present. | |||
1816 | if (ValueOperand) { | |||
1817 | if (UseSizedLibcall) { | |||
1818 | Value *IntValue = | |||
1819 | Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy); | |||
1820 | Args.push_back(IntValue); | |||
1821 | } else { | |||
1822 | AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType()); | |||
1823 | AllocaValue->setAlignment(AllocaAlignment); | |||
1824 | AllocaValue_i8 = | |||
1825 | Builder.CreateBitCast(AllocaValue, Type::getInt8PtrTy(Ctx)); | |||
1826 | Builder.CreateLifetimeStart(AllocaValue_i8, SizeVal64); | |||
1827 | Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment); | |||
1828 | Args.push_back(AllocaValue_i8); | |||
1829 | } | |||
1830 | } | |||
1831 | ||||
1832 | // 'ret' argument. | |||
1833 | if (!CASExpected && HasResult && !UseSizedLibcall) { | |||
1834 | AllocaResult = AllocaBuilder.CreateAlloca(I->getType()); | |||
1835 | AllocaResult->setAlignment(AllocaAlignment); | |||
1836 | unsigned AllocaAS = AllocaResult->getType()->getPointerAddressSpace(); | |||
1837 | AllocaResult_i8 = | |||
1838 | Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx, AllocaAS)); | |||
1839 | Builder.CreateLifetimeStart(AllocaResult_i8, SizeVal64); | |||
1840 | Args.push_back(AllocaResult_i8); | |||
1841 | } | |||
1842 | ||||
1843 | // 'ordering' ('success_order' for cas) argument. | |||
1844 | Args.push_back(OrderingVal); | |||
1845 | ||||
1846 | // 'failure_order' argument, if present. | |||
1847 | if (Ordering2Val) | |||
1848 | Args.push_back(Ordering2Val); | |||
1849 | ||||
1850 | // Now, the return type. | |||
1851 | if (CASExpected) { | |||
1852 | ResultTy = Type::getInt1Ty(Ctx); | |||
1853 | Attr = Attr.addAttribute(Ctx, AttributeList::ReturnIndex, Attribute::ZExt); | |||
1854 | } else if (HasResult && UseSizedLibcall) | |||
1855 | ResultTy = SizedIntTy; | |||
1856 | else | |||
1857 | ResultTy = Type::getVoidTy(Ctx); | |||
1858 | ||||
1859 | // Done with setting up arguments and return types, create the call: | |||
1860 | SmallVector<Type *, 6> ArgTys; | |||
1861 | for (Value *Arg : Args) | |||
1862 | ArgTys.push_back(Arg->getType()); | |||
1863 | FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false); | |||
1864 | FunctionCallee LibcallFn = | |||
1865 | M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr); | |||
1866 | CallInst *Call = Builder.CreateCall(LibcallFn, Args); | |||
1867 | Call->setAttributes(Attr); | |||
1868 | Value *Result = Call; | |||
1869 | ||||
1870 | // And then, extract the results... | |||
1871 | if (ValueOperand && !UseSizedLibcall) | |||
1872 | Builder.CreateLifetimeEnd(AllocaValue_i8, SizeVal64); | |||
1873 | ||||
1874 | if (CASExpected) { | |||
1875 | // The final result from the CAS is {load of 'expected' alloca, bool result | |||
1876 | // from call} | |||
1877 | Type *FinalResultTy = I->getType(); | |||
1878 | Value *V = UndefValue::get(FinalResultTy); | |||
1879 | Value *ExpectedOut = Builder.CreateAlignedLoad( | |||
1880 | CASExpected->getType(), AllocaCASExpected, AllocaAlignment); | |||
1881 | Builder.CreateLifetimeEnd(AllocaCASExpected_i8, SizeVal64); | |||
1882 | V = Builder.CreateInsertValue(V, ExpectedOut, 0); | |||
1883 | V = Builder.CreateInsertValue(V, Result, 1); | |||
1884 | I->replaceAllUsesWith(V); | |||
1885 | } else if (HasResult) { | |||
1886 | Value *V; | |||
1887 | if (UseSizedLibcall) | |||
1888 | V = Builder.CreateBitOrPointerCast(Result, I->getType()); | |||
1889 | else { | |||
1890 | V = Builder.CreateAlignedLoad(I->getType(), AllocaResult, | |||
1891 | AllocaAlignment); | |||
1892 | Builder.CreateLifetimeEnd(AllocaResult_i8, SizeVal64); | |||
1893 | } | |||
1894 | I->replaceAllUsesWith(V); | |||
1895 | } | |||
1896 | I->eraseFromParent(); | |||
1897 | return true; | |||
1898 | } |
1 | //===-- llvm/Support/Alignment.h - Useful alignment functions ---*- C++ -*-===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | // This file contains types to represent alignments. | |||
10 | // They are instrumented to guarantee some invariants are preserved and prevent | |||
11 | // invalid manipulations. | |||
12 | // | |||
13 | // - Align represents an alignment in bytes, it is always set and always a valid | |||
14 | // power of two, its minimum value is 1 which means no alignment requirements. | |||
15 | // | |||
16 | // - MaybeAlign is an optional type, it may be undefined or set. When it's set | |||
17 | // you can get the underlying Align type by using the getValue() method. | |||
18 | // | |||
19 | //===----------------------------------------------------------------------===// | |||
20 | ||||
21 | #ifndef LLVM_SUPPORT_ALIGNMENT_H_ | |||
22 | #define LLVM_SUPPORT_ALIGNMENT_H_ | |||
23 | ||||
24 | #include "llvm/ADT/Optional.h" | |||
25 | #include "llvm/Support/MathExtras.h" | |||
26 | #include <cassert> | |||
27 | #ifndef NDEBUG1 | |||
28 | #include <string> | |||
29 | #endif // NDEBUG | |||
30 | ||||
31 | namespace llvm { | |||
32 | ||||
/// Asserts that the given expression is strictly positive, reporting the
/// expression's own text in the failure message.
#define ALIGN_CHECK_ISPOSITIVE(decl)                                           \
  assert(decl > 0 && (#decl " should be defined"))
35 | ||||
36 | /// This struct is a compact representation of a valid (non-zero power of two) | |||
37 | /// alignment. | |||
38 | /// It is suitable for use as static global constants. | |||
39 | struct Align { | |||
40 | private: | |||
41 | uint8_t ShiftValue = 0; /// The log2 of the required alignment. | |||
42 | /// ShiftValue is less than 64 by construction. | |||
43 | ||||
44 | friend struct MaybeAlign; | |||
45 | friend unsigned Log2(Align); | |||
46 | friend bool operator==(Align Lhs, Align Rhs); | |||
47 | friend bool operator!=(Align Lhs, Align Rhs); | |||
48 | friend bool operator<=(Align Lhs, Align Rhs); | |||
49 | friend bool operator>=(Align Lhs, Align Rhs); | |||
50 | friend bool operator<(Align Lhs, Align Rhs); | |||
51 | friend bool operator>(Align Lhs, Align Rhs); | |||
52 | friend unsigned encode(struct MaybeAlign A); | |||
53 | friend struct MaybeAlign decodeMaybeAlign(unsigned Value); | |||
54 | ||||
55 | /// A trivial type to allow construction of constexpr Align. | |||
56 | /// This is currently needed to workaround a bug in GCC 5.3 which prevents | |||
57 | /// definition of constexpr assign operators. | |||
58 | /// https://stackoverflow.com/questions/46756288/explicitly-defaulted-function-cannot-be-declared-as-constexpr-because-the-implic | |||
59 | /// FIXME: Remove this, make all assign operators constexpr and introduce user | |||
60 | /// defined literals when we don't have to support GCC 5.3 anymore. | |||
61 | /// https://llvm.org/docs/GettingStarted.html#getting-a-modern-host-c-toolchain | |||
62 | struct LogValue { | |||
63 | uint8_t Log; | |||
64 | }; | |||
65 | ||||
66 | public: | |||
67 | /// Default is byte-aligned. | |||
68 | constexpr Align() = default; | |||
69 | /// Do not perform checks in case of copy/move construct/assign, because the | |||
70 | /// checks have been performed when building `Other`. | |||
71 | constexpr Align(const Align &Other) = default; | |||
72 | constexpr Align(Align &&Other) = default; | |||
73 | Align &operator=(const Align &Other) = default; | |||
74 | Align &operator=(Align &&Other) = default; | |||
75 | ||||
76 | explicit Align(uint64_t Value) { | |||
77 | assert(Value > 0 && "Value must not be 0")((void)0); | |||
78 | assert(llvm::isPowerOf2_64(Value) && "Alignment is not a power of 2")((void)0); | |||
79 | ShiftValue = Log2_64(Value); | |||
80 | assert(ShiftValue < 64 && "Broken invariant")((void)0); | |||
81 | } | |||
82 | ||||
83 | /// This is a hole in the type system and should not be abused. | |||
84 | /// Needed to interact with C for instance. | |||
85 | uint64_t value() const { return uint64_t(1) << ShiftValue; } | |||
| ||||
86 | ||||
87 | /// Allow constructions of constexpr Align. | |||
88 | template <size_t kValue> constexpr static LogValue Constant() { | |||
89 | return LogValue{static_cast<uint8_t>(CTLog2<kValue>())}; | |||
90 | } | |||
91 | ||||
92 | /// Allow constructions of constexpr Align from types. | |||
93 | /// Compile time equivalent to Align(alignof(T)). | |||
94 | template <typename T> constexpr static LogValue Of() { | |||
95 | return Constant<std::alignment_of<T>::value>(); | |||
96 | } | |||
97 | ||||
98 | /// Constexpr constructor from LogValue type. | |||
99 | constexpr Align(LogValue CA) : ShiftValue(CA.Log) {} | |||
100 | }; | |||
101 | ||||
102 | /// Treats the value 0 as a 1, so Align is always at least 1. | |||
103 | inline Align assumeAligned(uint64_t Value) { | |||
104 | return Value ? Align(Value) : Align(); | |||
105 | } | |||
106 | ||||
/// This struct is a compact representation of a valid (power of two) or
/// undefined (0) alignment.
struct MaybeAlign : public llvm::Optional<Align> {
private:
  // Shorthand for the underlying optional base type.
  using UP = llvm::Optional<Align>;

public:
  /// Default is undefined.
  MaybeAlign() = default;
  /// Do not perform checks in case of copy/move construct/assign, because the
  /// checks have been performed when building `Other`.
  MaybeAlign(const MaybeAlign &Other) = default;
  MaybeAlign &operator=(const MaybeAlign &Other) = default;
  MaybeAlign(MaybeAlign &&Other) = default;
  MaybeAlign &operator=(MaybeAlign &&Other) = default;

  /// Use llvm::Optional<Align> constructor.
  using UP::UP;

  /// Builds from a byte count: 0 yields the undefined state; any other value
  /// must be a power of two (enforced in asserting builds only).
  explicit MaybeAlign(uint64_t Value) {
    assert((Value == 0 || llvm::isPowerOf2_64(Value)) &&
           "Alignment is neither 0 nor a power of 2");
    if (Value)
      emplace(Value);
  }

  /// For convenience, returns a valid alignment or 1 if undefined.
  Align valueOrOne() const { return hasValue() ? getValue() : Align(); }
};
136 | ||||
137 | /// Checks that SizeInBytes is a multiple of the alignment. | |||
138 | inline bool isAligned(Align Lhs, uint64_t SizeInBytes) { | |||
139 | return SizeInBytes % Lhs.value() == 0; | |||
140 | } | |||
141 | ||||
142 | /// Checks that Addr is a multiple of the alignment. | |||
143 | inline bool isAddrAligned(Align Lhs, const void *Addr) { | |||
144 | return isAligned(Lhs, reinterpret_cast<uintptr_t>(Addr)); | |||
145 | } | |||
146 | ||||
147 | /// Returns a multiple of A needed to store `Size` bytes. | |||
148 | inline uint64_t alignTo(uint64_t Size, Align A) { | |||
149 | const uint64_t Value = A.value(); | |||
150 | // The following line is equivalent to `(Size + Value - 1) / Value * Value`. | |||
151 | ||||
152 | // The division followed by a multiplication can be thought of as a right | |||
153 | // shift followed by a left shift which zeros out the extra bits produced in | |||
154 | // the bump; `~(Value - 1)` is a mask where all those bits being zeroed out | |||
155 | // are just zero. | |||
156 | ||||
157 | // Most compilers can generate this code but the pattern may be missed when | |||
158 | // multiple functions gets inlined. | |||
159 | return (Size + Value - 1) & ~(Value - 1U); | |||
160 | } | |||
161 | ||||
162 | /// If non-zero \p Skew is specified, the return value will be a minimal integer | |||
163 | /// that is greater than or equal to \p Size and equal to \p A * N + \p Skew for | |||
164 | /// some integer N. If \p Skew is larger than \p A, its value is adjusted to '\p | |||
165 | /// Skew mod \p A'. | |||
166 | /// | |||
167 | /// Examples: | |||
168 | /// \code | |||
169 | /// alignTo(5, Align(8), 7) = 7 | |||
170 | /// alignTo(17, Align(8), 1) = 17 | |||
171 | /// alignTo(~0LL, Align(8), 3) = 3 | |||
172 | /// \endcode | |||
173 | inline uint64_t alignTo(uint64_t Size, Align A, uint64_t Skew) { | |||
174 | const uint64_t Value = A.value(); | |||
175 | Skew %= Value; | |||
176 | return ((Size + Value - 1 - Skew) & ~(Value - 1U)) + Skew; | |||
177 | } | |||
178 | ||||
179 | /// Returns a multiple of A needed to store `Size` bytes. | |||
180 | /// Returns `Size` if current alignment is undefined. | |||
181 | inline uint64_t alignTo(uint64_t Size, MaybeAlign A) { | |||
182 | return A ? alignTo(Size, A.getValue()) : Size; | |||
183 | } | |||
184 | ||||
/// Aligns `Addr` to `Alignment` bytes, rounding up.
/// The assert guards against the round-up wrapping past the end of the
/// address space (asserting builds only).
inline uintptr_t alignAddr(const void *Addr, Align Alignment) {
  uintptr_t ArithAddr = reinterpret_cast<uintptr_t>(Addr);
  assert(static_cast<uintptr_t>(ArithAddr + Alignment.value() - 1) >=
             ArithAddr &&
         "Overflow");
  return alignTo(ArithAddr, Alignment);
}
193 | ||||
194 | /// Returns the offset to the next integer (mod 2**64) that is greater than | |||
195 | /// or equal to \p Value and is a multiple of \p Align. | |||
196 | inline uint64_t offsetToAlignment(uint64_t Value, Align Alignment) { | |||
197 | return alignTo(Value, Alignment) - Value; | |||
198 | } | |||
199 | ||||
200 | /// Returns the necessary adjustment for aligning `Addr` to `Alignment` | |||
201 | /// bytes, rounding up. | |||
202 | inline uint64_t offsetToAlignedAddr(const void *Addr, Align Alignment) { | |||
203 | return offsetToAlignment(reinterpret_cast<uintptr_t>(Addr), Alignment); | |||
204 | } | |||
205 | ||||
/// Returns the log2 of the alignment.
/// Reads the stored exponent directly (friend access) — no logarithm is
/// recomputed; Align(1) yields 0.
inline unsigned Log2(Align A) { return A.ShiftValue; }
208 | ||||
209 | /// Returns the alignment that satisfies both alignments. | |||
210 | /// Same semantic as MinAlign. | |||
211 | inline Align commonAlignment(Align A, Align B) { return std::min(A, B); } | |||
212 | ||||
213 | /// Returns the alignment that satisfies both alignments. | |||
214 | /// Same semantic as MinAlign. | |||
215 | inline Align commonAlignment(Align A, uint64_t Offset) { | |||
216 | return Align(MinAlign(A.value(), Offset)); | |||
217 | } | |||
218 | ||||
219 | /// Returns the alignment that satisfies both alignments. | |||
220 | /// Same semantic as MinAlign. | |||
221 | inline MaybeAlign commonAlignment(MaybeAlign A, MaybeAlign B) { | |||
222 | return A && B ? commonAlignment(*A, *B) : A ? A : B; | |||
223 | } | |||
224 | ||||
225 | /// Returns the alignment that satisfies both alignments. | |||
226 | /// Same semantic as MinAlign. | |||
227 | inline MaybeAlign commonAlignment(MaybeAlign A, uint64_t Offset) { | |||
228 | return MaybeAlign(MinAlign((*A).value(), Offset)); | |||
229 | } | |||
230 | ||||
231 | /// Returns a representation of the alignment that encodes undefined as 0. | |||
232 | inline unsigned encode(MaybeAlign A) { return A ? A->ShiftValue + 1 : 0; } | |||
233 | ||||
234 | /// Dual operation of the encode function above. | |||
235 | inline MaybeAlign decodeMaybeAlign(unsigned Value) { | |||
236 | if (Value == 0) | |||
237 | return MaybeAlign(); | |||
238 | Align Out; | |||
239 | Out.ShiftValue = Value - 1; | |||
240 | return Out; | |||
241 | } | |||
242 | ||||
243 | /// Returns a representation of the alignment, the encoded value is positive by | |||
244 | /// definition. | |||
245 | inline unsigned encode(Align A) { return encode(MaybeAlign(A)); } | |||
246 | ||||
/// Comparisons between Align and scalars. Rhs must be positive.
/// (An Align is never 0, so a 0 comparand would be statically answerable;
/// ALIGN_CHECK_ISPOSITIVE rejects it in asserting builds.)
inline bool operator==(Align Lhs, uint64_t Rhs) {
  ALIGN_CHECK_ISPOSITIVE(Rhs);
  return Lhs.value() == Rhs;
}
inline bool operator!=(Align Lhs, uint64_t Rhs) {
  ALIGN_CHECK_ISPOSITIVE(Rhs);
  return Lhs.value() != Rhs;
}
inline bool operator<=(Align Lhs, uint64_t Rhs) {
  ALIGN_CHECK_ISPOSITIVE(Rhs);
  return Lhs.value() <= Rhs;
}
inline bool operator>=(Align Lhs, uint64_t Rhs) {
  ALIGN_CHECK_ISPOSITIVE(Rhs);
  return Lhs.value() >= Rhs;
}
inline bool operator<(Align Lhs, uint64_t Rhs) {
  ALIGN_CHECK_ISPOSITIVE(Rhs);
  return Lhs.value() < Rhs;
}
inline bool operator>(Align Lhs, uint64_t Rhs) {
  ALIGN_CHECK_ISPOSITIVE(Rhs);
  return Lhs.value() > Rhs;
}
272 | ||||
273 | /// Comparisons between MaybeAlign and scalars. | |||
274 | inline bool operator==(MaybeAlign Lhs, uint64_t Rhs) { | |||
275 | return Lhs ? (*Lhs).value() == Rhs : Rhs == 0; | |||
276 | } | |||
277 | inline bool operator!=(MaybeAlign Lhs, uint64_t Rhs) { | |||
278 | return Lhs ? (*Lhs).value() != Rhs : Rhs != 0; | |||
279 | } | |||
280 | ||||
/// Comparisons operators between Align.
/// Comparing the stored log2 exponents is equivalent to comparing the byte
/// values, since x -> 2^x is strictly increasing; no shift is needed.
inline bool operator==(Align Lhs, Align Rhs) {
  return Lhs.ShiftValue == Rhs.ShiftValue;
}
inline bool operator!=(Align Lhs, Align Rhs) {
  return Lhs.ShiftValue != Rhs.ShiftValue;
}
inline bool operator<=(Align Lhs, Align Rhs) {
  return Lhs.ShiftValue <= Rhs.ShiftValue;
}
inline bool operator>=(Align Lhs, Align Rhs) {
  return Lhs.ShiftValue >= Rhs.ShiftValue;
}
inline bool operator<(Align Lhs, Align Rhs) {
  return Lhs.ShiftValue < Rhs.ShiftValue;
}
inline bool operator>(Align Lhs, Align Rhs) {
  return Lhs.ShiftValue > Rhs.ShiftValue;
}
300 | ||||
// Don't allow relational comparisons with MaybeAlign.
// An undefined alignment has no meaningful ordering relative to a defined
// one; deleting these overloads forces callers to resolve the optional
// explicitly (e.g. via valueOrOne()) before comparing.
bool operator<=(Align Lhs, MaybeAlign Rhs) = delete;
bool operator>=(Align Lhs, MaybeAlign Rhs) = delete;
bool operator<(Align Lhs, MaybeAlign Rhs) = delete;
bool operator>(Align Lhs, MaybeAlign Rhs) = delete;

bool operator<=(MaybeAlign Lhs, Align Rhs) = delete;
bool operator>=(MaybeAlign Lhs, Align Rhs) = delete;
bool operator<(MaybeAlign Lhs, Align Rhs) = delete;
bool operator>(MaybeAlign Lhs, Align Rhs) = delete;

bool operator<=(MaybeAlign Lhs, MaybeAlign Rhs) = delete;
bool operator>=(MaybeAlign Lhs, MaybeAlign Rhs) = delete;
bool operator<(MaybeAlign Lhs, MaybeAlign Rhs) = delete;
bool operator>(MaybeAlign Lhs, MaybeAlign Rhs) = delete;
316 | ||||
317 | inline Align operator*(Align Lhs, uint64_t Rhs) { | |||
318 | assert(Rhs > 0 && "Rhs must be positive")((void)0); | |||
319 | return Align(Lhs.value() * Rhs); | |||
320 | } | |||
321 | ||||
322 | inline MaybeAlign operator*(MaybeAlign Lhs, uint64_t Rhs) { | |||
323 | assert(Rhs > 0 && "Rhs must be positive")((void)0); | |||
324 | return Lhs ? Lhs.getValue() * Rhs : MaybeAlign(); | |||
325 | } | |||
326 | ||||
327 | inline Align operator/(Align Lhs, uint64_t Divisor) { | |||
328 | assert(llvm::isPowerOf2_64(Divisor) &&((void)0) | |||
329 | "Divisor must be positive and a power of 2")((void)0); | |||
330 | assert(Lhs != 1 && "Can't halve byte alignment")((void)0); | |||
331 | return Align(Lhs.value() / Divisor); | |||
332 | } | |||
333 | ||||
334 | inline MaybeAlign operator/(MaybeAlign Lhs, uint64_t Divisor) { | |||
335 | assert(llvm::isPowerOf2_64(Divisor) &&((void)0) | |||
336 | "Divisor must be positive and a power of 2")((void)0); | |||
337 | return Lhs ? Lhs.getValue() / Divisor : MaybeAlign(); | |||
338 | } | |||
339 | ||||
340 | inline Align max(MaybeAlign Lhs, Align Rhs) { | |||
341 | return Lhs && *Lhs > Rhs ? *Lhs : Rhs; | |||
342 | } | |||
343 | ||||
344 | inline Align max(Align Lhs, MaybeAlign Rhs) { | |||
345 | return Rhs && *Rhs > Lhs ? *Rhs : Lhs; | |||
346 | } | |||
347 | ||||
348 | #ifndef NDEBUG1 | |||
349 | // For usage in LLVM_DEBUG macros. | |||
350 | inline std::string DebugStr(const Align &A) { | |||
351 | return std::to_string(A.value()); | |||
352 | } | |||
353 | // For usage in LLVM_DEBUG macros. | |||
354 | inline std::string DebugStr(const MaybeAlign &MA) { | |||
355 | if (MA) | |||
356 | return std::to_string(MA->value()); | |||
357 | return "None"; | |||
358 | } | |||
359 | #endif // NDEBUG | |||
360 | ||||
361 | #undef ALIGN_CHECK_ISPOSITIVE | |||
362 | ||||
363 | } // namespace llvm | |||
364 | ||||
365 | #endif // LLVM_SUPPORT_ALIGNMENT_H_ |