File: | src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Support/Alignment.h |
Warning: | line 85, column 47 The result of the left shift is undefined due to shifting by '255', which is greater or equal to the width of type 'uint64_t' |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===-- AMDGPULowerModuleLDSPass.cpp ------------------------------*- C++ -*-=// | ||||
2 | // | ||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||
6 | // | ||||
7 | //===----------------------------------------------------------------------===// | ||||
8 | // | ||||
9 | // This pass eliminates LDS uses from non-kernel functions. | ||||
10 | // | ||||
11 | // The strategy is to create a new struct with a field for each LDS variable | ||||
12 | // and allocate that struct at the same address for every kernel. Uses of the | ||||
13 | // original LDS variables are then replaced with compile time offsets from that | ||||
14 | // known address. AMDGPUMachineFunction allocates the LDS global. | ||||
15 | // | ||||
16 | // Local variables with constant annotation or non-undef initializer are passed | ||||
17 | // through unchanged for simplification or error diagnostics in later passes. | ||||
18 | // | ||||
19 | // To reduce the memory overhead variables that are only used by kernels are | ||||
20 | // excluded from this transform. The analysis to determine whether a variable | ||||
21 | // is only used by a kernel is cheap and conservative so this may allocate | ||||
22 | // a variable in every kernel when it was not strictly necessary to do so. | ||||
23 | // | ||||
24 | // A possible future refinement is to specialise the structure per-kernel, so | ||||
25 | // that fields can be elided based on more expensive analysis. | ||||
26 | // | ||||
27 | // NOTE: This pass packs LDS variables directly into a struct type. When the | ||||
28 | // LDS variables are large, this allocates a large struct instance within | ||||
29 | // every kernel. Hence, before running this pass, it is advisable to run the | ||||
30 | // pass "amdgpu-replace-lds-use-with-pointer", which replaces LDS uses within | ||||
31 | // non-kernel functions by pointers and thereby minimizes the unnecessary | ||||
32 | // per-kernel allocation of LDS memory. | ||||
33 | // | ||||
34 | //===----------------------------------------------------------------------===// | ||||
35 | |||||
36 | #include "AMDGPU.h" | ||||
37 | #include "Utils/AMDGPUBaseInfo.h" | ||||
38 | #include "Utils/AMDGPULDSUtils.h" | ||||
39 | #include "llvm/ADT/STLExtras.h" | ||||
40 | #include "llvm/IR/Constants.h" | ||||
41 | #include "llvm/IR/DerivedTypes.h" | ||||
42 | #include "llvm/IR/IRBuilder.h" | ||||
43 | #include "llvm/IR/InlineAsm.h" | ||||
44 | #include "llvm/IR/Instructions.h" | ||||
45 | #include "llvm/InitializePasses.h" | ||||
46 | #include "llvm/Pass.h" | ||||
47 | #include "llvm/Support/CommandLine.h" | ||||
48 | #include "llvm/Support/Debug.h" | ||||
49 | #include "llvm/Support/OptimizedStructLayout.h" | ||||
50 | #include "llvm/Transforms/Utils/ModuleUtils.h" | ||||
51 | #include <vector> | ||||
52 | |||||
// Name under which this pass is registered and which selects its debug output.
#define DEBUG_TYPE "amdgpu-lower-module-lds"
54 | |||||
55 | using namespace llvm; | ||||
56 | |||||
57 | static cl::opt<bool> SuperAlignLDSGlobals( | ||||
58 | "amdgpu-super-align-lds-globals", | ||||
59 | cl::desc("Increase alignment of LDS if it is not on align boundary"), | ||||
60 | cl::init(true), cl::Hidden); | ||||
61 | |||||
62 | namespace { | ||||
63 | |||||
64 | class AMDGPULowerModuleLDS : public ModulePass { | ||||
65 | |||||
66 | static void removeFromUsedList(Module &M, StringRef Name, | ||||
67 | SmallPtrSetImpl<Constant *> &ToRemove) { | ||||
68 | GlobalVariable *GV = M.getNamedGlobal(Name); | ||||
69 | if (!GV || ToRemove.empty()) { | ||||
70 | return; | ||||
71 | } | ||||
72 | |||||
73 | SmallVector<Constant *, 16> Init; | ||||
74 | auto *CA = cast<ConstantArray>(GV->getInitializer()); | ||||
75 | for (auto &Op : CA->operands()) { | ||||
76 | // ModuleUtils::appendToUsed only inserts Constants | ||||
77 | Constant *C = cast<Constant>(Op); | ||||
78 | if (!ToRemove.contains(C->stripPointerCasts())) { | ||||
79 | Init.push_back(C); | ||||
80 | } | ||||
81 | } | ||||
82 | |||||
83 | if (Init.size() == CA->getNumOperands()) { | ||||
84 | return; // none to remove | ||||
85 | } | ||||
86 | |||||
87 | GV->eraseFromParent(); | ||||
88 | |||||
89 | for (Constant *C : ToRemove) { | ||||
90 | C->removeDeadConstantUsers(); | ||||
91 | } | ||||
92 | |||||
93 | if (!Init.empty()) { | ||||
94 | ArrayType *ATy = | ||||
95 | ArrayType::get(Type::getInt8PtrTy(M.getContext()), Init.size()); | ||||
96 | GV = | ||||
97 | new llvm::GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage, | ||||
98 | ConstantArray::get(ATy, Init), Name); | ||||
99 | GV->setSection("llvm.metadata"); | ||||
100 | } | ||||
101 | } | ||||
102 | |||||
103 | static void | ||||
104 | removeFromUsedLists(Module &M, | ||||
105 | const std::vector<GlobalVariable *> &LocalVars) { | ||||
106 | SmallPtrSet<Constant *, 32> LocalVarsSet; | ||||
107 | for (size_t I = 0; I < LocalVars.size(); I++) { | ||||
108 | if (Constant *C = dyn_cast<Constant>(LocalVars[I]->stripPointerCasts())) { | ||||
109 | LocalVarsSet.insert(C); | ||||
110 | } | ||||
111 | } | ||||
112 | removeFromUsedList(M, "llvm.used", LocalVarsSet); | ||||
113 | removeFromUsedList(M, "llvm.compiler.used", LocalVarsSet); | ||||
114 | } | ||||
115 | |||||
116 | static void markUsedByKernel(IRBuilder<> &Builder, Function *Func, | ||||
117 | GlobalVariable *SGV) { | ||||
118 | // The llvm.amdgcn.module.lds instance is implicitly used by all kernels | ||||
119 | // that might call a function which accesses a field within it. This is | ||||
120 | // presently approximated to 'all kernels' if there are any such functions | ||||
121 | // in the module. This implicit use is reified as an explicit use here so | ||||
122 | // that later passes, specifically PromoteAlloca, account for the required | ||||
123 | // memory without any knowledge of this transform. | ||||
124 | |||||
125 | // An operand bundle on llvm.donothing works because the call instruction | ||||
126 | // survives until after the last pass that needs to account for LDS. It is | ||||
127 | // better than inline asm as the latter survives until the end of codegen. A | ||||
128 | // totally robust solution would be a function with the same semantics as | ||||
129 | // llvm.donothing that takes a pointer to the instance and is lowered to a | ||||
130 | // no-op after LDS is allocated, but that is not presently necessary. | ||||
131 | |||||
132 | LLVMContext &Ctx = Func->getContext(); | ||||
133 | |||||
134 | Builder.SetInsertPoint(Func->getEntryBlock().getFirstNonPHI()); | ||||
135 | |||||
136 | FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), {}); | ||||
137 | |||||
138 | Function *Decl = | ||||
139 | Intrinsic::getDeclaration(Func->getParent(), Intrinsic::donothing, {}); | ||||
140 | |||||
141 | Value *UseInstance[1] = {Builder.CreateInBoundsGEP( | ||||
142 | SGV->getValueType(), SGV, ConstantInt::get(Type::getInt32Ty(Ctx), 0))}; | ||||
143 | |||||
144 | Builder.CreateCall(FTy, Decl, {}, | ||||
145 | {OperandBundleDefT<Value *>("ExplicitUse", UseInstance)}, | ||||
146 | ""); | ||||
147 | } | ||||
148 | |||||
149 | private: | ||||
150 | SmallPtrSet<GlobalValue *, 32> UsedList; | ||||
151 | |||||
152 | public: | ||||
153 | static char ID; | ||||
154 | |||||
155 | AMDGPULowerModuleLDS() : ModulePass(ID) { | ||||
156 | initializeAMDGPULowerModuleLDSPass(*PassRegistry::getPassRegistry()); | ||||
157 | } | ||||
158 | |||||
159 | bool runOnModule(Module &M) override { | ||||
160 | UsedList = AMDGPU::getUsedList(M); | ||||
161 | |||||
162 | bool Changed = processUsedLDS(M); | ||||
163 | |||||
164 | for (Function &F : M.functions()) { | ||||
165 | // Only lower compute kernels' LDS. | ||||
166 | if (!AMDGPU::isKernel(F.getCallingConv())) | ||||
167 | continue; | ||||
168 | Changed |= processUsedLDS(M, &F); | ||||
169 | } | ||||
170 | |||||
171 | UsedList.clear(); | ||||
172 | return Changed; | ||||
173 | } | ||||
174 | |||||
175 | private: | ||||
176 | bool processUsedLDS(Module &M, Function *F = nullptr) { | ||||
177 | LLVMContext &Ctx = M.getContext(); | ||||
178 | const DataLayout &DL = M.getDataLayout(); | ||||
179 | |||||
180 | // Find variables to move into new struct instance | ||||
181 | std::vector<GlobalVariable *> FoundLocalVars = | ||||
182 | AMDGPU::findVariablesToLower(M, F); | ||||
183 | |||||
184 | if (FoundLocalVars.empty()) { | ||||
185 | // No variables to rewrite, no changes made. | ||||
186 | return false; | ||||
187 | } | ||||
188 | |||||
189 | // Increase the alignment of LDS globals if necessary to maximise the chance | ||||
190 | // that we can use aligned LDS instructions to access them. | ||||
191 | if (SuperAlignLDSGlobals) { | ||||
192 | for (auto *GV : FoundLocalVars) { | ||||
193 | Align Alignment = AMDGPU::getAlign(DL, GV); | ||||
194 | TypeSize GVSize = DL.getTypeAllocSize(GV->getValueType()); | ||||
195 | |||||
196 | if (GVSize > 8) { | ||||
197 | // We might want to use a b96 or b128 load/store | ||||
198 | Alignment = std::max(Alignment, Align(16)); | ||||
199 | } else if (GVSize > 4) { | ||||
200 | // We might want to use a b64 load/store | ||||
201 | Alignment = std::max(Alignment, Align(8)); | ||||
202 | } else if (GVSize > 2) { | ||||
203 | // We might want to use a b32 load/store | ||||
204 | Alignment = std::max(Alignment, Align(4)); | ||||
205 | } else if (GVSize > 1) { | ||||
206 | // We might want to use a b16 load/store | ||||
207 | Alignment = std::max(Alignment, Align(2)); | ||||
208 | } | ||||
209 | |||||
210 | GV->setAlignment(Alignment); | ||||
211 | } | ||||
212 | } | ||||
213 | |||||
214 | SmallVector<OptimizedStructLayoutField, 8> LayoutFields; | ||||
215 | LayoutFields.reserve(FoundLocalVars.size()); | ||||
216 | for (GlobalVariable *GV : FoundLocalVars) { | ||||
217 | OptimizedStructLayoutField F(GV, DL.getTypeAllocSize(GV->getValueType()), | ||||
218 | AMDGPU::getAlign(DL, GV)); | ||||
219 | LayoutFields.emplace_back(F); | ||||
220 | } | ||||
221 | |||||
222 | performOptimizedStructLayout(LayoutFields); | ||||
223 | |||||
224 | std::vector<GlobalVariable *> LocalVars; | ||||
225 | LocalVars.reserve(FoundLocalVars.size()); // will be at least this large | ||||
226 | { | ||||
227 | // This usually won't need to insert any padding, perhaps avoid the alloc | ||||
228 | uint64_t CurrentOffset = 0; | ||||
229 | for (size_t I = 0; I < LayoutFields.size(); I++) { | ||||
230 | GlobalVariable *FGV = static_cast<GlobalVariable *>( | ||||
231 | const_cast<void *>(LayoutFields[I].Id)); | ||||
232 | Align DataAlign = LayoutFields[I].Alignment; | ||||
233 | |||||
234 | uint64_t DataAlignV = DataAlign.value(); | ||||
235 | if (uint64_t Rem = CurrentOffset % DataAlignV) { | ||||
236 | uint64_t Padding = DataAlignV - Rem; | ||||
237 | |||||
238 | // Append an array of padding bytes to meet alignment requested | ||||
239 | // Note (o + (a - (o % a)) ) % a == 0 | ||||
240 | // (offset + Padding ) % align == 0 | ||||
241 | |||||
242 | Type *ATy = ArrayType::get(Type::getInt8Ty(Ctx), Padding); | ||||
243 | LocalVars.push_back(new GlobalVariable( | ||||
244 | M, ATy, false, GlobalValue::InternalLinkage, UndefValue::get(ATy), | ||||
245 | "", nullptr, GlobalValue::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS, | ||||
246 | false)); | ||||
247 | CurrentOffset += Padding; | ||||
248 | } | ||||
249 | |||||
250 | LocalVars.push_back(FGV); | ||||
251 | CurrentOffset += LayoutFields[I].Size; | ||||
252 | } | ||||
253 | } | ||||
254 | |||||
255 | std::vector<Type *> LocalVarTypes; | ||||
256 | LocalVarTypes.reserve(LocalVars.size()); | ||||
257 | std::transform( | ||||
258 | LocalVars.cbegin(), LocalVars.cend(), std::back_inserter(LocalVarTypes), | ||||
259 | [](const GlobalVariable *V) -> Type * { return V->getValueType(); }); | ||||
260 | |||||
261 | std::string VarName( | ||||
262 | F
| ||||
263 | : "llvm.amdgcn.module.lds"); | ||||
264 | StructType *LDSTy = StructType::create(Ctx, LocalVarTypes, VarName + ".t"); | ||||
265 | |||||
266 | Align StructAlign = | ||||
267 | AMDGPU::getAlign(DL, LocalVars[0]); | ||||
268 | |||||
269 | GlobalVariable *SGV = new GlobalVariable( | ||||
270 | M, LDSTy, false, GlobalValue::InternalLinkage, UndefValue::get(LDSTy), | ||||
271 | VarName, nullptr, GlobalValue::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS, | ||||
272 | false); | ||||
273 | SGV->setAlignment(StructAlign); | ||||
274 | if (!F
| ||||
275 | appendToCompilerUsed( | ||||
276 | M, {static_cast<GlobalValue *>( | ||||
277 | ConstantExpr::getPointerBitCastOrAddrSpaceCast( | ||||
278 | cast<Constant>(SGV), Type::getInt8PtrTy(Ctx)))}); | ||||
279 | } | ||||
280 | |||||
281 | // The verifier rejects used lists containing an inttoptr of a constant | ||||
282 | // so remove the variables from these lists before replaceAllUsesWith | ||||
283 | removeFromUsedLists(M, LocalVars); | ||||
284 | |||||
285 | // Replace uses of ith variable with a constantexpr to the ith field of the | ||||
286 | // instance that will be allocated by AMDGPUMachineFunction | ||||
287 | Type *I32 = Type::getInt32Ty(Ctx); | ||||
288 | for (size_t I = 0; I < LocalVars.size(); I++) { | ||||
289 | GlobalVariable *GV = LocalVars[I]; | ||||
290 | Constant *GEPIdx[] = {ConstantInt::get(I32, 0), ConstantInt::get(I32, I)}; | ||||
291 | Constant *GEP = ConstantExpr::getGetElementPtr(LDSTy, SGV, GEPIdx); | ||||
292 | if (F
| ||||
293 | // Replace all constant uses with instructions if they belong to the | ||||
294 | // current kernel. | ||||
295 | for (User *U : make_early_inc_range(GV->users())) { | ||||
296 | if (ConstantExpr *C = dyn_cast<ConstantExpr>(U)) | ||||
297 | AMDGPU::replaceConstantUsesInFunction(C, F); | ||||
298 | } | ||||
299 | |||||
300 | GV->removeDeadConstantUsers(); | ||||
301 | |||||
302 | GV->replaceUsesWithIf(GEP, [F](Use &U) { | ||||
303 | Instruction *I = dyn_cast<Instruction>(U.getUser()); | ||||
304 | return I && I->getFunction() == F; | ||||
305 | }); | ||||
306 | } else { | ||||
307 | GV->replaceAllUsesWith(GEP); | ||||
308 | } | ||||
309 | if (GV->use_empty()) { | ||||
310 | UsedList.erase(GV); | ||||
311 | GV->eraseFromParent(); | ||||
312 | } | ||||
313 | |||||
314 | uint64_t Off = DL.getStructLayout(LDSTy)->getElementOffset(I); | ||||
315 | Align A = commonAlignment(StructAlign, Off); | ||||
316 | refineUsesAlignment(GEP, A, DL); | ||||
317 | } | ||||
318 | |||||
319 | // Mark kernels with asm that reads the address of the allocated structure | ||||
320 | // This is not necessary for lowering. This lets other passes, specifically | ||||
321 | // PromoteAlloca, accurately calculate how much LDS will be used by the | ||||
322 | // kernel after lowering. | ||||
323 | if (!F) { | ||||
324 | IRBuilder<> Builder(Ctx); | ||||
325 | SmallPtrSet<Function *, 32> Kernels; | ||||
326 | for (auto &I : M.functions()) { | ||||
327 | Function *Func = &I; | ||||
328 | if (AMDGPU::isKernelCC(Func) && !Kernels.contains(Func)) { | ||||
329 | markUsedByKernel(Builder, Func, SGV); | ||||
330 | Kernels.insert(Func); | ||||
331 | } | ||||
332 | } | ||||
333 | } | ||||
334 | return true; | ||||
335 | } | ||||
336 | |||||
337 | void refineUsesAlignment(Value *Ptr, Align A, const DataLayout &DL, | ||||
338 | unsigned MaxDepth = 5) { | ||||
339 | if (!MaxDepth
| ||||
340 | return; | ||||
341 | |||||
342 | for (User *U : Ptr->users()) { | ||||
343 | if (auto *LI = dyn_cast<LoadInst>(U)) { | ||||
344 | LI->setAlignment(std::max(A, LI->getAlign())); | ||||
345 | continue; | ||||
346 | } | ||||
347 | if (auto *SI = dyn_cast<StoreInst>(U)) { | ||||
348 | if (SI->getPointerOperand() == Ptr) | ||||
349 | SI->setAlignment(std::max(A, SI->getAlign())); | ||||
350 | continue; | ||||
351 | } | ||||
352 | if (auto *AI = dyn_cast<AtomicRMWInst>(U)) { | ||||
353 | // None of atomicrmw operations can work on pointers, but let's | ||||
354 | // check it anyway in case it will or we will process ConstantExpr. | ||||
355 | if (AI->getPointerOperand() == Ptr) | ||||
356 | AI->setAlignment(std::max(A, AI->getAlign())); | ||||
357 | continue; | ||||
358 | } | ||||
359 | if (auto *AI = dyn_cast<AtomicCmpXchgInst>(U)) { | ||||
360 | if (AI->getPointerOperand() == Ptr) | ||||
361 | AI->setAlignment(std::max(A, AI->getAlign())); | ||||
362 | continue; | ||||
363 | } | ||||
364 | if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) { | ||||
365 | unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType()); | ||||
366 | APInt Off(BitWidth, 0); | ||||
367 | if (GEP->getPointerOperand() == Ptr && | ||||
368 | GEP->accumulateConstantOffset(DL, Off)) { | ||||
369 | Align GA = commonAlignment(A, Off.getLimitedValue()); | ||||
370 | refineUsesAlignment(GEP, GA, DL, MaxDepth - 1); | ||||
371 | } | ||||
372 | continue; | ||||
373 | } | ||||
374 | if (auto *I = dyn_cast<Instruction>(U)) { | ||||
375 | if (I->getOpcode() == Instruction::BitCast || | ||||
376 | I->getOpcode() == Instruction::AddrSpaceCast) | ||||
377 | refineUsesAlignment(I, A, DL, MaxDepth - 1); | ||||
378 | } | ||||
379 | } | ||||
380 | } | ||||
381 | }; | ||||
382 | |||||
383 | } // namespace | ||||
384 | char AMDGPULowerModuleLDS::ID = 0; | ||||
385 | |||||
386 | char &llvm::AMDGPULowerModuleLDSID = AMDGPULowerModuleLDS::ID; | ||||
387 | |||||
388 | INITIALIZE_PASS(AMDGPULowerModuleLDS, DEBUG_TYPE,static void *initializeAMDGPULowerModuleLDSPassOnce(PassRegistry &Registry) { PassInfo *PI = new PassInfo( "Lower uses of LDS variables from non-kernel functions" , "amdgpu-lower-module-lds", &AMDGPULowerModuleLDS::ID, PassInfo ::NormalCtor_t(callDefaultCtor<AMDGPULowerModuleLDS>), false , false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeAMDGPULowerModuleLDSPassFlag; void llvm::initializeAMDGPULowerModuleLDSPass(PassRegistry &Registry ) { llvm::call_once(InitializeAMDGPULowerModuleLDSPassFlag, initializeAMDGPULowerModuleLDSPassOnce , std::ref(Registry)); } | ||||
389 | "Lower uses of LDS variables from non-kernel functions", false,static void *initializeAMDGPULowerModuleLDSPassOnce(PassRegistry &Registry) { PassInfo *PI = new PassInfo( "Lower uses of LDS variables from non-kernel functions" , "amdgpu-lower-module-lds", &AMDGPULowerModuleLDS::ID, PassInfo ::NormalCtor_t(callDefaultCtor<AMDGPULowerModuleLDS>), false , false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeAMDGPULowerModuleLDSPassFlag; void llvm::initializeAMDGPULowerModuleLDSPass(PassRegistry &Registry ) { llvm::call_once(InitializeAMDGPULowerModuleLDSPassFlag, initializeAMDGPULowerModuleLDSPassOnce , std::ref(Registry)); } | ||||
390 | false)static void *initializeAMDGPULowerModuleLDSPassOnce(PassRegistry &Registry) { PassInfo *PI = new PassInfo( "Lower uses of LDS variables from non-kernel functions" , "amdgpu-lower-module-lds", &AMDGPULowerModuleLDS::ID, PassInfo ::NormalCtor_t(callDefaultCtor<AMDGPULowerModuleLDS>), false , false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeAMDGPULowerModuleLDSPassFlag; void llvm::initializeAMDGPULowerModuleLDSPass(PassRegistry &Registry ) { llvm::call_once(InitializeAMDGPULowerModuleLDSPassFlag, initializeAMDGPULowerModuleLDSPassOnce , std::ref(Registry)); } | ||||
391 | |||||
392 | ModulePass *llvm::createAMDGPULowerModuleLDSPass() { | ||||
393 | return new AMDGPULowerModuleLDS(); | ||||
394 | } | ||||
395 | |||||
396 | PreservedAnalyses AMDGPULowerModuleLDSPass::run(Module &M, | ||||
397 | ModuleAnalysisManager &) { | ||||
398 | return AMDGPULowerModuleLDS().runOnModule(M) ? PreservedAnalyses::none() | ||||
| |||||
399 | : PreservedAnalyses::all(); | ||||
400 | } |
1 | //===-- llvm/Support/Alignment.h - Useful alignment functions ---*- C++ -*-===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | // This file contains types to represent alignments. | |||
10 | // They are instrumented to guarantee some invariants are preserved and prevent | |||
11 | // invalid manipulations. | |||
12 | // | |||
13 | // - Align represents an alignment in bytes, it is always set and always a valid | |||
14 | // power of two, its minimum value is 1 which means no alignment requirements. | |||
15 | // | |||
16 | // - MaybeAlign is an optional type, it may be undefined or set. When it's set | |||
17 | // you can get the underlying Align type by using the getValue() method. | |||
18 | // | |||
19 | //===----------------------------------------------------------------------===// | |||
20 | ||||
21 | #ifndef LLVM_SUPPORT_ALIGNMENT_H_ | |||
22 | #define LLVM_SUPPORT_ALIGNMENT_H_ | |||
23 | ||||
24 | #include "llvm/ADT/Optional.h" | |||
25 | #include "llvm/Support/MathExtras.h" | |||
26 | #include <cassert> | |||
27 | #ifndef NDEBUG1 | |||
28 | #include <string> | |||
29 | #endif // NDEBUG | |||
30 | ||||
31 | namespace llvm { | |||
32 | ||||
// Asserts that the scalar operand `decl` is strictly positive. The argument
// is parenthesized so that an expression argument is compared as a whole.
#define ALIGN_CHECK_ISPOSITIVE(decl)                                           \
  assert((decl) > 0 && (#decl " should be defined"))
35 | ||||
36 | /// This struct is a compact representation of a valid (non-zero power of two) | |||
37 | /// alignment. | |||
38 | /// It is suitable for use as static global constants. | |||
39 | struct Align { | |||
40 | private: | |||
41 | uint8_t ShiftValue = 0; /// The log2 of the required alignment. | |||
42 | /// ShiftValue is less than 64 by construction. | |||
43 | ||||
44 | friend struct MaybeAlign; | |||
45 | friend unsigned Log2(Align); | |||
46 | friend bool operator==(Align Lhs, Align Rhs); | |||
47 | friend bool operator!=(Align Lhs, Align Rhs); | |||
48 | friend bool operator<=(Align Lhs, Align Rhs); | |||
49 | friend bool operator>=(Align Lhs, Align Rhs); | |||
50 | friend bool operator<(Align Lhs, Align Rhs); | |||
51 | friend bool operator>(Align Lhs, Align Rhs); | |||
52 | friend unsigned encode(struct MaybeAlign A); | |||
53 | friend struct MaybeAlign decodeMaybeAlign(unsigned Value); | |||
54 | ||||
55 | /// A trivial type to allow construction of constexpr Align. | |||
56 | /// This is currently needed to workaround a bug in GCC 5.3 which prevents | |||
57 | /// definition of constexpr assign operators. | |||
58 | /// https://stackoverflow.com/questions/46756288/explicitly-defaulted-function-cannot-be-declared-as-constexpr-because-the-implic | |||
59 | /// FIXME: Remove this, make all assign operators constexpr and introduce user | |||
60 | /// defined literals when we don't have to support GCC 5.3 anymore. | |||
61 | /// https://llvm.org/docs/GettingStarted.html#getting-a-modern-host-c-toolchain | |||
62 | struct LogValue { | |||
63 | uint8_t Log; | |||
64 | }; | |||
65 | ||||
66 | public: | |||
67 | /// Default is byte-aligned. | |||
68 | constexpr Align() = default; | |||
69 | /// Do not perform checks in case of copy/move construct/assign, because the | |||
70 | /// checks have been performed when building `Other`. | |||
71 | constexpr Align(const Align &Other) = default; | |||
72 | constexpr Align(Align &&Other) = default; | |||
73 | Align &operator=(const Align &Other) = default; | |||
74 | Align &operator=(Align &&Other) = default; | |||
75 | ||||
76 | explicit Align(uint64_t Value) { | |||
77 | assert(Value > 0 && "Value must not be 0")((void)0); | |||
78 | assert(llvm::isPowerOf2_64(Value) && "Alignment is not a power of 2")((void)0); | |||
79 | ShiftValue = Log2_64(Value); | |||
80 | assert(ShiftValue < 64 && "Broken invariant")((void)0); | |||
81 | } | |||
82 | ||||
83 | /// This is a hole in the type system and should not be abused. | |||
84 | /// Needed to interact with C for instance. | |||
85 | uint64_t value() const { return uint64_t(1) << ShiftValue; } | |||
| ||||
86 | ||||
87 | /// Allow constructions of constexpr Align. | |||
88 | template <size_t kValue> constexpr static LogValue Constant() { | |||
89 | return LogValue{static_cast<uint8_t>(CTLog2<kValue>())}; | |||
90 | } | |||
91 | ||||
92 | /// Allow constructions of constexpr Align from types. | |||
93 | /// Compile time equivalent to Align(alignof(T)). | |||
94 | template <typename T> constexpr static LogValue Of() { | |||
95 | return Constant<std::alignment_of<T>::value>(); | |||
96 | } | |||
97 | ||||
98 | /// Constexpr constructor from LogValue type. | |||
99 | constexpr Align(LogValue CA) : ShiftValue(CA.Log) {} | |||
100 | }; | |||
101 | ||||
102 | /// Treats the value 0 as a 1, so Align is always at least 1. | |||
103 | inline Align assumeAligned(uint64_t Value) { | |||
104 | return Value ? Align(Value) : Align(); | |||
105 | } | |||
106 | ||||
107 | /// This struct is a compact representation of a valid (power of two) or | |||
108 | /// undefined (0) alignment. | |||
109 | struct MaybeAlign : public llvm::Optional<Align> { | |||
110 | private: | |||
111 | using UP = llvm::Optional<Align>; | |||
112 | ||||
113 | public: | |||
114 | /// Default is undefined. | |||
115 | MaybeAlign() = default; | |||
116 | /// Do not perform checks in case of copy/move construct/assign, because the | |||
117 | /// checks have been performed when building `Other`. | |||
118 | MaybeAlign(const MaybeAlign &Other) = default; | |||
119 | MaybeAlign &operator=(const MaybeAlign &Other) = default; | |||
120 | MaybeAlign(MaybeAlign &&Other) = default; | |||
121 | MaybeAlign &operator=(MaybeAlign &&Other) = default; | |||
122 | ||||
123 | /// Use llvm::Optional<Align> constructor. | |||
124 | using UP::UP; | |||
125 | ||||
126 | explicit MaybeAlign(uint64_t Value) { | |||
127 | assert((Value == 0 || llvm::isPowerOf2_64(Value)) &&((void)0) | |||
128 | "Alignment is neither 0 nor a power of 2")((void)0); | |||
129 | if (Value) | |||
130 | emplace(Value); | |||
131 | } | |||
132 | ||||
133 | /// For convenience, returns a valid alignment or 1 if undefined. | |||
134 | Align valueOrOne() const { return hasValue() ? getValue() : Align(); } | |||
135 | }; | |||
136 | ||||
137 | /// Checks that SizeInBytes is a multiple of the alignment. | |||
138 | inline bool isAligned(Align Lhs, uint64_t SizeInBytes) { | |||
139 | return SizeInBytes % Lhs.value() == 0; | |||
140 | } | |||
141 | ||||
142 | /// Checks that Addr is a multiple of the alignment. | |||
143 | inline bool isAddrAligned(Align Lhs, const void *Addr) { | |||
144 | return isAligned(Lhs, reinterpret_cast<uintptr_t>(Addr)); | |||
145 | } | |||
146 | ||||
147 | /// Returns a multiple of A needed to store `Size` bytes. | |||
148 | inline uint64_t alignTo(uint64_t Size, Align A) { | |||
149 | const uint64_t Value = A.value(); | |||
150 | // The following line is equivalent to `(Size + Value - 1) / Value * Value`. | |||
151 | ||||
152 | // The division followed by a multiplication can be thought of as a right | |||
153 | // shift followed by a left shift which zeros out the extra bits produced in | |||
154 | // the bump; `~(Value - 1)` is a mask where all those bits being zeroed out | |||
155 | // are just zero. | |||
156 | ||||
157 | // Most compilers can generate this code but the pattern may be missed when | |||
158 | // multiple functions gets inlined. | |||
159 | return (Size + Value - 1) & ~(Value - 1U); | |||
160 | } | |||
161 | ||||
162 | /// If non-zero \p Skew is specified, the return value will be a minimal integer | |||
163 | /// that is greater than or equal to \p Size and equal to \p A * N + \p Skew for | |||
164 | /// some integer N. If \p Skew is larger than \p A, its value is adjusted to '\p | |||
165 | /// Skew mod \p A'. | |||
166 | /// | |||
167 | /// Examples: | |||
168 | /// \code | |||
169 | /// alignTo(5, Align(8), 7) = 7 | |||
170 | /// alignTo(17, Align(8), 1) = 17 | |||
171 | /// alignTo(~0LL, Align(8), 3) = 3 | |||
172 | /// \endcode | |||
173 | inline uint64_t alignTo(uint64_t Size, Align A, uint64_t Skew) { | |||
174 | const uint64_t Value = A.value(); | |||
175 | Skew %= Value; | |||
176 | return ((Size + Value - 1 - Skew) & ~(Value - 1U)) + Skew; | |||
177 | } | |||
178 | ||||
179 | /// Returns a multiple of A needed to store `Size` bytes. | |||
180 | /// Returns `Size` if current alignment is undefined. | |||
181 | inline uint64_t alignTo(uint64_t Size, MaybeAlign A) { | |||
182 | return A ? alignTo(Size, A.getValue()) : Size; | |||
183 | } | |||
184 | ||||
185 | /// Aligns `Addr` to `Alignment` bytes, rounding up. | |||
186 | inline uintptr_t alignAddr(const void *Addr, Align Alignment) { | |||
187 | uintptr_t ArithAddr = reinterpret_cast<uintptr_t>(Addr); | |||
188 | assert(static_cast<uintptr_t>(ArithAddr + Alignment.value() - 1) >=((void)0) | |||
189 | ArithAddr &&((void)0) | |||
190 | "Overflow")((void)0); | |||
191 | return alignTo(ArithAddr, Alignment); | |||
192 | } | |||
193 | ||||
194 | /// Returns the offset to the next integer (mod 2**64) that is greater than | |||
195 | /// or equal to \p Value and is a multiple of \p Align. | |||
196 | inline uint64_t offsetToAlignment(uint64_t Value, Align Alignment) { | |||
197 | return alignTo(Value, Alignment) - Value; | |||
198 | } | |||
199 | ||||
200 | /// Returns the necessary adjustment for aligning `Addr` to `Alignment` | |||
201 | /// bytes, rounding up. | |||
202 | inline uint64_t offsetToAlignedAddr(const void *Addr, Align Alignment) { | |||
203 | return offsetToAlignment(reinterpret_cast<uintptr_t>(Addr), Alignment); | |||
204 | } | |||
205 | ||||
206 | /// Returns the log2 of the alignment. | |||
207 | inline unsigned Log2(Align A) { return A.ShiftValue; } | |||
208 | ||||
209 | /// Returns the alignment that satisfies both alignments. | |||
210 | /// Same semantic as MinAlign. | |||
211 | inline Align commonAlignment(Align A, Align B) { return std::min(A, B); } | |||
212 | ||||
213 | /// Returns the alignment that satisfies both alignments. | |||
214 | /// Same semantic as MinAlign. | |||
215 | inline Align commonAlignment(Align A, uint64_t Offset) { | |||
216 | return Align(MinAlign(A.value(), Offset)); | |||
217 | } | |||
218 | ||||
219 | /// Returns the alignment that satisfies both alignments. | |||
220 | /// Same semantic as MinAlign. | |||
221 | inline MaybeAlign commonAlignment(MaybeAlign A, MaybeAlign B) { | |||
222 | return A && B ? commonAlignment(*A, *B) : A ? A : B; | |||
223 | } | |||
224 | ||||
225 | /// Returns the alignment that satisfies both alignments. | |||
226 | /// Same semantic as MinAlign. | |||
227 | inline MaybeAlign commonAlignment(MaybeAlign A, uint64_t Offset) { | |||
228 | return MaybeAlign(MinAlign((*A).value(), Offset)); | |||
229 | } | |||
230 | ||||
231 | /// Returns a representation of the alignment that encodes undefined as 0. | |||
232 | inline unsigned encode(MaybeAlign A) { return A ? A->ShiftValue + 1 : 0; } | |||
233 | ||||
234 | /// Dual operation of the encode function above. | |||
235 | inline MaybeAlign decodeMaybeAlign(unsigned Value) { | |||
236 | if (Value == 0) | |||
237 | return MaybeAlign(); | |||
238 | Align Out; | |||
239 | Out.ShiftValue = Value - 1; | |||
240 | return Out; | |||
241 | } | |||
242 | ||||
243 | /// Returns a representation of the alignment, the encoded value is positive by | |||
244 | /// definition. | |||
245 | inline unsigned encode(Align A) { return encode(MaybeAlign(A)); } | |||
246 | ||||
247 | /// Comparisons between Align and scalars. Rhs must be positive. | |||
248 | inline bool operator==(Align Lhs, uint64_t Rhs) { | |||
249 | ALIGN_CHECK_ISPOSITIVE(Rhs); | |||
250 | return Lhs.value() == Rhs; | |||
251 | } | |||
252 | inline bool operator!=(Align Lhs, uint64_t Rhs) { | |||
253 | ALIGN_CHECK_ISPOSITIVE(Rhs); | |||
254 | return Lhs.value() != Rhs; | |||
255 | } | |||
256 | inline bool operator<=(Align Lhs, uint64_t Rhs) { | |||
257 | ALIGN_CHECK_ISPOSITIVE(Rhs); | |||
258 | return Lhs.value() <= Rhs; | |||
259 | } | |||
260 | inline bool operator>=(Align Lhs, uint64_t Rhs) { | |||
261 | ALIGN_CHECK_ISPOSITIVE(Rhs); | |||
262 | return Lhs.value() >= Rhs; | |||
263 | } | |||
264 | inline bool operator<(Align Lhs, uint64_t Rhs) { | |||
265 | ALIGN_CHECK_ISPOSITIVE(Rhs); | |||
266 | return Lhs.value() < Rhs; | |||
267 | } | |||
268 | inline bool operator>(Align Lhs, uint64_t Rhs) { | |||
269 | ALIGN_CHECK_ISPOSITIVE(Rhs); | |||
270 | return Lhs.value() > Rhs; | |||
271 | } | |||
272 | ||||
273 | /// Comparisons between MaybeAlign and scalars. | |||
274 | inline bool operator==(MaybeAlign Lhs, uint64_t Rhs) { | |||
275 | return Lhs ? (*Lhs).value() == Rhs : Rhs == 0; | |||
276 | } | |||
277 | inline bool operator!=(MaybeAlign Lhs, uint64_t Rhs) { | |||
278 | return Lhs ? (*Lhs).value() != Rhs : Rhs != 0; | |||
279 | } | |||
280 | ||||
281 | /// Comparisons operators between Align. | |||
282 | inline bool operator==(Align Lhs, Align Rhs) { | |||
283 | return Lhs.ShiftValue == Rhs.ShiftValue; | |||
284 | } | |||
285 | inline bool operator!=(Align Lhs, Align Rhs) { | |||
286 | return Lhs.ShiftValue != Rhs.ShiftValue; | |||
287 | } | |||
288 | inline bool operator<=(Align Lhs, Align Rhs) { | |||
289 | return Lhs.ShiftValue <= Rhs.ShiftValue; | |||
290 | } | |||
291 | inline bool operator>=(Align Lhs, Align Rhs) { | |||
292 | return Lhs.ShiftValue >= Rhs.ShiftValue; | |||
293 | } | |||
294 | inline bool operator<(Align Lhs, Align Rhs) { | |||
295 | return Lhs.ShiftValue < Rhs.ShiftValue; | |||
296 | } | |||
297 | inline bool operator>(Align Lhs, Align Rhs) { | |||
298 | return Lhs.ShiftValue > Rhs.ShiftValue; | |||
299 | } | |||
300 | ||||
301 | // Don't allow relational comparisons with MaybeAlign. | |||
302 | bool operator<=(Align Lhs, MaybeAlign Rhs) = delete; | |||
303 | bool operator>=(Align Lhs, MaybeAlign Rhs) = delete; | |||
304 | bool operator<(Align Lhs, MaybeAlign Rhs) = delete; | |||
305 | bool operator>(Align Lhs, MaybeAlign Rhs) = delete; | |||
306 | ||||
307 | bool operator<=(MaybeAlign Lhs, Align Rhs) = delete; | |||
308 | bool operator>=(MaybeAlign Lhs, Align Rhs) = delete; | |||
309 | bool operator<(MaybeAlign Lhs, Align Rhs) = delete; | |||
310 | bool operator>(MaybeAlign Lhs, Align Rhs) = delete; | |||
311 | ||||
312 | bool operator<=(MaybeAlign Lhs, MaybeAlign Rhs) = delete; | |||
313 | bool operator>=(MaybeAlign Lhs, MaybeAlign Rhs) = delete; | |||
314 | bool operator<(MaybeAlign Lhs, MaybeAlign Rhs) = delete; | |||
315 | bool operator>(MaybeAlign Lhs, MaybeAlign Rhs) = delete; | |||
316 | ||||
317 | inline Align operator*(Align Lhs, uint64_t Rhs) { | |||
318 | assert(Rhs > 0 && "Rhs must be positive")((void)0); | |||
319 | return Align(Lhs.value() * Rhs); | |||
320 | } | |||
321 | ||||
322 | inline MaybeAlign operator*(MaybeAlign Lhs, uint64_t Rhs) { | |||
323 | assert(Rhs > 0 && "Rhs must be positive")((void)0); | |||
324 | return Lhs ? Lhs.getValue() * Rhs : MaybeAlign(); | |||
325 | } | |||
326 | ||||
327 | inline Align operator/(Align Lhs, uint64_t Divisor) { | |||
328 | assert(llvm::isPowerOf2_64(Divisor) &&((void)0) | |||
329 | "Divisor must be positive and a power of 2")((void)0); | |||
330 | assert(Lhs != 1 && "Can't halve byte alignment")((void)0); | |||
331 | return Align(Lhs.value() / Divisor); | |||
332 | } | |||
333 | ||||
334 | inline MaybeAlign operator/(MaybeAlign Lhs, uint64_t Divisor) { | |||
335 | assert(llvm::isPowerOf2_64(Divisor) &&((void)0) | |||
336 | "Divisor must be positive and a power of 2")((void)0); | |||
337 | return Lhs ? Lhs.getValue() / Divisor : MaybeAlign(); | |||
338 | } | |||
339 | ||||
340 | inline Align max(MaybeAlign Lhs, Align Rhs) { | |||
341 | return Lhs && *Lhs > Rhs ? *Lhs : Rhs; | |||
342 | } | |||
343 | ||||
344 | inline Align max(Align Lhs, MaybeAlign Rhs) { | |||
345 | return Rhs && *Rhs > Lhs ? *Rhs : Lhs; | |||
346 | } | |||
347 | ||||
348 | #ifndef NDEBUG1 | |||
349 | // For usage in LLVM_DEBUG macros. | |||
350 | inline std::string DebugStr(const Align &A) { | |||
351 | return std::to_string(A.value()); | |||
352 | } | |||
353 | // For usage in LLVM_DEBUG macros. | |||
354 | inline std::string DebugStr(const MaybeAlign &MA) { | |||
355 | if (MA) | |||
356 | return std::to_string(MA->value()); | |||
357 | return "None"; | |||
358 | } | |||
359 | #endif // NDEBUG | |||
360 | ||||
361 | #undef ALIGN_CHECK_ISPOSITIVE | |||
362 | ||||
363 | } // namespace llvm | |||
364 | ||||
365 | #endif // LLVM_SUPPORT_ALIGNMENT_H_ |