Bug Summary

File:src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Support/Alignment.h
Warning:line 85, column 47
The result of the left shift is undefined due to shifting by '255', which is greater or equal to the width of type 'uint64_t'

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AMDGPULowerModuleLDSPass.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 1 -fhalf-no-semantic-interposition -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Analysis -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ASMParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/BinaryFormat -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitstream -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /include/llvm/CodeGen -I /include/llvm/CodeGen/PBQP -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Coroutines -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData/Coverage -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/CodeView -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/DWARF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/MSF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/PDB -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Demangle -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/JITLink -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/Orc -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenACC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenMP -I /include/llvm/CodeGen/GlobalISel -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IRReader -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/LTO -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Linker -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC/MCParser -I /include/llvm/CodeGen/MIRParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Object -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Option -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Passes -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Scalar -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ADT -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Support -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/Symbolize -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Target -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Utils -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Vectorize -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/IPO -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include -I /usr/src/gnu/usr.bin/clang/libLLVM/../include -I /usr/src/gnu/usr.bin/clang/libLLVM/obj -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include -D NDEBUG -D __STDC_LIMIT_MACROS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D LLVM_PREFIX="/usr" -D PIC -internal-isystem /usr/include/c++/v1 -internal-isystem /usr/local/lib/clang/13.0.0/include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -ferror-limit 19 -fvisibility-inlines-hidden -fwrapv -D_RET_PROTECTOR -ret-protector -fno-rtti -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc 
-fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/ben/Projects/vmm/scan-build/2022-01-12-194120-40624-1 -x c++ /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp

/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp

1//===-- AMDGPULowerModuleLDSPass.cpp ------------------------------*- C++ -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass eliminates LDS uses from non-kernel functions.
10//
11// The strategy is to create a new struct with a field for each LDS variable
12// and allocate that struct at the same address for every kernel. Uses of the
13// original LDS variables are then replaced with compile time offsets from that
14// known address. AMDGPUMachineFunction allocates the LDS global.
15//
16// Local variables with constant annotation or non-undef initializer are passed
17// through unchanged for simplification or error diagnostics in later passes.
18//
19// To reduce the memory overhead variables that are only used by kernels are
20// excluded from this transform. The analysis to determine whether a variable
21// is only used by a kernel is cheap and conservative so this may allocate
22// a variable in every kernel when it was not strictly necessary to do so.
23//
24// A possible future refinement is to specialise the structure per-kernel, so
25// that fields can be elided based on more expensive analysis.
26//
27// NOTE: This pass packs LDS variables (possibly large ones) directly into a
28// struct type, which can require a huge allocation for the struct instance in
29// every kernel. Hence, before running this pass, it is advisable to run the
30// pass "amdgpu-replace-lds-use-with-pointer", which replaces LDS uses within
31// non-kernel functions by pointers and thereby minimizes the unnecessary
32// per-kernel allocation of LDS memory.
33//
34//===----------------------------------------------------------------------===//
35
36#include "AMDGPU.h"
37#include "Utils/AMDGPUBaseInfo.h"
38#include "Utils/AMDGPULDSUtils.h"
39#include "llvm/ADT/STLExtras.h"
40#include "llvm/IR/Constants.h"
41#include "llvm/IR/DerivedTypes.h"
42#include "llvm/IR/IRBuilder.h"
43#include "llvm/IR/InlineAsm.h"
44#include "llvm/IR/Instructions.h"
45#include "llvm/InitializePasses.h"
46#include "llvm/Pass.h"
47#include "llvm/Support/CommandLine.h"
48#include "llvm/Support/Debug.h"
49#include "llvm/Support/OptimizedStructLayout.h"
50#include "llvm/Transforms/Utils/ModuleUtils.h"
51#include <vector>
52
53#define DEBUG_TYPE"amdgpu-lower-module-lds" "amdgpu-lower-module-lds"
54
55using namespace llvm;
56
57static cl::opt<bool> SuperAlignLDSGlobals(
58 "amdgpu-super-align-lds-globals",
59 cl::desc("Increase alignment of LDS if it is not on align boundary"),
60 cl::init(true), cl::Hidden);
61
62namespace {
63
64class AMDGPULowerModuleLDS : public ModulePass {
65
66 static void removeFromUsedList(Module &M, StringRef Name,
67 SmallPtrSetImpl<Constant *> &ToRemove) {
68 GlobalVariable *GV = M.getNamedGlobal(Name);
69 if (!GV || ToRemove.empty()) {
70 return;
71 }
72
73 SmallVector<Constant *, 16> Init;
74 auto *CA = cast<ConstantArray>(GV->getInitializer());
75 for (auto &Op : CA->operands()) {
76 // ModuleUtils::appendToUsed only inserts Constants
77 Constant *C = cast<Constant>(Op);
78 if (!ToRemove.contains(C->stripPointerCasts())) {
79 Init.push_back(C);
80 }
81 }
82
83 if (Init.size() == CA->getNumOperands()) {
84 return; // none to remove
85 }
86
87 GV->eraseFromParent();
88
89 for (Constant *C : ToRemove) {
90 C->removeDeadConstantUsers();
91 }
92
93 if (!Init.empty()) {
94 ArrayType *ATy =
95 ArrayType::get(Type::getInt8PtrTy(M.getContext()), Init.size());
96 GV =
97 new llvm::GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
98 ConstantArray::get(ATy, Init), Name);
99 GV->setSection("llvm.metadata");
100 }
101 }
102
103 static void
104 removeFromUsedLists(Module &M,
105 const std::vector<GlobalVariable *> &LocalVars) {
106 SmallPtrSet<Constant *, 32> LocalVarsSet;
107 for (size_t I = 0; I < LocalVars.size(); I++) {
108 if (Constant *C = dyn_cast<Constant>(LocalVars[I]->stripPointerCasts())) {
109 LocalVarsSet.insert(C);
110 }
111 }
112 removeFromUsedList(M, "llvm.used", LocalVarsSet);
113 removeFromUsedList(M, "llvm.compiler.used", LocalVarsSet);
114 }
115
116 static void markUsedByKernel(IRBuilder<> &Builder, Function *Func,
117 GlobalVariable *SGV) {
118 // The llvm.amdgcn.module.lds instance is implicitly used by all kernels
119 // that might call a function which accesses a field within it. This is
120 // presently approximated to 'all kernels' if there are any such functions
121 // in the module. This implicit use is reified as an explicit use here so
122 // that later passes, specifically PromoteAlloca, account for the required
123 // memory without any knowledge of this transform.
124
125 // An operand bundle on llvm.donothing works because the call instruction
126 // survives until after the last pass that needs to account for LDS. It is
127 // better than inline asm as the latter survives until the end of codegen. A
128 // totally robust solution would be a function with the same semantics as
129 // llvm.donothing that takes a pointer to the instance and is lowered to a
130 // no-op after LDS is allocated, but that is not presently necessary.
131
132 LLVMContext &Ctx = Func->getContext();
133
134 Builder.SetInsertPoint(Func->getEntryBlock().getFirstNonPHI());
135
136 FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), {});
137
138 Function *Decl =
139 Intrinsic::getDeclaration(Func->getParent(), Intrinsic::donothing, {});
140
141 Value *UseInstance[1] = {Builder.CreateInBoundsGEP(
142 SGV->getValueType(), SGV, ConstantInt::get(Type::getInt32Ty(Ctx), 0))};
143
144 Builder.CreateCall(FTy, Decl, {},
145 {OperandBundleDefT<Value *>("ExplicitUse", UseInstance)},
146 "");
147 }
148
149private:
150 SmallPtrSet<GlobalValue *, 32> UsedList;
151
152public:
153 static char ID;
154
155 AMDGPULowerModuleLDS() : ModulePass(ID) {
156 initializeAMDGPULowerModuleLDSPass(*PassRegistry::getPassRegistry());
157 }
158
159 bool runOnModule(Module &M) override {
160 UsedList = AMDGPU::getUsedList(M);
161
162 bool Changed = processUsedLDS(M);
2
Calling 'AMDGPULowerModuleLDS::processUsedLDS'
163
164 for (Function &F : M.functions()) {
165 // Only lower compute kernels' LDS.
166 if (!AMDGPU::isKernel(F.getCallingConv()))
167 continue;
168 Changed |= processUsedLDS(M, &F);
169 }
170
171 UsedList.clear();
172 return Changed;
173 }
174
175private:
176 bool processUsedLDS(Module &M, Function *F = nullptr) {
177 LLVMContext &Ctx = M.getContext();
178 const DataLayout &DL = M.getDataLayout();
179
180 // Find variables to move into new struct instance
181 std::vector<GlobalVariable *> FoundLocalVars =
182 AMDGPU::findVariablesToLower(M, F);
183
184 if (FoundLocalVars.empty()) {
3
Assuming the condition is false
4
Taking false branch
185 // No variables to rewrite, no changes made.
186 return false;
187 }
188
189 // Increase the alignment of LDS globals if necessary to maximise the chance
190 // that we can use aligned LDS instructions to access them.
191 if (SuperAlignLDSGlobals) {
5
Assuming the condition is false
6
Taking false branch
192 for (auto *GV : FoundLocalVars) {
193 Align Alignment = AMDGPU::getAlign(DL, GV);
194 TypeSize GVSize = DL.getTypeAllocSize(GV->getValueType());
195
196 if (GVSize > 8) {
197 // We might want to use a b96 or b128 load/store
198 Alignment = std::max(Alignment, Align(16));
199 } else if (GVSize > 4) {
200 // We might want to use a b64 load/store
201 Alignment = std::max(Alignment, Align(8));
202 } else if (GVSize > 2) {
203 // We might want to use a b32 load/store
204 Alignment = std::max(Alignment, Align(4));
205 } else if (GVSize > 1) {
206 // We might want to use a b16 load/store
207 Alignment = std::max(Alignment, Align(2));
208 }
209
210 GV->setAlignment(Alignment);
211 }
212 }
213
214 SmallVector<OptimizedStructLayoutField, 8> LayoutFields;
215 LayoutFields.reserve(FoundLocalVars.size());
216 for (GlobalVariable *GV : FoundLocalVars) {
217 OptimizedStructLayoutField F(GV, DL.getTypeAllocSize(GV->getValueType()),
218 AMDGPU::getAlign(DL, GV));
219 LayoutFields.emplace_back(F);
220 }
221
222 performOptimizedStructLayout(LayoutFields);
223
224 std::vector<GlobalVariable *> LocalVars;
225 LocalVars.reserve(FoundLocalVars.size()); // will be at least this large
226 {
227 // This usually won't need to insert any padding, perhaps avoid the alloc
228 uint64_t CurrentOffset = 0;
229 for (size_t I = 0; I < LayoutFields.size(); I++) {
7
Assuming the condition is false
8
Loop condition is false. Execution continues on line 255
230 GlobalVariable *FGV = static_cast<GlobalVariable *>(
231 const_cast<void *>(LayoutFields[I].Id));
232 Align DataAlign = LayoutFields[I].Alignment;
233
234 uint64_t DataAlignV = DataAlign.value();
235 if (uint64_t Rem = CurrentOffset % DataAlignV) {
236 uint64_t Padding = DataAlignV - Rem;
237
238 // Append an array of padding bytes to meet alignment requested
239 // Note (o + (a - (o % a)) ) % a == 0
240 // (offset + Padding ) % align == 0
241
242 Type *ATy = ArrayType::get(Type::getInt8Ty(Ctx), Padding);
243 LocalVars.push_back(new GlobalVariable(
244 M, ATy, false, GlobalValue::InternalLinkage, UndefValue::get(ATy),
245 "", nullptr, GlobalValue::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS,
246 false));
247 CurrentOffset += Padding;
248 }
249
250 LocalVars.push_back(FGV);
251 CurrentOffset += LayoutFields[I].Size;
252 }
253 }
254
255 std::vector<Type *> LocalVarTypes;
256 LocalVarTypes.reserve(LocalVars.size());
257 std::transform(
258 LocalVars.cbegin(), LocalVars.cend(), std::back_inserter(LocalVarTypes),
259 [](const GlobalVariable *V) -> Type * { return V->getValueType(); });
260
261 std::string VarName(
262 F
8.1
'F' is null
8.1
'F' is null
? (Twine("llvm.amdgcn.kernel.") + F->getName() + ".lds").str()
9
'?' condition is false
263 : "llvm.amdgcn.module.lds");
264 StructType *LDSTy = StructType::create(Ctx, LocalVarTypes, VarName + ".t");
265
266 Align StructAlign =
267 AMDGPU::getAlign(DL, LocalVars[0]);
268
269 GlobalVariable *SGV = new GlobalVariable(
270 M, LDSTy, false, GlobalValue::InternalLinkage, UndefValue::get(LDSTy),
271 VarName, nullptr, GlobalValue::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS,
272 false);
273 SGV->setAlignment(StructAlign);
274 if (!F
9.1
'F' is null
9.1
'F' is null
) {
10
Taking true branch
275 appendToCompilerUsed(
276 M, {static_cast<GlobalValue *>(
277 ConstantExpr::getPointerBitCastOrAddrSpaceCast(
278 cast<Constant>(SGV), Type::getInt8PtrTy(Ctx)))});
11
'SGV' is a 'Constant'
279 }
280
281 // The verifier rejects used lists containing an inttoptr of a constant
282 // so remove the variables from these lists before replaceAllUsesWith
283 removeFromUsedLists(M, LocalVars);
284
285 // Replace uses of ith variable with a constantexpr to the ith field of the
286 // instance that will be allocated by AMDGPUMachineFunction
287 Type *I32 = Type::getInt32Ty(Ctx);
288 for (size_t I = 0; I < LocalVars.size(); I++) {
12
Assuming the condition is true
13
Loop condition is true. Entering loop body
289 GlobalVariable *GV = LocalVars[I];
290 Constant *GEPIdx[] = {ConstantInt::get(I32, 0), ConstantInt::get(I32, I)};
291 Constant *GEP = ConstantExpr::getGetElementPtr(LDSTy, SGV, GEPIdx);
292 if (F
13.1
'F' is null
13.1
'F' is null
) {
14
Taking false branch
293 // Replace all constant uses with instructions if they belong to the
294 // current kernel.
295 for (User *U : make_early_inc_range(GV->users())) {
296 if (ConstantExpr *C = dyn_cast<ConstantExpr>(U))
297 AMDGPU::replaceConstantUsesInFunction(C, F);
298 }
299
300 GV->removeDeadConstantUsers();
301
302 GV->replaceUsesWithIf(GEP, [F](Use &U) {
303 Instruction *I = dyn_cast<Instruction>(U.getUser());
304 return I && I->getFunction() == F;
305 });
306 } else {
307 GV->replaceAllUsesWith(GEP);
308 }
309 if (GV->use_empty()) {
15
Taking false branch
310 UsedList.erase(GV);
311 GV->eraseFromParent();
312 }
313
314 uint64_t Off = DL.getStructLayout(LDSTy)->getElementOffset(I);
315 Align A = commonAlignment(StructAlign, Off);
316 refineUsesAlignment(GEP, A, DL);
16
Calling 'AMDGPULowerModuleLDS::refineUsesAlignment'
317 }
318
319 // Mark kernels with an explicit use of the allocated structure (an operand
320 // bundle on llvm.donothing). This is not necessary for lowering; it lets
321 // other passes, specifically PromoteAlloca, accurately calculate how much
322 // LDS will be used by the kernel after lowering.
323 if (!F) {
324 IRBuilder<> Builder(Ctx);
325 SmallPtrSet<Function *, 32> Kernels;
326 for (auto &I : M.functions()) {
327 Function *Func = &I;
328 if (AMDGPU::isKernelCC(Func) && !Kernels.contains(Func)) {
329 markUsedByKernel(Builder, Func, SGV);
330 Kernels.insert(Func);
331 }
332 }
333 }
334 return true;
335 }
336
337 void refineUsesAlignment(Value *Ptr, Align A, const DataLayout &DL,
338 unsigned MaxDepth = 5) {
339 if (!MaxDepth
16.1
'MaxDepth' is 5
16.1
'MaxDepth' is 5
|| A == 1)
17
The value 255 is assigned to 'Lhs.ShiftValue'
18
Calling 'operator=='
340 return;
341
342 for (User *U : Ptr->users()) {
343 if (auto *LI = dyn_cast<LoadInst>(U)) {
344 LI->setAlignment(std::max(A, LI->getAlign()));
345 continue;
346 }
347 if (auto *SI = dyn_cast<StoreInst>(U)) {
348 if (SI->getPointerOperand() == Ptr)
349 SI->setAlignment(std::max(A, SI->getAlign()));
350 continue;
351 }
352 if (auto *AI = dyn_cast<AtomicRMWInst>(U)) {
353 // None of the atomicrmw operations can work on pointers, but let's
354 // check it anyway in case they will or we will process ConstantExpr.
355 if (AI->getPointerOperand() == Ptr)
356 AI->setAlignment(std::max(A, AI->getAlign()));
357 continue;
358 }
359 if (auto *AI = dyn_cast<AtomicCmpXchgInst>(U)) {
360 if (AI->getPointerOperand() == Ptr)
361 AI->setAlignment(std::max(A, AI->getAlign()));
362 continue;
363 }
364 if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
365 unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());
366 APInt Off(BitWidth, 0);
367 if (GEP->getPointerOperand() == Ptr &&
368 GEP->accumulateConstantOffset(DL, Off)) {
369 Align GA = commonAlignment(A, Off.getLimitedValue());
370 refineUsesAlignment(GEP, GA, DL, MaxDepth - 1);
371 }
372 continue;
373 }
374 if (auto *I = dyn_cast<Instruction>(U)) {
375 if (I->getOpcode() == Instruction::BitCast ||
376 I->getOpcode() == Instruction::AddrSpaceCast)
377 refineUsesAlignment(I, A, DL, MaxDepth - 1);
378 }
379 }
380 }
381};
382
383} // namespace
384char AMDGPULowerModuleLDS::ID = 0;
385
386char &llvm::AMDGPULowerModuleLDSID = AMDGPULowerModuleLDS::ID;
387
388INITIALIZE_PASS(AMDGPULowerModuleLDS, DEBUG_TYPE,static void *initializeAMDGPULowerModuleLDSPassOnce(PassRegistry
&Registry) { PassInfo *PI = new PassInfo( "Lower uses of LDS variables from non-kernel functions"
, "amdgpu-lower-module-lds", &AMDGPULowerModuleLDS::ID, PassInfo
::NormalCtor_t(callDefaultCtor<AMDGPULowerModuleLDS>), false
, false); Registry.registerPass(*PI, true); return PI; } static
llvm::once_flag InitializeAMDGPULowerModuleLDSPassFlag; void
llvm::initializeAMDGPULowerModuleLDSPass(PassRegistry &Registry
) { llvm::call_once(InitializeAMDGPULowerModuleLDSPassFlag, initializeAMDGPULowerModuleLDSPassOnce
, std::ref(Registry)); }
389 "Lower uses of LDS variables from non-kernel functions", false,static void *initializeAMDGPULowerModuleLDSPassOnce(PassRegistry
&Registry) { PassInfo *PI = new PassInfo( "Lower uses of LDS variables from non-kernel functions"
, "amdgpu-lower-module-lds", &AMDGPULowerModuleLDS::ID, PassInfo
::NormalCtor_t(callDefaultCtor<AMDGPULowerModuleLDS>), false
, false); Registry.registerPass(*PI, true); return PI; } static
llvm::once_flag InitializeAMDGPULowerModuleLDSPassFlag; void
llvm::initializeAMDGPULowerModuleLDSPass(PassRegistry &Registry
) { llvm::call_once(InitializeAMDGPULowerModuleLDSPassFlag, initializeAMDGPULowerModuleLDSPassOnce
, std::ref(Registry)); }
390 false)static void *initializeAMDGPULowerModuleLDSPassOnce(PassRegistry
&Registry) { PassInfo *PI = new PassInfo( "Lower uses of LDS variables from non-kernel functions"
, "amdgpu-lower-module-lds", &AMDGPULowerModuleLDS::ID, PassInfo
::NormalCtor_t(callDefaultCtor<AMDGPULowerModuleLDS>), false
, false); Registry.registerPass(*PI, true); return PI; } static
llvm::once_flag InitializeAMDGPULowerModuleLDSPassFlag; void
llvm::initializeAMDGPULowerModuleLDSPass(PassRegistry &Registry
) { llvm::call_once(InitializeAMDGPULowerModuleLDSPassFlag, initializeAMDGPULowerModuleLDSPassOnce
, std::ref(Registry)); }
391
392ModulePass *llvm::createAMDGPULowerModuleLDSPass() {
393 return new AMDGPULowerModuleLDS();
394}
395
396PreservedAnalyses AMDGPULowerModuleLDSPass::run(Module &M,
397 ModuleAnalysisManager &) {
398 return AMDGPULowerModuleLDS().runOnModule(M) ? PreservedAnalyses::none()
1
Calling 'AMDGPULowerModuleLDS::runOnModule'
399 : PreservedAnalyses::all();
400}

/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Support/Alignment.h

1//===-- llvm/Support/Alignment.h - Useful alignment functions ---*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains types to represent alignments.
10// They are instrumented to guarantee some invariants are preserved and prevent
11// invalid manipulations.
12//
13// - Align represents an alignment in bytes; it is always set and always a valid
14// power of two. Its minimum value is 1, which means no alignment requirements.
15//
16// - MaybeAlign is an optional type, it may be undefined or set. When it's set
17// you can get the underlying Align type by using the getValue() method.
18//
19//===----------------------------------------------------------------------===//
20
21#ifndef LLVM_SUPPORT_ALIGNMENT_H_
22#define LLVM_SUPPORT_ALIGNMENT_H_
23
24#include "llvm/ADT/Optional.h"
25#include "llvm/Support/MathExtras.h"
26#include <cassert>
27#ifndef NDEBUG1
28#include <string>
29#endif // NDEBUG
30
31namespace llvm {
32
33#define ALIGN_CHECK_ISPOSITIVE(decl) \
34 assert(decl > 0 && (#decl " should be defined"))((void)0)
35
36/// This struct is a compact representation of a valid (non-zero power of two)
37/// alignment.
38/// It is suitable for use as static global constants.
39struct Align {
40private:
41 uint8_t ShiftValue = 0; /// The log2 of the required alignment.
42 /// ShiftValue is less than 64 by construction.
43
44 friend struct MaybeAlign;
45 friend unsigned Log2(Align);
46 friend bool operator==(Align Lhs, Align Rhs);
47 friend bool operator!=(Align Lhs, Align Rhs);
48 friend bool operator<=(Align Lhs, Align Rhs);
49 friend bool operator>=(Align Lhs, Align Rhs);
50 friend bool operator<(Align Lhs, Align Rhs);
51 friend bool operator>(Align Lhs, Align Rhs);
52 friend unsigned encode(struct MaybeAlign A);
53 friend struct MaybeAlign decodeMaybeAlign(unsigned Value);
54
55 /// A trivial type to allow construction of constexpr Align.
56 /// This is currently needed to workaround a bug in GCC 5.3 which prevents
57 /// definition of constexpr assign operators.
58 /// https://stackoverflow.com/questions/46756288/explicitly-defaulted-function-cannot-be-declared-as-constexpr-because-the-implic
59 /// FIXME: Remove this, make all assign operators constexpr and introduce user
60 /// defined literals when we don't have to support GCC 5.3 anymore.
61 /// https://llvm.org/docs/GettingStarted.html#getting-a-modern-host-c-toolchain
62 struct LogValue {
63 uint8_t Log;
64 };
65
66public:
67 /// Default is byte-aligned.
68 constexpr Align() = default;
69 /// Do not perform checks in case of copy/move construct/assign, because the
70 /// checks have been performed when building `Other`.
71 constexpr Align(const Align &Other) = default;
72 constexpr Align(Align &&Other) = default;
73 Align &operator=(const Align &Other) = default;
74 Align &operator=(Align &&Other) = default;
75
76 explicit Align(uint64_t Value) {
77 assert(Value > 0 && "Value must not be 0")((void)0);
78 assert(llvm::isPowerOf2_64(Value) && "Alignment is not a power of 2")((void)0);
79 ShiftValue = Log2_64(Value);
80 assert(ShiftValue < 64 && "Broken invariant")((void)0);
81 }
82
83 /// This is a hole in the type system and should not be abused.
84 /// Needed to interact with C for instance.
85 uint64_t value() const { return uint64_t(1) << ShiftValue; }
20
The result of the left shift is undefined due to shifting by '255', which is greater or equal to the width of type 'uint64_t'
86
87 /// Allow constructions of constexpr Align.
88 template <size_t kValue> constexpr static LogValue Constant() {
89 return LogValue{static_cast<uint8_t>(CTLog2<kValue>())};
90 }
91
92 /// Allow constructions of constexpr Align from types.
93 /// Compile time equivalent to Align(alignof(T)).
94 template <typename T> constexpr static LogValue Of() {
95 return Constant<std::alignment_of<T>::value>();
96 }
97
98 /// Constexpr constructor from LogValue type.
99 constexpr Align(LogValue CA) : ShiftValue(CA.Log) {}
100};
101
102/// Treats the value 0 as a 1, so Align is always at least 1.
103inline Align assumeAligned(uint64_t Value) {
104 return Value ? Align(Value) : Align();
105}
106
107/// This struct is a compact representation of a valid (power of two) or
108/// undefined (0) alignment.
109struct MaybeAlign : public llvm::Optional<Align> {
110private:
111 using UP = llvm::Optional<Align>;
112
113public:
114 /// Default is undefined.
115 MaybeAlign() = default;
116 /// Do not perform checks in case of copy/move construct/assign, because the
117 /// checks have been performed when building `Other`.
118 MaybeAlign(const MaybeAlign &Other) = default;
119 MaybeAlign &operator=(const MaybeAlign &Other) = default;
120 MaybeAlign(MaybeAlign &&Other) = default;
121 MaybeAlign &operator=(MaybeAlign &&Other) = default;
122
123 /// Use llvm::Optional<Align> constructor.
124 using UP::UP;
125
126 explicit MaybeAlign(uint64_t Value) {
127 assert((Value == 0 || llvm::isPowerOf2_64(Value)) &&((void)0)
128 "Alignment is neither 0 nor a power of 2")((void)0);
129 if (Value)
130 emplace(Value);
131 }
132
133 /// For convenience, returns a valid alignment or 1 if undefined.
134 Align valueOrOne() const { return hasValue() ? getValue() : Align(); }
135};
136
137/// Checks that SizeInBytes is a multiple of the alignment.
138inline bool isAligned(Align Lhs, uint64_t SizeInBytes) {
139 return SizeInBytes % Lhs.value() == 0;
140}
141
142/// Checks that Addr is a multiple of the alignment.
143inline bool isAddrAligned(Align Lhs, const void *Addr) {
144 return isAligned(Lhs, reinterpret_cast<uintptr_t>(Addr));
145}
146
147/// Returns a multiple of A needed to store `Size` bytes.
148inline uint64_t alignTo(uint64_t Size, Align A) {
149 const uint64_t Value = A.value();
150 // The following line is equivalent to `(Size + Value - 1) / Value * Value`.
151
152 // The division followed by a multiplication can be thought of as a right
153 // shift followed by a left shift which zeros out the extra bits produced in
154 // the bump; `~(Value - 1)` is a mask in which exactly the bits to be zeroed
155 // out are zero.
156
157 // Most compilers can generate this code but the pattern may be missed when
158 // multiple functions gets inlined.
159 return (Size + Value - 1) & ~(Value - 1U);
160}
161
162/// If non-zero \p Skew is specified, the return value will be a minimal integer
163/// that is greater than or equal to \p Size and equal to \p A * N + \p Skew for
164/// some integer N. If \p Skew is larger than \p A, its value is adjusted to '\p
165/// Skew mod \p A'.
166///
167/// Examples:
168/// \code
169/// alignTo(5, Align(8), 7) = 7
170/// alignTo(17, Align(8), 1) = 17
171/// alignTo(~0LL, Align(8), 3) = 3
172/// \endcode
173inline uint64_t alignTo(uint64_t Size, Align A, uint64_t Skew) {
174 const uint64_t Value = A.value();
175 Skew %= Value;
176 return ((Size + Value - 1 - Skew) & ~(Value - 1U)) + Skew;
177}
178
179/// Returns a multiple of A needed to store `Size` bytes.
180/// Returns `Size` if current alignment is undefined.
181inline uint64_t alignTo(uint64_t Size, MaybeAlign A) {
182 return A ? alignTo(Size, A.getValue()) : Size;
183}
184
185/// Aligns `Addr` to `Alignment` bytes, rounding up.
186inline uintptr_t alignAddr(const void *Addr, Align Alignment) {
187 uintptr_t ArithAddr = reinterpret_cast<uintptr_t>(Addr);
188 assert(static_cast<uintptr_t>(ArithAddr + Alignment.value() - 1) >=((void)0)
189 ArithAddr &&((void)0)
190 "Overflow")((void)0);
191 return alignTo(ArithAddr, Alignment);
192}
193
194/// Returns the offset to the next integer (mod 2**64) that is greater than
195/// or equal to \p Value and is a multiple of \p Align.
196inline uint64_t offsetToAlignment(uint64_t Value, Align Alignment) {
197 return alignTo(Value, Alignment) - Value;
198}
199
200/// Returns the necessary adjustment for aligning `Addr` to `Alignment`
201/// bytes, rounding up.
202inline uint64_t offsetToAlignedAddr(const void *Addr, Align Alignment) {
203 return offsetToAlignment(reinterpret_cast<uintptr_t>(Addr), Alignment);
204}
205
206/// Returns the log2 of the alignment.
207inline unsigned Log2(Align A) { return A.ShiftValue; }
208
209/// Returns the alignment that satisfies both alignments.
210/// Same semantic as MinAlign.
211inline Align commonAlignment(Align A, Align B) { return std::min(A, B); }
212
213/// Returns the alignment that satisfies both alignments.
214/// Same semantic as MinAlign.
215inline Align commonAlignment(Align A, uint64_t Offset) {
216 return Align(MinAlign(A.value(), Offset));
217}
218
219/// Returns the alignment that satisfies both alignments.
220/// Same semantic as MinAlign.
221inline MaybeAlign commonAlignment(MaybeAlign A, MaybeAlign B) {
222 return A && B ? commonAlignment(*A, *B) : A ? A : B;
223}
224
225/// Returns the alignment that satisfies both alignments.
226/// Same semantic as MinAlign.
227inline MaybeAlign commonAlignment(MaybeAlign A, uint64_t Offset) {
228 return MaybeAlign(MinAlign((*A).value(), Offset));
229}
230
231/// Returns a representation of the alignment that encodes undefined as 0.
232inline unsigned encode(MaybeAlign A) { return A ? A->ShiftValue + 1 : 0; }
233
234/// Dual operation of the encode function above.
235inline MaybeAlign decodeMaybeAlign(unsigned Value) {
236 if (Value == 0)
237 return MaybeAlign();
238 Align Out;
239 Out.ShiftValue = Value - 1;
240 return Out;
241}
242
243/// Returns a representation of the alignment, the encoded value is positive by
244/// definition.
245inline unsigned encode(Align A) { return encode(MaybeAlign(A)); }
246
247/// Comparisons between Align and scalars. Rhs must be positive.
248inline bool operator==(Align Lhs, uint64_t Rhs) {
249 ALIGN_CHECK_ISPOSITIVE(Rhs);
250 return Lhs.value() == Rhs;
19
Calling 'Align::value'
251}
252inline bool operator!=(Align Lhs, uint64_t Rhs) {
253 ALIGN_CHECK_ISPOSITIVE(Rhs);
254 return Lhs.value() != Rhs;
255}
256inline bool operator<=(Align Lhs, uint64_t Rhs) {
257 ALIGN_CHECK_ISPOSITIVE(Rhs);
258 return Lhs.value() <= Rhs;
259}
260inline bool operator>=(Align Lhs, uint64_t Rhs) {
261 ALIGN_CHECK_ISPOSITIVE(Rhs);
262 return Lhs.value() >= Rhs;
263}
264inline bool operator<(Align Lhs, uint64_t Rhs) {
265 ALIGN_CHECK_ISPOSITIVE(Rhs);
266 return Lhs.value() < Rhs;
267}
268inline bool operator>(Align Lhs, uint64_t Rhs) {
269 ALIGN_CHECK_ISPOSITIVE(Rhs);
270 return Lhs.value() > Rhs;
271}
272
273/// Comparisons between MaybeAlign and scalars.
274inline bool operator==(MaybeAlign Lhs, uint64_t Rhs) {
275 return Lhs ? (*Lhs).value() == Rhs : Rhs == 0;
276}
277inline bool operator!=(MaybeAlign Lhs, uint64_t Rhs) {
278 return Lhs ? (*Lhs).value() != Rhs : Rhs != 0;
279}
280
281/// Comparisons operators between Align.
282inline bool operator==(Align Lhs, Align Rhs) {
283 return Lhs.ShiftValue == Rhs.ShiftValue;
284}
285inline bool operator!=(Align Lhs, Align Rhs) {
286 return Lhs.ShiftValue != Rhs.ShiftValue;
287}
288inline bool operator<=(Align Lhs, Align Rhs) {
289 return Lhs.ShiftValue <= Rhs.ShiftValue;
290}
291inline bool operator>=(Align Lhs, Align Rhs) {
292 return Lhs.ShiftValue >= Rhs.ShiftValue;
293}
294inline bool operator<(Align Lhs, Align Rhs) {
295 return Lhs.ShiftValue < Rhs.ShiftValue;
296}
297inline bool operator>(Align Lhs, Align Rhs) {
298 return Lhs.ShiftValue > Rhs.ShiftValue;
299}
300
301// Don't allow relational comparisons with MaybeAlign.
302bool operator<=(Align Lhs, MaybeAlign Rhs) = delete;
303bool operator>=(Align Lhs, MaybeAlign Rhs) = delete;
304bool operator<(Align Lhs, MaybeAlign Rhs) = delete;
305bool operator>(Align Lhs, MaybeAlign Rhs) = delete;
306
307bool operator<=(MaybeAlign Lhs, Align Rhs) = delete;
308bool operator>=(MaybeAlign Lhs, Align Rhs) = delete;
309bool operator<(MaybeAlign Lhs, Align Rhs) = delete;
310bool operator>(MaybeAlign Lhs, Align Rhs) = delete;
311
312bool operator<=(MaybeAlign Lhs, MaybeAlign Rhs) = delete;
313bool operator>=(MaybeAlign Lhs, MaybeAlign Rhs) = delete;
314bool operator<(MaybeAlign Lhs, MaybeAlign Rhs) = delete;
315bool operator>(MaybeAlign Lhs, MaybeAlign Rhs) = delete;
316
317inline Align operator*(Align Lhs, uint64_t Rhs) {
318 assert(Rhs > 0 && "Rhs must be positive")((void)0);
319 return Align(Lhs.value() * Rhs);
320}
321
322inline MaybeAlign operator*(MaybeAlign Lhs, uint64_t Rhs) {
323 assert(Rhs > 0 && "Rhs must be positive")((void)0);
324 return Lhs ? Lhs.getValue() * Rhs : MaybeAlign();
325}
326
327inline Align operator/(Align Lhs, uint64_t Divisor) {
328 assert(llvm::isPowerOf2_64(Divisor) &&((void)0)
329 "Divisor must be positive and a power of 2")((void)0);
330 assert(Lhs != 1 && "Can't halve byte alignment")((void)0);
331 return Align(Lhs.value() / Divisor);
332}
333
334inline MaybeAlign operator/(MaybeAlign Lhs, uint64_t Divisor) {
335 assert(llvm::isPowerOf2_64(Divisor) &&((void)0)
336 "Divisor must be positive and a power of 2")((void)0);
337 return Lhs ? Lhs.getValue() / Divisor : MaybeAlign();
338}
339
340inline Align max(MaybeAlign Lhs, Align Rhs) {
341 return Lhs && *Lhs > Rhs ? *Lhs : Rhs;
342}
343
344inline Align max(Align Lhs, MaybeAlign Rhs) {
345 return Rhs && *Rhs > Lhs ? *Rhs : Lhs;
346}
347
348#ifndef NDEBUG1
349// For usage in LLVM_DEBUG macros.
350inline std::string DebugStr(const Align &A) {
351 return std::to_string(A.value());
352}
353// For usage in LLVM_DEBUG macros.
354inline std::string DebugStr(const MaybeAlign &MA) {
355 if (MA)
356 return std::to_string(MA->value());
357 return "None";
358}
359#endif // NDEBUG
360
361#undef ALIGN_CHECK_ISPOSITIVE
362
363} // namespace llvm
364
365#endif // LLVM_SUPPORT_ALIGNMENT_H_