clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name X86InterleavedAccess.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 1 -fhalf-no-semantic-interposition -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Analysis -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ASMParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/BinaryFormat -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitstream -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /include/llvm/CodeGen -I /include/llvm/CodeGen/PBQP -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Coroutines -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData/Coverage -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/CodeView -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/DWARF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/MSF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/PDB -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Demangle -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/JITLink -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/Orc -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenACC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenMP -I /include/llvm/CodeGen/GlobalISel -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IRReader -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/LTO -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Linker -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC/MCParser -I /include/llvm/CodeGen/MIRParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Object -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Option -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Passes -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Scalar -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ADT -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Support -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/Symbolize -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Target -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Utils -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Vectorize -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/IPO -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include -I /usr/src/gnu/usr.bin/clang/libLLVM/../include -I /usr/src/gnu/usr.bin/clang/libLLVM/obj -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include -D NDEBUG -D __STDC_LIMIT_MACROS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D LLVM_PREFIX="/usr" -D PIC -internal-isystem /usr/include/c++/v1 -internal-isystem /usr/local/lib/clang/13.0.0/include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -ferror-limit 19 -fvisibility-inlines-hidden -fwrapv -D_RET_PROTECTOR -ret-protector -fno-rtti -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc 
-fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/ben/Projects/vmm/scan-build/2022-01-12-194120-40624-1 -x c++ /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86/X86InterleavedAccess.cpp
1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
9 | |
10 | |
11 | |
12 | |
13 | |
14 | |
15 | |
16 | #include "X86ISelLowering.h" |
17 | #include "X86Subtarget.h" |
18 | #include "llvm/ADT/ArrayRef.h" |
19 | #include "llvm/ADT/SmallVector.h" |
20 | #include "llvm/Analysis/VectorUtils.h" |
21 | #include "llvm/IR/Constants.h" |
22 | #include "llvm/IR/DataLayout.h" |
23 | #include "llvm/IR/DerivedTypes.h" |
24 | #include "llvm/IR/IRBuilder.h" |
25 | #include "llvm/IR/Instruction.h" |
26 | #include "llvm/IR/Instructions.h" |
27 | #include "llvm/IR/Module.h" |
28 | #include "llvm/IR/Type.h" |
29 | #include "llvm/IR/Value.h" |
30 | #include "llvm/Support/Casting.h" |
31 | #include "llvm/Support/MachineValueType.h" |
32 | #include <algorithm> |
33 | #include <cassert> |
34 | #include <cmath> |
35 | #include <cstdint> |
36 | |
37 | using namespace llvm; |
38 | |
39 | namespace { |
40 | |
41 | |
42 | |
43 | |
44 | |
45 | |
46 | |
47 | |
48 | |
49 | class X86InterleavedAccessGroup { |
50 | |
51 | |
52 | Instruction *const Inst; |
53 | |
54 | |
55 | ArrayRef<ShuffleVectorInst *> Shuffles; |
56 | |
57 | |
58 | ArrayRef<unsigned> Indices; |
59 | |
60 | |
61 | const unsigned Factor; |
62 | |
63 | |
64 | const X86Subtarget &Subtarget; |
65 | |
66 | const DataLayout &DL; |
67 | |
68 | IRBuilder<> &Builder; |
69 | |
70 | |
71 | |
72 | void decompose(Instruction *Inst, unsigned NumSubVectors, FixedVectorType *T, |
73 | SmallVectorImpl<Instruction *> &DecomposedVectors); |
74 | |
75 | |
76 | |
77 | |
78 | |
79 | |
80 | |
81 | |
82 | |
83 | |
84 | |
85 | |
86 | |
87 | |
88 | void transpose_4x4(ArrayRef<Instruction *> InputVectors, |
89 | SmallVectorImpl<Value *> &TransposedMatrix); |
90 | void interleave8bitStride4(ArrayRef<Instruction *> InputVectors, |
91 | SmallVectorImpl<Value *> &TransposedMatrix, |
92 | unsigned NumSubVecElems); |
93 | void interleave8bitStride4VF8(ArrayRef<Instruction *> InputVectors, |
94 | SmallVectorImpl<Value *> &TransposedMatrix); |
95 | void interleave8bitStride3(ArrayRef<Instruction *> InputVectors, |
96 | SmallVectorImpl<Value *> &TransposedMatrix, |
97 | unsigned NumSubVecElems); |
98 | void deinterleave8bitStride3(ArrayRef<Instruction *> InputVectors, |
99 | SmallVectorImpl<Value *> &TransposedMatrix, |
100 | unsigned NumSubVecElems); |
101 | |
102 | public: |
103 | |
104 | |
105 | |
106 | |
107 | |
108 | |
109 | explicit X86InterleavedAccessGroup(Instruction *I, |
110 | ArrayRef<ShuffleVectorInst *> Shuffs, |
111 | ArrayRef<unsigned> Ind, const unsigned F, |
112 | const X86Subtarget &STarget, |
113 | IRBuilder<> &B) |
114 | : Inst(I), Shuffles(Shuffs), Indices(Ind), Factor(F), Subtarget(STarget), |
115 | DL(Inst->getModule()->getDataLayout()), Builder(B) {} |
116 | |
117 | |
118 | |
119 | bool isSupported() const; |
120 | |
121 | |
122 | |
123 | bool lowerIntoOptimizedSequence(); |
124 | }; |
125 | |
126 | } |
127 | |
128 | bool X86InterleavedAccessGroup::isSupported() const { |
129 | VectorType *ShuffleVecTy = Shuffles[0]->getType(); |
130 | Type *ShuffleEltTy = ShuffleVecTy->getElementType(); |
131 | unsigned ShuffleElemSize = DL.getTypeSizeInBits(ShuffleEltTy); |
132 | unsigned WideInstSize; |
133 | |
134 | |
135 | |
136 | |
137 | |
138 | |
139 | |
140 | if (!Subtarget.hasAVX() || (Factor != 4 && Factor != 3)) |
141 | return false; |
142 | |
143 | if (isa<LoadInst>(Inst)) { |
144 | WideInstSize = DL.getTypeSizeInBits(Inst->getType()); |
145 | if (cast<LoadInst>(Inst)->getPointerAddressSpace()) |
146 | return false; |
147 | } else |
148 | WideInstSize = DL.getTypeSizeInBits(Shuffles[0]->getType()); |
149 | |
150 | |
151 | |
152 | if (ShuffleElemSize == 64 && WideInstSize == 1024 && Factor == 4) |
153 | return true; |
154 | |
155 | if (ShuffleElemSize == 8 && isa<StoreInst>(Inst) && Factor == 4 && |
156 | (WideInstSize == 256 || WideInstSize == 512 || WideInstSize == 1024 || |
157 | WideInstSize == 2048)) |
158 | return true; |
159 | |
160 | if (ShuffleElemSize == 8 && Factor == 3 && |
161 | (WideInstSize == 384 || WideInstSize == 768 || WideInstSize == 1536)) |
162 | return true; |
163 | |
164 | return false; |
165 | } |
166 | |
// Splits the vector produced by VecInst into sub-vectors and appends the new
// instructions to DecomposedVectors.
//  - VecInst: must be a load or a shufflevector (asserted below).
//  - NumSubVectors: number of pieces requested.
//  - SubVecTy: type of each piece.
// The rows below are kept exactly as the analyzer report rendered them,
// including its interleaved path notes.
167 | void X86InterleavedAccessGroup::decompose( |
168 | Instruction *VecInst, unsigned NumSubVectors, FixedVectorType *SubVecTy, |
169 | SmallVectorImpl<Instruction *> &DecomposedVectors) { |
170 | assert((isa<LoadInst>(VecInst) || isa<ShuffleVectorInst>(VecInst)) && |
171 | "Expected Load or Shuffle"); |
172 | |
173 | Type *VecWidth = VecInst->getType(); |
174 | (void)VecWidth; |
175 | assert(VecWidth->isVectorTy() && |
176 | DL.getTypeSizeInBits(VecWidth) >= |
177 | DL.getTypeSizeInBits(SubVecTy) * NumSubVectors && |
178 | "Invalid Inst-size!!!"); |
179 | |
// Shuffle input: emit one shuffle of the two original operands per piece,
// using a sequential mask that starts at Indices[i].
// NOTE(review): the analyzer path below assumes this dyn_cast fails although
// the visible caller on this path passes Shuffles[0], which is declared as a
// ShuffleVectorInst pointer; the path looks infeasible - confirm.
180 | if (auto *SVI = dyn_cast<ShuffleVectorInst>(VecInst)) { |
| 14 | | Assuming 'VecInst' is not a 'ShuffleVectorInst' | |

| |
181 | Value *Op0 = SVI->getOperand(0); |
182 | Value *Op1 = SVI->getOperand(1); |
183 | |
184 | |
185 | for (unsigned i = 0; i < NumSubVectors; ++i) |
186 | DecomposedVectors.push_back( |
187 | cast<ShuffleVectorInst>(Builder.CreateShuffleVector( |
188 | Op0, Op1, |
189 | createSequentialMask(Indices[i], SubVecTy->getNumElements(), |
190 | 0)))); |
191 | return; |
192 | } |
193 | |
194 | |
// Load input: replace the wide load with several narrower loads.
195 | LoadInst *LI = cast<LoadInst>(VecInst); |
| 16 | | 'VecInst' is a 'LoadInst' | |

196 | Type *VecBaseTy, *VecBasePtrTy; |
197 | Value *VecBasePtr; |
198 | unsigned int NumLoads = NumSubVectors; |
199 | |
200 | |
201 | |
// For 768-bit and 1536-bit accesses the pieces are loaded as 16 x i8 chunks,
// so the number of loads is scaled by VecLength / 384. Otherwise each load
// has type SubVecTy.
202 | unsigned VecLength = DL.getTypeSizeInBits(VecWidth); |
203 | if (VecLength == 768 || VecLength == 1536) { |
| 17 | | Assuming 'VecLength' is not equal to 768 | |

| 18 | | Assuming 'VecLength' is not equal to 1536 | |

| |
204 | VecBaseTy = FixedVectorType::get(Type::getInt8Ty(LI->getContext()), 16); |
205 | VecBasePtrTy = VecBaseTy->getPointerTo(LI->getPointerAddressSpace()); |
206 | VecBasePtr = Builder.CreateBitCast(LI->getPointerOperand(), VecBasePtrTy); |
207 | NumLoads = NumSubVectors * (VecLength / 384); |
208 | } else { |
209 | VecBaseTy = SubVecTy; |
210 | VecBasePtrTy = VecBaseTy->getPointerTo(LI->getPointerAddressSpace()); |
211 | VecBasePtr = Builder.CreateBitCast(LI->getPointerOperand(), VecBasePtrTy); |
212 | } |
213 | |
214 | assert(VecBaseTy->getPrimitiveSizeInBits().isKnownMultipleOf(8) && |
215 | "VecBaseTy's size must be a multiple of 8"); |
// The first load keeps the alignment of the original load; every later load
// uses the common alignment of that value and the piece size in bytes.
216 | const Align FirstAlignment = LI->getAlign(); |
217 | const Align SubsequentAlignment = commonAlignment( |
| 21 | | Calling 'commonAlignment' | |

218 | FirstAlignment, VecBaseTy->getPrimitiveSizeInBits().getFixedSize() / 8); |
| 20 | | The value 255 is assigned to 'A.ShiftValue' | |

219 | Align Alignment = FirstAlignment; |
220 | for (unsigned i = 0; i < NumLoads; i++) { |
221 | |
222 | Value *NewBasePtr = |
223 | Builder.CreateGEP(VecBaseTy, VecBasePtr, Builder.getInt32(i)); |
224 | Instruction *NewLoad = |
225 | Builder.CreateAlignedLoad(VecBaseTy, NewBasePtr, Alignment); |
226 | DecomposedVectors.push_back(NewLoad); |
227 | Alignment = SubsequentAlignment; |
228 | } |
229 | } |
230 | |
231 | |
232 | |
233 | static MVT scaleVectorType(MVT VT) { |
234 | unsigned ScalarSize = VT.getVectorElementType().getScalarSizeInBits() * 2; |
235 | return MVT::getVectorVT(MVT::getIntegerVT(ScalarSize), |
236 | VT.getVectorNumElements() / 2); |
237 | } |
238 | |
/// Identity index table 0..63. Callers use a prefix of it (16, 32 or all 64
/// entries) as a shuffle mask.
static constexpr int Concat[] = {
    0,  1,  2,  3,  4,  5,  6,  7,
    8,  9,  10, 11, 12, 13, 14, 15,
    16, 17, 18, 19, 20, 21, 22, 23,
    24, 25, 26, 27, 28, 29, 30, 31,
    32, 33, 34, 35, 36, 37, 38, 39,
    40, 41, 42, 43, 44, 45, 46, 47,
    48, 49, 50, 51, 52, 53, 54, 55,
    56, 57, 58, 59, 60, 61, 62, 63};
244 | |
245 | |
246 | |
247 | |
248 | |
249 | |
250 | |
251 | |
252 | |
253 | |
254 | |
255 | |
256 | |
257 | |
258 | |
259 | |
260 | |
261 | static void genShuffleBland(MVT VT, ArrayRef<int> Mask, |
262 | SmallVectorImpl<int> &Out, int LowOffset, |
263 | int HighOffset) { |
264 | assert(VT.getSizeInBits() >= 256 && |
265 | "This function doesn't accept width smaller then 256"); |
266 | unsigned NumOfElm = VT.getVectorNumElements(); |
267 | for (unsigned i = 0; i < Mask.size(); i++) |
268 | Out.push_back(Mask[i] + LowOffset); |
269 | for (unsigned i = 0; i < Mask.size(); i++) |
270 | Out.push_back(Mask[i] + HighOffset + NumOfElm); |
271 | } |
272 | |
273 | |
274 | |
275 | |
276 | |
277 | |
278 | |
279 | |
280 | |
281 | |
282 | |
283 | |
284 | |
285 | |
286 | |
287 | |
288 | |
289 | |
290 | |
291 | static void reorderSubVector(MVT VT, SmallVectorImpl<Value *> &TransposedMatrix, |
292 | ArrayRef<Value *> Vec, ArrayRef<int> VPShuf, |
293 | unsigned VecElems, unsigned Stride, |
294 | IRBuilder<> &Builder) { |
295 | |
296 | if (VecElems == 16) { |
297 | for (unsigned i = 0; i < Stride; i++) |
298 | TransposedMatrix[i] = Builder.CreateShuffleVector(Vec[i], VPShuf); |
299 | return; |
300 | } |
301 | |
302 | SmallVector<int, 32> OptimizeShuf; |
303 | Value *Temp[8]; |
304 | |
305 | for (unsigned i = 0; i < (VecElems / 16) * Stride; i += 2) { |
306 | genShuffleBland(VT, VPShuf, OptimizeShuf, (i / Stride) * 16, |
307 | (i + 1) / Stride * 16); |
308 | Temp[i / 2] = Builder.CreateShuffleVector( |
309 | Vec[i % Stride], Vec[(i + 1) % Stride], OptimizeShuf); |
310 | OptimizeShuf.clear(); |
311 | } |
312 | |
313 | if (VecElems == 32) { |
314 | std::copy(Temp, Temp + Stride, TransposedMatrix.begin()); |
315 | return; |
316 | } else |
317 | for (unsigned i = 0; i < Stride; i++) |
318 | TransposedMatrix[i] = |
319 | Builder.CreateShuffleVector(Temp[2 * i], Temp[2 * i + 1], Concat); |
320 | } |
321 | |
322 | void X86InterleavedAccessGroup::interleave8bitStride4VF8( |
323 | ArrayRef<Instruction *> Matrix, |
324 | SmallVectorImpl<Value *> &TransposedMatrix) { |
325 | |
326 | |
327 | |
328 | |
329 | |
330 | |
331 | MVT VT = MVT::v8i16; |
332 | TransposedMatrix.resize(2); |
333 | SmallVector<int, 16> MaskLow; |
334 | SmallVector<int, 32> MaskLowTemp1, MaskLowWord; |
335 | SmallVector<int, 32> MaskHighTemp1, MaskHighWord; |
336 | |
337 | for (unsigned i = 0; i < 8; ++i) { |
338 | MaskLow.push_back(i); |
339 | MaskLow.push_back(i + 8); |
340 | } |
341 | |
342 | createUnpackShuffleMask(VT, MaskLowTemp1, true, false); |
343 | createUnpackShuffleMask(VT, MaskHighTemp1, false, false); |
344 | narrowShuffleMaskElts(2, MaskHighTemp1, MaskHighWord); |
345 | narrowShuffleMaskElts(2, MaskLowTemp1, MaskLowWord); |
346 | |
347 | |
348 | Value *IntrVec1Low = |
349 | Builder.CreateShuffleVector(Matrix[0], Matrix[1], MaskLow); |
350 | Value *IntrVec2Low = |
351 | Builder.CreateShuffleVector(Matrix[2], Matrix[3], MaskLow); |
352 | |
353 | |
354 | |
355 | |
356 | TransposedMatrix[0] = |
357 | Builder.CreateShuffleVector(IntrVec1Low, IntrVec2Low, MaskLowWord); |
358 | TransposedMatrix[1] = |
359 | Builder.CreateShuffleVector(IntrVec1Low, IntrVec2Low, MaskHighWord); |
360 | } |
361 | |
362 | void X86InterleavedAccessGroup::interleave8bitStride4( |
363 | ArrayRef<Instruction *> Matrix, SmallVectorImpl<Value *> &TransposedMatrix, |
364 | unsigned NumOfElm) { |
365 | |
366 | |
367 | |
368 | |
369 | |
370 | |
371 | MVT VT = MVT::getVectorVT(MVT::i8, NumOfElm); |
372 | MVT HalfVT = scaleVectorType(VT); |
373 | |
374 | TransposedMatrix.resize(4); |
375 | SmallVector<int, 32> MaskHigh; |
376 | SmallVector<int, 32> MaskLow; |
377 | SmallVector<int, 32> LowHighMask[2]; |
378 | SmallVector<int, 32> MaskHighTemp; |
379 | SmallVector<int, 32> MaskLowTemp; |
380 | |
381 | |
382 | |
383 | |
384 | createUnpackShuffleMask(VT, MaskLow, true, false); |
385 | createUnpackShuffleMask(VT, MaskHigh, false, false); |
386 | |
387 | |
388 | |
389 | |
390 | createUnpackShuffleMask(HalfVT, MaskLowTemp, true, false); |
391 | createUnpackShuffleMask(HalfVT, MaskHighTemp, false, false); |
392 | narrowShuffleMaskElts(2, MaskLowTemp, LowHighMask[0]); |
393 | narrowShuffleMaskElts(2, MaskHighTemp, LowHighMask[1]); |
394 | |
395 | |
396 | |
397 | |
398 | |
399 | Value *IntrVec[4]; |
400 | |
401 | IntrVec[0] = Builder.CreateShuffleVector(Matrix[0], Matrix[1], MaskLow); |
402 | IntrVec[1] = Builder.CreateShuffleVector(Matrix[0], Matrix[1], MaskHigh); |
403 | IntrVec[2] = Builder.CreateShuffleVector(Matrix[2], Matrix[3], MaskLow); |
404 | IntrVec[3] = Builder.CreateShuffleVector(Matrix[2], Matrix[3], MaskHigh); |
405 | |
406 | |
407 | |
408 | |
409 | |
410 | |
411 | Value *VecOut[4]; |
412 | for (int i = 0; i < 4; i++) |
413 | VecOut[i] = Builder.CreateShuffleVector(IntrVec[i / 2], IntrVec[i / 2 + 2], |
414 | LowHighMask[i % 2]); |
415 | |
416 | |
417 | |
418 | |
419 | |
420 | |
421 | if (VT == MVT::v16i8) { |
422 | std::copy(VecOut, VecOut + 4, TransposedMatrix.begin()); |
423 | return; |
424 | } |
425 | |
426 | reorderSubVector(VT, TransposedMatrix, VecOut, makeArrayRef(Concat, 16), |
427 | NumOfElm, 4, Builder); |
428 | } |
429 | |
430 | |
431 | |
432 | |
433 | |
434 | |
435 | |
436 | |
437 | |
438 | |
439 | |
440 | static void createShuffleStride(MVT VT, int Stride, |
441 | SmallVectorImpl<int> &Mask) { |
442 | int VectorSize = VT.getSizeInBits(); |
443 | int VF = VT.getVectorNumElements(); |
444 | int LaneCount = std::max(VectorSize / 128, 1); |
445 | for (int Lane = 0; Lane < LaneCount; Lane++) |
446 | for (int i = 0, LaneSize = VF / LaneCount; i != LaneSize; ++i) |
447 | Mask.push_back((i * Stride) % LaneSize + LaneSize * Lane); |
448 | } |
449 | |
450 | |
451 | |
452 | |
453 | |
454 | static void setGroupSize(MVT VT, SmallVectorImpl<int> &SizeInfo) { |
455 | int VectorSize = VT.getSizeInBits(); |
456 | int VF = VT.getVectorNumElements() / std::max(VectorSize / 128, 1); |
457 | for (int i = 0, FirstGroupElement = 0; i < 3; i++) { |
458 | int GroupSize = std::ceil((VF - FirstGroupElement) / 3.0); |
459 | SizeInfo.push_back(GroupSize); |
460 | FirstGroupElement = ((GroupSize)*3 + FirstGroupElement) % VF; |
461 | } |
462 | } |
463 | |
464 | |
465 | |
466 | |
467 | |
468 | |
469 | |
470 | |
471 | |
472 | |
473 | |
474 | |
475 | |
476 | |
477 | static void DecodePALIGNRMask(MVT VT, unsigned Imm, |
478 | SmallVectorImpl<int> &ShuffleMask, |
479 | bool AlignDirection = true, bool Unary = false) { |
480 | unsigned NumElts = VT.getVectorNumElements(); |
481 | unsigned NumLanes = std::max((int)VT.getSizeInBits() / 128, 1); |
482 | unsigned NumLaneElts = NumElts / NumLanes; |
483 | |
484 | Imm = AlignDirection ? Imm : (NumLaneElts - Imm); |
485 | unsigned Offset = Imm * (VT.getScalarSizeInBits() / 8); |
486 | |
487 | for (unsigned l = 0; l != NumElts; l += NumLaneElts) { |
488 | for (unsigned i = 0; i != NumLaneElts; ++i) { |
489 | unsigned Base = i + Offset; |
490 | |
491 | |
492 | if (Base >= NumLaneElts) |
493 | Base = Unary ? Base % NumLaneElts : Base + NumElts - NumLaneElts; |
494 | ShuffleMask.push_back(Base + l); |
495 | } |
496 | } |
497 | } |
498 | |
499 | |
500 | |
501 | |
502 | |
503 | |
504 | |
505 | |
506 | |
507 | |
508 | |
509 | |
510 | |
511 | |
512 | |
513 | |
514 | |
515 | |
516 | |
517 | |
518 | |
519 | |
520 | |
521 | |
522 | |
523 | |
524 | |
525 | |
526 | static void concatSubVector(Value **Vec, ArrayRef<Instruction *> InVec, |
527 | unsigned VecElems, IRBuilder<> &Builder) { |
528 | if (VecElems == 16) { |
529 | for (int i = 0; i < 3; i++) |
530 | Vec[i] = InVec[i]; |
531 | return; |
532 | } |
533 | |
534 | for (unsigned j = 0; j < VecElems / 32; j++) |
535 | for (int i = 0; i < 3; i++) |
536 | Vec[i + j * 3] = Builder.CreateShuffleVector( |
537 | InVec[j * 6 + i], InVec[j * 6 + i + 3], makeArrayRef(Concat, 32)); |
538 | |
539 | if (VecElems == 32) |
540 | return; |
541 | |
542 | for (int i = 0; i < 3; i++) |
543 | Vec[i] = Builder.CreateShuffleVector(Vec[i], Vec[i + 3], Concat); |
544 | } |
545 | |
546 | void X86InterleavedAccessGroup::deinterleave8bitStride3( |
547 | ArrayRef<Instruction *> InVec, SmallVectorImpl<Value *> &TransposedMatrix, |
548 | unsigned VecElems) { |
549 | |
550 | |
551 | |
552 | |
553 | |
554 | TransposedMatrix.resize(3); |
555 | SmallVector<int, 32> VPShuf; |
556 | SmallVector<int, 32> VPAlign[2]; |
557 | SmallVector<int, 32> VPAlign2; |
558 | SmallVector<int, 32> VPAlign3; |
559 | SmallVector<int, 3> GroupSize; |
560 | Value *Vec[6], *TempVector[3]; |
561 | |
562 | MVT VT = MVT::getVT(Shuffles[0]->getType()); |
563 | |
564 | createShuffleStride(VT, 3, VPShuf); |
565 | setGroupSize(VT, GroupSize); |
566 | |
567 | for (int i = 0; i < 2; i++) |
568 | DecodePALIGNRMask(VT, GroupSize[2 - i], VPAlign[i], false); |
569 | |
570 | DecodePALIGNRMask(VT, GroupSize[2] + GroupSize[1], VPAlign2, true, true); |
571 | DecodePALIGNRMask(VT, GroupSize[1], VPAlign3, true, true); |
572 | |
573 | concatSubVector(Vec, InVec, VecElems, Builder); |
574 | |
575 | |
576 | |
577 | |
578 | for (int i = 0; i < 3; i++) |
579 | Vec[i] = Builder.CreateShuffleVector(Vec[i], VPShuf); |
580 | |
581 | |
582 | |
583 | |
584 | |
585 | for (int i = 0; i < 3; i++) |
586 | TempVector[i] = |
587 | Builder.CreateShuffleVector(Vec[(i + 2) % 3], Vec[i], VPAlign[0]); |
588 | |
589 | |
590 | |
591 | |
592 | |
593 | for (int i = 0; i < 3; i++) |
594 | Vec[i] = Builder.CreateShuffleVector(TempVector[(i + 1) % 3], TempVector[i], |
595 | VPAlign[1]); |
596 | |
597 | |
598 | |
599 | |
600 | |
601 | Value *TempVec = Builder.CreateShuffleVector(Vec[1], VPAlign3); |
602 | TransposedMatrix[0] = Builder.CreateShuffleVector(Vec[0], VPAlign2); |
603 | TransposedMatrix[1] = VecElems == 8 ? Vec[2] : TempVec; |
604 | TransposedMatrix[2] = VecElems == 8 ? TempVec : Vec[2]; |
605 | } |
606 | |
607 | |
608 | |
609 | |
610 | static void group2Shuffle(MVT VT, SmallVectorImpl<int> &Mask, |
611 | SmallVectorImpl<int> &Output) { |
612 | int IndexGroup[3] = {0, 0, 0}; |
613 | int Index = 0; |
614 | int VectorWidth = VT.getSizeInBits(); |
615 | int VF = VT.getVectorNumElements(); |
616 | |
617 | int Lane = (VectorWidth / 128 > 0) ? VectorWidth / 128 : 1; |
618 | for (int i = 0; i < 3; i++) { |
619 | IndexGroup[(Index * 3) % (VF / Lane)] = Index; |
620 | Index += Mask[i]; |
621 | } |
622 | |
623 | for (int i = 0; i < VF / Lane; i++) { |
624 | Output.push_back(IndexGroup[i % 3]); |
625 | IndexGroup[i % 3]++; |
626 | } |
627 | } |
628 | |
629 | void X86InterleavedAccessGroup::interleave8bitStride3( |
630 | ArrayRef<Instruction *> InVec, SmallVectorImpl<Value *> &TransposedMatrix, |
631 | unsigned VecElems) { |
632 | |
633 | |
634 | |
635 | |
636 | |
637 | TransposedMatrix.resize(3); |
638 | SmallVector<int, 3> GroupSize; |
639 | SmallVector<int, 32> VPShuf; |
640 | SmallVector<int, 32> VPAlign[3]; |
641 | SmallVector<int, 32> VPAlign2; |
642 | SmallVector<int, 32> VPAlign3; |
643 | |
644 | Value *Vec[3], *TempVector[3]; |
645 | MVT VT = MVT::getVectorVT(MVT::i8, VecElems); |
646 | |
647 | setGroupSize(VT, GroupSize); |
648 | |
649 | for (int i = 0; i < 3; i++) |
650 | DecodePALIGNRMask(VT, GroupSize[i], VPAlign[i]); |
651 | |
652 | DecodePALIGNRMask(VT, GroupSize[1] + GroupSize[2], VPAlign2, false, true); |
653 | DecodePALIGNRMask(VT, GroupSize[1], VPAlign3, false, true); |
654 | |
655 | |
656 | |
657 | |
658 | |
659 | Vec[0] = Builder.CreateShuffleVector(InVec[0], VPAlign2); |
660 | Vec[1] = Builder.CreateShuffleVector(InVec[1], VPAlign3); |
661 | Vec[2] = InVec[2]; |
662 | |
663 | |
664 | |
665 | |
666 | |
667 | for (int i = 0; i < 3; i++) |
668 | TempVector[i] = |
669 | Builder.CreateShuffleVector(Vec[i], Vec[(i + 2) % 3], VPAlign[1]); |
670 | |
671 | |
672 | |
673 | |
674 | |
675 | for (int i = 0; i < 3; i++) |
676 | Vec[i] = Builder.CreateShuffleVector(TempVector[i], TempVector[(i + 1) % 3], |
677 | VPAlign[2]); |
678 | |
679 | |
680 | |
681 | |
682 | |
683 | unsigned NumOfElm = VT.getVectorNumElements(); |
684 | group2Shuffle(VT, GroupSize, VPShuf); |
685 | reorderSubVector(VT, TransposedMatrix, Vec, VPShuf, NumOfElm, 3, Builder); |
686 | } |
687 | |
688 | void X86InterleavedAccessGroup::transpose_4x4( |
689 | ArrayRef<Instruction *> Matrix, |
690 | SmallVectorImpl<Value *> &TransposedMatrix) { |
691 | assert(Matrix.size() == 4 && "Invalid matrix size"); |
692 | TransposedMatrix.resize(4); |
693 | |
694 | |
695 | static constexpr int IntMask1[] = {0, 1, 4, 5}; |
696 | ArrayRef<int> Mask = makeArrayRef(IntMask1, 4); |
697 | Value *IntrVec1 = Builder.CreateShuffleVector(Matrix[0], Matrix[2], Mask); |
698 | Value *IntrVec2 = Builder.CreateShuffleVector(Matrix[1], Matrix[3], Mask); |
699 | |
700 | |
701 | static constexpr int IntMask2[] = {2, 3, 6, 7}; |
702 | Mask = makeArrayRef(IntMask2, 4); |
703 | Value *IntrVec3 = Builder.CreateShuffleVector(Matrix[0], Matrix[2], Mask); |
704 | Value *IntrVec4 = Builder.CreateShuffleVector(Matrix[1], Matrix[3], Mask); |
705 | |
706 | |
707 | static constexpr int IntMask3[] = {0, 4, 2, 6}; |
708 | Mask = makeArrayRef(IntMask3, 4); |
709 | TransposedMatrix[0] = Builder.CreateShuffleVector(IntrVec1, IntrVec2, Mask); |
710 | TransposedMatrix[2] = Builder.CreateShuffleVector(IntrVec3, IntrVec4, Mask); |
711 | |
712 | |
713 | static constexpr int IntMask4[] = {1, 5, 3, 7}; |
714 | Mask = makeArrayRef(IntMask4, 4); |
715 | TransposedMatrix[1] = Builder.CreateShuffleVector(IntrVec1, IntrVec2, Mask); |
716 | TransposedMatrix[3] = Builder.CreateShuffleVector(IntrVec3, IntrVec4, Mask); |
717 | } |
718 | |
719 | |
720 | |
// Rewrites the group into explicit shuffle sequences. Returns true when a
// replacement was emitted and false when the shape is not handled here.
// Load groups: the wide load is decomposed, the pieces are transposed or
// deinterleaved, and every shuffle is replaced by its result vector.
// Store groups: the single shuffle is decomposed, the pieces are interleaved,
// concatenated, and written with a new aligned store.
// The rows below are kept exactly as the analyzer report rendered them,
// including its interleaved path notes.
721 | bool X86InterleavedAccessGroup::lowerIntoOptimizedSequence() { |
722 | SmallVector<Instruction *, 4> DecomposedVectors; |
723 | SmallVector<Value *, 4> TransposedVectors; |
724 | auto *ShuffleTy = cast<FixedVectorType>(Shuffles[0]->getType()); |
| 10 | | The object is a 'FixedVectorType' | |

725 | |
// Load path.
726 | if (isa<LoadInst>(Inst)) { |
| 11 | | Assuming field 'Inst' is not a 'LoadInst' | |

| |
727 | auto *ShuffleEltTy = cast<FixedVectorType>(Inst->getType()); |
728 | unsigned NumSubVecElems = ShuffleEltTy->getNumElements() / Factor; |
// Only these sub-vector sizes are handled, and the shuffle result must have
// exactly that many elements.
729 | switch (NumSubVecElems) { |
730 | default: |
731 | return false; |
732 | case 4: |
733 | case 8: |
734 | case 16: |
735 | case 32: |
736 | case 64: |
737 | if (ShuffleTy->getNumElements() != NumSubVecElems) |
738 | return false; |
739 | break; |
740 | } |
741 | |
742 | |
743 | decompose(Inst, Factor, ShuffleTy, DecomposedVectors); |
744 | |
745 | |
746 | |
747 | |
748 | |
749 | if (NumSubVecElems == 4) |
750 | transpose_4x4(DecomposedVectors, TransposedVectors); |
751 | else |
752 | deinterleave8bitStride3(DecomposedVectors, TransposedVectors, |
753 | NumSubVecElems); |
754 | |
755 | |
756 | |
// Redirect every user of each original shuffle to the matching new vector.
757 | for (unsigned i = 0, e = Shuffles.size(); i < e; ++i) |
758 | Shuffles[i]->replaceAllUsesWith(TransposedVectors[Indices[i]]); |
759 | |
760 | return true; |
761 | } |
762 | |
// Store path.
763 | Type *ShuffleEltTy = ShuffleTy->getElementType(); |
764 | unsigned NumSubVecElems = ShuffleTy->getNumElements() / Factor; |
765 | |
766 | |
767 | |
768 | |
// The decomposition is emitted before the size dispatch below, so new
// shuffles exist even when the default case returns false.
769 | decompose(Shuffles[0], Factor, |
| 13 | | Calling 'X86InterleavedAccessGroup::decompose' | |

770 | FixedVectorType::get(ShuffleEltTy, NumSubVecElems), |
771 | DecomposedVectors); |
772 | |
773 | |
774 | |
775 | switch (NumSubVecElems) { |
776 | case 4: |
777 | transpose_4x4(DecomposedVectors, TransposedVectors); |
778 | break; |
779 | case 8: |
780 | interleave8bitStride4VF8(DecomposedVectors, TransposedVectors); |
781 | break; |
782 | case 16: |
783 | case 32: |
784 | case 64: |
785 | if (Factor == 4) |
786 | interleave8bitStride4(DecomposedVectors, TransposedVectors, |
787 | NumSubVecElems); |
788 | if (Factor == 3) |
789 | interleave8bitStride3(DecomposedVectors, TransposedVectors, |
790 | NumSubVecElems); |
791 | break; |
792 | default: |
793 | return false; |
794 | } |
795 | |
796 | |
// Join the interleaved pieces into one wide vector.
797 | Value *WideVec = concatenateVectors(Builder, TransposedVectors); |
798 | |
799 | |
// Write the wide vector to the address of the original store, keeping its
// alignment.
800 | StoreInst *SI = cast<StoreInst>(Inst); |
801 | Builder.CreateAlignedStore(WideVec, SI->getPointerOperand(), SI->getAlign()); |
802 | |
803 | return true; |
804 | } |
805 | |
806 | |
807 | |
808 | |
809 | |
810 | bool X86TargetLowering::lowerInterleavedLoad( |
811 | LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles, |
812 | ArrayRef<unsigned> Indices, unsigned Factor) const { |
813 | assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() && |
814 | "Invalid interleave factor"); |
815 | assert(!Shuffles.empty() && "Empty shufflevector input"); |
816 | assert(Shuffles.size() == Indices.size() && |
817 | "Unmatched number of shufflevectors and indices"); |
818 | |
819 | |
820 | IRBuilder<> Builder(LI); |
821 | X86InterleavedAccessGroup Grp(LI, Shuffles, Indices, Factor, Subtarget, |
822 | Builder); |
823 | |
824 | return Grp.isSupported() && Grp.lowerIntoOptimizedSequence(); |
825 | } |
826 | |
827 | bool X86TargetLowering::lowerInterleavedStore(StoreInst *SI, |
828 | ShuffleVectorInst *SVI, |
829 | unsigned Factor) const { |
830 | assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() && |
831 | "Invalid interleave factor"); |
832 | |
833 | assert(cast<FixedVectorType>(SVI->getType())->getNumElements() % Factor == |
834 | 0 && |
835 | "Invalid interleaved store"); |
836 | |
837 | |
838 | |
839 | SmallVector<unsigned, 4> Indices; |
840 | auto Mask = SVI->getShuffleMask(); |
841 | for (unsigned i = 0; i < Factor; i++) |
| 1 | Assuming 'i' is < 'Factor' | |
|
| 2 | | Loop condition is true. Entering loop body | |
|
| 3 | | Assuming 'i' is < 'Factor' | |
|
| 4 | | Loop condition is true. Entering loop body | |
|
| 5 | | Assuming 'i' is < 'Factor' | |
|
| 6 | | Loop condition is true. Entering loop body | |
|
| 7 | | Assuming 'i' is >= 'Factor' | |
|
| 8 | | Loop condition is false. Execution continues on line 844 | |
|
842 | Indices.push_back(Mask[i]); |
843 | |
844 | ArrayRef<ShuffleVectorInst *> Shuffles = makeArrayRef(SVI); |
845 | |
846 | |
847 | IRBuilder<> Builder(SI); |
848 | X86InterleavedAccessGroup Grp(SI, Shuffles, Indices, Factor, Subtarget, |
849 | Builder); |
850 | |
851 | return Grp.isSupported() && Grp.lowerIntoOptimizedSequence(); |
| 9 | | Calling 'X86InterleavedAccessGroup::lowerIntoOptimizedSequence' | |
|
852 | } |
1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
9 | |
10 | |
11 | |
12 | |
13 | |
14 | |
15 | |
16 | |
17 | |
18 | |
19 | |
20 | |
21 | #ifndef LLVM_SUPPORT_ALIGNMENT_H_ |
22 | #define LLVM_SUPPORT_ALIGNMENT_H_ |
23 | |
24 | #include "llvm/ADT/Optional.h" |
25 | #include "llvm/Support/MathExtras.h" |
26 | #include <cassert> |
27 | #ifndef NDEBUG |
28 | #include <string> |
29 | #endif // NDEBUG |
30 | |
31 | namespace llvm { |
32 | |
/// Asserts that the expression \p decl is strictly positive. The failure
/// message is the spelled-out argument followed by " should be defined".
/// The argument is parenthesised so that operators binding more loosely than
/// '>' (for example '&' or '?:') inside \p decl are evaluated before the
/// comparison instead of being split apart by it.
#define ALIGN_CHECK_ISPOSITIVE(decl)                                           \
  assert((decl) > 0 && (#decl " should be defined"))
35 | |
36 | |
37 | |
38 | |
39 | struct Align { |
40 | private: |
41 | uint8_t ShiftValue = 0; |
42 | |
43 | |
44 | friend struct MaybeAlign; |
45 | friend unsigned Log2(Align); |
46 | friend bool operator==(Align Lhs, Align Rhs); |
47 | friend bool operator!=(Align Lhs, Align Rhs); |
48 | friend bool operator<=(Align Lhs, Align Rhs); |
49 | friend bool operator>=(Align Lhs, Align Rhs); |
50 | friend bool operator<(Align Lhs, Align Rhs); |
51 | friend bool operator>(Align Lhs, Align Rhs); |
52 | friend unsigned encode(struct MaybeAlign A); |
53 | friend struct MaybeAlign decodeMaybeAlign(unsigned Value); |
54 | |
55 | |
56 | |
57 | |
58 | |
59 | |
60 | |
61 | |
62 | struct LogValue { |
63 | uint8_t Log; |
64 | }; |
65 | |
66 | public: |
67 | |
68 | constexpr Align() = default; |
69 | |
70 | |
71 | constexpr Align(const Align &Other) = default; |
72 | constexpr Align(Align &&Other) = default; |
73 | Align &operator=(const Align &Other) = default; |
74 | Align &operator=(Align &&Other) = default; |
75 | |
76 | explicit Align(uint64_t Value) { |
77 | assert(Value > 0 && "Value must not be 0"); |
78 | assert(llvm::isPowerOf2_64(Value) && "Alignment is not a power of 2"); |
79 | ShiftValue = Log2_64(Value); |
80 | assert(ShiftValue < 64 && "Broken invariant"); |
81 | } |
82 | |
83 | |
84 | |
85 | uint64_t value() const { return uint64_t(1) << ShiftValue; } |
| 23 | | The result of the left shift is undefined due to shifting by '255', which is greater or equal to the width of type 'uint64_t' |
|
86 | |
87 | |
88 | template <size_t kValue> constexpr static LogValue Constant() { |
89 | return LogValue{static_cast<uint8_t>(CTLog2<kValue>())}; |
90 | } |
91 | |
92 | |
93 | |
94 | template <typename T> constexpr static LogValue Of() { |
95 | return Constant<std::alignment_of<T>::value>(); |
96 | } |
97 | |
98 | |
99 | constexpr Align(LogValue CA) : ShiftValue(CA.Log) {} |
100 | }; |
101 | |
102 | |
103 | inline Align assumeAligned(uint64_t Value) { |
104 | return Value ? Align(Value) : Align(); |
105 | } |
106 | |
107 | |
108 | |
109 | struct MaybeAlign : public llvm::Optional<Align> { |
110 | private: |
111 | using UP = llvm::Optional<Align>; |
112 | |
113 | public: |
114 | |
115 | MaybeAlign() = default; |
116 | |
117 | |
118 | MaybeAlign(const MaybeAlign &Other) = default; |
119 | MaybeAlign &operator=(const MaybeAlign &Other) = default; |
120 | MaybeAlign(MaybeAlign &&Other) = default; |
121 | MaybeAlign &operator=(MaybeAlign &&Other) = default; |
122 | |
123 | |
124 | using UP::UP; |
125 | |
126 | explicit MaybeAlign(uint64_t Value) { |
127 | assert((Value == 0 || llvm::isPowerOf2_64(Value)) && |
128 | "Alignment is neither 0 nor a power of 2"); |
129 | if (Value) |
130 | emplace(Value); |
131 | } |
132 | |
133 | |
134 | Align valueOrOne() const { return hasValue() ? getValue() : Align(); } |
135 | }; |
136 | |
137 | |
138 | inline bool isAligned(Align Lhs, uint64_t SizeInBytes) { |
139 | return SizeInBytes % Lhs.value() == 0; |
140 | } |
141 | |
142 | |
143 | inline bool isAddrAligned(Align Lhs, const void *Addr) { |
144 | return isAligned(Lhs, reinterpret_cast<uintptr_t>(Addr)); |
145 | } |
146 | |
147 | |
148 | inline uint64_t alignTo(uint64_t Size, Align A) { |
149 | const uint64_t Value = A.value(); |
150 | |
151 | |
152 | |
153 | |
154 | |
155 | |
156 | |
157 | |
158 | |
159 | return (Size + Value - 1) & ~(Value - 1U); |
160 | } |
161 | |
162 | |
163 | |
164 | |
165 | |
166 | |
167 | |
168 | |
169 | |
170 | |
171 | |
172 | |
173 | inline uint64_t alignTo(uint64_t Size, Align A, uint64_t Skew) { |
174 | const uint64_t Value = A.value(); |
175 | Skew %= Value; |
176 | return ((Size + Value - 1 - Skew) & ~(Value - 1U)) + Skew; |
177 | } |
178 | |
179 | |
180 | |
181 | inline uint64_t alignTo(uint64_t Size, MaybeAlign A) { |
182 | return A ? alignTo(Size, A.getValue()) : Size; |
183 | } |
184 | |
185 | |
186 | inline uintptr_t alignAddr(const void *Addr, Align Alignment) { |
187 | uintptr_t ArithAddr = reinterpret_cast<uintptr_t>(Addr); |
188 | assert(static_cast<uintptr_t>(ArithAddr + Alignment.value() - 1) >= |
189 | ArithAddr && |
190 | "Overflow"); |
191 | return alignTo(ArithAddr, Alignment); |
192 | } |
193 | |
194 | |
195 | |
196 | inline uint64_t offsetToAlignment(uint64_t Value, Align Alignment) { |
197 | return alignTo(Value, Alignment) - Value; |
198 | } |
199 | |
200 | |
201 | |
202 | inline uint64_t offsetToAlignedAddr(const void *Addr, Align Alignment) { |
203 | return offsetToAlignment(reinterpret_cast<uintptr_t>(Addr), Alignment); |
204 | } |
205 | |
206 | |
207 | inline unsigned Log2(Align A) { return A.ShiftValue; } |
208 | |
209 | |
210 | |
211 | inline Align commonAlignment(Align A, Align B) { return std::min(A, B); } |
212 | |
213 | |
214 | |
215 | inline Align commonAlignment(Align A, uint64_t Offset) { |
216 | return Align(MinAlign(A.value(), Offset)); |
| |
217 | } |
218 | |
219 | |
220 | |
221 | inline MaybeAlign commonAlignment(MaybeAlign A, MaybeAlign B) { |
222 | return A && B ? commonAlignment(*A, *B) : A ? A : B; |
223 | } |
224 | |
225 | |
226 | |
227 | inline MaybeAlign commonAlignment(MaybeAlign A, uint64_t Offset) { |
228 | return MaybeAlign(MinAlign((*A).value(), Offset)); |
229 | } |
230 | |
231 | |
232 | inline unsigned encode(MaybeAlign A) { return A ? A->ShiftValue + 1 : 0; } |
233 | |
234 | |
235 | inline MaybeAlign decodeMaybeAlign(unsigned Value) { |
236 | if (Value == 0) |
237 | return MaybeAlign(); |
238 | Align Out; |
239 | Out.ShiftValue = Value - 1; |
240 | return Out; |
241 | } |
242 | |
243 | |
244 | |
245 | inline unsigned encode(Align A) { return encode(MaybeAlign(A)); } |
246 | |
247 | |
248 | inline bool operator==(Align Lhs, uint64_t Rhs) { |
249 | ALIGN_CHECK_ISPOSITIVE(Rhs); |
250 | return Lhs.value() == Rhs; |
251 | } |
252 | inline bool operator!=(Align Lhs, uint64_t Rhs) { |
253 | ALIGN_CHECK_ISPOSITIVE(Rhs); |
254 | return Lhs.value() != Rhs; |
255 | } |
256 | inline bool operator<=(Align Lhs, uint64_t Rhs) { |
257 | ALIGN_CHECK_ISPOSITIVE(Rhs); |
258 | return Lhs.value() <= Rhs; |
259 | } |
260 | inline bool operator>=(Align Lhs, uint64_t Rhs) { |
261 | ALIGN_CHECK_ISPOSITIVE(Rhs); |
262 | return Lhs.value() >= Rhs; |
263 | } |
264 | inline bool operator<(Align Lhs, uint64_t Rhs) { |
265 | ALIGN_CHECK_ISPOSITIVE(Rhs); |
266 | return Lhs.value() < Rhs; |
267 | } |
268 | inline bool operator>(Align Lhs, uint64_t Rhs) { |
269 | ALIGN_CHECK_ISPOSITIVE(Rhs); |
270 | return Lhs.value() > Rhs; |
271 | } |
272 | |
273 | |
274 | inline bool operator==(MaybeAlign Lhs, uint64_t Rhs) { |
275 | return Lhs ? (*Lhs).value() == Rhs : Rhs == 0; |
276 | } |
277 | inline bool operator!=(MaybeAlign Lhs, uint64_t Rhs) { |
278 | return Lhs ? (*Lhs).value() != Rhs : Rhs != 0; |
279 | } |
280 | |
281 | |
282 | inline bool operator==(Align Lhs, Align Rhs) { |
283 | return Lhs.ShiftValue == Rhs.ShiftValue; |
284 | } |
285 | inline bool operator!=(Align Lhs, Align Rhs) { |
286 | return Lhs.ShiftValue != Rhs.ShiftValue; |
287 | } |
288 | inline bool operator<=(Align Lhs, Align Rhs) { |
289 | return Lhs.ShiftValue <= Rhs.ShiftValue; |
290 | } |
291 | inline bool operator>=(Align Lhs, Align Rhs) { |
292 | return Lhs.ShiftValue >= Rhs.ShiftValue; |
293 | } |
294 | inline bool operator<(Align Lhs, Align Rhs) { |
295 | return Lhs.ShiftValue < Rhs.ShiftValue; |
296 | } |
297 | inline bool operator>(Align Lhs, Align Rhs) { |
298 | return Lhs.ShiftValue > Rhs.ShiftValue; |
299 | } |
300 | |
301 | |
302 | bool operator<=(Align Lhs, MaybeAlign Rhs) = delete; |
303 | bool operator>=(Align Lhs, MaybeAlign Rhs) = delete; |
304 | bool operator<(Align Lhs, MaybeAlign Rhs) = delete; |
305 | bool operator>(Align Lhs, MaybeAlign Rhs) = delete; |
306 | |
307 | bool operator<=(MaybeAlign Lhs, Align Rhs) = delete; |
308 | bool operator>=(MaybeAlign Lhs, Align Rhs) = delete; |
309 | bool operator<(MaybeAlign Lhs, Align Rhs) = delete; |
310 | bool operator>(MaybeAlign Lhs, Align Rhs) = delete; |
311 | |
312 | bool operator<=(MaybeAlign Lhs, MaybeAlign Rhs) = delete; |
313 | bool operator>=(MaybeAlign Lhs, MaybeAlign Rhs) = delete; |
314 | bool operator<(MaybeAlign Lhs, MaybeAlign Rhs) = delete; |
315 | bool operator>(MaybeAlign Lhs, MaybeAlign Rhs) = delete; |
316 | |
317 | inline Align operator*(Align Lhs, uint64_t Rhs) { |
318 | assert(Rhs > 0 && "Rhs must be positive"); |
319 | return Align(Lhs.value() * Rhs); |
320 | } |
321 | |
322 | inline MaybeAlign operator*(MaybeAlign Lhs, uint64_t Rhs) { |
323 | assert(Rhs > 0 && "Rhs must be positive"); |
324 | return Lhs ? Lhs.getValue() * Rhs : MaybeAlign(); |
325 | } |
326 | |
327 | inline Align operator/(Align Lhs, uint64_t Divisor) { |
328 | assert(llvm::isPowerOf2_64(Divisor) && |
329 | "Divisor must be positive and a power of 2"); |
330 | assert(Lhs != 1 && "Can't halve byte alignment"); |
331 | return Align(Lhs.value() / Divisor); |
332 | } |
333 | |
334 | inline MaybeAlign operator/(MaybeAlign Lhs, uint64_t Divisor) { |
335 | assert(llvm::isPowerOf2_64(Divisor) && |
336 | "Divisor must be positive and a power of 2"); |
337 | return Lhs ? Lhs.getValue() / Divisor : MaybeAlign(); |
338 | } |
339 | |
340 | inline Align max(MaybeAlign Lhs, Align Rhs) { |
341 | return Lhs && *Lhs > Rhs ? *Lhs : Rhs; |
342 | } |
343 | |
344 | inline Align max(Align Lhs, MaybeAlign Rhs) { |
345 | return Rhs && *Rhs > Lhs ? *Rhs : Lhs; |
346 | } |
347 | |
348 | #ifndef NDEBUG |
349 | |
350 | inline std::string DebugStr(const Align &A) { |
351 | return std::to_string(A.value()); |
352 | } |
353 | |
354 | inline std::string DebugStr(const MaybeAlign &MA) { |
355 | if (MA) |
356 | return std::to_string(MA->value()); |
357 | return "None"; |
358 | } |
359 | #endif // NDEBUG |
360 | |
361 | #undef ALIGN_CHECK_ISPOSITIVE |
362 | |
363 | } |
364 | |
365 | #endif // LLVM_SUPPORT_ALIGNMENT_H_ |