File: src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
Warning: line 212, column 36: Division by zero
1 | //===- InstCombineVectorOps.cpp -------------------------------------------===// | ||||||||
2 | // | ||||||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||||||
6 | // | ||||||||
7 | //===----------------------------------------------------------------------===// | ||||||||
8 | // | ||||||||
9 | // This file implements instcombine for ExtractElement, InsertElement and | ||||||||
10 | // ShuffleVector. | ||||||||
11 | // | ||||||||
12 | //===----------------------------------------------------------------------===// | ||||||||
13 | |||||||||
14 | #include "InstCombineInternal.h" | ||||||||
15 | #include "llvm/ADT/APInt.h" | ||||||||
16 | #include "llvm/ADT/ArrayRef.h" | ||||||||
17 | #include "llvm/ADT/DenseMap.h" | ||||||||
18 | #include "llvm/ADT/STLExtras.h" | ||||||||
19 | #include "llvm/ADT/SmallBitVector.h" | ||||||||
20 | #include "llvm/ADT/SmallVector.h" | ||||||||
21 | #include "llvm/ADT/Statistic.h" | ||||||||
22 | #include "llvm/Analysis/InstructionSimplify.h" | ||||||||
23 | #include "llvm/Analysis/VectorUtils.h" | ||||||||
24 | #include "llvm/IR/BasicBlock.h" | ||||||||
25 | #include "llvm/IR/Constant.h" | ||||||||
26 | #include "llvm/IR/Constants.h" | ||||||||
27 | #include "llvm/IR/DerivedTypes.h" | ||||||||
28 | #include "llvm/IR/InstrTypes.h" | ||||||||
29 | #include "llvm/IR/Instruction.h" | ||||||||
30 | #include "llvm/IR/Instructions.h" | ||||||||
31 | #include "llvm/IR/Operator.h" | ||||||||
32 | #include "llvm/IR/PatternMatch.h" | ||||||||
33 | #include "llvm/IR/Type.h" | ||||||||
34 | #include "llvm/IR/User.h" | ||||||||
35 | #include "llvm/IR/Value.h" | ||||||||
36 | #include "llvm/Support/Casting.h" | ||||||||
37 | #include "llvm/Support/ErrorHandling.h" | ||||||||
38 | #include "llvm/Transforms/InstCombine/InstCombineWorklist.h" | ||||||||
39 | #include "llvm/Transforms/InstCombine/InstCombiner.h" | ||||||||
40 | #include <cassert> | ||||||||
41 | #include <cstdint> | ||||||||
42 | #include <iterator> | ||||||||
43 | #include <utility> | ||||||||
44 | |||||||||
45 | using namespace llvm; | ||||||||
46 | using namespace PatternMatch; | ||||||||
47 | |||||||||
48 | #define DEBUG_TYPE "instcombine" | ||||||||
49 | |||||||||
50 | STATISTIC(NumAggregateReconstructionsSimplified, | ||||||||
51 |           "Number of aggregate reconstructions turned into reuse of the " | ||||||||
52 |           "original aggregate"); | ||||||||
53 | |||||||||
54 | /// Return true if the value is cheaper to scalarize than it is to leave as a | ||||||||
55 | /// vector operation. IsConstantExtractIndex indicates whether we are extracting | ||||||||
56 | /// one known element from a vector constant. | ||||||||
57 | /// | ||||||||
58 | /// FIXME: It's possible to create more instructions than previously existed. | ||||||||
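/// For example (illustrative IR, not from this file):
///   %v = add <4 x i32> %x, <i32 1, i32 2, i32 3, i32 4>
///   %e = extractelement <4 x i32> %v, i64 0
/// Here the add has a constant operand whose lane 0 can be extracted for
/// free, so turning the vector add into a single scalar add is considered
/// cheap.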
59 | static bool cheapToScalarize(Value *V, bool IsConstantExtractIndex) { | ||||||||
60 | // If we can pick a scalar constant value out of a vector, that is free. | ||||||||
61 | if (auto *C = dyn_cast<Constant>(V)) | ||||||||
62 | return IsConstantExtractIndex || C->getSplatValue(); | ||||||||
63 | |||||||||
64 | // An insertelement to the same constant index as our extract will simplify | ||||||||
65 | // to the scalar inserted element. An insertelement to a different constant | ||||||||
66 | // index is irrelevant to our extract. | ||||||||
67 | if (match(V, m_InsertElt(m_Value(), m_Value(), m_ConstantInt()))) | ||||||||
68 | return IsConstantExtractIndex; | ||||||||
69 | |||||||||
70 | if (match(V, m_OneUse(m_Load(m_Value())))) | ||||||||
71 | return true; | ||||||||
72 | |||||||||
73 | if (match(V, m_OneUse(m_UnOp()))) | ||||||||
74 | return true; | ||||||||
75 | |||||||||
76 | Value *V0, *V1; | ||||||||
77 | if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1))))) | ||||||||
78 | if (cheapToScalarize(V0, IsConstantExtractIndex) || | ||||||||
79 | cheapToScalarize(V1, IsConstantExtractIndex)) | ||||||||
80 | return true; | ||||||||
81 | |||||||||
82 | CmpInst::Predicate UnusedPred; | ||||||||
83 | if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1))))) | ||||||||
84 | if (cheapToScalarize(V0, IsConstantExtractIndex) || | ||||||||
85 | cheapToScalarize(V1, IsConstantExtractIndex)) | ||||||||
86 | return true; | ||||||||
87 | |||||||||
88 | return false; | ||||||||
89 | } | ||||||||
90 | |||||||||
91 | // If we have a PHI node with a vector type that is only used to feed | ||||||||
92 | // itself and be an operand of extractelement at a constant location, | ||||||||
93 | // try to replace the PHI of the vector type with a PHI of a scalar type. | ||||||||
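// For example (illustrative IR, assuming a loop-carried recurrence):
//   %vec = phi <4 x float> [ %init, %entry ], [ %next, %loop ]
//   %next = fadd <4 x float> %vec, <float 1.0, float 1.0, float 1.0, float 1.0>
//   %e = extractelement <4 x float> %vec, i64 0
// When %next is only used by the PHI, the vector PHI can be replaced by a
// scalar PHI of element 0 fed by a scalar fadd, and %e by that scalar PHI.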
94 | Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI, | ||||||||
95 | PHINode *PN) { | ||||||||
96 | SmallVector<Instruction *, 2> Extracts; | ||||||||
97 | // The users we want the PHI to have are: | ||||||||
98 | // 1) The EI ExtractElement (we already know this) | ||||||||
99 | // 2) Possibly more ExtractElements with the same index. | ||||||||
100 | // 3) Another operand, which will feed back into the PHI. | ||||||||
101 | Instruction *PHIUser = nullptr; | ||||||||
102 | for (auto U : PN->users()) { | ||||||||
103 | if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) { | ||||||||
104 | if (EI.getIndexOperand() == EU->getIndexOperand()) | ||||||||
105 | Extracts.push_back(EU); | ||||||||
106 | else | ||||||||
107 | return nullptr; | ||||||||
108 | } else if (!PHIUser) { | ||||||||
109 | PHIUser = cast<Instruction>(U); | ||||||||
110 | } else { | ||||||||
111 | return nullptr; | ||||||||
112 | } | ||||||||
113 | } | ||||||||
114 | |||||||||
115 | if (!PHIUser) | ||||||||
116 | return nullptr; | ||||||||
117 | |||||||||
118 | // Verify that this PHI user has one use, which is the PHI itself, | ||||||||
119 | // and that it is a binary operation which is cheap to scalarize; | ||||||||
120 | // otherwise return nullptr. | ||||||||
121 | if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) || | ||||||||
122 | !(isa<BinaryOperator>(PHIUser)) || !cheapToScalarize(PHIUser, true)) | ||||||||
123 | return nullptr; | ||||||||
124 | |||||||||
125 | // Create a scalar PHI node that will replace the vector PHI node | ||||||||
126 | // just before the current PHI node. | ||||||||
127 | PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith( | ||||||||
128 | PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), *PN)); | ||||||||
129 | // Scalarize each PHI operand. | ||||||||
130 | for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) { | ||||||||
131 | Value *PHIInVal = PN->getIncomingValue(i); | ||||||||
132 | BasicBlock *inBB = PN->getIncomingBlock(i); | ||||||||
133 | Value *Elt = EI.getIndexOperand(); | ||||||||
134 | // If the operand is the PHI induction variable: | ||||||||
135 | if (PHIInVal == PHIUser) { | ||||||||
136 | // Scalarize the binary operation. Its first operand is the | ||||||||
137 | // scalar PHI, and the second operand is extracted from the other | ||||||||
138 | // vector operand. | ||||||||
139 | BinaryOperator *B0 = cast<BinaryOperator>(PHIUser); | ||||||||
140 | unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0; | ||||||||
141 | Value *Op = InsertNewInstWith( | ||||||||
142 | ExtractElementInst::Create(B0->getOperand(opId), Elt, | ||||||||
143 | B0->getOperand(opId)->getName() + ".Elt"), | ||||||||
144 | *B0); | ||||||||
145 | Value *newPHIUser = InsertNewInstWith( | ||||||||
146 | BinaryOperator::CreateWithCopiedFlags(B0->getOpcode(), | ||||||||
147 | scalarPHI, Op, B0), *B0); | ||||||||
148 | scalarPHI->addIncoming(newPHIUser, inBB); | ||||||||
149 | } else { | ||||||||
150 | // Scalarize PHI input: | ||||||||
151 | Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, ""); | ||||||||
152 | // Insert the new instruction into the predecessor basic block. | ||||||||
153 | Instruction *pos = dyn_cast<Instruction>(PHIInVal); | ||||||||
154 | BasicBlock::iterator InsertPos; | ||||||||
155 | if (pos && !isa<PHINode>(pos)) { | ||||||||
156 | InsertPos = ++pos->getIterator(); | ||||||||
157 | } else { | ||||||||
158 | InsertPos = inBB->getFirstInsertionPt(); | ||||||||
159 | } | ||||||||
160 | |||||||||
161 | InsertNewInstWith(newEI, *InsertPos); | ||||||||
162 | |||||||||
163 | scalarPHI->addIncoming(newEI, inBB); | ||||||||
164 | } | ||||||||
165 | } | ||||||||
166 | |||||||||
167 | for (auto E : Extracts) | ||||||||
168 | replaceInstUsesWith(*E, scalarPHI); | ||||||||
169 | |||||||||
170 | return &EI; | ||||||||
171 | } | ||||||||
172 | |||||||||
173 | static Instruction *foldBitcastExtElt(ExtractElementInst &Ext, | ||||||||
174 | InstCombiner::BuilderTy &Builder, | ||||||||
175 | bool IsBigEndian) { | ||||||||
176 | Value *X; | ||||||||
177 | uint64_t ExtIndexC; | ||||||||
178 | if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) || | ||||||||
179 | !X->getType()->isVectorTy() || | ||||||||
180 | !match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC))) | ||||||||
181 | return nullptr; | ||||||||
182 | |||||||||
183 | // If this extractelement is using a bitcast from a vector of the same number | ||||||||
184 | // of elements, see if we can find the source element from the source vector: | ||||||||
185 | // extelt (bitcast VecX), IndexC --> bitcast X[IndexC] | ||||||||
186 | auto *SrcTy = cast<VectorType>(X->getType()); | ||||||||
187 | Type *DestTy = Ext.getType(); | ||||||||
188 | ElementCount NumSrcElts = SrcTy->getElementCount(); | ||||||||
189 | ElementCount NumElts = | ||||||||
190 | cast<VectorType>(Ext.getVectorOperandType())->getElementCount(); | ||||||||
191 | if (NumSrcElts == NumElts) | ||||||||
192 | if (Value *Elt = findScalarElement(X, ExtIndexC)) | ||||||||
193 | return new BitCastInst(Elt, DestTy); | ||||||||
194 | |||||||||
195 | assert(NumSrcElts.isScalable() == NumElts.isScalable() && | ||||||||
196 |        "Src and Dst must be the same sort of vector type"); | ||||||||
197 | |||||||||
198 | // If the source elements are wider than the destination, try to shift and | ||||||||
199 | // truncate a subset of scalar bits of an insert op. | ||||||||
200 | if (NumSrcElts.getKnownMinValue() < NumElts.getKnownMinValue()) { | ||||||||
201 | Value *Scalar; | ||||||||
202 | uint64_t InsIndexC; | ||||||||
203 | if (!match(X, m_InsertElt(m_Value(), m_Value(Scalar), | ||||||||
204 | m_ConstantInt(InsIndexC)))) | ||||||||
205 | return nullptr; | ||||||||
206 | |||||||||
207 | // The extract must be from the subset of vector elements that we inserted | ||||||||
208 | // into. Example: if we inserted element 1 of a <2 x i64> and we are | ||||||||
209 | // extracting an i16 (narrowing ratio = 4), then this extract must be from 1 | ||||||||
210 | // of elements 4-7 of the bitcasted vector. | ||||||||
211 | unsigned NarrowingRatio = | ||||||||
212 | NumElts.getKnownMinValue() / NumSrcElts.getKnownMinValue(); | ||||||||
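// Worked numbers for the example above (illustrative): a <2 x i64> source
// bitcast to <8 x i16> gives NumSrcElts = 2 and NumElts = 8, so
// NarrowingRatio = 8 / 2 = 4 and extract indices 4-7 map to inserted index 1.
// NumSrcElts is the element count of a vector type, so its known minimum is
// expected to be non-zero; the division-by-zero warning reported above flags
// the degenerate case where it would be zero.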
213 | if (ExtIndexC / NarrowingRatio != InsIndexC) | ||||||||
214 | return nullptr; | ||||||||
215 | |||||||||
216 | // We are extracting part of the original scalar. How that scalar is | ||||||||
217 | // inserted into the vector depends on the endian-ness. Example: | ||||||||
218 | // Vector Byte Elt Index: 0 1 2 3 4 5 6 7 | ||||||||
219 | // +--+--+--+--+--+--+--+--+ | ||||||||
220 | // inselt <2 x i32> V, <i32> S, 1: |V0|V1|V2|V3|S0|S1|S2|S3| | ||||||||
221 | // extelt <4 x i16> V', 3: | |S2|S3| | ||||||||
222 | // +--+--+--+--+--+--+--+--+ | ||||||||
223 | // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value. | ||||||||
224 | // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value. | ||||||||
225 | // In this example, we must right-shift little-endian. Big-endian is just a | ||||||||
226 | // truncate. | ||||||||
227 | unsigned Chunk = ExtIndexC % NarrowingRatio; | ||||||||
228 | if (IsBigEndian) | ||||||||
229 | Chunk = NarrowingRatio - 1 - Chunk; | ||||||||
230 | |||||||||
231 | // Bail out if this is an FP vector to FP vector sequence. That would take | ||||||||
232 | // more instructions than we started with unless there is no shift, and it | ||||||||
233 | // may not be handled as well in the backend. | ||||||||
234 | bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy(); | ||||||||
235 | bool NeedDestBitcast = DestTy->isFloatingPointTy(); | ||||||||
236 | if (NeedSrcBitcast && NeedDestBitcast) | ||||||||
237 | return nullptr; | ||||||||
238 | |||||||||
239 | unsigned SrcWidth = SrcTy->getScalarSizeInBits(); | ||||||||
240 | unsigned DestWidth = DestTy->getPrimitiveSizeInBits(); | ||||||||
241 | unsigned ShAmt = Chunk * DestWidth; | ||||||||
242 | |||||||||
243 | // TODO: This limitation is more strict than necessary. We could sum the | ||||||||
244 | // number of new instructions and subtract the number eliminated to know if | ||||||||
245 | // we can proceed. | ||||||||
246 | if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse()) | ||||||||
247 | if (NeedSrcBitcast || NeedDestBitcast) | ||||||||
248 | return nullptr; | ||||||||
249 | |||||||||
250 | if (NeedSrcBitcast) { | ||||||||
251 | Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth); | ||||||||
252 | Scalar = Builder.CreateBitCast(Scalar, SrcIntTy); | ||||||||
253 | } | ||||||||
254 | |||||||||
255 | if (ShAmt) { | ||||||||
256 | // Bail out if we could end with more instructions than we started with. | ||||||||
257 | if (!Ext.getVectorOperand()->hasOneUse()) | ||||||||
258 | return nullptr; | ||||||||
259 | Scalar = Builder.CreateLShr(Scalar, ShAmt); | ||||||||
260 | } | ||||||||
261 | |||||||||
262 | if (NeedDestBitcast) { | ||||||||
263 | Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth); | ||||||||
264 | return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy); | ||||||||
265 | } | ||||||||
266 | return new TruncInst(Scalar, DestTy); | ||||||||
267 | } | ||||||||
268 | |||||||||
269 | return nullptr; | ||||||||
270 | } | ||||||||
271 | |||||||||
272 | /// Find elements of V demanded by UserInstr. | ||||||||
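/// For example (illustrative): if V is a <4 x i32> and UserInstr is
/// "extractelement <4 x i32> %V, i64 2", only element 2 is demanded (0b0100).
/// A shufflevector user demands exactly the lanes of V that its mask selects;
/// any other user conservatively demands all elements.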
273 | static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) { | ||||||||
274 | unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements(); | ||||||||
275 | |||||||||
276 | // Conservatively assume that all elements are needed. | ||||||||
277 | APInt UsedElts(APInt::getAllOnesValue(VWidth)); | ||||||||
278 | |||||||||
279 | switch (UserInstr->getOpcode()) { | ||||||||
280 | case Instruction::ExtractElement: { | ||||||||
281 | ExtractElementInst *EEI = cast<ExtractElementInst>(UserInstr); | ||||||||
282 | assert(EEI->getVectorOperand() == V); | ||||||||
283 | ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(EEI->getIndexOperand()); | ||||||||
284 | if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) { | ||||||||
285 | UsedElts = APInt::getOneBitSet(VWidth, EEIIndexC->getZExtValue()); | ||||||||
286 | } | ||||||||
287 | break; | ||||||||
288 | } | ||||||||
289 | case Instruction::ShuffleVector: { | ||||||||
290 | ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(UserInstr); | ||||||||
291 | unsigned MaskNumElts = | ||||||||
292 | cast<FixedVectorType>(UserInstr->getType())->getNumElements(); | ||||||||
293 | |||||||||
294 | UsedElts = APInt(VWidth, 0); | ||||||||
295 | for (unsigned i = 0; i < MaskNumElts; i++) { | ||||||||
296 | unsigned MaskVal = Shuffle->getMaskValue(i); | ||||||||
297 | if (MaskVal == -1u || MaskVal >= 2 * VWidth) | ||||||||
298 | continue; | ||||||||
299 | if (Shuffle->getOperand(0) == V && (MaskVal < VWidth)) | ||||||||
300 | UsedElts.setBit(MaskVal); | ||||||||
301 | if (Shuffle->getOperand(1) == V && | ||||||||
302 | ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth))) | ||||||||
303 | UsedElts.setBit(MaskVal - VWidth); | ||||||||
304 | } | ||||||||
305 | break; | ||||||||
306 | } | ||||||||
307 | default: | ||||||||
308 | break; | ||||||||
309 | } | ||||||||
310 | return UsedElts; | ||||||||
311 | } | ||||||||
312 | |||||||||
313 | /// Find union of elements of V demanded by all its users. | ||||||||
314 | /// If it is known by querying findDemandedEltsBySingleUser that | ||||||||
315 | /// no user demands an element of V, then the corresponding bit | ||||||||
316 | /// remains unset in the returned value. | ||||||||
317 | static APInt findDemandedEltsByAllUsers(Value *V) { | ||||||||
318 | unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements(); | ||||||||
319 | |||||||||
320 | APInt UnionUsedElts(VWidth, 0); | ||||||||
321 | for (const Use &U : V->uses()) { | ||||||||
322 | if (Instruction *I = dyn_cast<Instruction>(U.getUser())) { | ||||||||
323 | UnionUsedElts |= findDemandedEltsBySingleUser(V, I); | ||||||||
324 | } else { | ||||||||
325 | UnionUsedElts = APInt::getAllOnesValue(VWidth); | ||||||||
326 | break; | ||||||||
327 | } | ||||||||
328 | |||||||||
329 | if (UnionUsedElts.isAllOnesValue()) | ||||||||
330 | break; | ||||||||
331 | } | ||||||||
332 | |||||||||
333 | return UnionUsedElts; | ||||||||
334 | } | ||||||||
335 | |||||||||
336 | Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) { | ||||||||
337 | Value *SrcVec = EI.getVectorOperand(); | ||||||||
338 | Value *Index = EI.getIndexOperand(); | ||||||||
339 | if (Value *V = SimplifyExtractElementInst(SrcVec, Index, | ||||||||
| |||||||||
340 | SQ.getWithInstruction(&EI))) | ||||||||
341 | return replaceInstUsesWith(EI, V); | ||||||||
342 | |||||||||
343 | // If extracting a specified index from the vector, see if we can recursively | ||||||||
344 | // find a previously computed scalar that was inserted into the vector. | ||||||||
345 | auto *IndexC = dyn_cast<ConstantInt>(Index); | ||||||||
346 | if (IndexC) { | ||||||||
347 | ElementCount EC = EI.getVectorOperandType()->getElementCount(); | ||||||||
348 | unsigned NumElts = EC.getKnownMinValue(); | ||||||||
349 | |||||||||
350 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(SrcVec)) { | ||||||||
351 | Intrinsic::ID IID = II->getIntrinsicID(); | ||||||||
352 | // Index needs to be lower than the minimum size of the vector, because | ||||||||
353 | // for scalable vector, the vector size is known at run time. | ||||||||
354 | if (IID == Intrinsic::experimental_stepvector && | ||||||||
355 | IndexC->getValue().ult(NumElts)) { | ||||||||
356 | Type *Ty = EI.getType(); | ||||||||
357 | unsigned BitWidth = Ty->getIntegerBitWidth(); | ||||||||
358 | Value *Idx; | ||||||||
359 | // Return index when its value does not exceed the allowed limit | ||||||||
360 | // for the element type of the vector, otherwise return undefined. | ||||||||
361 | if (IndexC->getValue().getActiveBits() <= BitWidth) | ||||||||
362 | Idx = ConstantInt::get(Ty, IndexC->getValue().zextOrTrunc(BitWidth)); | ||||||||
363 | else | ||||||||
364 | Idx = UndefValue::get(Ty); | ||||||||
365 | return replaceInstUsesWith(EI, Idx); | ||||||||
366 | } | ||||||||
367 | } | ||||||||
368 | |||||||||
369 | // InstSimplify should handle cases where the index is invalid. | ||||||||
370 | // For fixed-length vector, it's invalid to extract out-of-range element. | ||||||||
371 | if (!EC.isScalable() && IndexC->getValue().uge(NumElts)) | ||||||||
372 | return nullptr; | ||||||||
373 | |||||||||
374 | // This instruction only demands the single element from the input vector. | ||||||||
375 | // Skip for scalable type, the number of elements is unknown at | ||||||||
376 | // compile-time. | ||||||||
377 | if (!EC.isScalable() && NumElts != 1) { | ||||||||
378 | // If the input vector has a single use, simplify it based on this use | ||||||||
379 | // property. | ||||||||
380 | if (SrcVec->hasOneUse()) { | ||||||||
381 | APInt UndefElts(NumElts, 0); | ||||||||
382 | APInt DemandedElts(NumElts, 0); | ||||||||
383 | DemandedElts.setBit(IndexC->getZExtValue()); | ||||||||
384 | if (Value *V = | ||||||||
385 | SimplifyDemandedVectorElts(SrcVec, DemandedElts, UndefElts)) | ||||||||
386 | return replaceOperand(EI, 0, V); | ||||||||
387 | } else { | ||||||||
388 | // If the input vector has multiple uses, simplify it based on a union | ||||||||
389 | // of all elements used. | ||||||||
390 | APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec); | ||||||||
391 | if (!DemandedElts.isAllOnesValue()) { | ||||||||
392 | APInt UndefElts(NumElts, 0); | ||||||||
393 | if (Value *V = SimplifyDemandedVectorElts( | ||||||||
394 | SrcVec, DemandedElts, UndefElts, 0 /* Depth */, | ||||||||
395 | true /* AllowMultipleUsers */)) { | ||||||||
396 | if (V != SrcVec) { | ||||||||
397 | SrcVec->replaceAllUsesWith(V); | ||||||||
398 | return &EI; | ||||||||
399 | } | ||||||||
400 | } | ||||||||
401 | } | ||||||||
402 | } | ||||||||
403 | } | ||||||||
404 | |||||||||
405 | if (Instruction *I = foldBitcastExtElt(EI, Builder, DL.isBigEndian())) | ||||||||
406 | return I; | ||||||||
407 | |||||||||
408 | // If there's a vector PHI feeding a scalar use through this extractelement | ||||||||
409 | // instruction, try to scalarize the PHI. | ||||||||
410 | if (auto *Phi = dyn_cast<PHINode>(SrcVec)) | ||||||||
411 | if (Instruction *ScalarPHI = scalarizePHI(EI, Phi)) | ||||||||
412 | return ScalarPHI; | ||||||||
413 | } | ||||||||
414 | |||||||||
415 | // TODO come up with a n-ary matcher that subsumes both unary and | ||||||||
416 | // binary matchers. | ||||||||
417 | UnaryOperator *UO; | ||||||||
418 | if (match(SrcVec, m_UnOp(UO)) && cheapToScalarize(SrcVec, IndexC)) { | ||||||||
419 | // extelt (unop X), Index --> unop (extelt X, Index) | ||||||||
420 | Value *X = UO->getOperand(0); | ||||||||
421 | Value *E = Builder.CreateExtractElement(X, Index); | ||||||||
422 | return UnaryOperator::CreateWithCopiedFlags(UO->getOpcode(), E, UO); | ||||||||
423 | } | ||||||||
424 | |||||||||
425 | BinaryOperator *BO; | ||||||||
426 | if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, IndexC)) { | ||||||||
427 | // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index) | ||||||||
428 | Value *X = BO->getOperand(0), *Y = BO->getOperand(1); | ||||||||
429 | Value *E0 = Builder.CreateExtractElement(X, Index); | ||||||||
430 | Value *E1 = Builder.CreateExtractElement(Y, Index); | ||||||||
431 | return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), E0, E1, BO); | ||||||||
432 | } | ||||||||
433 | |||||||||
434 | Value *X, *Y; | ||||||||
435 | CmpInst::Predicate Pred; | ||||||||
436 | if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) && | ||||||||
437 | cheapToScalarize(SrcVec, IndexC)) { | ||||||||
438 | // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index) | ||||||||
439 | Value *E0 = Builder.CreateExtractElement(X, Index); | ||||||||
440 | Value *E1 = Builder.CreateExtractElement(Y, Index); | ||||||||
441 | return CmpInst::Create(cast<CmpInst>(SrcVec)->getOpcode(), Pred, E0, E1); | ||||||||
442 | } | ||||||||
443 | |||||||||
444 | if (auto *I = dyn_cast<Instruction>(SrcVec)) { | ||||||||
445 | if (auto *IE = dyn_cast<InsertElementInst>(I)) { | ||||||||
446 | // Extracting the inserted element? | ||||||||
447 | if (IE->getOperand(2) == Index) | ||||||||
448 | return replaceInstUsesWith(EI, IE->getOperand(1)); | ||||||||
449 | // If the inserted and extracted elements are constants, they must not | ||||||||
450 | // be the same value, extract from the pre-inserted value instead. | ||||||||
451 | if (isa<Constant>(IE->getOperand(2)) && IndexC) | ||||||||
452 | return replaceOperand(EI, 0, IE->getOperand(0)); | ||||||||
453 | } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) { | ||||||||
454 | auto *VecType = cast<VectorType>(GEP->getType()); | ||||||||
455 | ElementCount EC = VecType->getElementCount(); | ||||||||
456 | uint64_t IdxVal = IndexC ? IndexC->getZExtValue() : 0; | ||||||||
457 | if (IndexC && IdxVal < EC.getKnownMinValue() && GEP->hasOneUse()) { | ||||||||
458 | // Find out why we have a vector result - these are a few examples: | ||||||||
459 | // 1. We have a scalar pointer and a vector of indices, or | ||||||||
460 | // 2. We have a vector of pointers and a scalar index, or | ||||||||
461 | // 3. We have a vector of pointers and a vector of indices, etc. | ||||||||
462 | // Here we only consider combining when there is exactly one vector | ||||||||
463 | // operand, since the optimization is less obviously a win due to | ||||||||
464 | // needing more than one extractelement. | ||||||||
465 | |||||||||
466 | unsigned VectorOps = | ||||||||
467 | llvm::count_if(GEP->operands(), [](const Value *V) { | ||||||||
468 | return isa<VectorType>(V->getType()); | ||||||||
469 | }); | ||||||||
470 | if (VectorOps > 1) | ||||||||
471 | return nullptr; | ||||||||
472 | assert(VectorOps == 1 && "Expected exactly one vector GEP operand!"); | ||||||||
473 | |||||||||
474 | Value *NewPtr = GEP->getPointerOperand(); | ||||||||
475 | if (isa<VectorType>(NewPtr->getType())) | ||||||||
476 | NewPtr = Builder.CreateExtractElement(NewPtr, IndexC); | ||||||||
477 | |||||||||
478 | SmallVector<Value *> NewOps; | ||||||||
479 | for (unsigned I = 1; I != GEP->getNumOperands(); ++I) { | ||||||||
480 | Value *Op = GEP->getOperand(I); | ||||||||
481 | if (isa<VectorType>(Op->getType())) | ||||||||
482 | NewOps.push_back(Builder.CreateExtractElement(Op, IndexC)); | ||||||||
483 | else | ||||||||
484 | NewOps.push_back(Op); | ||||||||
485 | } | ||||||||
486 | |||||||||
487 | GetElementPtrInst *NewGEP = GetElementPtrInst::Create( | ||||||||
488 | cast<PointerType>(NewPtr->getType())->getElementType(), NewPtr, | ||||||||
489 | NewOps); | ||||||||
490 | NewGEP->setIsInBounds(GEP->isInBounds()); | ||||||||
491 | return NewGEP; | ||||||||
492 | } | ||||||||
493 | return nullptr; | ||||||||
494 | } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) { | ||||||||
495 | // If this is extracting an element from a shufflevector, figure out where | ||||||||
496 | // it came from and extract from the appropriate input element instead. | ||||||||
497 | // Restrict the following transformation to fixed-length vector. | ||||||||
498 | if (isa<FixedVectorType>(SVI->getType()) && isa<ConstantInt>(Index)) { | ||||||||
499 | int SrcIdx = | ||||||||
500 | SVI->getMaskValue(cast<ConstantInt>(Index)->getZExtValue()); | ||||||||
501 | Value *Src; | ||||||||
502 | unsigned LHSWidth = cast<FixedVectorType>(SVI->getOperand(0)->getType()) | ||||||||
503 | ->getNumElements(); | ||||||||
504 | |||||||||
505 | if (SrcIdx < 0) | ||||||||
506 | return replaceInstUsesWith(EI, UndefValue::get(EI.getType())); | ||||||||
507 | if (SrcIdx < (int)LHSWidth) | ||||||||
508 | Src = SVI->getOperand(0); | ||||||||
509 | else { | ||||||||
510 | SrcIdx -= LHSWidth; | ||||||||
511 | Src = SVI->getOperand(1); | ||||||||
512 | } | ||||||||
513 | Type *Int32Ty = Type::getInt32Ty(EI.getContext()); | ||||||||
514 | return ExtractElementInst::Create( | ||||||||
515 | Src, ConstantInt::get(Int32Ty, SrcIdx, false)); | ||||||||
516 | } | ||||||||
517 | } else if (auto *CI = dyn_cast<CastInst>(I)) { | ||||||||
518 | // Canonicalize extractelement(cast) -> cast(extractelement). | ||||||||
519 | // Bitcasts can change the number of vector elements, and they cost | ||||||||
520 | // nothing. | ||||||||
521 | if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) { | ||||||||
522 | Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index); | ||||||||
523 | return CastInst::Create(CI->getOpcode(), EE, EI.getType()); | ||||||||
524 | } | ||||||||
525 | } | ||||||||
526 | } | ||||||||
527 | return nullptr; | ||||||||
528 | } | ||||||||
529 | |||||||||
530 | /// If V is a shuffle of values that ONLY returns elements from either LHS or | ||||||||
531 | /// RHS, return the shuffle mask and true. Otherwise, return false. | ||||||||
532 | static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, | ||||||||
533 | SmallVectorImpl<int> &Mask) { | ||||||||
534 | assert(LHS->getType() == RHS->getType() && | ||||||||
535 |        "Invalid CollectSingleShuffleElements"); | ||||||||
536 | unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements(); | ||||||||
537 | |||||||||
538 | if (match(V, m_Undef())) { | ||||||||
539 | Mask.assign(NumElts, -1); | ||||||||
540 | return true; | ||||||||
541 | } | ||||||||
542 | |||||||||
543 | if (V == LHS) { | ||||||||
544 | for (unsigned i = 0; i != NumElts; ++i) | ||||||||
545 | Mask.push_back(i); | ||||||||
546 | return true; | ||||||||
547 | } | ||||||||
548 | |||||||||
549 | if (V == RHS) { | ||||||||
550 | for (unsigned i = 0; i != NumElts; ++i) | ||||||||
551 | Mask.push_back(i + NumElts); | ||||||||
552 | return true; | ||||||||
553 | } | ||||||||
554 | |||||||||
555 | if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) { | ||||||||
556 | // If this is an insert of an extract from some other vector, include it. | ||||||||
557 | Value *VecOp = IEI->getOperand(0); | ||||||||
558 | Value *ScalarOp = IEI->getOperand(1); | ||||||||
559 | Value *IdxOp = IEI->getOperand(2); | ||||||||
560 | |||||||||
561 | if (!isa<ConstantInt>(IdxOp)) | ||||||||
562 | return false; | ||||||||
563 | unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); | ||||||||
564 | |||||||||
565 | if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector. | ||||||||
566 | // We can handle this if the vector we are inserting into is | ||||||||
567 | // transitively ok. | ||||||||
568 | if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { | ||||||||
569 | // If so, update the mask to reflect the inserted undef. | ||||||||
570 | Mask[InsertedIdx] = -1; | ||||||||
571 | return true; | ||||||||
572 | } | ||||||||
573 | } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){ | ||||||||
574 | if (isa<ConstantInt>(EI->getOperand(1))) { | ||||||||
575 | unsigned ExtractedIdx = | ||||||||
576 | cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); | ||||||||
577 | unsigned NumLHSElts = | ||||||||
578 | cast<FixedVectorType>(LHS->getType())->getNumElements(); | ||||||||
579 | |||||||||
580 | // This must be extracting from either LHS or RHS. | ||||||||
581 | if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) { | ||||||||
582 | // We can handle this if the vector we are inserting into is | ||||||||
583 | // transitively ok. | ||||||||
584 | if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { | ||||||||
585 | // If so, update the mask to reflect the inserted value. | ||||||||
586 | if (EI->getOperand(0) == LHS) { | ||||||||
587 | Mask[InsertedIdx % NumElts] = ExtractedIdx; | ||||||||
588 | } else { | ||||||||
589 | assert(EI->getOperand(0) == RHS); | ||||||||
590 | Mask[InsertedIdx % NumElts] = ExtractedIdx + NumLHSElts; | ||||||||
591 | } | ||||||||
592 | return true; | ||||||||
593 | } | ||||||||
594 | } | ||||||||
595 | } | ||||||||
596 | } | ||||||||
597 | } | ||||||||
598 | |||||||||
599 | return false; | ||||||||
600 | } | ||||||||
601 | |||||||||
602 | /// If we have insertion into a vector that is wider than the vector that we | ||||||||
603 | /// are extracting from, try to widen the source vector to allow a single | ||||||||
604 | /// shufflevector to replace one or more insert/extract pairs. | ||||||||
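/// For example (illustrative): when extracts from a <2 x float> feed inserts
/// into a <4 x float>, the source is widened with a shufflevector against a
/// poison <2 x float> using mask {0, 1, -1, -1}, and extracts in the same
/// block are redirected to the widened value so the insert/extract pair can
/// later fold into a single shuffle.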
605 | static void replaceExtractElements(InsertElementInst *InsElt, | ||||||||
606 | ExtractElementInst *ExtElt, | ||||||||
607 | InstCombinerImpl &IC) { | ||||||||
608 | auto *InsVecType = cast<FixedVectorType>(InsElt->getType()); | ||||||||
609 | auto *ExtVecType = cast<FixedVectorType>(ExtElt->getVectorOperandType()); | ||||||||
610 | unsigned NumInsElts = InsVecType->getNumElements(); | ||||||||
611 | unsigned NumExtElts = ExtVecType->getNumElements(); | ||||||||
612 | |||||||||
613 | // The inserted-to vector must be wider than the extracted-from vector. | ||||||||
614 | if (InsVecType->getElementType() != ExtVecType->getElementType() || | ||||||||
615 | NumExtElts >= NumInsElts) | ||||||||
616 | return; | ||||||||
617 | |||||||||
618 | // Create a shuffle mask to widen the extended-from vector using poison | ||||||||
619 | // values. The mask selects all of the values of the original vector followed | ||||||||
620 | // by as many poison values as needed to create a vector of the same length | ||||||||
621 | // as the inserted-to vector. | ||||||||
622 | SmallVector<int, 16> ExtendMask; | ||||||||
623 | for (unsigned i = 0; i < NumExtElts; ++i) | ||||||||
624 | ExtendMask.push_back(i); | ||||||||
625 | for (unsigned i = NumExtElts; i < NumInsElts; ++i) | ||||||||
626 | ExtendMask.push_back(-1); | ||||||||
627 | |||||||||
628 | Value *ExtVecOp = ExtElt->getVectorOperand(); | ||||||||
629 | auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp); | ||||||||
630 | BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst)) | ||||||||
631 | ? ExtVecOpInst->getParent() | ||||||||
632 | : ExtElt->getParent(); | ||||||||
633 | |||||||||
634 | // TODO: This restriction matches the basic block check below when creating | ||||||||
635 | // new extractelement instructions. If that limitation is removed, this one | ||||||||
636 | // could also be removed. But for now, we just bail out to ensure that we | ||||||||
637 | // will replace the extractelement instruction that is feeding our | ||||||||
638 | // insertelement instruction. This allows the insertelement to then be | ||||||||
639 | // replaced by a shufflevector. If the insertelement is not replaced, we can | ||||||||
640 | // induce infinite looping because there's an optimization for extractelement | ||||||||
641 | // that will delete our widening shuffle. This would trigger another attempt | ||||||||
642 | // here to create that shuffle, and we spin forever. | ||||||||
643 | if (InsertionBlock != InsElt->getParent()) | ||||||||
644 | return; | ||||||||
645 | |||||||||
646 | // TODO: This restriction matches the check in visitInsertElementInst() and | ||||||||
647 | // prevents an infinite loop caused by not turning the extract/insert pair | ||||||||
648 | // into a shuffle. We really should not need either check, but we're lacking | ||||||||
649 | // folds for shufflevectors because we're afraid to generate shuffle masks | ||||||||
650 | // that the backend can't handle. | ||||||||
651 | if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back())) | ||||||||
652 | return; | ||||||||
653 | |||||||||
654 | auto *WideVec = | ||||||||
655 | new ShuffleVectorInst(ExtVecOp, PoisonValue::get(ExtVecType), ExtendMask); | ||||||||
656 | |||||||||
657 | // Insert the new shuffle after the vector operand of the extract is defined | ||||||||
658 | // (as long as it's not a PHI) or at the start of the basic block of the | ||||||||
659 | // extract, so any subsequent extracts in the same basic block can use it. | ||||||||
660 | // TODO: Insert before the earliest ExtractElementInst that is replaced. | ||||||||
661 | if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst)) | ||||||||
662 | WideVec->insertAfter(ExtVecOpInst); | ||||||||
663 | else | ||||||||
664 | IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt()); | ||||||||
665 | |||||||||
666 | // Replace extracts from the original narrow vector with extracts from the new | ||||||||
667 | // wide vector. | ||||||||
668 | for (User *U : ExtVecOp->users()) { | ||||||||
669 | ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U); | ||||||||
670 | if (!OldExt || OldExt->getParent() != WideVec->getParent()) | ||||||||
671 | continue; | ||||||||
672 | auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1)); | ||||||||
673 | NewExt->insertAfter(OldExt); | ||||||||
674 | IC.replaceInstUsesWith(*OldExt, NewExt); | ||||||||
675 | } | ||||||||
676 | } | ||||||||
677 | |||||||||
678 | /// We are building a shuffle to create V, which is a sequence of insertelement, | ||||||||
679 | /// extractelement pairs. If PermittedRHS is set, then we must either use it or | ||||||||
680 | /// not rely on the second vector source. Return a std::pair containing the | ||||||||
681 | /// left and right vectors of the proposed shuffle (or 0), and set the Mask | ||||||||
682 | /// parameter as required. | ||||||||
683 | /// | ||||||||
684 | /// Note: we intentionally don't try to fold earlier shuffles since they have | ||||||||
685 | /// often been chosen carefully to be efficiently implementable on the target. | ||||||||
686 | using ShuffleOps = std::pair<Value *, Value *>; | ||||||||
687 | |||||||||
688 | static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask, | ||||||||
689 | Value *PermittedRHS, | ||||||||
690 | InstCombinerImpl &IC) { | ||||||||
691 | assert(V->getType()->isVectorTy() && "Invalid shuffle!"); | ||||||||
692 | unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements(); | ||||||||
693 | |||||||||
694 | if (match(V, m_Undef())) { | ||||||||
695 | Mask.assign(NumElts, -1); | ||||||||
696 | return std::make_pair( | ||||||||
697 | PermittedRHS ? UndefValue::get(PermittedRHS->getType()) : V, nullptr); | ||||||||
698 | } | ||||||||
699 | |||||||||
700 | if (isa<ConstantAggregateZero>(V)) { | ||||||||
701 | Mask.assign(NumElts, 0); | ||||||||
702 | return std::make_pair(V, nullptr); | ||||||||
703 | } | ||||||||
704 | |||||||||
705 | if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) { | ||||||||
706 | // If this is an insert of an extract from some other vector, include it. | ||||||||
707 | Value *VecOp = IEI->getOperand(0); | ||||||||
708 | Value *ScalarOp = IEI->getOperand(1); | ||||||||
709 | Value *IdxOp = IEI->getOperand(2); | ||||||||
710 | |||||||||
711 | if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) { | ||||||||
712 | if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) { | ||||||||
713 | unsigned ExtractedIdx = | ||||||||
714 | cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); | ||||||||
715 | unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); | ||||||||
716 | |||||||||
717 | // Either the extracted from or inserted into vector must be RHSVec, | ||||||||
718 | // otherwise we'd end up with a shuffle of three inputs. | ||||||||
719 | if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) { | ||||||||
720 | Value *RHS = EI->getOperand(0); | ||||||||
721 | ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC); | ||||||||
722 | assert(LR.second == nullptr || LR.second == RHS); | ||||||||
723 | |||||||||
724 | if (LR.first->getType() != RHS->getType()) { | ||||||||
725 | // Although we are giving up for now, see if we can create extracts | ||||||||
726 | // that match the inserts for another round of combining. | ||||||||
727 | replaceExtractElements(IEI, EI, IC); | ||||||||
728 | |||||||||
729 | // We tried our best, but we can't find anything compatible with RHS | ||||||||
730 | // further up the chain. Return a trivial shuffle. | ||||||||
731 | for (unsigned i = 0; i < NumElts; ++i) | ||||||||
732 | Mask[i] = i; | ||||||||
733 | return std::make_pair(V, nullptr); | ||||||||
734 | } | ||||||||
735 | |||||||||
736 | unsigned NumLHSElts = | ||||||||
737 | cast<FixedVectorType>(RHS->getType())->getNumElements(); | ||||||||
738 | Mask[InsertedIdx % NumElts] = NumLHSElts + ExtractedIdx; | ||||||||
739 | return std::make_pair(LR.first, RHS); | ||||||||
740 | } | ||||||||
741 | |||||||||
742 | if (VecOp == PermittedRHS) { | ||||||||
743 | // We've gone as far as we can: anything on the other side of the | ||||||||
744 | // extractelement will already have been converted into a shuffle. | ||||||||
745 | unsigned NumLHSElts = | ||||||||
746 | cast<FixedVectorType>(EI->getOperand(0)->getType()) | ||||||||
747 | ->getNumElements(); | ||||||||
748 | for (unsigned i = 0; i != NumElts; ++i) | ||||||||
749 | Mask.push_back(i == InsertedIdx ? ExtractedIdx : NumLHSElts + i); | ||||||||
750 | return std::make_pair(EI->getOperand(0), PermittedRHS); | ||||||||
751 | } | ||||||||
752 | |||||||||
753 | // If this insertelement is a chain that comes from exactly these two | ||||||||
754 | // vectors, return the vector and the effective shuffle. | ||||||||
755 | if (EI->getOperand(0)->getType() == PermittedRHS->getType() && | ||||||||
756 | collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS, | ||||||||
757 | Mask)) | ||||||||
758 | return std::make_pair(EI->getOperand(0), PermittedRHS); | ||||||||
759 | } | ||||||||
760 | } | ||||||||
761 | } | ||||||||
762 | |||||||||
763 | // Otherwise, we can't do anything fancy. Return an identity vector. | ||||||||
764 | for (unsigned i = 0; i != NumElts; ++i) | ||||||||
765 | Mask.push_back(i); | ||||||||
766 | return std::make_pair(V, nullptr); | ||||||||
767 | } | ||||||||
768 | |||||||||
769 | /// Look for chain of insertvalue's that fully define an aggregate, and trace | ||||||||
770 | /// back the values inserted, and see if they were all extractvalue'd from | ||||||||
771 | /// the same source aggregate from the exact same element indexes. | ||||||||
772 | /// If they were, just reuse the source aggregate. | ||||||||
773 | /// This potentially deals with PHI indirections. | ||||||||
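/// For example (illustrative IR), with %agg of type { i8, i32 }:
///   %e0 = extractvalue { i8, i32 } %agg, 0
///   %e1 = extractvalue { i8, i32 } %agg, 1
///   %i0 = insertvalue { i8, i32 } undef, i8 %e0, 0
///   %i1 = insertvalue { i8, i32 } %i0, i32 %e1, 1
/// rebuilds %agg element by element, so %i1 can be replaced by %agg itself
/// (or, with PHI translation, by a PHI of the per-predecessor source
/// aggregates).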
774 | Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse( | ||||||||
775 | InsertValueInst &OrigIVI) { | ||||||||
776 | Type *AggTy = OrigIVI.getType(); | ||||||||
777 | unsigned NumAggElts; | ||||||||
778 | switch (AggTy->getTypeID()) { | ||||||||
779 | case Type::StructTyID: | ||||||||
780 | NumAggElts = AggTy->getStructNumElements(); | ||||||||
781 | break; | ||||||||
782 | case Type::ArrayTyID: | ||||||||
783 | NumAggElts = AggTy->getArrayNumElements(); | ||||||||
784 | break; | ||||||||
785 | default: | ||||||||
786 | llvm_unreachable("Unhandled aggregate type?"); | ||||||||
787 | } | ||||||||
788 | |||||||||
789 | // Arbitrary aggregate size cut-off. Motivation for limit of 2 is to be able | ||||||||
790 | // to handle clang C++ exception struct (which is hardcoded as {i8*, i32}), | ||||||||
791 | // FIXME: any interesting patterns to be caught with larger limit? | ||||||||
792 | assert(NumAggElts > 0 && "Aggregate should have elements."); | ||||||||
793 | if (NumAggElts > 2) | ||||||||
794 | return nullptr; | ||||||||
795 | |||||||||
796 | static constexpr auto NotFound = None; | ||||||||
797 | static constexpr auto FoundMismatch = nullptr; | ||||||||
798 | |||||||||
799 | // Try to find a value of each element of an aggregate. | ||||||||
800 | // FIXME: deal with more complex, not one-dimensional, aggregate types | ||||||||
801 | SmallVector<Optional<Instruction *>, 2> AggElts(NumAggElts, NotFound); | ||||||||
802 | |||||||||
803 | // Do we know values for each element of the aggregate? | ||||||||
804 | auto KnowAllElts = [&AggElts]() { | ||||||||
805 | return all_of(AggElts, | ||||||||
806 | [](Optional<Instruction *> Elt) { return Elt != NotFound; }); | ||||||||
807 | }; | ||||||||
808 | |||||||||
809 | int Depth = 0; | ||||||||
810 | |||||||||
811 | // Arbitrary `insertvalue` visitation depth limit. Let's be okay with | ||||||||
812 | // every element being overwritten twice, which should never happen. | ||||||||
813 | static const int DepthLimit = 2 * NumAggElts; | ||||||||
814 | |||||||||
815 | // Recurse up the chain of `insertvalue` aggregate operands until either we've | ||||||||
816 | // reconstructed full initializer or can't visit any more `insertvalue`'s. | ||||||||
817 | for (InsertValueInst *CurrIVI = &OrigIVI; | ||||||||
818 | Depth < DepthLimit && CurrIVI && !KnowAllElts(); | ||||||||
819 | CurrIVI = dyn_cast<InsertValueInst>(CurrIVI->getAggregateOperand()), | ||||||||
820 | ++Depth) { | ||||||||
821 | auto *InsertedValue = | ||||||||
822 | dyn_cast<Instruction>(CurrIVI->getInsertedValueOperand()); | ||||||||
823 | if (!InsertedValue) | ||||||||
824 | return nullptr; // Inserted value must be produced by an instruction. | ||||||||
825 | |||||||||
826 | ArrayRef<unsigned int> Indices = CurrIVI->getIndices(); | ||||||||
827 | |||||||||
828 | // Don't bother with more than single-level aggregates. | ||||||||
829 | if (Indices.size() != 1) | ||||||||
830 | return nullptr; // FIXME: deal with more complex aggregates? | ||||||||
831 | |||||||||
832 | // Now, we may have already previously recorded the value for this element | ||||||||
833 | // of an aggregate. If we did, that means the CurrIVI will later be | ||||||||
834 | // overwritten with the already-recorded value. But if not, let's record it! | ||||||||
835 | Optional<Instruction *> &Elt = AggElts[Indices.front()]; | ||||||||
836 | Elt = Elt.getValueOr(InsertedValue); | ||||||||
837 | |||||||||
838 | // FIXME: should we handle chain-terminating undef base operand? | ||||||||
839 | } | ||||||||
840 | |||||||||
841 | // Was that sufficient to deduce the full initializer for the aggregate? | ||||||||
842 | if (!KnowAllElts()) | ||||||||
843 | return nullptr; // Give up then. | ||||||||
844 | |||||||||
845 | // We now want to find the source[s] of the aggregate elements we've found. | ||||||||
846 | // And with "source" we mean the original aggregate[s] from which | ||||||||
847 | // the inserted elements were extracted. This may require PHI translation. | ||||||||
848 | |||||||||
849 | enum class AggregateDescription { | ||||||||
850 | /// When analyzing the value that was inserted into an aggregate, we did | ||||||||
851 | /// not manage to find defining `extractvalue` instruction to analyze. | ||||||||
852 | NotFound, | ||||||||
853 | /// When analyzing the value that was inserted into an aggregate, we did | ||||||||
854 | /// manage to find defining `extractvalue` instruction[s], and everything | ||||||||
855 | /// matched perfectly - aggregate type, element insertion/extraction index. | ||||||||
856 | Found, | ||||||||
857 | /// When analyzing the value that was inserted into an aggregate, we did | ||||||||
858 | /// manage to find defining `extractvalue` instruction, but there was | ||||||||
859 | /// a mismatch: either the source type from which the extraction was didn't | ||||||||
860 | /// match the aggregate type into which the insertion was, | ||||||||
861 | /// or the extraction/insertion channels mismatched, | ||||||||
862 | /// or different elements had different source aggregates. | ||||||||
863 | FoundMismatch | ||||||||
864 | }; | ||||||||
865 | auto Describe = [](Optional<Value *> SourceAggregate) { | ||||||||
866 | if (SourceAggregate == NotFound) | ||||||||
867 | return AggregateDescription::NotFound; | ||||||||
868 | if (*SourceAggregate == FoundMismatch) | ||||||||
869 | return AggregateDescription::FoundMismatch; | ||||||||
870 | return AggregateDescription::Found; | ||||||||
871 | }; | ||||||||
872 | |||||||||
873 | // Given the value \p Elt that was being inserted into element \p EltIdx of an | ||||||||
874 | // aggregate AggTy, see if \p Elt was originally defined by an | ||||||||
875 | // appropriate extractvalue (same element index, same aggregate type). | ||||||||
876 | // If found, return the source aggregate from which the extraction was. | ||||||||
877 | // If \p PredBB is provided, does PHI translation of an \p Elt first. | ||||||||
878 | auto FindSourceAggregate = | ||||||||
879 | [&](Instruction *Elt, unsigned EltIdx, Optional<BasicBlock *> UseBB, | ||||||||
880 | Optional<BasicBlock *> PredBB) -> Optional<Value *> { | ||||||||
881 | // For now(?), only deal with, at most, a single level of PHI indirection. | ||||||||
882 | if (UseBB && PredBB) | ||||||||
883 | Elt = dyn_cast<Instruction>(Elt->DoPHITranslation(*UseBB, *PredBB)); | ||||||||
884 | // FIXME: deal with multiple levels of PHI indirection? | ||||||||
885 | |||||||||
886 | // Did we find an extraction? | ||||||||
887 | auto *EVI = dyn_cast_or_null<ExtractValueInst>(Elt); | ||||||||
888 | if (!EVI) | ||||||||
889 | return NotFound; | ||||||||
890 | |||||||||
891 | Value *SourceAggregate = EVI->getAggregateOperand(); | ||||||||
892 | |||||||||
893 | // Is the extraction from the same type into which the insertion was? | ||||||||
894 | if (SourceAggregate->getType() != AggTy) | ||||||||
895 | return FoundMismatch; | ||||||||
896 | // And the element index doesn't change between extraction and insertion? | ||||||||
897 | if (EVI->getNumIndices() != 1 || EltIdx != EVI->getIndices().front()) | ||||||||
898 | return FoundMismatch; | ||||||||
899 | |||||||||
900 | return SourceAggregate; // AggregateDescription::Found | ||||||||
901 | }; | ||||||||
902 | |||||||||
903 | // Given elements AggElts that were constructing an aggregate OrigIVI, | ||||||||
904 | // see if we can find appropriate source aggregate for each of the elements, | ||||||||
905 | // and see it's the same aggregate for each element. If so, return it. | ||||||||
906 | auto FindCommonSourceAggregate = | ||||||||
907 | [&](Optional<BasicBlock *> UseBB, | ||||||||
908 | Optional<BasicBlock *> PredBB) -> Optional<Value *> { | ||||||||
909 | Optional<Value *> SourceAggregate; | ||||||||
910 | |||||||||
911 | for (auto I : enumerate(AggElts)) { | ||||||||
912 | assert(Describe(SourceAggregate) != AggregateDescription::FoundMismatch && | ||||||||
913 |        "We don't store nullptr in SourceAggregate!"); | ||||||||
914 | assert((Describe(SourceAggregate) == AggregateDescription::Found) == | ||||||||
915 |        (I.index() != 0) && | ||||||||
916 |        "SourceAggregate should be valid after the first element,"); | ||||||||
917 | |||||||||
918 | // For this element, is there a plausible source aggregate? | ||||||||
919 | // FIXME: we could special-case undef element, IFF we know that in the | ||||||||
920 | // source aggregate said element isn't poison. | ||||||||
921 | Optional<Value *> SourceAggregateForElement = | ||||||||
922 | FindSourceAggregate(*I.value(), I.index(), UseBB, PredBB); | ||||||||
923 | |||||||||
924 | // Okay, what have we found? Does that correlate with previous findings? | ||||||||
925 | |||||||||
926 | // Regardless of whether or not we have previously found source | ||||||||
927 | // aggregate for previous elements (if any), if we didn't find one for | ||||||||
928 | // this element, passthrough whatever we have just found. | ||||||||
929 | if (Describe(SourceAggregateForElement) != AggregateDescription::Found) | ||||||||
930 | return SourceAggregateForElement; | ||||||||
931 | |||||||||
932 | // Okay, we have found source aggregate for this element. | ||||||||
933 | // Let's see what we already know from previous elements, if any. | ||||||||
934 | switch (Describe(SourceAggregate)) { | ||||||||
935 | case AggregateDescription::NotFound: | ||||||||
936 | // This is apparently the first element that we have examined. | ||||||||
937 | SourceAggregate = SourceAggregateForElement; // Record the aggregate! | ||||||||
938 | continue; // Great, now look at next element. | ||||||||
939 | case AggregateDescription::Found: | ||||||||
940 | // We have previously already successfully examined other elements. | ||||||||
941 | // Is this the same source aggregate we've found for other elements? | ||||||||
942 | if (*SourceAggregateForElement != *SourceAggregate) | ||||||||
943 | return FoundMismatch; | ||||||||
944 | continue; // Still the same aggregate, look at next element. | ||||||||
945 | case AggregateDescription::FoundMismatch: | ||||||||
946 | llvm_unreachable("Can't happen. We would have early-exited then."); | ||||||||
947 | }; | ||||||||
948 | } | ||||||||
949 | |||||||||
950 | assert(Describe(SourceAggregate) == AggregateDescription::Found && | ||||||||
951 |        "Must be a valid Value"); | ||||||||
952 | return *SourceAggregate; | ||||||||
953 | }; | ||||||||
954 | |||||||||
955 | Optional<Value *> SourceAggregate; | ||||||||
956 | |||||||||
957 | // Can we find the source aggregate without looking at predecessors? | ||||||||
958 | SourceAggregate = FindCommonSourceAggregate(/*UseBB=*/None, /*PredBB=*/None); | ||||||||
959 | if (Describe(SourceAggregate) != AggregateDescription::NotFound) { | ||||||||
960 | if (Describe(SourceAggregate) == AggregateDescription::FoundMismatch) | ||||||||
961 | return nullptr; // Conflicting source aggregates! | ||||||||
962 | ++NumAggregateReconstructionsSimplified; | ||||||||
963 | return replaceInstUsesWith(OrigIVI, *SourceAggregate); | ||||||||
964 | } | ||||||||
965 | |||||||||
966 | // Okay, apparently we need to look at predecessors. | ||||||||
967 | |||||||||
968 | // We should be smart about picking the "use" basic block, which will be the | ||||||||
969 | // merge point for aggregate, where we'll insert the final PHI that will be | ||||||||
970 | // used instead of OrigIVI. Basic block of OrigIVI is *not* the right choice. | ||||||||
971 | // We should look in which blocks each of the AggElts is being defined, | ||||||||
972 | // they all should be defined in the same basic block. | ||||||||
973 | BasicBlock *UseBB = nullptr; | ||||||||
974 | |||||||||
975 | for (const Optional<Instruction *> &I : AggElts) { | ||||||||
976 | BasicBlock *BB = (*I)->getParent(); | ||||||||
977 | // If it's the first instruction we've encountered, record the basic block. | ||||||||
978 | if (!UseBB) { | ||||||||
979 | UseBB = BB; | ||||||||
980 | continue; | ||||||||
981 | } | ||||||||
982 | // Otherwise, this must be the same basic block we've seen previously. | ||||||||
983 | if (UseBB != BB) | ||||||||
984 | return nullptr; | ||||||||
985 | } | ||||||||
986 | |||||||||
987 | // If *all* of the elements are basic-block-independent, meaning they are | ||||||||
988 | // either function arguments, or constant expressions, then if we didn't | ||||||||
989 | // handle them without predecessor-aware handling, we won't handle them now. | ||||||||
990 | if (!UseBB) | ||||||||
991 | return nullptr; | ||||||||
992 | |||||||||
993 | // If we didn't manage to find source aggregate without looking at | ||||||||
994 | // predecessors, and there are no predecessors to look at, then we're done. | ||||||||
995 | if (pred_empty(UseBB)) | ||||||||
996 | return nullptr; | ||||||||
997 | |||||||||
998 | // Arbitrary predecessor count limit. | ||||||||
999 | static const int PredCountLimit = 64; | ||||||||
1000 | |||||||||
1001 | // Cache the (non-uniqified!) list of predecessors in a vector, | ||||||||
1002 | // checking the limit at the same time for efficiency. | ||||||||
1003 | SmallVector<BasicBlock *, 4> Preds; // May have duplicates! | ||||||||
1004 | for (BasicBlock *Pred : predecessors(UseBB)) { | ||||||||
1005 | // Don't bother if there are too many predecessors. | ||||||||
1006 | if (Preds.size() >= PredCountLimit) // FIXME: only count duplicates once? | ||||||||
1007 | return nullptr; | ||||||||
1008 | Preds.emplace_back(Pred); | ||||||||
1009 | } | ||||||||
1010 | |||||||||
1011 | // For each predecessor, what is the source aggregate, | ||||||||
1012 | // from which all the elements were originally extracted from? | ||||||||
1013 | // Note that we want for the map to have stable iteration order! | ||||||||
1014 | SmallDenseMap<BasicBlock *, Value *, 4> SourceAggregates; | ||||||||
1015 | for (BasicBlock *Pred : Preds) { | ||||||||
1016 | std::pair<decltype(SourceAggregates)::iterator, bool> IV = | ||||||||
1017 | SourceAggregates.insert({Pred, nullptr}); | ||||||||
1018 | // Did we already evaluate this predecessor? | ||||||||
1019 | if (!IV.second) | ||||||||
1020 | continue; | ||||||||
1021 | |||||||||
1022 | // Let's hope that when coming from predecessor Pred, all elements of the | ||||||||
1023 | // aggregate produced by OrigIVI must have been originally extracted from | ||||||||
1024 | // the same aggregate. Is that so? Can we find said original aggregate? | ||||||||
1025 | SourceAggregate = FindCommonSourceAggregate(UseBB, Pred); | ||||||||
1026 | if (Describe(SourceAggregate) != AggregateDescription::Found) | ||||||||
1027 | return nullptr; // Give up. | ||||||||
1028 | IV.first->second = *SourceAggregate; | ||||||||
1029 | } | ||||||||
1030 | |||||||||
1031 | // All good! Now we just need to thread the source aggregates here. | ||||||||
1032 | // Note that we have to insert the new PHI here, ourselves, because we can't | ||||||||
1033 | // rely on InstCombinerImpl::run() inserting it into the right basic block. | ||||||||
1034 | // Note that the same block can be a predecessor more than once, | ||||||||
1035 | // and we need to preserve that invariant for the PHI node. | ||||||||
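// Illustrative sketch (not from the original source; block and value names are
// hypothetical): for a merge block with two predecessors, the PHI built below
// would look roughly like
//   %agg.merged = phi { i8, i32 } [ %src.left, %bb.left ], [ %src.right, %bb.right ]
// where %src.left and %src.right are the per-predecessor source aggregates.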
1036 | BuilderTy::InsertPointGuard Guard(Builder); | ||||||||
1037 | Builder.SetInsertPoint(UseBB->getFirstNonPHI()); | ||||||||
1038 | auto *PHI = | ||||||||
1039 | Builder.CreatePHI(AggTy, Preds.size(), OrigIVI.getName() + ".merged"); | ||||||||
1040 | for (BasicBlock *Pred : Preds) | ||||||||
1041 | PHI->addIncoming(SourceAggregates[Pred], Pred); | ||||||||
1042 | |||||||||
1043 | ++NumAggregateReconstructionsSimplified; | ||||||||
1044 | return replaceInstUsesWith(OrigIVI, PHI); | ||||||||
1045 | } | ||||||||
1046 | |||||||||
1047 | /// Try to find redundant insertvalue instructions, like the following ones: | ||||||||
1048 | /// %0 = insertvalue { i8, i32 } undef, i8 %x, 0 | ||||||||
1049 | /// %1 = insertvalue { i8, i32 } %0, i8 %y, 0 | ||||||||
1050 | /// Here the second instruction inserts values at the same indices as the | ||||||||
1051 | /// first one, making the first one redundant. | ||||||||
1052 | /// It should be transformed to: | ||||||||
1053 | /// %0 = insertvalue { i8, i32 } undef, i8 %y, 0 | ||||||||
1054 | Instruction *InstCombinerImpl::visitInsertValueInst(InsertValueInst &I) { | ||||||||
1055 | bool IsRedundant = false; | ||||||||
1056 | ArrayRef<unsigned int> FirstIndices = I.getIndices(); | ||||||||
1057 | |||||||||
1058 | // If there is a chain of insertvalue instructions (each of them except the | ||||||||
1059 | // last one has only one use and it's another insertvalue insn from this | ||||||||
1060 | // chain), check if any of the 'children' uses the same indices as the first | ||||||||
1061 | // instruction. In this case, the first one is redundant. | ||||||||
1062 | Value *V = &I; | ||||||||
1063 | unsigned Depth = 0; | ||||||||
1064 | while (V->hasOneUse() && Depth < 10) { | ||||||||
1065 | User *U = V->user_back(); | ||||||||
1066 | auto UserInsInst = dyn_cast<InsertValueInst>(U); | ||||||||
1067 | if (!UserInsInst || U->getOperand(0) != V) | ||||||||
1068 | break; | ||||||||
1069 | if (UserInsInst->getIndices() == FirstIndices) { | ||||||||
1070 | IsRedundant = true; | ||||||||
1071 | break; | ||||||||
1072 | } | ||||||||
1073 | V = UserInsInst; | ||||||||
1074 | Depth++; | ||||||||
1075 | } | ||||||||
1076 | |||||||||
1077 | if (IsRedundant) | ||||||||
1078 | return replaceInstUsesWith(I, I.getOperand(0)); | ||||||||
1079 | |||||||||
1080 | if (Instruction *NewI = foldAggregateConstructionIntoAggregateReuse(I)) | ||||||||
1081 | return NewI; | ||||||||
1082 | |||||||||
1083 | return nullptr; | ||||||||
1084 | } | ||||||||
1085 | |||||||||
1086 | static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) { | ||||||||
1087 | // Cannot analyze a scalable vector type; the number of elements is not a | ||||||||
1088 | // compile-time constant. | ||||||||
1089 | if (isa<ScalableVectorType>(Shuf.getOperand(0)->getType())) | ||||||||
1090 | return false; | ||||||||
1091 | |||||||||
1092 | int MaskSize = Shuf.getShuffleMask().size(); | ||||||||
1093 | int VecSize = | ||||||||
1094 | cast<FixedVectorType>(Shuf.getOperand(0)->getType())->getNumElements(); | ||||||||
1095 | |||||||||
1096 | // A vector select does not change the size of the operands. | ||||||||
1097 | if (MaskSize != VecSize) | ||||||||
1098 | return false; | ||||||||
1099 | |||||||||
1100 | // Each mask element must be undefined or choose a vector element from one of | ||||||||
1101 | // the source operands without crossing vector lanes. | ||||||||
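// For example (an illustrative mask, not from the original comments): with two
// <4 x i32> operands, the mask <0, 5, 2, 7> is select-equivalent because lanes
// 0 and 2 come from the first operand and lanes 1 and 3 (elements 5 and 7,
// i.e. 1 + VecSize and 3 + VecSize) come from the second operand, all in-lane.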
1102 | for (int i = 0; i != MaskSize; ++i) { | ||||||||
1103 | int Elt = Shuf.getMaskValue(i); | ||||||||
1104 | if (Elt != -1 && Elt != i && Elt != i + VecSize) | ||||||||
1105 | return false; | ||||||||
1106 | } | ||||||||
1107 | |||||||||
1108 | return true; | ||||||||
1109 | } | ||||||||
1110 | |||||||||
1111 | /// Turn a chain of inserts that splats a value into an insert + shuffle: | ||||||||
1112 | /// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... -> | ||||||||
1113 | /// shufflevector(insertelt(X, %k, 0), poison, zero) | ||||||||
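/// As an illustrative IR sketch (hypothetical values, assuming a <4 x i32>
/// result), a chain that writes %k into every lane becomes:
///   %ins = insertelement <4 x i32> %X, i32 %k, i32 0
///   %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer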
1114 | static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) { | ||||||||
1115 | // We are interested in the last insert in a chain. So if this insert has a | ||||||||
1116 | // single user and that user is an insert, bail. | ||||||||
1117 | if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back())) | ||||||||
1118 | return nullptr; | ||||||||
1119 | |||||||||
1120 | VectorType *VecTy = InsElt.getType(); | ||||||||
1121 | // Cannot handle a scalable vector type; the number of elements is not a | ||||||||
1122 | // compile-time constant. | ||||||||
1123 | if (isa<ScalableVectorType>(VecTy)) | ||||||||
1124 | return nullptr; | ||||||||
1125 | unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements(); | ||||||||
1126 | |||||||||
1127 | // Do not try to do this for a one-element vector, since that's a nop | ||||||||
1128 | // and would cause an infinite loop. | ||||||||
1129 | if (NumElements == 1) | ||||||||
1130 | return nullptr; | ||||||||
1131 | |||||||||
1132 | Value *SplatVal = InsElt.getOperand(1); | ||||||||
1133 | InsertElementInst *CurrIE = &InsElt; | ||||||||
1134 | SmallBitVector ElementPresent(NumElements, false); | ||||||||
1135 | InsertElementInst *FirstIE = nullptr; | ||||||||
1136 | |||||||||
1137 | // Walk the chain backwards, keeping track of which indices we inserted into, | ||||||||
1138 | // until we hit something that isn't an insert of the splatted value. | ||||||||
1139 | while (CurrIE) { | ||||||||
1140 | auto *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2)); | ||||||||
1141 | if (!Idx || CurrIE->getOperand(1) != SplatVal) | ||||||||
1142 | return nullptr; | ||||||||
1143 | |||||||||
1144 | auto *NextIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0)); | ||||||||
1145 | // Check that none of the intermediate steps have any additional uses, | ||||||||
1146 | // except for the root insertelement instruction, which can be re-used if | ||||||||
1147 | // it inserts at position 0. | ||||||||
1148 | if (CurrIE != &InsElt && | ||||||||
1149 | (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero()))) | ||||||||
1150 | return nullptr; | ||||||||
1151 | |||||||||
1152 | ElementPresent[Idx->getZExtValue()] = true; | ||||||||
1153 | FirstIE = CurrIE; | ||||||||
1154 | CurrIE = NextIE; | ||||||||
1155 | } | ||||||||
1156 | |||||||||
1157 | // If this is just a single insertelement (not a sequence), we are done. | ||||||||
1158 | if (FirstIE == &InsElt) | ||||||||
1159 | return nullptr; | ||||||||
1160 | |||||||||
1161 | // If we are not inserting into an undef vector, make sure we've seen an | ||||||||
1162 | // insert into every element. | ||||||||
1163 | // TODO: If the base vector is not undef, it might be better to create a splat | ||||||||
1164 | // and then a select-shuffle (blend) with the base vector. | ||||||||
1165 | if (!match(FirstIE->getOperand(0), m_Undef())) | ||||||||
1166 | if (!ElementPresent.all()) | ||||||||
1167 | return nullptr; | ||||||||
1168 | |||||||||
1169 | // Create the insert + shuffle. | ||||||||
1170 | Type *Int32Ty = Type::getInt32Ty(InsElt.getContext()); | ||||||||
1171 | PoisonValue *PoisonVec = PoisonValue::get(VecTy); | ||||||||
1172 | Constant *Zero = ConstantInt::get(Int32Ty, 0); | ||||||||
1173 | if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero()) | ||||||||
1174 | FirstIE = InsertElementInst::Create(PoisonVec, SplatVal, Zero, "", &InsElt); | ||||||||
1175 | |||||||||
1176 | // Splat from element 0, but replace absent elements with undef in the mask. | ||||||||
1177 | SmallVector<int, 16> Mask(NumElements, 0); | ||||||||
1178 | for (unsigned i = 0; i != NumElements; ++i) | ||||||||
1179 | if (!ElementPresent[i]) | ||||||||
1180 | Mask[i] = -1; | ||||||||
1181 | |||||||||
1182 | return new ShuffleVectorInst(FirstIE, PoisonVec, Mask); | ||||||||
1183 | } | ||||||||
1184 | |||||||||
1185 | /// Try to fold an insert element into an existing splat shuffle by changing | ||||||||
1186 | /// the shuffle's mask to include the index of this insert element. | ||||||||
1187 | static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) { | ||||||||
1188 | // Check if the vector operand of this insert is a canonical splat shuffle. | ||||||||
1189 | auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0)); | ||||||||
1190 | if (!Shuf || !Shuf->isZeroEltSplat()) | ||||||||
1191 | return nullptr; | ||||||||
1192 | |||||||||
1193 | // Bail out early if the shuffle has a scalable type. The number of elements | ||||||||
1194 | // in the shuffle mask is unknown at compile time. | ||||||||
1195 | if (isa<ScalableVectorType>(Shuf->getType())) | ||||||||
1196 | return nullptr; | ||||||||
1197 | |||||||||
1198 | // Check for a constant insertion index. | ||||||||
1199 | uint64_t IdxC; | ||||||||
1200 | if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC))) | ||||||||
1201 | return nullptr; | ||||||||
1202 | |||||||||
1203 | // Check if the splat shuffle's input is the same as this insert's scalar op. | ||||||||
1204 | Value *X = InsElt.getOperand(1); | ||||||||
1205 | Value *Op0 = Shuf->getOperand(0); | ||||||||
1206 | if (!match(Op0, m_InsertElt(m_Undef(), m_Specific(X), m_ZeroInt()))) | ||||||||
1207 | return nullptr; | ||||||||
1208 | |||||||||
1209 | // Replace the shuffle mask element at the index of this insert with a zero. | ||||||||
1210 | // For example: | ||||||||
1211 | // inselt (shuf (inselt undef, X, 0), undef, <0,undef,0,undef>), X, 1 | ||||||||
1212 | // --> shuf (inselt undef, X, 0), undef, <0,0,0,undef> | ||||||||
1213 | unsigned NumMaskElts = | ||||||||
1214 | cast<FixedVectorType>(Shuf->getType())->getNumElements(); | ||||||||
1215 | SmallVector<int, 16> NewMask(NumMaskElts); | ||||||||
1216 | for (unsigned i = 0; i != NumMaskElts; ++i) | ||||||||
1217 | NewMask[i] = i == IdxC ? 0 : Shuf->getMaskValue(i); | ||||||||
1218 | |||||||||
1219 | return new ShuffleVectorInst(Op0, UndefValue::get(Op0->getType()), NewMask); | ||||||||
1220 | } | ||||||||
1221 | |||||||||
1222 | /// Try to fold an extract+insert element into an existing identity shuffle by | ||||||||
1223 | /// changing the shuffle's mask to include the index of this insert element. | ||||||||
1224 | static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) { | ||||||||
1225 | // Check if the vector operand of this insert is an identity shuffle. | ||||||||
1226 | auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0)); | ||||||||
1227 | if (!Shuf || !match(Shuf->getOperand(1), m_Undef()) || | ||||||||
1228 | !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding())) | ||||||||
1229 | return nullptr; | ||||||||
1230 | |||||||||
1231 | // Bail out early if the shuffle has a scalable type. The number of elements | ||||||||
1232 | // in the shuffle mask is unknown at compile time. | ||||||||
1233 | if (isa<ScalableVectorType>(Shuf->getType())) | ||||||||
1234 | return nullptr; | ||||||||
1235 | |||||||||
1236 | // Check for a constant insertion index. | ||||||||
1237 | uint64_t IdxC; | ||||||||
1238 | if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC))) | ||||||||
1239 | return nullptr; | ||||||||
1240 | |||||||||
1241 | // Check if this insert's scalar op is extracted from the identity shuffle's | ||||||||
1242 | // input vector. | ||||||||
1243 | Value *Scalar = InsElt.getOperand(1); | ||||||||
1244 | Value *X = Shuf->getOperand(0); | ||||||||
1245 | if (!match(Scalar, m_ExtractElt(m_Specific(X), m_SpecificInt(IdxC)))) | ||||||||
1246 | return nullptr; | ||||||||
1247 | |||||||||
1248 | // Replace the shuffle mask element at the index of this extract+insert with | ||||||||
1249 | // that same index value. | ||||||||
1250 | // For example: | ||||||||
1251 | // inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask' | ||||||||
1252 | unsigned NumMaskElts = | ||||||||
1253 | cast<FixedVectorType>(Shuf->getType())->getNumElements(); | ||||||||
1254 | SmallVector<int, 16> NewMask(NumMaskElts); | ||||||||
1255 | ArrayRef<int> OldMask = Shuf->getShuffleMask(); | ||||||||
1256 | for (unsigned i = 0; i != NumMaskElts; ++i) { | ||||||||
1257 | if (i != IdxC) { | ||||||||
1258 | // All mask elements besides the inserted element remain the same. | ||||||||
1259 | NewMask[i] = OldMask[i]; | ||||||||
1260 | } else if (OldMask[i] == (int)IdxC) { | ||||||||
1261 | // If the mask element was already set, there's nothing to do | ||||||||
1262 | // (demanded elements analysis may unset it later). | ||||||||
1263 | return nullptr; | ||||||||
1264 | } else { | ||||||||
1265 | assert(OldMask[i] == UndefMaskElem &&((void)0) | ||||||||
1266 | "Unexpected shuffle mask element for identity shuffle")((void)0); | ||||||||
1267 | NewMask[i] = IdxC; | ||||||||
1268 | } | ||||||||
1269 | } | ||||||||
1270 | |||||||||
1271 | return new ShuffleVectorInst(X, Shuf->getOperand(1), NewMask); | ||||||||
1272 | } | ||||||||
1273 | |||||||||
1274 | /// If we have an insertelement instruction feeding into another insertelement | ||||||||
1275 | /// and the 2nd is inserting a constant into the vector, canonicalize that | ||||||||
1276 | /// constant insertion before the insertion of a variable: | ||||||||
1277 | /// | ||||||||
1278 | /// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 --> | ||||||||
1279 | /// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1 | ||||||||
1280 | /// | ||||||||
1281 | /// This has the potential of eliminating the 2nd insertelement instruction | ||||||||
1282 | /// via constant folding of the scalar constant into a vector constant. | ||||||||
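/// As a hypothetical sketch of why this helps (values invented for
/// illustration): with X = zeroinitializer, ScalarC = 7, IdxC2 = 1,
///   insertelement (insertelement <2 x i32> zeroinitializer, i32 %y, i32 0), i32 7, i32 1
/// becomes
///   insertelement <2 x i32> <i32 0, i32 7>, i32 %y, i32 0
/// once the hoisted constant insert folds into a vector constant.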
1283 | static Instruction *hoistInsEltConst(InsertElementInst &InsElt2, | ||||||||
1284 | InstCombiner::BuilderTy &Builder) { | ||||||||
1285 | auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0)); | ||||||||
1286 | if (!InsElt1 || !InsElt1->hasOneUse()) | ||||||||
1287 | return nullptr; | ||||||||
1288 | |||||||||
1289 | Value *X, *Y; | ||||||||
1290 | Constant *ScalarC; | ||||||||
1291 | ConstantInt *IdxC1, *IdxC2; | ||||||||
1292 | if (match(InsElt1->getOperand(0), m_Value(X)) && | ||||||||
1293 | match(InsElt1->getOperand(1), m_Value(Y)) && !isa<Constant>(Y) && | ||||||||
1294 | match(InsElt1->getOperand(2), m_ConstantInt(IdxC1)) && | ||||||||
1295 | match(InsElt2.getOperand(1), m_Constant(ScalarC)) && | ||||||||
1296 | match(InsElt2.getOperand(2), m_ConstantInt(IdxC2)) && IdxC1 != IdxC2) { | ||||||||
1297 | Value *NewInsElt1 = Builder.CreateInsertElement(X, ScalarC, IdxC2); | ||||||||
1298 | return InsertElementInst::Create(NewInsElt1, Y, IdxC1); | ||||||||
1299 | } | ||||||||
1300 | |||||||||
1301 | return nullptr; | ||||||||
1302 | } | ||||||||
1303 | |||||||||
1304 | /// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex | ||||||||
1305 | /// --> shufflevector X, CVec', Mask' | ||||||||
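/// An illustrative instance (made-up operands), in the same loose notation as
/// the comments below: inserting the constant 7 at index 2 into a select-like
/// shuffle of <4 x i32> %x,
///   inselt (shuf %x, <poison,5,poison,poison>, <0,5,2,3>), 7, 2
/// can be rewritten as
///   shuf %x, <poison,5,7,poison>, <0,5,6,3>
/// because the constant vector is always the shuffle's second operand.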
1306 | static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) { | ||||||||
1307 | auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0)); | ||||||||
1308 | // Bail out if the parent has more than one use. In that case, we'd be | ||||||||
1309 | // replacing the insertelt with a shuffle, and that's not a clear win. | ||||||||
1310 | if (!Inst || !Inst->hasOneUse()) | ||||||||
1311 | return nullptr; | ||||||||
1312 | if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) { | ||||||||
1313 | // The shuffle must have a constant vector operand. The insertelt must have | ||||||||
1314 | // a constant scalar being inserted at a constant position in the vector. | ||||||||
1315 | Constant *ShufConstVec, *InsEltScalar; | ||||||||
1316 | uint64_t InsEltIndex; | ||||||||
1317 | if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) || | ||||||||
1318 | !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) || | ||||||||
1319 | !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex))) | ||||||||
1320 | return nullptr; | ||||||||
1321 | |||||||||
1322 | // Adding an element to an arbitrary shuffle could be expensive, but a | ||||||||
1323 | // shuffle that selects elements from vectors without crossing lanes is | ||||||||
1324 | // assumed cheap. | ||||||||
1325 | // If we're just adding a constant into that shuffle, it will still be | ||||||||
1326 | // cheap. | ||||||||
1327 | if (!isShuffleEquivalentToSelect(*Shuf)) | ||||||||
1328 | return nullptr; | ||||||||
1329 | |||||||||
1330 | // From the above 'select' check, we know that the mask has the same number | ||||||||
1331 | // of elements as the vector input operands. We also know that each constant | ||||||||
1332 | // input element is used in its lane and cannot be used more than once by | ||||||||
1333 | // the shuffle. Therefore, replace the constant in the shuffle's constant | ||||||||
1334 | // vector with the insertelt constant. Replace the constant in the shuffle's | ||||||||
1335 | // mask vector with the insertelt index plus the length of the vector | ||||||||
1336 | // (because the constant vector operand of a shuffle is always the 2nd | ||||||||
1337 | // operand). | ||||||||
1338 | ArrayRef<int> Mask = Shuf->getShuffleMask(); | ||||||||
1339 | unsigned NumElts = Mask.size(); | ||||||||
1340 | SmallVector<Constant *, 16> NewShufElts(NumElts); | ||||||||
1341 | SmallVector<int, 16> NewMaskElts(NumElts); | ||||||||
1342 | for (unsigned I = 0; I != NumElts; ++I) { | ||||||||
1343 | if (I == InsEltIndex) { | ||||||||
1344 | NewShufElts[I] = InsEltScalar; | ||||||||
1345 | NewMaskElts[I] = InsEltIndex + NumElts; | ||||||||
1346 | } else { | ||||||||
1347 | // Copy over the existing values. | ||||||||
1348 | NewShufElts[I] = ShufConstVec->getAggregateElement(I); | ||||||||
1349 | NewMaskElts[I] = Mask[I]; | ||||||||
1350 | } | ||||||||
1351 | } | ||||||||
1352 | |||||||||
1353 | // Create new operands for a shuffle that includes the constant of the | ||||||||
1354 | // original insertelt. The old shuffle will be dead now. | ||||||||
1355 | return new ShuffleVectorInst(Shuf->getOperand(0), | ||||||||
1356 | ConstantVector::get(NewShufElts), NewMaskElts); | ||||||||
1357 | } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) { | ||||||||
1358 | // Transform sequences of insertelement ops with constant data/indices into | ||||||||
1359 | // a single shuffle op. | ||||||||
1360 | // Cannot handle a scalable vector type; the number of elements needed to | ||||||||
1361 | // create the shuffle mask is not a compile-time constant. | ||||||||
1362 | if (isa<ScalableVectorType>(InsElt.getType())) | ||||||||
1363 | return nullptr; | ||||||||
1364 | unsigned NumElts = | ||||||||
1365 | cast<FixedVectorType>(InsElt.getType())->getNumElements(); | ||||||||
1366 | |||||||||
1367 | uint64_t InsertIdx[2]; | ||||||||
1368 | Constant *Val[2]; | ||||||||
1369 | if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) || | ||||||||
1370 | !match(InsElt.getOperand(1), m_Constant(Val[0])) || | ||||||||
1371 | !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) || | ||||||||
1372 | !match(IEI->getOperand(1), m_Constant(Val[1]))) | ||||||||
1373 | return nullptr; | ||||||||
1374 | SmallVector<Constant *, 16> Values(NumElts); | ||||||||
1375 | SmallVector<int, 16> Mask(NumElts); | ||||||||
1376 | auto ValI = std::begin(Val); | ||||||||
1377 | // Generate new constant vector and mask. | ||||||||
1378 | // We have 2 values/masks from the insertelements instructions. Insert them | ||||||||
1379 | // into new value/mask vectors. | ||||||||
1380 | for (uint64_t I : InsertIdx) { | ||||||||
1381 | if (!Values[I]) { | ||||||||
1382 | Values[I] = *ValI; | ||||||||
1383 | Mask[I] = NumElts + I; | ||||||||
1384 | } | ||||||||
1385 | ++ValI; | ||||||||
1386 | } | ||||||||
1387 | // Remaining values are filled with 'undef' values. | ||||||||
1388 | for (unsigned I = 0; I < NumElts; ++I) { | ||||||||
1389 | if (!Values[I]) { | ||||||||
1390 | Values[I] = UndefValue::get(InsElt.getType()->getElementType()); | ||||||||
1391 | Mask[I] = I; | ||||||||
1392 | } | ||||||||
1393 | } | ||||||||
1394 | // Create new operands for a shuffle that includes the constant of the | ||||||||
1395 | // original insertelt. | ||||||||
1396 | return new ShuffleVectorInst(IEI->getOperand(0), | ||||||||
1397 | ConstantVector::get(Values), Mask); | ||||||||
1398 | } | ||||||||
1399 | return nullptr; | ||||||||
1400 | } | ||||||||
1401 | |||||||||
1402 | Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) { | ||||||||
1403 | Value *VecOp = IE.getOperand(0); | ||||||||
1404 | Value *ScalarOp = IE.getOperand(1); | ||||||||
1405 | Value *IdxOp = IE.getOperand(2); | ||||||||
1406 | |||||||||
1407 | if (auto *V = SimplifyInsertElementInst( | ||||||||
1408 | VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE))) | ||||||||
1409 | return replaceInstUsesWith(IE, V); | ||||||||
1410 | |||||||||
1411 | // If the scalar is bitcast and inserted into undef, do the insert in the | ||||||||
1412 | // source type followed by bitcast. | ||||||||
1413 | // TODO: Generalize for insert into any constant, not just undef? | ||||||||
1414 | Value *ScalarSrc; | ||||||||
1415 | if (match(VecOp, m_Undef()) && | ||||||||
1416 | match(ScalarOp, m_OneUse(m_BitCast(m_Value(ScalarSrc)))) && | ||||||||
1417 | (ScalarSrc->getType()->isIntegerTy() || | ||||||||
1418 | ScalarSrc->getType()->isFloatingPointTy())) { | ||||||||
1419 | // inselt undef, (bitcast ScalarSrc), IdxOp --> | ||||||||
1420 | // bitcast (inselt undef, ScalarSrc, IdxOp) | ||||||||
1421 | Type *ScalarTy = ScalarSrc->getType(); | ||||||||
1422 | Type *VecTy = VectorType::get(ScalarTy, IE.getType()->getElementCount()); | ||||||||
1423 | UndefValue *NewUndef = UndefValue::get(VecTy); | ||||||||
1424 | Value *NewInsElt = Builder.CreateInsertElement(NewUndef, ScalarSrc, IdxOp); | ||||||||
1425 | return new BitCastInst(NewInsElt, IE.getType()); | ||||||||
1426 | } | ||||||||
1427 | |||||||||
1428 | // If the vector and scalar are both bitcast from the same element type, do | ||||||||
1429 | // the insert in that source type followed by bitcast. | ||||||||
1430 | Value *VecSrc; | ||||||||
1431 | if (match(VecOp, m_BitCast(m_Value(VecSrc))) && | ||||||||
1432 | match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) && | ||||||||
1433 | (VecOp->hasOneUse() || ScalarOp->hasOneUse()) && | ||||||||
1434 | VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() && | ||||||||
1435 | cast<VectorType>(VecSrc->getType())->getElementType() == | ||||||||
1436 | ScalarSrc->getType()) { | ||||||||
1437 | // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp --> | ||||||||
1438 | // bitcast (inselt VecSrc, ScalarSrc, IdxOp) | ||||||||
1439 | Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp); | ||||||||
1440 | return new BitCastInst(NewInsElt, IE.getType()); | ||||||||
1441 | } | ||||||||
1442 | |||||||||
1443 | // If the inserted element was extracted from some other fixed-length vector | ||||||||
1444 | // and both indices are valid constants, try to turn this into a shuffle. | ||||||||
1445 | // Cannot handle a scalable vector type; the number of elements needed to | ||||||||
1446 | // create the shuffle mask is not a compile-time constant. | ||||||||
1447 | uint64_t InsertedIdx, ExtractedIdx; | ||||||||
1448 | Value *ExtVecOp; | ||||||||
1449 | if (isa<FixedVectorType>(IE.getType()) && | ||||||||
1450 | match(IdxOp, m_ConstantInt(InsertedIdx)) && | ||||||||
1451 | match(ScalarOp, | ||||||||
1452 | m_ExtractElt(m_Value(ExtVecOp), m_ConstantInt(ExtractedIdx))) && | ||||||||
1453 | isa<FixedVectorType>(ExtVecOp->getType()) && | ||||||||
1454 | ExtractedIdx < | ||||||||
1455 | cast<FixedVectorType>(ExtVecOp->getType())->getNumElements()) { | ||||||||
1456 | // TODO: Looking at the user(s) to determine if this insert is a | ||||||||
1457 | // fold-to-shuffle opportunity does not match the usual instcombine | ||||||||
1458 | // constraints. We should decide if the transform is worthy based only | ||||||||
1459 | // on this instruction and its operands, but that may not work currently. | ||||||||
1460 | // | ||||||||
1461 | // Here, we are trying to avoid creating shuffles before reaching | ||||||||
1462 | // the end of a chain of extract-insert pairs. This is complicated because | ||||||||
1463 | // we do not generally form arbitrary shuffle masks in instcombine | ||||||||
1464 | // (because those may codegen poorly), but collectShuffleElements() does | ||||||||
1465 | // exactly that. | ||||||||
1466 | // | ||||||||
1467 | // The rules for determining what is an acceptable target-independent | ||||||||
1468 | // shuffle mask are fuzzy because they evolve based on the backend's | ||||||||
1469 | // capabilities and real-world impact. | ||||||||
1470 | auto isShuffleRootCandidate = [](InsertElementInst &Insert) { | ||||||||
1471 | if (!Insert.hasOneUse()) | ||||||||
1472 | return true; | ||||||||
1473 | auto *InsertUser = dyn_cast<InsertElementInst>(Insert.user_back()); | ||||||||
1474 | if (!InsertUser) | ||||||||
1475 | return true; | ||||||||
1476 | return false; | ||||||||
1477 | }; | ||||||||
1478 | |||||||||
1479 | // Try to form a shuffle from a chain of extract-insert ops. | ||||||||
1480 | if (isShuffleRootCandidate(IE)) { | ||||||||
1481 | SmallVector<int, 16> Mask; | ||||||||
1482 | ShuffleOps LR = collectShuffleElements(&IE, Mask, nullptr, *this); | ||||||||
1483 | |||||||||
1484 | // The proposed shuffle may be trivial, in which case we shouldn't | ||||||||
1485 | // perform the combine. | ||||||||
1486 | if (LR.first != &IE && LR.second != &IE) { | ||||||||
1487 | // We now have a shuffle of LHS, RHS, Mask. | ||||||||
1488 | if (LR.second == nullptr) | ||||||||
1489 | LR.second = UndefValue::get(LR.first->getType()); | ||||||||
1490 | return new ShuffleVectorInst(LR.first, LR.second, Mask); | ||||||||
1491 | } | ||||||||
1492 | } | ||||||||
1493 | } | ||||||||
1494 | |||||||||
1495 | if (auto VecTy = dyn_cast<FixedVectorType>(VecOp->getType())) { | ||||||||
1496 | unsigned VWidth = VecTy->getNumElements(); | ||||||||
1497 | APInt UndefElts(VWidth, 0); | ||||||||
1498 | APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); | ||||||||
1499 | if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) { | ||||||||
1500 | if (V != &IE) | ||||||||
1501 | return replaceInstUsesWith(IE, V); | ||||||||
1502 | return &IE; | ||||||||
1503 | } | ||||||||
1504 | } | ||||||||
1505 | |||||||||
1506 | if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE)) | ||||||||
1507 | return Shuf; | ||||||||
1508 | |||||||||
1509 | if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder)) | ||||||||
1510 | return NewInsElt; | ||||||||
1511 | |||||||||
1512 | if (Instruction *Broadcast = foldInsSequenceIntoSplat(IE)) | ||||||||
1513 | return Broadcast; | ||||||||
1514 | |||||||||
1515 | if (Instruction *Splat = foldInsEltIntoSplat(IE)) | ||||||||
1516 | return Splat; | ||||||||
1517 | |||||||||
1518 | if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(IE)) | ||||||||
1519 | return IdentityShuf; | ||||||||
1520 | |||||||||
1521 | return nullptr; | ||||||||
1522 | } | ||||||||
1523 | |||||||||
1524 | /// Return true if we can evaluate the specified expression tree if the vector | ||||||||
1525 | /// elements were shuffled in a different order. | ||||||||
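/// For instance (an invented example), (add %x, <i32 1, i32 2, i32 3, i32 4>)
/// can be evaluated with the reversed order <3, 2, 1, 0> by instead computing
/// add (shufflevector %x, poison, <3, 2, 1, 0>), <i32 4, i32 3, i32 2, i32 1>.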
1526 | static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask, | ||||||||
1527 | unsigned Depth = 5) { | ||||||||
1528 | // We can always reorder the elements of a constant. | ||||||||
1529 | if (isa<Constant>(V)) | ||||||||
1530 | return true; | ||||||||
1531 | |||||||||
1532 | // We won't reorder vector arguments. No IPO here. | ||||||||
1533 | Instruction *I = dyn_cast<Instruction>(V); | ||||||||
1534 | if (!I) return false; | ||||||||
1535 | |||||||||
1536 | // Two users may expect different orders of the elements. Don't try it. | ||||||||
1537 | if (!I->hasOneUse()) | ||||||||
1538 | return false; | ||||||||
1539 | |||||||||
1540 | if (Depth == 0) return false; | ||||||||
1541 | |||||||||
1542 | switch (I->getOpcode()) { | ||||||||
1543 | case Instruction::UDiv: | ||||||||
1544 | case Instruction::SDiv: | ||||||||
1545 | case Instruction::URem: | ||||||||
1546 | case Instruction::SRem: | ||||||||
1547 | // Propagating an undefined shuffle mask element to integer div/rem is not | ||||||||
1548 | // allowed because those opcodes can create immediate undefined behavior | ||||||||
1549 | // from an undefined element in an operand. | ||||||||
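// For example (illustrative), an undef mask lane feeding the divisor of a
// udiv could resolve to 0 and introduce division by zero where the original
// (unshuffled) code had none.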
1550 | if (llvm::is_contained(Mask, -1)) | ||||||||
1551 | return false; | ||||||||
1552 | LLVM_FALLTHROUGH[[gnu::fallthrough]]; | ||||||||
1553 | case Instruction::Add: | ||||||||
1554 | case Instruction::FAdd: | ||||||||
1555 | case Instruction::Sub: | ||||||||
1556 | case Instruction::FSub: | ||||||||
1557 | case Instruction::Mul: | ||||||||
1558 | case Instruction::FMul: | ||||||||
1559 | case Instruction::FDiv: | ||||||||
1560 | case Instruction::FRem: | ||||||||
1561 | case Instruction::Shl: | ||||||||
1562 | case Instruction::LShr: | ||||||||
1563 | case Instruction::AShr: | ||||||||
1564 | case Instruction::And: | ||||||||
1565 | case Instruction::Or: | ||||||||
1566 | case Instruction::Xor: | ||||||||
1567 | case Instruction::ICmp: | ||||||||
1568 | case Instruction::FCmp: | ||||||||
1569 | case Instruction::Trunc: | ||||||||
1570 | case Instruction::ZExt: | ||||||||
1571 | case Instruction::SExt: | ||||||||
1572 | case Instruction::FPToUI: | ||||||||
1573 | case Instruction::FPToSI: | ||||||||
1574 | case Instruction::UIToFP: | ||||||||
1575 | case Instruction::SIToFP: | ||||||||
1576 | case Instruction::FPTrunc: | ||||||||
1577 | case Instruction::FPExt: | ||||||||
1578 | case Instruction::GetElementPtr: { | ||||||||
1579 | // Bail out if we would create longer vector ops. We could allow creating | ||||||||
1580 | // longer vector ops, but that may result in more expensive codegen. | ||||||||
1581 | Type *ITy = I->getType(); | ||||||||
1582 | if (ITy->isVectorTy() && | ||||||||
1583 | Mask.size() > cast<FixedVectorType>(ITy)->getNumElements()) | ||||||||
1584 | return false; | ||||||||
1585 | for (Value *Operand : I->operands()) { | ||||||||
1586 | if (!canEvaluateShuffled(Operand, Mask, Depth - 1)) | ||||||||
1587 | return false; | ||||||||
1588 | } | ||||||||
1589 | return true; | ||||||||
1590 | } | ||||||||
1591 | case Instruction::InsertElement: { | ||||||||
1592 | ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2)); | ||||||||
1593 | if (!CI) return false; | ||||||||
1594 | int ElementNumber = CI->getLimitedValue(); | ||||||||
1595 | |||||||||
1596 | // Verify that 'CI' does not occur twice in Mask. A single 'insertelement' | ||||||||
1597 | // can't put an element into multiple indices. | ||||||||
1598 | bool SeenOnce = false; | ||||||||
1599 | for (int i = 0, e = Mask.size(); i != e; ++i) { | ||||||||
1600 | if (Mask[i] == ElementNumber) { | ||||||||
1601 | if (SeenOnce) | ||||||||
1602 | return false; | ||||||||
1603 | SeenOnce = true; | ||||||||
1604 | } | ||||||||
1605 | } | ||||||||
1606 | return canEvaluateShuffled(I->getOperand(0), Mask, Depth - 1); | ||||||||
1607 | } | ||||||||
1608 | } | ||||||||
1609 | return false; | ||||||||
1610 | } | ||||||||
1611 | |||||||||
1612 | /// Rebuild a new instruction just like 'I' but with the new operands given. | ||||||||
1613 | /// In the event of a type mismatch, the types of the new operands take precedence. | ||||||||
1614 | static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps) { | ||||||||
1615 | // We don't want to use the IRBuilder here because we want the replacement | ||||||||
1616 | // instructions to appear next to 'I', not the builder's insertion point. | ||||||||
1617 | switch (I->getOpcode()) { | ||||||||
1618 | case Instruction::Add: | ||||||||
1619 | case Instruction::FAdd: | ||||||||
1620 | case Instruction::Sub: | ||||||||
1621 | case Instruction::FSub: | ||||||||
1622 | case Instruction::Mul: | ||||||||
1623 | case Instruction::FMul: | ||||||||
1624 | case Instruction::UDiv: | ||||||||
1625 | case Instruction::SDiv: | ||||||||
1626 | case Instruction::FDiv: | ||||||||
1627 | case Instruction::URem: | ||||||||
1628 | case Instruction::SRem: | ||||||||
1629 | case Instruction::FRem: | ||||||||
1630 | case Instruction::Shl: | ||||||||
1631 | case Instruction::LShr: | ||||||||
1632 | case Instruction::AShr: | ||||||||
1633 | case Instruction::And: | ||||||||
1634 | case Instruction::Or: | ||||||||
1635 | case Instruction::Xor: { | ||||||||
1636 | BinaryOperator *BO = cast<BinaryOperator>(I); | ||||||||
1637 | assert(NewOps.size() == 2 && "binary operator with #ops != 2")((void)0); | ||||||||
1638 | BinaryOperator *New = | ||||||||
1639 | BinaryOperator::Create(cast<BinaryOperator>(I)->getOpcode(), | ||||||||
1640 | NewOps[0], NewOps[1], "", BO); | ||||||||
1641 | if (isa<OverflowingBinaryOperator>(BO)) { | ||||||||
1642 | New->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap()); | ||||||||
1643 | New->setHasNoSignedWrap(BO->hasNoSignedWrap()); | ||||||||
1644 | } | ||||||||
1645 | if (isa<PossiblyExactOperator>(BO)) { | ||||||||
1646 | New->setIsExact(BO->isExact()); | ||||||||
1647 | } | ||||||||
1648 | if (isa<FPMathOperator>(BO)) | ||||||||
1649 | New->copyFastMathFlags(I); | ||||||||
1650 | return New; | ||||||||
1651 | } | ||||||||
1652 | case Instruction::ICmp: | ||||||||
1653 | assert(NewOps.size() == 2 && "icmp with #ops != 2")((void)0); | ||||||||
1654 | return new ICmpInst(I, cast<ICmpInst>(I)->getPredicate(), | ||||||||
1655 | NewOps[0], NewOps[1]); | ||||||||
1656 | case Instruction::FCmp: | ||||||||
1657 | assert(NewOps.size() == 2 && "fcmp with #ops != 2")((void)0); | ||||||||
1658 | return new FCmpInst(I, cast<FCmpInst>(I)->getPredicate(), | ||||||||
1659 | NewOps[0], NewOps[1]); | ||||||||
1660 | case Instruction::Trunc: | ||||||||
1661 | case Instruction::ZExt: | ||||||||
1662 | case Instruction::SExt: | ||||||||
1663 | case Instruction::FPToUI: | ||||||||
1664 | case Instruction::FPToSI: | ||||||||
1665 | case Instruction::UIToFP: | ||||||||
1666 | case Instruction::SIToFP: | ||||||||
1667 | case Instruction::FPTrunc: | ||||||||
1668 | case Instruction::FPExt: { | ||||||||
1669 | // It's possible that the mask has a different number of elements from | ||||||||
1670 | // the original cast. We recompute the destination type to match the mask. | ||||||||
1671 | Type *DestTy = VectorType::get( | ||||||||
1672 | I->getType()->getScalarType(), | ||||||||
1673 | cast<VectorType>(NewOps[0]->getType())->getElementCount()); | ||||||||
1674 | assert(NewOps.size() == 1 && "cast with #ops != 1")((void)0); | ||||||||
1675 | return CastInst::Create(cast<CastInst>(I)->getOpcode(), NewOps[0], DestTy, | ||||||||
1676 | "", I); | ||||||||
1677 | } | ||||||||
1678 | case Instruction::GetElementPtr: { | ||||||||
1679 | Value *Ptr = NewOps[0]; | ||||||||
1680 | ArrayRef<Value*> Idx = NewOps.slice(1); | ||||||||
1681 | GetElementPtrInst *GEP = GetElementPtrInst::Create( | ||||||||
1682 | cast<GetElementPtrInst>(I)->getSourceElementType(), Ptr, Idx, "", I); | ||||||||
1683 | GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds()); | ||||||||
1684 | return GEP; | ||||||||
1685 | } | ||||||||
1686 | } | ||||||||
1687 | llvm_unreachable("failed to rebuild vector instructions")__builtin_unreachable(); | ||||||||
1688 | } | ||||||||
1689 | |||||||||
1690 | static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) { | ||||||||
1691 | // Mask.size() does not need to be equal to the number of vector elements. | ||||||||
1692 | |||||||||
1693 | assert(V->getType()->isVectorTy() && "can't reorder non-vector elements")((void)0); | ||||||||
1694 | Type *EltTy = V->getType()->getScalarType(); | ||||||||
1695 | Type *I32Ty = IntegerType::getInt32Ty(V->getContext()); | ||||||||
1696 | if (match(V, m_Undef())) | ||||||||
1697 | return UndefValue::get(FixedVectorType::get(EltTy, Mask.size())); | ||||||||
1698 | |||||||||
1699 | if (isa<ConstantAggregateZero>(V)) | ||||||||
1700 | return ConstantAggregateZero::get(FixedVectorType::get(EltTy, Mask.size())); | ||||||||
1701 | |||||||||
1702 | if (Constant *C = dyn_cast<Constant>(V)) | ||||||||
1703 | return ConstantExpr::getShuffleVector(C, PoisonValue::get(C->getType()), | ||||||||
1704 | Mask); | ||||||||
1705 | |||||||||
1706 | Instruction *I = cast<Instruction>(V); | ||||||||
1707 | switch (I->getOpcode()) { | ||||||||
1708 | case Instruction::Add: | ||||||||
1709 | case Instruction::FAdd: | ||||||||
1710 | case Instruction::Sub: | ||||||||
1711 | case Instruction::FSub: | ||||||||
1712 | case Instruction::Mul: | ||||||||
1713 | case Instruction::FMul: | ||||||||
1714 | case Instruction::UDiv: | ||||||||
1715 | case Instruction::SDiv: | ||||||||
1716 | case Instruction::FDiv: | ||||||||
1717 | case Instruction::URem: | ||||||||
1718 | case Instruction::SRem: | ||||||||
1719 | case Instruction::FRem: | ||||||||
1720 | case Instruction::Shl: | ||||||||
1721 | case Instruction::LShr: | ||||||||
1722 | case Instruction::AShr: | ||||||||
1723 | case Instruction::And: | ||||||||
1724 | case Instruction::Or: | ||||||||
1725 | case Instruction::Xor: | ||||||||
1726 | case Instruction::ICmp: | ||||||||
1727 | case Instruction::FCmp: | ||||||||
1728 | case Instruction::Trunc: | ||||||||
1729 | case Instruction::ZExt: | ||||||||
1730 | case Instruction::SExt: | ||||||||
1731 | case Instruction::FPToUI: | ||||||||
1732 | case Instruction::FPToSI: | ||||||||
1733 | case Instruction::UIToFP: | ||||||||
1734 | case Instruction::SIToFP: | ||||||||
1735 | case Instruction::FPTrunc: | ||||||||
1736 | case Instruction::FPExt: | ||||||||
1737 | case Instruction::Select: | ||||||||
1738 | case Instruction::GetElementPtr: { | ||||||||
1739 | SmallVector<Value*, 8> NewOps; | ||||||||
1740 | bool NeedsRebuild = | ||||||||
1741 | (Mask.size() != | ||||||||
1742 | cast<FixedVectorType>(I->getType())->getNumElements()); | ||||||||
1743 | for (int i = 0, e = I->getNumOperands(); i != e; ++i) { | ||||||||
1744 | Value *V; | ||||||||
1745 | // Recursively call evaluateInDifferentElementOrder on vector arguments | ||||||||
1746 | // as well. E.g. GetElementPtr may have scalar operands even if the | ||||||||
1747 | // return value is a vector, so we need to examine the operand type. | ||||||||
1748 | if (I->getOperand(i)->getType()->isVectorTy()) | ||||||||
1749 | V = evaluateInDifferentElementOrder(I->getOperand(i), Mask); | ||||||||
1750 | else | ||||||||
1751 | V = I->getOperand(i); | ||||||||
1752 | NewOps.push_back(V); | ||||||||
1753 | NeedsRebuild |= (V != I->getOperand(i)); | ||||||||
1754 | } | ||||||||
1755 | if (NeedsRebuild) { | ||||||||
1756 | return buildNew(I, NewOps); | ||||||||
1757 | } | ||||||||
1758 | return I; | ||||||||
1759 | } | ||||||||
1760 | case Instruction::InsertElement: { | ||||||||
1761 | int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue(); | ||||||||
1762 | |||||||||
1763 | // The insertelement was inserting at Element. Figure out which element | ||||||||
1764 | // that becomes after shuffling. The answer is guaranteed to be unique | ||||||||
1765 | // by canEvaluateShuffled. | ||||||||
1766 | bool Found = false; | ||||||||
1767 | int Index = 0; | ||||||||
1768 | for (int e = Mask.size(); Index != e; ++Index) { | ||||||||
1769 | if (Mask[Index] == Element) { | ||||||||
1770 | Found = true; | ||||||||
1771 | break; | ||||||||
1772 | } | ||||||||
1773 | } | ||||||||
1774 | |||||||||
1775 | // If the element is not in Mask, there is no need to handle operand 1 (the | ||||||||
1776 | // element to be inserted). Just evaluate the values in operand 0 according to Mask. | ||||||||
1777 | if (!Found) | ||||||||
1778 | return evaluateInDifferentElementOrder(I->getOperand(0), Mask); | ||||||||
1779 | |||||||||
1780 | Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask); | ||||||||
1781 | return InsertElementInst::Create(V, I->getOperand(1), | ||||||||
1782 | ConstantInt::get(I32Ty, Index), "", I); | ||||||||
1783 | } | ||||||||
1784 | } | ||||||||
1785 | llvm_unreachable("failed to reorder elements of vector instruction!")__builtin_unreachable(); | ||||||||
1786 | } | ||||||||
1787 | |||||||||
1788 | // Returns true if the shuffle is extracting a contiguous range of values from | ||||||||
1789 | // LHS, for example: | ||||||||
1790 | // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | ||||||||
1791 | // Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP| | ||||||||
1792 | // Shuffles to: |EE|FF|GG|HH| | ||||||||
1793 | // +--+--+--+--+ | ||||||||
1794 | static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI, | ||||||||
1795 | ArrayRef<int> Mask) { | ||||||||
1796 | unsigned LHSElems = | ||||||||
1797 | cast<FixedVectorType>(SVI.getOperand(0)->getType())->getNumElements(); | ||||||||
1798 | unsigned MaskElems = Mask.size(); | ||||||||
1799 | unsigned BegIdx = Mask.front(); | ||||||||
1800 | unsigned EndIdx = Mask.back(); | ||||||||
1801 | if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1) | ||||||||
1802 | return false; | ||||||||
1803 | for (unsigned I = 0; I != MaskElems; ++I) | ||||||||
1804 | if (static_cast<unsigned>(Mask[I]) != BegIdx + I) | ||||||||
1805 | return false; | ||||||||
1806 | return true; | ||||||||
1807 | } | ||||||||
1808 | |||||||||
1809 | /// These are the ingredients in an alternate form binary operator as described | ||||||||
1810 | /// below. | ||||||||
1811 | struct BinopElts { | ||||||||
1812 | BinaryOperator::BinaryOps Opcode; | ||||||||
1813 | Value *Op0; | ||||||||
1814 | Value *Op1; | ||||||||
1815 | BinopElts(BinaryOperator::BinaryOps Opc = (BinaryOperator::BinaryOps)0, | ||||||||
1816 | Value *V0 = nullptr, Value *V1 = nullptr) : | ||||||||
1817 | Opcode(Opc), Op0(V0), Op1(V1) {} | ||||||||
1818 | operator bool() const { return Opcode != 0; } | ||||||||
1819 | }; | ||||||||
1820 | |||||||||
1821 | /// Binops may be transformed into binops with different opcodes and operands. | ||||||||
1822 | /// Reverse the usual canonicalization to enable folds with the non-canonical | ||||||||
1823 | /// form of the binop. If a transform is possible, return the elements of the | ||||||||
1824 | /// new binop. If not, return invalid elements. | ||||||||
1825 | static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) { | ||||||||
1826 | Value *BO0 = BO->getOperand(0), *BO1 = BO->getOperand(1); | ||||||||
1827 | Type *Ty = BO->getType(); | ||||||||
1828 | switch (BO->getOpcode()) { | ||||||||
1829 | case Instruction::Shl: { | ||||||||
1830 | // shl X, C --> mul X, (1 << C) | ||||||||
1831 | Constant *C; | ||||||||
1832 | if (match(BO1, m_Constant(C))) { | ||||||||
1833 | Constant *ShlOne = ConstantExpr::getShl(ConstantInt::get(Ty, 1), C); | ||||||||
1834 | return { Instruction::Mul, BO0, ShlOne }; | ||||||||
1835 | } | ||||||||
1836 | break; | ||||||||
1837 | } | ||||||||
1838 | case Instruction::Or: { | ||||||||
1839 | // or X, C --> add X, C (when X and C have no common bits set) | ||||||||
1840 | const APInt *C; | ||||||||
1841 | if (match(BO1, m_APInt(C)) && MaskedValueIsZero(BO0, *C, DL)) | ||||||||
1842 | return { Instruction::Add, BO0, BO1 }; | ||||||||
1843 | break; | ||||||||
1844 | } | ||||||||
1845 | default: | ||||||||
1846 | break; | ||||||||
1847 | } | ||||||||
1848 | return {}; | ||||||||
1849 | } | ||||||||
1850 | |||||||||
1851 | static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf) { | ||||||||
1852 | assert(Shuf.isSelect() && "Must have select-equivalent shuffle")((void)0); | ||||||||
1853 | |||||||||
1854 | // Are we shuffling together some value and that same value after it has been | ||||||||
1855 | // modified by a binop with a constant? | ||||||||
1856 | Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1); | ||||||||
1857 | Constant *C; | ||||||||
1858 | bool Op0IsBinop; | ||||||||
1859 | if (match(Op0, m_BinOp(m_Specific(Op1), m_Constant(C)))) | ||||||||
1860 | Op0IsBinop = true; | ||||||||
1861 | else if (match(Op1, m_BinOp(m_Specific(Op0), m_Constant(C)))) | ||||||||
1862 | Op0IsBinop = false; | ||||||||
1863 | else | ||||||||
1864 | return nullptr; | ||||||||
1865 | |||||||||
1866 | // The identity constant for a binop leaves a variable operand unchanged. For | ||||||||
1867 | // a vector, this is a splat of something like 0, -1, or 1. | ||||||||
1868 | // If there's no identity constant for this binop, we're done. | ||||||||
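// For example (not an exhaustive list), the identity is 0 for add/or/xor,
// 1 for mul, and -1 (all-ones) for and.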
1869 | auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1); | ||||||||
1870 | BinaryOperator::BinaryOps BOpcode = BO->getOpcode(); | ||||||||
1871 | Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true); | ||||||||
1872 | if (!IdC) | ||||||||
1873 | return nullptr; | ||||||||
1874 | |||||||||
1875 | // Shuffle identity constants into the lanes that return the original value. | ||||||||
1876 | // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4} | ||||||||
1877 | // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4} | ||||||||
1878 | // The existing binop constant vector remains in the same operand position. | ||||||||
1879 | ArrayRef<int> Mask = Shuf.getShuffleMask(); | ||||||||
1880 | Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(C, IdC, Mask) : | ||||||||
1881 | ConstantExpr::getShuffleVector(IdC, C, Mask); | ||||||||
1882 | |||||||||
1883 | bool MightCreatePoisonOrUB = | ||||||||
1884 | is_contained(Mask, UndefMaskElem) && | ||||||||
1885 | (Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode)); | ||||||||
1886 | if (MightCreatePoisonOrUB) | ||||||||
1887 | NewC = InstCombiner::getSafeVectorConstantForBinop(BOpcode, NewC, true); | ||||||||
1888 | |||||||||
1889 | // shuf (bop X, C), X, M --> bop X, C' | ||||||||
1890 | // shuf X, (bop X, C), M --> bop X, C' | ||||||||
1891 | Value *X = Op0IsBinop ? Op1 : Op0; | ||||||||
1892 | Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC); | ||||||||
1893 | NewBO->copyIRFlags(BO); | ||||||||
1894 | |||||||||
1895 | // An undef shuffle mask element may propagate as an undef constant element in | ||||||||
1896 | // the new binop. That would produce poison where the original code might not. | ||||||||
1897 | // If we already made a safe constant, then there's no danger. | ||||||||
1898 | if (is_contained(Mask, UndefMaskElem) && !MightCreatePoisonOrUB) | ||||||||
1899 | NewBO->dropPoisonGeneratingFlags(); | ||||||||
1900 | return NewBO; | ||||||||
1901 | } | ||||||||
1902 | |||||||||
1903 | /// If we have an insert of a scalar to a non-zero element of an undefined | ||||||||
1904 | /// vector and then shuffle that value, that's the same as inserting to the zero | ||||||||
1905 | /// element and shuffling. Splatting from the zero element is recognized as the | ||||||||
1906 | /// canonical form of splat. | ||||||||
1907 | static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf, | ||||||||
1908 | InstCombiner::BuilderTy &Builder) { | ||||||||
1909 | Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1); | ||||||||
1910 | ArrayRef<int> Mask = Shuf.getShuffleMask(); | ||||||||
1911 | Value *X; | ||||||||
1912 | uint64_t IndexC; | ||||||||
1913 | |||||||||
1914 | // Match a shuffle that is a splat to a non-zero element. | ||||||||
1915 | if (!match(Op0, m_OneUse(m_InsertElt(m_Undef(), m_Value(X), | ||||||||
1916 | m_ConstantInt(IndexC)))) || | ||||||||
1917 | !match(Op1, m_Undef()) || match(Mask, m_ZeroMask()) || IndexC == 0) | ||||||||
1918 | return nullptr; | ||||||||
1919 | |||||||||
1920 | // Insert into element 0 of an undef vector. | ||||||||
1921 | UndefValue *UndefVec = UndefValue::get(Shuf.getType()); | ||||||||
1922 | Constant *Zero = Builder.getInt32(0); | ||||||||
1923 | Value *NewIns = Builder.CreateInsertElement(UndefVec, X, Zero); | ||||||||
1924 | |||||||||
1925 | // Splat from element 0. Any mask element that is undefined remains undefined. | ||||||||
1926 | // For example: | ||||||||
1927 | // shuf (inselt undef, X, 2), undef, <2,2,undef> | ||||||||
1928 | // --> shuf (inselt undef, X, 0), undef, <0,0,undef> | ||||||||
1929 | unsigned NumMaskElts = | ||||||||
1930 | cast<FixedVectorType>(Shuf.getType())->getNumElements(); | ||||||||
1931 | SmallVector<int, 16> NewMask(NumMaskElts, 0); | ||||||||
1932 | for (unsigned i = 0; i != NumMaskElts; ++i) | ||||||||
1933 | if (Mask[i] == UndefMaskElem) | ||||||||
1934 | NewMask[i] = Mask[i]; | ||||||||
1935 | |||||||||
1936 | return new ShuffleVectorInst(NewIns, UndefVec, NewMask); | ||||||||
1937 | } | ||||||||
1938 | |||||||||
1939 | /// Try to fold shuffles that are the equivalent of a vector select. | ||||||||
1940 | static Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf, | ||||||||
1941 | InstCombiner::BuilderTy &Builder, | ||||||||
1942 | const DataLayout &DL) { | ||||||||
1943 | if (!Shuf.isSelect()) | ||||||||
1944 | return nullptr; | ||||||||
1945 | |||||||||
1946 | // Canonicalize to choose from operand 0 first unless operand 1 is undefined. | ||||||||
1947 | // Commuting undef to operand 0 conflicts with another canonicalization. | ||||||||
1948 | unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements(); | ||||||||
1949 | if (!match(Shuf.getOperand(1), m_Undef()) && | ||||||||
1950 | Shuf.getMaskValue(0) >= (int)NumElts) { | ||||||||
1951 | // TODO: Can we assert that both operands of a shuffle-select are not undef | ||||||||
1952 | // (otherwise, it would have been folded by instsimplify)? | ||||||||
1953 | Shuf.commute(); | ||||||||
1954 | return &Shuf; | ||||||||
1955 | } | ||||||||
1956 | |||||||||
1957 | if (Instruction *I = foldSelectShuffleWith1Binop(Shuf)) | ||||||||
1958 | return I; | ||||||||
1959 | |||||||||
1960 | BinaryOperator *B0, *B1; | ||||||||
1961 | if (!match(Shuf.getOperand(0), m_BinOp(B0)) || | ||||||||
1962 | !match(Shuf.getOperand(1), m_BinOp(B1))) | ||||||||
1963 | return nullptr; | ||||||||
1964 | |||||||||
1965 | Value *X, *Y; | ||||||||
1966 | Constant *C0, *C1; | ||||||||
1967 | bool ConstantsAreOp1; | ||||||||
1968 | if (match(B0, m_BinOp(m_Value(X), m_Constant(C0))) && | ||||||||
1969 | match(B1, m_BinOp(m_Value(Y), m_Constant(C1)))) | ||||||||
1970 | ConstantsAreOp1 = true; | ||||||||
1971 | else if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) && | ||||||||
1972 | match(B1, m_BinOp(m_Constant(C1), m_Value(Y)))) | ||||||||
1973 | ConstantsAreOp1 = false; | ||||||||
1974 | else | ||||||||
1975 | return nullptr; | ||||||||
1976 | |||||||||
1977 | // We need matching binops to fold the lanes together. | ||||||||
1978 | BinaryOperator::BinaryOps Opc0 = B0->getOpcode(); | ||||||||
1979 | BinaryOperator::BinaryOps Opc1 = B1->getOpcode(); | ||||||||
1980 | bool DropNSW = false; | ||||||||
1981 | if (ConstantsAreOp1 && Opc0 != Opc1) { | ||||||||
1982 | // TODO: We drop "nsw" if shift is converted into multiply because it may | ||||||||
1983 | // not be correct when the shift amount is BitWidth - 1. We could examine | ||||||||
1984 | // each vector element to determine if it is safe to keep that flag. | ||||||||
1985 | if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl) | ||||||||
1986 | DropNSW = true; | ||||||||
1987 | if (BinopElts AltB0 = getAlternateBinop(B0, DL)) { | ||||||||
1988 | assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop")((void)0); | ||||||||
1989 | Opc0 = AltB0.Opcode; | ||||||||
1990 | C0 = cast<Constant>(AltB0.Op1); | ||||||||
1991 | } else if (BinopElts AltB1 = getAlternateBinop(B1, DL)) { | ||||||||
1992 | assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop")((void)0); | ||||||||
1993 | Opc1 = AltB1.Opcode; | ||||||||
1994 | C1 = cast<Constant>(AltB1.Op1); | ||||||||
1995 | } | ||||||||
1996 | } | ||||||||
1997 | |||||||||
1998 | if (Opc0 != Opc1) | ||||||||
1999 | return nullptr; | ||||||||
2000 | |||||||||
2001 | // The opcodes must be the same. Use a new name to make that clear. | ||||||||
2002 | BinaryOperator::BinaryOps BOpc = Opc0; | ||||||||
2003 | |||||||||
2004 | // Select the constant elements needed for the single binop. | ||||||||
2005 | ArrayRef<int> Mask = Shuf.getShuffleMask(); | ||||||||
2006 | Constant *NewC = ConstantExpr::getShuffleVector(C0, C1, Mask); | ||||||||
2007 | |||||||||
2008 | // We are moving a binop after a shuffle. When a shuffle has an undefined | ||||||||
2009 | // mask element, the result is undefined, but it is not poison or undefined | ||||||||
2010 | // behavior. That is not necessarily true for div/rem/shift. | ||||||||
2011 | bool MightCreatePoisonOrUB = | ||||||||
2012 | is_contained(Mask, UndefMaskElem) && | ||||||||
2013 | (Instruction::isIntDivRem(BOpc) || Instruction::isShift(BOpc)); | ||||||||
2014 | if (MightCreatePoisonOrUB) | ||||||||
2015 | NewC = InstCombiner::getSafeVectorConstantForBinop(BOpc, NewC, | ||||||||
2016 | ConstantsAreOp1); | ||||||||
2017 | |||||||||
2018 | Value *V; | ||||||||
2019 | if (X == Y) { | ||||||||
2020 | // Remove a binop and the shuffle by rearranging the constant: | ||||||||
2021 | // shuffle (op V, C0), (op V, C1), M --> op V, C' | ||||||||
2022 | // shuffle (op C0, V), (op C1, V), M --> op C', V | ||||||||
2023 | V = X; | ||||||||
2024 | } else { | ||||||||
2025 | // If there are 2 different variable operands, we must create a new shuffle | ||||||||
2026 | // (select) first, so check uses to ensure that we don't end up with more | ||||||||
2027 | // instructions than we started with. | ||||||||
2028 | if (!B0->hasOneUse() && !B1->hasOneUse()) | ||||||||
2029 | return nullptr; | ||||||||
2030 | |||||||||
2031 | // If we use the original shuffle mask and op1 is *variable*, we would be | ||||||||
2032 | // putting an undef into operand 1 of div/rem/shift. This is either UB or | ||||||||
2033 | // poison. We do not have to guard against UB when *constants* are op1 | ||||||||
2034 | // because safe constants guarantee that we do not overflow sdiv/srem (and | ||||||||
2035 | // there's no danger for other opcodes). | ||||||||
2036 | // TODO: To allow this case, create a new shuffle mask with no undefs. | ||||||||
2037 | if (MightCreatePoisonOrUB && !ConstantsAreOp1) | ||||||||
2038 | return nullptr; | ||||||||
2039 | |||||||||
2040 | // Note: In general, we do not create new shuffles in InstCombine because we | ||||||||
2041 | // do not know if a target can lower an arbitrary shuffle optimally. In this | ||||||||
2042 | // case, the shuffle uses the existing mask, so there is no additional risk. | ||||||||
2043 | |||||||||
2044 | // Select the variable vectors first, then perform the binop: | ||||||||
2045 | // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C' | ||||||||
2046 | // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M) | ||||||||
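// As a concrete (invented) instance of the first form:
//   shuffle (add %x, <1,2,3,4>), (add %y, <5,6,7,8>), <0,5,2,7>
//     --> add (shuffle %x, %y, <0,5,2,7>), <1,6,3,8>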
2047 | V = Builder.CreateShuffleVector(X, Y, Mask); | ||||||||
2048 | } | ||||||||
2049 | |||||||||
2050 | Instruction *NewBO = ConstantsAreOp1 ? BinaryOperator::Create(BOpc, V, NewC) : | ||||||||
2051 | BinaryOperator::Create(BOpc, NewC, V); | ||||||||
2052 | |||||||||
2053 | // Flags are intersected from the 2 source binops. But there are 2 exceptions: | ||||||||
2054 | // 1. If we changed an opcode, poison conditions might have changed. | ||||||||
2055 | // 2. If the shuffle had undef mask elements, the new binop might have undefs | ||||||||
2056 | // where the original code did not. But if we already made a safe constant, | ||||||||
2057 | // then there's no danger. | ||||||||
2058 | NewBO->copyIRFlags(B0); | ||||||||
2059 | NewBO->andIRFlags(B1); | ||||||||
2060 | if (DropNSW) | ||||||||
2061 | NewBO->setHasNoSignedWrap(false); | ||||||||
2062 | if (is_contained(Mask, UndefMaskElem) && !MightCreatePoisonOrUB) | ||||||||
2063 | NewBO->dropPoisonGeneratingFlags(); | ||||||||
2064 | return NewBO; | ||||||||
2065 | } | ||||||||
2066 | |||||||||
2067 | /// Convert a narrowing shuffle of a bitcasted vector into a vector truncate. | ||||||||
2068 | /// Example (little endian): | ||||||||
2069 | /// shuf (bitcast <4 x i16> X to <8 x i8>), <0, 2, 4, 6> --> trunc X to <4 x i8> | ||||||||
2070 | static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf, | ||||||||
2071 | bool IsBigEndian) { | ||||||||
2072 | // This must be a bitcasted shuffle of 1 vector integer operand. | ||||||||
2073 | Type *DestType = Shuf.getType(); | ||||||||
2074 | Value *X; | ||||||||
2075 | if (!match(Shuf.getOperand(0), m_BitCast(m_Value(X))) || | ||||||||
2076 | !match(Shuf.getOperand(1), m_Undef()) || !DestType->isIntOrIntVectorTy()) | ||||||||
2077 | return nullptr; | ||||||||
2078 | |||||||||
2079 | // The source type must have the same number of elements as the shuffle, | ||||||||
2080 | // and the source element type must be larger than the shuffle element type. | ||||||||
2081 | Type *SrcType = X->getType(); | ||||||||
2082 | if (!SrcType->isVectorTy() || !SrcType->isIntOrIntVectorTy() || | ||||||||
2083 | cast<FixedVectorType>(SrcType)->getNumElements() != | ||||||||
2084 | cast<FixedVectorType>(DestType)->getNumElements() || | ||||||||
2085 | SrcType->getScalarSizeInBits() % DestType->getScalarSizeInBits() != 0) | ||||||||
2086 | return nullptr; | ||||||||
2087 | |||||||||
2088 | assert(Shuf.changesLength() && !Shuf.increasesLength() && | ||||||||
2089 | "Expected a shuffle that decreases length"); | ||||||||
2090 | |||||||||
2091 | // Last, check that the mask chooses the correct low bits for each narrow | ||||||||
2092 | // element in the result. | ||||||||
2093 | uint64_t TruncRatio = | ||||||||
2094 | SrcType->getScalarSizeInBits() / DestType->getScalarSizeInBits(); | ||||||||
2095 | ArrayRef<int> Mask = Shuf.getShuffleMask(); | ||||||||
2096 | for (unsigned i = 0, e = Mask.size(); i != e; ++i) { | ||||||||
2097 | if (Mask[i] == UndefMaskElem) | ||||||||
2098 | continue; | ||||||||
2099 | uint64_t LSBIndex = IsBigEndian ? (i + 1) * TruncRatio - 1 : i * TruncRatio; | ||||||||
2100 | assert(LSBIndex <= INT32_MAX && "Overflowed 32-bits"); | ||||||||
2101 | if (Mask[i] != (int)LSBIndex) | ||||||||
2102 | return nullptr; | ||||||||
2103 | } | ||||||||
2104 | |||||||||
2105 | return new TruncInst(X, DestType); | ||||||||
2106 | } | ||||||||
2107 | |||||||||
2108 | /// Match a shuffle-select-shuffle pattern where the shuffles are widening and | ||||||||
2109 | /// narrowing (concatenating with undef and extracting back to the original | ||||||||
2110 | /// length). This allows replacing the wide select with a narrow select. | ||||||||
2111 | static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf, | ||||||||
2112 | InstCombiner::BuilderTy &Builder) { | ||||||||
2113 | // This must be a narrowing identity shuffle. It extracts the 1st N elements | ||||||||
2114 | // of the 1st vector operand of a shuffle. | ||||||||
2115 | if (!match(Shuf.getOperand(1), m_Undef()) || !Shuf.isIdentityWithExtract()) | ||||||||
2116 | return nullptr; | ||||||||
2117 | |||||||||
2118 | // The vector being shuffled must be a vector select that we can eliminate. | ||||||||
2119 | // TODO: The one-use requirement could be eased if X and/or Y are constants. | ||||||||
2120 | Value *Cond, *X, *Y; | ||||||||
2121 | if (!match(Shuf.getOperand(0), | ||||||||
2122 | m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y))))) | ||||||||
2123 | return nullptr; | ||||||||
2124 | |||||||||
2125 | // We need a narrow condition value. It must be extended with undef elements | ||||||||
2126 | // and have the same number of elements as this shuffle. | ||||||||
2127 | unsigned NarrowNumElts = | ||||||||
2128 | cast<FixedVectorType>(Shuf.getType())->getNumElements(); | ||||||||
2129 | Value *NarrowCond; | ||||||||
2130 | if (!match(Cond, m_OneUse(m_Shuffle(m_Value(NarrowCond), m_Undef()))) || | ||||||||
2131 | cast<FixedVectorType>(NarrowCond->getType())->getNumElements() != | ||||||||
2132 | NarrowNumElts || | ||||||||
2133 | !cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding()) | ||||||||
2134 | return nullptr; | ||||||||
2135 | |||||||||
2136 | // shuf (sel (shuf NarrowCond, undef, WideMask), X, Y), undef, NarrowMask) --> | ||||||||
2137 | // sel NarrowCond, (shuf X, undef, NarrowMask), (shuf Y, undef, NarrowMask) | ||||||||
2138 | Value *NarrowX = Builder.CreateShuffleVector(X, Shuf.getShuffleMask()); | ||||||||
2139 | Value *NarrowY = Builder.CreateShuffleVector(Y, Shuf.getShuffleMask()); | ||||||||
2140 | return SelectInst::Create(NarrowCond, NarrowX, NarrowY); | ||||||||
2141 | } | ||||||||
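     | // Sketch of the pattern above on hypothetical IR (illustration only, not
     | // taken from the LLVM sources or tests):
     | //   %wc  = shufflevector <2 x i1> %cond, <2 x i1> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
     | //   %sel = select <4 x i1> %wc, <4 x i32> %x, <4 x i32> %y
     | //   %r   = shufflevector <4 x i32> %sel, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
     | // -->
     | //   %nx = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
     | //   %ny = shufflevector <4 x i32> %y, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
     | //   %r  = select <2 x i1> %cond, <2 x i32> %nx, <2 x i32> %ny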
2142 | |||||||||
2143 | /// Try to fold an extract subvector operation. | ||||||||
2144 | static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) { | ||||||||
2145 | Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1); | ||||||||
2146 | if (!Shuf.isIdentityWithExtract() || !match(Op1, m_Undef())) | ||||||||
2147 | return nullptr; | ||||||||
2148 | |||||||||
2149 | // Check if we are extracting all bits of an inserted scalar: | ||||||||
2150 | // extract-subvec (bitcast (inselt ?, X, 0) --> bitcast X to subvec type | ||||||||
2151 | Value *X; | ||||||||
2152 | if (match(Op0, m_BitCast(m_InsertElt(m_Value(), m_Value(X), m_Zero()))) && | ||||||||
2153 | X->getType()->getPrimitiveSizeInBits() == | ||||||||
2154 | Shuf.getType()->getPrimitiveSizeInBits()) | ||||||||
2155 | return new BitCastInst(X, Shuf.getType()); | ||||||||
2156 | |||||||||
2157 | // Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask. | ||||||||
2158 | Value *Y; | ||||||||
2159 | ArrayRef<int> Mask; | ||||||||
2160 | if (!match(Op0, m_Shuffle(m_Value(X), m_Value(Y), m_Mask(Mask)))) | ||||||||
2161 | return nullptr; | ||||||||
2162 | |||||||||
2163 | // Be conservative with shuffle transforms. If we can't kill the 1st shuffle, | ||||||||
2164 | // then combining may result in worse codegen. | ||||||||
2165 | if (!Op0->hasOneUse()) | ||||||||
2166 | return nullptr; | ||||||||
2167 | |||||||||
2168 | // We are extracting a subvector from a shuffle. Remove excess elements from | ||||||||
2169 | // the 1st shuffle mask to eliminate the extract. | ||||||||
2170 | // | ||||||||
2171 | // This transform is conservatively limited to identity extracts because we do | ||||||||
2172 | // not allow arbitrary shuffle mask creation as a target-independent transform | ||||||||
2173 | // (because we can't guarantee that it will lower efficiently). | ||||||||
2174 | // | ||||||||
2175 | // If the extracting shuffle has an undef mask element, it transfers to the | ||||||||
2176 | // new shuffle mask. Otherwise, copy the original mask element. Example: | ||||||||
2177 | // shuf (shuf X, Y, <C0, C1, C2, undef, C4>), undef, <0, undef, 2, 3> --> | ||||||||
2178 | // shuf X, Y, <C0, undef, C2, undef> | ||||||||
2179 | unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements(); | ||||||||
2180 | SmallVector<int, 16> NewMask(NumElts); | ||||||||
2181 | assert(NumElts < Mask.size() && | ||||||||
2182 | "Identity with extract must have fewer elements than its inputs"); | ||||||||
2183 | |||||||||
2184 | for (unsigned i = 0; i != NumElts; ++i) { | ||||||||
2185 | int ExtractMaskElt = Shuf.getMaskValue(i); | ||||||||
2186 | int MaskElt = Mask[i]; | ||||||||
2187 | NewMask[i] = ExtractMaskElt == UndefMaskElem ? ExtractMaskElt : MaskElt; | ||||||||
2188 | } | ||||||||
2189 | return new ShuffleVectorInst(X, Y, NewMask); | ||||||||
2190 | } | ||||||||
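     | // Sketch (illustration only, not from the LLVM sources or tests): an identity
     | // extract of a one-use two-input shuffle reuses the original inputs with a
     | // shortened mask.
     | //   %s1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
     | //   %s2 = shufflevector <4 x i32> %s1, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
     | // -->
     | //   %s2 = shufflevector <4 x i32> %x, <4 x i32> %y, <2 x i32> <i32 0, i32 4>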
2191 | |||||||||
2192 | /// Try to replace a shuffle with an insertelement or try to replace a shuffle | ||||||||
2193 | /// operand with the operand of an insertelement. | ||||||||
2194 | static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf, | ||||||||
2195 | InstCombinerImpl &IC) { | ||||||||
2196 | Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1); | ||||||||
2197 | SmallVector<int, 16> Mask; | ||||||||
2198 | Shuf.getShuffleMask(Mask); | ||||||||
2199 | |||||||||
2200 | // The shuffle must not change vector sizes. | ||||||||
2201 | // TODO: This restriction could be removed if the insert has only one use | ||||||||
2202 | // (because the transform would require a new length-changing shuffle). | ||||||||
2203 | int NumElts = Mask.size(); | ||||||||
2204 | if (NumElts != (int)(cast<FixedVectorType>(V0->getType())->getNumElements())) | ||||||||
2205 | return nullptr; | ||||||||
2206 | |||||||||
2207 | // This is a specialization of a fold in SimplifyDemandedVectorElts. We may | ||||||||
2208 | // not be able to handle it there if the insertelement has >1 use. | ||||||||
2209 | // If the shuffle has an insertelement operand but does not choose the | ||||||||
2210 | // inserted scalar element from that value, then we can replace that shuffle | ||||||||
2211 | // operand with the source vector of the insertelement. | ||||||||
2212 | Value *X; | ||||||||
2213 | uint64_t IdxC; | ||||||||
2214 | if (match(V0, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) { | ||||||||
2215 | // shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask | ||||||||
2216 | if (!is_contained(Mask, (int)IdxC)) | ||||||||
2217 | return IC.replaceOperand(Shuf, 0, X); | ||||||||
2218 | } | ||||||||
2219 | if (match(V1, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) { | ||||||||
2220 | // Offset the index constant by the vector width because we are checking for | ||||||||
2221 | // accesses to the 2nd vector input of the shuffle. | ||||||||
2222 | IdxC += NumElts; | ||||||||
2223 | // shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask | ||||||||
2224 | if (!is_contained(Mask, (int)IdxC)) | ||||||||
2225 | return IC.replaceOperand(Shuf, 1, X); | ||||||||
2226 | } | ||||||||
2227 | |||||||||
2228 | // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC' | ||||||||
2229 | auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) { | ||||||||
2230 | // We need an insertelement with a constant index. | ||||||||
2231 | if (!match(V0, m_InsertElt(m_Value(), m_Value(Scalar), | ||||||||
2232 | m_ConstantInt(IndexC)))) | ||||||||
2233 | return false; | ||||||||
2234 | |||||||||
2235 | // Test the shuffle mask to see if it splices the inserted scalar into the | ||||||||
2236 | // operand 1 vector of the shuffle. | ||||||||
2237 | int NewInsIndex = -1; | ||||||||
2238 | for (int i = 0; i != NumElts; ++i) { | ||||||||
2239 | // Ignore undef mask elements. | ||||||||
2240 | if (Mask[i] == -1) | ||||||||
2241 | continue; | ||||||||
2242 | |||||||||
2243 | // The shuffle takes elements of operand 1 without lane changes. | ||||||||
2244 | if (Mask[i] == NumElts + i) | ||||||||
2245 | continue; | ||||||||
2246 | |||||||||
2247 | // The shuffle must choose the inserted scalar exactly once. | ||||||||
2248 | if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue()) | ||||||||
2249 | return false; | ||||||||
2250 | |||||||||
2251 | // The shuffle is placing the inserted scalar into element i. | ||||||||
2252 | NewInsIndex = i; | ||||||||
2253 | } | ||||||||
2254 | |||||||||
2255 | assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?"); | ||||||||
2256 | |||||||||
2257 | // Index is updated to the potentially translated insertion lane. | ||||||||
2258 | IndexC = ConstantInt::get(IndexC->getType(), NewInsIndex); | ||||||||
2259 | return true; | ||||||||
2260 | }; | ||||||||
2261 | |||||||||
2262 | // If the shuffle is unnecessary, insert the scalar operand directly into | ||||||||
2263 | // operand 1 of the shuffle. Example: | ||||||||
2264 | // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0 | ||||||||
2265 | Value *Scalar; | ||||||||
2266 | ConstantInt *IndexC; | ||||||||
2267 | if (isShufflingScalarIntoOp1(Scalar, IndexC)) | ||||||||
2268 | return InsertElementInst::Create(V1, Scalar, IndexC); | ||||||||
2269 | |||||||||
2270 | // Try again after commuting shuffle. Example: | ||||||||
2271 | // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> --> | ||||||||
2272 | // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3 | ||||||||
2273 | std::swap(V0, V1); | ||||||||
2274 | ShuffleVectorInst::commuteShuffleMask(Mask, NumElts); | ||||||||
2275 | if (isShufflingScalarIntoOp1(Scalar, IndexC)) | ||||||||
2276 | return InsertElementInst::Create(V1, Scalar, IndexC); | ||||||||
2277 | |||||||||
2278 | return nullptr; | ||||||||
2279 | } | ||||||||
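     | // Sketch of the first case handled above (hypothetical IR, illustration only):
     | // the inserted lane is never read by the shuffle, so the insertelement can be
     | // bypassed.
     | //   %v0 = insertelement <4 x i32> %x, i32 %s, i64 3
     | //   %r  = shufflevector <4 x i32> %v0, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 5, i32 6>
     | // -->
     | //   %r  = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 5, i32 6>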
2280 | |||||||||
2281 | static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) { | ||||||||
2282 | // Match the operands as identity with padding (also known as concatenation | ||||||||
2283 | // with undef) shuffles of the same source type. The backend is expected to | ||||||||
2284 | // recreate these concatenations from a shuffle of narrow operands. | ||||||||
2285 | auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0)); | ||||||||
2286 | auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1)); | ||||||||
2287 | if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() || | ||||||||
2288 | !Shuffle1 || !Shuffle1->isIdentityWithPadding()) | ||||||||
2289 | return nullptr; | ||||||||
2290 | |||||||||
2291 | // We limit this transform to power-of-2 types because we expect that the | ||||||||
2292 | // backend can convert the simplified IR patterns to identical nodes as the | ||||||||
2293 | // original IR. | ||||||||
2294 | // TODO: If we can verify the same behavior for arbitrary types, the | ||||||||
2295 | // power-of-2 checks can be removed. | ||||||||
2296 | Value *X = Shuffle0->getOperand(0); | ||||||||
2297 | Value *Y = Shuffle1->getOperand(0); | ||||||||
2298 | if (X->getType() != Y->getType() || | ||||||||
2299 | !isPowerOf2_32(cast<FixedVectorType>(Shuf.getType())->getNumElements()) || | ||||||||
2300 | !isPowerOf2_32( | ||||||||
2301 | cast<FixedVectorType>(Shuffle0->getType())->getNumElements()) || | ||||||||
2302 | !isPowerOf2_32(cast<FixedVectorType>(X->getType())->getNumElements()) || | ||||||||
2303 | match(X, m_Undef()) || match(Y, m_Undef())) | ||||||||
2304 | return nullptr; | ||||||||
2305 | assert(match(Shuffle0->getOperand(1), m_Undef()) && | ||||||||
2306 | match(Shuffle1->getOperand(1), m_Undef()) && | ||||||||
2307 | "Unexpected operand for identity shuffle"); | ||||||||
2308 | |||||||||
2309 | // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source | ||||||||
2310 | // operands directly by adjusting the shuffle mask to account for the narrower | ||||||||
2311 | // types: | ||||||||
2312 | // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask' | ||||||||
2313 | int NarrowElts = cast<FixedVectorType>(X->getType())->getNumElements(); | ||||||||
2314 | int WideElts = cast<FixedVectorType>(Shuffle0->getType())->getNumElements(); | ||||||||
2315 | assert(WideElts > NarrowElts && "Unexpected types for identity with padding"); | ||||||||
2316 | |||||||||
2317 | ArrayRef<int> Mask = Shuf.getShuffleMask(); | ||||||||
2318 | SmallVector<int, 16> NewMask(Mask.size(), -1); | ||||||||
2319 | for (int i = 0, e = Mask.size(); i != e; ++i) { | ||||||||
2320 | if (Mask[i] == -1) | ||||||||
2321 | continue; | ||||||||
2322 | |||||||||
2323 | // If this shuffle is choosing an undef element from 1 of the sources, that | ||||||||
2324 | // element is undef. | ||||||||
2325 | if (Mask[i] < WideElts) { | ||||||||
2326 | if (Shuffle0->getMaskValue(Mask[i]) == -1) | ||||||||
2327 | continue; | ||||||||
2328 | } else { | ||||||||
2329 | if (Shuffle1->getMaskValue(Mask[i] - WideElts) == -1) | ||||||||
2330 | continue; | ||||||||
2331 | } | ||||||||
2332 | |||||||||
2333 | // If this shuffle is choosing from the 1st narrow op, the mask element is | ||||||||
2334 | // the same. If this shuffle is choosing from the 2nd narrow op, the mask | ||||||||
2335 | // element is offset down to adjust for the narrow vector widths. | ||||||||
2336 | if (Mask[i] < WideElts) { | ||||||||
2337 | assert(Mask[i] < NarrowElts && "Unexpected shuffle mask"); | ||||||||
2338 | NewMask[i] = Mask[i]; | ||||||||
2339 | } else { | ||||||||
2340 | assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask"); | ||||||||
2341 | NewMask[i] = Mask[i] - (WideElts - NarrowElts); | ||||||||
2342 | } | ||||||||
2343 | } | ||||||||
2344 | return new ShuffleVectorInst(X, Y, NewMask); | ||||||||
2345 | } | ||||||||
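     | // Sketch (illustration only, not from the LLVM sources or tests): a shuffle of
     | // two identity-with-padding shuffles of <2 x i32> sources collapses to one
     | // shuffle of the narrow sources.
     | //   %w0 = shufflevector <2 x i32> %x, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
     | //   %w1 = shufflevector <2 x i32> %y, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
     | //   %r  = shufflevector <4 x i32> %w0, <4 x i32> %w1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
     | // -->
     | //   %r  = shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 3>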
2346 | |||||||||
2347 | Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { | ||||||||
2348 | Value *LHS = SVI.getOperand(0); | ||||||||
2349 | Value *RHS = SVI.getOperand(1); | ||||||||
2350 | SimplifyQuery ShufQuery = SQ.getWithInstruction(&SVI); | ||||||||
2351 | if (auto *V = SimplifyShuffleVectorInst(LHS, RHS, SVI.getShuffleMask(), | ||||||||
2352 | SVI.getType(), ShufQuery)) | ||||||||
2353 | return replaceInstUsesWith(SVI, V); | ||||||||
2354 | |||||||||
2355 | // Bail out for scalable vectors | ||||||||
2356 | if (isa<ScalableVectorType>(LHS->getType())) | ||||||||
2357 | return nullptr; | ||||||||
2358 | |||||||||
2359 | unsigned VWidth = cast<FixedVectorType>(SVI.getType())->getNumElements(); | ||||||||
2360 | unsigned LHSWidth = cast<FixedVectorType>(LHS->getType())->getNumElements(); | ||||||||
2361 | |||||||||
2362 | // shuffle (bitcast X), (bitcast Y), Mask --> bitcast (shuffle X, Y, Mask) | ||||||||
2363 | // | ||||||||
2364 | // if X and Y are of the same (vector) type, and the element size is not | ||||||||
2365 | // changed by the bitcasts, we can distribute the bitcasts through the | ||||||||
2366 | // shuffle, hopefully reducing the number of instructions. We make sure that | ||||||||
2367 | // at least one bitcast only has one use, so we don't *increase* the number of | ||||||||
2368 | // instructions here. | ||||||||
2369 | Value *X, *Y; | ||||||||
2370 | if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_BitCast(m_Value(Y))) && | ||||||||
2371 | X->getType()->isVectorTy() && X->getType() == Y->getType() && | ||||||||
2372 | X->getType()->getScalarSizeInBits() == | ||||||||
2373 | SVI.getType()->getScalarSizeInBits() && | ||||||||
2374 | (LHS->hasOneUse() || RHS->hasOneUse())) { | ||||||||
2375 | Value *V = Builder.CreateShuffleVector(X, Y, SVI.getShuffleMask(), | ||||||||
2376 | SVI.getName() + ".uncasted"); | ||||||||
2377 | return new BitCastInst(V, SVI.getType()); | ||||||||
2378 | } | ||||||||
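     | // Sketch (hypothetical IR, illustration only): same-sized-element bitcasts are
     | // hoisted past the shuffle so only one bitcast remains.
     | //   %bx = bitcast <4 x i32> %x to <4 x float>
     | //   %by = bitcast <4 x i32> %y to <4 x float>
     | //   %r  = shufflevector <4 x float> %bx, <4 x float> %by, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
     | // -->
     | //   %s = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
     | //   %r = bitcast <4 x i32> %s to <4 x float>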
2379 | |||||||||
2380 | ArrayRef<int> Mask = SVI.getShuffleMask(); | ||||||||
2381 | Type *Int32Ty = Type::getInt32Ty(SVI.getContext()); | ||||||||
2382 | |||||||||
2383 | // Peek through a bitcasted shuffle operand by scaling the mask. If the | ||||||||
2384 | // simulated shuffle can simplify, then this shuffle is unnecessary: | ||||||||
2385 | // shuf (bitcast X), undef, Mask --> bitcast X' | ||||||||
2386 | // TODO: This could be extended to allow length-changing shuffles. | ||||||||
2387 | // The transform might also be obsoleted if we allowed canonicalization | ||||||||
2388 | // of bitcasted shuffles. | ||||||||
2389 | if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) && | ||||||||
2390 | X->getType()->isVectorTy() && VWidth == LHSWidth) { | ||||||||
2391 | // Try to create a scaled mask constant. | ||||||||
2392 | auto *XType = cast<FixedVectorType>(X->getType()); | ||||||||
2393 | unsigned XNumElts = XType->getNumElements(); | ||||||||
2394 | SmallVector<int, 16> ScaledMask; | ||||||||
2395 | if (XNumElts >= VWidth) { | ||||||||
2396 | assert(XNumElts % VWidth == 0 && "Unexpected vector bitcast"); | ||||||||
2397 | narrowShuffleMaskElts(XNumElts / VWidth, Mask, ScaledMask); | ||||||||
2398 | } else { | ||||||||
2399 | assert(VWidth % XNumElts == 0 && "Unexpected vector bitcast"); | ||||||||
2400 | if (!widenShuffleMaskElts(VWidth / XNumElts, Mask, ScaledMask)) | ||||||||
2401 | ScaledMask.clear(); | ||||||||
2402 | } | ||||||||
2403 | if (!ScaledMask.empty()) { | ||||||||
2404 | // If the shuffled source vector simplifies, cast that value to this | ||||||||
2405 | // shuffle's type. | ||||||||
2406 | if (auto *V = SimplifyShuffleVectorInst(X, UndefValue::get(XType), | ||||||||
2407 | ScaledMask, XType, ShufQuery)) | ||||||||
2408 | return BitCastInst::Create(Instruction::BitCast, V, SVI.getType()); | ||||||||
2409 | } | ||||||||
2410 | } | ||||||||
2411 | |||||||||
2412 | // shuffle x, x, mask --> shuffle x, undef, mask' | ||||||||
2413 | if (LHS == RHS) { | ||||||||
2414 | assert(!match(RHS, m_Undef()) && | ||||||||
2415 | "Shuffle with 2 undef ops not simplified?"); | ||||||||
2416 | // Remap any references to RHS to use LHS. | ||||||||
2417 | SmallVector<int, 16> Elts; | ||||||||
2418 | for (unsigned i = 0; i != VWidth; ++i) { | ||||||||
2419 | // Propagate undef elements or force mask to LHS. | ||||||||
2420 | if (Mask[i] < 0) | ||||||||
2421 | Elts.push_back(UndefMaskElem); | ||||||||
2422 | else | ||||||||
2423 | Elts.push_back(Mask[i] % LHSWidth); | ||||||||
2424 | } | ||||||||
2425 | return new ShuffleVectorInst(LHS, UndefValue::get(RHS->getType()), Elts); | ||||||||
2426 | } | ||||||||
2427 | |||||||||
2428 | // shuffle undef, x, mask --> shuffle x, undef, mask' | ||||||||
2429 | if (match(LHS, m_Undef())) { | ||||||||
2430 | SVI.commute(); | ||||||||
2431 | return &SVI; | ||||||||
2432 | } | ||||||||
2433 | |||||||||
2434 | if (Instruction *I = canonicalizeInsertSplat(SVI, Builder)) | ||||||||
2435 | return I; | ||||||||
2436 | |||||||||
2437 | if (Instruction *I = foldSelectShuffle(SVI, Builder, DL)) | ||||||||
2438 | return I; | ||||||||
2439 | |||||||||
2440 | if (Instruction *I = foldTruncShuffle(SVI, DL.isBigEndian())) | ||||||||
2441 | return I; | ||||||||
2442 | |||||||||
2443 | if (Instruction *I = narrowVectorSelect(SVI, Builder)) | ||||||||
2444 | return I; | ||||||||
2445 | |||||||||
2446 | APInt UndefElts(VWidth, 0); | ||||||||
2447 | APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); | ||||||||
2448 | if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) { | ||||||||
2449 | if (V != &SVI) | ||||||||
2450 | return replaceInstUsesWith(SVI, V); | ||||||||
2451 | return &SVI; | ||||||||
2452 | } | ||||||||
2453 | |||||||||
2454 | if (Instruction *I = foldIdentityExtractShuffle(SVI)) | ||||||||
2455 | return I; | ||||||||
2456 | |||||||||
2457 | // These transforms have the potential to lose undef knowledge, so they are | ||||||||
2458 | // intentionally placed after SimplifyDemandedVectorElts(). | ||||||||
2459 | if (Instruction *I = foldShuffleWithInsert(SVI, *this)) | ||||||||
2460 | return I; | ||||||||
2461 | if (Instruction *I = foldIdentityPaddedShuffles(SVI)) | ||||||||
2462 | return I; | ||||||||
2463 | |||||||||
2464 | if (match(RHS, m_Undef()) && canEvaluateShuffled(LHS, Mask)) { | ||||||||
2465 | Value *V = evaluateInDifferentElementOrder(LHS, Mask); | ||||||||
2466 | return replaceInstUsesWith(SVI, V); | ||||||||
2467 | } | ||||||||
2468 | |||||||||
2469 | // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to | ||||||||
2470 | // a non-vector type. We can instead bitcast the original vector followed by | ||||||||
2471 | // an extract of the desired element: | ||||||||
2472 | // | ||||||||
2473 | // %sroa = shufflevector <16 x i8> %in, <16 x i8> undef, | ||||||||
2474 | // <4 x i32> <i32 0, i32 1, i32 2, i32 3> | ||||||||
2475 | // %1 = bitcast <4 x i8> %sroa to i32 | ||||||||
2476 | // Becomes: | ||||||||
2477 | // %bc = bitcast <16 x i8> %in to <4 x i32> | ||||||||
2478 | // %ext = extractelement <4 x i32> %bc, i32 0 | ||||||||
2479 | // | ||||||||
2480 | // If the shuffle is extracting a contiguous range of values from the input | ||||||||
2481 | // vector then each use which is a bitcast of the extracted size can be | ||||||||
2482 | // replaced. This will work if the vector types are compatible, and the begin | ||||||||
2483 | // index is aligned to a value in the casted vector type. If the begin index | ||||||||
2484 | // isn't aligned then we can shuffle the original vector (keeping the same | ||||||||
2485 | // vector type) before extracting. | ||||||||
2486 | // | ||||||||
2487 | // This code will bail out if the target type is fundamentally incompatible | ||||||||
2488 | // with vectors of the source type. | ||||||||
2489 | // | ||||||||
2490 | // Example of <16 x i8>, target type i32: | ||||||||
2491 | // Index range [4,8): v-----------v Will work. | ||||||||
2492 | // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | ||||||||
2493 | // <16 x i8>: | | | | | | | | | | | | | | | | | | ||||||||
2494 | // <4 x i32>: | | | | | | ||||||||
2495 | // +-----------+-----------+-----------+-----------+ | ||||||||
2496 | // Index range [6,10): ^-----------^ Needs an extra shuffle. | ||||||||
2497 | // Target type i40: ^--------------^ Won't work, bail. | ||||||||
2498 | bool MadeChange = false; | ||||||||
2499 | if (isShuffleExtractingFromLHS(SVI, Mask)) { | ||||||||
2500 | Value *V = LHS; | ||||||||
2501 | unsigned MaskElems = Mask.size(); | ||||||||
2502 | auto *SrcTy = cast<FixedVectorType>(V->getType()); | ||||||||
2503 | unsigned VecBitWidth = SrcTy->getPrimitiveSizeInBits().getFixedSize(); | ||||||||
2504 | unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType()); | ||||||||
2505 | assert(SrcElemBitWidth && "vector elements must have a bitwidth"); | ||||||||
2506 | unsigned SrcNumElems = SrcTy->getNumElements(); | ||||||||
2507 | SmallVector<BitCastInst *, 8> BCs; | ||||||||
2508 | DenseMap<Type *, Value *> NewBCs; | ||||||||
2509 | for (User *U : SVI.users()) | ||||||||
2510 | if (BitCastInst *BC = dyn_cast<BitCastInst>(U)) | ||||||||
2511 | if (!BC->use_empty()) | ||||||||
2512 | // Only visit bitcasts that weren't previously handled. | ||||||||
2513 | BCs.push_back(BC); | ||||||||
2514 | for (BitCastInst *BC : BCs) { | ||||||||
2515 | unsigned BegIdx = Mask.front(); | ||||||||
2516 | Type *TgtTy = BC->getDestTy(); | ||||||||
2517 | unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy); | ||||||||
2518 | if (!TgtElemBitWidth) | ||||||||
2519 | continue; | ||||||||
2520 | unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth; | ||||||||
2521 | bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth; | ||||||||
2522 | bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth); | ||||||||
2523 | if (!VecBitWidthsEqual) | ||||||||
2524 | continue; | ||||||||
2525 | if (!VectorType::isValidElementType(TgtTy)) | ||||||||
2526 | continue; | ||||||||
2527 | auto *CastSrcTy = FixedVectorType::get(TgtTy, TgtNumElems); | ||||||||
2528 | if (!BegIsAligned) { | ||||||||
2529 | // Shuffle the input so [0,NumElements) contains the output, and | ||||||||
2530 | // [NumElems,SrcNumElems) is undef. | ||||||||
2531 | SmallVector<int, 16> ShuffleMask(SrcNumElems, -1); | ||||||||
2532 | for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I) | ||||||||
2533 | ShuffleMask[I] = Idx; | ||||||||
2534 | V = Builder.CreateShuffleVector(V, ShuffleMask, | ||||||||
2535 | SVI.getName() + ".extract"); | ||||||||
2536 | BegIdx = 0; | ||||||||
2537 | } | ||||||||
2538 | unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth; | ||||||||
2539 | assert(SrcElemsPerTgtElem); | ||||||||
2540 | BegIdx /= SrcElemsPerTgtElem; | ||||||||
2541 | bool BCAlreadyExists = NewBCs.find(CastSrcTy) != NewBCs.end(); | ||||||||
2542 | auto *NewBC = | ||||||||
2543 | BCAlreadyExists | ||||||||
2544 | ? NewBCs[CastSrcTy] | ||||||||
2545 | : Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc"); | ||||||||
2546 | if (!BCAlreadyExists) | ||||||||
2547 | NewBCs[CastSrcTy] = NewBC; | ||||||||
2548 | auto *Ext = Builder.CreateExtractElement( | ||||||||
2549 | NewBC, ConstantInt::get(Int32Ty, BegIdx), SVI.getName() + ".extract"); | ||||||||
2550 | // The shufflevector isn't being replaced: the bitcast that used it | ||||||||
2551 | // is. InstCombine will visit the newly-created instructions. | ||||||||
2552 | replaceInstUsesWith(*BC, Ext); | ||||||||
2553 | MadeChange = true; | ||||||||
2554 | } | ||||||||
2555 | } | ||||||||
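     | // Sketch of the SROA pattern above, using the [4,8) slice from the diagram
     | // (hand-written illustration, not taken from the LLVM tests):
     | //   %sroa = shufflevector <16 x i8> %in, <16 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
     | //   %val  = bitcast <4 x i8> %sroa to i32
     | // -->
     | //   %bc  = bitcast <16 x i8> %in to <4 x i32>
     | //   %val = extractelement <4 x i32> %bc, i32 1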
2556 | |||||||||
2557 | // If the LHS is a shufflevector itself, see if we can combine it with this | ||||||||
2558 | // one without producing an unusual shuffle. | ||||||||
2559 | // Cases that might be simplified: | ||||||||
2560 | // 1. | ||||||||
2561 | // x1=shuffle(v1,v2,mask1) | ||||||||
2562 | // x=shuffle(x1,undef,mask) | ||||||||
2563 | // ==> | ||||||||
2564 | // x=shuffle(v1,undef,newMask) | ||||||||
2565 | // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1 | ||||||||
2566 | // 2. | ||||||||
2567 | // x1=shuffle(v1,undef,mask1) | ||||||||
2568 | // x=shuffle(x1,x2,mask) | ||||||||
2569 | // where v1.size() == mask1.size() | ||||||||
2570 | // ==> | ||||||||
2571 | // x=shuffle(v1,x2,newMask) | ||||||||
2572 | // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i] | ||||||||
2573 | // 3. | ||||||||
2574 | // x2=shuffle(v2,undef,mask2) | ||||||||
2575 | // x=shuffle(x1,x2,mask) | ||||||||
2576 | // where v2.size() == mask2.size() | ||||||||
2577 | // ==> | ||||||||
2578 | // x=shuffle(x1,v2,newMask) | ||||||||
2579 | // newMask[i] = (mask[i] < x1.size()) | ||||||||
2580 | // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size() | ||||||||
2581 | // 4. | ||||||||
2582 | // x1=shuffle(v1,undef,mask1) | ||||||||
2583 | // x2=shuffle(v2,undef,mask2) | ||||||||
2584 | // x=shuffle(x1,x2,mask) | ||||||||
2585 | // where v1.size() == v2.size() | ||||||||
2586 | // ==> | ||||||||
2587 | // x=shuffle(v1,v2,newMask) | ||||||||
2588 | // newMask[i] = (mask[i] < x1.size()) | ||||||||
2589 | // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size() | ||||||||
2590 | // | ||||||||
2591 | // Here we are really conservative: | ||||||||
2592 | // we are absolutely afraid of producing a shuffle mask not in the input | ||||||||
2593 | // program, because the code gen may not be smart enough to turn a merged | ||||||||
2594 | // shuffle into two specific shuffles: it may produce worse code. As such, | ||||||||
2595 | // we only merge two shuffles if the result is either a splat or one of the | ||||||||
2596 | // input shuffle masks. In this case, merging the shuffles just removes | ||||||||
2597 | // one instruction, which we know is safe. This is good for things like | ||||||||
2598 | // turning: (splat(splat)) -> splat, or | ||||||||
2599 | // merge(V[0..n], V[n+1..2n]) -> V[0..2n] | ||||||||
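     | // For instance (hand-written illustration of case 1 with a splat result, not
     | // taken from the LLVM sources or tests):
     | //   %s1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
     | //   %s2 = shufflevector <4 x i32> %s1, <4 x i32> undef, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
     | // -->
     | //   %s2 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>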
2600 | ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS); | ||||||||
2601 | ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS); | ||||||||
2602 | if (LHSShuffle) | ||||||||
2603 | if (!match(LHSShuffle->getOperand(1), m_Undef()) && !match(RHS, m_Undef())) | ||||||||
2604 | LHSShuffle = nullptr; | ||||||||
2605 | if (RHSShuffle) | ||||||||
2606 | if (!match(RHSShuffle->getOperand(1), m_Undef())) | ||||||||
2607 | RHSShuffle = nullptr; | ||||||||
2608 | if (!LHSShuffle && !RHSShuffle) | ||||||||
2609 | return MadeChange ? &SVI : nullptr; | ||||||||
2610 | |||||||||
2611 | Value* LHSOp0 = nullptr; | ||||||||
2612 | Value* LHSOp1 = nullptr; | ||||||||
2613 | Value* RHSOp0 = nullptr; | ||||||||
2614 | unsigned LHSOp0Width = 0; | ||||||||
2615 | unsigned RHSOp0Width = 0; | ||||||||
2616 | if (LHSShuffle) { | ||||||||
2617 | LHSOp0 = LHSShuffle->getOperand(0); | ||||||||
2618 | LHSOp1 = LHSShuffle->getOperand(1); | ||||||||
2619 | LHSOp0Width = cast<FixedVectorType>(LHSOp0->getType())->getNumElements(); | ||||||||
2620 | } | ||||||||
2621 | if (RHSShuffle) { | ||||||||
2622 | RHSOp0 = RHSShuffle->getOperand(0); | ||||||||
2623 | RHSOp0Width = cast<FixedVectorType>(RHSOp0->getType())->getNumElements(); | ||||||||
2624 | } | ||||||||
2625 | Value* newLHS = LHS; | ||||||||
2626 | Value* newRHS = RHS; | ||||||||
2627 | if (LHSShuffle) { | ||||||||
2628 | // case 1 | ||||||||
2629 | if (match(RHS, m_Undef())) { | ||||||||
2630 | newLHS = LHSOp0; | ||||||||
2631 | newRHS = LHSOp1; | ||||||||
2632 | } | ||||||||
2633 | // case 2 or 4 | ||||||||
2634 | else if (LHSOp0Width == LHSWidth) { | ||||||||
2635 | newLHS = LHSOp0; | ||||||||
2636 | } | ||||||||
2637 | } | ||||||||
2638 | // case 3 or 4 | ||||||||
2639 | if (RHSShuffle && RHSOp0Width == LHSWidth) { | ||||||||
2640 | newRHS = RHSOp0; | ||||||||
2641 | } | ||||||||
2642 | // case 4 | ||||||||
2643 | if (LHSOp0 == RHSOp0) { | ||||||||
2644 | newLHS = LHSOp0; | ||||||||
2645 | newRHS = nullptr; | ||||||||
2646 | } | ||||||||
2647 | |||||||||
2648 | if (newLHS == LHS && newRHS == RHS) | ||||||||
2649 | return MadeChange ? &SVI : nullptr; | ||||||||
2650 | |||||||||
2651 | ArrayRef<int> LHSMask; | ||||||||
2652 | ArrayRef<int> RHSMask; | ||||||||
2653 | if (newLHS != LHS) | ||||||||
2654 | LHSMask = LHSShuffle->getShuffleMask(); | ||||||||
2655 | if (RHSShuffle && newRHS != RHS) | ||||||||
2656 | RHSMask = RHSShuffle->getShuffleMask(); | ||||||||
2657 | |||||||||
2658 | unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth; | ||||||||
2659 | SmallVector<int, 16> newMask; | ||||||||
2660 | bool isSplat = true; | ||||||||
2661 | int SplatElt = -1; | ||||||||
2662 | // Create a new mask for the new ShuffleVectorInst so that the new | ||||||||
2663 | // ShuffleVectorInst is equivalent to the original one. | ||||||||
2664 | for (unsigned i = 0; i < VWidth; ++i) { | ||||||||
2665 | int eltMask; | ||||||||
2666 | if (Mask[i] < 0) { | ||||||||
2667 | // This element is an undef value. | ||||||||
2668 | eltMask = -1; | ||||||||
2669 | } else if (Mask[i] < (int)LHSWidth) { | ||||||||
2670 | // This element is from left hand side vector operand. | ||||||||
2671 | // | ||||||||
2672 | // If LHS is going to be replaced (case 1, 2, or 4), calculate the | ||||||||
2673 | // new mask value for the element. | ||||||||
2674 | if (newLHS != LHS) { | ||||||||
2675 | eltMask = LHSMask[Mask[i]]; | ||||||||
2676 | // If the value selected is an undef value, explicitly specify it | ||||||||
2677 | // with a -1 mask value. | ||||||||
2678 | if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1)) | ||||||||
2679 | eltMask = -1; | ||||||||
2680 | } else | ||||||||
2681 | eltMask = Mask[i]; | ||||||||
2682 | } else { | ||||||||
2683 | // This element is from right hand side vector operand | ||||||||
2684 | // | ||||||||
2685 | // If the value selected is an undef value, explicitly specify it | ||||||||
2686 | // with a -1 mask value. (case 1) | ||||||||
2687 | if (match(RHS, m_Undef())) | ||||||||
2688 | eltMask = -1; | ||||||||
2689 | // If RHS is going to be replaced (case 3 or 4), calculate the | ||||||||
2690 | // new mask value for the element. | ||||||||
2691 | else if (newRHS != RHS) { | ||||||||
2692 | eltMask = RHSMask[Mask[i]-LHSWidth]; | ||||||||
2693 | // If the value selected is an undef value, explicitly specify it | ||||||||
2694 | // with a -1 mask value. | ||||||||
2695 | if (eltMask >= (int)RHSOp0Width) { | ||||||||
2696 | assert(match(RHSShuffle->getOperand(1), m_Undef()) && | ||||||||
2697 | "should have been checked above"); | ||||||||
2698 | eltMask = -1; | ||||||||
2699 | } | ||||||||
2700 | } else | ||||||||
2701 | eltMask = Mask[i]-LHSWidth; | ||||||||
2702 | |||||||||
2703 | // If LHS's width is changed, shift the mask value accordingly. | ||||||||
2704 | // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any | ||||||||
2705 | // references from RHSOp0 to LHSOp0, so we don't need to shift the mask. | ||||||||
2706 | // If newRHS == newLHS, we want to remap any references from newRHS to | ||||||||
2707 | // newLHS so that we can properly identify splats that may occur due to | ||||||||
2708 | // obfuscation across the two vectors. | ||||||||
2709 | if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS) | ||||||||
2710 | eltMask += newLHSWidth; | ||||||||
2711 | } | ||||||||
2712 | |||||||||
2713 | // Check if this could still be a splat. | ||||||||
2714 | if (eltMask >= 0) { | ||||||||
2715 | if (SplatElt >= 0 && SplatElt != eltMask) | ||||||||
2716 | isSplat = false; | ||||||||
2717 | SplatElt = eltMask; | ||||||||
2718 | } | ||||||||
2719 | |||||||||
2720 | newMask.push_back(eltMask); | ||||||||
2721 | } | ||||||||
2722 | |||||||||
2723 | // If the result mask is equal to one of the original shuffle masks, | ||||||||
2724 | // or is a splat, do the replacement. | ||||||||
2725 | if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) { | ||||||||
2726 | if (!newRHS) | ||||||||
2727 | newRHS = UndefValue::get(newLHS->getType()); | ||||||||
2728 | return new ShuffleVectorInst(newLHS, newRHS, newMask); | ||||||||
2729 | } | ||||||||
2730 | |||||||||
2731 | return MadeChange ? &SVI : nullptr; | ||||||||
2732 | } |
1 | //===- llvm/DerivedTypes.h - Classes for handling data types ----*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains the declarations of classes that represent "derived |
10 | // types". These are things like "arrays of x" or "structure of x, y, z" or |
11 | // "function returning x taking (y,z) as parameters", etc... |
12 | // |
13 | // The implementations of these classes live in the Type.cpp file. |
14 | // |
15 | //===----------------------------------------------------------------------===// |
16 | |
17 | #ifndef LLVM_IR_DERIVEDTYPES_H |
18 | #define LLVM_IR_DERIVEDTYPES_H |
19 | |
20 | #include "llvm/ADT/ArrayRef.h" |
21 | #include "llvm/ADT/STLExtras.h" |
22 | #include "llvm/ADT/StringRef.h" |
23 | #include "llvm/IR/Type.h" |
24 | #include "llvm/Support/Casting.h" |
25 | #include "llvm/Support/Compiler.h" |
26 | #include "llvm/Support/TypeSize.h" |
27 | #include <cassert> |
28 | #include <cstdint> |
29 | |
30 | namespace llvm { |
31 | |
32 | class Value; |
33 | class APInt; |
34 | class LLVMContext; |
35 | |
36 | /// Class to represent integer types. Note that this class is also used to |
37 | /// represent the built-in integer types: Int1Ty, Int8Ty, Int16Ty, Int32Ty and |
38 | /// Int64Ty. |
39 | /// Integer representation type |
40 | class IntegerType : public Type { |
41 | friend class LLVMContextImpl; |
42 | |
43 | protected: |
44 | explicit IntegerType(LLVMContext &C, unsigned NumBits) : Type(C, IntegerTyID){ |
45 | setSubclassData(NumBits); |
46 | } |
47 | |
48 | public: |
49 | /// This enum is just used to hold constants we need for IntegerType. |
50 | enum { |
51 | MIN_INT_BITS = 1, ///< Minimum number of bits that can be specified |
52 | MAX_INT_BITS = (1<<24)-1 ///< Maximum number of bits that can be specified |
53 | ///< Note that bit width is stored in the Type classes SubclassData field |
54 | ///< which has 24 bits. This yields a maximum bit width of 16,777,215 |
55 | ///< bits. |
56 | }; |
57 | |
58 | /// This static method is the primary way of constructing an IntegerType. |
59 | /// If an IntegerType with the same NumBits value was previously instantiated, |
60 | /// that instance will be returned. Otherwise a new one will be created. Only |
61 | /// one instance with a given NumBits value is ever created. |
62 | /// Get or create an IntegerType instance. |
63 | static IntegerType *get(LLVMContext &C, unsigned NumBits); |
64 | |
65 | /// Returns a type twice as wide as the input type.
66 | IntegerType *getExtendedType() const { |
67 | return Type::getIntNTy(getContext(), 2 * getScalarSizeInBits()); |
68 | } |
69 | |
70 | /// Get the number of bits in this IntegerType |
71 | unsigned getBitWidth() const { return getSubclassData(); } |
72 | |
73 | /// Return a bitmask with ones set for all of the bits that can be set by an |
74 | /// unsigned version of this type. This is 0xFF for i8, 0xFFFF for i16, etc. |
75 | uint64_t getBitMask() const { |
76 | return ~uint64_t(0UL) >> (64-getBitWidth()); |
77 | } |
78 | |
79 | /// Return a uint64_t with just the most significant bit set (the sign bit, if |
80 | /// the value is treated as a signed number). |
81 | uint64_t getSignBit() const { |
82 | return 1ULL << (getBitWidth()-1); |
83 | } |
84 | |
85 | /// For example, this is 0xFF for an 8 bit integer, 0xFFFF for i16, etc. |
86 | /// @returns a bit mask with ones set for all the bits of this type. |
87 | /// Get a bit mask for this type. |
88 | APInt getMask() const; |
89 | |
90 | /// Methods for support type inquiry through isa, cast, and dyn_cast. |
91 | static bool classof(const Type *T) { |
92 | return T->getTypeID() == IntegerTyID; |
93 | } |
94 | }; |
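    | // Minimal usage sketch (hand-written illustration; assumes an existing
    | // LLVMContext named Ctx):
    | //   IntegerType *I16 = IntegerType::get(Ctx, 16);
    | //   uint64_t Mask = I16->getBitMask();   // 0xFFFF
    | //   uint64_t Sign = I16->getSignBit();   // 0x8000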
95 | |
96 | unsigned Type::getIntegerBitWidth() const { |
97 | return cast<IntegerType>(this)->getBitWidth(); |
98 | } |
99 | |
100 | /// Class to represent function types |
101 | /// |
102 | class FunctionType : public Type { |
103 | FunctionType(Type *Result, ArrayRef<Type*> Params, bool IsVarArgs); |
104 | |
105 | public: |
106 | FunctionType(const FunctionType &) = delete; |
107 | FunctionType &operator=(const FunctionType &) = delete; |
108 | |
109 | /// This static method is the primary way of constructing a FunctionType. |
110 | static FunctionType *get(Type *Result, |
111 | ArrayRef<Type*> Params, bool isVarArg); |
112 | |
113 | /// Create a FunctionType taking no parameters. |
114 | static FunctionType *get(Type *Result, bool isVarArg); |
115 | |
116 | /// Return true if the specified type is valid as a return type. |
117 | static bool isValidReturnType(Type *RetTy); |
118 | |
119 | /// Return true if the specified type is valid as an argument type. |
120 | static bool isValidArgumentType(Type *ArgTy); |
121 | |
122 | bool isVarArg() const { return getSubclassData()!=0; } |
123 | Type *getReturnType() const { return ContainedTys[0]; } |
124 | |
125 | using param_iterator = Type::subtype_iterator; |
126 | |
127 | param_iterator param_begin() const { return ContainedTys + 1; } |
128 | param_iterator param_end() const { return &ContainedTys[NumContainedTys]; } |
129 | ArrayRef<Type *> params() const { |
130 | return makeArrayRef(param_begin(), param_end()); |
131 | } |
132 | |
133 | /// Parameter type accessors. |
134 | Type *getParamType(unsigned i) const { return ContainedTys[i+1]; } |
135 | |
136 | /// Return the number of fixed parameters this function type requires. |
137 | /// This does not consider varargs. |
138 | unsigned getNumParams() const { return NumContainedTys - 1; } |
139 | |
140 | /// Methods for support type inquiry through isa, cast, and dyn_cast. |
141 | static bool classof(const Type *T) { |
142 | return T->getTypeID() == FunctionTyID; |
143 | } |
144 | }; |
145 | static_assert(alignof(FunctionType) >= alignof(Type *), |
146 | "Alignment sufficient for objects appended to FunctionType"); |
147 | |
148 | bool Type::isFunctionVarArg() const { |
149 | return cast<FunctionType>(this)->isVarArg(); |
150 | } |
151 | |
152 | Type *Type::getFunctionParamType(unsigned i) const { |
153 | return cast<FunctionType>(this)->getParamType(i); |
154 | } |
155 | |
156 | unsigned Type::getFunctionNumParams() const { |
157 | return cast<FunctionType>(this)->getNumParams(); |
158 | } |
159 | |
160 | /// A handy container for a FunctionType+Callee-pointer pair, which can be |
161 | /// passed around as a single entity. This assists in replacing the use of |
162 | /// PointerType::getElementType() to access the function's type, since that's |
163 | /// slated for removal as part of the [opaque pointer types] project. |
164 | class FunctionCallee { |
165 | public: |
166 | // Allow implicit conversion from types which have a getFunctionType member |
167 | // (e.g. Function and InlineAsm). |
168 | template <typename T, typename U = decltype(&T::getFunctionType)> |
169 | FunctionCallee(T *Fn) |
170 | : FnTy(Fn ? Fn->getFunctionType() : nullptr), Callee(Fn) {} |
171 | |
172 | FunctionCallee(FunctionType *FnTy, Value *Callee) |
173 | : FnTy(FnTy), Callee(Callee) { |
174 | assert((FnTy == nullptr) == (Callee == nullptr));
175 | } |
176 | |
177 | FunctionCallee(std::nullptr_t) {} |
178 | |
179 | FunctionCallee() = default; |
180 | |
181 | FunctionType *getFunctionType() { return FnTy; } |
182 | |
183 | Value *getCallee() { return Callee; } |
184 | |
185 | explicit operator bool() { return Callee; } |
186 | |
187 | private: |
188 | FunctionType *FnTy = nullptr; |
189 | Value *Callee = nullptr; |
190 | }; |
191 | |
192 | /// Class to represent struct types. There are two different kinds of struct |
193 | /// types: Literal structs and Identified structs. |
194 | /// |
195 | /// Literal struct types (e.g. { i32, i32 }) are uniqued structurally, and must |
196 | /// always have a body when created. You can get one of these by using one of |
197 | /// the StructType::get() forms. |
198 | /// |
199 | /// Identified structs (e.g. %foo or %42) may optionally have a name and are not |
200 | /// uniqued. The names for identified structs are managed at the LLVMContext |
201 | /// level, so there can only be a single identified struct with a given name in |
202 | /// a particular LLVMContext. Identified structs may also optionally be opaque |
203 | /// (have no body specified). You get one of these by using one of the |
204 | /// StructType::create() forms. |
205 | /// |
206 | /// Independent of what kind of struct you have, the body of a struct type is
207 | /// laid out in memory consecutively with the elements directly one after the |
208 | /// other (if the struct is packed) or (if not packed) with padding between the |
209 | /// elements as defined by DataLayout (which is required to match what the code |
210 | /// generator for a target expects). |
211 | /// |
212 | class StructType : public Type { |
213 | StructType(LLVMContext &C) : Type(C, StructTyID) {} |
214 | |
215 | enum { |
216 | /// This is the contents of the SubClassData field. |
217 | SCDB_HasBody = 1, |
218 | SCDB_Packed = 2, |
219 | SCDB_IsLiteral = 4, |
220 | SCDB_IsSized = 8 |
221 | }; |
222 | |
223 | /// For a named struct that actually has a name, this is a pointer to the |
224 | /// symbol table entry (maintained by LLVMContext) for the struct. |
225 | /// This is null if the type is a literal struct or if it is an identified
226 | /// type that has an empty name. |
227 | void *SymbolTableEntry = nullptr; |
228 | |
229 | public: |
230 | StructType(const StructType &) = delete; |
231 | StructType &operator=(const StructType &) = delete; |
232 | |
233 | /// This creates an identified struct. |
234 | static StructType *create(LLVMContext &Context, StringRef Name); |
235 | static StructType *create(LLVMContext &Context); |
236 | |
237 | static StructType *create(ArrayRef<Type *> Elements, StringRef Name, |
238 | bool isPacked = false); |
239 | static StructType *create(ArrayRef<Type *> Elements); |
240 | static StructType *create(LLVMContext &Context, ArrayRef<Type *> Elements, |
241 | StringRef Name, bool isPacked = false); |
242 | static StructType *create(LLVMContext &Context, ArrayRef<Type *> Elements); |
243 | template <class... Tys> |
244 | static std::enable_if_t<are_base_of<Type, Tys...>::value, StructType *> |
245 | create(StringRef Name, Type *elt1, Tys *... elts) { |
246 | assert(elt1 && "Cannot create a struct type with no elements with this");
247 | return create(ArrayRef<Type *>({elt1, elts...}), Name); |
248 | } |
249 | |
250 | /// This static method is the primary way to create a literal StructType. |
251 | static StructType *get(LLVMContext &Context, ArrayRef<Type*> Elements, |
252 | bool isPacked = false); |
253 | |
254 | /// Create an empty structure type. |
255 | static StructType *get(LLVMContext &Context, bool isPacked = false); |
256 | |
257 | /// This static method is a convenience method for creating structure types by |
258 | /// specifying the elements as arguments. Note that this method always returns |
259 | /// a non-packed struct, and requires at least one element type. |
260 | template <class... Tys> |
261 | static std::enable_if_t<are_base_of<Type, Tys...>::value, StructType *> |
262 | get(Type *elt1, Tys *... elts) { |
263 | assert(elt1 && "Cannot create a struct type with no elements with this");
264 | LLVMContext &Ctx = elt1->getContext(); |
265 | return StructType::get(Ctx, ArrayRef<Type *>({elt1, elts...})); |
266 | } |
267 | |
268 | /// Return the type with the specified name, or null if there is none by that |
269 | /// name. |
270 | static StructType *getTypeByName(LLVMContext &C, StringRef Name); |
271 | |
272 | bool isPacked() const { return (getSubclassData() & SCDB_Packed) != 0; } |
273 | |
274 | /// Return true if this type is uniqued by structural equivalence, false if it |
275 | /// is a struct definition. |
276 | bool isLiteral() const { return (getSubclassData() & SCDB_IsLiteral) != 0; } |
277 | |
278 | /// Return true if this is a type with an identity that has no body specified |
279 | /// yet. These print as 'opaque' in .ll files.
280 | bool isOpaque() const { return (getSubclassData() & SCDB_HasBody) == 0; } |
281 | |
282 | /// isSized - Return true if this is a sized type. |
283 | bool isSized(SmallPtrSetImpl<Type *> *Visited = nullptr) const; |
284 | |
285 | /// Returns true if this struct contains a scalable vector. |
286 | bool containsScalableVectorType() const; |
287 | |
288 | /// Return true if this is a named struct that has a non-empty name. |
289 | bool hasName() const { return SymbolTableEntry != nullptr; } |
290 | |
291 | /// Return the name for this struct type if it has an identity. |
292 | /// This may return an empty string for an unnamed struct type. Do not call |
293 | /// this on a literal type.
294 | StringRef getName() const; |
295 | |
296 | /// Change the name of this type to the specified name, or to a name with a |
297 | /// suffix if there is a collision. Do not call this on a literal type.
298 | void setName(StringRef Name); |
299 | |
300 | /// Specify a body for an opaque identified type. |
301 | void setBody(ArrayRef<Type*> Elements, bool isPacked = false); |
302 | |
303 | template <typename... Tys> |
304 | std::enable_if_t<are_base_of<Type, Tys...>::value, void> |
305 | setBody(Type *elt1, Tys *... elts) { |
306 | assert(elt1 && "Cannot create a struct type with no elements with this");
307 | setBody(ArrayRef<Type *>({elt1, elts...})); |
308 | } |
309 | |
310 | /// Return true if the specified type is valid as an element type.
311 | static bool isValidElementType(Type *ElemTy); |
312 | |
313 | // Iterator access to the elements. |
314 | using element_iterator = Type::subtype_iterator; |
315 | |
316 | element_iterator element_begin() const { return ContainedTys; } |
317 | element_iterator element_end() const { return &ContainedTys[NumContainedTys];} |
318 | ArrayRef<Type *> elements() const { |
319 | return makeArrayRef(element_begin(), element_end()); |
320 | } |
321 | |
322 | /// Return true if this is layout identical to the specified struct. |
323 | bool isLayoutIdentical(StructType *Other) const; |
324 | |
325 | /// Random access to the elements |
326 | unsigned getNumElements() const { return NumContainedTys; } |
327 | Type *getElementType(unsigned N) const { |
328 | assert(N < NumContainedTys && "Element number out of range!");
329 | return ContainedTys[N]; |
330 | } |
331 | /// Given an index value into the type, return the type of the element. |
332 | Type *getTypeAtIndex(const Value *V) const; |
333 | Type *getTypeAtIndex(unsigned N) const { return getElementType(N); } |
334 | bool indexValid(const Value *V) const; |
335 | bool indexValid(unsigned Idx) const { return Idx < getNumElements(); } |
336 | |
337 | /// Methods for support type inquiry through isa, cast, and dyn_cast. |
338 | static bool classof(const Type *T) { |
339 | return T->getTypeID() == StructTyID; |
340 | } |
341 | }; |
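    | // Minimal usage sketch (hand-written illustration; Ctx is an assumed LLVMContext):
    | //   Type *I32 = Type::getInt32Ty(Ctx);
    | //   StructType *Literal = StructType::get(Ctx, {I32, I32});    // uniqued literal { i32, i32 }
    | //   StructType *Named   = StructType::create(Ctx, "my.pair");  // opaque until a body is set
    | //   Named->setBody({I32, I32});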
342 | |
343 | StringRef Type::getStructName() const { |
344 | return cast<StructType>(this)->getName(); |
345 | } |
346 | |
347 | unsigned Type::getStructNumElements() const { |
348 | return cast<StructType>(this)->getNumElements(); |
349 | } |
350 | |
351 | Type *Type::getStructElementType(unsigned N) const { |
352 | return cast<StructType>(this)->getElementType(N); |
353 | } |
354 | |
355 | /// Class to represent array types. |
356 | class ArrayType : public Type { |
357 | /// The element type of the array. |
358 | Type *ContainedType; |
359 | /// Number of elements in the array. |
360 | uint64_t NumElements; |
361 | |
362 | ArrayType(Type *ElType, uint64_t NumEl); |
363 | |
364 | public: |
365 | ArrayType(const ArrayType &) = delete; |
366 | ArrayType &operator=(const ArrayType &) = delete; |
367 | |
368 | uint64_t getNumElements() const { return NumElements; } |
369 | Type *getElementType() const { return ContainedType; } |
370 | |
371 | /// This static method is the primary way to construct an ArrayType |
372 | static ArrayType *get(Type *ElementType, uint64_t NumElements); |
373 | |
374 | /// Return true if the specified type is valid as an element type.
375 | static bool isValidElementType(Type *ElemTy); |
376 | |
377 | /// Methods for support type inquiry through isa, cast, and dyn_cast. |
378 | static bool classof(const Type *T) { |
379 | return T->getTypeID() == ArrayTyID; |
380 | } |
381 | }; |
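    | // Usage sketch (hand-written illustration; Ctx is an assumed LLVMContext): [16 x i8]
    | //   ArrayType *A = ArrayType::get(Type::getInt8Ty(Ctx), 16);
    | //   uint64_t N = A->getNumElements();   // 16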
382 | |
383 | uint64_t Type::getArrayNumElements() const { |
384 | return cast<ArrayType>(this)->getNumElements(); |
385 | } |
386 | |
387 | /// Base class of all SIMD vector types |
388 | class VectorType : public Type { |
389 | /// A fully specified VectorType is of the form <vscale x n x Ty>. 'n' is the |
390 | /// minimum number of elements of type Ty contained within the vector, and |
391 | /// 'vscale x' indicates that the total element count is an integer multiple |
392 | /// of 'n', where the multiple is either guaranteed to be one, or is |
393 | /// statically unknown at compile time. |
394 | /// |
395 | /// If the multiple is known to be 1, then the extra term is discarded in |
396 | /// textual IR: |
397 | /// |
398 | /// <4 x i32> - a vector containing 4 i32s |
399 | /// <vscale x 4 x i32> - a vector containing an unknown integer multiple |
400 | /// of 4 i32s |
401 | |
402 | /// The element type of the vector. |
403 | Type *ContainedType; |
404 | |
405 | protected: |
406 | /// The element quantity of this vector. The meaning of this value depends |
407 | /// on the type of vector: |
408 | /// - For FixedVectorType = <ElementQuantity x ty>, there are |
409 | /// exactly ElementQuantity elements in this vector. |
410 | /// - For ScalableVectorType = <vscale x ElementQuantity x ty>, |
411 | /// there are vscale * ElementQuantity elements in this vector, where |
412 | /// vscale is a runtime-constant integer greater than 0. |
413 | const unsigned ElementQuantity; |
414 | |
415 | VectorType(Type *ElType, unsigned EQ, Type::TypeID TID); |
416 | |
417 | public: |
418 | VectorType(const VectorType &) = delete; |
419 | VectorType &operator=(const VectorType &) = delete; |
420 | |
421 | Type *getElementType() const { return ContainedType; } |
422 | |
423 | /// This static method is the primary way to construct an VectorType. |
424 | static VectorType *get(Type *ElementType, ElementCount EC); |
425 | |
426 | static VectorType *get(Type *ElementType, unsigned NumElements, |
427 | bool Scalable) { |
428 | return VectorType::get(ElementType, |
429 | ElementCount::get(NumElements, Scalable)); |
430 | } |
431 | |
432 | static VectorType *get(Type *ElementType, const VectorType *Other) { |
433 | return VectorType::get(ElementType, Other->getElementCount()); |
434 | } |
435 | |
436 | /// This static method gets a VectorType with the same number of elements as |
437 | /// the input type, and the element type is an integer type of the same width |
438 | /// as the input element type. |
439 | static VectorType *getInteger(VectorType *VTy) { |
440 | unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); |
441 | assert(EltBits && "Element size must be of a non-zero size");
442 | Type *EltTy = IntegerType::get(VTy->getContext(), EltBits); |
443 | return VectorType::get(EltTy, VTy->getElementCount()); |
444 | } |
445 | |
446 | /// This static method is like getInteger except that the element types are |
447 | /// twice as wide as the elements in the input type. |
448 | static VectorType *getExtendedElementVectorType(VectorType *VTy) { |
449 | assert(VTy->isIntOrIntVectorTy() && "VTy expected to be a vector of ints.");
450 | auto *EltTy = cast<IntegerType>(VTy->getElementType()); |
451 | return VectorType::get(EltTy->getExtendedType(), VTy->getElementCount()); |
452 | } |
453 | |
454 | // This static method gets a VectorType with the same number of elements as |
455 | // the input type, and the element type is an integer or float type which |
456 | // is half as wide as the elements in the input type. |
457 | static VectorType *getTruncatedElementVectorType(VectorType *VTy) { |
458 | Type *EltTy; |
459 | if (VTy->getElementType()->isFloatingPointTy()) { |
460 | switch(VTy->getElementType()->getTypeID()) { |
461 | case DoubleTyID: |
462 | EltTy = Type::getFloatTy(VTy->getContext()); |
463 | break; |
464 | case FloatTyID: |
465 | EltTy = Type::getHalfTy(VTy->getContext()); |
466 | break; |
467 | default: |
468 | llvm_unreachable("Cannot create narrower fp vector element type");
469 | } |
470 | } else { |
471 | unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); |
472 | assert((EltBits & 1) == 0 &&
473 | "Cannot truncate vector element with odd bit-width");
474 | EltTy = IntegerType::get(VTy->getContext(), EltBits / 2); |
475 | } |
476 | return VectorType::get(EltTy, VTy->getElementCount()); |
477 | } |
478 | |
479 | // This static method returns a VectorType with a smaller number of elements |
480 | // of a larger type than the input element type. For example, a <16 x i8> |
481 | // subdivided twice would return <4 x i32> |
482 | static VectorType *getSubdividedVectorType(VectorType *VTy, int NumSubdivs) { |
483 | for (int i = 0; i < NumSubdivs; ++i) { |
484 | VTy = VectorType::getDoubleElementsVectorType(VTy); |
485 | VTy = VectorType::getTruncatedElementVectorType(VTy); |
486 | } |
487 | return VTy; |
488 | } |
489 | |
490 | /// This static method returns a VectorType with half as many elements as the |
491 | /// input type and the same element type. |
492 | static VectorType *getHalfElementsVectorType(VectorType *VTy) { |
493 | auto EltCnt = VTy->getElementCount(); |
494 | assert(EltCnt.isKnownEven() &&((void)0) |
495 | "Cannot halve vector with odd number of elements.")((void)0); |
496 | return VectorType::get(VTy->getElementType(), |
497 | EltCnt.divideCoefficientBy(2)); |
498 | } |
499 | |
500 | /// This static method returns a VectorType with twice as many elements as the |
501 | /// input type and the same element type. |
502 | static VectorType *getDoubleElementsVectorType(VectorType *VTy) { |
503 | auto EltCnt = VTy->getElementCount(); |
504 | assert((EltCnt.getKnownMinValue() * 2ull) <= UINT_MAX &&((void)0) |
505 | "Too many elements in vector")((void)0); |
506 | return VectorType::get(VTy->getElementType(), EltCnt * 2); |
507 | } |
508 | |
509 | /// Return true if the specified type is valid as an element type. |
510 | static bool isValidElementType(Type *ElemTy); |
511 | |
512 | /// Return an ElementCount instance to represent the (possibly scalable) |
513 | /// number of elements in the vector. |
514 | inline ElementCount getElementCount() const; |
515 | |
516 | /// Methods for support type inquiry through isa, cast, and dyn_cast. |
517 | static bool classof(const Type *T) { |
518 | return T->getTypeID() == FixedVectorTyID || |
519 | T->getTypeID() == ScalableVectorTyID; |
520 | } |
521 | }; |
522 | |
523 | /// Class to represent fixed width SIMD vectors |
524 | class FixedVectorType : public VectorType { |
525 | protected: |
526 | FixedVectorType(Type *ElTy, unsigned NumElts) |
527 | : VectorType(ElTy, NumElts, FixedVectorTyID) {} |
528 | |
529 | public: |
530 | static FixedVectorType *get(Type *ElementType, unsigned NumElts); |
531 | |
532 | static FixedVectorType *get(Type *ElementType, const FixedVectorType *FVTy) { |
533 | return get(ElementType, FVTy->getNumElements()); |
534 | } |
535 | |
536 | static FixedVectorType *getInteger(FixedVectorType *VTy) { |
537 | return cast<FixedVectorType>(VectorType::getInteger(VTy)); |
538 | } |
539 | |
540 | static FixedVectorType *getExtendedElementVectorType(FixedVectorType *VTy) { |
541 | return cast<FixedVectorType>(VectorType::getExtendedElementVectorType(VTy)); |
542 | } |
543 | |
544 | static FixedVectorType *getTruncatedElementVectorType(FixedVectorType *VTy) { |
545 | return cast<FixedVectorType>( |
546 | VectorType::getTruncatedElementVectorType(VTy)); |
547 | } |
548 | |
549 | static FixedVectorType *getSubdividedVectorType(FixedVectorType *VTy, |
550 | int NumSubdivs) { |
551 | return cast<FixedVectorType>( |
552 | VectorType::getSubdividedVectorType(VTy, NumSubdivs)); |
553 | } |
554 | |
555 | static FixedVectorType *getHalfElementsVectorType(FixedVectorType *VTy) { |
556 | return cast<FixedVectorType>(VectorType::getHalfElementsVectorType(VTy)); |
557 | } |
558 | |
559 | static FixedVectorType *getDoubleElementsVectorType(FixedVectorType *VTy) { |
560 | return cast<FixedVectorType>(VectorType::getDoubleElementsVectorType(VTy)); |
561 | } |
562 | |
563 | static bool classof(const Type *T) { |
564 | return T->getTypeID() == FixedVectorTyID; |
565 | } |
566 | |
567 | unsigned getNumElements() const { return ElementQuantity; } |
568 | }; |
569 | |
570 | /// Class to represent scalable SIMD vectors |
571 | class ScalableVectorType : public VectorType { |
572 | protected: |
573 | ScalableVectorType(Type *ElTy, unsigned MinNumElts) |
574 | : VectorType(ElTy, MinNumElts, ScalableVectorTyID) {} |
575 | |
576 | public: |
577 | static ScalableVectorType *get(Type *ElementType, unsigned MinNumElts); |
578 | |
579 | static ScalableVectorType *get(Type *ElementType, |
580 | const ScalableVectorType *SVTy) { |
581 | return get(ElementType, SVTy->getMinNumElements()); |
582 | } |
583 | |
584 | static ScalableVectorType *getInteger(ScalableVectorType *VTy) { |
585 | return cast<ScalableVectorType>(VectorType::getInteger(VTy)); |
586 | } |
587 | |
588 | static ScalableVectorType * |
589 | getExtendedElementVectorType(ScalableVectorType *VTy) { |
590 | return cast<ScalableVectorType>( |
591 | VectorType::getExtendedElementVectorType(VTy)); |
592 | } |
593 | |
594 | static ScalableVectorType * |
595 | getTruncatedElementVectorType(ScalableVectorType *VTy) { |
596 | return cast<ScalableVectorType>( |
597 | VectorType::getTruncatedElementVectorType(VTy)); |
598 | } |
599 | |
600 | static ScalableVectorType *getSubdividedVectorType(ScalableVectorType *VTy, |
601 | int NumSubdivs) { |
602 | return cast<ScalableVectorType>( |
603 | VectorType::getSubdividedVectorType(VTy, NumSubdivs)); |
604 | } |
605 | |
606 | static ScalableVectorType * |
607 | getHalfElementsVectorType(ScalableVectorType *VTy) { |
608 | return cast<ScalableVectorType>(VectorType::getHalfElementsVectorType(VTy)); |
609 | } |
610 | |
611 | static ScalableVectorType * |
612 | getDoubleElementsVectorType(ScalableVectorType *VTy) { |
613 | return cast<ScalableVectorType>( |
614 | VectorType::getDoubleElementsVectorType(VTy)); |
615 | } |
616 | |
617 | /// Get the minimum number of elements in this vector. The actual number of |
618 | /// elements in the vector is an integer multiple of this value. |
619 | uint64_t getMinNumElements() const { return ElementQuantity; } |
620 | |
621 | static bool classof(const Type *T) { |
622 | return T->getTypeID() == ScalableVectorTyID; |
623 | } |
624 | }; |
625 | |
626 | inline ElementCount VectorType::getElementCount() const { |
627 | return ElementCount::get(ElementQuantity, isa<ScalableVectorType>(this)); |
628 | } |
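An illustrative sketch of the ElementCount-based factory above; the function name and the LLVMContext Ctx are assumptions made for the example:

// Sketch only: constructing <4 x float> and <vscale x 4 x float>.
void buildVectorTypes(llvm::LLVMContext &Ctx) {
  llvm::Type *F32 = llvm::Type::getFloatTy(Ctx);
  // Element count known at compile time: <4 x float>.
  auto *Fixed = llvm::VectorType::get(F32, llvm::ElementCount::getFixed(4));
  // Known minimum of 4 elements, scaled by vscale at runtime: <vscale x 4 x float>.
  auto *Scalable = llvm::VectorType::get(F32, llvm::ElementCount::getScalable(4));
  (void)Fixed->getElementCount();     // {4, fixed}
  (void)Scalable->getElementCount();  // {4, scalable}
}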
629 | |
630 | /// Class to represent pointers. |
631 | class PointerType : public Type { |
632 | explicit PointerType(Type *ElType, unsigned AddrSpace); |
633 | explicit PointerType(LLVMContext &C, unsigned AddrSpace); |
634 | |
635 | Type *PointeeTy; |
636 | |
637 | public: |
638 | PointerType(const PointerType &) = delete; |
639 | PointerType &operator=(const PointerType &) = delete; |
640 | |
641 | /// This constructs a pointer to an object of the specified type in a numbered |
642 | /// address space. |
643 | static PointerType *get(Type *ElementType, unsigned AddressSpace); |
644 | /// This constructs an opaque pointer to an object in a numbered address |
645 | /// space. |
646 | static PointerType *get(LLVMContext &C, unsigned AddressSpace); |
647 | |
648 | /// This constructs a pointer to an object of the specified type in the |
649 | /// default address space (address space zero). |
650 | static PointerType *getUnqual(Type *ElementType) { |
651 | return PointerType::get(ElementType, 0); |
652 | } |
653 | |
654 | /// This constructs an opaque pointer to an object in the |
655 | /// default address space (address space zero). |
656 | static PointerType *getUnqual(LLVMContext &C) { |
657 | return PointerType::get(C, 0); |
658 | } |
659 | |
660 | /// This constructs a pointer type with the same pointee type as the input |
661 | /// PointerType (or an opaque pointer if the input PointerType is opaque) and the |
662 | /// given address space. This is only useful during the opaque pointer |
663 | /// transition. |
664 | /// TODO: remove after opaque pointer transition is complete. |
665 | static PointerType *getWithSamePointeeType(PointerType *PT, |
666 | unsigned AddressSpace) { |
667 | if (PT->isOpaque()) |
668 | return get(PT->getContext(), AddressSpace); |
669 | return get(PT->getElementType(), AddressSpace); |
670 | } |
671 | |
672 | Type *getElementType() const { |
673 | assert(!isOpaque() && "Attempting to get element type of opaque pointer")((void)0); |
674 | return PointeeTy; |
675 | } |
676 | |
677 | bool isOpaque() const { return !PointeeTy; } |
678 | |
679 | /// Return true if the specified type is valid as an element type. |
680 | static bool isValidElementType(Type *ElemTy); |
681 | |
682 | /// Return true if we can load or store from a pointer to this type. |
683 | static bool isLoadableOrStorableType(Type *ElemTy); |
684 | |
685 | /// Return the address space of the Pointer type. |
686 | inline unsigned getAddressSpace() const { return getSubclassData(); } |
687 | |
688 | /// Return true if either this is an opaque pointer type or if this pointee |
689 | /// type matches Ty. Primarily used for checking if an instruction's pointer |
690 | /// operands are valid types. Will be useless after non-opaque pointers are |
691 | /// removed. |
692 | bool isOpaqueOrPointeeTypeMatches(Type *Ty) { |
693 | return isOpaque() || PointeeTy == Ty; |
694 | } |
695 | |
696 | /// Return true if both pointer types have the same element type. Two opaque |
697 | /// pointers are considered to have the same element type, while an opaque |
698 | /// and a non-opaque pointer have different element types. |
699 | /// TODO: Remove after opaque pointer transition is complete. |
700 | bool hasSameElementTypeAs(PointerType *Other) { |
701 | return PointeeTy == Other->PointeeTy; |
702 | } |
703 | |
704 | /// Implement support type inquiry through isa, cast, and dyn_cast. |
705 | static bool classof(const Type *T) { |
706 | return T->getTypeID() == PointerTyID; |
707 | } |
708 | }; |
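An illustrative sketch of the typed and opaque pointer constructors above; the function name and the context Ctx are assumptions for the example:

void pointerTypeSketch(llvm::LLVMContext &Ctx) {
  llvm::Type *I8 = llvm::Type::getInt8Ty(Ctx);
  // Typed i8* in address space 0, and an opaque pointer in address space 1.
  llvm::PointerType *Typed = llvm::PointerType::getUnqual(I8);
  llvm::PointerType *Opaque = llvm::PointerType::get(Ctx, 1);
  // A typed pointer only matches its own pointee; an opaque pointer matches any type.
  bool MatchesTyped = Typed->isOpaqueOrPointeeTypeMatches(I8);   // true
  bool MatchesOpaque = Opaque->isOpaqueOrPointeeTypeMatches(I8); // true
  (void)MatchesTyped; (void)MatchesOpaque;
}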
709 | |
710 | Type *Type::getExtendedType() const { |
711 | assert(((void)0) |
712 | isIntOrIntVectorTy() &&((void)0) |
713 | "Original type expected to be a vector of integers or a scalar integer.")((void)0); |
714 | if (auto *VTy = dyn_cast<VectorType>(this)) |
715 | return VectorType::getExtendedElementVectorType( |
716 | const_cast<VectorType *>(VTy)); |
717 | return cast<IntegerType>(this)->getExtendedType(); |
718 | } |
719 | |
720 | Type *Type::getWithNewType(Type *EltTy) const { |
721 | if (auto *VTy = dyn_cast<VectorType>(this)) |
722 | return VectorType::get(EltTy, VTy->getElementCount()); |
723 | return EltTy; |
724 | } |
725 | |
726 | Type *Type::getWithNewBitWidth(unsigned NewBitWidth) const { |
727 | assert(((void)0) |
728 | isIntOrIntVectorTy() &&((void)0) |
729 | "Original type expected to be a vector of integers or a scalar integer.")((void)0); |
730 | return getWithNewType(getIntNTy(getContext(), NewBitWidth)); |
731 | } |
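A small sketch of getWithNewBitWidth on a scalar and on a vector type; the function name and Ctx are assumptions for the example:

void widenToI32(llvm::LLVMContext &Ctx) {
  llvm::Type *I8 = llvm::Type::getInt8Ty(Ctx);
  llvm::Type *V4I8 = llvm::FixedVectorType::get(I8, 4);
  llvm::Type *I32 = I8->getWithNewBitWidth(32);     // i32
  llvm::Type *V4I32 = V4I8->getWithNewBitWidth(32); // <4 x i32>: element count preserved
  (void)I32; (void)V4I32;
}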
732 | |
733 | unsigned Type::getPointerAddressSpace() const { |
734 | return cast<PointerType>(getScalarType())->getAddressSpace(); |
735 | } |
736 | |
737 | } // end namespace llvm |
738 | |
739 | #endif // LLVM_IR_DERIVEDTYPES_H |
1 | //===- TypeSize.h - Wrapper around type sizes -------------------*- C++ -*-===// | ||||||||
2 | // | ||||||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||||||
6 | // | ||||||||
7 | //===----------------------------------------------------------------------===// | ||||||||
8 | // | ||||||||
9 | // This file provides a struct that can be used to query the size of IR types | ||||||||
10 | // which may be scalable vectors. It provides convenience operators so that | ||||||||
11 | // it can be used in much the same way as a single scalar value. | ||||||||
12 | // | ||||||||
13 | //===----------------------------------------------------------------------===// | ||||||||
14 | |||||||||
15 | #ifndef LLVM_SUPPORT_TYPESIZE_H | ||||||||
16 | #define LLVM_SUPPORT_TYPESIZE_H | ||||||||
17 | |||||||||
18 | #include "llvm/ADT/ArrayRef.h" | ||||||||
19 | #include "llvm/Support/MathExtras.h" | ||||||||
20 | #include "llvm/Support/WithColor.h" | ||||||||
21 | |||||||||
22 | #include <algorithm> | ||||||||
23 | #include <array> | ||||||||
24 | #include <cassert> | ||||||||
25 | #include <cstdint> | ||||||||
26 | #include <type_traits> | ||||||||
27 | |||||||||
28 | namespace llvm { | ||||||||
29 | |||||||||
30 | /// Reports a diagnostic message to indicate an invalid size request has been | ||||||||
31 | /// made on a scalable vector. This function may not return. | ||||||||
32 | void reportInvalidSizeRequest(const char *Msg); | ||||||||
33 | |||||||||
34 | template <typename LeafTy> struct LinearPolyBaseTypeTraits {}; | ||||||||
35 | |||||||||
36 | //===----------------------------------------------------------------------===// | ||||||||
37 | // LinearPolyBase - a base class for linear polynomials with multiple | ||||||||
38 | // dimensions. This can e.g. be used to describe offsets that have both a | ||||||||
39 | // fixed and scalable component. | ||||||||
40 | //===----------------------------------------------------------------------===// | ||||||||
41 | |||||||||
42 | /// LinearPolyBase describes a linear polynomial: | ||||||||
43 | /// c0 * scale0 + c1 * scale1 + ... + cK * scaleK | ||||||||
44 | /// where the scale is implicit, so only the coefficients are encoded. | ||||||||
45 | template <typename LeafTy> | ||||||||
46 | class LinearPolyBase { | ||||||||
47 | public: | ||||||||
48 | using ScalarTy = typename LinearPolyBaseTypeTraits<LeafTy>::ScalarTy; | ||||||||
49 | static constexpr auto Dimensions = LinearPolyBaseTypeTraits<LeafTy>::Dimensions; | ||||||||
50 | static_assert(Dimensions != std::numeric_limits<unsigned>::max(), | ||||||||
51 | "Dimensions out of range"); | ||||||||
52 | |||||||||
53 | private: | ||||||||
54 | std::array<ScalarTy, Dimensions> Coefficients; | ||||||||
55 | |||||||||
56 | protected: | ||||||||
57 | LinearPolyBase(ArrayRef<ScalarTy> Values) { | ||||||||
58 | std::copy(Values.begin(), Values.end(), Coefficients.begin()); | ||||||||
59 | } | ||||||||
60 | |||||||||
61 | public: | ||||||||
62 | friend LeafTy &operator+=(LeafTy &LHS, const LeafTy &RHS) { | ||||||||
63 | for (unsigned I=0; I<Dimensions; ++I) | ||||||||
64 | LHS.Coefficients[I] += RHS.Coefficients[I]; | ||||||||
65 | return LHS; | ||||||||
66 | } | ||||||||
67 | |||||||||
68 | friend LeafTy &operator-=(LeafTy &LHS, const LeafTy &RHS) { | ||||||||
69 | for (unsigned I=0; I<Dimensions; ++I) | ||||||||
70 | LHS.Coefficients[I] -= RHS.Coefficients[I]; | ||||||||
71 | return LHS; | ||||||||
72 | } | ||||||||
73 | |||||||||
74 | friend LeafTy &operator*=(LeafTy &LHS, ScalarTy RHS) { | ||||||||
75 | for (auto &C : LHS.Coefficients) | ||||||||
76 | C *= RHS; | ||||||||
77 | return LHS; | ||||||||
78 | } | ||||||||
79 | |||||||||
80 | friend LeafTy operator+(const LeafTy &LHS, const LeafTy &RHS) { | ||||||||
81 | LeafTy Copy = LHS; | ||||||||
82 | return Copy += RHS; | ||||||||
83 | } | ||||||||
84 | |||||||||
85 | friend LeafTy operator-(const LeafTy &LHS, const LeafTy &RHS) { | ||||||||
86 | LeafTy Copy = LHS; | ||||||||
87 | return Copy -= RHS; | ||||||||
88 | } | ||||||||
89 | |||||||||
90 | friend LeafTy operator*(const LeafTy &LHS, ScalarTy RHS) { | ||||||||
91 | LeafTy Copy = LHS; | ||||||||
92 | return Copy *= RHS; | ||||||||
93 | } | ||||||||
94 | |||||||||
95 | template <typename U = ScalarTy> | ||||||||
96 | friend typename std::enable_if_t<std::is_signed<U>::value, LeafTy> | ||||||||
97 | operator-(const LeafTy &LHS) { | ||||||||
98 | LeafTy Copy = LHS; | ||||||||
99 | return Copy *= -1; | ||||||||
100 | } | ||||||||
101 | |||||||||
102 | bool operator==(const LinearPolyBase &RHS) const { | ||||||||
103 | return std::equal(Coefficients.begin(), Coefficients.end(), | ||||||||
104 | RHS.Coefficients.begin()); | ||||||||
105 | } | ||||||||
106 | |||||||||
107 | bool operator!=(const LinearPolyBase &RHS) const { | ||||||||
108 | return !(*this == RHS); | ||||||||
109 | } | ||||||||
110 | |||||||||
111 | bool isZero() const { | ||||||||
112 | return all_of(Coefficients, [](const ScalarTy &C) { return C == 0; }); | ||||||||
113 | } | ||||||||
114 | bool isNonZero() const { return !isZero(); } | ||||||||
115 | explicit operator bool() const { return isNonZero(); } | ||||||||
116 | |||||||||
117 | ScalarTy getValue(unsigned Dim) const { return Coefficients[Dim]; } | ||||||||
118 | }; | ||||||||
119 | |||||||||
120 | //===----------------------------------------------------------------------===// | ||||||||
121 | // StackOffset - Represent an offset with named fixed and scalable components. | ||||||||
122 | //===----------------------------------------------------------------------===// | ||||||||
123 | |||||||||
124 | class StackOffset; | ||||||||
125 | template <> struct LinearPolyBaseTypeTraits<StackOffset> { | ||||||||
126 | using ScalarTy = int64_t; | ||||||||
127 | static constexpr unsigned Dimensions = 2; | ||||||||
128 | }; | ||||||||
129 | |||||||||
130 | /// StackOffset is a class to represent an offset with 2 dimensions, | ||||||||
131 | /// named fixed and scalable, respectively. This class allows a value for both | ||||||||
132 | /// dimensions to depict e.g. "8 bytes and 16 scalable bytes", which is needed | ||||||||
133 | /// to represent stack offsets. | ||||||||
134 | class StackOffset : public LinearPolyBase<StackOffset> { | ||||||||
135 | protected: | ||||||||
136 | StackOffset(ScalarTy Fixed, ScalarTy Scalable) | ||||||||
137 | : LinearPolyBase<StackOffset>({Fixed, Scalable}) {} | ||||||||
138 | |||||||||
139 | public: | ||||||||
140 | StackOffset() : StackOffset({0, 0}) {} | ||||||||
141 | StackOffset(const LinearPolyBase<StackOffset> &Other) | ||||||||
142 | : LinearPolyBase<StackOffset>(Other) {} | ||||||||
143 | static StackOffset getFixed(ScalarTy Fixed) { return {Fixed, 0}; } | ||||||||
144 | static StackOffset getScalable(ScalarTy Scalable) { return {0, Scalable}; } | ||||||||
145 | static StackOffset get(ScalarTy Fixed, ScalarTy Scalable) { | ||||||||
146 | return {Fixed, Scalable}; | ||||||||
147 | } | ||||||||
148 | |||||||||
149 | ScalarTy getFixed() const { return this->getValue(0); } | ||||||||
150 | ScalarTy getScalable() const { return this->getValue(1); } | ||||||||
151 | }; | ||||||||
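A brief sketch of how the named StackOffset components compose; the function name is an assumption for the example, and the addition operator comes from LinearPolyBase above:

void stackOffsetSketch() {
  // "8 fixed bytes plus 16 scalable bytes".
  llvm::StackOffset Off =
      llvm::StackOffset::getFixed(8) + llvm::StackOffset::getScalable(16);
  int64_t FixedPart = Off.getFixed();       // 8
  int64_t ScalablePart = Off.getScalable(); // 16
  (void)FixedPart; (void)ScalablePart;
}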
152 | |||||||||
153 | //===----------------------------------------------------------------------===// | ||||||||
154 | // UnivariateLinearPolyBase - a base class for linear polynomials with multiple | ||||||||
155 | // dimensions, but where only one dimension can be set at any time. | ||||||||
156 | // This can e.g. be used to describe sizes that are either fixed or scalable. | ||||||||
157 | //===----------------------------------------------------------------------===// | ||||||||
158 | |||||||||
159 | /// UnivariateLinearPolyBase is a base class for ElementCount and TypeSize. | ||||||||
160 | /// Like LinearPolyBase it tries to represent a linear polynomial | ||||||||
161 | /// where only one dimension can be set at any time, e.g. | ||||||||
162 | /// 0 * scale0 + 0 * scale1 + ... + cJ * scaleJ + ... + 0 * scaleK | ||||||||
163 | /// The dimension that is set is the univariate dimension. | ||||||||
164 | template <typename LeafTy> | ||||||||
165 | class UnivariateLinearPolyBase { | ||||||||
166 | public: | ||||||||
167 | using ScalarTy = typename LinearPolyBaseTypeTraits<LeafTy>::ScalarTy; | ||||||||
168 | static constexpr auto Dimensions = LinearPolyBaseTypeTraits<LeafTy>::Dimensions; | ||||||||
169 | static_assert(Dimensions != std::numeric_limits<unsigned>::max(), | ||||||||
170 | "Dimensions out of range"); | ||||||||
171 | |||||||||
172 | protected: | ||||||||
173 | ScalarTy Value; // The value at the univariate dimension. | ||||||||
174 | unsigned UnivariateDim; // The univariate dimension. | ||||||||
175 | |||||||||
176 | UnivariateLinearPolyBase(ScalarTy Val, unsigned UnivariateDim) | ||||||||
177 | : Value(Val), UnivariateDim(UnivariateDim) { | ||||||||
178 | assert(UnivariateDim < Dimensions && "Dimension out of range")((void)0); | ||||||||
179 | } | ||||||||
180 | |||||||||
181 | friend LeafTy &operator+=(LeafTy &LHS, const LeafTy &RHS) { | ||||||||
182 | assert(LHS.UnivariateDim == RHS.UnivariateDim && "Invalid dimensions")((void)0); | ||||||||
183 | LHS.Value += RHS.Value; | ||||||||
184 | return LHS; | ||||||||
185 | } | ||||||||
186 | |||||||||
187 | friend LeafTy &operator-=(LeafTy &LHS, const LeafTy &RHS) { | ||||||||
188 | assert(LHS.UnivariateDim == RHS.UnivariateDim && "Invalid dimensions")((void)0); | ||||||||
189 | LHS.Value -= RHS.Value; | ||||||||
190 | return LHS; | ||||||||
191 | } | ||||||||
192 | |||||||||
193 | friend LeafTy &operator*=(LeafTy &LHS, ScalarTy RHS) { | ||||||||
194 | LHS.Value *= RHS; | ||||||||
195 | return LHS; | ||||||||
196 | } | ||||||||
197 | |||||||||
198 | friend LeafTy operator+(const LeafTy &LHS, const LeafTy &RHS) { | ||||||||
199 | LeafTy Copy = LHS; | ||||||||
200 | return Copy += RHS; | ||||||||
201 | } | ||||||||
202 | |||||||||
203 | friend LeafTy operator-(const LeafTy &LHS, const LeafTy &RHS) { | ||||||||
204 | LeafTy Copy = LHS; | ||||||||
205 | return Copy -= RHS; | ||||||||
206 | } | ||||||||
207 | |||||||||
208 | friend LeafTy operator*(const LeafTy &LHS, ScalarTy RHS) { | ||||||||
209 | LeafTy Copy = LHS; | ||||||||
210 | return Copy *= RHS; | ||||||||
211 | } | ||||||||
212 | |||||||||
213 | template <typename U = ScalarTy> | ||||||||
214 | friend typename std::enable_if<std::is_signed<U>::value, LeafTy>::type | ||||||||
215 | operator-(const LeafTy &LHS) { | ||||||||
216 | LeafTy Copy = LHS; | ||||||||
217 | return Copy *= -1; | ||||||||
218 | } | ||||||||
219 | |||||||||
220 | public: | ||||||||
221 | bool operator==(const UnivariateLinearPolyBase &RHS) const { | ||||||||
222 | return Value == RHS.Value && UnivariateDim == RHS.UnivariateDim; | ||||||||
223 | } | ||||||||
224 | |||||||||
225 | bool operator!=(const UnivariateLinearPolyBase &RHS) const { | ||||||||
226 | return !(*this == RHS); | ||||||||
227 | } | ||||||||
228 | |||||||||
229 | bool isZero() const { return !Value; } | ||||||||
230 | bool isNonZero() const { return !isZero(); } | ||||||||
231 | explicit operator bool() const { return isNonZero(); } | ||||||||
232 | ScalarTy getValue() const { return Value; } | ||||||||
233 | ScalarTy getValue(unsigned Dim) const { | ||||||||
234 | return Dim == UnivariateDim ? Value : 0; | ||||||||
235 | } | ||||||||
236 | |||||||||
237 | /// Add \p RHS to the value at the univariate dimension. | ||||||||
238 | LeafTy getWithIncrement(ScalarTy RHS) const { | ||||||||
239 | return static_cast<LeafTy>( | ||||||||
240 | UnivariateLinearPolyBase(Value + RHS, UnivariateDim)); | ||||||||
241 | } | ||||||||
242 | |||||||||
243 | /// Subtract \p RHS from the value at the univariate dimension. | ||||||||
244 | LeafTy getWithDecrement(ScalarTy RHS) const { | ||||||||
245 | return static_cast<LeafTy>( | ||||||||
246 | UnivariateLinearPolyBase(Value - RHS, UnivariateDim)); | ||||||||
247 | } | ||||||||
248 | }; | ||||||||
249 | |||||||||
250 | |||||||||
251 | //===----------------------------------------------------------------------===// | ||||||||
252 | // LinearPolySize - base class for fixed- or scalable sizes. | ||||||||
253 | // ^ ^ | ||||||||
254 | // | | | ||||||||
255 | // | +----- ElementCount - Leaf class to represent an element count | ||||||||
256 | // | (vscale x unsigned) | ||||||||
257 | // | | ||||||||
258 | // +-------- TypeSize - Leaf class to represent a type size | ||||||||
259 | // (vscale x uint64_t) | ||||||||
260 | //===----------------------------------------------------------------------===// | ||||||||
261 | |||||||||
262 | /// LinearPolySize is a base class to represent sizes. It is either | ||||||||
263 | /// fixed-sized or it is scalable-sized, but it cannot be both. | ||||||||
264 | template <typename LeafTy> | ||||||||
265 | class LinearPolySize : public UnivariateLinearPolyBase<LeafTy> { | ||||||||
266 | // Make the parent class a friend, so that it can access the protected | ||||||||
267 | // conversion/copy-constructor for UnivariateLinearPolyBase<LeafTy> -> | ||||||||
268 | // LinearPolySize<LeafTy>. | ||||||||
269 | friend class UnivariateLinearPolyBase<LeafTy>; | ||||||||
270 | |||||||||
271 | public: | ||||||||
272 | using ScalarTy = typename UnivariateLinearPolyBase<LeafTy>::ScalarTy; | ||||||||
273 | enum Dims : unsigned { FixedDim = 0, ScalableDim = 1 }; | ||||||||
274 | |||||||||
275 | protected: | ||||||||
276 | LinearPolySize(ScalarTy MinVal, Dims D) | ||||||||
277 | : UnivariateLinearPolyBase<LeafTy>(MinVal, D) {} | ||||||||
278 | |||||||||
279 | LinearPolySize(const UnivariateLinearPolyBase<LeafTy> &V) | ||||||||
280 | : UnivariateLinearPolyBase<LeafTy>(V) {} | ||||||||
281 | |||||||||
282 | public: | ||||||||
283 | |||||||||
284 | static LeafTy getFixed(ScalarTy MinVal) { | ||||||||
285 | return static_cast<LeafTy>(LinearPolySize(MinVal, FixedDim)); | ||||||||
286 | } | ||||||||
287 | static LeafTy getScalable(ScalarTy MinVal) { | ||||||||
288 | return static_cast<LeafTy>(LinearPolySize(MinVal, ScalableDim)); | ||||||||
289 | } | ||||||||
290 | static LeafTy get(ScalarTy MinVal, bool Scalable) { | ||||||||
291 | return static_cast<LeafTy>( | ||||||||
292 | LinearPolySize(MinVal, Scalable ? ScalableDim : FixedDim)); | ||||||||
293 | } | ||||||||
294 | static LeafTy getNull() { return get(0, false); } | ||||||||
295 | |||||||||
296 | /// Returns the minimum value this size can represent. | ||||||||
297 | ScalarTy getKnownMinValue() const { return this->getValue(); } | ||||||||
298 | /// Returns whether the size is scaled by a runtime quantity (vscale). | ||||||||
299 | bool isScalable() const { return this->UnivariateDim == ScalableDim; } | ||||||||
300 | /// A return value of true indicates we know at compile time that the number | ||||||||
301 | /// of elements (vscale * Min) is definitely even. However, returning false | ||||||||
302 | /// does not guarantee that the total number of elements is odd. | ||||||||
303 | bool isKnownEven() const { return (getKnownMinValue() & 0x1) == 0; } | ||||||||
304 | /// This function tells the caller whether the element count is known at | ||||||||
305 | /// compile time to be a multiple of the scalar value RHS. | ||||||||
306 | bool isKnownMultipleOf(ScalarTy RHS) const { | ||||||||
307 | return getKnownMinValue() % RHS == 0; | ||||||||
308 | } | ||||||||
309 | |||||||||
310 | // Return the minimum value with the assumption that the count is exact. | ||||||||
311 | // Use in places where a scalable count doesn't make sense (e.g. non-vector | ||||||||
312 | // types, or vectors in backends which don't support scalable vectors). | ||||||||
313 | ScalarTy getFixedValue() const { | ||||||||
314 | assert(!isScalable() &&((void)0) | ||||||||
315 | "Request for a fixed element count on a scalable object")((void)0); | ||||||||
316 | return getKnownMinValue(); | ||||||||
317 | } | ||||||||
318 | |||||||||
319 | // For some cases, size ordering between scalable and fixed size types cannot | ||||||||
320 | // be determined at compile time, so such comparisons aren't allowed. | ||||||||
321 | // | ||||||||
322 | // e.g. <vscale x 2 x i16> could be bigger than <4 x i32> with a runtime | ||||||||
323 | // vscale >= 5, equal sized with a vscale of 4, and smaller with | ||||||||
324 | // a vscale <= 3. | ||||||||
325 | // | ||||||||
326 | // All the functions below make use of the fact vscale is always >= 1, which | ||||||||
327 | // means that <vscale x 4 x i32> is guaranteed to be >= <4 x i32>, etc. | ||||||||
328 | |||||||||
329 | static bool isKnownLT(const LinearPolySize &LHS, const LinearPolySize &RHS) { | ||||||||
330 | if (!LHS.isScalable() || RHS.isScalable()) | ||||||||
331 | return LHS.getKnownMinValue() < RHS.getKnownMinValue(); | ||||||||
332 | return false; | ||||||||
333 | } | ||||||||
334 | |||||||||
335 | static bool isKnownGT(const LinearPolySize &LHS, const LinearPolySize &RHS) { | ||||||||
336 | if (LHS.isScalable() || !RHS.isScalable()) | ||||||||
337 | return LHS.getKnownMinValue() > RHS.getKnownMinValue(); | ||||||||
338 | return false; | ||||||||
339 | } | ||||||||
340 | |||||||||
341 | static bool isKnownLE(const LinearPolySize &LHS, const LinearPolySize &RHS) { | ||||||||
342 | if (!LHS.isScalable() || RHS.isScalable()) | ||||||||
343 | return LHS.getKnownMinValue() <= RHS.getKnownMinValue(); | ||||||||
344 | return false; | ||||||||
345 | } | ||||||||
346 | |||||||||
347 | static bool isKnownGE(const LinearPolySize &LHS, const LinearPolySize &RHS) { | ||||||||
348 | if (LHS.isScalable() || !RHS.isScalable()) | ||||||||
349 | return LHS.getKnownMinValue() >= RHS.getKnownMinValue(); | ||||||||
350 | return false; | ||||||||
351 | } | ||||||||
352 | |||||||||
353 | /// We do not provide the '/' operator here because division for polynomial | ||||||||
354 | /// types does not work in the same way as for normal integer types. We can | ||||||||
355 | /// only divide the minimum value (or coefficient) by RHS, which is not the | ||||||||
356 | /// same as | ||||||||
357 | /// (Min * Vscale) / RHS | ||||||||
358 | /// The caller is recommended to use this function in combination with | ||||||||
359 | /// isKnownMultipleOf(RHS), which lets the caller know if it's possible to | ||||||||
360 | /// perform a lossless divide by RHS. | ||||||||
361 | LeafTy divideCoefficientBy(ScalarTy RHS) const { | ||||||||
362 | return static_cast<LeafTy>( | ||||||||
363 | LinearPolySize::get(getKnownMinValue() / RHS, isScalable())); | ||||||||
364 | } | ||||||||
365 | |||||||||
366 | LeafTy coefficientNextPowerOf2() const { | ||||||||
367 | return static_cast<LeafTy>(LinearPolySize::get( | ||||||||
368 | static_cast<ScalarTy>(llvm::NextPowerOf2(getKnownMinValue())), | ||||||||
369 | isScalable())); | ||||||||
370 | } | ||||||||
371 | |||||||||
372 | /// Printing function. | ||||||||
373 | void print(raw_ostream &OS) const { | ||||||||
374 | if (isScalable()) | ||||||||
375 | OS << "vscale x "; | ||||||||
376 | OS << getKnownMinValue(); | ||||||||
377 | } | ||||||||
378 | }; | ||||||||
379 | |||||||||
380 | class ElementCount; | ||||||||
381 | template <> struct LinearPolyBaseTypeTraits<ElementCount> { | ||||||||
382 | using ScalarTy = unsigned; | ||||||||
383 | static constexpr unsigned Dimensions = 2; | ||||||||
384 | }; | ||||||||
385 | |||||||||
386 | class ElementCount : public LinearPolySize<ElementCount> { | ||||||||
387 | public: | ||||||||
388 | ElementCount() : LinearPolySize(LinearPolySize::getNull()) {} | ||||||||
389 | |||||||||
390 | ElementCount(const LinearPolySize<ElementCount> &V) : LinearPolySize(V) {} | ||||||||
391 | |||||||||
392 | /// Counting predicates. | ||||||||
393 | /// | ||||||||
394 | ///@{ Number of elements. | ||||||||
395 | /// Exactly one element. | ||||||||
396 | bool isScalar() const { return !isScalable() && getKnownMinValue() == 1; } | ||||||||
397 | /// One or more elements. | ||||||||
398 | bool isVector() const { | ||||||||
399 | return (isScalable() && getKnownMinValue() != 0) || getKnownMinValue() > 1; | ||||||||
400 | } | ||||||||
401 | ///@} | ||||||||
402 | }; | ||||||||
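A short sketch of the counting predicates and the isKnownMultipleOf/divideCoefficientBy pairing recommended above; the function name is an assumption for the example:

void elementCountSketch() {
  llvm::ElementCount One = llvm::ElementCount::getFixed(1);
  llvm::ElementCount Scal8 = llvm::ElementCount::getScalable(8);
  bool A = One.isScalar();   // true: exactly one element
  bool B = Scal8.isVector(); // true: at least vscale * 8 elements
  // Halving is lossless only when the known minimum is a multiple of 2.
  if (Scal8.isKnownMultipleOf(2)) {
    llvm::ElementCount Half = Scal8.divideCoefficientBy(2); // vscale x 4
    (void)Half;
  }
  (void)A; (void)B;
}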
403 | |||||||||
404 | // This class is used to represent the size of types. If the type is of fixed size, it represents the exact size; for a scalable vector, the known minimum size. | ||||||||
405 | class TypeSize; | ||||||||
406 | template <> struct LinearPolyBaseTypeTraits<TypeSize> { | ||||||||
407 | using ScalarTy = uint64_t; | ||||||||
408 | static constexpr unsigned Dimensions = 2; | ||||||||
409 | }; | ||||||||
410 | |||||||||
411 | // TODO: Most functionality in this class will gradually be phased out | ||||||||
412 | // so it will resemble LinearPolySize as much as possible. | ||||||||
413 | // | ||||||||
414 | // TypeSize is used to represent the size of types. If the type is of fixed | ||||||||
415 | // size, it will represent the exact size. If the type is a scalable vector, | ||||||||
416 | // it will represent the known minimum size. | ||||||||
417 | class TypeSize : public LinearPolySize<TypeSize> { | ||||||||
418 | public: | ||||||||
419 | TypeSize(const LinearPolySize<TypeSize> &V) : LinearPolySize(V) {} | ||||||||
420 | TypeSize(ScalarTy MinVal, bool IsScalable) | ||||||||
421 | : LinearPolySize(LinearPolySize::get(MinVal, IsScalable)) {} | ||||||||
422 | |||||||||
423 | static TypeSize Fixed(ScalarTy MinVal) { return TypeSize(MinVal, false); } | ||||||||
424 | static TypeSize Scalable(ScalarTy MinVal) { return TypeSize(MinVal, true); } | ||||||||
425 | |||||||||
426 | ScalarTy getFixedSize() const { return getFixedValue(); } | ||||||||
427 | ScalarTy getKnownMinSize() const { return getKnownMinValue(); } | ||||||||
428 | |||||||||
429 | // All code for this class below this point is needed because of the | ||||||||
430 | // temporary implicit conversion to uint64_t. The operator overloads are | ||||||||
431 | // needed because otherwise the conversion of the parent class | ||||||||
432 | // UnivariateLinearPolyBase -> TypeSize is ambiguous. | ||||||||
433 | // TODO: Remove the implicit conversion. | ||||||||
434 | |||||||||
435 | // Casts to a uint64_t if this is a fixed-width size. | ||||||||
436 | // | ||||||||
437 | // This interface is deprecated and will be removed in a future version | ||||||||
438 | // of LLVM in favour of upgrading uses that rely on this implicit conversion | ||||||||
439 | // to uint64_t. Calls to functions that return a TypeSize should use the | ||||||||
440 | // proper interfaces to TypeSize. | ||||||||
441 | // In practice this is mostly calls to MVT/EVT::getSizeInBits(). | ||||||||
442 | // | ||||||||
443 | // To determine how to upgrade the code: | ||||||||
444 | // | ||||||||
445 | // if (<algorithm works for both scalable and fixed-width vectors>) | ||||||||
446 | // use getKnownMinValue() | ||||||||
447 | // else if (<algorithm works only for fixed-width vectors>) { | ||||||||
448 | // if <algorithm can be adapted for both scalable and fixed-width vectors> | ||||||||
449 | // update the algorithm and use getKnownMinValue() | ||||||||
450 | // else | ||||||||
451 | // bail out early for scalable vectors and use getFixedValue() | ||||||||
452 | // } | ||||||||
453 | operator ScalarTy() const; | ||||||||
454 | |||||||||
455 | // Additional operators needed to avoid ambiguous parses | ||||||||
456 | // because of the implicit conversion hack. | ||||||||
457 | friend TypeSize operator*(const TypeSize &LHS, const int RHS) { | ||||||||
458 | return LHS * (ScalarTy)RHS; | ||||||||
459 | } | ||||||||
460 | friend TypeSize operator*(const TypeSize &LHS, const unsigned RHS) { | ||||||||
461 | return LHS * (ScalarTy)RHS; | ||||||||
462 | } | ||||||||
463 | friend TypeSize operator*(const TypeSize &LHS, const int64_t RHS) { | ||||||||
464 | return LHS * (ScalarTy)RHS; | ||||||||
465 | } | ||||||||
466 | friend TypeSize operator*(const int LHS, const TypeSize &RHS) { | ||||||||
467 | return RHS * LHS; | ||||||||
468 | } | ||||||||
469 | friend TypeSize operator*(const unsigned LHS, const TypeSize &RHS) { | ||||||||
470 | return RHS * LHS; | ||||||||
471 | } | ||||||||
472 | friend TypeSize operator*(const int64_t LHS, const TypeSize &RHS) { | ||||||||
473 | return RHS * LHS; | ||||||||
474 | } | ||||||||
475 | friend TypeSize operator*(const uint64_t LHS, const TypeSize &RHS) { | ||||||||
476 | return RHS * LHS; | ||||||||
477 | } | ||||||||
478 | }; | ||||||||
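A sketch of the upgrade guidance in the comment above: prefer explicit queries over the deprecated implicit uint64_t conversion, bailing out early for scalable sizes. The helper names are assumptions for the example:

uint64_t exactSizeOrZero(llvm::TypeSize TS) {
  // Bail out for scalable sizes; only then is the exact fixed value meaningful.
  if (TS.isScalable())
    return 0;
  return TS.getFixedValue();
}

void typeSizeSketch() {
  llvm::TypeSize F = llvm::TypeSize::Fixed(128);
  llvm::TypeSize S = llvm::TypeSize::Scalable(128);
  // Comparisons rely on vscale >= 1, so this ordering is known to hold.
  bool Known = llvm::TypeSize::isKnownLE(F, S); // true
  (void)Known;
  (void)exactSizeOrZero(F); // 128
}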
479 | |||||||||
480 | //===----------------------------------------------------------------------===// | ||||||||
481 | // Utilities | ||||||||
482 | //===----------------------------------------------------------------------===// | ||||||||
483 | |||||||||
484 | /// Returns a TypeSize with a known minimum size that is the next integer | ||||||||
485 | /// (mod 2**64) that is greater than or equal to \p Value and is a multiple | ||||||||
486 | /// of \p Align. \p Align must be non-zero. | ||||||||
487 | /// | ||||||||
488 | /// Similar to the alignTo functions in MathExtras.h | ||||||||
489 | inline TypeSize alignTo(TypeSize Size, uint64_t Align) { | ||||||||
490 | assert(Align != 0u && "Align must be non-zero")((void)0); | ||||||||
491 | return {(Size.getKnownMinValue() + Align - 1) / Align * Align, | ||||||||
492 | Size.isScalable()}; | ||||||||
493 | } | ||||||||
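A quick sketch of alignTo on both kinds of sizes: rounding is applied to the known minimum value and scalability is preserved. The function name is an assumption for the example:

void alignToSketch() {
  llvm::TypeSize A = llvm::alignTo(llvm::TypeSize::Fixed(20), 16);    // 32
  llvm::TypeSize B = llvm::alignTo(llvm::TypeSize::Scalable(20), 16); // vscale x 32
  (void)A; (void)B;
}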
494 | |||||||||
495 | /// Stream operator function for `LinearPolySize`. | ||||||||
496 | template <typename LeafTy> | ||||||||
497 | inline raw_ostream &operator<<(raw_ostream &OS, | ||||||||
498 | const LinearPolySize<LeafTy> &PS) { | ||||||||
499 | PS.print(OS); | ||||||||
500 | return OS; | ||||||||
501 | } | ||||||||
502 | |||||||||
503 | template <typename T> struct DenseMapInfo; | ||||||||
504 | template <> struct DenseMapInfo<ElementCount> { | ||||||||
505 | static inline ElementCount getEmptyKey() { | ||||||||
506 | return ElementCount::getScalable(~0U); | ||||||||
507 | } | ||||||||
508 | static inline ElementCount getTombstoneKey() { | ||||||||
509 | return ElementCount::getFixed(~0U - 1); | ||||||||
510 | } | ||||||||
511 | static unsigned getHashValue(const ElementCount &EltCnt) { | ||||||||
512 | unsigned HashVal = EltCnt.getKnownMinValue() * 37U; | ||||||||
513 | if (EltCnt.isScalable()) | ||||||||
514 | return (HashVal - 1U); | ||||||||
515 | |||||||||
516 | return HashVal; | ||||||||
517 | } | ||||||||
518 | |||||||||
519 | static bool isEqual(const ElementCount &LHS, const ElementCount &RHS) { | ||||||||
520 | return LHS == RHS; | ||||||||
521 | } | ||||||||
522 | }; | ||||||||
523 | |||||||||
524 | } // end namespace llvm | ||||||||
525 | |||||||||
526 | #endif // LLVM_SUPPORT_TYPESIZE_H |
1 | //===- PatternMatch.h - Match on the LLVM IR --------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file provides a simple and efficient mechanism for performing general |
10 | // tree-based pattern matches on the LLVM IR. The power of these routines is |
11 | // that it allows you to write concise patterns that are expressive and easy to |
12 | // understand. The other major advantage of this is that it allows you to |
13 | // trivially capture/bind elements in the pattern to variables. For example, |
14 | // you can do something like this: |
15 | // |
16 | // Value *Exp = ... |
17 | // Value *X, *Y; ConstantInt *C1, *C2; // (X & C1) | (Y & C2) |
18 | // if (match(Exp, m_Or(m_And(m_Value(X), m_ConstantInt(C1)), |
19 | // m_And(m_Value(Y), m_ConstantInt(C2))))) { |
20 | // ... Pattern is matched and variables are bound ... |
21 | // } |
22 | // |
23 | // This is primarily useful to things like the instruction combiner, but can |
24 | // also be useful for static analysis tools or code generators. |
25 | // |
26 | //===----------------------------------------------------------------------===// |
27 | |
28 | #ifndef LLVM_IR_PATTERNMATCH_H |
29 | #define LLVM_IR_PATTERNMATCH_H |
30 | |
31 | #include "llvm/ADT/APFloat.h" |
32 | #include "llvm/ADT/APInt.h" |
33 | #include "llvm/IR/Constant.h" |
34 | #include "llvm/IR/Constants.h" |
35 | #include "llvm/IR/DataLayout.h" |
36 | #include "llvm/IR/InstrTypes.h" |
37 | #include "llvm/IR/Instruction.h" |
38 | #include "llvm/IR/Instructions.h" |
39 | #include "llvm/IR/IntrinsicInst.h" |
40 | #include "llvm/IR/Intrinsics.h" |
41 | #include "llvm/IR/Operator.h" |
42 | #include "llvm/IR/Value.h" |
43 | #include "llvm/Support/Casting.h" |
44 | #include <cstdint> |
45 | |
46 | namespace llvm { |
47 | namespace PatternMatch { |
48 | |
49 | template <typename Val, typename Pattern> bool match(Val *V, const Pattern &P) { |
50 | return const_cast<Pattern &>(P).match(V); |
51 | } |
52 | |
53 | template <typename Pattern> bool match(ArrayRef<int> Mask, const Pattern &P) { |
54 | return const_cast<Pattern &>(P).match(Mask); |
55 | } |
56 | |
57 | template <typename SubPattern_t> struct OneUse_match { |
58 | SubPattern_t SubPattern; |
59 | |
60 | OneUse_match(const SubPattern_t &SP) : SubPattern(SP) {} |
61 | |
62 | template <typename OpTy> bool match(OpTy *V) { |
63 | return V->hasOneUse() && SubPattern.match(V); |
64 | } |
65 | }; |
66 | |
67 | template <typename T> inline OneUse_match<T> m_OneUse(const T &SubPattern) { |
68 | return SubPattern; |
69 | } |
70 | |
71 | template <typename Class> struct class_match { |
72 | template <typename ITy> bool match(ITy *V) { return isa<Class>(V); } |
73 | }; |
74 | |
75 | /// Match an arbitrary value and ignore it. |
76 | inline class_match<Value> m_Value() { return class_match<Value>(); } |
77 | |
78 | /// Match an arbitrary unary operation and ignore it. |
79 | inline class_match<UnaryOperator> m_UnOp() { |
80 | return class_match<UnaryOperator>(); |
81 | } |
82 | |
83 | /// Match an arbitrary binary operation and ignore it. |
84 | inline class_match<BinaryOperator> m_BinOp() { |
85 | return class_match<BinaryOperator>(); |
86 | } |
87 | |
89 | /// Match any compare instruction and ignore it. |
89 | inline class_match<CmpInst> m_Cmp() { return class_match<CmpInst>(); } |
90 | |
91 | struct undef_match { |
92 | static bool check(const Value *V) { |
93 | if (isa<UndefValue>(V)) |
94 | return true; |
95 | |
96 | const auto *CA = dyn_cast<ConstantAggregate>(V); |
97 | if (!CA) |
98 | return false; |
99 | |
100 | SmallPtrSet<const ConstantAggregate *, 8> Seen; |
101 | SmallVector<const ConstantAggregate *, 8> Worklist; |
102 | |
103 | // Either UndefValue, PoisonValue, or an aggregate that only contains |
104 | // these is accepted by matcher. |
105 | // CheckValue returns false if CA cannot satisfy this constraint. |
106 | auto CheckValue = [&](const ConstantAggregate *CA) { |
107 | for (const Value *Op : CA->operand_values()) { |
108 | if (isa<UndefValue>(Op)) |
109 | continue; |
110 | |
111 | const auto *CA = dyn_cast<ConstantAggregate>(Op); |
112 | if (!CA) |
113 | return false; |
114 | if (Seen.insert(CA).second) |
115 | Worklist.emplace_back(CA); |
116 | } |
117 | |
118 | return true; |
119 | }; |
120 | |
121 | if (!CheckValue(CA)) |
122 | return false; |
123 | |
124 | while (!Worklist.empty()) { |
125 | if (!CheckValue(Worklist.pop_back_val())) |
126 | return false; |
127 | } |
128 | return true; |
129 | } |
130 | template <typename ITy> bool match(ITy *V) { return check(V); } |
131 | }; |
132 | |
133 | /// Match an arbitrary undef constant. This matches poison as well. |
134 | /// If this is an aggregate and contains a non-aggregate element that is |
135 | /// neither undef nor poison, the aggregate is not matched. |
136 | inline auto m_Undef() { return undef_match(); } |
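An illustrative sketch of the aggregate case handled by undef_match above; it assumes an LLVMContext Ctx and the usual IR headers, and the function name is made up for the example:

bool undefAggregateSketch(llvm::LLVMContext &Ctx) {
  using namespace llvm;
  using namespace llvm::PatternMatch;
  Type *I32 = Type::getInt32Ty(Ctx);
  StructType *STy = StructType::get(I32, I32);
  // Every leaf is undef or poison, so the whole aggregate matches m_Undef().
  Constant *Agg = ConstantStruct::get(
      STy, {UndefValue::get(I32), PoisonValue::get(I32)});
  return match(Agg, m_Undef()); // true
}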
137 | |
138 | /// Match an arbitrary poison constant. |
139 | inline class_match<PoisonValue> m_Poison() { return class_match<PoisonValue>(); } |
140 | |
141 | /// Match an arbitrary Constant and ignore it. |
142 | inline class_match<Constant> m_Constant() { return class_match<Constant>(); } |
143 | |
144 | /// Match an arbitrary ConstantInt and ignore it. |
145 | inline class_match<ConstantInt> m_ConstantInt() { |
146 | return class_match<ConstantInt>(); |
147 | } |
148 | |
149 | /// Match an arbitrary ConstantFP and ignore it. |
150 | inline class_match<ConstantFP> m_ConstantFP() { |
151 | return class_match<ConstantFP>(); |
152 | } |
153 | |
154 | /// Match an arbitrary ConstantExpr and ignore it. |
155 | inline class_match<ConstantExpr> m_ConstantExpr() { |
156 | return class_match<ConstantExpr>(); |
157 | } |
158 | |
159 | /// Match an arbitrary basic block value and ignore it. |
160 | inline class_match<BasicBlock> m_BasicBlock() { |
161 | return class_match<BasicBlock>(); |
162 | } |
163 | |
164 | /// Inverting matcher |
165 | template <typename Ty> struct match_unless { |
166 | Ty M; |
167 | |
168 | match_unless(const Ty &Matcher) : M(Matcher) {} |
169 | |
170 | template <typename ITy> bool match(ITy *V) { return !M.match(V); } |
171 | }; |
172 | |
173 | /// Match if the inner matcher does *NOT* match. |
174 | template <typename Ty> inline match_unless<Ty> m_Unless(const Ty &M) { |
175 | return match_unless<Ty>(M); |
176 | } |
177 | |
178 | /// Matching combinators |
179 | template <typename LTy, typename RTy> struct match_combine_or { |
180 | LTy L; |
181 | RTy R; |
182 | |
183 | match_combine_or(const LTy &Left, const RTy &Right) : L(Left), R(Right) {} |
184 | |
185 | template <typename ITy> bool match(ITy *V) { |
186 | if (L.match(V)) |
187 | return true; |
188 | if (R.match(V)) |
189 | return true; |
190 | return false; |
191 | } |
192 | }; |
193 | |
194 | template <typename LTy, typename RTy> struct match_combine_and { |
195 | LTy L; |
196 | RTy R; |
197 | |
198 | match_combine_and(const LTy &Left, const RTy &Right) : L(Left), R(Right) {} |
199 | |
200 | template <typename ITy> bool match(ITy *V) { |
201 | if (L.match(V)) |
202 | if (R.match(V)) |
203 | return true; |
204 | return false; |
205 | } |
206 | }; |
207 | |
208 | /// Combine two pattern matchers matching L || R |
209 | template <typename LTy, typename RTy> |
210 | inline match_combine_or<LTy, RTy> m_CombineOr(const LTy &L, const RTy &R) { |
211 | return match_combine_or<LTy, RTy>(L, R); |
212 | } |
213 | |
214 | /// Combine two pattern matchers matching L && R |
215 | template <typename LTy, typename RTy> |
216 | inline match_combine_and<LTy, RTy> m_CombineAnd(const LTy &L, const RTy &R) { |
217 | return match_combine_and<LTy, RTy>(L, R); |
218 | } |
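Two small sketches of the combinators; the function names and the value V are assumptions for the example:

bool constantButNotUndef(llvm::Value *V) {
  using namespace llvm::PatternMatch;
  // A Constant that is neither undef nor poison.
  return match(V, m_CombineAnd(m_Constant(), m_Unless(m_Undef())));
}

bool intOrFPConstant(llvm::Value *V) {
  using namespace llvm::PatternMatch;
  // Either an integer constant or a floating-point constant.
  return match(V, m_CombineOr(m_ConstantInt(), m_ConstantFP()));
}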
219 | |
220 | struct apint_match { |
221 | const APInt *&Res; |
222 | bool AllowUndef; |
223 | |
224 | apint_match(const APInt *&Res, bool AllowUndef) |
225 | : Res(Res), AllowUndef(AllowUndef) {} |
226 | |
227 | template <typename ITy> bool match(ITy *V) { |
228 | if (auto *CI = dyn_cast<ConstantInt>(V)) { |
229 | Res = &CI->getValue(); |
230 | return true; |
231 | } |
232 | if (V->getType()->isVectorTy()) |
233 | if (const auto *C = dyn_cast<Constant>(V)) |
234 | if (auto *CI = dyn_cast_or_null<ConstantInt>( |
235 | C->getSplatValue(AllowUndef))) { |
236 | Res = &CI->getValue(); |
237 | return true; |
238 | } |
239 | return false; |
240 | } |
241 | }; |
242 | // Either constexpr if or renaming ConstantFP::getValueAPF to |
243 | // ConstantFP::getValue is needed to do it via a single template |
244 | // function for both apint/apfloat. |
245 | struct apfloat_match { |
246 | const APFloat *&Res; |
247 | bool AllowUndef; |
248 | |
249 | apfloat_match(const APFloat *&Res, bool AllowUndef) |
250 | : Res(Res), AllowUndef(AllowUndef) {} |
251 | |
252 | template <typename ITy> bool match(ITy *V) { |
253 | if (auto *CI = dyn_cast<ConstantFP>(V)) { |
254 | Res = &CI->getValueAPF(); |
255 | return true; |
256 | } |
257 | if (V->getType()->isVectorTy()) |
258 | if (const auto *C = dyn_cast<Constant>(V)) |
259 | if (auto *CI = dyn_cast_or_null<ConstantFP>( |
260 | C->getSplatValue(AllowUndef))) { |
261 | Res = &CI->getValueAPF(); |
262 | return true; |
263 | } |
264 | return false; |
265 | } |
266 | }; |
267 | |
268 | /// Match a ConstantInt or splatted ConstantVector, binding the |
269 | /// specified pointer to the contained APInt. |
270 | inline apint_match m_APInt(const APInt *&Res) { |
271 | // Forbid undefs by default to maintain previous behavior. |
272 | return apint_match(Res, /* AllowUndef */ false); |
273 | } |
274 | |
275 | /// Match APInt while allowing undefs in splat vector constants. |
276 | inline apint_match m_APIntAllowUndef(const APInt *&Res) { |
277 | return apint_match(Res, /* AllowUndef */ true); |
278 | } |
279 | |
280 | /// Match APInt while forbidding undefs in splat vector constants. |
281 | inline apint_match m_APIntForbidUndef(const APInt *&Res) { |
282 | return apint_match(Res, /* AllowUndef */ false); |
283 | } |
284 | |
285 | /// Match a ConstantFP or splatted ConstantVector, binding the |
286 | /// specified pointer to the contained APFloat. |
287 | inline apfloat_match m_APFloat(const APFloat *&Res) { |
288 | // Forbid undefs by default to maintain previous behavior. |
289 | return apfloat_match(Res, /* AllowUndef */ false); |
290 | } |
291 | |
292 | /// Match APFloat while allowing undefs in splat vector constants. |
293 | inline apfloat_match m_APFloatAllowUndef(const APFloat *&Res) { |
294 | return apfloat_match(Res, /* AllowUndef */ true); |
295 | } |
296 | |
297 | /// Match APFloat while forbidding undefs in splat vector constants. |
298 | inline apfloat_match m_APFloatForbidUndef(const APFloat *&Res) { |
299 | return apfloat_match(Res, /* AllowUndef */ false); |
300 | } |
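A sketch of binding through m_APInt, which covers both a scalar ConstantInt and a splatted vector constant; the function name and the value V are assumptions for the example:

bool isPowerOfTwoConstant(llvm::Value *V) {
  using namespace llvm::PatternMatch;
  const llvm::APInt *C;
  // Undef lanes in a splat are rejected here; m_APIntAllowUndef would accept them.
  if (match(V, m_APInt(C)))
    return C->isPowerOf2();
  return false;
}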
301 | |
302 | template <int64_t Val> struct constantint_match { |
303 | template <typename ITy> bool match(ITy *V) { |
304 | if (const auto *CI = dyn_cast<ConstantInt>(V)) { |
305 | const APInt &CIV = CI->getValue(); |
306 | if (Val >= 0) |
307 | return CIV == static_cast<uint64_t>(Val); |
308 | // If Val is negative, and CI is shorter than it, truncate to the right |
309 | // number of bits. If it is larger, then we have to sign extend. Just |
310 | // compare their negated values. |
311 | return -CIV == -Val; |
312 | } |
313 | return false; |
314 | } |
315 | }; |
316 | |
317 | /// Match a ConstantInt with a specific value. |
318 | template <int64_t Val> inline constantint_match<Val> m_ConstantInt() { |
319 | return constantint_match<Val>(); |
320 | } |
321 | |
322 | /// This helper class is used to match constant scalars, vector splats, |
323 | /// and fixed width vectors that satisfy a specified predicate. |
324 | /// For fixed width vector constants, undefined elements are ignored. |
325 | template <typename Predicate, typename ConstantVal> |
326 | struct cstval_pred_ty : public Predicate { |
327 | template <typename ITy> bool match(ITy *V) { |
328 | if (const auto *CV = dyn_cast<ConstantVal>(V)) |
329 | return this->isValue(CV->getValue()); |
330 | if (const auto *VTy = dyn_cast<VectorType>(V->getType())) { |
331 | if (const auto *C = dyn_cast<Constant>(V)) { |
332 | if (const auto *CV = dyn_cast_or_null<ConstantVal>(C->getSplatValue())) |
333 | return this->isValue(CV->getValue()); |
334 | |
335 | // Number of elements of a scalable vector unknown at compile time |
336 | auto *FVTy = dyn_cast<FixedVectorType>(VTy); |
337 | if (!FVTy) |
338 | return false; |
339 | |
340 | // Non-splat vector constant: check each element for a match. |
341 | unsigned NumElts = FVTy->getNumElements(); |
342 | assert(NumElts != 0 && "Constant vector with no elements?")((void)0); |
343 | bool HasNonUndefElements = false; |
344 | for (unsigned i = 0; i != NumElts; ++i) { |
345 | Constant *Elt = C->getAggregateElement(i); |
346 | if (!Elt) |
347 | return false; |
348 | if (isa<UndefValue>(Elt)) |
349 | continue; |
350 | auto *CV = dyn_cast<ConstantVal>(Elt); |
351 | if (!CV || !this->isValue(CV->getValue())) |
352 | return false; |
353 | HasNonUndefElements = true; |
354 | } |
355 | return HasNonUndefElements; |
356 | } |
357 | } |
358 | return false; |
359 | } |
360 | }; |
361 | |
362 | /// specialization of cstval_pred_ty for ConstantInt |
363 | template <typename Predicate> |
364 | using cst_pred_ty = cstval_pred_ty<Predicate, ConstantInt>; |
365 | |
366 | /// specialization of cstval_pred_ty for ConstantFP |
367 | template <typename Predicate> |
368 | using cstfp_pred_ty = cstval_pred_ty<Predicate, ConstantFP>; |
369 | |
370 | /// This helper class is used to match scalar and vector constants that |
371 | /// satisfy a specified predicate, and bind them to an APInt. |
372 | template <typename Predicate> struct api_pred_ty : public Predicate { |
373 | const APInt *&Res; |
374 | |
375 | api_pred_ty(const APInt *&R) : Res(R) {} |
376 | |
377 | template <typename ITy> bool match(ITy *V) { |
378 | if (const auto *CI = dyn_cast<ConstantInt>(V)) |
379 | if (this->isValue(CI->getValue())) { |
380 | Res = &CI->getValue(); |
381 | return true; |
382 | } |
383 | if (V->getType()->isVectorTy()) |
384 | if (const auto *C = dyn_cast<Constant>(V)) |
385 | if (auto *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue())) |
386 | if (this->isValue(CI->getValue())) { |
387 | Res = &CI->getValue(); |
388 | return true; |
389 | } |
390 | |
391 | return false; |
392 | } |
393 | }; |
394 | |
395 | /// This helper class is used to match scalar and vector constants that |
396 | /// satisfy a specified predicate, and bind them to an APFloat. |
397 | /// Undefs are allowed in splat vector constants. |
398 | template <typename Predicate> struct apf_pred_ty : public Predicate { |
399 | const APFloat *&Res; |
400 | |
401 | apf_pred_ty(const APFloat *&R) : Res(R) {} |
402 | |
403 | template <typename ITy> bool match(ITy *V) { |
404 | if (const auto *CI = dyn_cast<ConstantFP>(V)) |
405 | if (this->isValue(CI->getValue())) { |
406 | Res = &CI->getValue(); |
407 | return true; |
408 | } |
409 | if (V->getType()->isVectorTy()) |
410 | if (const auto *C = dyn_cast<Constant>(V)) |
411 | if (auto *CI = dyn_cast_or_null<ConstantFP>( |
412 | C->getSplatValue(/* AllowUndef */ true))) |
413 | if (this->isValue(CI->getValue())) { |
414 | Res = &CI->getValue(); |
415 | return true; |
416 | } |
417 | |
418 | return false; |
419 | } |
420 | }; |
421 | |
422 | /////////////////////////////////////////////////////////////////////////////// |
423 | // |
424 | // Encapsulate constant value queries for use in templated predicate matchers. |
425 | // This allows checking if constants match using compound predicates and works |
426 | // with vector constants, possibly with relaxed constraints. For example, ignore |
427 | // undef values. |
428 | // |
429 | /////////////////////////////////////////////////////////////////////////////// |
430 | |
431 | struct is_any_apint { |
432 | bool isValue(const APInt &C) { return true; } |
433 | }; |
434 | /// Match an integer or vector with any integral constant. |
435 | /// For vectors, this includes constants with undefined elements. |
436 | inline cst_pred_ty<is_any_apint> m_AnyIntegralConstant() { |
437 | return cst_pred_ty<is_any_apint>(); |
438 | } |
439 | |
440 | struct is_all_ones { |
441 | bool isValue(const APInt &C) { return C.isAllOnesValue(); } |
442 | }; |
443 | /// Match an integer or vector with all bits set. |
444 | /// For vectors, this includes constants with undefined elements. |
445 | inline cst_pred_ty<is_all_ones> m_AllOnes() { |
446 | return cst_pred_ty<is_all_ones>(); |
447 | } |
448 | |
449 | struct is_maxsignedvalue { |
450 | bool isValue(const APInt &C) { return C.isMaxSignedValue(); } |
451 | }; |
452 | /// Match an integer or vector with values having all bits except for the high |
453 | /// bit set (0x7f...). |
454 | /// For vectors, this includes constants with undefined elements. |
455 | inline cst_pred_ty<is_maxsignedvalue> m_MaxSignedValue() { |
456 | return cst_pred_ty<is_maxsignedvalue>(); |
457 | } |
458 | inline api_pred_ty<is_maxsignedvalue> m_MaxSignedValue(const APInt *&V) { |
459 | return V; |
460 | } |
461 | |
462 | struct is_negative { |
463 | bool isValue(const APInt &C) { return C.isNegative(); } |
464 | }; |
465 | /// Match an integer or vector of negative values. |
466 | /// For vectors, this includes constants with undefined elements. |
467 | inline cst_pred_ty<is_negative> m_Negative() { |
468 | return cst_pred_ty<is_negative>(); |
469 | } |
470 | inline api_pred_ty<is_negative> m_Negative(const APInt *&V) { |
471 | return V; |
472 | } |
473 | |
474 | struct is_nonnegative { |
475 | bool isValue(const APInt &C) { return C.isNonNegative(); } |
476 | }; |
477 | /// Match an integer or vector of non-negative values. |
478 | /// For vectors, this includes constants with undefined elements. |
479 | inline cst_pred_ty<is_nonnegative> m_NonNegative() { |
480 | return cst_pred_ty<is_nonnegative>(); |
481 | } |
482 | inline api_pred_ty<is_nonnegative> m_NonNegative(const APInt *&V) { |
483 | return V; |
484 | } |
485 | |
486 | struct is_strictlypositive { |
487 | bool isValue(const APInt &C) { return C.isStrictlyPositive(); } |
488 | }; |
489 | /// Match an integer or vector of strictly positive values. |
490 | /// For vectors, this includes constants with undefined elements. |
491 | inline cst_pred_ty<is_strictlypositive> m_StrictlyPositive() { |
492 | return cst_pred_ty<is_strictlypositive>(); |
493 | } |
494 | inline api_pred_ty<is_strictlypositive> m_StrictlyPositive(const APInt *&V) { |
495 | return V; |
496 | } |
497 | |
498 | struct is_nonpositive { |
499 | bool isValue(const APInt &C) { return C.isNonPositive(); } |
500 | }; |
501 | /// Match an integer or vector of non-positive values. |
502 | /// For vectors, this includes constants with undefined elements. |
503 | inline cst_pred_ty<is_nonpositive> m_NonPositive() { |
504 | return cst_pred_ty<is_nonpositive>(); |
505 | } |
506 | inline api_pred_ty<is_nonpositive> m_NonPositive(const APInt *&V) { return V; } |
507 | |
508 | struct is_one { |
509 | bool isValue(const APInt &C) { return C.isOneValue(); } |
510 | }; |
511 | /// Match an integer 1 or a vector with all elements equal to 1. |
512 | /// For vectors, this includes constants with undefined elements. |
513 | inline cst_pred_ty<is_one> m_One() { |
514 | return cst_pred_ty<is_one>(); |
515 | } |
516 | |
517 | struct is_zero_int { |
518 | bool isValue(const APInt &C) { return C.isNullValue(); } |
519 | }; |
520 | /// Match an integer 0 or a vector with all elements equal to 0. |
521 | /// For vectors, this includes constants with undefined elements. |
522 | inline cst_pred_ty<is_zero_int> m_ZeroInt() { |
523 | return cst_pred_ty<is_zero_int>(); |
524 | } |
525 | |
526 | struct is_zero { |
527 | template <typename ITy> bool match(ITy *V) { |
528 | auto *C = dyn_cast<Constant>(V); |
529 | // FIXME: this should be able to do something for scalable vectors |
530 | return C && (C->isNullValue() || cst_pred_ty<is_zero_int>().match(C)); |
531 | } |
532 | }; |
533 | /// Match any null constant or a vector with all elements equal to 0. |
534 | /// For vectors, this includes constants with undefined elements. |
535 | inline is_zero m_Zero() { |
536 | return is_zero(); |
537 | } |
538 | |
539 | struct is_power2 { |
540 | bool isValue(const APInt &C) { return C.isPowerOf2(); } |
541 | }; |
542 | /// Match an integer or vector power-of-2. |
543 | /// For vectors, this includes constants with undefined elements. |
544 | inline cst_pred_ty<is_power2> m_Power2() { |
545 | return cst_pred_ty<is_power2>(); |
546 | } |
547 | inline api_pred_ty<is_power2> m_Power2(const APInt *&V) { |
548 | return V; |
549 | } |
550 | |
551 | struct is_negated_power2 { |
552 | bool isValue(const APInt &C) { return (-C).isPowerOf2(); } |
553 | }; |
554 | /// Match an integer or vector negated power-of-2.
555 | /// For vectors, this includes constants with undefined elements. |
556 | inline cst_pred_ty<is_negated_power2> m_NegatedPower2() { |
557 | return cst_pred_ty<is_negated_power2>(); |
558 | } |
559 | inline api_pred_ty<is_negated_power2> m_NegatedPower2(const APInt *&V) { |
560 | return V; |
561 | } |
562 | |
563 | struct is_power2_or_zero { |
564 | bool isValue(const APInt &C) { return !C || C.isPowerOf2(); } |
565 | }; |
566 | /// Match an integer or vector of 0 or power-of-2 values. |
567 | /// For vectors, this includes constants with undefined elements. |
568 | inline cst_pred_ty<is_power2_or_zero> m_Power2OrZero() { |
569 | return cst_pred_ty<is_power2_or_zero>(); |
570 | } |
571 | inline api_pred_ty<is_power2_or_zero> m_Power2OrZero(const APInt *&V) { |
572 | return V; |
573 | } |
574 | |
575 | struct is_sign_mask { |
576 | bool isValue(const APInt &C) { return C.isSignMask(); } |
577 | }; |
578 | /// Match an integer or vector with only the sign bit(s) set. |
579 | /// For vectors, this includes constants with undefined elements. |
580 | inline cst_pred_ty<is_sign_mask> m_SignMask() { |
581 | return cst_pred_ty<is_sign_mask>(); |
582 | } |
583 | |
584 | struct is_lowbit_mask { |
585 | bool isValue(const APInt &C) { return C.isMask(); } |
586 | }; |
587 | /// Match an integer or vector with only the low bit(s) set. |
588 | /// For vectors, this includes constants with undefined elements. |
589 | inline cst_pred_ty<is_lowbit_mask> m_LowBitMask() { |
590 | return cst_pred_ty<is_lowbit_mask>(); |
591 | } |
592 | |
593 | struct icmp_pred_with_threshold { |
594 | ICmpInst::Predicate Pred; |
595 | const APInt *Thr; |
596 | bool isValue(const APInt &C) { |
597 | switch (Pred) { |
598 | case ICmpInst::Predicate::ICMP_EQ: |
599 | return C.eq(*Thr); |
600 | case ICmpInst::Predicate::ICMP_NE: |
601 | return C.ne(*Thr); |
602 | case ICmpInst::Predicate::ICMP_UGT: |
603 | return C.ugt(*Thr); |
604 | case ICmpInst::Predicate::ICMP_UGE: |
605 | return C.uge(*Thr); |
606 | case ICmpInst::Predicate::ICMP_ULT: |
607 | return C.ult(*Thr); |
608 | case ICmpInst::Predicate::ICMP_ULE: |
609 | return C.ule(*Thr); |
610 | case ICmpInst::Predicate::ICMP_SGT: |
611 | return C.sgt(*Thr); |
612 | case ICmpInst::Predicate::ICMP_SGE: |
613 | return C.sge(*Thr); |
614 | case ICmpInst::Predicate::ICMP_SLT: |
615 | return C.slt(*Thr); |
616 | case ICmpInst::Predicate::ICMP_SLE: |
617 | return C.sle(*Thr); |
618 | default: |
619 | llvm_unreachable("Unhandled ICmp predicate");
620 | } |
621 | } |
622 | }; |
623 | /// Match an integer or vector with every element comparing 'pred' (eq/ne/...)
624 | /// to Threshold. For vectors, this includes constants with undefined elements. |
625 | inline cst_pred_ty<icmp_pred_with_threshold> |
626 | m_SpecificInt_ICMP(ICmpInst::Predicate Predicate, const APInt &Threshold) { |
627 | cst_pred_ty<icmp_pred_with_threshold> P; |
628 | P.Pred = Predicate; |
629 | P.Thr = &Threshold; |
630 | return P; |
631 | } |
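// --- Editor's illustrative sketch, not part of the original header. ---
// Example use of m_SpecificInt_ICMP above; the function name and the
// caller-provided Limit are hypothetical.
static bool isConstantBelow(llvm::Value *V, const llvm::APInt &Limit) {
  using namespace llvm::PatternMatch;
  // Matches a scalar constant or a splat vector constant (undef lanes
  // allowed) that is unsigned-less-than Limit. Limit must have the same
  // bit width as the matched constant.
  return match(V, m_SpecificInt_ICMP(llvm::ICmpInst::ICMP_ULT, Limit));
}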
632 | |
633 | struct is_nan { |
634 | bool isValue(const APFloat &C) { return C.isNaN(); } |
635 | }; |
636 | /// Match an arbitrary NaN constant. This includes quiet and signaling NaNs.
637 | /// For vectors, this includes constants with undefined elements. |
638 | inline cstfp_pred_ty<is_nan> m_NaN() { |
639 | return cstfp_pred_ty<is_nan>(); |
640 | } |
641 | |
642 | struct is_nonnan { |
643 | bool isValue(const APFloat &C) { return !C.isNaN(); } |
644 | }; |
645 | /// Match a non-NaN FP constant. |
646 | /// For vectors, this includes constants with undefined elements. |
647 | inline cstfp_pred_ty<is_nonnan> m_NonNaN() { |
648 | return cstfp_pred_ty<is_nonnan>(); |
649 | } |
650 | |
651 | struct is_inf { |
652 | bool isValue(const APFloat &C) { return C.isInfinity(); } |
653 | }; |
654 | /// Match a positive or negative infinity FP constant. |
655 | /// For vectors, this includes constants with undefined elements. |
656 | inline cstfp_pred_ty<is_inf> m_Inf() { |
657 | return cstfp_pred_ty<is_inf>(); |
658 | } |
659 | |
660 | struct is_noninf { |
661 | bool isValue(const APFloat &C) { return !C.isInfinity(); } |
662 | }; |
663 | /// Match a non-infinity FP constant, i.e. finite or NaN. |
664 | /// For vectors, this includes constants with undefined elements. |
665 | inline cstfp_pred_ty<is_noninf> m_NonInf() { |
666 | return cstfp_pred_ty<is_noninf>(); |
667 | } |
668 | |
669 | struct is_finite { |
670 | bool isValue(const APFloat &C) { return C.isFinite(); } |
671 | }; |
672 | /// Match a finite FP constant, i.e. not infinity or NaN. |
673 | /// For vectors, this includes constants with undefined elements. |
674 | inline cstfp_pred_ty<is_finite> m_Finite() { |
675 | return cstfp_pred_ty<is_finite>(); |
676 | } |
677 | inline apf_pred_ty<is_finite> m_Finite(const APFloat *&V) { return V; } |
678 | |
679 | struct is_finitenonzero { |
680 | bool isValue(const APFloat &C) { return C.isFiniteNonZero(); } |
681 | }; |
682 | /// Match a finite non-zero FP constant. |
683 | /// For vectors, this includes constants with undefined elements. |
684 | inline cstfp_pred_ty<is_finitenonzero> m_FiniteNonZero() { |
685 | return cstfp_pred_ty<is_finitenonzero>(); |
686 | } |
687 | inline apf_pred_ty<is_finitenonzero> m_FiniteNonZero(const APFloat *&V) { |
688 | return V; |
689 | } |
690 | |
691 | struct is_any_zero_fp { |
692 | bool isValue(const APFloat &C) { return C.isZero(); } |
693 | }; |
694 | /// Match a floating-point negative zero or positive zero. |
695 | /// For vectors, this includes constants with undefined elements. |
696 | inline cstfp_pred_ty<is_any_zero_fp> m_AnyZeroFP() { |
697 | return cstfp_pred_ty<is_any_zero_fp>(); |
698 | } |
699 | |
700 | struct is_pos_zero_fp { |
701 | bool isValue(const APFloat &C) { return C.isPosZero(); } |
702 | }; |
703 | /// Match a floating-point positive zero. |
704 | /// For vectors, this includes constants with undefined elements. |
705 | inline cstfp_pred_ty<is_pos_zero_fp> m_PosZeroFP() { |
706 | return cstfp_pred_ty<is_pos_zero_fp>(); |
707 | } |
708 | |
709 | struct is_neg_zero_fp { |
710 | bool isValue(const APFloat &C) { return C.isNegZero(); } |
711 | }; |
712 | /// Match a floating-point negative zero. |
713 | /// For vectors, this includes constants with undefined elements. |
714 | inline cstfp_pred_ty<is_neg_zero_fp> m_NegZeroFP() { |
715 | return cstfp_pred_ty<is_neg_zero_fp>(); |
716 | } |
717 | |
718 | struct is_non_zero_fp { |
719 | bool isValue(const APFloat &C) { return C.isNonZero(); } |
720 | }; |
721 | /// Match a floating-point non-zero. |
722 | /// For vectors, this includes constants with undefined elements. |
723 | inline cstfp_pred_ty<is_non_zero_fp> m_NonZeroFP() { |
724 | return cstfp_pred_ty<is_non_zero_fp>(); |
725 | } |
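// --- Editor's illustrative sketch, not part of the original header. ---
// Example use of the FP constant predicates above; the function name is
// hypothetical.
static bool isNaNOrAnyZeroFP(llvm::Value *V) {
  using namespace llvm::PatternMatch;
  // True for a NaN constant or a +0.0 / -0.0 constant (scalar or vector,
  // including vectors with undef elements).
  return match(V, m_NaN()) || match(V, m_AnyZeroFP());
}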
726 | |
727 | /////////////////////////////////////////////////////////////////////////////// |
728 | |
729 | template <typename Class> struct bind_ty { |
730 | Class *&VR; |
731 | |
732 | bind_ty(Class *&V) : VR(V) {} |
733 | |
734 | template <typename ITy> bool match(ITy *V) { |
735 | if (auto *CV = dyn_cast<Class>(V)) { |
736 | VR = CV; |
737 | return true; |
738 | } |
739 | return false; |
740 | } |
741 | }; |
742 | |
743 | /// Match a value, capturing it if we match. |
744 | inline bind_ty<Value> m_Value(Value *&V) { return V; } |
745 | inline bind_ty<const Value> m_Value(const Value *&V) { return V; } |
746 | |
747 | /// Match an instruction, capturing it if we match. |
748 | inline bind_ty<Instruction> m_Instruction(Instruction *&I) { return I; } |
749 | /// Match a unary operator, capturing it if we match. |
750 | inline bind_ty<UnaryOperator> m_UnOp(UnaryOperator *&I) { return I; } |
751 | /// Match a binary operator, capturing it if we match. |
752 | inline bind_ty<BinaryOperator> m_BinOp(BinaryOperator *&I) { return I; } |
753 | /// Match a with-overflow intrinsic, capturing it if we match.
754 | inline bind_ty<WithOverflowInst> m_WithOverflowInst(WithOverflowInst *&I) { return I; } |
755 | inline bind_ty<const WithOverflowInst> |
756 | m_WithOverflowInst(const WithOverflowInst *&I) { |
757 | return I; |
758 | } |
759 | |
760 | /// Match a Constant, capturing the value if we match. |
761 | inline bind_ty<Constant> m_Constant(Constant *&C) { return C; } |
762 | |
763 | /// Match a ConstantInt, capturing the value if we match. |
764 | inline bind_ty<ConstantInt> m_ConstantInt(ConstantInt *&CI) { return CI; } |
765 | |
766 | /// Match a ConstantFP, capturing the value if we match. |
767 | inline bind_ty<ConstantFP> m_ConstantFP(ConstantFP *&C) { return C; } |
768 | |
769 | /// Match a ConstantExpr, capturing the value if we match. |
770 | inline bind_ty<ConstantExpr> m_ConstantExpr(ConstantExpr *&C) { return C; } |
771 | |
772 | /// Match a basic block value, capturing it if we match. |
773 | inline bind_ty<BasicBlock> m_BasicBlock(BasicBlock *&V) { return V; } |
774 | inline bind_ty<const BasicBlock> m_BasicBlock(const BasicBlock *&V) { |
775 | return V; |
776 | } |
777 | |
778 | /// Match an arbitrary immediate Constant and ignore it. |
779 | inline match_combine_and<class_match<Constant>, |
780 | match_unless<class_match<ConstantExpr>>> |
781 | m_ImmConstant() { |
782 | return m_CombineAnd(m_Constant(), m_Unless(m_ConstantExpr())); |
783 | } |
784 | |
785 | /// Match an immediate Constant, capturing the value if we match. |
786 | inline match_combine_and<bind_ty<Constant>, |
787 | match_unless<class_match<ConstantExpr>>> |
788 | m_ImmConstant(Constant *&C) { |
789 | return m_CombineAnd(m_Constant(C), m_Unless(m_ConstantExpr())); |
790 | } |
791 | |
792 | /// Match a specified Value*. |
793 | struct specificval_ty { |
794 | const Value *Val; |
795 | |
796 | specificval_ty(const Value *V) : Val(V) {} |
797 | |
798 | template <typename ITy> bool match(ITy *V) { return V == Val; } |
799 | }; |
800 | |
801 | /// Match if we have a specific specified value. |
802 | inline specificval_ty m_Specific(const Value *V) { return V; } |
803 | |
804 | /// Stores a reference to the Value *, not the Value * itself, |
805 | /// thus can be used in commutative matchers. |
806 | template <typename Class> struct deferredval_ty { |
807 | Class *const &Val; |
808 | |
809 | deferredval_ty(Class *const &V) : Val(V) {} |
810 | |
811 | template <typename ITy> bool match(ITy *const V) { return V == Val; } |
812 | }; |
813 | |
814 | /// Like m_Specific(), but works if the specific value to match is determined |
815 | /// as part of the same match() expression. For example: |
816 | /// m_Add(m_Value(X), m_Specific(X)) is incorrect, because m_Specific() will |
817 | /// bind X before the pattern match starts. |
818 | /// m_Add(m_Value(X), m_Deferred(X)) is correct, and will check against |
819 | /// whichever value m_Value(X) populated. |
820 | inline deferredval_ty<Value> m_Deferred(Value *const &V) { return V; } |
821 | inline deferredval_ty<const Value> m_Deferred(const Value *const &V) { |
822 | return V; |
823 | } |
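// --- Editor's illustrative sketch, not part of the original header. ---
// Example of the m_Value / m_Deferred pairing described above; the function
// name is hypothetical, and m_Add is defined further down in this header.
static bool isAddOfValueToItself(llvm::Value *V) {
  using namespace llvm::PatternMatch;
  llvm::Value *X = nullptr;
  // "add X, X": the first operand binds X, and m_Deferred re-checks the
  // second operand against whatever m_Value just captured.
  return match(V, m_Add(m_Value(X), m_Deferred(X)));
}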
824 | |
825 | /// Match a specified floating point value or a vector with all elements
826 | /// equal to that value.
827 | struct specific_fpval { |
828 | double Val; |
829 | |
830 | specific_fpval(double V) : Val(V) {} |
831 | |
832 | template <typename ITy> bool match(ITy *V) { |
833 | if (const auto *CFP = dyn_cast<ConstantFP>(V)) |
834 | return CFP->isExactlyValue(Val); |
835 | if (V->getType()->isVectorTy()) |
836 | if (const auto *C = dyn_cast<Constant>(V)) |
837 | if (auto *CFP = dyn_cast_or_null<ConstantFP>(C->getSplatValue())) |
838 | return CFP->isExactlyValue(Val); |
839 | return false; |
840 | } |
841 | }; |
842 | |
843 | /// Match a specific floating point value or vector with all elements |
844 | /// equal to the value. |
845 | inline specific_fpval m_SpecificFP(double V) { return specific_fpval(V); } |
846 | |
847 | /// Match a float 1.0 or vector with all elements equal to 1.0. |
848 | inline specific_fpval m_FPOne() { return m_SpecificFP(1.0); } |
849 | |
850 | struct bind_const_intval_ty { |
851 | uint64_t &VR; |
852 | |
853 | bind_const_intval_ty(uint64_t &V) : VR(V) {} |
854 | |
855 | template <typename ITy> bool match(ITy *V) { |
856 | if (const auto *CV = dyn_cast<ConstantInt>(V)) |
857 | if (CV->getValue().ule(UINT64_MAX)) {
858 | VR = CV->getZExtValue(); |
859 | return true; |
860 | } |
861 | return false; |
862 | } |
863 | }; |
864 | |
865 | /// Match a specified integer value or a vector with all elements equal to
866 | /// that value.
867 | template <bool AllowUndefs> |
868 | struct specific_intval { |
869 | APInt Val; |
870 | |
871 | specific_intval(APInt V) : Val(std::move(V)) {} |
872 | |
873 | template <typename ITy> bool match(ITy *V) { |
874 | const auto *CI = dyn_cast<ConstantInt>(V); |
875 | if (!CI && V->getType()->isVectorTy()) |
876 | if (const auto *C = dyn_cast<Constant>(V)) |
877 | CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue(AllowUndefs)); |
878 | |
879 | return CI && APInt::isSameValue(CI->getValue(), Val); |
880 | } |
881 | }; |
882 | |
883 | /// Match a specific integer value or vector with all elements equal to |
884 | /// the value. |
885 | inline specific_intval<false> m_SpecificInt(APInt V) { |
886 | return specific_intval<false>(std::move(V)); |
887 | } |
888 | |
889 | inline specific_intval<false> m_SpecificInt(uint64_t V) { |
890 | return m_SpecificInt(APInt(64, V)); |
891 | } |
892 | |
893 | inline specific_intval<true> m_SpecificIntAllowUndef(APInt V) { |
894 | return specific_intval<true>(std::move(V)); |
895 | } |
896 | |
897 | inline specific_intval<true> m_SpecificIntAllowUndef(uint64_t V) { |
898 | return m_SpecificIntAllowUndef(APInt(64, V)); |
899 | } |
900 | |
901 | /// Match a ConstantInt and bind to its value. This does not match |
902 | /// ConstantInts wider than 64-bits. |
903 | inline bind_const_intval_ty m_ConstantInt(uint64_t &V) { return V; } |
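// --- Editor's illustrative sketch, not part of the original header. ---
// Example use of the specific/bound integer matchers above; the function
// name and parameters are hypothetical.
static bool matchIntConstant(llvm::Value *V, uint64_t &Bound) {
  using namespace llvm::PatternMatch;
  // First try the exact value 42 (scalar or splat vector), then fall back
  // to binding any ConstantInt that fits in 64 bits.
  if (match(V, m_SpecificInt(42)))
    return true;
  return match(V, m_ConstantInt(Bound));
}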
904 | |
905 | /// Match a specified basic block value. |
906 | struct specific_bbval { |
907 | BasicBlock *Val; |
908 | |
909 | specific_bbval(BasicBlock *Val) : Val(Val) {} |
910 | |
911 | template <typename ITy> bool match(ITy *V) { |
912 | const auto *BB = dyn_cast<BasicBlock>(V); |
913 | return BB && BB == Val; |
914 | } |
915 | }; |
916 | |
917 | /// Match a specific basic block value. |
918 | inline specific_bbval m_SpecificBB(BasicBlock *BB) { |
919 | return specific_bbval(BB); |
920 | } |
921 | |
922 | /// A commutative-friendly version of m_Specific(). |
923 | inline deferredval_ty<BasicBlock> m_Deferred(BasicBlock *const &BB) { |
924 | return BB; |
925 | } |
926 | inline deferredval_ty<const BasicBlock> |
927 | m_Deferred(const BasicBlock *const &BB) { |
928 | return BB; |
929 | } |
930 | |
931 | //===----------------------------------------------------------------------===// |
932 | // Matcher for any binary operator. |
933 | // |
934 | template <typename LHS_t, typename RHS_t, bool Commutable = false> |
935 | struct AnyBinaryOp_match { |
936 | LHS_t L; |
937 | RHS_t R; |
938 | |
939 | // The evaluation order is always stable, regardless of Commutability. |
940 | // The LHS is always matched first. |
941 | AnyBinaryOp_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {} |
942 | |
943 | template <typename OpTy> bool match(OpTy *V) { |
944 | if (auto *I = dyn_cast<BinaryOperator>(V)) |
945 | return (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) || |
946 | (Commutable && L.match(I->getOperand(1)) && |
947 | R.match(I->getOperand(0))); |
948 | return false; |
949 | } |
950 | }; |
951 | |
952 | template <typename LHS, typename RHS> |
953 | inline AnyBinaryOp_match<LHS, RHS> m_BinOp(const LHS &L, const RHS &R) { |
954 | return AnyBinaryOp_match<LHS, RHS>(L, R); |
955 | } |
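// --- Editor's illustrative sketch, not part of the original header. ---
// Example use of the any-binary-operator matcher above; names are
// hypothetical.
static bool isBinOpWithConstantRHS(llvm::Value *V, llvm::Constant *&C) {
  using namespace llvm::PatternMatch;
  llvm::Value *X;
  // Matches any BinaryOperator whose second operand is a Constant,
  // regardless of opcode, binding both pieces for the caller.
  return match(V, m_BinOp(m_Value(X), m_Constant(C)));
}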
956 | |
957 | //===----------------------------------------------------------------------===// |
958 | // Matcher for any unary operator. |
959 | // TODO fuse unary, binary matcher into n-ary matcher |
960 | // |
961 | template <typename OP_t> struct AnyUnaryOp_match { |
962 | OP_t X; |
963 | |
964 | AnyUnaryOp_match(const OP_t &X) : X(X) {} |
965 | |
966 | template <typename OpTy> bool match(OpTy *V) { |
967 | if (auto *I = dyn_cast<UnaryOperator>(V)) |
968 | return X.match(I->getOperand(0)); |
969 | return false; |
970 | } |
971 | }; |
972 | |
973 | template <typename OP_t> inline AnyUnaryOp_match<OP_t> m_UnOp(const OP_t &X) { |
974 | return AnyUnaryOp_match<OP_t>(X); |
975 | } |
976 | |
977 | //===----------------------------------------------------------------------===// |
978 | // Matchers for specific binary operators. |
979 | // |
980 | |
981 | template <typename LHS_t, typename RHS_t, unsigned Opcode, |
982 | bool Commutable = false> |
983 | struct BinaryOp_match { |
984 | LHS_t L; |
985 | RHS_t R; |
986 | |
987 | // The evaluation order is always stable, regardless of Commutability. |
988 | // The LHS is always matched first. |
989 | BinaryOp_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {} |
990 | |
991 | template <typename OpTy> bool match(OpTy *V) { |
992 | if (V->getValueID() == Value::InstructionVal + Opcode) { |
993 | auto *I = cast<BinaryOperator>(V); |
994 | return (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) || |
995 | (Commutable && L.match(I->getOperand(1)) && |
996 | R.match(I->getOperand(0))); |
997 | } |
998 | if (auto *CE = dyn_cast<ConstantExpr>(V)) |
999 | return CE->getOpcode() == Opcode && |
1000 | ((L.match(CE->getOperand(0)) && R.match(CE->getOperand(1))) || |
1001 | (Commutable && L.match(CE->getOperand(1)) && |
1002 | R.match(CE->getOperand(0)))); |
1003 | return false; |
1004 | } |
1005 | }; |
1006 | |
1007 | template <typename LHS, typename RHS> |
1008 | inline BinaryOp_match<LHS, RHS, Instruction::Add> m_Add(const LHS &L, |
1009 | const RHS &R) { |
1010 | return BinaryOp_match<LHS, RHS, Instruction::Add>(L, R); |
1011 | } |
1012 | |
1013 | template <typename LHS, typename RHS> |
1014 | inline BinaryOp_match<LHS, RHS, Instruction::FAdd> m_FAdd(const LHS &L, |
1015 | const RHS &R) { |
1016 | return BinaryOp_match<LHS, RHS, Instruction::FAdd>(L, R); |
1017 | } |
1018 | |
1019 | template <typename LHS, typename RHS> |
1020 | inline BinaryOp_match<LHS, RHS, Instruction::Sub> m_Sub(const LHS &L, |
1021 | const RHS &R) { |
1022 | return BinaryOp_match<LHS, RHS, Instruction::Sub>(L, R); |
1023 | } |
1024 | |
1025 | template <typename LHS, typename RHS> |
1026 | inline BinaryOp_match<LHS, RHS, Instruction::FSub> m_FSub(const LHS &L, |
1027 | const RHS &R) { |
1028 | return BinaryOp_match<LHS, RHS, Instruction::FSub>(L, R); |
1029 | } |
1030 | |
1031 | template <typename Op_t> struct FNeg_match { |
1032 | Op_t X; |
1033 | |
1034 | FNeg_match(const Op_t &Op) : X(Op) {} |
1035 | template <typename OpTy> bool match(OpTy *V) { |
1036 | auto *FPMO = dyn_cast<FPMathOperator>(V); |
1037 | if (!FPMO) return false; |
1038 | |
1039 | if (FPMO->getOpcode() == Instruction::FNeg) |
1040 | return X.match(FPMO->getOperand(0)); |
1041 | |
1042 | if (FPMO->getOpcode() == Instruction::FSub) { |
1043 | if (FPMO->hasNoSignedZeros()) { |
1044 | // With 'nsz', any zero goes. |
1045 | if (!cstfp_pred_ty<is_any_zero_fp>().match(FPMO->getOperand(0))) |
1046 | return false; |
1047 | } else { |
1048 | // Without 'nsz', we need fsub -0.0, X exactly. |
1049 | if (!cstfp_pred_ty<is_neg_zero_fp>().match(FPMO->getOperand(0))) |
1050 | return false; |
1051 | } |
1052 | |
1053 | return X.match(FPMO->getOperand(1)); |
1054 | } |
1055 | |
1056 | return false; |
1057 | } |
1058 | }; |
1059 | |
1060 | /// Match 'fneg X' as 'fsub -0.0, X'. |
1061 | template <typename OpTy> |
1062 | inline FNeg_match<OpTy> |
1063 | m_FNeg(const OpTy &X) { |
1064 | return FNeg_match<OpTy>(X); |
1065 | } |
1066 | |
1067 | /// Match 'fneg X' as 'fsub +-0.0, X'. |
1068 | template <typename RHS> |
1069 | inline BinaryOp_match<cstfp_pred_ty<is_any_zero_fp>, RHS, Instruction::FSub> |
1070 | m_FNegNSZ(const RHS &X) { |
1071 | return m_FSub(m_AnyZeroFP(), X); |
1072 | } |
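// --- Editor's illustrative sketch, not part of the original header. ---
// Example use of m_FNeg; the function name and the caller-provided X are
// hypothetical.
static bool isFNegOf(llvm::Value *V, llvm::Value *X) {
  using namespace llvm::PatternMatch;
  // Matches a real fneg instruction as well as the "fsub -0.0, X" idiom
  // (and "fsub 0.0, X" when the nsz flag is present), per the logic above.
  return match(V, m_FNeg(m_Specific(X)));
}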
1073 | |
1074 | template <typename LHS, typename RHS> |
1075 | inline BinaryOp_match<LHS, RHS, Instruction::Mul> m_Mul(const LHS &L, |
1076 | const RHS &R) { |
1077 | return BinaryOp_match<LHS, RHS, Instruction::Mul>(L, R); |
1078 | } |
1079 | |
1080 | template <typename LHS, typename RHS> |
1081 | inline BinaryOp_match<LHS, RHS, Instruction::FMul> m_FMul(const LHS &L, |
1082 | const RHS &R) { |
1083 | return BinaryOp_match<LHS, RHS, Instruction::FMul>(L, R); |
1084 | } |
1085 | |
1086 | template <typename LHS, typename RHS> |
1087 | inline BinaryOp_match<LHS, RHS, Instruction::UDiv> m_UDiv(const LHS &L, |
1088 | const RHS &R) { |
1089 | return BinaryOp_match<LHS, RHS, Instruction::UDiv>(L, R); |
1090 | } |
1091 | |
1092 | template <typename LHS, typename RHS> |
1093 | inline BinaryOp_match<LHS, RHS, Instruction::SDiv> m_SDiv(const LHS &L, |
1094 | const RHS &R) { |
1095 | return BinaryOp_match<LHS, RHS, Instruction::SDiv>(L, R); |
1096 | } |
1097 | |
1098 | template <typename LHS, typename RHS> |
1099 | inline BinaryOp_match<LHS, RHS, Instruction::FDiv> m_FDiv(const LHS &L, |
1100 | const RHS &R) { |
1101 | return BinaryOp_match<LHS, RHS, Instruction::FDiv>(L, R); |
1102 | } |
1103 | |
1104 | template <typename LHS, typename RHS> |
1105 | inline BinaryOp_match<LHS, RHS, Instruction::URem> m_URem(const LHS &L, |
1106 | const RHS &R) { |
1107 | return BinaryOp_match<LHS, RHS, Instruction::URem>(L, R); |
1108 | } |
1109 | |
1110 | template <typename LHS, typename RHS> |
1111 | inline BinaryOp_match<LHS, RHS, Instruction::SRem> m_SRem(const LHS &L, |
1112 | const RHS &R) { |
1113 | return BinaryOp_match<LHS, RHS, Instruction::SRem>(L, R); |
1114 | } |
1115 | |
1116 | template <typename LHS, typename RHS> |
1117 | inline BinaryOp_match<LHS, RHS, Instruction::FRem> m_FRem(const LHS &L, |
1118 | const RHS &R) { |
1119 | return BinaryOp_match<LHS, RHS, Instruction::FRem>(L, R); |
1120 | } |
1121 | |
1122 | template <typename LHS, typename RHS> |
1123 | inline BinaryOp_match<LHS, RHS, Instruction::And> m_And(const LHS &L, |
1124 | const RHS &R) { |
1125 | return BinaryOp_match<LHS, RHS, Instruction::And>(L, R); |
1126 | } |
1127 | |
1128 | template <typename LHS, typename RHS> |
1129 | inline BinaryOp_match<LHS, RHS, Instruction::Or> m_Or(const LHS &L, |
1130 | const RHS &R) { |
1131 | return BinaryOp_match<LHS, RHS, Instruction::Or>(L, R); |
1132 | } |
1133 | |
1134 | template <typename LHS, typename RHS> |
1135 | inline BinaryOp_match<LHS, RHS, Instruction::Xor> m_Xor(const LHS &L, |
1136 | const RHS &R) { |
1137 | return BinaryOp_match<LHS, RHS, Instruction::Xor>(L, R); |
1138 | } |
1139 | |
1140 | template <typename LHS, typename RHS> |
1141 | inline BinaryOp_match<LHS, RHS, Instruction::Shl> m_Shl(const LHS &L, |
1142 | const RHS &R) { |
1143 | return BinaryOp_match<LHS, RHS, Instruction::Shl>(L, R); |
1144 | } |
1145 | |
1146 | template <typename LHS, typename RHS> |
1147 | inline BinaryOp_match<LHS, RHS, Instruction::LShr> m_LShr(const LHS &L, |
1148 | const RHS &R) { |
1149 | return BinaryOp_match<LHS, RHS, Instruction::LShr>(L, R); |
1150 | } |
1151 | |
1152 | template <typename LHS, typename RHS> |
1153 | inline BinaryOp_match<LHS, RHS, Instruction::AShr> m_AShr(const LHS &L, |
1154 | const RHS &R) { |
1155 | return BinaryOp_match<LHS, RHS, Instruction::AShr>(L, R); |
1156 | } |
1157 | |
1158 | template <typename LHS_t, typename RHS_t, unsigned Opcode, |
1159 | unsigned WrapFlags = 0> |
1160 | struct OverflowingBinaryOp_match { |
1161 | LHS_t L; |
1162 | RHS_t R; |
1163 | |
1164 | OverflowingBinaryOp_match(const LHS_t &LHS, const RHS_t &RHS) |
1165 | : L(LHS), R(RHS) {} |
1166 | |
1167 | template <typename OpTy> bool match(OpTy *V) { |
1168 | if (auto *Op = dyn_cast<OverflowingBinaryOperator>(V)) { |
1169 | if (Op->getOpcode() != Opcode) |
1170 | return false; |
1171 | if ((WrapFlags & OverflowingBinaryOperator::NoUnsignedWrap) && |
1172 | !Op->hasNoUnsignedWrap()) |
1173 | return false; |
1174 | if ((WrapFlags & OverflowingBinaryOperator::NoSignedWrap) && |
1175 | !Op->hasNoSignedWrap()) |
1176 | return false; |
1177 | return L.match(Op->getOperand(0)) && R.match(Op->getOperand(1)); |
1178 | } |
1179 | return false; |
1180 | } |
1181 | }; |
1182 | |
1183 | template <typename LHS, typename RHS> |
1184 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Add, |
1185 | OverflowingBinaryOperator::NoSignedWrap> |
1186 | m_NSWAdd(const LHS &L, const RHS &R) { |
1187 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Add, |
1188 | OverflowingBinaryOperator::NoSignedWrap>( |
1189 | L, R); |
1190 | } |
1191 | template <typename LHS, typename RHS> |
1192 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub, |
1193 | OverflowingBinaryOperator::NoSignedWrap> |
1194 | m_NSWSub(const LHS &L, const RHS &R) { |
1195 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub, |
1196 | OverflowingBinaryOperator::NoSignedWrap>( |
1197 | L, R); |
1198 | } |
1199 | template <typename LHS, typename RHS> |
1200 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul, |
1201 | OverflowingBinaryOperator::NoSignedWrap> |
1202 | m_NSWMul(const LHS &L, const RHS &R) { |
1203 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul, |
1204 | OverflowingBinaryOperator::NoSignedWrap>( |
1205 | L, R); |
1206 | } |
1207 | template <typename LHS, typename RHS> |
1208 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl, |
1209 | OverflowingBinaryOperator::NoSignedWrap> |
1210 | m_NSWShl(const LHS &L, const RHS &R) { |
1211 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl, |
1212 | OverflowingBinaryOperator::NoSignedWrap>( |
1213 | L, R); |
1214 | } |
1215 | |
1216 | template <typename LHS, typename RHS> |
1217 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Add, |
1218 | OverflowingBinaryOperator::NoUnsignedWrap> |
1219 | m_NUWAdd(const LHS &L, const RHS &R) { |
1220 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Add, |
1221 | OverflowingBinaryOperator::NoUnsignedWrap>( |
1222 | L, R); |
1223 | } |
1224 | template <typename LHS, typename RHS> |
1225 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub, |
1226 | OverflowingBinaryOperator::NoUnsignedWrap> |
1227 | m_NUWSub(const LHS &L, const RHS &R) { |
1228 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub, |
1229 | OverflowingBinaryOperator::NoUnsignedWrap>( |
1230 | L, R); |
1231 | } |
1232 | template <typename LHS, typename RHS> |
1233 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul, |
1234 | OverflowingBinaryOperator::NoUnsignedWrap> |
1235 | m_NUWMul(const LHS &L, const RHS &R) { |
1236 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul, |
1237 | OverflowingBinaryOperator::NoUnsignedWrap>( |
1238 | L, R); |
1239 | } |
1240 | template <typename LHS, typename RHS> |
1241 | inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl, |
1242 | OverflowingBinaryOperator::NoUnsignedWrap> |
1243 | m_NUWShl(const LHS &L, const RHS &R) { |
1244 | return OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl, |
1245 | OverflowingBinaryOperator::NoUnsignedWrap>( |
1246 | L, R); |
1247 | } |
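// --- Editor's illustrative sketch, not part of the original header. ---
// Example use of the wrap-flag-aware matchers above; names are hypothetical.
static bool isNSWIncrement(llvm::Value *V, llvm::Value *&X) {
  using namespace llvm::PatternMatch;
  // Matches "add nsw X, 1"; the matcher rejects adds without the
  // no-signed-wrap flag, and operands are not commuted, so the constant
  // must be the second operand.
  return match(V, m_NSWAdd(m_Value(X), m_One()));
}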
1248 | |
1249 | //===----------------------------------------------------------------------===// |
1250 | // Class that matches a group of binary opcodes. |
1251 | // |
1252 | template <typename LHS_t, typename RHS_t, typename Predicate> |
1253 | struct BinOpPred_match : Predicate { |
1254 | LHS_t L; |
1255 | RHS_t R; |
1256 | |
1257 | BinOpPred_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {} |
1258 | |
1259 | template <typename OpTy> bool match(OpTy *V) { |
1260 | if (auto *I = dyn_cast<Instruction>(V)) |
1261 | return this->isOpType(I->getOpcode()) && L.match(I->getOperand(0)) && |
1262 | R.match(I->getOperand(1)); |
1263 | if (auto *CE = dyn_cast<ConstantExpr>(V)) |
1264 | return this->isOpType(CE->getOpcode()) && L.match(CE->getOperand(0)) && |
1265 | R.match(CE->getOperand(1)); |
1266 | return false; |
1267 | } |
1268 | }; |
1269 | |
1270 | struct is_shift_op { |
1271 | bool isOpType(unsigned Opcode) { return Instruction::isShift(Opcode); } |
1272 | }; |
1273 | |
1274 | struct is_right_shift_op { |
1275 | bool isOpType(unsigned Opcode) { |
1276 | return Opcode == Instruction::LShr || Opcode == Instruction::AShr; |
1277 | } |
1278 | }; |
1279 | |
1280 | struct is_logical_shift_op { |
1281 | bool isOpType(unsigned Opcode) { |
1282 | return Opcode == Instruction::LShr || Opcode == Instruction::Shl; |
1283 | } |
1284 | }; |
1285 | |
1286 | struct is_bitwiselogic_op { |
1287 | bool isOpType(unsigned Opcode) { |
1288 | return Instruction::isBitwiseLogicOp(Opcode); |
1289 | } |
1290 | }; |
1291 | |
1292 | struct is_idiv_op { |
1293 | bool isOpType(unsigned Opcode) { |
1294 | return Opcode == Instruction::SDiv || Opcode == Instruction::UDiv; |
1295 | } |
1296 | }; |
1297 | |
1298 | struct is_irem_op { |
1299 | bool isOpType(unsigned Opcode) { |
1300 | return Opcode == Instruction::SRem || Opcode == Instruction::URem; |
1301 | } |
1302 | }; |
1303 | |
1304 | /// Matches shift operations. |
1305 | template <typename LHS, typename RHS> |
1306 | inline BinOpPred_match<LHS, RHS, is_shift_op> m_Shift(const LHS &L, |
1307 | const RHS &R) { |
1308 | return BinOpPred_match<LHS, RHS, is_shift_op>(L, R); |
1309 | } |
1310 | |
1311 | /// Matches right shift operations (lshr or ashr).
1312 | template <typename LHS, typename RHS> |
1313 | inline BinOpPred_match<LHS, RHS, is_right_shift_op> m_Shr(const LHS &L, |
1314 | const RHS &R) { |
1315 | return BinOpPred_match<LHS, RHS, is_right_shift_op>(L, R); |
1316 | } |
1317 | |
1318 | /// Matches logical shift operations. |
1319 | template <typename LHS, typename RHS> |
1320 | inline BinOpPred_match<LHS, RHS, is_logical_shift_op> |
1321 | m_LogicalShift(const LHS &L, const RHS &R) { |
1322 | return BinOpPred_match<LHS, RHS, is_logical_shift_op>(L, R); |
1323 | } |
1324 | |
1325 | /// Matches bitwise logic operations. |
1326 | template <typename LHS, typename RHS> |
1327 | inline BinOpPred_match<LHS, RHS, is_bitwiselogic_op> |
1328 | m_BitwiseLogic(const LHS &L, const RHS &R) { |
1329 | return BinOpPred_match<LHS, RHS, is_bitwiselogic_op>(L, R); |
1330 | } |
1331 | |
1332 | /// Matches integer division operations. |
1333 | template <typename LHS, typename RHS> |
1334 | inline BinOpPred_match<LHS, RHS, is_idiv_op> m_IDiv(const LHS &L, |
1335 | const RHS &R) { |
1336 | return BinOpPred_match<LHS, RHS, is_idiv_op>(L, R); |
1337 | } |
1338 | |
1339 | /// Matches integer remainder operations. |
1340 | template <typename LHS, typename RHS> |
1341 | inline BinOpPred_match<LHS, RHS, is_irem_op> m_IRem(const LHS &L, |
1342 | const RHS &R) { |
1343 | return BinOpPred_match<LHS, RHS, is_irem_op>(L, R); |
1344 | } |
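// --- Editor's illustrative sketch, not part of the original header. ---
// Example use of the opcode-group matchers above; names are hypothetical.
static bool isShiftOfLoad(llvm::Value *V) {
  using namespace llvm::PatternMatch;
  llvm::Value *Ptr, *Amt;
  // m_Shift accepts shl, lshr and ashr; here the shifted value must come
  // directly from a load instruction.
  return match(V, m_Shift(m_Load(m_Value(Ptr)), m_Value(Amt)));
}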
1345 | |
1346 | //===----------------------------------------------------------------------===// |
1347 | // Class that matches exact binary ops. |
1348 | // |
1349 | template <typename SubPattern_t> struct Exact_match { |
1350 | SubPattern_t SubPattern; |
1351 | |
1352 | Exact_match(const SubPattern_t &SP) : SubPattern(SP) {} |
1353 | |
1354 | template <typename OpTy> bool match(OpTy *V) { |
1355 | if (auto *PEO = dyn_cast<PossiblyExactOperator>(V)) |
1356 | return PEO->isExact() && SubPattern.match(V); |
1357 | return false; |
1358 | } |
1359 | }; |
1360 | |
1361 | template <typename T> inline Exact_match<T> m_Exact(const T &SubPattern) { |
1362 | return SubPattern; |
1363 | } |
1364 | |
1365 | //===----------------------------------------------------------------------===// |
1366 | // Matchers for CmpInst classes |
1367 | // |
1368 | |
1369 | template <typename LHS_t, typename RHS_t, typename Class, typename PredicateTy, |
1370 | bool Commutable = false> |
1371 | struct CmpClass_match { |
1372 | PredicateTy &Predicate; |
1373 | LHS_t L; |
1374 | RHS_t R; |
1375 | |
1376 | // The evaluation order is always stable, regardless of Commutability. |
1377 | // The LHS is always matched first. |
1378 | CmpClass_match(PredicateTy &Pred, const LHS_t &LHS, const RHS_t &RHS) |
1379 | : Predicate(Pred), L(LHS), R(RHS) {} |
1380 | |
1381 | template <typename OpTy> bool match(OpTy *V) { |
1382 | if (auto *I = dyn_cast<Class>(V)) { |
1383 | if (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) { |
1384 | Predicate = I->getPredicate(); |
1385 | return true; |
1386 | } else if (Commutable && L.match(I->getOperand(1)) && |
1387 | R.match(I->getOperand(0))) { |
1388 | Predicate = I->getSwappedPredicate(); |
1389 | return true; |
1390 | } |
1391 | } |
1392 | return false; |
1393 | } |
1394 | }; |
1395 | |
1396 | template <typename LHS, typename RHS> |
1397 | inline CmpClass_match<LHS, RHS, CmpInst, CmpInst::Predicate> |
1398 | m_Cmp(CmpInst::Predicate &Pred, const LHS &L, const RHS &R) { |
1399 | return CmpClass_match<LHS, RHS, CmpInst, CmpInst::Predicate>(Pred, L, R); |
1400 | } |
1401 | |
1402 | template <typename LHS, typename RHS> |
1403 | inline CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate> |
1404 | m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R) { |
1405 | return CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate>(Pred, L, R); |
1406 | } |
1407 | |
1408 | template <typename LHS, typename RHS> |
1409 | inline CmpClass_match<LHS, RHS, FCmpInst, FCmpInst::Predicate> |
1410 | m_FCmp(FCmpInst::Predicate &Pred, const LHS &L, const RHS &R) { |
1411 | return CmpClass_match<LHS, RHS, FCmpInst, FCmpInst::Predicate>(Pred, L, R); |
1412 | } |
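// --- Editor's illustrative sketch, not part of the original header. ---
// Example use of the compare matchers above; names are hypothetical.
static bool isICmpWithZero(llvm::Value *V, llvm::ICmpInst::Predicate &Pred) {
  using namespace llvm::PatternMatch;
  llvm::Value *X;
  // Matches "icmp <Pred> X, 0" and reports the predicate to the caller.
  return match(V, m_ICmp(Pred, m_Value(X), m_ZeroInt()));
}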
1413 | |
1414 | //===----------------------------------------------------------------------===// |
1415 | // Matchers for instructions with a given opcode and number of operands. |
1416 | // |
1417 | |
1418 | /// Matches instructions with Opcode and one operand.
1419 | template <typename T0, unsigned Opcode> struct OneOps_match { |
1420 | T0 Op1; |
1421 | |
1422 | OneOps_match(const T0 &Op1) : Op1(Op1) {} |
1423 | |
1424 | template <typename OpTy> bool match(OpTy *V) { |
1425 | if (V->getValueID() == Value::InstructionVal + Opcode) { |
1426 | auto *I = cast<Instruction>(V); |
1427 | return Op1.match(I->getOperand(0)); |
1428 | } |
1429 | return false; |
1430 | } |
1431 | }; |
1432 | |
1433 | /// Matches instructions with Opcode and two operands.
1434 | template <typename T0, typename T1, unsigned Opcode> struct TwoOps_match { |
1435 | T0 Op1; |
1436 | T1 Op2; |
1437 | |
1438 | TwoOps_match(const T0 &Op1, const T1 &Op2) : Op1(Op1), Op2(Op2) {} |
1439 | |
1440 | template <typename OpTy> bool match(OpTy *V) { |
1441 | if (V->getValueID() == Value::InstructionVal + Opcode) { |
1442 | auto *I = cast<Instruction>(V); |
1443 | return Op1.match(I->getOperand(0)) && Op2.match(I->getOperand(1)); |
1444 | } |
1445 | return false; |
1446 | } |
1447 | }; |
1448 | |
1449 | /// Matches instructions with Opcode and three operands. |
1450 | template <typename T0, typename T1, typename T2, unsigned Opcode> |
1451 | struct ThreeOps_match { |
1452 | T0 Op1; |
1453 | T1 Op2; |
1454 | T2 Op3; |
1455 | |
1456 | ThreeOps_match(const T0 &Op1, const T1 &Op2, const T2 &Op3) |
1457 | : Op1(Op1), Op2(Op2), Op3(Op3) {} |
1458 | |
1459 | template <typename OpTy> bool match(OpTy *V) { |
1460 | if (V->getValueID() == Value::InstructionVal + Opcode) { |
1461 | auto *I = cast<Instruction>(V); |
1462 | return Op1.match(I->getOperand(0)) && Op2.match(I->getOperand(1)) && |
1463 | Op3.match(I->getOperand(2)); |
1464 | } |
1465 | return false; |
1466 | } |
1467 | }; |
1468 | |
1469 | /// Matches SelectInst. |
1470 | template <typename Cond, typename LHS, typename RHS> |
1471 | inline ThreeOps_match<Cond, LHS, RHS, Instruction::Select> |
1472 | m_Select(const Cond &C, const LHS &L, const RHS &R) { |
1473 | return ThreeOps_match<Cond, LHS, RHS, Instruction::Select>(C, L, R); |
1474 | } |
1475 | |
1476 | /// This matches a select of two constants, e.g.: |
1477 | /// m_SelectCst<-1, 0>(m_Value(V)) |
1478 | template <int64_t L, int64_t R, typename Cond> |
1479 | inline ThreeOps_match<Cond, constantint_match<L>, constantint_match<R>, |
1480 | Instruction::Select> |
1481 | m_SelectCst(const Cond &C) { |
1482 | return m_Select(C, m_ConstantInt<L>(), m_ConstantInt<R>()); |
1483 | } |
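// --- Editor's illustrative sketch, not part of the original header. ---
// Example use of m_Select / m_SelectCst above; names are hypothetical.
static bool isBoolToAllOnesMask(llvm::Value *V, llvm::Value *&Cond) {
  using namespace llvm::PatternMatch;
  // Matches "select Cond, -1, 0", i.e. a condition widened into an
  // all-ones / all-zeros mask, using the constant-select helper above.
  return match(V, m_SelectCst<-1, 0>(m_Value(Cond)));
}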
1484 | |
1485 | /// Matches FreezeInst. |
1486 | template <typename OpTy> |
1487 | inline OneOps_match<OpTy, Instruction::Freeze> m_Freeze(const OpTy &Op) { |
1488 | return OneOps_match<OpTy, Instruction::Freeze>(Op); |
1489 | } |
1490 | |
1491 | /// Matches InsertElementInst. |
1492 | template <typename Val_t, typename Elt_t, typename Idx_t> |
1493 | inline ThreeOps_match<Val_t, Elt_t, Idx_t, Instruction::InsertElement> |
1494 | m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx) { |
1495 | return ThreeOps_match<Val_t, Elt_t, Idx_t, Instruction::InsertElement>( |
1496 | Val, Elt, Idx); |
1497 | } |
1498 | |
1499 | /// Matches ExtractElementInst. |
1500 | template <typename Val_t, typename Idx_t> |
1501 | inline TwoOps_match<Val_t, Idx_t, Instruction::ExtractElement> |
1502 | m_ExtractElt(const Val_t &Val, const Idx_t &Idx) { |
1503 | return TwoOps_match<Val_t, Idx_t, Instruction::ExtractElement>(Val, Idx); |
1504 | } |
1505 | |
1506 | /// Matches shuffle. |
1507 | template <typename T0, typename T1, typename T2> struct Shuffle_match { |
1508 | T0 Op1; |
1509 | T1 Op2; |
1510 | T2 Mask; |
1511 | |
1512 | Shuffle_match(const T0 &Op1, const T1 &Op2, const T2 &Mask) |
1513 | : Op1(Op1), Op2(Op2), Mask(Mask) {} |
1514 | |
1515 | template <typename OpTy> bool match(OpTy *V) { |
1516 | if (auto *I = dyn_cast<ShuffleVectorInst>(V)) { |
1517 | return Op1.match(I->getOperand(0)) && Op2.match(I->getOperand(1)) && |
1518 | Mask.match(I->getShuffleMask()); |
1519 | } |
1520 | return false; |
1521 | } |
1522 | }; |
1523 | |
1524 | struct m_Mask { |
1525 | ArrayRef<int> &MaskRef; |
1526 | m_Mask(ArrayRef<int> &MaskRef) : MaskRef(MaskRef) {} |
1527 | bool match(ArrayRef<int> Mask) { |
1528 | MaskRef = Mask; |
1529 | return true; |
1530 | } |
1531 | }; |
1532 | |
1533 | struct m_ZeroMask { |
1534 | bool match(ArrayRef<int> Mask) { |
1535 | return all_of(Mask, [](int Elem) { return Elem == 0 || Elem == -1; }); |
1536 | } |
1537 | }; |
1538 | |
1539 | struct m_SpecificMask { |
1540 | ArrayRef<int> &MaskRef; |
1541 | m_SpecificMask(ArrayRef<int> &MaskRef) : MaskRef(MaskRef) {} |
1542 | bool match(ArrayRef<int> Mask) { return MaskRef == Mask; } |
1543 | }; |
1544 | |
1545 | struct m_SplatOrUndefMask { |
1546 | int &SplatIndex; |
1547 | m_SplatOrUndefMask(int &SplatIndex) : SplatIndex(SplatIndex) {} |
1548 | bool match(ArrayRef<int> Mask) { |
1549 | auto First = find_if(Mask, [](int Elem) { return Elem != -1; }); |
1550 | if (First == Mask.end()) |
1551 | return false; |
1552 | SplatIndex = *First; |
1553 | return all_of(Mask, |
1554 | [First](int Elem) { return Elem == *First || Elem == -1; }); |
1555 | } |
1556 | }; |
1557 | |
1558 | /// Matches ShuffleVectorInst independently of mask value. |
1559 | template <typename V1_t, typename V2_t> |
1560 | inline TwoOps_match<V1_t, V2_t, Instruction::ShuffleVector> |
1561 | m_Shuffle(const V1_t &v1, const V2_t &v2) { |
1562 | return TwoOps_match<V1_t, V2_t, Instruction::ShuffleVector>(v1, v2); |
1563 | } |
1564 | |
1565 | template <typename V1_t, typename V2_t, typename Mask_t> |
1566 | inline Shuffle_match<V1_t, V2_t, Mask_t> |
1567 | m_Shuffle(const V1_t &v1, const V2_t &v2, const Mask_t &mask) { |
1568 | return Shuffle_match<V1_t, V2_t, Mask_t>(v1, v2, mask); |
1569 | } |
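// --- Editor's illustrative sketch, not part of the original header. ---
// Example use of the shuffle matchers above; names are hypothetical.
static bool isSplatOfLaneZero(llvm::Value *V, llvm::Value *&Src) {
  using namespace llvm::PatternMatch;
  // Matches a shufflevector that broadcasts element 0 of Src; m_ZeroMask
  // allows undef (-1) lanes in the mask, and the second vector operand is
  // ignored here.
  return match(V, m_Shuffle(m_Value(Src), m_Value(), m_ZeroMask()));
}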
1570 | |
1571 | /// Matches LoadInst. |
1572 | template <typename OpTy> |
1573 | inline OneOps_match<OpTy, Instruction::Load> m_Load(const OpTy &Op) { |
1574 | return OneOps_match<OpTy, Instruction::Load>(Op); |
1575 | } |
1576 | |
1577 | /// Matches StoreInst. |
1578 | template <typename ValueOpTy, typename PointerOpTy> |
1579 | inline TwoOps_match<ValueOpTy, PointerOpTy, Instruction::Store> |
1580 | m_Store(const ValueOpTy &ValueOp, const PointerOpTy &PointerOp) { |
1581 | return TwoOps_match<ValueOpTy, PointerOpTy, Instruction::Store>(ValueOp, |
1582 | PointerOp); |
1583 | } |
1584 | |
1585 | //===----------------------------------------------------------------------===// |
1586 | // Matchers for CastInst classes |
1587 | // |
1588 | |
1589 | template <typename Op_t, unsigned Opcode> struct CastClass_match { |
1590 | Op_t Op; |
1591 | |
1592 | CastClass_match(const Op_t &OpMatch) : Op(OpMatch) {} |
1593 | |
1594 | template <typename OpTy> bool match(OpTy *V) { |
1595 | if (auto *O = dyn_cast<Operator>(V)) |
1596 | return O->getOpcode() == Opcode && Op.match(O->getOperand(0)); |
1597 | return false; |
1598 | } |
1599 | }; |
1600 | |
1601 | /// Matches BitCast. |
1602 | template <typename OpTy> |
1603 | inline CastClass_match<OpTy, Instruction::BitCast> m_BitCast(const OpTy &Op) { |
1604 | return CastClass_match<OpTy, Instruction::BitCast>(Op); |
1605 | } |
1606 | |
1607 | /// Matches PtrToInt. |
1608 | template <typename OpTy> |
1609 | inline CastClass_match<OpTy, Instruction::PtrToInt> m_PtrToInt(const OpTy &Op) { |
1610 | return CastClass_match<OpTy, Instruction::PtrToInt>(Op); |
1611 | } |
1612 | |
1613 | /// Matches IntToPtr. |
1614 | template <typename OpTy> |
1615 | inline CastClass_match<OpTy, Instruction::IntToPtr> m_IntToPtr(const OpTy &Op) { |
1616 | return CastClass_match<OpTy, Instruction::IntToPtr>(Op); |
1617 | } |
1618 | |
1619 | /// Matches Trunc. |
1620 | template <typename OpTy> |
1621 | inline CastClass_match<OpTy, Instruction::Trunc> m_Trunc(const OpTy &Op) { |
1622 | return CastClass_match<OpTy, Instruction::Trunc>(Op); |
1623 | } |
1624 | |
1625 | template <typename OpTy> |
1626 | inline match_combine_or<CastClass_match<OpTy, Instruction::Trunc>, OpTy> |
1627 | m_TruncOrSelf(const OpTy &Op) { |
1628 | return m_CombineOr(m_Trunc(Op), Op); |
1629 | } |
1630 | |
1631 | /// Matches SExt. |
1632 | template <typename OpTy> |
1633 | inline CastClass_match<OpTy, Instruction::SExt> m_SExt(const OpTy &Op) { |
1634 | return CastClass_match<OpTy, Instruction::SExt>(Op); |
1635 | } |
1636 | |
1637 | /// Matches ZExt. |
1638 | template <typename OpTy> |
1639 | inline CastClass_match<OpTy, Instruction::ZExt> m_ZExt(const OpTy &Op) { |
1640 | return CastClass_match<OpTy, Instruction::ZExt>(Op); |
1641 | } |
1642 | |
1643 | template <typename OpTy> |
1644 | inline match_combine_or<CastClass_match<OpTy, Instruction::ZExt>, OpTy> |
1645 | m_ZExtOrSelf(const OpTy &Op) { |
1646 | return m_CombineOr(m_ZExt(Op), Op); |
1647 | } |
1648 | |
1649 | template <typename OpTy> |
1650 | inline match_combine_or<CastClass_match<OpTy, Instruction::SExt>, OpTy> |
1651 | m_SExtOrSelf(const OpTy &Op) { |
1652 | return m_CombineOr(m_SExt(Op), Op); |
1653 | } |
1654 | |
1655 | template <typename OpTy> |
1656 | inline match_combine_or<CastClass_match<OpTy, Instruction::ZExt>, |
1657 | CastClass_match<OpTy, Instruction::SExt>> |
1658 | m_ZExtOrSExt(const OpTy &Op) { |
1659 | return m_CombineOr(m_ZExt(Op), m_SExt(Op)); |
1660 | } |
1661 | |
1662 | template <typename OpTy> |
1663 | inline match_combine_or< |
1664 | match_combine_or<CastClass_match<OpTy, Instruction::ZExt>, |
1665 | CastClass_match<OpTy, Instruction::SExt>>, |
1666 | OpTy> |
1667 | m_ZExtOrSExtOrSelf(const OpTy &Op) { |
1668 | return m_CombineOr(m_ZExtOrSExt(Op), Op); |
1669 | } |
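// --- Editor's illustrative sketch, not part of the original header. ---
// Example use of the cast matchers above; names are hypothetical.
static bool isZExtOrSExtOfValue(llvm::Value *V, llvm::Value *&Src) {
  using namespace llvm::PatternMatch;
  // Matches "zext Src" or "sext Src"; whether Src is an i1 (or anything
  // else) is left to the caller to check.
  return match(V, m_ZExtOrSExt(m_Value(Src)));
}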
1670 | |
1671 | template <typename OpTy> |
1672 | inline CastClass_match<OpTy, Instruction::UIToFP> m_UIToFP(const OpTy &Op) { |
1673 | return CastClass_match<OpTy, Instruction::UIToFP>(Op); |
1674 | } |
1675 | |
1676 | template <typename OpTy> |
1677 | inline CastClass_match<OpTy, Instruction::SIToFP> m_SIToFP(const OpTy &Op) { |
1678 | return CastClass_match<OpTy, Instruction::SIToFP>(Op); |
1679 | } |
1680 | |
1681 | template <typename OpTy> |
1682 | inline CastClass_match<OpTy, Instruction::FPToUI> m_FPToUI(const OpTy &Op) { |
1683 | return CastClass_match<OpTy, Instruction::FPToUI>(Op); |
1684 | } |
1685 | |
1686 | template <typename OpTy> |
1687 | inline CastClass_match<OpTy, Instruction::FPToSI> m_FPToSI(const OpTy &Op) { |
1688 | return CastClass_match<OpTy, Instruction::FPToSI>(Op); |
1689 | } |
1690 | |
1691 | template <typename OpTy> |
1692 | inline CastClass_match<OpTy, Instruction::FPTrunc> m_FPTrunc(const OpTy &Op) { |
1693 | return CastClass_match<OpTy, Instruction::FPTrunc>(Op); |
1694 | } |
1695 | |
1696 | template <typename OpTy> |
1697 | inline CastClass_match<OpTy, Instruction::FPExt> m_FPExt(const OpTy &Op) { |
1698 | return CastClass_match<OpTy, Instruction::FPExt>(Op); |
1699 | } |
1700 | |
1701 | //===----------------------------------------------------------------------===// |
1702 | // Matchers for control flow. |
1703 | // |
1704 | |
1705 | struct br_match { |
1706 | BasicBlock *&Succ; |
1707 | |
1708 | br_match(BasicBlock *&Succ) : Succ(Succ) {} |
1709 | |
1710 | template <typename OpTy> bool match(OpTy *V) { |
1711 | if (auto *BI = dyn_cast<BranchInst>(V)) |
1712 | if (BI->isUnconditional()) { |
1713 | Succ = BI->getSuccessor(0); |
1714 | return true; |
1715 | } |
1716 | return false; |
1717 | } |
1718 | }; |
1719 | |
1720 | inline br_match m_UnconditionalBr(BasicBlock *&Succ) { return br_match(Succ); } |
1721 | |
1722 | template <typename Cond_t, typename TrueBlock_t, typename FalseBlock_t> |
1723 | struct brc_match { |
1724 | Cond_t Cond; |
1725 | TrueBlock_t T; |
1726 | FalseBlock_t F; |
1727 | |
1728 | brc_match(const Cond_t &C, const TrueBlock_t &t, const FalseBlock_t &f) |
1729 | : Cond(C), T(t), F(f) {} |
1730 | |
1731 | template <typename OpTy> bool match(OpTy *V) { |
1732 | if (auto *BI = dyn_cast<BranchInst>(V)) |
1733 | if (BI->isConditional() && Cond.match(BI->getCondition())) |
1734 | return T.match(BI->getSuccessor(0)) && F.match(BI->getSuccessor(1)); |
1735 | return false; |
1736 | } |
1737 | }; |
1738 | |
1739 | template <typename Cond_t> |
1740 | inline brc_match<Cond_t, bind_ty<BasicBlock>, bind_ty<BasicBlock>> |
1741 | m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F) { |
1742 | return brc_match<Cond_t, bind_ty<BasicBlock>, bind_ty<BasicBlock>>( |
1743 | C, m_BasicBlock(T), m_BasicBlock(F)); |
1744 | } |
1745 | |
1746 | template <typename Cond_t, typename TrueBlock_t, typename FalseBlock_t> |
1747 | inline brc_match<Cond_t, TrueBlock_t, FalseBlock_t> |
1748 | m_Br(const Cond_t &C, const TrueBlock_t &T, const FalseBlock_t &F) { |
1749 | return brc_match<Cond_t, TrueBlock_t, FalseBlock_t>(C, T, F); |
1750 | } |
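// --- Editor's illustrative sketch, not part of the original header. ---
// Example use of the branch matchers above; names are hypothetical.
static bool isBranchOnICmp(llvm::Instruction *Term, llvm::BasicBlock *&TrueBB,
                           llvm::BasicBlock *&FalseBB) {
  using namespace llvm::PatternMatch;
  llvm::ICmpInst::Predicate Pred;
  llvm::Value *L, *R;
  // Matches a conditional branch whose condition is an integer compare,
  // capturing the two successor blocks.
  return match(Term,
               m_Br(m_ICmp(Pred, m_Value(L), m_Value(R)), TrueBB, FalseBB));
}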
1751 | |
1752 | //===----------------------------------------------------------------------===// |
1753 | // Matchers for max/min idioms, e.g.: "select (sgt x, y), x, y" -> smax(x,y).
1754 | // |
1755 | |
1756 | template <typename CmpInst_t, typename LHS_t, typename RHS_t, typename Pred_t, |
1757 | bool Commutable = false> |
1758 | struct MaxMin_match { |
1759 | using PredType = Pred_t; |
1760 | LHS_t L; |
1761 | RHS_t R; |
1762 | |
1763 | // The evaluation order is always stable, regardless of Commutability. |
1764 | // The LHS is always matched first. |
1765 | MaxMin_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {} |
1766 | |
1767 | template <typename OpTy> bool match(OpTy *V) { |
1768 | if (auto *II = dyn_cast<IntrinsicInst>(V)) { |
1769 | Intrinsic::ID IID = II->getIntrinsicID(); |
1770 | if ((IID == Intrinsic::smax && Pred_t::match(ICmpInst::ICMP_SGT)) || |
1771 | (IID == Intrinsic::smin && Pred_t::match(ICmpInst::ICMP_SLT)) || |
1772 | (IID == Intrinsic::umax && Pred_t::match(ICmpInst::ICMP_UGT)) || |
1773 | (IID == Intrinsic::umin && Pred_t::match(ICmpInst::ICMP_ULT))) { |
1774 | Value *LHS = II->getOperand(0), *RHS = II->getOperand(1); |
1775 | return (L.match(LHS) && R.match(RHS)) || |
1776 | (Commutable && L.match(RHS) && R.match(LHS)); |
1777 | } |
1778 | } |
1779 | // Look for "(x pred y) ? x : y" or "(x pred y) ? y : x". |
1780 | auto *SI = dyn_cast<SelectInst>(V); |
1781 | if (!SI) |
1782 | return false; |
1783 | auto *Cmp = dyn_cast<CmpInst_t>(SI->getCondition()); |
1784 | if (!Cmp) |
1785 | return false; |
1786 | // At this point we have a select conditioned on a comparison. Check that |
1787 | // it is the values returned by the select that are being compared. |
1788 | auto *TrueVal = SI->getTrueValue(); |
1789 | auto *FalseVal = SI->getFalseValue(); |
1790 | auto *LHS = Cmp->getOperand(0); |
1791 | auto *RHS = Cmp->getOperand(1); |
1792 | if ((TrueVal != LHS || FalseVal != RHS) && |
1793 | (TrueVal != RHS || FalseVal != LHS)) |
1794 | return false; |
1795 | typename CmpInst_t::Predicate Pred = |
1796 | LHS == TrueVal ? Cmp->getPredicate() : Cmp->getInversePredicate(); |
1797 | // Does "(x pred y) ? x : y" represent the desired max/min operation? |
1798 | if (!Pred_t::match(Pred)) |
1799 | return false; |
1800 | // It does! Bind the operands. |
1801 | return (L.match(LHS) && R.match(RHS)) || |
1802 | (Commutable && L.match(RHS) && R.match(LHS)); |
1803 | } |
1804 | }; |
1805 | |
1806 | /// Helper class for identifying signed max predicates. |
1807 | struct smax_pred_ty { |
1808 | static bool match(ICmpInst::Predicate Pred) { |
1809 | return Pred == CmpInst::ICMP_SGT || Pred == CmpInst::ICMP_SGE; |
1810 | } |
1811 | }; |
1812 | |
1813 | /// Helper class for identifying signed min predicates. |
1814 | struct smin_pred_ty { |
1815 | static bool match(ICmpInst::Predicate Pred) { |
1816 | return Pred == CmpInst::ICMP_SLT || Pred == CmpInst::ICMP_SLE; |
1817 | } |
1818 | }; |
1819 | |
1820 | /// Helper class for identifying unsigned max predicates. |
1821 | struct umax_pred_ty { |
1822 | static bool match(ICmpInst::Predicate Pred) { |
1823 | return Pred == CmpInst::ICMP_UGT || Pred == CmpInst::ICMP_UGE; |
1824 | } |
1825 | }; |
1826 | |
1827 | /// Helper class for identifying unsigned min predicates. |
1828 | struct umin_pred_ty { |
1829 | static bool match(ICmpInst::Predicate Pred) { |
1830 | return Pred == CmpInst::ICMP_ULT || Pred == CmpInst::ICMP_ULE; |
1831 | } |
1832 | }; |
1833 | |
1834 | /// Helper class for identifying ordered max predicates. |
1835 | struct ofmax_pred_ty { |
1836 | static bool match(FCmpInst::Predicate Pred) { |
1837 | return Pred == CmpInst::FCMP_OGT || Pred == CmpInst::FCMP_OGE; |
1838 | } |
1839 | }; |
1840 | |
1841 | /// Helper class for identifying ordered min predicates. |
1842 | struct ofmin_pred_ty { |
1843 | static bool match(FCmpInst::Predicate Pred) { |
1844 | return Pred == CmpInst::FCMP_OLT || Pred == CmpInst::FCMP_OLE; |
1845 | } |
1846 | }; |
1847 | |
1848 | /// Helper class for identifying unordered max predicates. |
1849 | struct ufmax_pred_ty { |
1850 | static bool match(FCmpInst::Predicate Pred) { |
1851 | return Pred == CmpInst::FCMP_UGT || Pred == CmpInst::FCMP_UGE; |
1852 | } |
1853 | }; |
1854 | |
1855 | /// Helper class for identifying unordered min predicates. |
1856 | struct ufmin_pred_ty { |
1857 | static bool match(FCmpInst::Predicate Pred) { |
1858 | return Pred == CmpInst::FCMP_ULT || Pred == CmpInst::FCMP_ULE; |
1859 | } |
1860 | }; |
1861 | |
1862 | template <typename LHS, typename RHS> |
1863 | inline MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty> m_SMax(const LHS &L, |
1864 | const RHS &R) { |
1865 | return MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty>(L, R); |
1866 | } |
1867 | |
1868 | template <typename LHS, typename RHS> |
1869 | inline MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty> m_SMin(const LHS &L, |
1870 | const RHS &R) { |
1871 | return MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty>(L, R); |
1872 | } |
1873 | |
1874 | template <typename LHS, typename RHS> |
1875 | inline MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty> m_UMax(const LHS &L, |
1876 | const RHS &R) { |
1877 | return MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty>(L, R); |
1878 | } |
1879 | |
1880 | template <typename LHS, typename RHS> |
1881 | inline MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty> m_UMin(const LHS &L, |
1882 | const RHS &R) { |
1883 | return MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty>(L, R); |
1884 | } |
1885 | |
1886 | template <typename LHS, typename RHS> |
1887 | inline match_combine_or< |
1888 | match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty>, |
1889 | MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty>>, |
1890 | match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty>, |
1891 | MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty>>> |
1892 | m_MaxOrMin(const LHS &L, const RHS &R) { |
1893 | return m_CombineOr(m_CombineOr(m_SMax(L, R), m_SMin(L, R)), |
1894 | m_CombineOr(m_UMax(L, R), m_UMin(L, R))); |
1895 | } |
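// --- Editor's illustrative sketch, not part of the original header. ---
// Example use of the min/max matchers above; names are hypothetical.
static bool isClampToNonNegative(llvm::Value *V, llvm::Value *&X) {
  using namespace llvm::PatternMatch;
  // Matches both the "llvm.smax(X, 0)" intrinsic form and the
  // "select (icmp sgt X, 0), X, 0" idiom handled by MaxMin_match.
  return match(V, m_SMax(m_Value(X), m_ZeroInt()));
}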
1896 | |
1897 | /// Match an 'ordered' floating point maximum function. |
1898 | /// Floating point has one special value 'NaN'. Therefore, there is no total |
1899 | /// order. However, if we can ignore the 'NaN' value (for example, because of a |
1900 | /// 'no-nans-float-math' flag) a combination of a fcmp and select has 'maximum' |
1901 | /// semantics. In the presence of 'NaN' we have to preserve the original |
1902 | /// select(fcmp(ogt/oge, L, R), L, R) semantics matched by this predicate.
1903 | /// |
1904 | ///   m_OrdFMax(L, R) = max(L, R)  iff L and R are not NaN
1905 | ///   m_OrdFMax(L, R) = R          iff L or R are NaN
1906 | template <typename LHS, typename RHS> |
1907 | inline MaxMin_match<FCmpInst, LHS, RHS, ofmax_pred_ty> m_OrdFMax(const LHS &L, |
1908 | const RHS &R) { |
1909 | return MaxMin_match<FCmpInst, LHS, RHS, ofmax_pred_ty>(L, R); |
1910 | } |
1911 | |
1912 | /// Match an 'ordered' floating point minimum function. |
1913 | /// Floating point has one special value 'NaN'. Therefore, there is no total |
1914 | /// order. However, if we can ignore the 'NaN' value (for example, because of a |
1915 | /// 'no-nans-float-math' flag) a combination of a fcmp and select has 'minimum' |
1916 | /// semantics. In the presence of 'NaN' we have to preserve the original |
1917 | /// select(fcmp(olt/ole, L, R), L, R) semantics matched by this predicate.
1918 | /// |
1919 | ///   m_OrdFMin(L, R) = min(L, R)  iff L and R are not NaN
1920 | ///   m_OrdFMin(L, R) = R          iff L or R are NaN
1921 | template <typename LHS, typename RHS> |
1922 | inline MaxMin_match<FCmpInst, LHS, RHS, ofmin_pred_ty> m_OrdFMin(const LHS &L, |
1923 | const RHS &R) { |
1924 | return MaxMin_match<FCmpInst, LHS, RHS, ofmin_pred_ty>(L, R); |
1925 | } |
1926 | |
1927 | /// Match an 'unordered' floating point maximum function. |
1928 | /// Floating point has one special value 'NaN'. Therefore, there is no total |
1929 | /// order. However, if we can ignore the 'NaN' value (for example, because of a |
1930 | /// 'no-nans-float-math' flag) a combination of a fcmp and select has 'maximum' |
1931 | /// semantics. In the presence of 'NaN' we have to preserve the original |
1932 | /// select(fcmp(ugt/uge, L, R), L, R) semantics matched by this predicate.
1933 | /// |
1934 | ///   m_UnordFMax(L, R) = max(L, R)  iff L and R are not NaN
1935 | ///   m_UnordFMax(L, R) = L          iff L or R are NaN
1936 | template <typename LHS, typename RHS> |
1937 | inline MaxMin_match<FCmpInst, LHS, RHS, ufmax_pred_ty> |
1938 | m_UnordFMax(const LHS &L, const RHS &R) { |
1939 | return MaxMin_match<FCmpInst, LHS, RHS, ufmax_pred_ty>(L, R); |
1940 | } |
1941 | |
1942 | /// Match an 'unordered' floating point minimum function. |
1943 | /// Floating point has one special value 'NaN'. Therefore, there is no total |
1944 | /// order. However, if we can ignore the 'NaN' value (for example, because of a |
1945 | /// 'no-nans-float-math' flag) a combination of a fcmp and select has 'minimum' |
1946 | /// semantics. In the presence of 'NaN' we have to preserve the original |
1947 | /// select(fcmp(ult/ule, L, R), L, R) semantics matched by this predicate.
1948 | /// |
1949 | ///   m_UnordFMin(L, R) = min(L, R)  iff L and R are not NaN
1950 | ///   m_UnordFMin(L, R) = L          iff L or R are NaN
1951 | template <typename LHS, typename RHS> |
1952 | inline MaxMin_match<FCmpInst, LHS, RHS, ufmin_pred_ty> |
1953 | m_UnordFMin(const LHS &L, const RHS &R) { |
1954 | return MaxMin_match<FCmpInst, LHS, RHS, ufmin_pred_ty>(L, R); |
1955 | } |
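// Illustrative contrast between the ordered and unordered forms (a sketch;
// `V`, `L`, and `R` are placeholders): both recognize an fcmp+select min/max
// idiom, but they differ in which operand survives when a NaN is present.
//
//   Value *L, *R;
//   // "select (fcmp ult L, R), L, R": yields L if either input is NaN.
//   if (match(V, m_UnordFMin(m_Value(L), m_Value(R)))) {
//     // Safe to treat as fmin(L, R) only under no-NaNs assumptions.
//   }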
1956 | |
1957 | //===----------------------------------------------------------------------===// |
1958 | // Matchers for overflow check patterns: e.g. (a + b) u< a, (a ^ -1) <u b |
1959 | // Note that S might be matched to instructions other than AddInst.
1960 | // |
1961 | |
1962 | template <typename LHS_t, typename RHS_t, typename Sum_t> |
1963 | struct UAddWithOverflow_match { |
1964 | LHS_t L; |
1965 | RHS_t R; |
1966 | Sum_t S; |
1967 | |
1968 | UAddWithOverflow_match(const LHS_t &L, const RHS_t &R, const Sum_t &S) |
1969 | : L(L), R(R), S(S) {} |
1970 | |
1971 | template <typename OpTy> bool match(OpTy *V) { |
1972 | Value *ICmpLHS, *ICmpRHS; |
1973 | ICmpInst::Predicate Pred; |
1974 | if (!m_ICmp(Pred, m_Value(ICmpLHS), m_Value(ICmpRHS)).match(V)) |
1975 | return false; |
1976 | |
1977 | Value *AddLHS, *AddRHS; |
1978 | auto AddExpr = m_Add(m_Value(AddLHS), m_Value(AddRHS)); |
1979 | |
1980 | // (a + b) u< a, (a + b) u< b |
1981 | if (Pred == ICmpInst::ICMP_ULT) |
1982 | if (AddExpr.match(ICmpLHS) && (ICmpRHS == AddLHS || ICmpRHS == AddRHS)) |
1983 | return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpLHS); |
1984 | |
1985 | // a >u (a + b), b >u (a + b) |
1986 | if (Pred == ICmpInst::ICMP_UGT) |
1987 | if (AddExpr.match(ICmpRHS) && (ICmpLHS == AddLHS || ICmpLHS == AddRHS)) |
1988 | return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS); |
1989 | |
1990 | Value *Op1; |
1991 | auto XorExpr = m_OneUse(m_Xor(m_Value(Op1), m_AllOnes())); |
1992 | // (a ^ -1) <u b |
1993 | if (Pred == ICmpInst::ICMP_ULT) { |
1994 | if (XorExpr.match(ICmpLHS)) |
1995 | return L.match(Op1) && R.match(ICmpRHS) && S.match(ICmpLHS); |
1996 | } |
1997 |     // b >u (a ^ -1)
1998 | if (Pred == ICmpInst::ICMP_UGT) { |
1999 | if (XorExpr.match(ICmpRHS)) |
2000 | return L.match(Op1) && R.match(ICmpLHS) && S.match(ICmpRHS); |
2001 | } |
2002 | |
2003 | // Match special-case for increment-by-1. |
2004 | if (Pred == ICmpInst::ICMP_EQ) { |
2005 | // (a + 1) == 0 |
2006 | // (1 + a) == 0 |
2007 | if (AddExpr.match(ICmpLHS) && m_ZeroInt().match(ICmpRHS) && |
2008 | (m_One().match(AddLHS) || m_One().match(AddRHS))) |
2009 | return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpLHS); |
2010 | // 0 == (a + 1) |
2011 | // 0 == (1 + a) |
2012 | if (m_ZeroInt().match(ICmpLHS) && AddExpr.match(ICmpRHS) && |
2013 | (m_One().match(AddLHS) || m_One().match(AddRHS))) |
2014 | return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS); |
2015 | } |
2016 | |
2017 | return false; |
2018 | } |
2019 | }; |
2020 | |
2021 | /// Match an icmp instruction checking for unsigned overflow on addition. |
2022 | /// |
2023 | /// S is matched to the addition whose result is being checked for overflow, and |
2024 | /// L and R are matched to the LHS and RHS of S. |
2025 | template <typename LHS_t, typename RHS_t, typename Sum_t> |
2026 | UAddWithOverflow_match<LHS_t, RHS_t, Sum_t> |
2027 | m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S) { |
2028 | return UAddWithOverflow_match<LHS_t, RHS_t, Sum_t>(L, R, S); |
2029 | } |
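// Illustrative use of m_UAddWithOverflow (a sketch; `Cmp`, `A`, `B`, and
// `Sum` are placeholder names):
//
//   Value *A, *B, *Sum;
//   // Recognizes overflow checks such as "(a + b) u< a" or "(a ^ -1) u< b"
//   // and binds Sum to the value standing in for the addition result.
//   if (match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_Value(Sum)))) {
//     // Candidate for rewriting to llvm.uadd.with.overflow.
//   }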
2030 | |
2031 | template <typename Opnd_t> struct Argument_match { |
2032 | unsigned OpI; |
2033 | Opnd_t Val; |
2034 | |
2035 | Argument_match(unsigned OpIdx, const Opnd_t &V) : OpI(OpIdx), Val(V) {} |
2036 | |
2037 | template <typename OpTy> bool match(OpTy *V) { |
2038 | // FIXME: Should likely be switched to use `CallBase`. |
2039 | if (const auto *CI = dyn_cast<CallInst>(V)) |
2040 | return Val.match(CI->getArgOperand(OpI)); |
2041 | return false; |
2042 | } |
2043 | }; |
2044 | |
2045 | /// Match an argument. |
2046 | template <unsigned OpI, typename Opnd_t> |
2047 | inline Argument_match<Opnd_t> m_Argument(const Opnd_t &Op) { |
2048 | return Argument_match<Opnd_t>(OpI, Op); |
2049 | } |
2050 | |
2051 | /// Intrinsic matchers. |
2052 | struct IntrinsicID_match { |
2053 | unsigned ID; |
2054 | |
2055 | IntrinsicID_match(Intrinsic::ID IntrID) : ID(IntrID) {} |
2056 | |
2057 | template <typename OpTy> bool match(OpTy *V) { |
2058 | if (const auto *CI = dyn_cast<CallInst>(V)) |
2059 | if (const auto *F = CI->getCalledFunction()) |
2060 | return F->getIntrinsicID() == ID; |
2061 | return false; |
2062 | } |
2063 | }; |
2064 | |
2065 | /// Intrinsic matches are combinations of ID matchers and argument
2066 | /// matchers. Higher arity matchers are defined recursively in terms of
2067 | /// and-ing them with lower arity matchers. Here are some convenient typedefs
2068 | /// for up to several arguments; more can be added as needed.
2069 | template <typename T0 = void, typename T1 = void, typename T2 = void, |
2070 | typename T3 = void, typename T4 = void, typename T5 = void, |
2071 | typename T6 = void, typename T7 = void, typename T8 = void, |
2072 | typename T9 = void, typename T10 = void> |
2073 | struct m_Intrinsic_Ty; |
2074 | template <typename T0> struct m_Intrinsic_Ty<T0> { |
2075 | using Ty = match_combine_and<IntrinsicID_match, Argument_match<T0>>; |
2076 | }; |
2077 | template <typename T0, typename T1> struct m_Intrinsic_Ty<T0, T1> { |
2078 | using Ty = |
2079 | match_combine_and<typename m_Intrinsic_Ty<T0>::Ty, Argument_match<T1>>; |
2080 | }; |
2081 | template <typename T0, typename T1, typename T2> |
2082 | struct m_Intrinsic_Ty<T0, T1, T2> { |
2083 | using Ty = |
2084 | match_combine_and<typename m_Intrinsic_Ty<T0, T1>::Ty, |
2085 | Argument_match<T2>>; |
2086 | }; |
2087 | template <typename T0, typename T1, typename T2, typename T3> |
2088 | struct m_Intrinsic_Ty<T0, T1, T2, T3> { |
2089 | using Ty = |
2090 | match_combine_and<typename m_Intrinsic_Ty<T0, T1, T2>::Ty, |
2091 | Argument_match<T3>>; |
2092 | }; |
2093 | |
2094 | template <typename T0, typename T1, typename T2, typename T3, typename T4> |
2095 | struct m_Intrinsic_Ty<T0, T1, T2, T3, T4> { |
2096 | using Ty = match_combine_and<typename m_Intrinsic_Ty<T0, T1, T2, T3>::Ty, |
2097 | Argument_match<T4>>; |
2098 | }; |
2099 | |
2100 | template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5> |
2101 | struct m_Intrinsic_Ty<T0, T1, T2, T3, T4, T5> { |
2102 | using Ty = match_combine_and<typename m_Intrinsic_Ty<T0, T1, T2, T3, T4>::Ty, |
2103 | Argument_match<T5>>; |
2104 | }; |
2105 | |
2106 | /// Match intrinsic calls like this: |
2107 | /// m_Intrinsic<Intrinsic::fabs>(m_Value(X)) |
2108 | template <Intrinsic::ID IntrID> inline IntrinsicID_match m_Intrinsic() { |
2109 | return IntrinsicID_match(IntrID); |
2110 | } |
2111 | |
2112 | /// Matches MaskedLoad Intrinsic. |
2113 | template <typename Opnd0, typename Opnd1, typename Opnd2, typename Opnd3> |
2114 | inline typename m_Intrinsic_Ty<Opnd0, Opnd1, Opnd2, Opnd3>::Ty |
2115 | m_MaskedLoad(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2, |
2116 | const Opnd3 &Op3) { |
2117 | return m_Intrinsic<Intrinsic::masked_load>(Op0, Op1, Op2, Op3); |
2118 | } |
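// Illustrative use of m_MaskedLoad (a sketch; the bound names are
// placeholders). The operand order follows llvm.masked.load:
// pointer, alignment, mask, passthru.
//
//   Value *Ptr, *Alignment, *Mask, *PassThru;
//   if (match(V, m_MaskedLoad(m_Value(Ptr), m_Value(Alignment), m_Value(Mask),
//                             m_Value(PassThru)))) {
//     // V is a call to llvm.masked.load with its four operands bound above.
//   }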
2119 | |
2120 | template <Intrinsic::ID IntrID, typename T0> |
2121 | inline typename m_Intrinsic_Ty<T0>::Ty m_Intrinsic(const T0 &Op0) { |
2122 | return m_CombineAnd(m_Intrinsic<IntrID>(), m_Argument<0>(Op0)); |
2123 | } |
2124 | |
2125 | template <Intrinsic::ID IntrID, typename T0, typename T1> |
2126 | inline typename m_Intrinsic_Ty<T0, T1>::Ty m_Intrinsic(const T0 &Op0, |
2127 | const T1 &Op1) { |
2128 | return m_CombineAnd(m_Intrinsic<IntrID>(Op0), m_Argument<1>(Op1)); |
2129 | } |
2130 | |
2131 | template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2> |
2132 | inline typename m_Intrinsic_Ty<T0, T1, T2>::Ty |
2133 | m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2) { |
2134 | return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1), m_Argument<2>(Op2)); |
2135 | } |
2136 | |
2137 | template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2, |
2138 | typename T3> |
2139 | inline typename m_Intrinsic_Ty<T0, T1, T2, T3>::Ty |
2140 | m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3) { |
2141 | return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2), m_Argument<3>(Op3)); |
2142 | } |
2143 | |
2144 | template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2, |
2145 | typename T3, typename T4> |
2146 | inline typename m_Intrinsic_Ty<T0, T1, T2, T3, T4>::Ty |
2147 | m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3, |
2148 | const T4 &Op4) { |
2149 | return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2, Op3), |
2150 | m_Argument<4>(Op4)); |
2151 | } |
2152 | |
2153 | template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2, |
2154 | typename T3, typename T4, typename T5> |
2155 | inline typename m_Intrinsic_Ty<T0, T1, T2, T3, T4, T5>::Ty |
2156 | m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3, |
2157 | const T4 &Op4, const T5 &Op5) { |
2158 | return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2, Op3, Op4), |
2159 | m_Argument<5>(Op5)); |
2160 | } |
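// Illustrative multi-operand intrinsic match (a sketch; `V`, `A`, `B`, and
// `C` are placeholders): each overload combines the IntrinsicID_match with
// one Argument_match per operand via m_CombineAnd.
//
//   Value *A, *B, *C;
//   if (match(V, m_Intrinsic<Intrinsic::fma>(m_Value(A), m_Value(B),
//                                            m_Value(C)))) {
//     // V is "call @llvm.fma(A, B, C)".
//   }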
2161 | |
2162 | // Helper intrinsic matching specializations. |
2163 | template <typename Opnd0> |
2164 | inline typename m_Intrinsic_Ty<Opnd0>::Ty m_BitReverse(const Opnd0 &Op0) { |
2165 | return m_Intrinsic<Intrinsic::bitreverse>(Op0); |
2166 | } |
2167 | |
2168 | template <typename Opnd0> |
2169 | inline typename m_Intrinsic_Ty<Opnd0>::Ty m_BSwap(const Opnd0 &Op0) { |
2170 | return m_Intrinsic<Intrinsic::bswap>(Op0); |
2171 | } |
2172 | |
2173 | template <typename Opnd0> |
2174 | inline typename m_Intrinsic_Ty<Opnd0>::Ty m_FAbs(const Opnd0 &Op0) { |
2175 | return m_Intrinsic<Intrinsic::fabs>(Op0); |
2176 | } |
2177 | |
2178 | template <typename Opnd0> |
2179 | inline typename m_Intrinsic_Ty<Opnd0>::Ty m_FCanonicalize(const Opnd0 &Op0) { |
2180 | return m_Intrinsic<Intrinsic::canonicalize>(Op0); |
2181 | } |
2182 | |
2183 | template <typename Opnd0, typename Opnd1> |
2184 | inline typename m_Intrinsic_Ty<Opnd0, Opnd1>::Ty m_FMin(const Opnd0 &Op0, |
2185 | const Opnd1 &Op1) { |
2186 | return m_Intrinsic<Intrinsic::minnum>(Op0, Op1); |
2187 | } |
2188 | |
2189 | template <typename Opnd0, typename Opnd1> |
2190 | inline typename m_Intrinsic_Ty<Opnd0, Opnd1>::Ty m_FMax(const Opnd0 &Op0, |
2191 | const Opnd1 &Op1) { |
2192 | return m_Intrinsic<Intrinsic::maxnum>(Op0, Op1); |
2193 | } |
2194 | |
2195 | template <typename Opnd0, typename Opnd1, typename Opnd2> |
2196 | inline typename m_Intrinsic_Ty<Opnd0, Opnd1, Opnd2>::Ty |
2197 | m_FShl(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2) { |
2198 | return m_Intrinsic<Intrinsic::fshl>(Op0, Op1, Op2); |
2199 | } |
2200 | |
2201 | template <typename Opnd0, typename Opnd1, typename Opnd2> |
2202 | inline typename m_Intrinsic_Ty<Opnd0, Opnd1, Opnd2>::Ty |
2203 | m_FShr(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2) { |
2204 | return m_Intrinsic<Intrinsic::fshr>(Op0, Op1, Op2); |
2205 | } |
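// Illustrative use of the funnel-shift helpers (a sketch; names are
// placeholders). A rotate is a funnel shift whose two data operands are the
// same value, which m_Deferred can express:
//
//   Value *X, *ShAmt;
//   if (match(V, m_FShl(m_Value(X), m_Deferred(X), m_Value(ShAmt)))) {
//     // V is "call @llvm.fshl(X, X, ShAmt)", i.e. a rotate-left of X.
//   }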
2206 | |
2207 | //===----------------------------------------------------------------------===// |
2208 | // Matchers for two-operands operators with the operators in either order |
2209 | // |
2210 | |
2211 | /// Matches a BinaryOperator with LHS and RHS in either order. |
2212 | template <typename LHS, typename RHS> |
2213 | inline AnyBinaryOp_match<LHS, RHS, true> m_c_BinOp(const LHS &L, const RHS &R) { |
2214 | return AnyBinaryOp_match<LHS, RHS, true>(L, R); |
2215 | } |
2216 | |
2217 | /// Matches an ICmp with a predicate over LHS and RHS in either order. |
2218 | /// Swaps the predicate if operands are commuted. |
2219 | template <typename LHS, typename RHS> |
2220 | inline CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate, true> |
2221 | m_c_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R) { |
2222 | return CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate, true>(Pred, L, |
2223 | R); |
2224 | } |
2225 | |
2226 | /// Matches an Add with LHS and RHS in either order.
2227 | template <typename LHS, typename RHS> |
2228 | inline BinaryOp_match<LHS, RHS, Instruction::Add, true> m_c_Add(const LHS &L, |
2229 | const RHS &R) { |
2230 | return BinaryOp_match<LHS, RHS, Instruction::Add, true>(L, R); |
2231 | } |
2232 | |
2233 | /// Matches a Mul with LHS and RHS in either order. |
2234 | template <typename LHS, typename RHS> |
2235 | inline BinaryOp_match<LHS, RHS, Instruction::Mul, true> m_c_Mul(const LHS &L, |
2236 | const RHS &R) { |
2237 | return BinaryOp_match<LHS, RHS, Instruction::Mul, true>(L, R); |
2238 | } |
2239 | |
2240 | /// Matches an And with LHS and RHS in either order. |
2241 | template <typename LHS, typename RHS> |
2242 | inline BinaryOp_match<LHS, RHS, Instruction::And, true> m_c_And(const LHS &L, |
2243 | const RHS &R) { |
2244 | return BinaryOp_match<LHS, RHS, Instruction::And, true>(L, R); |
2245 | } |
2246 | |
2247 | /// Matches an Or with LHS and RHS in either order. |
2248 | template <typename LHS, typename RHS> |
2249 | inline BinaryOp_match<LHS, RHS, Instruction::Or, true> m_c_Or(const LHS &L, |
2250 | const RHS &R) { |
2251 | return BinaryOp_match<LHS, RHS, Instruction::Or, true>(L, R); |
2252 | } |
2253 | |
2254 | /// Matches an Xor with LHS and RHS in either order. |
2255 | template <typename LHS, typename RHS> |
2256 | inline BinaryOp_match<LHS, RHS, Instruction::Xor, true> m_c_Xor(const LHS &L, |
2257 | const RHS &R) { |
2258 | return BinaryOp_match<LHS, RHS, Instruction::Xor, true>(L, R); |
2259 | } |
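// Illustrative use of a commutative matcher (a sketch; `V` and `X` are
// placeholders): the 'true' template argument lets the operands match in
// either order.
//
//   Value *X;
//   // Matches both "add X, 1" and "add 1, X".
//   if (match(V, m_c_Add(m_Value(X), m_One()))) {
//     // X is the non-constant addend.
//   }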
2260 | |
2261 | /// Matches a 'Neg' as 'sub 0, V'. |
2262 | template <typename ValTy> |
2263 | inline BinaryOp_match<cst_pred_ty<is_zero_int>, ValTy, Instruction::Sub> |
2264 | m_Neg(const ValTy &V) { |
2265 | return m_Sub(m_ZeroInt(), V); |
2266 | } |
2267 | |
2268 | /// Matches a 'Neg' as 'sub nsw 0, V'. |
2269 | template <typename ValTy> |
2270 | inline OverflowingBinaryOp_match<cst_pred_ty<is_zero_int>, ValTy, |
2271 | Instruction::Sub, |
2272 | OverflowingBinaryOperator::NoSignedWrap> |
2273 | m_NSWNeg(const ValTy &V) { |
2274 | return m_NSWSub(m_ZeroInt(), V); |
2275 | } |
2276 | |
2277 | /// Matches a 'Not' as 'xor V, -1' or 'xor -1, V'. |
2278 | template <typename ValTy> |
2279 | inline BinaryOp_match<ValTy, cst_pred_ty<is_all_ones>, Instruction::Xor, true> |
2280 | m_Not(const ValTy &V) { |
2281 | return m_c_Xor(V, m_AllOnes()); |
2282 | } |
2283 | |
2284 | /// Matches an SMin with LHS and RHS in either order. |
2285 | template <typename LHS, typename RHS> |
2286 | inline MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty, true> |
2287 | m_c_SMin(const LHS &L, const RHS &R) { |
2288 | return MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty, true>(L, R); |
2289 | } |
2290 | /// Matches an SMax with LHS and RHS in either order. |
2291 | template <typename LHS, typename RHS> |
2292 | inline MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty, true> |
2293 | m_c_SMax(const LHS &L, const RHS &R) { |
2294 | return MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty, true>(L, R); |
2295 | } |
2296 | /// Matches a UMin with LHS and RHS in either order. |
2297 | template <typename LHS, typename RHS> |
2298 | inline MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty, true> |
2299 | m_c_UMin(const LHS &L, const RHS &R) { |
2300 | return MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty, true>(L, R); |
2301 | } |
2302 | /// Matches a UMax with LHS and RHS in either order. |
2303 | template <typename LHS, typename RHS> |
2304 | inline MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty, true> |
2305 | m_c_UMax(const LHS &L, const RHS &R) { |
2306 | return MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty, true>(L, R); |
2307 | } |
2308 | |
2309 | template <typename LHS, typename RHS> |
2310 | inline match_combine_or< |
2311 | match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty, true>, |
2312 | MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty, true>>, |
2313 | match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty, true>, |
2314 | MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty, true>>> |
2315 | m_c_MaxOrMin(const LHS &L, const RHS &R) { |
2316 | return m_CombineOr(m_CombineOr(m_c_SMax(L, R), m_c_SMin(L, R)), |
2317 | m_CombineOr(m_c_UMax(L, R), m_c_UMin(L, R))); |
2318 | } |
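// Illustrative use of m_c_MaxOrMin (a sketch; `V`, `X`, and `Y` are
// placeholders): accepts any signed or unsigned max/min of X and Y that
// MaxMin_match recognizes, with the operands in either order.
//
//   Value *X, *Y;
//   if (match(V, m_c_MaxOrMin(m_Value(X), m_Value(Y)))) {
//     // V is a smax/smin/umax/umin of X and Y.
//   }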
2319 | |
2320 | /// Matches FAdd with LHS and RHS in either order. |
2321 | template <typename LHS, typename RHS> |
2322 | inline BinaryOp_match<LHS, RHS, Instruction::FAdd, true> |
2323 | m_c_FAdd(const LHS &L, const RHS &R) { |
2324 | return BinaryOp_match<LHS, RHS, Instruction::FAdd, true>(L, R); |
2325 | } |
2326 | |
2327 | /// Matches FMul with LHS and RHS in either order. |
2328 | template <typename LHS, typename RHS> |
2329 | inline BinaryOp_match<LHS, RHS, Instruction::FMul, true> |
2330 | m_c_FMul(const LHS &L, const RHS &R) { |
2331 | return BinaryOp_match<LHS, RHS, Instruction::FMul, true>(L, R); |
2332 | } |
2333 | |
2334 | template <typename Opnd_t> struct Signum_match { |
2335 | Opnd_t Val; |
2336 | Signum_match(const Opnd_t &V) : Val(V) {} |
2337 | |
2338 | template <typename OpTy> bool match(OpTy *V) { |
2339 | unsigned TypeSize = V->getType()->getScalarSizeInBits(); |
2340 | if (TypeSize == 0) |
2341 | return false; |
2342 | |
2343 | unsigned ShiftWidth = TypeSize - 1; |
2344 | Value *OpL = nullptr, *OpR = nullptr; |
2345 | |
2346 | // This is the representation of signum we match: |
2347 | // |
2348 | // signum(x) == (x >> 63) | (-x >>u 63) |
2349 | // |
2350 | // An i1 value is its own signum, so it's correct to match |
2351 | // |
2352 | // signum(x) == (x >> 0) | (-x >>u 0) |
2353 | // |
2354 | // for i1 values. |
2355 | |
2356 | auto LHS = m_AShr(m_Value(OpL), m_SpecificInt(ShiftWidth)); |
2357 | auto RHS = m_LShr(m_Neg(m_Value(OpR)), m_SpecificInt(ShiftWidth)); |
2358 | auto Signum = m_Or(LHS, RHS); |
2359 | |
2360 | return Signum.match(V) && OpL == OpR && Val.match(OpL); |
2361 | } |
2362 | }; |
2363 | |
2364 | /// Matches a signum pattern. |
2365 | /// |
2366 | /// signum(x) = |
2367 | /// x > 0 -> 1 |
2368 | /// x == 0 -> 0 |
2369 | /// x < 0 -> -1 |
2370 | template <typename Val_t> inline Signum_match<Val_t> m_Signum(const Val_t &V) { |
2371 | return Signum_match<Val_t>(V); |
2372 | } |
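// Illustrative use of m_Signum (a sketch; `V` and `X` are placeholders):
//
//   Value *X;
//   // For i32, matches "(X >> 31) | (-X >>u 31)", which evaluates to
//   // -1, 0, or 1 according to the sign of X.
//   if (match(V, m_Signum(m_Value(X)))) {
//     // V computes signum(X).
//   }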
2373 | |
2374 | template <int Ind, typename Opnd_t> struct ExtractValue_match { |
2375 | Opnd_t Val; |
2376 | ExtractValue_match(const Opnd_t &V) : Val(V) {} |
2377 | |
2378 | template <typename OpTy> bool match(OpTy *V) { |
2379 | if (auto *I = dyn_cast<ExtractValueInst>(V)) { |
2380 | // If Ind is -1, don't inspect indices |
2381 | if (Ind != -1 && |
2382 | !(I->getNumIndices() == 1 && I->getIndices()[0] == (unsigned)Ind)) |
2383 | return false; |
2384 | return Val.match(I->getAggregateOperand()); |
2385 | } |
2386 | return false; |
2387 | } |
2388 | }; |
2389 | |
2390 | /// Match a single index ExtractValue instruction. |
2391 | /// For example m_ExtractValue<1>(...) |
2392 | template <int Ind, typename Val_t> |
2393 | inline ExtractValue_match<Ind, Val_t> m_ExtractValue(const Val_t &V) { |
2394 | return ExtractValue_match<Ind, Val_t>(V); |
2395 | } |
2396 | |
2397 | /// Match an ExtractValue instruction with any index. |
2398 | /// For example m_ExtractValue(...) |
2399 | template <typename Val_t> |
2400 | inline ExtractValue_match<-1, Val_t> m_ExtractValue(const Val_t &V) { |
2401 | return ExtractValue_match<-1, Val_t>(V); |
2402 | } |
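// Illustrative use of m_ExtractValue (a sketch; names are placeholders):
// peeking through the {result, overflow} aggregate of an overflow intrinsic.
//
//   Value *A, *B;
//   // Matches "extractvalue %agg, 1" where %agg = llvm.uadd.with.overflow(A, B).
//   if (match(V, m_ExtractValue<1>(m_Intrinsic<Intrinsic::uadd_with_overflow>(
//                    m_Value(A), m_Value(B))))) {
//     // V is the overflow bit of A + B.
//   }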
2403 | |
2404 | /// Matcher for a single index InsertValue instruction. |
2405 | template <int Ind, typename T0, typename T1> struct InsertValue_match { |
2406 | T0 Op0; |
2407 | T1 Op1; |
2408 | |
2409 | InsertValue_match(const T0 &Op0, const T1 &Op1) : Op0(Op0), Op1(Op1) {} |
2410 | |
2411 | template <typename OpTy> bool match(OpTy *V) { |
2412 | if (auto *I = dyn_cast<InsertValueInst>(V)) { |
2413 | return Op0.match(I->getOperand(0)) && Op1.match(I->getOperand(1)) && |
2414 | I->getNumIndices() == 1 && Ind == I->getIndices()[0]; |
2415 | } |
2416 | return false; |
2417 | } |
2418 | }; |
2419 | |
2420 | /// Matches a single index InsertValue instruction. |
2421 | template <int Ind, typename Val_t, typename Elt_t> |
2422 | inline InsertValue_match<Ind, Val_t, Elt_t> m_InsertValue(const Val_t &Val, |
2423 | const Elt_t &Elt) { |
2424 | return InsertValue_match<Ind, Val_t, Elt_t>(Val, Elt); |
2425 | } |
2426 | |
2427 | /// Matches patterns for `vscale`. This can either be a call to `llvm.vscale` or |
2428 | /// the constant expression |
2429 | /// `ptrtoint(gep <vscale x 1 x i8>, <vscale x 1 x i8>* null, i32 1)`
2430 | /// under the right conditions determined by DataLayout. |
2431 | struct VScaleVal_match { |
2432 | const DataLayout &DL; |
2433 | VScaleVal_match(const DataLayout &DL) : DL(DL) {} |
2434 | |
2435 | template <typename ITy> bool match(ITy *V) { |
2436 | if (m_Intrinsic<Intrinsic::vscale>().match(V)) |
2437 | return true; |
2438 | |
2439 | Value *Ptr; |
2440 | if (m_PtrToInt(m_Value(Ptr)).match(V)) { |
2441 | if (auto *GEP = dyn_cast<GEPOperator>(Ptr)) { |
2442 | auto *DerefTy = GEP->getSourceElementType(); |
2443 | if (GEP->getNumIndices() == 1 && isa<ScalableVectorType>(DerefTy) && |
2444 | m_Zero().match(GEP->getPointerOperand()) && |
2445 | m_SpecificInt(1).match(GEP->idx_begin()->get()) && |
2446 | DL.getTypeAllocSizeInBits(DerefTy).getKnownMinSize() == 8) |
2447 | return true; |
2448 | } |
2449 | } |
2450 | |
2451 | return false; |
2452 | } |
2453 | }; |
2454 | |
2455 | inline VScaleVal_match m_VScale(const DataLayout &DL) { |
2456 | return VScaleVal_match(DL); |
2457 | } |
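// Illustrative use of m_VScale (a sketch; `V` is a placeholder and `DL` is
// the module's DataLayout):
//
//   if (match(V, m_VScale(DL))) {
//     // V is llvm.vscale() or the equivalent ptrtoint-of-gep constant
//     // expression described above.
//   }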
2458 | |
2459 | template <typename LHS, typename RHS, unsigned Opcode> |
2460 | struct LogicalOp_match { |
2461 | LHS L; |
2462 | RHS R; |
2463 | |
2464 | LogicalOp_match(const LHS &L, const RHS &R) : L(L), R(R) {} |
2465 | |
2466 | template <typename T> bool match(T *V) { |
2467 | if (auto *I = dyn_cast<Instruction>(V)) { |
2468 | if (!I->getType()->isIntOrIntVectorTy(1)) |
2469 | return false; |
2470 | |
2471 | if (I->getOpcode() == Opcode && L.match(I->getOperand(0)) && |
2472 | R.match(I->getOperand(1))) |
2473 | return true; |
2474 | |
2475 | if (auto *SI = dyn_cast<SelectInst>(I)) { |
2476 | if (Opcode == Instruction::And) { |
2477 | if (const auto *C = dyn_cast<Constant>(SI->getFalseValue())) |
2478 | if (C->isNullValue() && L.match(SI->getCondition()) && |
2479 | R.match(SI->getTrueValue())) |
2480 | return true; |
2481 | } else { |
2482 |         assert(Opcode == Instruction::Or);
2483 | if (const auto *C = dyn_cast<Constant>(SI->getTrueValue())) |
2484 | if (C->isOneValue() && L.match(SI->getCondition()) && |
2485 | R.match(SI->getFalseValue())) |
2486 | return true; |
2487 | } |
2488 | } |
2489 | } |
2490 | |
2491 | return false; |
2492 | } |
2493 | }; |
2494 | |
2495 | /// Matches L && R either in the form of L & R or L ? R : false. |
2496 | /// Note that the latter form is poison-blocking. |
2497 | template <typename LHS, typename RHS> |
2498 | inline LogicalOp_match<LHS, RHS, Instruction::And> |
2499 | m_LogicalAnd(const LHS &L, const RHS &R) { |
2500 | return LogicalOp_match<LHS, RHS, Instruction::And>(L, R); |
2501 | } |
2502 | |
2503 | /// Matches L && R where L and R are arbitrary values. |
2504 | inline auto m_LogicalAnd() { return m_LogicalAnd(m_Value(), m_Value()); } |
2505 | |
2506 | /// Matches L || R either in the form of L | R or L ? true : R. |
2507 | /// Note that the latter form is poison-blocking. |
2508 | template <typename LHS, typename RHS> |
2509 | inline LogicalOp_match<LHS, RHS, Instruction::Or> |
2510 | m_LogicalOr(const LHS &L, const RHS &R) { |
2511 | return LogicalOp_match<LHS, RHS, Instruction::Or>(L, R); |
2512 | } |
2513 | |
2514 | /// Matches L || R where L and R are arbitrary values. |
2515 | inline auto m_LogicalOr() { |
2516 | return m_LogicalOr(m_Value(), m_Value()); |
2517 | } |
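// Illustrative use of the logical matchers (a sketch; `V`, `A`, and `B` are
// placeholders): both the bitwise i1 form and the poison-blocking select
// form are accepted.
//
//   Value *A, *B;
//   // Matches "and i1 A, B" as well as "select i1 A, i1 B, i1 false".
//   if (match(V, m_LogicalAnd(m_Value(A), m_Value(B)))) {
//     // A && B; in the select form, poison in B does not propagate when
//     // A is false.
//   }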
2518 | |
2519 | } // end namespace PatternMatch |
2520 | } // end namespace llvm |
2521 | |
2522 | #endif // LLVM_IR_PATTERNMATCH_H |