File: src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86/X86PartialReduction.cpp
Warning: line 289, column 26: Division by zero (the modulo in "ConcatMask[i] = (i % SubElts) + SubElts;" inside X86PartialReduction::trySADReplacement)
//===-- X86PartialReduction.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass looks for add instructions used by a horizontal reduction to see
// if we might be able to use pmaddwd or psadbw. Some cases of this require
// cross basic block knowledge and can't be done in SelectionDAG.
//
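// For example (illustrative IR, not taken from the source): a multiply such as
//   %m = mul <8 x i32> (sext <8 x i16> %a to <8 x i32>),
//                      (sext <8 x i16> %b to <8 x i32>)
// feeding a shuffle/add reduction pyramid can be narrowed so that SelectionDAG
// matches pmaddwd, and abs(sub(zext vXi8, zext vXi8)) leaves can become
// psadbw calls.
//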
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
#include "X86TargetMachine.h"

using namespace llvm;

#define DEBUG_TYPE "x86-partial-reduction"

namespace {

class X86PartialReduction : public FunctionPass {
  const DataLayout *DL;
  const X86Subtarget *ST;

public:
  static char ID; // Pass identification, replacement for typeid.

  X86PartialReduction() : FunctionPass(ID) { }

  bool runOnFunction(Function &Fn) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
  }

  StringRef getPassName() const override {
    return "X86 Partial Reduction";
  }

private:
  bool tryMAddReplacement(Instruction *Op);
  bool trySADReplacement(Instruction *Op);
};
}

FunctionPass *llvm::createX86PartialReductionPass() {
  return new X86PartialReduction();
}

char X86PartialReduction::ID = 0;

INITIALIZE_PASS(X86PartialReduction, DEBUG_TYPE,
                "X86 Partial Reduction", false, false)

bool X86PartialReduction::tryMAddReplacement(Instruction *Op) {
  if (!ST->hasSSE2())
    return false;

  // Need at least 8 elements.
  if (cast<FixedVectorType>(Op->getType())->getNumElements() < 8)
    return false;

  // Element type should be i32.
  if (!cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(32))
    return false;

  auto *Mul = dyn_cast<BinaryOperator>(Op);
  if (!Mul || Mul->getOpcode() != Instruction::Mul)
    return false;

  Value *LHS = Mul->getOperand(0);
  Value *RHS = Mul->getOperand(1);

  // LHS and RHS should be only used once, or if they are the same then only
  // used twice. Only check this when SSE4.1 is enabled and we have zext/sext
  // instructions, otherwise we use punpck to emulate zero extend in stages.
  // The truncates we need to do likely won't introduce new instructions in
  // that case.
  if (ST->hasSSE41()) {
    if (LHS == RHS) {
      if (!isa<Constant>(LHS) && !LHS->hasNUses(2))
        return false;
    } else {
      if (!isa<Constant>(LHS) && !LHS->hasOneUse())
        return false;
      if (!isa<Constant>(RHS) && !RHS->hasOneUse())
        return false;
    }
  }

  auto CanShrinkOp = [&](Value *Op) {
    auto IsFreeTruncation = [&](Value *Op) {
      if (auto *Cast = dyn_cast<CastInst>(Op)) {
        if (Cast->getParent() == Mul->getParent() &&
            (Cast->getOpcode() == Instruction::SExt ||
             Cast->getOpcode() == Instruction::ZExt) &&
            Cast->getOperand(0)->getType()->getScalarSizeInBits() <= 16)
          return true;
      }

      return isa<Constant>(Op);
    };

    // If the operation can be freely truncated and has enough sign bits we
    // can shrink.
    if (IsFreeTruncation(Op) &&
        ComputeNumSignBits(Op, *DL, 0, nullptr, Mul) > 16)
      return true;

    // SelectionDAG has limited support for truncating through an add or sub if
    // the inputs are freely truncatable.
    if (auto *BO = dyn_cast<BinaryOperator>(Op)) {
      if (BO->getParent() == Mul->getParent() &&
          IsFreeTruncation(BO->getOperand(0)) &&
          IsFreeTruncation(BO->getOperand(1)) &&
          ComputeNumSignBits(Op, *DL, 0, nullptr, Mul) > 16)
        return true;
    }

    return false;
  };
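  // For example (illustrative): a sext from <8 x i16> to <8 x i32> in the same
  // block as the multiply has at least 17 sign bits, so CanShrinkOp accepts it
  // and truncating back to 16 bits for pmaddwd loses no information.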

  // Both Ops need to be shrinkable.
  if (!CanShrinkOp(LHS) && !CanShrinkOp(RHS))
    return false;

  IRBuilder<> Builder(Mul);

  auto *MulTy = cast<FixedVectorType>(Op->getType());
  unsigned NumElts = MulTy->getNumElements();

  // Extract even elements and odd elements and add them together. This will
  // be pattern matched by SelectionDAG to pmaddwd. This instruction will be
  // half the original width.
  SmallVector<int, 16> EvenMask(NumElts / 2);
  SmallVector<int, 16> OddMask(NumElts / 2);
  for (int i = 0, e = NumElts / 2; i != e; ++i) {
    EvenMask[i] = i * 2;
    OddMask[i] = i * 2 + 1;
  }
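  // For example, with NumElts == 8 this yields EvenMask = {0,2,4,6} and
  // OddMask = {1,3,5,7}, so the add below sums adjacent products pairwise,
  // matching what pmaddwd computes.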
  // Creating a new mul so the replaceAllUsesWith below doesn't replace the
  // uses in the shuffles we're creating.
  Value *NewMul = Builder.CreateMul(Mul->getOperand(0), Mul->getOperand(1));
  Value *EvenElts = Builder.CreateShuffleVector(NewMul, NewMul, EvenMask);
  Value *OddElts = Builder.CreateShuffleVector(NewMul, NewMul, OddMask);
  Value *MAdd = Builder.CreateAdd(EvenElts, OddElts);

  // Concatenate zeroes to extend back to the original type.
  SmallVector<int, 32> ConcatMask(NumElts);
  std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
  Value *Zero = Constant::getNullValue(MAdd->getType());
  Value *Concat = Builder.CreateShuffleVector(MAdd, Zero, ConcatMask);
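  // MAdd has NumElts / 2 elements, so mask indices NumElts/2..NumElts-1 select
  // zeroes from the second shuffle operand, widening the result back to the
  // original vXi32 type.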

  Mul->replaceAllUsesWith(Concat);
  Mul->eraseFromParent();

  return true;
}

bool X86PartialReduction::trySADReplacement(Instruction *Op) {
  if (!ST->hasSSE2())
    return false;

  // TODO: There's nothing special about i32, any integer type above i16 should
  // work just as well.
  if (!cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(32))
    return false;

  // Operand should be a select.
  auto *SI = dyn_cast<SelectInst>(Op);
  if (!SI)
    return false;

  // Select needs to implement absolute value.
  Value *LHS, *RHS;
  auto SPR = matchSelectPattern(SI, LHS, RHS);
  if (SPR.Flavor != SPF_ABS)
    return false;

  // Need a subtract of two values.
  auto *Sub = dyn_cast<BinaryOperator>(LHS);
  if (!Sub || Sub->getOpcode() != Instruction::Sub)
    return false;

  // Look for zero extend from i8.
  auto getZeroExtendedVal = [](Value *Op) -> Value * {
    if (auto *ZExt = dyn_cast<ZExtInst>(Op))
      if (cast<VectorType>(ZExt->getOperand(0)->getType())
              ->getElementType()
              ->isIntegerTy(8))
        return ZExt->getOperand(0);

    return nullptr;
  };

  // Both operands of the subtract should be extends from vXi8.
  Value *Op0 = getZeroExtendedVal(Sub->getOperand(0));
  Value *Op1 = getZeroExtendedVal(Sub->getOperand(1));
  if (!Op0 || !Op1)
    return false;

  IRBuilder<> Builder(SI);

  auto *OpTy = cast<FixedVectorType>(Op->getType());
  unsigned NumElts = OpTy->getNumElements();

  unsigned IntrinsicNumElts;
  Intrinsic::ID IID;
  if (ST->hasBWI() && NumElts >= 64) {
    IID = Intrinsic::x86_avx512_psad_bw_512;
    IntrinsicNumElts = 64;
  } else if (ST->hasAVX2() && NumElts >= 32) {
    IID = Intrinsic::x86_avx2_psad_bw;
    IntrinsicNumElts = 32;
  } else {
    IID = Intrinsic::x86_sse2_psad_bw;
    IntrinsicNumElts = 16;
  }
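  // Each psadbw sums the absolute differences of 8 adjacent bytes into one
  // i64 lane, so the intrinsic chosen above returns a vXi64 value (e.g. the
  // 16-byte SSE2 form yields <2 x i64>); it is bitcast to vXi32 below.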

  Function *PSADBWFn = Intrinsic::getDeclaration(SI->getModule(), IID);

  if (NumElts < 16) {
    // Pad input with zeroes.
    SmallVector<int, 32> ConcatMask(16);
    for (unsigned i = 0; i != NumElts; ++i)
      ConcatMask[i] = i;
    for (unsigned i = NumElts; i != 16; ++i)
      ConcatMask[i] = (i % NumElts) + NumElts;
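    // For example (illustrative), NumElts == 4 gives the mask
    // {0,1,2,3, 4,5,6,7, 4,5,6,7, 4,5,6,7}: every index >= NumElts selects a
    // zero element from the second shuffle operand below.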

    Value *Zero = Constant::getNullValue(Op0->getType());
    Op0 = Builder.CreateShuffleVector(Op0, Zero, ConcatMask);
    Op1 = Builder.CreateShuffleVector(Op1, Zero, ConcatMask);
    NumElts = 16;
  }

  // Intrinsics produce vXi64 and need to be casted to vXi32.
  auto *I32Ty =
      FixedVectorType::get(Builder.getInt32Ty(), IntrinsicNumElts / 4);

  assert(NumElts % IntrinsicNumElts == 0 && "Unexpected number of elements!");
  unsigned NumSplits = NumElts / IntrinsicNumElts;

  // First collect the pieces we need.
  SmallVector<Value *, 4> Ops(NumSplits);
  for (unsigned i = 0; i != NumSplits; ++i) {
    SmallVector<int, 64> ExtractMask(IntrinsicNumElts);
    std::iota(ExtractMask.begin(), ExtractMask.end(), i * IntrinsicNumElts);
    Value *ExtractOp0 = Builder.CreateShuffleVector(Op0, Op0, ExtractMask);
    Value *ExtractOp1 = Builder.CreateShuffleVector(Op1, Op0, ExtractMask);
    Ops[i] = Builder.CreateCall(PSADBWFn, {ExtractOp0, ExtractOp1});
    Ops[i] = Builder.CreateBitCast(Ops[i], I32Ty);
  }

  assert(isPowerOf2_32(NumSplits) && "Expected power of 2 splits");
  unsigned Stages = Log2_32(NumSplits);
  for (unsigned s = Stages; s > 0; --s) {
    unsigned NumConcatElts =
        cast<FixedVectorType>(Ops[0]->getType())->getNumElements() * 2;
    for (unsigned i = 0; i != 1U << (s - 1); ++i) {
      SmallVector<int, 64> ConcatMask(NumConcatElts);
      std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
      Ops[i] = Builder.CreateShuffleVector(Ops[i*2], Ops[i*2+1], ConcatMask);
    }
  }
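  // For example, NumSplits == 4 runs two stages: the first concatenates
  // Ops[0..3] pairwise into two double-width vectors, the second concatenates
  // those two into the single full-width result left in Ops[0].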

  // At this point the final value should be in Ops[0]. Now we need to adjust
  // it to the final original type.
  NumElts = cast<FixedVectorType>(OpTy)->getNumElements();
  if (NumElts == 2) {
    // Extract down to 2 elements.
    Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ArrayRef<int>{0, 1});
  } else if (NumElts >= 8) {
    SmallVector<int, 32> ConcatMask(NumElts);
    unsigned SubElts =
        cast<FixedVectorType>(Ops[0]->getType())->getNumElements();
    for (unsigned i = 0; i != SubElts; ++i)
      ConcatMask[i] = i;
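    // Note on the analyzer's "Division by zero" report at the modulo below:
    // Ops[0] is a psadbw result bitcast to at least <4 x i32>, so SubElts
    // should always be >= 4 here; the checker cannot see that invariant, so
    // this looks like a false positive.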
    for (unsigned i = SubElts; i != NumElts; ++i)
      ConcatMask[i] = (i % SubElts) + SubElts;

    Value *Zero = Constant::getNullValue(Ops[0]->getType());
    Ops[0] = Builder.CreateShuffleVector(Ops[0], Zero, ConcatMask);
  }

  SI->replaceAllUsesWith(Ops[0]);
  SI->eraseFromParent();

  return true;
}

// Walk backwards from the ExtractElementInst and determine if it is the end of
// a horizontal reduction. Return the input to the reduction if we find one.
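// For example (illustrative IR), a 4-element reduction ending in the extract
// looks like:
//   %s1 = shufflevector <4 x i32> %v, <4 x i32> poison, <2, 3, undef, undef>
//   %a1 = add <4 x i32> %v, %s1
//   %s2 = shufflevector <4 x i32> %a1, <4 x i32> poison, <1, undef, undef, undef>
//   %a2 = add <4 x i32> %a1, %s2
//   %r  = extractelement <4 x i32> %a2, i32 0
// The loop below starts at %a2 and walks the pyramid back to the input %v.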
static Value *matchAddReduction(const ExtractElementInst &EE) {
  // Make sure we're extracting index 0.
  auto *Index = dyn_cast<ConstantInt>(EE.getIndexOperand());
  if (!Index || !Index->isNullValue())
    return nullptr;

  const auto *BO = dyn_cast<BinaryOperator>(EE.getVectorOperand());
  if (!BO || BO->getOpcode() != Instruction::Add || !BO->hasOneUse())
    return nullptr;

  unsigned NumElems = cast<FixedVectorType>(BO->getType())->getNumElements();
  // Ensure the reduction size is a power of 2.
  if (!isPowerOf2_32(NumElems))
    return nullptr;

  const Value *Op = BO;
  unsigned Stages = Log2_32(NumElems);
  for (unsigned i = 0; i != Stages; ++i) {
    const auto *BO = dyn_cast<BinaryOperator>(Op);
    if (!BO || BO->getOpcode() != Instruction::Add)
      return nullptr;

    // If this isn't the first add, then it should only have 2 users, the
    // shuffle and another add which we checked in the previous iteration.
    if (i != 0 && !BO->hasNUses(2))
      return nullptr;

    Value *LHS = BO->getOperand(0);
    Value *RHS = BO->getOperand(1);

    auto *Shuffle = dyn_cast<ShuffleVectorInst>(LHS);
    if (Shuffle) {
      Op = RHS;
    } else {
      Shuffle = dyn_cast<ShuffleVectorInst>(RHS);
      Op = LHS;
    }

    // The first operand of the shuffle should be the same as the other operand
    // of the bin op.
    if (!Shuffle || Shuffle->getOperand(0) != Op)
      return nullptr;

    // Verify the shuffle has the expected (at this stage of the pyramid) mask.
    unsigned MaskEnd = 1 << i;
    for (unsigned Index = 0; Index < MaskEnd; ++Index)
      if (Shuffle->getMaskValue(Index) != (int)(MaskEnd + Index))
        return nullptr;
  }

  return const_cast<Value *>(Op);
}

// See if this BO is reachable from this Phi by walking forward through single
// use BinaryOperators with the same opcode. If we get back to BO, we know
// we've found a loop and it is safe to step through this Add to find more
// leaves.
static bool isReachableFromPHI(PHINode *Phi, BinaryOperator *BO) {
  // The PHI itself should only have one use.
  if (!Phi->hasOneUse())
    return false;

  Instruction *U = cast<Instruction>(*Phi->user_begin());
  if (U == BO)
    return true;

  while (U->hasOneUse() && U->getOpcode() == BO->getOpcode())
    U = cast<Instruction>(*U->user_begin());

  return U == BO;
}

// Collect all the leaves of the tree of adds that feeds into the horizontal
// reduction. Root is the Value that is used by the horizontal reduction.
// We look through single use phis, single use adds, or adds that are used by
// a phi that forms a loop with the add.
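// For example (illustrative): in a vectorized accumulator loop
//   %acc = phi <8 x i32> [ zeroinitializer, %entry ], [ %sum, %loop ]
//   %sum = add <8 x i32> %acc, %val
// the walk from the reduction root steps through %sum and %acc (the phi loops
// back to the add), and %val ends up in Leaves for the replacement helpers.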
static void collectLeaves(Value *Root, SmallVectorImpl<Instruction *> &Leaves) {
  SmallPtrSet<Value *, 8> Visited;
  SmallVector<Value *, 8> Worklist;
  Worklist.push_back(Root);

  while (!Worklist.empty()) {
    Value *V = Worklist.pop_back_val();
    if (!Visited.insert(V).second)
      continue;

    if (auto *PN = dyn_cast<PHINode>(V)) {
      // PHI node should have single use unless it is the root node, then it
      // has 2 uses.
      if (!PN->hasNUses(PN == Root ? 2 : 1))
        break;

      // Push incoming values to the worklist.
      append_range(Worklist, PN->incoming_values());

      continue;
    }

    if (auto *BO = dyn_cast<BinaryOperator>(V)) {
      if (BO->getOpcode() == Instruction::Add) {
        // Simple case. Single use, just push its operands to the worklist.
        if (BO->hasNUses(BO == Root ? 2 : 1)) {
          append_range(Worklist, BO->operands());
          continue;
        }

        // If there is an additional use, make sure it is an unvisited phi that
        // gets us back to this node.
        if (BO->hasNUses(BO == Root ? 3 : 2)) {
          PHINode *PN = nullptr;
          for (auto *U : Root->users())
            if (auto *P = dyn_cast<PHINode>(U))
              if (!Visited.count(P))
                PN = P;

          // If we didn't find a 2-input PHI then this isn't a case we can
          // handle.
          if (!PN || PN->getNumIncomingValues() != 2)
            continue;

          // Walk forward from this phi to see if it reaches back to this add.
          if (!isReachableFromPHI(PN, BO))
            continue;

          // The phi forms a loop with this Add, push its operands.
          append_range(Worklist, BO->operands());
        }
      }
    }

    // Not an add or phi, make it a leaf.
    if (auto *I = dyn_cast<Instruction>(V)) {
      if (!V->hasNUses(I == Root ? 2 : 1))
        continue;

      // Add this as a leaf.
      Leaves.push_back(I);
    }
  }
}

bool X86PartialReduction::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    return false;

  auto &TM = TPC->getTM<X86TargetMachine>();
  ST = TM.getSubtargetImpl(F);

  DL = &F.getParent()->getDataLayout();

  bool MadeChange = false;
  for (auto &BB : F) {
    for (auto &I : BB) {
      auto *EE = dyn_cast<ExtractElementInst>(&I);
      if (!EE)
        continue;

      // First find a reduction tree.
      // FIXME: Do we need to handle other opcodes than Add?
      Value *Root = matchAddReduction(*EE);
      if (!Root)
        continue;

      SmallVector<Instruction *, 8> Leaves;
      collectLeaves(Root, Leaves);

      for (Instruction *I : Leaves) {
        if (tryMAddReplacement(I)) {
          MadeChange = true;
          continue;
        }

        // Don't do SAD matching on the root node. SelectionDAG already
        // has support for that and currently generates better code.
        if (I != Root && trySADReplacement(I))
          MadeChange = true;
      }
    }
  }

  return MadeChange;
}

File: src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86/X86Subtarget.h

//===-- X86Subtarget.h - Define Subtarget for the X86 ----------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file declares the X86 specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_X86_X86SUBTARGET_H
#define LLVM_LIB_TARGET_X86_X86SUBTARGET_H

#include "X86FrameLowering.h"
#include "X86ISelLowering.h"
#include "X86InstrInfo.h"
#include "X86SelectionDAGInfo.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/CallingConv.h"
#include <climits>
#include <memory>

#define GET_SUBTARGETINFO_HEADER
#include "X86GenSubtargetInfo.inc"

namespace llvm {

class CallLowering;
class GlobalValue;
class InstructionSelector;
class LegalizerInfo;
class RegisterBankInfo;
class StringRef;
class TargetMachine;

/// The X86 backend supports a number of different styles of PIC.
///
namespace PICStyles {

enum class Style {
  StubPIC,          // Used on i386-darwin in pic mode.
  GOT,              // Used on 32 bit elf when in pic mode.
  RIPRel,           // Used on X86-64 when in pic mode.
  None              // Set when not in pic mode.
};

} // end namespace PICStyles
51 | |||||
52 | class X86Subtarget final : public X86GenSubtargetInfo { | ||||
53 | // NOTE: Do not add anything new to this list. Coarse, CPU name based flags | ||||
54 | // are not a good idea. We should be migrating away from these. | ||||
55 | enum X86ProcFamilyEnum { | ||||
56 | Others, | ||||
57 | IntelAtom, | ||||
58 | IntelSLM | ||||
59 | }; | ||||
60 | |||||
61 | enum X86SSEEnum { | ||||
62 | NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512F | ||||
63 | }; | ||||
64 | |||||
65 | enum X863DNowEnum { | ||||
66 | NoThreeDNow, MMX, ThreeDNow, ThreeDNowA | ||||
67 | }; | ||||
68 | |||||
69 | /// X86 processor family: Intel Atom, and others | ||||
70 | X86ProcFamilyEnum X86ProcFamily = Others; | ||||
71 | |||||
72 | /// Which PIC style to use | ||||
73 | PICStyles::Style PICStyle; | ||||
74 | |||||
75 | const TargetMachine &TM; | ||||
76 | |||||
77 | /// SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or none supported. | ||||
78 | X86SSEEnum X86SSELevel = NoSSE; | ||||
79 | |||||
80 | /// MMX, 3DNow, 3DNow Athlon, or none supported. | ||||
81 | X863DNowEnum X863DNowLevel = NoThreeDNow; | ||||
82 | |||||
83 | /// True if the processor supports X87 instructions. | ||||
84 | bool HasX87 = false; | ||||
85 | |||||
86 | /// True if the processor supports CMPXCHG8B. | ||||
87 | bool HasCmpxchg8b = false; | ||||
88 | |||||
89 | /// True if this processor has NOPL instruction | ||||
90 | /// (generally pentium pro+). | ||||
91 | bool HasNOPL = false; | ||||
92 | |||||
93 | /// True if this processor has conditional move instructions | ||||
94 | /// (generally pentium pro+). | ||||
95 | bool HasCMov = false; | ||||
96 | |||||
97 | /// True if the processor supports X86-64 instructions. | ||||
98 | bool HasX86_64 = false; | ||||
99 | |||||
  /// True if the processor supports POPCNT.
  bool HasPOPCNT = false;

  /// True if the processor supports SSE4A instructions.
  bool HasSSE4A = false;

  /// Target has AES instructions
  bool HasAES = false;
  bool HasVAES = false;

  /// Target has FXSAVE/FXRESTOR instructions
  bool HasFXSR = false;

  /// Target has XSAVE instructions
  bool HasXSAVE = false;

  /// Target has XSAVEOPT instructions
  bool HasXSAVEOPT = false;

  /// Target has XSAVEC instructions
  bool HasXSAVEC = false;

  /// Target has XSAVES instructions
  bool HasXSAVES = false;

  /// Target has carry-less multiplication
  bool HasPCLMUL = false;
  bool HasVPCLMULQDQ = false;

  /// Target has Galois Field Arithmetic instructions
  bool HasGFNI = false;

  /// Target has 3-operand fused multiply-add
  bool HasFMA = false;

  /// Target has 4-operand fused multiply-add
  bool HasFMA4 = false;

  /// Target has XOP instructions
  bool HasXOP = false;

  /// Target has TBM instructions.
  bool HasTBM = false;

  /// Target has LWP instructions
  bool HasLWP = false;

  /// True if the processor has the MOVBE instruction.
  bool HasMOVBE = false;

  /// True if the processor has the RDRAND instruction.
  bool HasRDRAND = false;

  /// Processor has 16-bit floating point conversion instructions.
  bool HasF16C = false;

  /// Processor has FS/GS base instructions.
  bool HasFSGSBase = false;

  /// Processor has LZCNT instruction.
  bool HasLZCNT = false;

  /// Processor has BMI1 instructions.
  bool HasBMI = false;

  /// Processor has BMI2 instructions.
  bool HasBMI2 = false;

  /// Processor has VBMI instructions.
  bool HasVBMI = false;

  /// Processor has VBMI2 instructions.
  bool HasVBMI2 = false;

  /// Processor has Integer Fused Multiply Add
  bool HasIFMA = false;

  /// Processor has RTM instructions.
  bool HasRTM = false;

  /// Processor has ADX instructions.
  bool HasADX = false;

  /// Processor has SHA instructions.
  bool HasSHA = false;

  /// Processor has PRFCHW instructions.
  bool HasPRFCHW = false;

  /// Processor has RDSEED instructions.
  bool HasRDSEED = false;

  /// Processor has LAHF/SAHF instructions in 64-bit mode.
  bool HasLAHFSAHF64 = false;

  /// Processor has MONITORX/MWAITX instructions.
  bool HasMWAITX = false;

  /// Processor has Cache Line Zero instruction
  bool HasCLZERO = false;

  /// Processor has Cache Line Demote instruction
  bool HasCLDEMOTE = false;

  /// Processor has MOVDIRI instruction (direct store integer).
  bool HasMOVDIRI = false;

  /// Processor has MOVDIR64B instruction (direct store 64 bytes).
  bool HasMOVDIR64B = false;

  /// Processor has ptwrite instruction.
  bool HasPTWRITE = false;

  /// Processor has Prefetch with intent to Write instruction
  bool HasPREFETCHWT1 = false;

  /// True if SHLD instructions are slow.
  bool IsSHLDSlow = false;

  /// True if the PMULLD instruction is slow compared to PMULLW/PMULHW and
  /// PMULUDQ.
  bool IsPMULLDSlow = false;

  /// True if the PMADDWD instruction is slow compared to PMULLD.
  bool IsPMADDWDSlow = false;

  /// True if unaligned memory accesses of 16-bytes are slow.
  bool IsUAMem16Slow = false;

  /// True if unaligned memory accesses of 32-bytes are slow.
  bool IsUAMem32Slow = false;

  /// True if SSE operations can have unaligned memory operands.
  /// This may require setting a configuration bit in the processor.
  bool HasSSEUnalignedMem = false;

  /// True if this processor has the CMPXCHG16B instruction;
  /// this is true for most x86-64 chips, but not the first AMD chips.
  bool HasCmpxchg16b = false;

  /// True if the LEA instruction should be used for adjusting
  /// the stack pointer. This is an optimization for Intel Atom processors.
  bool UseLeaForSP = false;

  /// True if POPCNT instruction has a false dependency on the destination register.
  bool HasPOPCNTFalseDeps = false;

  /// True if LZCNT/TZCNT instructions have a false dependency on the destination register.
  bool HasLZCNTFalseDeps = false;

  /// True if it's preferable to combine to a single cross-lane shuffle
  /// using a variable mask over multiple fixed shuffles.
  bool HasFastVariableCrossLaneShuffle = false;

  /// True if it's preferable to combine to a single per-lane shuffle
  /// using a variable mask over multiple fixed shuffles.
  bool HasFastVariablePerLaneShuffle = false;

  /// True if vzeroupper instructions should be inserted after code that uses
  /// ymm or zmm registers.
  bool InsertVZEROUPPER = false;

  /// True if there is no performance penalty for writing NOPs with up to
  /// 7 bytes.
  bool HasFast7ByteNOP = false;

  /// True if there is no performance penalty for writing NOPs with up to
  /// 11 bytes.
  bool HasFast11ByteNOP = false;

  /// True if there is no performance penalty for writing NOPs with up to
  /// 15 bytes.
  bool HasFast15ByteNOP = false;

  /// True if gather is reasonably fast. This is true for Skylake client and
  /// all AVX-512 CPUs.
  bool HasFastGather = false;

  /// True if hardware SQRTSS instruction is at least as fast (latency) as
  /// RSQRTSS followed by a Newton-Raphson iteration.
  bool HasFastScalarFSQRT = false;

  /// True if hardware SQRTPS/VSQRTPS instructions are at least as fast
  /// (throughput) as RSQRTPS/VRSQRTPS followed by a Newton-Raphson iteration.
  bool HasFastVectorFSQRT = false;

  /// True if 8-bit divisions are significantly faster than
  /// 32-bit divisions and should be used when possible.
  bool HasSlowDivide32 = false;

  /// True if 32-bit divides are significantly faster than
  /// 64-bit divisions and should be used when possible.
  bool HasSlowDivide64 = false;

  /// True if LZCNT instruction is fast.
  bool HasFastLZCNT = false;

  /// True if SHLD based rotate is fast.
  bool HasFastSHLDRotate = false;

  /// True if the processor supports macrofusion.
  bool HasMacroFusion = false;

  /// True if the processor supports branch fusion.
  bool HasBranchFusion = false;

  /// True if the processor has enhanced REP MOVSB/STOSB.
  bool HasERMSB = false;

  /// True if the processor has fast short REP MOV.
  bool HasFSRM = false;

  /// True if the short functions should be padded to prevent
  /// a stall when returning too early.
  bool PadShortFunctions = false;

  /// True if two memory operand instructions should use a temporary register
  /// instead.
  bool SlowTwoMemOps = false;

  /// True if the LEA instruction inputs have to be ready at address generation
  /// (AG) time.
  bool LEAUsesAG = false;

  /// True if the LEA instruction with certain arguments is slow
  bool SlowLEA = false;

  /// True if the LEA instruction has all three source operands: base, index,
  /// and offset or if the LEA instruction uses base and index registers where
  /// the base is EBP, RBP, or R13
  bool Slow3OpsLEA = false;

  /// True if INC and DEC instructions are slow when writing to flags
  bool SlowIncDec = false;

  /// Processor has AVX-512 PreFetch Instructions
  bool HasPFI = false;

  /// Processor has AVX-512 Exponential and Reciprocal Instructions
  bool HasERI = false;

  /// Processor has AVX-512 Conflict Detection Instructions
  bool HasCDI = false;

  /// Processor has AVX-512 population count Instructions
  bool HasVPOPCNTDQ = false;

  /// Processor has AVX-512 Doubleword and Quadword instructions
  bool HasDQI = false;

  /// Processor has AVX-512 Byte and Word instructions
  bool HasBWI = false;

  /// Processor has AVX-512 Vector Length eXtensions
  bool HasVLX = false;

  /// Processor has PKU extensions
  bool HasPKU = false;

  /// Processor has AVX-512 Vector Neural Network Instructions
  bool HasVNNI = false;

  /// Processor has AVX Vector Neural Network Instructions
  bool HasAVXVNNI = false;

  /// Processor has AVX-512 bfloat16 floating-point extensions
  bool HasBF16 = false;

  /// Processor supports ENQCMD instructions
  bool HasENQCMD = false;

  /// Processor has AVX-512 Bit Algorithms instructions
  bool HasBITALG = false;

  /// Processor has AVX-512 vp2intersect instructions
  bool HasVP2INTERSECT = false;

  /// Processor supports CET SHSTK - Control-Flow Enforcement Technology
  /// using Shadow Stack
  bool HasSHSTK = false;

  /// Processor supports Invalidate Process-Context Identifier
  bool HasINVPCID = false;

  /// Processor has Software Guard Extensions
  bool HasSGX = false;

  /// Processor supports Flush Cache Line instruction
  bool HasCLFLUSHOPT = false;

  /// Processor supports Cache Line Write Back instruction
  bool HasCLWB = false;

  /// Processor supports Write Back No Invalidate instruction
  bool HasWBNOINVD = false;

  /// Processor supports RDPID instruction
  bool HasRDPID = false;

  /// Processor supports WaitPKG instructions
  bool HasWAITPKG = false;

  /// Processor supports PCONFIG instruction
  bool HasPCONFIG = false;

  /// Processor supports key locker instructions
  bool HasKL = false;

  /// Processor supports key locker wide instructions
  bool HasWIDEKL = false;

  /// Processor supports HRESET instruction
  bool HasHRESET = false;

  /// Processor supports SERIALIZE instruction
  bool HasSERIALIZE = false;

  /// Processor supports TSXLDTRK instruction
  bool HasTSXLDTRK = false;

  /// Processor has AMX support
  bool HasAMXTILE = false;
  bool HasAMXBF16 = false;
  bool HasAMXINT8 = false;

  /// Processor supports User Level Interrupt instructions
  bool HasUINTR = false;

  /// Processor has a single uop BEXTR implementation.
  bool HasFastBEXTR = false;

  /// Try harder to combine to horizontal vector ops if they are fast.
  bool HasFastHorizontalOps = false;

  /// Prefer a left/right scalar logical shifts pair over a shift+and pair.
  bool HasFastScalarShiftMasks = false;

  /// Prefer a left/right vector logical shifts pair over a shift+and pair.
  bool HasFastVectorShiftMasks = false;

  /// Prefer a movbe over a single-use load + bswap / single-use bswap + store.
  bool HasFastMOVBE = false;

  /// Use a retpoline thunk rather than indirect calls to block speculative
  /// execution.
  bool UseRetpolineIndirectCalls = false;

  /// Use a retpoline thunk or remove any indirect branch to block speculative
  /// execution.
  bool UseRetpolineIndirectBranches = false;

  /// Deprecated flag, query `UseRetpolineIndirectCalls` and
  /// `UseRetpolineIndirectBranches` instead.
  bool DeprecatedUseRetpoline = false;

  /// When using a retpoline thunk, call an externally provided thunk rather
  /// than emitting one inside the compiler.
  bool UseRetpolineExternalThunk = false;

  /// Prevent generation of indirect call/branch instructions from memory,
  /// and force all indirect call/branch instructions from a register to be
  /// preceded by an LFENCE. Also decompose RET instructions into a
  /// POP+LFENCE+JMP sequence.
  bool UseLVIControlFlowIntegrity = false;

  /// Enable Speculative Execution Side Effect Suppression
  bool UseSpeculativeExecutionSideEffectSuppression = false;

  /// Insert LFENCE instructions to prevent data speculatively injected into
  /// loads from being used maliciously.
  bool UseLVILoadHardening = false;

  /// Use software floating point for code generation.
  bool UseSoftFloat = false;

  /// Use alias analysis during code generation.
  bool UseAA = false;

  /// The minimum alignment known to hold of the stack frame on
  /// entry to the function and which must be maintained by every function.
  Align stackAlignment = Align(4);

  Align TileConfigAlignment = Align(4);

  /// Whether function prologues should save register arguments on the stack.
  bool SaveArgs = false;

  /// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops.
  ///
  // FIXME: this is a known good value for Yonah. How about others?
  unsigned MaxInlineSizeThreshold = 128;

  /// Indicates target prefers 128 bit instructions.
  bool Prefer128Bit = false;

  /// Indicates target prefers 256 bit instructions.
  bool Prefer256Bit = false;

  /// Indicates target prefers AVX512 mask registers.
  bool PreferMaskRegisters = false;

  /// Use Goldmont specific floating point div/sqrt costs.
  bool UseGLMDivSqrtCosts = false;

  /// What processor and OS we're targeting.
  Triple TargetTriple;

  /// GlobalISel related APIs.
  std::unique_ptr<CallLowering> CallLoweringInfo;
  std::unique_ptr<LegalizerInfo> Legalizer;
  std::unique_ptr<RegisterBankInfo> RegBankInfo;
  std::unique_ptr<InstructionSelector> InstSelector;

private:
  /// Override the stack alignment.
  MaybeAlign StackAlignOverride;

  /// Preferred vector width from function attribute.
  unsigned PreferVectorWidthOverride;

  /// Resolved preferred vector width from function attribute and subtarget
  /// features.
  unsigned PreferVectorWidth = UINT32_MAX;

  /// Required vector width from function attribute.
  unsigned RequiredVectorWidth;

  /// True if compiling for 64-bit, false for 16-bit or 32-bit.
  bool In64BitMode = false;

  /// True if compiling for 32-bit, false for 16-bit or 64-bit.
  bool In32BitMode = false;

  /// True if compiling for 16-bit, false for 32-bit or 64-bit.
  bool In16BitMode = false;

  X86SelectionDAGInfo TSInfo;
  // Ordering here is important. X86InstrInfo initializes X86RegisterInfo which
  // X86TargetLowering needs.
  X86InstrInfo InstrInfo;
  X86TargetLowering TLInfo;
  X86FrameLowering FrameLowering;

public:
  /// This constructor initializes the data members to match that
  /// of the specified triple.
  ///
  X86Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU, StringRef FS,
               const X86TargetMachine &TM, MaybeAlign StackAlignOverride,
               unsigned PreferVectorWidthOverride,
               unsigned RequiredVectorWidth);

  const X86TargetLowering *getTargetLowering() const override {
    return &TLInfo;
  }

  const X86InstrInfo *getInstrInfo() const override { return &InstrInfo; }

  const X86FrameLowering *getFrameLowering() const override {
    return &FrameLowering;
  }

  const X86SelectionDAGInfo *getSelectionDAGInfo() const override {
    return &TSInfo;
  }

  const X86RegisterInfo *getRegisterInfo() const override {
    return &getInstrInfo()->getRegisterInfo();
  }

  bool getSaveArgs() const { return SaveArgs; }

  unsigned getTileConfigSize() const { return 64; }
  Align getTileConfigAlignment() const { return TileConfigAlignment; }

  /// Returns the minimum alignment known to hold of the
  /// stack frame on entry to the function and which must be maintained by every
  /// function for this subtarget.
  Align getStackAlignment() const { return stackAlignment; }

  /// Returns the maximum memset / memcpy size
  /// that still makes it profitable to inline the call.
  unsigned getMaxInlineSizeThreshold() const { return MaxInlineSizeThreshold; }

  /// ParseSubtargetFeatures - Parses features string setting specified
  /// subtarget options. Definition of function is auto generated by tblgen.
  void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);

  /// Methods used by Global ISel
  const CallLowering *getCallLowering() const override;
  InstructionSelector *getInstructionSelector() const override;
  const LegalizerInfo *getLegalizerInfo() const override;
  const RegisterBankInfo *getRegBankInfo() const override;

private:
  /// Initialize the full set of dependencies so we can use an initializer
  /// list for X86Subtarget.
  X86Subtarget &initializeSubtargetDependencies(StringRef CPU,
                                                StringRef TuneCPU,
                                                StringRef FS);
  void initSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);

public:
  /// Is this x86_64? (disregarding specific ABI / programming model)
  bool is64Bit() const {
    return In64BitMode;
  }

  bool is32Bit() const {
    return In32BitMode;
  }

  bool is16Bit() const {
    return In16BitMode;
  }

  /// Is this x86_64 with the ILP32 programming model (x32 ABI)?
  bool isTarget64BitILP32() const {
    return In64BitMode && (TargetTriple.isX32() || TargetTriple.isOSNaCl());
  }

  /// Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
  bool isTarget64BitLP64() const {
    return In64BitMode && (!TargetTriple.isX32() && !TargetTriple.isOSNaCl());
  }

  PICStyles::Style getPICStyle() const { return PICStyle; }
  void setPICStyle(PICStyles::Style Style) { PICStyle = Style; }

  bool hasX87() const { return HasX87; }
  bool hasCmpxchg8b() const { return HasCmpxchg8b; }
  bool hasNOPL() const { return HasNOPL; }
  // SSE codegen depends on cmovs, and all SSE1+ processors support them.
  // All 64-bit processors support cmov.
  bool hasCMov() const { return HasCMov || X86SSELevel >= SSE1 || is64Bit(); }
  bool hasSSE1() const { return X86SSELevel >= SSE1; }
  bool hasSSE2() const { return X86SSELevel >= SSE2; }
  bool hasSSE3() const { return X86SSELevel >= SSE3; }
  bool hasSSSE3() const { return X86SSELevel >= SSSE3; }
  bool hasSSE41() const { return X86SSELevel >= SSE41; }
  bool hasSSE42() const { return X86SSELevel >= SSE42; }
  bool hasAVX() const { return X86SSELevel >= AVX; }
  bool hasAVX2() const { return X86SSELevel >= AVX2; }
  bool hasAVX512() const { return X86SSELevel >= AVX512F; }
  bool hasInt256() const { return hasAVX2(); }
  bool hasSSE4A() const { return HasSSE4A; }
  bool hasMMX() const { return X863DNowLevel >= MMX; }
  bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
  bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
  bool hasPOPCNT() const { return HasPOPCNT; }
  bool hasAES() const { return HasAES; }
  bool hasVAES() const { return HasVAES; }
  bool hasFXSR() const { return HasFXSR; }
  bool hasXSAVE() const { return HasXSAVE; }
  bool hasXSAVEOPT() const { return HasXSAVEOPT; }
  bool hasXSAVEC() const { return HasXSAVEC; }
  bool hasXSAVES() const { return HasXSAVES; }
  bool hasPCLMUL() const { return HasPCLMUL; }
  bool hasVPCLMULQDQ() const { return HasVPCLMULQDQ; }
  bool hasGFNI() const { return HasGFNI; }
  // Prefer FMA4 to FMA - it's better for commutation/memory folding and
  // has equal or better performance on all supported targets.
  bool hasFMA() const { return HasFMA; }
  bool hasFMA4() const { return HasFMA4; }
  bool hasAnyFMA() const { return hasFMA() || hasFMA4(); }
  bool hasXOP() const { return HasXOP; }
  bool hasTBM() const { return HasTBM; }
  bool hasLWP() const { return HasLWP; }
  bool hasMOVBE() const { return HasMOVBE; }
  bool hasRDRAND() const { return HasRDRAND; }
  bool hasF16C() const { return HasF16C; }
  bool hasFSGSBase() const { return HasFSGSBase; }
  bool hasLZCNT() const { return HasLZCNT; }
  bool hasBMI() const { return HasBMI; }
  bool hasBMI2() const { return HasBMI2; }
  bool hasVBMI() const { return HasVBMI; }
  bool hasVBMI2() const { return HasVBMI2; }
  bool hasIFMA() const { return HasIFMA; }
  bool hasRTM() const { return HasRTM; }
  bool hasADX() const { return HasADX; }
  bool hasSHA() const { return HasSHA; }
  bool hasPRFCHW() const { return HasPRFCHW; }
  bool hasPREFETCHWT1() const { return HasPREFETCHWT1; }
  bool hasPrefetchW() const {
    // The PREFETCHW instruction was added with 3DNow but later CPUs gave it
    // its own CPUID bit as part of deprecating 3DNow. Intel eventually added
    // it and KNL has another that prefetches to L2 cache. We assume the
    // L1 version exists if the L2 version does.
    return has3DNow() || hasPRFCHW() || hasPREFETCHWT1();
  }
  bool hasSSEPrefetch() const {
    // We implicitly enable these when we have a write prefix supporting cache
    // level OR if we have prfchw, but don't already have a read prefetch from
    // 3dnow.
    return hasSSE1() || (hasPRFCHW() && !has3DNow()) || hasPREFETCHWT1();
  }
696 | bool hasRDSEED() const { return HasRDSEED; } | ||||
697 | bool hasLAHFSAHF() const { return HasLAHFSAHF64 || !is64Bit(); } | ||||
698 | bool hasMWAITX() const { return HasMWAITX; } | ||||
699 | bool hasCLZERO() const { return HasCLZERO; } | ||||
700 | bool hasCLDEMOTE() const { return HasCLDEMOTE; } | ||||
701 | bool hasMOVDIRI() const { return HasMOVDIRI; } | ||||
702 | bool hasMOVDIR64B() const { return HasMOVDIR64B; } | ||||
703 | bool hasPTWRITE() const { return HasPTWRITE; } | ||||
704 | bool isSHLDSlow() const { return IsSHLDSlow; } | ||||
705 | bool isPMULLDSlow() const { return IsPMULLDSlow; } | ||||
706 | bool isPMADDWDSlow() const { return IsPMADDWDSlow; } | ||||
707 | bool isUnalignedMem16Slow() const { return IsUAMem16Slow; } | ||||
708 | bool isUnalignedMem32Slow() const { return IsUAMem32Slow; } | ||||
709 | bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; } | ||||
710 | bool hasCmpxchg16b() const { return HasCmpxchg16b && is64Bit(); } | ||||
  bool useLeaForSP() const { return UseLeaForSP; }
  bool hasPOPCNTFalseDeps() const { return HasPOPCNTFalseDeps; }
  bool hasLZCNTFalseDeps() const { return HasLZCNTFalseDeps; }
  bool hasFastVariableCrossLaneShuffle() const {
    return HasFastVariableCrossLaneShuffle;
  }
  bool hasFastVariablePerLaneShuffle() const {
    return HasFastVariablePerLaneShuffle;
  }
  bool insertVZEROUPPER() const { return InsertVZEROUPPER; }
  bool hasFastGather() const { return HasFastGather; }
  bool hasFastScalarFSQRT() const { return HasFastScalarFSQRT; }
  bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; }
  bool hasFastLZCNT() const { return HasFastLZCNT; }
  bool hasFastSHLDRotate() const { return HasFastSHLDRotate; }
  bool hasFastBEXTR() const { return HasFastBEXTR; }
  bool hasFastHorizontalOps() const { return HasFastHorizontalOps; }
  bool hasFastScalarShiftMasks() const { return HasFastScalarShiftMasks; }
  bool hasFastVectorShiftMasks() const { return HasFastVectorShiftMasks; }
  bool hasFastMOVBE() const { return HasFastMOVBE; }
  bool hasMacroFusion() const { return HasMacroFusion; }
  bool hasBranchFusion() const { return HasBranchFusion; }
  bool hasERMSB() const { return HasERMSB; }
  bool hasFSRM() const { return HasFSRM; }
  bool hasSlowDivide32() const { return HasSlowDivide32; }
  bool hasSlowDivide64() const { return HasSlowDivide64; }
  bool padShortFunctions() const { return PadShortFunctions; }
  bool slowTwoMemOps() const { return SlowTwoMemOps; }
  bool LEAusesAG() const { return LEAUsesAG; }
  bool slowLEA() const { return SlowLEA; }
  bool slow3OpsLEA() const { return Slow3OpsLEA; }
  bool slowIncDec() const { return SlowIncDec; }
  bool hasCDI() const { return HasCDI; }
  bool hasVPOPCNTDQ() const { return HasVPOPCNTDQ; }
  bool hasPFI() const { return HasPFI; }
  bool hasERI() const { return HasERI; }
  bool hasDQI() const { return HasDQI; }
  bool hasBWI() const { return HasBWI; }
  bool hasVLX() const { return HasVLX; }
  bool hasPKU() const { return HasPKU; }
  bool hasVNNI() const { return HasVNNI; }
  bool hasBF16() const { return HasBF16; }
  bool hasVP2INTERSECT() const { return HasVP2INTERSECT; }
  bool hasBITALG() const { return HasBITALG; }
  bool hasSHSTK() const { return HasSHSTK; }
  bool hasCLFLUSHOPT() const { return HasCLFLUSHOPT; }
  bool hasCLWB() const { return HasCLWB; }
  bool hasWBNOINVD() const { return HasWBNOINVD; }
  bool hasRDPID() const { return HasRDPID; }
  bool hasWAITPKG() const { return HasWAITPKG; }
  bool hasPCONFIG() const { return HasPCONFIG; }
  bool hasSGX() const { return HasSGX; }
  bool hasINVPCID() const { return HasINVPCID; }
  bool hasENQCMD() const { return HasENQCMD; }
  bool hasKL() const { return HasKL; }
  bool hasWIDEKL() const { return HasWIDEKL; }
  bool hasHRESET() const { return HasHRESET; }
  bool hasSERIALIZE() const { return HasSERIALIZE; }
  bool hasTSXLDTRK() const { return HasTSXLDTRK; }
  bool hasUINTR() const { return HasUINTR; }
  bool useRetpolineIndirectCalls() const { return UseRetpolineIndirectCalls; }
  bool useRetpolineIndirectBranches() const {
    return UseRetpolineIndirectBranches;
  }
  bool hasAVXVNNI() const { return HasAVXVNNI; }
  bool hasAMXTILE() const { return HasAMXTILE; }
  bool hasAMXBF16() const { return HasAMXBF16; }
  bool hasAMXINT8() const { return HasAMXINT8; }
  bool useRetpolineExternalThunk() const { return UseRetpolineExternalThunk; }

  // These are generic getters that OR together all of the thunk types
  // supported by the subtarget. Therefore useIndirectThunk*() will return
  // true if any respective thunk feature is enabled.
  bool useIndirectThunkCalls() const {
    return useRetpolineIndirectCalls() || useLVIControlFlowIntegrity();
  }
  bool useIndirectThunkBranches() const {
    return useRetpolineIndirectBranches() || useLVIControlFlowIntegrity();
  }
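  // Usage sketch (added annotation; the helpers named below are hypothetical,
  // for illustration only): lowering code can test the combined predicate
  // instead of querying each mitigation individually:
  //
  //   if (Subtarget.useIndirectThunkCalls())
  //     emitCallThroughThunk(CB);   // hypothetical helper
  //   else
  //     emitPlainIndirectCall(CB);  // hypothetical helper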

  bool preferMaskRegisters() const { return PreferMaskRegisters; }
  bool useGLMDivSqrtCosts() const { return UseGLMDivSqrtCosts; }
  bool useLVIControlFlowIntegrity() const { return UseLVIControlFlowIntegrity; }
  bool useLVILoadHardening() const { return UseLVILoadHardening; }
  bool useSpeculativeExecutionSideEffectSuppression() const {
    return UseSpeculativeExecutionSideEffectSuppression;
  }

  unsigned getPreferVectorWidth() const { return PreferVectorWidth; }
  unsigned getRequiredVectorWidth() const { return RequiredVectorWidth; }

  // Helper functions to determine when we should allow widening to 512-bit
  // during codegen.
  // TODO: Currently we're always allowing widening on CPUs without VLX,
  // because for many cases we don't have a better option.
  bool canExtendTo512DQ() const {
    return hasAVX512() && (!hasVLX() || getPreferVectorWidth() >= 512);
  }
  bool canExtendTo512BW() const {
    return hasBWI() && canExtendTo512DQ();
  }
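  // Added annotation, restating how the two predicates compose (assuming `ST`
  // is this subtarget): without VLX, 512-bit widening is always allowed on
  // AVX512 CPUs; with VLX, it also requires a preferred vector width of at
  // least 512.
  //
  //   bool Can512DQ = ST.canExtendTo512DQ(); // 512-bit DQ-style ops allowed
  //   bool Can512BW = ST.canExtendTo512BW(); // additionally requires BWI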

  // If there are no 512-bit vectors and we prefer not to use 512-bit registers,
  // disable them in the legalizer.
  bool useAVX512Regs() const {
    return hasAVX512() && (canExtendTo512DQ() || RequiredVectorWidth > 256);
  }

  bool useBWIRegs() const {
    return hasBWI() && useAVX512Regs();
  }
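  // Usage sketch (added annotation; `ST` and the width ladder are assumptions
  // for illustration): the legalizer-facing choice of maximum vector register
  // width can be phrased as
  //
  //   unsigned MaxVecWidth = ST.useAVX512Regs() ? 512
  //                        : ST.hasAVX()        ? 256
  //                                             : 128;
  //
  // RequiredVectorWidth is typically derived from the function's
  // "min-legal-vector-width" attribute, which can force 512-bit registers
  // even when the preferred width is smaller.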

  bool isXRaySupported() const override { return is64Bit(); }

  /// TODO: This is meant to be removed later and replaced with suitable
  /// subtarget properties.
  bool isAtom() const { return X86ProcFamily == IntelAtom; }
  bool isSLM() const { return X86ProcFamily == IntelSLM; }
  bool useSoftFloat() const { return UseSoftFloat; }
  bool useAA() const override { return UseAA; }

  /// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
  /// no-sse2). There isn't any reason to disable it if the target processor
  /// supports it.
  bool hasMFence() const { return hasSSE2() || is64Bit(); }
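  // Added annotation: when this returns false (pre-SSE2, 32-bit targets), a
  // sequentially consistent fence is commonly lowered to a locked RMW on the
  // stack (e.g. `lock orl $0, (%esp)`) instead of MFENCE.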

  const Triple &getTargetTriple() const { return TargetTriple; }

  bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
  bool isTargetFreeBSD() const { return TargetTriple.isOSFreeBSD(); }
  bool isTargetOpenBSD() const { return TargetTriple.isOSOpenBSD(); }
  bool isTargetDragonFly() const { return TargetTriple.isOSDragonFly(); }
  bool isTargetSolaris() const { return TargetTriple.isOSSolaris(); }
  bool isTargetPS4() const { return TargetTriple.isPS4CPU(); }

  bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
  bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
  bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }

  bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
  bool isTargetKFreeBSD() const { return TargetTriple.isOSKFreeBSD(); }
  bool isTargetGlibc() const { return TargetTriple.isOSGlibc(); }
  bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
  bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
  bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); }
  bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); }
  bool isTargetMCU() const { return TargetTriple.isOSIAMCU(); }
  bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); }

  bool isTargetWindowsMSVC() const {
    return TargetTriple.isWindowsMSVCEnvironment();
  }

  bool isTargetWindowsCoreCLR() const {
    return TargetTriple.isWindowsCoreCLREnvironment();
  }

  bool isTargetWindowsCygwin() const {
    return TargetTriple.isWindowsCygwinEnvironment();
  }

  bool isTargetWindowsGNU() const {
    return TargetTriple.isWindowsGNUEnvironment();
  }

  bool isTargetWindowsItanium() const {
    return TargetTriple.isWindowsItaniumEnvironment();
  }

  bool isTargetCygMing() const { return TargetTriple.isOSCygMing(); }

  bool isOSWindows() const { return TargetTriple.isOSWindows(); }

  bool isTargetWin64() const { return In64BitMode && isOSWindows(); }

  bool isTargetWin32() const { return !In64BitMode && isOSWindows(); }

  bool isPICStyleGOT() const { return PICStyle == PICStyles::Style::GOT; }
  bool isPICStyleRIPRel() const { return PICStyle == PICStyles::Style::RIPRel; }

  bool isPICStyleStubPIC() const {
    return PICStyle == PICStyles::Style::StubPIC;
  }

  bool isPositionIndependent() const;

  bool isCallingConvWin64(CallingConv::ID CC) const {
    switch (CC) {
    // On Win64, all these conventions just use the default convention.
    case CallingConv::C:
    case CallingConv::Fast:
    case CallingConv::Tail:
    case CallingConv::Swift:
    case CallingConv::SwiftTail:
    case CallingConv::X86_FastCall:
    case CallingConv::X86_StdCall:
    case CallingConv::X86_ThisCall:
    case CallingConv::X86_VectorCall:
    case CallingConv::Intel_OCL_BI:
      return isTargetWin64();
    // This convention allows using the Win64 convention on other targets.
    case CallingConv::Win64:
      return true;
    // This convention allows using the SysV convention on Windows targets.
    case CallingConv::X86_64_SysV:
      return false;
    // Anything else is not a Win64 convention.
    default:
      return false;
    }
  }
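  // Usage sketch (added annotation; `CB` is a hypothetical CallBase*): the
  // query is per call site, so a single module can mix both 64-bit ABIs on
  // one target:
  //
  //   bool Win64ABI = Subtarget.isCallingConvWin64(CB->getCallingConv());
  //   // CallingConv::Win64 is Win64 everywhere; X86_64_SysV never is; the
  //   // default conventions simply follow isTargetWin64().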

  /// Classify a global variable reference for the current subtarget according
  /// to how we should reference it in a non-pcrel context.
  unsigned char classifyLocalReference(const GlobalValue *GV) const;

  unsigned char classifyGlobalReference(const GlobalValue *GV,
                                        const Module &M) const;
  unsigned char classifyGlobalReference(const GlobalValue *GV) const;

  /// Classify a global function reference for the current subtarget.
  unsigned char classifyGlobalFunctionReference(const GlobalValue *GV,
                                                const Module &M) const;
  unsigned char classifyGlobalFunctionReference(const GlobalValue *GV) const;

  /// Classify a blockaddress reference for the current subtarget according to
  /// how we should reference it in a non-pcrel context.
  unsigned char classifyBlockAddressReference() const;

  /// Return true if the subtarget allows calls to an immediate address.
  bool isLegalToCallImmediateAddr() const;

  /// If we are using indirect thunks, we need to expand indirectbr to avoid
  /// it lowering to an actual indirect jump.
  bool enableIndirectBrExpand() const override {
    return useIndirectThunkBranches();
  }
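  // Added annotation (background on the generic pass this hook enables): the
  // IndirectBrExpand pass rewrites `indirectbr` into a switch over integer
  // block ids, so no raw indirect jump survives to bypass the thunks.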

  /// Enable the MachineScheduler pass for all X86 subtargets.
  bool enableMachineScheduler() const override { return true; }

  bool enableEarlyIfConversion() const override;

  void getPostRAMutations(std::vector<std::unique_ptr<ScheduleDAGMutation>>
                              &Mutations) const override;

  AntiDepBreakMode getAntiDepBreakMode() const override {
    return TargetSubtargetInfo::ANTIDEP_CRITICAL;
  }

  bool enableAdvancedRASplitCost() const override { return false; }
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_X86_X86SUBTARGET_H