Bug Summary

File: src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86/X86PartialReduction.cpp
Warning: line 289, column 26
Division by zero
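
Editor's note: the analyzer's path through `trySADReplacement` (annotated below) reaches line 289 after assuming that the loop `for (unsigned i = 0; i != SubElts; ++i)` at line 286 exits immediately, which is only possible if `SubElts == 0`; the expression `i % SubElts` then divides by that assumed zero. A minimal standalone sketch of the shape the checker reasons about (names illustrative, not from the source):

```cpp
#include <vector>

// If SubElts can be 0, the first loop is skipped entirely and the modulo in
// the second loop divides by zero.
void buildConcatMask(std::vector<int> &Mask, unsigned SubElts,
                     unsigned NumElts) {
  for (unsigned i = 0; i != SubElts; ++i) // skipped when SubElts == 0
    Mask[i] = i;
  for (unsigned i = SubElts; i != NumElts; ++i)
    Mask[i] = (i % SubElts) + SubElts;    // UB when SubElts == 0
}
```

In the pass, `SubElts` is `FixedVectorType::getNumElements()` of a value the function itself built, which cannot be zero on any real path, so this looks like a checker over-approximation rather than a reachable crash; treat that as a triage judgment, since the report itself does not say so.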

Annotated Source Code


clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name X86PartialReduction.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 1 -fhalf-no-semantic-interposition -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Analysis -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ASMParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/BinaryFormat -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitstream -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /include/llvm/CodeGen -I /include/llvm/CodeGen/PBQP -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Coroutines -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData/Coverage -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/CodeView -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/DWARF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/MSF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/PDB -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Demangle -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/JITLink -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/Orc -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenACC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenMP -I /include/llvm/CodeGen/GlobalISel -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IRReader -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/LTO -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Linker -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC/MCParser -I /include/llvm/CodeGen/MIRParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Object -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Option -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Passes -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Scalar -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ADT -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Support -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/Symbolize -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Target -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Utils -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Vectorize -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/IPO -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include -I /usr/src/gnu/usr.bin/clang/libLLVM/../include -I /usr/src/gnu/usr.bin/clang/libLLVM/obj -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include -D NDEBUG -D __STDC_LIMIT_MACROS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D LLVM_PREFIX="/usr" -D PIC -internal-isystem /usr/include/c++/v1 -internal-isystem /usr/local/lib/clang/13.0.0/include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -ferror-limit 19 -fvisibility-inlines-hidden -fwrapv -D_RET_PROTECTOR -ret-protector -fno-rtti -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/ben/Projects/vmm/scan-build/2022-01-12-194120-40624-1 -x c++ /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86/X86PartialReduction.cpp

/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86/X86PartialReduction.cpp

1//===-- X86PartialReduction.cpp -------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass looks for add instructions used by a horizontal reduction to see
10// if we might be able to use pmaddwd or psadbw. Some cases of this require
11// cross basic block knowledge and can't be done in SelectionDAG.
12//
13//===----------------------------------------------------------------------===//
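
Editor's note: for orientation, the reductions this pass targets correspond to scalar loops like the following (hypothetical C++ examples, not taken from LLVM): `pmaddwd` implements a multiply-accumulate of 16-bit values into 32-bit lanes, and `psadbw` implements a sum of absolute differences over unsigned bytes.

```cpp
// pmaddwd shape: i16 * i16 products accumulated into an i32 sum.
int dot16(const short *a, const short *b, int n) {
  int sum = 0;
  for (int i = 0; i < n; ++i)
    sum += a[i] * b[i];
  return sum;
}

// psadbw shape: sum of absolute differences over unsigned bytes.
int sad8(const unsigned char *a, const unsigned char *b, int n) {
  int sum = 0;
  for (int i = 0; i < n; ++i)
    sum += a[i] > b[i] ? a[i] - b[i] : b[i] - a[i];
  return sum;
}
```

After vectorization these loops end in the extract-element reduction tree that `matchAddReduction` below pattern-matches.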
14
15#include "X86.h"
16#include "llvm/Analysis/ValueTracking.h"
17#include "llvm/CodeGen/TargetPassConfig.h"
18#include "llvm/IR/Constants.h"
19#include "llvm/IR/Instructions.h"
20#include "llvm/IR/IntrinsicsX86.h"
21#include "llvm/IR/IRBuilder.h"
22#include "llvm/IR/Operator.h"
23#include "llvm/Pass.h"
24#include "X86TargetMachine.h"
25
26using namespace llvm;
27
28#define DEBUG_TYPE "x86-partial-reduction"
29
30namespace {
31
32class X86PartialReduction : public FunctionPass {
33 const DataLayout *DL;
34 const X86Subtarget *ST;
35
36public:
37 static char ID; // Pass identification, replacement for typeid.
38
39 X86PartialReduction() : FunctionPass(ID) { }
40
41 bool runOnFunction(Function &Fn) override;
42
43 void getAnalysisUsage(AnalysisUsage &AU) const override {
44 AU.setPreservesCFG();
45 }
46
47 StringRef getPassName() const override {
48 return "X86 Partial Reduction";
49 }
50
51private:
52 bool tryMAddReplacement(Instruction *Op);
53 bool trySADReplacement(Instruction *Op);
54};
55}
56
57FunctionPass *llvm::createX86PartialReductionPass() {
58 return new X86PartialReduction();
59}
60
61char X86PartialReduction::ID = 0;
62
63INITIALIZE_PASS(X86PartialReduction, DEBUG_TYPE,
64 "X86 Partial Reduction", false, false)
65
66bool X86PartialReduction::tryMAddReplacement(Instruction *Op) {
67 if (!ST->hasSSE2())
68 return false;
69
70 // Need at least 8 elements.
71 if (cast<FixedVectorType>(Op->getType())->getNumElements() < 8)
72 return false;
73
74 // Element type should be i32.
75 if (!cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(32))
76 return false;
77
78 auto *Mul = dyn_cast<BinaryOperator>(Op);
79 if (!Mul || Mul->getOpcode() != Instruction::Mul)
80 return false;
81
82 Value *LHS = Mul->getOperand(0);
83 Value *RHS = Mul->getOperand(1);
84
85 // LHS and RHS should be only used once or if they are the same then only
86 // used twice. Only check this when SSE4.1 is enabled and we have zext/sext
87 // instructions, otherwise we use punpck to emulate zero extend in stages. The
88 // trunc we need to do likely won't introduce new instructions in that case.
89 if (ST->hasSSE41()) {
90 if (LHS == RHS) {
91 if (!isa<Constant>(LHS) && !LHS->hasNUses(2))
92 return false;
93 } else {
94 if (!isa<Constant>(LHS) && !LHS->hasOneUse())
95 return false;
96 if (!isa<Constant>(RHS) && !RHS->hasOneUse())
97 return false;
98 }
99 }
100
101 auto CanShrinkOp = [&](Value *Op) {
102 auto IsFreeTruncation = [&](Value *Op) {
103 if (auto *Cast = dyn_cast<CastInst>(Op)) {
104 if (Cast->getParent() == Mul->getParent() &&
105 (Cast->getOpcode() == Instruction::SExt ||
106 Cast->getOpcode() == Instruction::ZExt) &&
107 Cast->getOperand(0)->getType()->getScalarSizeInBits() <= 16)
108 return true;
109 }
110
111 return isa<Constant>(Op);
112 };
113
114 // If the operation can be freely truncated and has enough sign bits we
115 // can shrink.
116 if (IsFreeTruncation(Op) &&
117 ComputeNumSignBits(Op, *DL, 0, nullptr, Mul) > 16)
118 return true;
119
120 // SelectionDAG has limited support for truncating through an add or sub if
121 // the inputs are freely truncatable.
122 if (auto *BO = dyn_cast<BinaryOperator>(Op)) {
123 if (BO->getParent() == Mul->getParent() &&
124 IsFreeTruncation(BO->getOperand(0)) &&
125 IsFreeTruncation(BO->getOperand(1)) &&
126 ComputeNumSignBits(Op, *DL, 0, nullptr, Mul) > 16)
127 return true;
128 }
129
130 return false;
131 };
132
133 // Both Ops need to be shrinkable.
134 if (!CanShrinkOp(LHS) && !CanShrinkOp(RHS))
135 return false;
136
137 IRBuilder<> Builder(Mul);
138
139 auto *MulTy = cast<FixedVectorType>(Op->getType());
140 unsigned NumElts = MulTy->getNumElements();
141
142 // Extract even elements and odd elements and add them together. This will
143 // be pattern matched by SelectionDAG to pmaddwd. This instruction will be
144 // half the original width.
145 SmallVector<int, 16> EvenMask(NumElts / 2);
146 SmallVector<int, 16> OddMask(NumElts / 2);
147 for (int i = 0, e = NumElts / 2; i != e; ++i) {
148 EvenMask[i] = i * 2;
149 OddMask[i] = i * 2 + 1;
150 }
151 // Creating a new mul so the replaceAllUsesWith below doesn't replace the
152 // uses in the shuffles we're creating.
153 Value *NewMul = Builder.CreateMul(Mul->getOperand(0), Mul->getOperand(1));
154 Value *EvenElts = Builder.CreateShuffleVector(NewMul, NewMul, EvenMask);
155 Value *OddElts = Builder.CreateShuffleVector(NewMul, NewMul, OddMask);
156 Value *MAdd = Builder.CreateAdd(EvenElts, OddElts);
157
158 // Concatenate zeroes to extend back to the original type.
159 SmallVector<int, 32> ConcatMask(NumElts);
160 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
161 Value *Zero = Constant::getNullValue(MAdd->getType());
162 Value *Concat = Builder.CreateShuffleVector(MAdd, Zero, ConcatMask);
163
164 Mul->replaceAllUsesWith(Concat);
165 Mul->eraseFromParent();
166
167 return true;
168}
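
Editor's note: to make the shuffle bookkeeping in `tryMAddReplacement` concrete, here is a small self-contained sketch (illustrative only) of the three masks it builds for an 8-element multiply: the even/odd extractions that SelectionDAG later matches to `pmaddwd`, and the identity mask that concatenates zeroes back to the original width.

```cpp
#include <cstdio>
#include <numeric>
#include <vector>

static void print(const char *Name, const std::vector<int> &Mask) {
  std::printf("%s:", Name);
  for (int M : Mask)
    std::printf(" %d", M);
  std::printf("\n");
}

int main() {
  const unsigned NumElts = 8; // example width; the pass requires >= 8 elements
  std::vector<int> EvenMask(NumElts / 2), OddMask(NumElts / 2);
  for (unsigned I = 0; I != NumElts / 2; ++I) {
    EvenMask[I] = I * 2;    // lanes 0,2,4,6 of the fresh multiply
    OddMask[I] = I * 2 + 1; // lanes 1,3,5,7
  }
  // ConcatMask is the identity 0..NumElts-1: its first half selects the
  // half-width pmaddwd result, its second half selects zero lanes.
  std::vector<int> ConcatMask(NumElts);
  std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
  print("even", EvenMask);     // even: 0 2 4 6
  print("odd", OddMask);       // odd: 1 3 5 7
  print("concat", ConcatMask); // concat: 0 1 2 3 4 5 6 7
}
```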
169
170bool X86PartialReduction::trySADReplacement(Instruction *Op) {
171 if (!ST->hasSSE2())
  [12] Calling 'X86Subtarget::hasSSE2'
  [14] Returning from 'X86Subtarget::hasSSE2'
  [15] Taking false branch
172 return false;
173
174 // TODO: There's nothing special about i32, any integer type above i16 should
175 // work just as well.
176 if (!cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(32))
  [16] The object is a 'VectorType'
  [17] Assuming the condition is false
  [18] Taking false branch
177 return false;
178
179 // Operand should be a select.
180 auto *SI = dyn_cast<SelectInst>(Op);
  [19] Assuming 'Op' is a 'SelectInst'
181 if (!SI)
  [19.1] 'SI' is non-null
  [20] Taking false branch
182 return false;
183
184 // Select needs to implement absolute value.
185 Value *LHS, *RHS;
186 auto SPR = matchSelectPattern(SI, LHS, RHS);
187 if (SPR.Flavor != SPF_ABS)
  [21] Assuming field 'Flavor' is equal to SPF_ABS
  [22] Taking false branch
188 return false;
189
190 // Need a subtract of two values.
191 auto *Sub = dyn_cast<BinaryOperator>(LHS);
  [23] Assuming 'LHS' is a 'BinaryOperator'
192 if (!Sub || Sub->getOpcode() != Instruction::Sub)
  [23.1] 'Sub' is non-null
  [24] Assuming the condition is false
  [25] Taking false branch
193 return false;
194
195 // Look for zero extend from i8.
196 auto getZeroExtendedVal = [](Value *Op) -> Value * {
197 if (auto *ZExt = dyn_cast<ZExtInst>(Op))
  [27] Assuming 'ZExt' is non-null
  [28] Taking true branch
  [35] Assuming 'ZExt' is non-null
  [36] Taking true branch
198 if (cast<VectorType>(ZExt->getOperand(0)->getType())
  [29] The object is a 'VectorType'
  [30] Assuming the condition is true
  [31] Taking true branch
  [37] The object is a 'VectorType'
  [38] Assuming the condition is true
  [39] Taking true branch
199 ->getElementType()
200 ->isIntegerTy(8))
201 return ZExt->getOperand(0);
  [32] Returning pointer, which participates in a condition later
  [40] Returning pointer, which participates in a condition later
202
203 return nullptr;
204 };
205
206 // Both operands of the subtract should be extends from vXi8.
207 Value *Op0 = getZeroExtendedVal(Sub->getOperand(0));
  [26] Calling 'operator()'
  [33] Returning from 'operator()'
208 Value *Op1 = getZeroExtendedVal(Sub->getOperand(1));
  [34] Calling 'operator()'
  [41] Returning from 'operator()'
209 if (!Op0 || !Op1)
  [41.1] 'Op0' is non-null
  [41.2] 'Op1' is non-null
  [42] Taking false branch
210 return false;
211
212 IRBuilder<> Builder(SI);
213
214 auto *OpTy = cast<FixedVectorType>(Op->getType());
  [43] The object is a 'FixedVectorType'
215 unsigned NumElts = OpTy->getNumElements();
216
217 unsigned IntrinsicNumElts;
218 Intrinsic::ID IID;
219 if (ST->hasBWI() && NumElts >= 64) {
  [44] Assuming the condition is false
220 IID = Intrinsic::x86_avx512_psad_bw_512;
221 IntrinsicNumElts = 64;
222 } else if (ST->hasAVX2() && NumElts >= 32) {
223 IID = Intrinsic::x86_avx2_psad_bw;
224 IntrinsicNumElts = 32;
225 } else {
226 IID = Intrinsic::x86_sse2_psad_bw;
227 IntrinsicNumElts = 16;
228 }
229
230 Function *PSADBWFn = Intrinsic::getDeclaration(SI->getModule(), IID);
231
232 if (NumElts < 16) {
  [45] Assuming 'NumElts' is >= 16
  [46] Taking false branch
233 // Pad input with zeroes.
234 SmallVector<int, 32> ConcatMask(16);
235 for (unsigned i = 0; i != NumElts; ++i)
236 ConcatMask[i] = i;
237 for (unsigned i = NumElts; i != 16; ++i)
238 ConcatMask[i] = (i % NumElts) + NumElts;
239
240 Value *Zero = Constant::getNullValue(Op0->getType());
241 Op0 = Builder.CreateShuffleVector(Op0, Zero, ConcatMask);
242 Op1 = Builder.CreateShuffleVector(Op1, Zero, ConcatMask);
243 NumElts = 16;
244 }
245
246 // Intrinsics produce vXi64 and need to be casted to vXi32.
247 auto *I32Ty =
248 FixedVectorType::get(Builder.getInt32Ty(), IntrinsicNumElts / 4);
249
250 assert(NumElts % IntrinsicNumElts == 0 && "Unexpected number of elements!");
251 unsigned NumSplits = NumElts / IntrinsicNumElts;
252
253 // First collect the pieces we need.
254 SmallVector<Value *, 4> Ops(NumSplits);
255 for (unsigned i = 0; i != NumSplits; ++i) {
  [47] Assuming 'i' is not equal to 'NumSplits'
  [48] Loop condition is true. Entering loop body
  [49] Assuming 'i' is equal to 'NumSplits'
  [50] Loop condition is false. Execution continues on line 264
256 SmallVector<int, 64> ExtractMask(IntrinsicNumElts);
257 std::iota(ExtractMask.begin(), ExtractMask.end(), i * IntrinsicNumElts);
258 Value *ExtractOp0 = Builder.CreateShuffleVector(Op0, Op0, ExtractMask);
259 Value *ExtractOp1 = Builder.CreateShuffleVector(Op1, Op0, ExtractMask);
260 Ops[i] = Builder.CreateCall(PSADBWFn, {ExtractOp0, ExtractOp1});
261 Ops[i] = Builder.CreateBitCast(Ops[i], I32Ty);
262 }
263
264 assert(isPowerOf2_32(NumSplits) && "Expected power of 2 splits");
265 unsigned Stages = Log2_32(NumSplits);
266 for (unsigned s = Stages; s > 0; --s) {
  [51] Assuming 's' is <= 0
  [52] Loop condition is false. Execution continues on line 278
267 unsigned NumConcatElts =
268 cast<FixedVectorType>(Ops[0]->getType())->getNumElements() * 2;
269 for (unsigned i = 0; i != 1U << (s - 1); ++i) {
270 SmallVector<int, 64> ConcatMask(NumConcatElts);
271 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
272 Ops[i] = Builder.CreateShuffleVector(Ops[i*2], Ops[i*2+1], ConcatMask);
273 }
274 }
275
276 // At this point the final value should be in Ops[0]. Now we need to adjust
277 // it to the final original type.
278 NumElts = cast<FixedVectorType>(OpTy)->getNumElements();
  [53] 'OpTy' is a 'FixedVectorType'
279 if (NumElts == 2) {
  [53.1] 'NumElts' is not equal to 2
  [54] Taking false branch
280 // Extract down to 2 elements.
281 Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ArrayRef<int>{0, 1});
282 } else if (NumElts >= 8) {
  [54.1] 'NumElts' is >= 8
  [55] Taking true branch
283 SmallVector<int, 32> ConcatMask(NumElts);
284 unsigned SubElts =
285 cast<FixedVectorType>(Ops[0]->getType())->getNumElements();
  [56] The object is a 'FixedVectorType'
  [57] 'SubElts' initialized here
286 for (unsigned i = 0; i != SubElts; ++i)
  [58] Assuming 'i' is equal to 'SubElts'
  [59] Loop condition is false. Execution continues on line 288
287 ConcatMask[i] = i;
288 for (unsigned i = SubElts; i != NumElts; ++i)
  [59.1] 'i' is not equal to 'NumElts'
  [60] Loop condition is true. Entering loop body
289 ConcatMask[i] = (i % SubElts) + SubElts;
  [61] Division by zero
290
291 Value *Zero = Constant::getNullValue(Ops[0]->getType());
292 Ops[0] = Builder.CreateShuffleVector(Ops[0], Zero, ConcatMask);
293 }
294
295 SI->replaceAllUsesWith(Ops[0]);
296 SI->eraseFromParent();
297
298 return true;
299}
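
Editor's note: the width selection and splitting arithmetic in `trySADReplacement` can be modeled compactly (a standalone sketch under an assumed AVX2 configuration, not the pass code): pick the widest available `psadbw`, pad narrow inputs up to 16 elements, split into intrinsic-width pieces, then concatenate the per-piece results back in log2 stages.

```cpp
#include <cassert>
#include <cstdio>

int main() {
  // Assumed example configuration: AVX2 without BWI, 64 x i8 input elements.
  const bool HasBWI = false, HasAVX2 = true;
  unsigned NumElts = 64;

  unsigned IntrinsicNumElts = 16; // sse2 psadbw
  if (HasBWI && NumElts >= 64)
    IntrinsicNumElts = 64;        // avx512 512-bit psadbw
  else if (HasAVX2 && NumElts >= 32)
    IntrinsicNumElts = 32;        // avx2 psadbw

  if (NumElts < 16)
    NumElts = 16; // the pass zero-pads narrower inputs

  assert(NumElts % IntrinsicNumElts == 0 && "Unexpected number of elements!");
  unsigned NumSplits = NumElts / IntrinsicNumElts;

  // Each concatenation stage halves the number of pieces.
  unsigned Stages = 0;
  for (unsigned N = NumSplits; N > 1; N /= 2)
    ++Stages;
  std::printf("pieces=%u stages=%u\n", NumSplits, Stages); // pieces=2 stages=1
}
```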
300
301// Walk backwards from the ExtractElementInst and determine if it is the end of
302// a horizontal reduction. Return the input to the reduction if we find one.
303static Value *matchAddReduction(const ExtractElementInst &EE) {
304 // Make sure we're extracting index 0.
305 auto *Index = dyn_cast<ConstantInt>(EE.getIndexOperand());
306 if (!Index || !Index->isNullValue())
307 return nullptr;
308
309 const auto *BO = dyn_cast<BinaryOperator>(EE.getVectorOperand());
310 if (!BO || BO->getOpcode() != Instruction::Add || !BO->hasOneUse())
311 return nullptr;
312
313 unsigned NumElems = cast<FixedVectorType>(BO->getType())->getNumElements();
314 // Ensure the reduction size is a power of 2.
315 if (!isPowerOf2_32(NumElems))
316 return nullptr;
317
318 const Value *Op = BO;
319 unsigned Stages = Log2_32(NumElems);
320 for (unsigned i = 0; i != Stages; ++i) {
321 const auto *BO = dyn_cast<BinaryOperator>(Op);
322 if (!BO || BO->getOpcode() != Instruction::Add)
323 return nullptr;
324
325 // If this isn't the first add, then it should only have 2 users, the
326 // shuffle and another add which we checked in the previous iteration.
327 if (i != 0 && !BO->hasNUses(2))
328 return nullptr;
329
330 Value *LHS = BO->getOperand(0);
331 Value *RHS = BO->getOperand(1);
332
333 auto *Shuffle = dyn_cast<ShuffleVectorInst>(LHS);
334 if (Shuffle) {
335 Op = RHS;
336 } else {
337 Shuffle = dyn_cast<ShuffleVectorInst>(RHS);
338 Op = LHS;
339 }
340
341 // The first operand of the shuffle should be the same as the other operand
342 // of the bin op.
343 if (!Shuffle || Shuffle->getOperand(0) != Op)
344 return nullptr;
345
346 // Verify the shuffle has the expected (at this stage of the pyramid) mask.
347 unsigned MaskEnd = 1 << i;
348 for (unsigned Index = 0; Index < MaskEnd; ++Index)
349 if (Shuffle->getMaskValue(Index) != (int)(MaskEnd + Index))
350 return nullptr;
351 }
352
353 return const_cast<Value *>(Op);
354}
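
Editor's note: the mask check at the end of `matchAddReduction` encodes the usual log2 reduction pyramid: at stage `i` (walking back from the final extract), the shuffle must move lanes `MaskEnd..2*MaskEnd-1` to the front, where `MaskEnd = 1 << i`. A small sketch of the expected masks for an 8-wide reduction (illustrative only):

```cpp
#include <cstdio>

int main() {
  const unsigned NumElems = 8; // power-of-2 reduction width
  unsigned Stages = 0;         // Log2_32(NumElems)
  for (unsigned N = NumElems; N > 1; N /= 2)
    ++Stages;
  for (unsigned I = 0; I != Stages; ++I) {
    unsigned MaskEnd = 1u << I;
    std::printf("stage %u expects mask:", I);
    for (unsigned Index = 0; Index < MaskEnd; ++Index)
      std::printf(" %u", MaskEnd + Index); // stage 0: 1; stage 1: 2 3; ...
    std::printf("\n");
  }
}
```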
355
356// See if this BO is reachable from this Phi by walking forward through single
357// use BinaryOperators with the same opcode. If we get back then we know we've
358// found a loop and it is safe to step through this Add to find more leaves.
359static bool isReachableFromPHI(PHINode *Phi, BinaryOperator *BO) {
360 // The PHI itself should only have one use.
361 if (!Phi->hasOneUse())
362 return false;
363
364 Instruction *U = cast<Instruction>(*Phi->user_begin());
365 if (U == BO)
366 return true;
367
368 while (U->hasOneUse() && U->getOpcode() == BO->getOpcode())
369 U = cast<Instruction>(*U->user_begin());
370
371 return U == BO;
372}
373
374// Collect all the leaves of the tree of adds that feeds into the horizontal
375// reduction. Root is the Value that is used by the horizontal reduction.
376// We look through single use phis, single use adds, or adds that are used by
377// a phi that forms a loop with the add.
378static void collectLeaves(Value *Root, SmallVectorImpl<Instruction *> &Leaves) {
379 SmallPtrSet<Value *, 8> Visited;
380 SmallVector<Value *, 8> Worklist;
381 Worklist.push_back(Root);
382
383 while (!Worklist.empty()) {
384 Value *V = Worklist.pop_back_val();
385 if (!Visited.insert(V).second)
386 continue;
387
388 if (auto *PN = dyn_cast<PHINode>(V)) {
389 // PHI node should have single use unless it is the root node, then it
390 // has 2 uses.
391 if (!PN->hasNUses(PN == Root ? 2 : 1))
392 break;
393
394 // Push incoming values to the worklist.
395 append_range(Worklist, PN->incoming_values());
396
397 continue;
398 }
399
400 if (auto *BO = dyn_cast<BinaryOperator>(V)) {
401 if (BO->getOpcode() == Instruction::Add) {
402 // Simple case. Single use, just push its operands to the worklist.
403 if (BO->hasNUses(BO == Root ? 2 : 1)) {
404 append_range(Worklist, BO->operands());
405 continue;
406 }
407
408 // If there is additional use, make sure it is an unvisited phi that
409 // gets us back to this node.
410 if (BO->hasNUses(BO == Root ? 3 : 2)) {
411 PHINode *PN = nullptr;
412 for (auto *U : Root->users())
413 if (auto *P = dyn_cast<PHINode>(U))
414 if (!Visited.count(P))
415 PN = P;
416
417 // If we didn't find a 2-input PHI then this isn't a case we can
418 // handle.
419 if (!PN || PN->getNumIncomingValues() != 2)
420 continue;
421
422 // Walk forward from this phi to see if it reaches back to this add.
423 if (!isReachableFromPHI(PN, BO))
424 continue;
425
426 // The phi forms a loop with this Add, push its operands.
427 append_range(Worklist, BO->operands());
428 }
429 }
430 }
431
432 // Not an add or phi, make it a leaf.
433 if (auto *I = dyn_cast<Instruction>(V)) {
434 if (!V->hasNUses(I == Root ? 2 : 1))
435 continue;
436
437 // Add this as a leaf.
438 Leaves.push_back(I);
439 }
440 }
441}
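
Editor's note: stripped of the LLVM types and use-count checks, `collectLeaves` is a standard worklist traversal with a visited set; the shape is roughly this (a toy model, not the pass code):

```cpp
#include <unordered_set>
#include <vector>

struct Node {
  bool StepThrough = false;     // stands in for "single-use add or phi"
  std::vector<Node *> Operands; // add operands / phi incoming values
};

void collectLeaves(Node *Root, std::vector<Node *> &Leaves) {
  std::unordered_set<Node *> Visited;
  std::vector<Node *> Worklist{Root};
  while (!Worklist.empty()) {
    Node *V = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(V).second)
      continue; // already visited
    if (V->StepThrough) { // the real code also validates use counts here
      Worklist.insert(Worklist.end(), V->Operands.begin(), V->Operands.end());
      continue;
    }
    Leaves.push_back(V); // anything else ends the walk: it is a leaf
  }
}
```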
442
443bool X86PartialReduction::runOnFunction(Function &F) {
444 if (skipFunction(F))
  [1] Assuming the condition is false
  [2] Taking false branch
445 return false;
446
447 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
448 if (!TPC)
  [3] Assuming 'TPC' is non-null
  [4] Taking false branch
449 return false;
450
451 auto &TM = TPC->getTM<X86TargetMachine>();
452 ST = TM.getSubtargetImpl(F);
453
454 DL = &F.getParent()->getDataLayout();
455
456 bool MadeChange = false;
457 for (auto &BB : F) {
458 for (auto &I : BB) {
459 auto *EE = dyn_cast<ExtractElementInst>(&I);
  [5] Assuming the object is a 'ExtractElementInst'
460 if (!EE)
  [5.1] 'EE' is non-null
  [6] Taking false branch
461 continue;
462
463 // First find a reduction tree.
464 // FIXME: Do we need to handle other opcodes than Add?
465 Value *Root = matchAddReduction(*EE);
466 if (!Root)
  [6.1] 'Root' is non-null
  [7] Taking false branch
467 continue;
468
469 SmallVector<Instruction *, 8> Leaves;
470 collectLeaves(Root, Leaves);
471
472 for (Instruction *I : Leaves) {
  [8] Assuming '__begin3' is not equal to '__end3'
473 if (tryMAddReplacement(I)) {
  [9] Taking false branch
474 MadeChange = true;
475 continue;
476 }
477
478 // Don't do SAD matching on the root node. SelectionDAG already
479 // has support for that and currently generates better code.
480 if (I != Root && trySADReplacement(I))
  [10] Assuming 'I' is not equal to 'Root'
  [11] Calling 'X86PartialReduction::trySADReplacement'
481 MadeChange = true;
482 }
483 }
484 }
485
486 return MadeChange;
487}

/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86/X86Subtarget.h

1//===-- X86Subtarget.h - Define Subtarget for the X86 ----------*- C++ -*--===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file declares the X86 specific subclass of TargetSubtargetInfo.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_LIB_TARGET_X86_X86SUBTARGET_H
14#define LLVM_LIB_TARGET_X86_X86SUBTARGET_H
15
16#include "X86FrameLowering.h"
17#include "X86ISelLowering.h"
18#include "X86InstrInfo.h"
19#include "X86SelectionDAGInfo.h"
20#include "llvm/ADT/Triple.h"
21#include "llvm/CodeGen/TargetSubtargetInfo.h"
22#include "llvm/IR/CallingConv.h"
23#include <climits>
24#include <memory>
25
26#define GET_SUBTARGETINFO_HEADER
27#include "X86GenSubtargetInfo.inc"
28
29namespace llvm {
30
31class CallLowering;
32class GlobalValue;
33class InstructionSelector;
34class LegalizerInfo;
35class RegisterBankInfo;
36class StringRef;
37class TargetMachine;
38
39/// The X86 backend supports a number of different styles of PIC.
40///
41namespace PICStyles {
42
43enum class Style {
44 StubPIC, // Used on i386-darwin in pic mode.
45 GOT, // Used on 32 bit elf on when in pic mode.
46 RIPRel, // Used on X86-64 when in pic mode.
47 None // Set when not in pic mode.
48};
49
50} // end namespace PICStyles
51
52class X86Subtarget final : public X86GenSubtargetInfo {
53 // NOTE: Do not add anything new to this list. Coarse, CPU name based flags
54 // are not a good idea. We should be migrating away from these.
55 enum X86ProcFamilyEnum {
56 Others,
57 IntelAtom,
58 IntelSLM
59 };
60
61 enum X86SSEEnum {
62 NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512F
63 };
64
65 enum X863DNowEnum {
66 NoThreeDNow, MMX, ThreeDNow, ThreeDNowA
67 };
68
69 /// X86 processor family: Intel Atom, and others
70 X86ProcFamilyEnum X86ProcFamily = Others;
71
72 /// Which PIC style to use
73 PICStyles::Style PICStyle;
74
75 const TargetMachine &TM;
76
77 /// SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or none supported.
78 X86SSEEnum X86SSELevel = NoSSE;
79
80 /// MMX, 3DNow, 3DNow Athlon, or none supported.
81 X863DNowEnum X863DNowLevel = NoThreeDNow;
82
83 /// True if the processor supports X87 instructions.
84 bool HasX87 = false;
85
86 /// True if the processor supports CMPXCHG8B.
87 bool HasCmpxchg8b = false;
88
89 /// True if this processor has NOPL instruction
90 /// (generally pentium pro+).
91 bool HasNOPL = false;
92
93 /// True if this processor has conditional move instructions
94 /// (generally pentium pro+).
95 bool HasCMov = false;
96
97 /// True if the processor supports X86-64 instructions.
98 bool HasX86_64 = false;
99
100 /// True if the processor supports POPCNT.
101 bool HasPOPCNT = false;
102
103 /// True if the processor supports SSE4A instructions.
104 bool HasSSE4A = false;
105
106 /// Target has AES instructions
107 bool HasAES = false;
108 bool HasVAES = false;
109
110 /// Target has FXSAVE/FXRESTOR instructions
111 bool HasFXSR = false;
112
113 /// Target has XSAVE instructions
114 bool HasXSAVE = false;
115
116 /// Target has XSAVEOPT instructions
117 bool HasXSAVEOPT = false;
118
119 /// Target has XSAVEC instructions
120 bool HasXSAVEC = false;
121
122 /// Target has XSAVES instructions
123 bool HasXSAVES = false;
124
125 /// Target has carry-less multiplication
126 bool HasPCLMUL = false;
127 bool HasVPCLMULQDQ = false;
128
129 /// Target has Galois Field Arithmetic instructions
130 bool HasGFNI = false;
131
132 /// Target has 3-operand fused multiply-add
133 bool HasFMA = false;
134
135 /// Target has 4-operand fused multiply-add
136 bool HasFMA4 = false;
137
138 /// Target has XOP instructions
139 bool HasXOP = false;
140
141 /// Target has TBM instructions.
142 bool HasTBM = false;
143
144 /// Target has LWP instructions
145 bool HasLWP = false;
146
147 /// True if the processor has the MOVBE instruction.
148 bool HasMOVBE = false;
149
150 /// True if the processor has the RDRAND instruction.
151 bool HasRDRAND = false;
152
153 /// Processor has 16-bit floating point conversion instructions.
154 bool HasF16C = false;
155
156 /// Processor has FS/GS base instructions.
157 bool HasFSGSBase = false;
158
159 /// Processor has LZCNT instruction.
160 bool HasLZCNT = false;
161
162 /// Processor has BMI1 instructions.
163 bool HasBMI = false;
164
165 /// Processor has BMI2 instructions.
166 bool HasBMI2 = false;
167
168 /// Processor has VBMI instructions.
169 bool HasVBMI = false;
170
171 /// Processor has VBMI2 instructions.
172 bool HasVBMI2 = false;
173
174 /// Processor has Integer Fused Multiply Add
175 bool HasIFMA = false;
176
177 /// Processor has RTM instructions.
178 bool HasRTM = false;
179
180 /// Processor has ADX instructions.
181 bool HasADX = false;
182
183 /// Processor has SHA instructions.
184 bool HasSHA = false;
185
186 /// Processor has PRFCHW instructions.
187 bool HasPRFCHW = false;
188
189 /// Processor has RDSEED instructions.
190 bool HasRDSEED = false;
191
192 /// Processor has LAHF/SAHF instructions in 64-bit mode.
193 bool HasLAHFSAHF64 = false;
194
195 /// Processor has MONITORX/MWAITX instructions.
196 bool HasMWAITX = false;
197
198 /// Processor has Cache Line Zero instruction
199 bool HasCLZERO = false;
200
201 /// Processor has Cache Line Demote instruction
202 bool HasCLDEMOTE = false;
203
204 /// Processor has MOVDIRI instruction (direct store integer).
205 bool HasMOVDIRI = false;
206
207 /// Processor has MOVDIR64B instruction (direct store 64 bytes).
208 bool HasMOVDIR64B = false;
209
210 /// Processor has ptwrite instruction.
211 bool HasPTWRITE = false;
212
213 /// Processor has Prefetch with intent to Write instruction
214 bool HasPREFETCHWT1 = false;
215
216 /// True if SHLD instructions are slow.
217 bool IsSHLDSlow = false;
218
219 /// True if the PMULLD instruction is slow compared to PMULLW/PMULHW and
220 // PMULUDQ.
221 bool IsPMULLDSlow = false;
222
223 /// True if the PMADDWD instruction is slow compared to PMULLD.
224 bool IsPMADDWDSlow = false;
225
226 /// True if unaligned memory accesses of 16-bytes are slow.
227 bool IsUAMem16Slow = false;
228
229 /// True if unaligned memory accesses of 32-bytes are slow.
230 bool IsUAMem32Slow = false;
231
232 /// True if SSE operations can have unaligned memory operands.
233 /// This may require setting a configuration bit in the processor.
234 bool HasSSEUnalignedMem = false;
235
236 /// True if this processor has the CMPXCHG16B instruction;
237 /// this is true for most x86-64 chips, but not the first AMD chips.
238 bool HasCmpxchg16b = false;
239
240 /// True if the LEA instruction should be used for adjusting
241 /// the stack pointer. This is an optimization for Intel Atom processors.
242 bool UseLeaForSP = false;
243
244 /// True if POPCNT instruction has a false dependency on the destination register.
245 bool HasPOPCNTFalseDeps = false;
246
247 /// True if LZCNT/TZCNT instructions have a false dependency on the destination register.
248 bool HasLZCNTFalseDeps = false;
249
250 /// True if its preferable to combine to a single cross-lane shuffle
251 /// using a variable mask over multiple fixed shuffles.
252 bool HasFastVariableCrossLaneShuffle = false;
253
254 /// True if its preferable to combine to a single per-lane shuffle
255 /// using a variable mask over multiple fixed shuffles.
256 bool HasFastVariablePerLaneShuffle = false;
257
258 /// True if vzeroupper instructions should be inserted after code that uses
259 /// ymm or zmm registers.
260 bool InsertVZEROUPPER = false;
261
262 /// True if there is no performance penalty for writing NOPs with up to
263 /// 7 bytes.
264 bool HasFast7ByteNOP = false;
265
266 /// True if there is no performance penalty for writing NOPs with up to
267 /// 11 bytes.
268 bool HasFast11ByteNOP = false;
269
270 /// True if there is no performance penalty for writing NOPs with up to
271 /// 15 bytes.
272 bool HasFast15ByteNOP = false;
273
274 /// True if gather is reasonably fast. This is true for Skylake client and
275 /// all AVX-512 CPUs.
276 bool HasFastGather = false;
277
278 /// True if hardware SQRTSS instruction is at least as fast (latency) as
279 /// RSQRTSS followed by a Newton-Raphson iteration.
280 bool HasFastScalarFSQRT = false;
281
282 /// True if hardware SQRTPS/VSQRTPS instructions are at least as fast
283 /// (throughput) as RSQRTPS/VRSQRTPS followed by a Newton-Raphson iteration.
284 bool HasFastVectorFSQRT = false;
285
286 /// True if 8-bit divisions are significantly faster than
287 /// 32-bit divisions and should be used when possible.
288 bool HasSlowDivide32 = false;
289
290 /// True if 32-bit divides are significantly faster than
291 /// 64-bit divisions and should be used when possible.
292 bool HasSlowDivide64 = false;
293
294 /// True if LZCNT instruction is fast.
295 bool HasFastLZCNT = false;
296
297 /// True if SHLD based rotate is fast.
298 bool HasFastSHLDRotate = false;
299
300 /// True if the processor supports macrofusion.
301 bool HasMacroFusion = false;
302
303 /// True if the processor supports branch fusion.
304 bool HasBranchFusion = false;
305
306 /// True if the processor has enhanced REP MOVSB/STOSB.
307 bool HasERMSB = false;
308
309 /// True if the processor has fast short REP MOV.
310 bool HasFSRM = false;
311
312 /// True if the short functions should be padded to prevent
313 /// a stall when returning too early.
314 bool PadShortFunctions = false;
315
316 /// True if two memory operand instructions should use a temporary register
317 /// instead.
318 bool SlowTwoMemOps = false;
319
320 /// True if the LEA instruction inputs have to be ready at address generation
321 /// (AG) time.
322 bool LEAUsesAG = false;
323
324 /// True if the LEA instruction with certain arguments is slow
325 bool SlowLEA = false;
326
327 /// True if the LEA instruction has all three source operands: base, index,
328 /// and offset or if the LEA instruction uses base and index registers where
329 /// the base is EBP, RBP, or R13
330 bool Slow3OpsLEA = false;
331
332 /// True if INC and DEC instructions are slow when writing to flags
333 bool SlowIncDec = false;
334
335 /// Processor has AVX-512 PreFetch Instructions
336 bool HasPFI = false;
337
338 /// Processor has AVX-512 Exponential and Reciprocal Instructions
339 bool HasERI = false;
340
341 /// Processor has AVX-512 Conflict Detection Instructions
342 bool HasCDI = false;
343
344 /// Processor has AVX-512 population count Instructions
345 bool HasVPOPCNTDQ = false;
346
347 /// Processor has AVX-512 Doubleword and Quadword instructions
348 bool HasDQI = false;
349
350 /// Processor has AVX-512 Byte and Word instructions
351 bool HasBWI = false;
352
353 /// Processor has AVX-512 Vector Length eXtensions
354 bool HasVLX = false;
355
356 /// Processor has PKU extensions
357 bool HasPKU = false;
358
359 /// Processor has AVX-512 Vector Neural Network Instructions
360 bool HasVNNI = false;
361
362 /// Processor has AVX Vector Neural Network Instructions
363 bool HasAVXVNNI = false;
364
365 /// Processor has AVX-512 bfloat16 floating-point extensions
366 bool HasBF16 = false;
367
368 /// Processor supports ENQCMD instructions
369 bool HasENQCMD = false;
370
371 /// Processor has AVX-512 Bit Algorithms instructions
372 bool HasBITALG = false;
373
374 /// Processor has AVX-512 vp2intersect instructions
375 bool HasVP2INTERSECT = false;
376
377 /// Processor supports CET SHSTK - Control-Flow Enforcement Technology
378 /// using Shadow Stack
379 bool HasSHSTK = false;
380
381 /// Processor supports Invalidate Process-Context Identifier
382 bool HasINVPCID = false;
383
384 /// Processor has Software Guard Extensions
385 bool HasSGX = false;
386
387 /// Processor supports Flush Cache Line instruction
388 bool HasCLFLUSHOPT = false;
389
390 /// Processor supports Cache Line Write Back instruction
391 bool HasCLWB = false;
392
393 /// Processor supports Write Back No Invalidate instruction
394 bool HasWBNOINVD = false;
395
396 /// Processor support RDPID instruction
397 bool HasRDPID = false;
398
399 /// Processor supports WaitPKG instructions
400 bool HasWAITPKG = false;
401
402 /// Processor supports PCONFIG instruction
403 bool HasPCONFIG = false;
404
405 /// Processor support key locker instructions
406 bool HasKL = false;
407
408 /// Processor support key locker wide instructions
409 bool HasWIDEKL = false;
410
411 /// Processor supports HRESET instruction
412 bool HasHRESET = false;
413
414 /// Processor supports SERIALIZE instruction
415 bool HasSERIALIZE = false;
416
417 /// Processor supports TSXLDTRK instruction
418 bool HasTSXLDTRK = false;
419
420 /// Processor has AMX support
421 bool HasAMXTILE = false;
422 bool HasAMXBF16 = false;
423 bool HasAMXINT8 = false;
424
425 /// Processor supports User Level Interrupt instructions
426 bool HasUINTR = false;
427
428 /// Processor has a single uop BEXTR implementation.
429 bool HasFastBEXTR = false;
430
431 /// Try harder to combine to horizontal vector ops if they are fast.
432 bool HasFastHorizontalOps = false;
433
434 /// Prefer a left/right scalar logical shifts pair over a shift+and pair.
435 bool HasFastScalarShiftMasks = false;
436
437 /// Prefer a left/right vector logical shifts pair over a shift+and pair.
438 bool HasFastVectorShiftMasks = false;
439
440 /// Prefer a movbe over a single-use load + bswap / single-use bswap + store.
441 bool HasFastMOVBE = false;
442
443 /// Use a retpoline thunk rather than indirect calls to block speculative
444 /// execution.
445 bool UseRetpolineIndirectCalls = false;
446
447 /// Use a retpoline thunk or remove any indirect branch to block speculative
448 /// execution.
449 bool UseRetpolineIndirectBranches = false;
450
451 /// Deprecated flag, query `UseRetpolineIndirectCalls` and
452 /// `UseRetpolineIndirectBranches` instead.
453 bool DeprecatedUseRetpoline = false;
454
455 /// When using a retpoline thunk, call an externally provided thunk rather
456 /// than emitting one inside the compiler.
457 bool UseRetpolineExternalThunk = false;
458
459 /// Prevent generation of indirect call/branch instructions from memory,
460 /// and force all indirect call/branch instructions from a register to be
461 /// preceded by an LFENCE. Also decompose RET instructions into a
462 /// POP+LFENCE+JMP sequence.
463 bool UseLVIControlFlowIntegrity = false;
464
465 /// Enable Speculative Execution Side Effect Suppression
466 bool UseSpeculativeExecutionSideEffectSuppression = false;
467
468 /// Insert LFENCE instructions to prevent data speculatively injected into
469 /// loads from being used maliciously.
470 bool UseLVILoadHardening = false;
471
472 /// Use software floating point for code generation.
473 bool UseSoftFloat = false;
474
475 /// Use alias analysis during code generation.
476 bool UseAA = false;
477
478 /// The minimum alignment known to hold of the stack frame on
479 /// entry to the function and which must be maintained by every function.
480 Align stackAlignment = Align(4);
481
482 Align TileConfigAlignment = Align(4);
483
484 /// Whether function prologues should save register arguments on the stack.
485 bool SaveArgs = false;
486
487 /// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops.
488 ///
489 // FIXME: this is a known good value for Yonah. How about others?
490 unsigned MaxInlineSizeThreshold = 128;
491
492 /// Indicates target prefers 128 bit instructions.
493 bool Prefer128Bit = false;
494
495 /// Indicates target prefers 256 bit instructions.
496 bool Prefer256Bit = false;
497
498 /// Indicates target prefers AVX512 mask registers.
499 bool PreferMaskRegisters = false;
500
501 /// Use Goldmont specific floating point div/sqrt costs.
502 bool UseGLMDivSqrtCosts = false;
503
504 /// What processor and OS we're targeting.
505 Triple TargetTriple;
506
507 /// GlobalISel related APIs.
508 std::unique_ptr<CallLowering> CallLoweringInfo;
509 std::unique_ptr<LegalizerInfo> Legalizer;
510 std::unique_ptr<RegisterBankInfo> RegBankInfo;
511 std::unique_ptr<InstructionSelector> InstSelector;
512
513private:
514 /// Override the stack alignment.
515 MaybeAlign StackAlignOverride;
516
517 /// Preferred vector width from function attribute.
518 unsigned PreferVectorWidthOverride;
519
520 /// Resolved preferred vector width from function attribute and subtarget
521 /// features.
522 unsigned PreferVectorWidth = UINT32_MAX;
523
524 /// Required vector width from function attribute.
525 unsigned RequiredVectorWidth;
526
527 /// True if compiling for 64-bit, false for 16-bit or 32-bit.
528 bool In64BitMode = false;
529
530 /// True if compiling for 32-bit, false for 16-bit or 64-bit.
531 bool In32BitMode = false;
532
533 /// True if compiling for 16-bit, false for 32-bit or 64-bit.
534 bool In16BitMode = false;
535
536 X86SelectionDAGInfo TSInfo;
537 // Ordering here is important. X86InstrInfo initializes X86RegisterInfo which
538 // X86TargetLowering needs.
539 X86InstrInfo InstrInfo;
540 X86TargetLowering TLInfo;
541 X86FrameLowering FrameLowering;
542
543public:
544 /// This constructor initializes the data members to match that
545 /// of the specified triple.
546 ///
547 X86Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU, StringRef FS,
548 const X86TargetMachine &TM, MaybeAlign StackAlignOverride,
549 unsigned PreferVectorWidthOverride,
550 unsigned RequiredVectorWidth);
551
552 const X86TargetLowering *getTargetLowering() const override {
553 return &TLInfo;
554 }
555
556 const X86InstrInfo *getInstrInfo() const override { return &InstrInfo; }
557
558 const X86FrameLowering *getFrameLowering() const override {
559 return &FrameLowering;
560 }
561
562 const X86SelectionDAGInfo *getSelectionDAGInfo() const override {
563 return &TSInfo;
564 }
565
566 const X86RegisterInfo *getRegisterInfo() const override {
567 return &getInstrInfo()->getRegisterInfo();
568 }
569
570 bool getSaveArgs() const { return SaveArgs; }
571
572 unsigned getTileConfigSize() const { return 64; }
573 Align getTileConfigAlignment() const { return TileConfigAlignment; }
574
575 /// Returns the minimum alignment known to hold of the
576 /// stack frame on entry to the function and which must be maintained by every
577 /// function for this subtarget.
578 Align getStackAlignment() const { return stackAlignment; }
579
580 /// Returns the maximum memset / memcpy size
581 /// that still makes it profitable to inline the call.
582 unsigned getMaxInlineSizeThreshold() const { return MaxInlineSizeThreshold; }
583
584 /// ParseSubtargetFeatures - Parses features string setting specified
585 /// subtarget options. Definition of function is auto generated by tblgen.
586 void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
587
588 /// Methods used by Global ISel
589 const CallLowering *getCallLowering() const override;
590 InstructionSelector *getInstructionSelector() const override;
591 const LegalizerInfo *getLegalizerInfo() const override;
592 const RegisterBankInfo *getRegBankInfo() const override;
593
594private:
595 /// Initialize the full set of dependencies so we can use an initializer
596 /// list for X86Subtarget.
597 X86Subtarget &initializeSubtargetDependencies(StringRef CPU,
598 StringRef TuneCPU,
599 StringRef FS);
600 void initSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
601
602public:
603 /// Is this x86_64? (disregarding specific ABI / programming model)
604 bool is64Bit() const {
605 return In64BitMode;
606 }
607
608 bool is32Bit() const {
609 return In32BitMode;
610 }
611
612 bool is16Bit() const {
613 return In16BitMode;
614 }
615
616 /// Is this x86_64 with the ILP32 programming model (x32 ABI)?
617 bool isTarget64BitILP32() const {
618 return In64BitMode && (TargetTriple.isX32() || TargetTriple.isOSNaCl());
619 }
620
621 /// Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
622 bool isTarget64BitLP64() const {
623 return In64BitMode && (!TargetTriple.isX32() && !TargetTriple.isOSNaCl());
624 }
625
626 PICStyles::Style getPICStyle() const { return PICStyle; }
627 void setPICStyle(PICStyles::Style Style) { PICStyle = Style; }
628
629 bool hasX87() const { return HasX87; }
630 bool hasCmpxchg8b() const { return HasCmpxchg8b; }
631 bool hasNOPL() const { return HasNOPL; }
632 // SSE codegen depends on cmovs, and all SSE1+ processors support them.
633 // All 64-bit processors support cmov.
634 bool hasCMov() const { return HasCMov || X86SSELevel >= SSE1 || is64Bit(); }
635 bool hasSSE1() const { return X86SSELevel >= SSE1; }
636 bool hasSSE2() const { return X86SSELevel >= SSE2; }
  [12.1] Field 'X86SSELevel' is >= SSE2
  [13] Returning the value 1, which participates in a condition later
637 bool hasSSE3() const { return X86SSELevel >= SSE3; }
638 bool hasSSSE3() const { return X86SSELevel >= SSSE3; }
639 bool hasSSE41() const { return X86SSELevel >= SSE41; }
640 bool hasSSE42() const { return X86SSELevel >= SSE42; }
641 bool hasAVX() const { return X86SSELevel >= AVX; }
642 bool hasAVX2() const { return X86SSELevel >= AVX2; }
643 bool hasAVX512() const { return X86SSELevel >= AVX512F; }
644 bool hasInt256() const { return hasAVX2(); }
645 bool hasSSE4A() const { return HasSSE4A; }
646 bool hasMMX() const { return X863DNowLevel >= MMX; }
647 bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
648 bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
649 bool hasPOPCNT() const { return HasPOPCNT; }
650 bool hasAES() const { return HasAES; }
651 bool hasVAES() const { return HasVAES; }
652 bool hasFXSR() const { return HasFXSR; }
653 bool hasXSAVE() const { return HasXSAVE; }
654 bool hasXSAVEOPT() const { return HasXSAVEOPT; }
655 bool hasXSAVEC() const { return HasXSAVEC; }
656 bool hasXSAVES() const { return HasXSAVES; }
657 bool hasPCLMUL() const { return HasPCLMUL; }
658 bool hasVPCLMULQDQ() const { return HasVPCLMULQDQ; }
659 bool hasGFNI() const { return HasGFNI; }
660 // Prefer FMA4 to FMA - its better for commutation/memory folding and
661 // has equal or better performance on all supported targets.
662 bool hasFMA() const { return HasFMA; }
663 bool hasFMA4() const { return HasFMA4; }
664 bool hasAnyFMA() const { return hasFMA() || hasFMA4(); }
665 bool hasXOP() const { return HasXOP; }
666 bool hasTBM() const { return HasTBM; }
667 bool hasLWP() const { return HasLWP; }
668 bool hasMOVBE() const { return HasMOVBE; }
669 bool hasRDRAND() const { return HasRDRAND; }
670 bool hasF16C() const { return HasF16C; }
671 bool hasFSGSBase() const { return HasFSGSBase; }
672 bool hasLZCNT() const { return HasLZCNT; }
673 bool hasBMI() const { return HasBMI; }
674 bool hasBMI2() const { return HasBMI2; }
675 bool hasVBMI() const { return HasVBMI; }
676 bool hasVBMI2() const { return HasVBMI2; }
677 bool hasIFMA() const { return HasIFMA; }
678 bool hasRTM() const { return HasRTM; }
679 bool hasADX() const { return HasADX; }
680 bool hasSHA() const { return HasSHA; }
681 bool hasPRFCHW() const { return HasPRFCHW; }
682 bool hasPREFETCHWT1() const { return HasPREFETCHWT1; }
683 bool hasPrefetchW() const {
684 // The PREFETCHW instruction was added with 3DNow but later CPUs gave it
685 // its own CPUID bit as part of deprecating 3DNow. Intel eventually added
686 // it and KNL has another that prefetches to L2 cache. We assume the
687 // L1 version exists if the L2 version does.
688 return has3DNow() || hasPRFCHW() || hasPREFETCHWT1();
689 }
690 bool hasSSEPrefetch() const {
691 // We implicitly enable these when we have a write prefix supporting cache
692 // level OR if we have prfchw, but don't already have a read prefetch from
693 // 3dnow.
694 return hasSSE1() || (hasPRFCHW() && !has3DNow()) || hasPREFETCHWT1();
695 }
696 bool hasRDSEED() const { return HasRDSEED; }
697 bool hasLAHFSAHF() const { return HasLAHFSAHF64 || !is64Bit(); }
698 bool hasMWAITX() const { return HasMWAITX; }
699 bool hasCLZERO() const { return HasCLZERO; }
700 bool hasCLDEMOTE() const { return HasCLDEMOTE; }
701 bool hasMOVDIRI() const { return HasMOVDIRI; }
702 bool hasMOVDIR64B() const { return HasMOVDIR64B; }
703 bool hasPTWRITE() const { return HasPTWRITE; }
704 bool isSHLDSlow() const { return IsSHLDSlow; }
705 bool isPMULLDSlow() const { return IsPMULLDSlow; }
706 bool isPMADDWDSlow() const { return IsPMADDWDSlow; }
707 bool isUnalignedMem16Slow() const { return IsUAMem16Slow; }
708 bool isUnalignedMem32Slow() const { return IsUAMem32Slow; }
709 bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; }
710 bool hasCmpxchg16b() const { return HasCmpxchg16b && is64Bit(); }
711 bool useLeaForSP() const { return UseLeaForSP; }
712 bool hasPOPCNTFalseDeps() const { return HasPOPCNTFalseDeps; }
713 bool hasLZCNTFalseDeps() const { return HasLZCNTFalseDeps; }
714 bool hasFastVariableCrossLaneShuffle() const {
715 return HasFastVariableCrossLaneShuffle;
716 }
717 bool hasFastVariablePerLaneShuffle() const {
718 return HasFastVariablePerLaneShuffle;
719 }
720 bool insertVZEROUPPER() const { return InsertVZEROUPPER; }
721 bool hasFastGather() const { return HasFastGather; }
722 bool hasFastScalarFSQRT() const { return HasFastScalarFSQRT; }
723 bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; }
724 bool hasFastLZCNT() const { return HasFastLZCNT; }
725 bool hasFastSHLDRotate() const { return HasFastSHLDRotate; }
726 bool hasFastBEXTR() const { return HasFastBEXTR; }
727 bool hasFastHorizontalOps() const { return HasFastHorizontalOps; }
728 bool hasFastScalarShiftMasks() const { return HasFastScalarShiftMasks; }
729 bool hasFastVectorShiftMasks() const { return HasFastVectorShiftMasks; }
730 bool hasFastMOVBE() const { return HasFastMOVBE; }
731 bool hasMacroFusion() const { return HasMacroFusion; }
732 bool hasBranchFusion() const { return HasBranchFusion; }
733 bool hasERMSB() const { return HasERMSB; }
734 bool hasFSRM() const { return HasFSRM; }
735 bool hasSlowDivide32() const { return HasSlowDivide32; }
736 bool hasSlowDivide64() const { return HasSlowDivide64; }
737 bool padShortFunctions() const { return PadShortFunctions; }
738 bool slowTwoMemOps() const { return SlowTwoMemOps; }
739 bool LEAusesAG() const { return LEAUsesAG; }
740 bool slowLEA() const { return SlowLEA; }
741 bool slow3OpsLEA() const { return Slow3OpsLEA; }
742 bool slowIncDec() const { return SlowIncDec; }
743 bool hasCDI() const { return HasCDI; }
744 bool hasVPOPCNTDQ() const { return HasVPOPCNTDQ; }
745 bool hasPFI() const { return HasPFI; }
746 bool hasERI() const { return HasERI; }
747 bool hasDQI() const { return HasDQI; }
748 bool hasBWI() const { return HasBWI; }
749 bool hasVLX() const { return HasVLX; }
750 bool hasPKU() const { return HasPKU; }
751 bool hasVNNI() const { return HasVNNI; }
752 bool hasBF16() const { return HasBF16; }
753 bool hasVP2INTERSECT() const { return HasVP2INTERSECT; }
754 bool hasBITALG() const { return HasBITALG; }
755 bool hasSHSTK() const { return HasSHSTK; }
756 bool hasCLFLUSHOPT() const { return HasCLFLUSHOPT; }
757 bool hasCLWB() const { return HasCLWB; }
758 bool hasWBNOINVD() const { return HasWBNOINVD; }
759 bool hasRDPID() const { return HasRDPID; }
760 bool hasWAITPKG() const { return HasWAITPKG; }
761 bool hasPCONFIG() const { return HasPCONFIG; }
762 bool hasSGX() const { return HasSGX; }
763 bool hasINVPCID() const { return HasINVPCID; }
764 bool hasENQCMD() const { return HasENQCMD; }
765 bool hasKL() const { return HasKL; }
766 bool hasWIDEKL() const { return HasWIDEKL; }
767 bool hasHRESET() const { return HasHRESET; }
768 bool hasSERIALIZE() const { return HasSERIALIZE; }
769 bool hasTSXLDTRK() const { return HasTSXLDTRK; }
770 bool hasUINTR() const { return HasUINTR; }
771 bool useRetpolineIndirectCalls() const { return UseRetpolineIndirectCalls; }
772 bool useRetpolineIndirectBranches() const {
773 return UseRetpolineIndirectBranches;
774 }
775 bool hasAVXVNNI() const { return HasAVXVNNI; }
776 bool hasAMXTILE() const { return HasAMXTILE; }
777 bool hasAMXBF16() const { return HasAMXBF16; }
778 bool hasAMXINT8() const { return HasAMXINT8; }
779 bool useRetpolineExternalThunk() const { return UseRetpolineExternalThunk; }
780
781 // These are generic getters that OR together all of the thunk types
782 // supported by the subtarget. Therefore useIndirectThunk*() will return true
783 // if any respective thunk feature is enabled.
784 bool useIndirectThunkCalls() const {
785 return useRetpolineIndirectCalls() || useLVIControlFlowIntegrity();
786 }
787 bool useIndirectThunkBranches() const {
788 return useRetpolineIndirectBranches() || useLVIControlFlowIntegrity();
789 }
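
To make the OR-together behavior above concrete, here is a minimal standalone sketch of the same predicate logic. The ThunkFlags struct and its main() harness are hypothetical stand-ins written only for illustration; the member names mirror the subtarget flags used above, but this is not the real X86Subtarget.

#include <cassert>

// Hypothetical stand-ins for the subtarget's thunk feature flags.
struct ThunkFlags {
  bool UseRetpolineIndirectCalls = false;
  bool UseRetpolineIndirectBranches = false;
  bool UseLVIControlFlowIntegrity = false;

  // Mirrors useIndirectThunkCalls() above.
  bool useIndirectThunkCalls() const {
    return UseRetpolineIndirectCalls || UseLVIControlFlowIntegrity;
  }
  // Mirrors useIndirectThunkBranches() above.
  bool useIndirectThunkBranches() const {
    return UseRetpolineIndirectBranches || UseLVIControlFlowIntegrity;
  }
};

int main() {
  ThunkFlags F;
  F.UseLVIControlFlowIntegrity = true; // LVI CFI enabled, retpolines off
  // LVI control-flow integrity alone is enough to force indirect thunks
  // for both calls and branches.
  assert(F.useIndirectThunkCalls());
  assert(F.useIndirectThunkBranches());
  return 0;
}
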
790
791 bool preferMaskRegisters() const { return PreferMaskRegisters; }
792 bool useGLMDivSqrtCosts() const { return UseGLMDivSqrtCosts; }
793 bool useLVIControlFlowIntegrity() const { return UseLVIControlFlowIntegrity; }
794 bool useLVILoadHardening() const { return UseLVILoadHardening; }
795 bool useSpeculativeExecutionSideEffectSuppression() const {
796 return UseSpeculativeExecutionSideEffectSuppression;
797 }
798
799 unsigned getPreferVectorWidth() const { return PreferVectorWidth; }
800 unsigned getRequiredVectorWidth() const { return RequiredVectorWidth; }
801
802 // Helper functions to determine when we should allow widening to 512-bit
803 // during codegen.
804 // TODO: Currently we're always allowing widening on CPUs without VLX,
805 // because for many cases we don't have a better option.
806 bool canExtendTo512DQ() const {
807 return hasAVX512() && (!hasVLX() || getPreferVectorWidth() >= 512);
808 }
809 bool canExtendTo512BW() const {
810 return hasBWI() && canExtendTo512DQ();
811 }
812
813 // If there are no 512-bit vectors and we prefer not to use 512-bit registers,
814 // disable them in the legalizer.
815 bool useAVX512Regs() const {
816 return hasAVX512() && (canExtendTo512DQ() || RequiredVectorWidth > 256);
817 }
818
819 bool useBWIRegs() const {
820 return hasBWI() && useAVX512Regs();
821 }
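
A hedged sketch of how these width predicates interact, using a plain struct in place of the real subtarget. Only the boolean logic is copied from the getters above; the field values in main() are illustrative assumptions, not defaults taken from any CPU model.

#include <cassert>

// Hypothetical stand-in for the subtarget's vector-width state.
struct WidthPolicy {
  bool HasAVX512 = true;
  bool HasVLX = true;
  bool HasBWI = true;
  unsigned PreferVectorWidth = 256;  // e.g. the CPU prefers 256-bit vectors
  unsigned RequiredVectorWidth = 0;  // minimum width the function demands

  bool canExtendTo512DQ() const {
    return HasAVX512 && (!HasVLX || PreferVectorWidth >= 512);
  }
  bool canExtendTo512BW() const { return HasBWI && canExtendTo512DQ(); }
  bool useAVX512Regs() const {
    return HasAVX512 && (canExtendTo512DQ() || RequiredVectorWidth > 256);
  }
};

int main() {
  WidthPolicy P;
  // With VLX available and a 256-bit preference, 512-bit registers stay off...
  assert(!P.useAVX512Regs());
  // ...until something in the function requires vectors wider than 256 bits.
  P.RequiredVectorWidth = 512;
  assert(P.useAVX512Regs());
  return 0;
}
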
822
823 bool isXRaySupported() const override { return is64Bit(); }
824
825 /// TODO: To be removed later and replaced with suitable properties.
826 bool isAtom() const { return X86ProcFamily == IntelAtom; }
827 bool isSLM() const { return X86ProcFamily == IntelSLM; }
828 bool useSoftFloat() const { return UseSoftFloat; }
829 bool useAA() const override { return UseAA; }
830
831 /// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
832 /// no-sse2). There isn't any reason to disable it if the target processor
833 /// supports it.
834 bool hasMFence() const { return hasSSE2() || is64Bit(); }
835
836 const Triple &getTargetTriple() const { return TargetTriple; }
837
838 bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
839 bool isTargetFreeBSD() const { return TargetTriple.isOSFreeBSD(); }
840 bool isTargetOpenBSD() const { return TargetTriple.isOSOpenBSD(); }
841 bool isTargetDragonFly() const { return TargetTriple.isOSDragonFly(); }
842 bool isTargetSolaris() const { return TargetTriple.isOSSolaris(); }
843 bool isTargetPS4() const { return TargetTriple.isPS4CPU(); }
844
845 bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
846 bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
847 bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
848
849 bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
850 bool isTargetKFreeBSD() const { return TargetTriple.isOSKFreeBSD(); }
851 bool isTargetGlibc() const { return TargetTriple.isOSGlibc(); }
852 bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
853 bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
854 bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); }
855 bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); }
856 bool isTargetMCU() const { return TargetTriple.isOSIAMCU(); }
857 bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); }
858
859 bool isTargetWindowsMSVC() const {
860 return TargetTriple.isWindowsMSVCEnvironment();
861 }
862
863 bool isTargetWindowsCoreCLR() const {
864 return TargetTriple.isWindowsCoreCLREnvironment();
865 }
866
867 bool isTargetWindowsCygwin() const {
868 return TargetTriple.isWindowsCygwinEnvironment();
869 }
870
871 bool isTargetWindowsGNU() const {
872 return TargetTriple.isWindowsGNUEnvironment();
873 }
874
875 bool isTargetWindowsItanium() const {
876 return TargetTriple.isWindowsItaniumEnvironment();
877 }
878
879 bool isTargetCygMing() const { return TargetTriple.isOSCygMing(); }
880
881 bool isOSWindows() const { return TargetTriple.isOSWindows(); }
882
883 bool isTargetWin64() const { return In64BitMode && isOSWindows(); }
884
885 bool isTargetWin32() const { return !In64BitMode && isOSWindows(); }
886
887 bool isPICStyleGOT() const { return PICStyle == PICStyles::Style::GOT; }
888 bool isPICStyleRIPRel() const { return PICStyle == PICStyles::Style::RIPRel; }
889
890 bool isPICStyleStubPIC() const {
891 return PICStyle == PICStyles::Style::StubPIC;
892 }
893
894 bool isPositionIndependent() const;
895
896 bool isCallingConvWin64(CallingConv::ID CC) const {
897 switch (CC) {
898 // On Win64, all these conventions just use the default convention.
899 case CallingConv::C:
900 case CallingConv::Fast:
901 case CallingConv::Tail:
902 case CallingConv::Swift:
903 case CallingConv::SwiftTail:
904 case CallingConv::X86_FastCall:
905 case CallingConv::X86_StdCall:
906 case CallingConv::X86_ThisCall:
907 case CallingConv::X86_VectorCall:
908 case CallingConv::Intel_OCL_BI:
909 return isTargetWin64();
910 // This convention allows using the Win64 convention on other targets.
911 case CallingConv::Win64:
912 return true;
913 // This convention allows using the SysV convention on Windows targets.
914 case CallingConv::X86_64_SysV:
915 return false;
916 // Otherwise, the convention is unrecognized; conservatively treat it as not Win64.
917 default:
918 return false;
919 }
920 }
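
The effect of the switch can be summarized with a small standalone mirror. The enum below is a hypothetical, heavily reduced stand-in for llvm::CallingConv::ID, and the bool parameter stands in for isTargetWin64(); it is a sketch of the dispatch above, not the real implementation.

#include <cassert>

// Hypothetical, simplified mirror of isCallingConvWin64() above.
enum class CC { C, Win64, X86_64_SysV, Other };

bool isCallingConvWin64(CC Conv, bool TargetIsWin64) {
  switch (Conv) {
  case CC::C:           return TargetIsWin64; // default CC follows the target
  case CC::Win64:       return true;          // Win64 convention on any target
  case CC::X86_64_SysV: return false;         // SysV convention even on Windows
  default:              return false;         // unknown conventions are not Win64
  }
}

int main() {
  assert( isCallingConvWin64(CC::Win64, /*TargetIsWin64=*/false));
  assert(!isCallingConvWin64(CC::X86_64_SysV, /*TargetIsWin64=*/true));
  assert( isCallingConvWin64(CC::C, /*TargetIsWin64=*/true));
  return 0;
}
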
921
922 /// Classify a global variable reference for the current subtarget according
923 /// to how we should reference it in a non-pcrel context.
924 unsigned char classifyLocalReference(const GlobalValue *GV) const;
925
926 unsigned char classifyGlobalReference(const GlobalValue *GV,
927 const Module &M) const;
928 unsigned char classifyGlobalReference(const GlobalValue *GV) const;
929
930 /// Classify a global function reference for the current subtarget.
931 unsigned char classifyGlobalFunctionReference(const GlobalValue *GV,
932 const Module &M) const;
933 unsigned char classifyGlobalFunctionReference(const GlobalValue *GV) const;
934
935 /// Classify a blockaddress reference for the current subtarget according to
936 /// how we should reference it in a non-pcrel context.
937 unsigned char classifyBlockAddressReference() const;
938
939 /// Return true if the subtarget allows calls to immediate address.
940 bool isLegalToCallImmediateAddr() const;
941
942 /// If we are using indirect thunks, we need to expand indirectbr to avoid it
943 /// lowering to an actual indirect jump.
944 bool enableIndirectBrExpand() const override {
945 return useIndirectThunkBranches();
946 }
947
948 /// Enable the MachineScheduler pass for all X86 subtargets.
949 bool enableMachineScheduler() const override { return true; }
950
951 bool enableEarlyIfConversion() const override;
952
953 void getPostRAMutations(std::vector<std::unique_ptr<ScheduleDAGMutation>>
954 &Mutations) const override;
955
956 AntiDepBreakMode getAntiDepBreakMode() const override {
957 return TargetSubtargetInfo::ANTIDEP_CRITICAL;
958 }
959
960 bool enableAdvancedRASplitCost() const override { return false; }
961};
962
963} // end namespace llvm
964
965#endif // LLVM_LIB_TARGET_X86_X86SUBTARGET_H